### Streaming data simulation

In [1]:
import apache_beam as beam
from apache_beam.transforms.window import FixedWindows
from apache_beam.options.pipeline_options import PipelineOptions
import time

# Fake a live feed: hand out the file's lines one at a time, pausing after each
def simulate_streaming(file_name, interval=1):
    """Yield the lines of ``file_name`` one by one, sleeping between them.

    Args:
        file_name: Path of the text file to read.
        interval: Seconds to sleep after each yielded line (default 1).

    Yields:
        Each line exactly as read from the file, line terminator included.
    """
    with open(file_name, 'r') as source:
        while True:
            record = source.readline()
            if not record:
                # readline() returns '' only at end of file.
                break
            yield record
            time.sleep(interval)

# Placeholder processing step used by the pipeline below
def process_window(elements):
    """Return ``elements`` unchanged.

    This is an identity hook: replace the body with real processing logic.
    The pipeline applies it through ``beam.Map``, so it is invoked once per
    element of the PCollection.
    """
    untouched = elements
    return untouched

# Define the pipeline.
#
# beam.Create consumes its input while the pipeline graph is being built and
# does not give the elements individual event times, so windowing its raw
# output would place every line in the same window. To make the fixed windows
# meaningful, each line is paired with the wall-clock time at which
# simulate_streaming() produced it, and that time is attached as the element's
# event timestamp before WindowInto runs.
options = PipelineOptions(streaming=True)
with beam.Pipeline(options=options) as pipeline:
    (
        pipeline
        | 'Simulate Streaming' >> beam.Create(
            (line, time.time()) for line in simulate_streaming('data.txt'))
        # Also drop the trailing newline: WriteToText appends its own, so
        # keeping it would emit a blank line after every record.
        | 'Attach Event Timestamps' >> beam.Map(
            lambda stamped: beam.window.TimestampedValue(
                stamped[0].rstrip('\n'), stamped[1]))
        | 'Assign to Fixed Windows' >> beam.WindowInto(FixedWindows(size=10))  # 10 seconds window
        # NOTE: beam.Map calls process_window once per element, not per window.
        | 'Process Window' >> beam.Map(process_window)
        | 'Write Results' >> beam.io.WriteToText('windowed_results.txt')
    )






### Overlapping windows

In [2]:
from apache_beam.transforms.window import SlidingWindows
from apache_beam.options.pipeline_options import PipelineOptions
import time

# Emulate a stream by emitting one line of the file per tick
def simulate_streaming(file_name, interval=1):
    """Generate the lines of ``file_name`` with a pause after each one.

    Args:
        file_name: Path of the text file to read.
        interval: Seconds to sleep after each yielded line (default 1).

    Yields:
        Each line as read from the file, including its line terminator.
    """
    with open(file_name, 'r') as handle:
        # iter(callable, sentinel) keeps calling readline() until it
        # returns '' (end of file).
        for record in iter(handle.readline, ''):
            yield record
            time.sleep(interval)

# Hook for per-element processing in the sliding-window pipeline
def process_window(elements):
    """Pass ``elements`` through untouched.

    Identity placeholder; put real processing logic here. It is applied via
    ``beam.Map`` in the pipeline, which calls it once for each element.
    """
    passthrough = elements
    return passthrough

# Define the pipeline.
#
# beam.Create consumes its input while the pipeline graph is being built and
# does not give the elements individual event times, so windowing its raw
# output would place every line in the same set of windows. To make the
# sliding windows meaningful, each line is paired with the wall-clock time at
# which simulate_streaming() produced it, and that time is attached as the
# element's event timestamp before WindowInto runs.
options = PipelineOptions(streaming=True)
with beam.Pipeline(options=options) as pipeline:
    (
        pipeline
        | 'Simulate Streaming' >> beam.Create(
            (line, time.time()) for line in simulate_streaming('data.txt'))
        # Also drop the trailing newline: WriteToText appends its own, so
        # keeping it would emit a blank line after every record.
        | 'Attach Event Timestamps' >> beam.Map(
            lambda stamped: beam.window.TimestampedValue(
                stamped[0].rstrip('\n'), stamped[1]))
        # 10 seconds window that slides every 5 seconds
        | 'Assign to Sliding Windows' >> beam.WindowInto(SlidingWindows(size=10, period=5))
        # NOTE: beam.Map calls process_window once per element, not per window.
        | 'Process Window' >> beam.Map(process_window)
        | 'Write Results' >> beam.io.WriteToText('sliding_window_results.txt')
    )




### Session windows

In [3]:
from apache_beam.transforms.window import Sessions
from apache_beam.options.pipeline_options import PipelineOptions
import time

# Drip-feed the file's lines to mimic records arriving over time
def simulate_streaming(file_name, interval=1):
    """Produce the lines of ``file_name`` lazily, sleeping after each.

    Args:
        file_name: Path of the text file to read.
        interval: Seconds to sleep after each yielded line (default 1).

    Yields:
        Every line as read from the file, with its line terminator intact.
    """
    with open(file_name, 'r') as reader:
        pending = next(reader, None)
        while pending is not None:
            yield pending
            time.sleep(interval)
            # Fetch the following line only after the pause.
            pending = next(reader, None)

# Processing stub for the session-window pipeline
def process_window(elements):
    """Give back ``elements`` exactly as received.

    No transformation is performed yet; add processing logic here. The
    pipeline wires this in with ``beam.Map``, so each call receives a single
    element.
    """
    outcome = elements
    return outcome

# Define the pipeline.
#
# beam.Create consumes its input while the pipeline graph is being built and
# does not give the elements individual event times, so session windowing on
# its raw output could never observe a gap between records. To make the
# sessions meaningful, each line is paired with the wall-clock time at which
# simulate_streaming() produced it, and that time is attached as the element's
# event timestamp before WindowInto runs.
options = PipelineOptions(streaming=True)
with beam.Pipeline(options=options) as pipeline:
    (
        pipeline
        | 'Simulate Streaming' >> beam.Create(
            (line, time.time()) for line in simulate_streaming('data.txt'))
        # Also drop the trailing newline: WriteToText appends its own, so
        # keeping it would emit a blank line after every record.
        | 'Attach Event Timestamps' >> beam.Map(
            lambda stamped: beam.window.TimestampedValue(
                stamped[0].rstrip('\n'), stamped[1]))
        | 'Assign to Session Windows' >> beam.WindowInto(Sessions(gap_size=5*60))  # 5 minutes gap size
        # NOTE: beam.Map calls process_window once per element, not per window.
        | 'Process Window' >> beam.Map(process_window)
        | 'Write Results' >> beam.io.WriteToText('session_window_results.txt')
    )


