# In [5]:  (notebook cell prompt, kept as a comment so the file parses as Python)
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import GoogleCloudOptions
from apache_beam.transforms.window import FixedWindows

class PrintMessages(beam.DoFn):
    """Pass-through DoFn that prints every element it sees."""

    def process(self, element):
        """Print ``element`` to standard output, then emit it unchanged.

        Args:
            element: The value handed to this DoFn; it is printed and
                yielded as-is, with no conversion.

        Yields:
            The same ``element`` that was received.
        """
        print(element)
        yield element

def run():
    """Build and launch the streaming Pub/Sub -> BigQuery pipeline.

    The pipeline reads messages from the ``my-topic-sub`` subscription,
    decodes each payload as UTF-8, assigns the messages to 60-second fixed
    windows, prints every message, and appends it to the BigQuery table
    ``my-another-394512:mydataset.pubsub`` as a row with a single ``msg``
    STRING column.

    The pipeline is launched with ``pipeline.run()``; this function does not
    wait for the job to finish and returns ``None``.
    """
    pipeline_options = PipelineOptions()

    # Google Cloud settings (project, job name, GCS locations, region).
    # NOTE: no runner is selected here; it is whatever PipelineOptions()
    # resolves (command-line flags or Beam's default).
    google_cloud_options = pipeline_options.view_as(GoogleCloudOptions)
    google_cloud_options.project = 'my-another-394512'
    google_cloud_options.job_name = 'pubsub-to-bq'
    google_cloud_options.staging_location = 'gs://maniprakash-bucket/staging'
    google_cloud_options.temp_location = 'gs://maniprakash-bucket/temp'
    google_cloud_options.region = 'europe-west2'

    # Enable streaming mode (required for an unbounded Pub/Sub source)
    pipeline_options.view_as(beam.options.pipeline_options.StandardOptions).streaming = True

    # Create the pipeline
    pipeline = beam.Pipeline(options=pipeline_options)

    # Read messages from the Pub/Sub subscription and decode the byte payloads.
    # The source already attaches a timestamp to every element, so no manual
    # timestamping is done: stamping everything with epoch 0 would put all
    # messages into a single 1970 window and defeat the windowing below.
    messages = (
        pipeline
        | 'Read from Pub/Sub' >> beam.io.ReadFromPubSub(subscription='projects/my-another-394512/subscriptions/my-topic-sub')
        | 'Decode message' >> beam.Map(lambda x: x.decode('utf-8'))
    )

    # Apply fixed windowing
    windowed_messages = messages | 'Apply Windowing' >> beam.WindowInto(FixedWindows(60))  # 60-second windows

    # Print messages to console
    windowed_messages | 'Print Messages' >> beam.ParDo(PrintMessages())

    # WriteToBigQuery expects each element to be a dict keyed by column name,
    # so wrap the decoded text in a row that matches the 'msg:STRING' schema.
    rows = windowed_messages | 'Format as BigQuery row' >> beam.Map(lambda msg: {'msg': msg})

    # Write messages to BigQuery
    table_spec = 'my-another-394512:mydataset.pubsub'
    rows | 'Write to BigQuery' >> beam.io.WriteToBigQuery(
        table_spec,
        schema='msg:STRING',  # Single STRING column holding the message text
        write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
        create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED
    )

    # Launch the pipeline; wait_until_finish() is intentionally not called here.
    pipeline.run()

# Script entry point: launch the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    run()




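# Non-code residue, apparently from the notebook export; kept as comments so
# the module does not raise NameError when executed.
# Netpulse
# Netpulse
# Netpulse
# Netpulse
# Netpulse
# Netpulse
# Netpulse
# Netpulse
# Netpulse
# Netpulse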
