In [None]:
# Install or upgrade the packages this notebook relies on: the Pub/Sub client
# library, Apache Beam with its GCP extras, and google-apitools.
!pip install --upgrade google-cloud-pubsub
!pip install --upgrade 'apache-beam[gcp]'
!pip install --upgrade google-apitools

In [None]:
# Capture the value of the GOOGLE_CLOUD_PROJECT environment variable using the
# IPython %env magic; it is used below to build Pub/Sub resource names.
PROJECT_ID = %env GOOGLE_CLOUD_PROJECT

In [None]:
import apache_beam as beam
import sys

from apache_beam.transforms.combiners import Count
from apache_beam.options.pipeline_options import PipelineOptions, StandardOptions

# Replace argv with just a script name (presumably so that PipelineOptions()
# does not pick up the notebook kernel's own command-line flags).
sys.argv = ["12_session_window.ipynb"]

# Text encoding used when decoding Pub/Sub message payloads.
encoding = "utf-8"

# Fully qualified Pub/Sub resource names, all rooted at the active project.
project_path = "projects/{}".format(PROJECT_ID)
input_subscription = "{}/subscriptions/ratings-sub".format(project_path)
ratings_topic = "{}/topics/ratings".format(project_path)
rating_count_topic = "{}/topics/ratings_count_window".format(project_path)

# Build the pipeline options and switch on streaming mode through the
# StandardOptions view, which shares state with `options`.
options = PipelineOptions()
standard_options = options.view_as(StandardOptions)
standard_options.streaming = True

p = beam.Pipeline(options=options)


def format_output(el):
    """Render one (key, count) pair as a human-readable message.

    Args:
        el: A two-item iterable; the first item is reported as the movie id
            and the second as the number of ratings.

    Returns:
        A string of the form "<count> ratings for movieId <key>".
    """
    movie_id, ratings = el
    template = "{r} ratings for movieId {id}"
    return template.format(id=movie_id, r=ratings)


# Streaming pipeline: read raw rating records from the ratings topic, key each
# record, group the keys into session windows, count the records per key in
# each session, and publish a formatted message to the rating-count topic.
pubsub_pipeline = (
    p
    | "Read from PubSub topic" >> beam.io.ReadFromPubSub(topic=ratings_topic)
    # Payloads arrive as bytes; decode them and split the comma-separated fields.
    | "Split the records by comma" >> beam.Map(lambda row: row.decode(encoding).split(","))
    # NOTE(review): the key is taken from column 2 and format_output labels it
    # as a movieId -- confirm that column 2 really holds the movie id in the
    # incoming records.
    | "Form KV pair" >> beam.Map(lambda r: (r[2], r[0]))
    # Session windows with a gap size of 25 (seconds, per the Beam Sessions API).
    | "Window" >> beam.WindowInto(beam.window.Sessions(25))
    | "Count the ratings" >> Count.PerKey()
    | "Format output" >> beam.Map(format_output)
    # WriteStringsToPubSub is deprecated in favour of WriteToPubSub, which
    # expects bytes payloads, so encode the formatted strings explicitly.
    | "Encode output" >> beam.Map(lambda line: line.encode(encoding))
    | "Write to PubSub" >> beam.io.WriteToPubSub(topic=rating_count_topic)
)

# Start the pipeline and block until it terminates; for a streaming pipeline
# this keeps the cell running until the job is cancelled or fails.
result = p.run()
result.wait_until_finish()