In [None]:
# !pip install --upgrade google-cloud-pubsublite

<h4>Publish data for Testing</h4>

In [None]:
import json
from google.cloud.pubsublite.cloudpubsub import PublisherClient
from google.cloud.pubsublite.types import MessageMetadata
from google.cloud import storage
from random import randrange
import time,datetime
# Name of the GCS bucket that holds the sample events file -- fill in before running.
bucket_name = ""
# Pub/Sub Lite topic handed to PublisherClient.publish -- fill in before running.
topic = ""

# Timestamp layout written into every event's 'event_date' field.
EVENT_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

# Load the sample events from Cloud Storage. The loop below treats the parsed
# JSON as a sequence of dict rows.
storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name)
blob = bucket.blob("gcs_bucket_path/events.json")
# download_as_bytes() is the supported spelling; download_as_string() is a deprecated alias.
data = json.loads(blob.download_as_bytes(client=None))

with PublisherClient() as publisher_client:
    # Only the first ten rows are sent: this cell is meant for a quick test.
    for row in data[:10]:
        # Stamp the event with the current local time.
        row['event_date'] = datetime.datetime.now().strftime(EVENT_DATE_FORMAT)
        # A separate name keeps `data` bound to the loaded event list instead of
        # being overwritten by the bytes of the last message.
        payload = json.dumps(row).encode("utf-8")
        api_future = publisher_client.publish(topic, data=payload)
        # result() waits for the publish to finish and yields the encoded message id.
        message_id = api_future.result()
        message_metadata = MessageMetadata.decode(message_id)
        print(
            f"Published {payload} to partition {message_metadata.partition.value} and offset {message_metadata.cursor.offset}."
        )


<h4>Publish data for Tumbling Window</h4>

In [None]:
import json
from google.cloud.pubsublite.cloudpubsub import PublisherClient
from google.cloud.pubsublite.types import MessageMetadata
from google.cloud import storage
import random
import time,datetime
# Name of the GCS bucket that holds the sample events file -- fill in before running.
bucket_name = ""

# Pub/Sub Lite topic handed to PublisherClient.publish -- fill in before running.
topic = ""

# Timestamp layout written into every event's 'event_date' field.
EVENT_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

# Load the sample events from Cloud Storage. The loop below treats the parsed
# JSON as a sequence of dict rows.
storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name)
blob = bucket.blob("gcs_bucket_path/events.json")
# download_as_bytes() is the supported spelling; download_as_string() is a deprecated alias.
data = json.loads(blob.download_as_bytes(client=None))

with PublisherClient() as publisher_client:
    for row in data:
        # Stamp the event with the current local time.
        row['event_date'] = datetime.datetime.now().strftime(EVENT_DATE_FORMAT)
        # A separate name keeps `data` bound to the loaded event list instead of
        # being overwritten by the bytes of the last message.
        payload = json.dumps(row).encode("utf-8")
        api_future = publisher_client.publish(topic, data=payload)
        # result() waits for the publish to finish and yields the encoded message id.
        message_id = api_future.result()
        message_metadata = MessageMetadata.decode(message_id)
        print(
            f"Published {payload} to partition {message_metadata.partition.value} and offset {message_metadata.cursor.offset}."
        )
        # Wait a random 0-100 seconds (inclusive) so events arrive at an uneven rate.
        time.sleep(random.randint(0, 100))


<h4>Publish data for Window with Watermark</h4>

In [None]:
import json
from google.cloud.pubsublite.cloudpubsub import PublisherClient
from google.cloud.pubsublite.types import MessageMetadata
from google.cloud import storage
import random
import time,datetime
# Name of the GCS bucket that holds the sample events file -- fill in before running.
bucket_name = ""

# Pub/Sub Lite topic handed to PublisherClient.publish -- fill in before running.
topic = ""

# Timestamp layout written into every event's 'event_date' field.
EVENT_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
# How far an occasional event's timestamp is pushed ahead of the wall clock.
EVENT_TIME_SHIFT = datetime.timedelta(minutes=2)

# Load the sample events from Cloud Storage. The loop below treats the parsed
# JSON as a sequence of dict rows.
storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name)
blob = bucket.blob("gcs_bucket_path/events.json")
# download_as_bytes() is the supported spelling; download_as_string() is a deprecated alias.
data = json.loads(blob.download_as_bytes(client=None))

with PublisherClient() as publisher_client:
    for row in data:
        event_time = datetime.datetime.now()

        # Roughly one row in 101 gets a shifted event time. The draw is compared
        # against a constant rather than a row counter, so the chance stays the
        # same no matter how many rows have already been sent.
        if random.randint(0, 100) == 0:
            # The shift must be applied to the datetime *before* formatting:
            # adding a timedelta to the formatted string raises TypeError.
            event_time += EVENT_TIME_SHIFT
        row['event_date'] = event_time.strftime(EVENT_DATE_FORMAT)

        # A separate name keeps `data` bound to the loaded event list instead of
        # being overwritten by the bytes of the last message.
        payload = json.dumps(row).encode("utf-8")
        api_future = publisher_client.publish(topic, data=payload)
        # result() waits for the publish to finish and yields the encoded message id.
        message_id = api_future.result()
        message_metadata = MessageMetadata.decode(message_id)
        print(
            f"Published {payload} to partition {message_metadata.partition.value} and offset {message_metadata.cursor.offset}."
        )
        # Wait a random 0-100 seconds (inclusive) so events arrive at an uneven rate.
        time.sleep(random.randint(0, 100))


<h4>Write data to Pub/Sub Lite - Product Discount Project</h4>

In [None]:
import json
from google.cloud.pubsublite.cloudpubsub import PublisherClient
from google.cloud.pubsublite.types import MessageMetadata
from google.cloud import storage
import random
import time,datetime
# Name of the GCS bucket that holds the sample events file -- fill in before running.
bucket_name = ""

# Pub/Sub Lite topic handed to PublisherClient.publish -- fill in before running.
topic = ""

# Timestamp layout written into every event's 'event_date' field.
EVENT_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

# Load the sample events from Cloud Storage. The loop below treats the parsed
# JSON as a sequence of dict rows.
storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name)
blob = bucket.blob("gcs_bucket_path/events.json")
# download_as_bytes() is the supported spelling; download_as_string() is a deprecated alias.
data = json.loads(blob.download_as_bytes(client=None))

with PublisherClient() as publisher_client:
    for row in data:
        # Stamp the event with the current local time.
        row['event_date'] = datetime.datetime.now().strftime(EVENT_DATE_FORMAT)
        # A separate name keeps `data` bound to the loaded event list instead of
        # being overwritten by the bytes of the last message.
        payload = json.dumps(row).encode("utf-8")
        api_future = publisher_client.publish(topic, data=payload)
        # result() waits for the publish to finish and yields the encoded message id.
        message_id = api_future.result()
        message_metadata = MessageMetadata.decode(message_id)
        print(
            f"Published {payload} to partition {message_metadata.partition.value} and offset {message_metadata.cursor.offset}."
        )
        # Short random pause of 0-2 seconds (inclusive) between messages.
        time.sleep(random.randint(0, 2))


<h3>Join two streaming dataframes | Generate browsing events</h3>

In [None]:
import json
from google.cloud.pubsublite.cloudpubsub import PublisherClient
from google.cloud.pubsublite.types import MessageMetadata
from google.cloud import storage
import random,secrets
import time,datetime 
# Name of the GCS bucket that holds the sample events file -- fill in before running.
bucket_name = ""

storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name)
blob = bucket.blob("gcs_bucket_path/events.json")

# Pub/Sub Lite topic that receives the browsing events -- fill in before running.
browsing_events_topic = ""

# Timestamp layout written into every event's 'event_date' field.
EVENT_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

# The loop below treats the parsed JSON as a sequence of dict rows.
# download_as_bytes() is the supported spelling; download_as_string() is a deprecated alias.
data = json.loads(blob.download_as_bytes(client=None))

# Pools that the per-event field values are drawn from.
user_ids = [99185, 77533, 4708, 15657, 2194, 22177, 28923]
product_ids = [27569, 27445]
event_types = ["cart", "view_product"]
uris = ["/add_to_cart", "/product_landing_page", "/add_to_wishlist"]

with PublisherClient() as publisher_client:
    for row in data:
        # Overwrite the row's date, product, event type, uri and user with the
        # current local time and randomly chosen values from the pools above.
        row['event_date'] = datetime.datetime.now().strftime(EVENT_DATE_FORMAT)
        row['product_id'] = secrets.choice(product_ids)
        row['event_type'] = secrets.choice(event_types)
        row['uri'] = secrets.choice(uris)
        row['user_id'] = secrets.choice(user_ids)

        # A separate name keeps `data` bound to the loaded event list instead of
        # being overwritten by the bytes of the last message.
        payload = json.dumps(row).encode("utf-8")
        api_future = publisher_client.publish(browsing_events_topic, data=payload)
        # result() waits for the publish to finish and yields the encoded message id.
        message_id = api_future.result()
        message_metadata = MessageMetadata.decode(message_id)
        print(
            f"Published {payload} to partition {message_metadata.partition.value} and offset {message_metadata.cursor.offset}."
        )

        # Roughly one message in 101 is followed by a long 20-60 second gap; the
        # rest are followed by a 1-10 second gap. The draw is compared against a
        # constant rather than a row counter, so long gaps keep occurring no
        # matter how many rows have already been sent.
        if random.randint(0, 100) == 0:
            time.sleep(random.randint(20, 60))
        else:
            time.sleep(random.randint(1, 10))
        

<h3>Join two streaming dataframes | Generate purchase events</h3>

In [None]:
import json
from google.cloud.pubsublite.cloudpubsub import PublisherClient
from google.cloud.pubsublite.types import MessageMetadata
from google.cloud import storage
import random,secrets
import time,datetime 
# Name of the GCS bucket that holds the sample events file -- fill in before running.
bucket_name = ""

storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name)

blob = bucket.blob("gcs_bucket_path/events.json")

# Pub/Sub Lite topic that receives the purchase events -- fill in before running.
purchase_topic = ""

# Timestamp layout written into every event's 'event_date' field.
EVENT_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

# The events file is only used to decide how many purchase events to send:
# one purchase event is published per entry, and the entry's content is ignored.
# download_as_bytes() is the supported spelling; download_as_string() is a deprecated alias.
data = json.loads(blob.download_as_bytes(client=None))

# Pools that the per-event field values are drawn from.
user_ids = [
    99017, 57831, 35360, 36957, 56074, 59698,
    24254, 54923, 35969, 9079, 57355,
]
product_ids = [27569, 27445, 27457, 27466]

with PublisherClient() as publisher_client:
    for _ in data:
        # Build a fresh event each time instead of mutating one shared dict.
        purchase_event = {
            'event_date': datetime.datetime.now().strftime(EVENT_DATE_FORMAT),
            'product_id': secrets.choice(product_ids),
            'user_id': secrets.choice(user_ids),
        }
        # A separate name keeps `data` bound to the loaded event list instead of
        # being overwritten by the bytes of the last message.
        payload = json.dumps(purchase_event).encode("utf-8")
        api_future = publisher_client.publish(purchase_topic, data=payload)
        # result() waits for the publish to finish and yields the encoded message id.
        message_id = api_future.result()
        message_metadata = MessageMetadata.decode(message_id)
        print(
            f"Published {payload} to partition {message_metadata.partition.value} and offset {message_metadata.cursor.offset}."
        )

        # Roughly one message in 101 is followed by a long 20-60 second gap; the
        # rest are followed by a 1-10 second gap. The draw is compared against a
        # constant rather than a row counter, so long gaps keep occurring no
        # matter how many events have already been sent.
        if random.randint(0, 100) == 0:
            time.sleep(random.randint(20, 60))
        else:
            time.sleep(random.randint(1, 10))
