In [None]:
# Import KafkaConsumer from kafka library to connect and consume messages from Kafka topics
from kafka import KafkaConsumer

# Import sleep from time module to add delays in the script execution for timing control
from time import sleep

# Import dumps and loads from json module to serialize (convert Python object into JSON string) and deserialize (convert JSON string into Python object) data
from json import dumps, loads

# Import json module for JSON operations like parsing JSON data
import json

# Import S3FileSystem from s3fs to interact with Amazon S3 using file system semantics, allowing for easy manipulation of objects in S3 as if they were files
from s3fs import S3FileSystem

In [None]:
# Subscribe to the 'konarkykafkatest' Kafka topic. Each record value is
# decoded from UTF-8 bytes and parsed as JSON before it reaches the caller.
def _decode_json(raw_bytes):
    """Decode a UTF-8 byte payload and parse it as JSON."""
    return loads(raw_bytes.decode('utf-8'))


consumer = KafkaConsumer(
    'konarkykafkatest',
    # Placeholder broker address: substitute the real host before running
    # (port 9092 is the one used in this notebook).
    bootstrap_servers=['Add your EC2 public IP here:9092'],
    value_deserializer=_decode_json,
)

In [None]:
# Build the s3fs filesystem handle that the write loop further down uses to
# open objects in S3 with file-like semantics.
# NOTE(review): no credentials are passed here, so they presumably come from
# the environment / AWS configuration of the machine running the kernel — confirm.
s3 = S3FileSystem()

In [None]:
# Persist every consumed record as its own JSON file in the S3 bucket.
# The file name carries the position of the record within this run.
# NOTE(review): enumerate() restarts at 0 each time this cell runs, so names
# repeat across runs and a re-run will presumably overwrite earlier objects — confirm.
# NOTE(review): the bucket name contains underscores, which current S3 naming
# rules appear to disallow — verify the bucket name before running.
# NOTE(review): no consumer timeout is configured above, so this loop presumably
# blocks waiting for new messages until the kernel is interrupted — confirm.
for index, message in enumerate(consumer):
    target_path = "s3://konarky_kafka_project/stock_market_{}.json".format(index)
    with s3.open(target_path, 'w') as out_file:
        # message.value is already the deserialized JSON payload; write it back out as JSON text.
        json.dump(message.value, out_file)