In [1]:
from kafka.admin import KafkaAdminClient, NewTopic
from kafka.errors import TopicAlreadyExistsError
from kafka import KafkaProducer
import json

In [2]:
# --- Notebook configuration -------------------------------------------
# Broker address that both the admin client and the producer connect to.
kafka_bootstrap_servers = 'kafka:9092'

# Topic used for the test notifications, with its layout settings:
# one partition and one replica.
topic_name = 'notification_topic_test'
topic_partitions, topic_replication_factor = 1, 1

In [3]:
%%time

# Ensure the notification topic exists, creating it when it is missing.
#
# Reads the configuration names kafka_bootstrap_servers, topic_name,
# topic_partitions and topic_replication_factor, and prints either
# "Topic created successfully." or "Topic already exists.".
admin_client = KafkaAdminClient(bootstrap_servers=kafka_bootstrap_servers)

try:
    # Check if the topic already exists
    topic_exists = topic_name in admin_client.list_topics()

    if topic_exists:
        print("Topic already exists.")
    else:
        new_topic = NewTopic(
            name=topic_name,
            num_partitions=topic_partitions,
            replication_factor=topic_replication_factor,
        )
        try:
            admin_client.create_topics(new_topics=[new_topic])
            print("Topic created successfully.")
        except TopicAlreadyExistsError:
            # The topic appeared between the list_topics() check and the
            # create_topics() call; treat this the same as "already there".
            print("Topic already exists.")
finally:
    # Always release the admin connection, even when listing or creating
    # the topic raises, so a failed run does not leak the client.
    admin_client.close()

Topic created successfully.
CPU times: user 66.1 ms, sys: 7.28 ms, total: 73.4 ms
Wall time: 838 ms


In [5]:
%%time
# Publish one JSON-encoded test notification to `topic_name`.
#
# Prints "Notification sent successfully." only after the send future has
# resolved; any failure (connection, serialization, delivery) is reported
# as "Failed to send notification: <error>" instead of raising.
producer = None
try:
    # Create a Kafka producer that serializes values as UTF-8 JSON
    producer = KafkaProducer(bootstrap_servers=kafka_bootstrap_servers,
                             value_serializer=lambda v: json.dumps(v).encode('utf-8'))

    # Sample data to send as a notification
    notification_data = {"message": "Hello, ReactJS!"}

    # send() is asynchronous and returns a future; keep it so the outcome
    # of the delivery can be checked below.
    send_future = producer.send(topic_name, value=notification_data)

    # Push buffered records out, then wait on the future so a delivery
    # error is raised here rather than silently dropped.
    producer.flush()
    send_future.get(timeout=10)

    print("Notification sent successfully.")

except Exception as e:
    # Deliberately broad: this demo cell reports the failure and carries on.
    print(f"Failed to send notification: {e}")

finally:
    # Close the producer on both the success and the failure path; it is
    # still None when the constructor itself failed.
    if producer is not None:
        producer.close()

Notification sent successfully.
CPU times: user 31.7 ms, sys: 2.84 ms, total: 34.6 ms
Wall time: 135 ms


In [8]:
from pyspark.sql import SparkSession

# Reuse the active SparkSession if there is one, otherwise start a new one.
spark = SparkSession.builder.getOrCreate()

# Example records, one dict per person with the keys name, age, city
# (in that order).
data = [
    dict(name=person, age=years, city=home)
    for person, years, home in (
        ("John", 30, "New York"),
        ("Alice", 28, "San Francisco"),
        ("Bob", 35, "Chicago"),
    )
]

# Distribute the records as an RDD, then build a DataFrame from that RDD.
rdd = spark.sparkContext.parallelize(data)
df = spark.createDataFrame(rdd)

# Print the DataFrame as a text table.
df.show()

+---+-------------+-----+
|age|         city| name|
+---+-------------+-----+
| 30|     New York| John|
| 28|San Francisco|Alice|
| 35|      Chicago|  Bob|
+---+-------------+-----+



In [11]:
# Display the RDD's elements as a plain Python list. The recorded output is
# the three original dicts, with keys in the order they were defined.
rdd.collect()

[{'name': 'John', 'age': 30, 'city': 'New York'},
 {'name': 'Alice', 'age': 28, 'city': 'San Francisco'},
 {'name': 'Bob', 'age': 35, 'city': 'Chicago'}]