In [None]:
from pyspark import SparkContext
from pyspark.streaming import StreamingContext
import time
import random
import numpy as np

# Create the Spark driver context and layer a streaming context on top of it
sc = SparkContext(master="local[2]", appName="RandomDataStream")
ssc = StreamingContext(sc, batchDuration=1)  # one-second batch interval

def generate_data(count=100, mean=140, stddev=40):
    """Return synthetic Tweet lengths (in characters).

    Lengths are drawn from a normal distribution, truncated toward zero to
    whole characters, and clamped at 0 so that no negative length is ever
    produced.

    Args:
        count: Number of lengths to generate (default 100).
        mean: Mean of the normal distribution (default 140).
        stddev: Standard deviation of the normal distribution (default 40).

    Returns:
        A list of ``count`` non-negative Python ints.
    """
    # One vectorised draw instead of ``count`` separate calls into NumPy.
    samples = np.random.normal(mean, stddev, size=count)
    # int() yields plain Python ints; max() guards against the rare negative draw.
    return [max(0, int(value)) for value in samples]

def process_data(data):
    """Print summary statistics for one batch of Tweet lengths.

    Registered as the ``foreachRDD`` callback. An empty batch is treated as
    the signal that every queued batch has been consumed, and the streaming
    context (together with the SparkContext) is stopped.

    Args:
        data: RDD holding the integer Tweet lengths of the current batch.
    """

    def fold_value(acc, value):
        # Fold one element into the (count, sum, min, max) accumulator.
        seen, total, lowest, highest = acc
        return seen + 1, total + value, min(lowest, value), max(highest, value)

    def merge(left, right):
        # Combine the accumulators of two partitions.
        return (
            left[0] + right[0],
            left[1] + right[1],
            min(left[2], right[2]),
            max(left[3], right[3]),
        )

    # A single pass over the RDD yields everything needed below, instead of
    # launching separate isEmpty/count/reduce/min/max jobs for each batch.
    # The +/-inf sentinels are replaced by real values as soon as one element
    # is folded in, so min/max stay ints for any non-empty batch.
    zero = (0, 0, float("inf"), float("-inf"))
    count, sum_of_data, min_val, max_val = data.aggregate(zero, fold_value, merge)

    if count == 0:
        print("All Batches Processed! Run Again for More Data")
        # NOTE(review): stop() is called from inside the foreachRDD callback;
        # confirm that a graceful stop does not block waiting for this very
        # batch to finish. If it does, stop from the driver thread instead.
        ssc.stop(stopSparkContext=True, stopGraceFully=True)
        return

    # Calculate mean
    mean = sum_of_data / count
    print("New Batch Arrived!")
    print("Average characters per Tweet in this batch:", mean)

    # Report min and max
    print(f"Shortest Tweet in this batch: {min_val} characters")
    print(f"Longest Tweet in this batch: {max_val} characters\n")

# Build ten RDDs of generated data up front and expose them as a queue-backed DStream
rdd_queue = [sc.parallelize(generate_data()) for _ in range(10)]
stream = ssc.queueStream(rdd_queue)

# Register the per-batch callback
stream.foreachRDD(process_data)

# Launch the streaming computation
print("Starting Spark Streaming context...")
ssc.start()

# Block here until the streaming context has been stopped
print("Awaiting termination...")
ssc.awaitTermination()



Starting Spark Streaming context...
Awaiting termination...
New Batch Arrived!
Average characters per Tweet in this batch: 141.5
Shortest Tweet in this batch: 47 characters
Longest Tweet in this batch: 247 characters

New Batch Arrived!
Average characters per Tweet in this batch: 140.15
Shortest Tweet in this batch: 56 characters
Longest Tweet in this batch: 251 characters

New Batch Arrived!
Average characters per Tweet in this batch: 135.09
Shortest Tweet in this batch: 46 characters
Longest Tweet in this batch: 231 characters

New Batch Arrived!
Average characters per Tweet in this batch: 144.17
Shortest Tweet in this batch: 48 characters
Longest Tweet in this batch: 248 characters

New Batch Arrived!
Average characters per Tweet in this batch: 139.5
Shortest Tweet in this batch: 35 characters
Longest Tweet in this batch: 225 characters

New Batch Arrived!
Average characters per Tweet in this batch: 135.64
Shortest Tweet in this batch: 36 characters
Longest Tweet in this batch: 261 