In [0]:
from pyspark.sql.types import StructType, StringType, IntegerType

schema = StructType() \
    .add("order_id", StringType()) \
    .add("customer_id", StringType()) \
    .add("product", StringType()) \
    .add("quantity", IntegerType()) \
    .add("region", StringType())

initial_data = [
    ("1", "C101", "Laptop", 2, "South"),
    ("2", "C102", "Chair", 6, "North"),
    ("3", "C103", "Mobile", 1, "East")
]

df = spark.createDataFrame(initial_data, schema)

df.write \
  .mode("overwrite") \
  .option("header", True) \
  .csv("dbfs:/tmp/stream/orders")

In [0]:
orders_stream = (
    spark.readStream
    .schema(schema)
    .option("header",True)
    .csv("dbfs:/tmp/stream/orders")
)

In [0]:

from pyspark.sql.functions import when

transformed_orders = orders_stream.withColumn(
    "bulk_order",
    when(orders_stream["quantity"] > 5, True).otherwise(False)
)


In [0]:
from pyspark.sql.functions import col# Same rate stream and transformation

rate_df = (
    spark.readStream
    .format("rate")
    .option("rowsPerSecond", 1)
    .load()
)

transformed_df = rate_df.withColumn("is_even", (col("value") % 2 == 0))

# Write to memory (temp table)
query = (
    transformed_df.writeStream
    .format("memory")  # 👈 this is key
    .queryName("rate_table")  # 👈 table name to query later
    .outputMode("append")
    .start()
)






spark.sql("SELECT * FROM rate_table").show()

+---------+-----+-------+
|timestamp|value|is_even|
+---------+-----+-------+
+---------+-----+-------+



In [0]:
from pyspark.sql.functions import col 

rate_df = (
    spark.readStream
        .format("rate")
        .option("rowsPerSecond", 1)
        .load()
)

transformed_df = rate_df.withColumn("is_odd", (col("value") % 2 != 0))

query = (
    transformed_df.writeStream
        .format("memory")     
        .queryName("rate_table") 
        .outputMode("append")
        
)
