In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType, DateType
from datetime import date

# Obtain the notebook's Spark session (reuses an existing one if present).
spark = (
    SparkSession.builder
    .appName("CustomerProductOrder")
    .getOrCreate()
)

# ----------------------
# Customer table
# ----------------------
# Schema is assembled field by field; every column is nullable.
customer_schema = (
    StructType()
    .add("cust_id", IntegerType(), True)
    .add("name", StringType(), True)
    .add("contact_number", StringType(), True)
)

# Sample rows, in schema column order: (cust_id, name, contact_number).
customer_data = [
    (1, "Alice", "9876543210"),
    (2, "Bob", "8765432109"),
    (3, "Charlie", "7654321098"),
]

df_customer = spark.createDataFrame(data=customer_data, schema=customer_schema)

#

In [0]:
# ----------------------
# Product table
# ----------------------
# Each (column name, Spark type) pair becomes a nullable StructField.
product_schema = StructType([
    StructField(col_name, col_type, True)
    for col_name, col_type in (
        ("product_id", IntegerType()),
        ("name", StringType()),
        ("description", StringType()),
    )
])

# Sample rows, in schema column order: (product_id, name, description).
product_data = [
    (101, "Laptop", "Gaming Laptop"),
    (102, "Phone", "Smartphone with 5G"),
    (103, "Tablet", "Android tablet"),
]

df_product = spark.createDataFrame(data=product_data, schema=product_schema)


In [0]:

# ----------------------
# Order table
# ----------------------
# All columns are nullable; cust_id / product_id carry the same names and
# types as the key columns of the customer and product tables.
order_schema = StructType([
    StructField(name="sr_no", dataType=IntegerType(), nullable=True),
    StructField(name="order_id", dataType=IntegerType(), nullable=True),
    StructField(name="cust_id", dataType=IntegerType(), nullable=True),
    StructField(name="product_id", dataType=IntegerType(), nullable=True),
    StructField(name="transaction_date", dataType=DateType(), nullable=True),
    StructField(name="rate", dataType=DoubleType(), nullable=True),
    StructField(name="amount", dataType=DoubleType(), nullable=True),
])

# Sample rows, in schema column order:
# (sr_no, order_id, cust_id, product_id, transaction_date, rate, amount)
# NOTE(review): order_id 5003 appears on two rows with different cust_id and
# transaction_date values -- confirm whether that is intentional.
order_data = [
    (1, 5001, 1, 101, date(2025, 8, 1), 80000.0, 80000.0),
    (2, 5002, 2, 101, date(2024, 8, 2), 30000.0, 30000.0),
    (3, 5003, 1, 103, date(2023, 8, 3), 15000.0, 15000.0),
    (4, 5003, 2, 102, date(2022, 8, 3), 15000.0, 15000.0),
]

df_order = spark.createDataFrame(data=order_data, schema=order_schema)

In [0]:
# NOTE(review): this wildcard import brings every pyspark.sql.functions name into
# the notebook namespace, shadowing Python builtins such as sum/min/max/round.
from pyspark.sql.functions import *

# Expose each DataFrame to Spark SQL under the table name used by the %sql cells.
for _view_name, _frame in (
    ("customer", df_customer),
    ("product", df_product),
    ("order", df_order),
):
    _frame.createOrReplaceTempView(_view_name)



In [0]:
%sql

-- Products that have no order with a transaction_date after 2022-08-03.
--
-- * NOT EXISTS replaces NOT IN: order.product_id is nullable, and a single NULL
--   returned by a NOT IN subquery makes the predicate UNKNOWN for every product,
--   so the query would silently return no rows.
-- * The joins to customer and product were removed: they supplied no columns or
--   filters to the subquery and could not change which product_ids it produced.
-- * `order` is back-quoted because ORDER is a reserved keyword under ANSI mode.
-- * The cutoff is a typed DATE literal instead of an implicitly cast string.
select p.*
from product p
where not exists (
  select 1
  from `order` o
  where o.product_id = p.product_id
    and o.transaction_date > date '2022-08-03'
)


