# Task 3: Create Enriched Orders Table

This notebook demonstrates the creation of the **Enriched Orders Table**, which provides a denormalized, comprehensive view of each sale by joining order, customer, and product information.


### 0. Environment Setup and Data Loading

In [7]:
import os
import sys
from pyspark.sql import SparkSession
from pyspark.sql.types import *

# Resolve the project root as the directory two levels above the current
# working directory, and put it first on sys.path so `src` can be imported.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)

# Project-local helpers; these imports only resolve after the path tweak above.
from src.spark_session import get_spark_session
from src.load_source_data import (
    load_customer_data,
    load_orders_data,
    load_products_data,
)

# One Spark session is shared by every cell in this notebook.
spark = get_spark_session("EnrichedOrdersNotebook")

# All three source files sit in the project's "data" directory.
data_dir = os.path.join(PROJECT_ROOT, "data")

customers_df = load_customer_data(spark, os.path.join(data_dir, "Customer.xlsx"))
orders_df = load_orders_data(spark, os.path.join(data_dir, "Orders.json"))
products_df = load_products_data(spark, os.path.join(data_dir, "Products.csv"))

print("Data loaded.")


Checking file at: /Users/kushalsenlaskar/Documents/E-commerce Sales Data/data/Customer.xlsx
File found. Loading Excel data using Spark...
Customer data loaded successfully

Checking file at: /Users/kushalsenlaskar/Documents/E-commerce Sales Data/data/Orders.json
File found. Loading JSON data using Spark...
Orders data loaded successfully

Checking file at: /Users/kushalsenlaskar/Documents/E-commerce Sales Data/data/Products.csv
File found. Loading CSV data using Spark...
Products data loaded successfully
Data loaded.


### 1. Create Enriched Orders Table

In [8]:
import importlib
import src.load_enriched_orders

# Re-execute the module so the name imported below reflects its current
# on-disk source rather than a copy cached earlier in this kernel session.
importlib.reload(src.load_enriched_orders)
from src.load_enriched_orders import create_enriched_orders_table

# Build the enriched table from the three frames loaded in the setup cell.
print("Creating Enriched Orders Table...")
enriched_orders_df = create_enriched_orders_table(
    orders_df,
    customers_df,
    products_df,
)

# Report the size, then show the schema and a five-row sample.
enriched_row_count = enriched_orders_df.count()
print(f"\nEnriched orders table created with {enriched_row_count} rows\n")
enriched_orders_df.printSchema()
enriched_orders_df.show(5)

Creating Enriched Orders Table...

Enriched orders table created with 10133 rows

root
 |-- Order ID: string (nullable = true)
 |-- Order Date: string (nullable = true)
 |-- Ship Date: string (nullable = true)
 |-- Ship Mode: string (nullable = true)
 |-- Customer Name: string (nullable = true)
 |-- Country: string (nullable = true)
 |-- Category: string (nullable = true)
 |-- Sub-Category: string (nullable = true)
 |-- Product Name: string (nullable = true)
 |-- Quantity: integer (nullable = true)
 |-- Price: double (nullable = true)
 |-- Discount: double (nullable = true)
 |-- Profit: double (nullable = true)


Enriched orders table created with 10133 rows

root
 |-- Order ID: string (nullable = true)
 |-- Order Date: string (nullable = true)
 |-- Ship Date: string (nullable = true)
 |-- Ship Mode: string (nullable = true)
 |-- Customer Name: string (nullable = true)
 |-- Country: string (nullable = true)
 |-- Category: string (nullable = true)
 |-- Sub-Category: string (nullable = t

### 2. Data Quality Validation

In [9]:
from src.load_enriched_orders import validate_enriched_orders_table

# Run the project's validation helper on the enriched table, passing the raw
# orders frame alongside it. The returned frame is what later cells use.
print("Running data quality checks on enriched orders data...")
validated_enriched_orders_df = validate_enriched_orders_table(
    enriched_orders_df,
    orders_df,
)
print("\nEnriched orders data validation complete.")


Running data quality checks on enriched orders data...

--- Validating Enriched Orders Table ---



Data quality checks for enriched orders table passed successfully.

Enriched orders data validation complete.

Data quality checks for enriched orders table passed successfully.

Enriched orders data validation complete.


### 3. Create the Enriched Orders Temporary View

In [None]:
# Register the validated frame as a temporary view so that it can be queried
# by name through spark.sql.
validated_enriched_orders_df.createOrReplaceTempView(
    "enriched_orders_view"
)

print("Temporary view 'enriched_orders_view' created.")


### 4. Display Data from the Enriched Orders View

In [None]:
# Query the temporary view through Spark SQL and print a five-row preview.
print("Enriched Orders View...\n")
preview_df = spark.sql("SELECT * FROM enriched_orders_view LIMIT 5")
preview_df.show()


### 5. Stop Spark Session

In [None]:
# Last step of the notebook: stop the Spark session created in the setup cell.
spark.stop()
print("Spark session stopped.")