# Load Bronze Data to Silver Table - OrderPayment (ADB Channel)

## Overview
Load OrderPayment sample data from a Bronze lakehouse CSV file into a Silver table in the Databricks workspace catalog.

## Data Flow
- **Source (CSV)**: DBFS `<base_path>/OrderPayment_ADB.csv` (default `base_path`: `/FileStore/tables/sales`)
- **Target**: Workspace catalog table: `<catalog_name>.<schema_name>.orderpayment` (default: `maag_adb2.sales.orderpayment`)
- **Process**: Import libraries, define variables, read CSV, validate schema, check data quality, load to Delta table, verify load

---

In [None]:
# Import required libraries
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql.functions import col, sum as spark_sum
import os

In [None]:
# Notebook parameters and derived names.
# Register each widget with its default value, then read the current values back.
widget_defaults = {
    "catalog_name": "maag_adb2",
    "schema_name": "sales",
    "base_path": "/FileStore/tables/sales",
}
for widget_name, default_value in widget_defaults.items():
    dbutils.widgets.text(widget_name, default_value)

catalog_name = dbutils.widgets.get("catalog_name")
schema_name = dbutils.widgets.get("schema_name")
base_path = dbutils.widgets.get("base_path")

# Source CSV location and the three-part target table name built from the widgets.
table_orderpayment = "orderpayment"
orderpayment_csv = "/".join([base_path, "OrderPayment_ADB.csv"])
target_full_path = ".".join([catalog_name, schema_name, table_orderpayment])

In [None]:
# Read the OrderPayment CSV into a DataFrame and print a short summary.
print('🔄 Loading OrderPayment data')
print(f'📂 Source: {orderpayment_csv}')
print(f'🎯 Target: {target_full_path}')

# Read every column as a string instead of inferring types. The required
# columns (OrderId, PaymentMethod, TransactionId) are cast to string before
# loading anyway; letting Spark infer a numeric type first would strip leading
# zeros or reformat very large identifiers, and would cost an extra pass over
# the file.
orderpayment_df = (
    spark.read
    .option('header', True)
    .option('inferSchema', False)
    .csv(orderpayment_csv)
)

print('✅ Data loaded successfully')
print(f'📊 Records: {orderpayment_df.count()}')
print(f'📋 Columns: {orderpayment_df.columns}')

# Display sample data from dataframe
print('📖 Sample data:')
orderpayment_df.show(10, truncate=False)

In [None]:
# Validate the source columns, conform to the target schema, run basic data
# quality checks, then overwrite the Delta table and verify the load.
from pyspark.sql import functions as F
from pyspark.sql.types import StringType

print('🔍 Validating data quality...')
required_columns = ['OrderId', 'PaymentMethod', 'TransactionId']
missing_columns = [c for c in required_columns if c not in orderpayment_df.columns]
if missing_columns:
    print(f'⚠️ Warning: Missing columns in source data: {missing_columns}')
else:
    print('✅ All required columns present in source data.')

# NOTE(review): missing columns are back-filled with empty strings so the load
# can proceed; because '' is not NULL, the null check below will not flag them.
for col_name in missing_columns:
    orderpayment_df = orderpayment_df.withColumn(col_name, F.lit(''))

# Keep only the required columns, each cast to string, in the target order.
orderpayment_df = orderpayment_df.select(
    [F.col(c).cast(StringType()).alias(c) for c in required_columns]
)

print('📊 Data Quality Check:')
null_counts = orderpayment_df.select(
    [F.sum(F.col(c).isNull().cast('int')).alias(c) for c in required_columns]
).collect()[0]
for col_name in required_columns:
    # SUM over zero rows yields NULL (None in Python); treat that as 0 so an
    # empty source file does not fail on the `None > 0` comparison.
    null_count = null_counts[col_name] or 0
    if null_count > 0:
        print(f'  {col_name}: {null_count} null values')
    else:
        print(f'  {col_name}: ✅ No nulls')

print('🎯 PaymentMethod Distribution:')
orderpayment_df.groupBy('PaymentMethod').count().orderBy('PaymentMethod').show()

# Create the schema inside the target catalog. An unqualified schema name is
# resolved against the session's current catalog, which may differ from the
# catalog the table is written to.
spark.sql(f'CREATE SCHEMA IF NOT EXISTS {catalog_name}.{schema_name}')
print(f'💾 Loading data to databricks table: {target_full_path}')

try:
    # Full refresh: replace both the data and the schema of the target table.
    (
        orderpayment_df.write
        .format('delta')
        .mode('overwrite')
        .option('overwriteSchema', 'true')
        .saveAsTable(target_full_path)
    )
    print(f'✅ Data loaded successfully to {target_full_path}')

    # Verify the load by counting and sampling rows from the written table.
    result_count = spark.sql(f'SELECT COUNT(*) as count FROM {target_full_path}').collect()[0]['count']
    print(f'📊 Records in target table: {result_count}')
    print('\n📖 Sample from Silver table:')

    spark.sql(f'SELECT * FROM {target_full_path} ORDER BY OrderId').show(10, truncate=False)
    print('🎉 OrderPayment data load complete!')
except Exception as e:
    # Report the failure, then re-raise so the notebook run is marked as failed.
    print(f'❌ Error loading data to table: {str(e)}')
    raise