# Load Silver Table to Gold Table - shared.productCategory

## Overview
Load ProductCategory data from Silver lakehouse table to Gold lakehouse table.

## Data Flow
- **Source**: Silver Lakehouse shared.productCategory
- **Target**: Gold Lakehouse shared.productCategory
- **Process**: Read the Silver Delta table, add a `GoldLoadTimestamp` audit column, run duplicate and null data-quality checks, then overwrite the Gold Delta table

---

In [None]:
# Settings for the Silver -> Gold ProductCategory load
WORKSPACE_NAME = "Fabric_MAAG"
SOURCE_LAKEHOUSE_NAME = "maag_silver"
SOURCE_SCHEMA = "shared"
SOURCE_TABLE = "productcategory"

# Silver side: absolute OneLake (abfss) location of the source table,
# assembled segment by segment
SOURCE_TABLE_PATH = "/".join(
    [
        f"abfss://{WORKSPACE_NAME}@onelake.dfs.fabric.microsoft.com",
        f"{SOURCE_LAKEHOUSE_NAME}.Lakehouse",
        "Tables",
        SOURCE_SCHEMA,
        SOURCE_TABLE,
    ]
)

# Gold side: schema-qualified table name in the Gold lakehouse
# (attached as default)
TARGET_SCHEMA = "shared"
TARGET_TABLE = "productcategory"
TARGET_FULL_PATH = ".".join((TARGET_SCHEMA, TARGET_TABLE))

# Banner describing the run
print("🔄 Loading ProductCategory from Silver to Gold")
print(f"📂 Source: {SOURCE_TABLE_PATH}")
print(f"🎯 Target: {TARGET_FULL_PATH}")
print("=" * 50)

# Open the Silver Delta table by path
df = spark.read.load(SOURCE_TABLE_PATH, format="delta")

# Report row count and column names of what was read
print("✅ Data loaded from Silver table")
print("📊 Records:", df.count())
print("📋 Columns:", df.columns)

# Preview the first 10 rows without truncating cell values
print("\n📖 Sample data from Silver:")
df.show(n=10, truncate=False)

In [None]:
# Apply Gold layer transformations and data quality
#
# Builds `df_gold` from the Silver DataFrame `df` by adding an audit column,
# then reports duplicate CategoryIds and nulls in the key columns. The checks
# only print warnings; they do not stop the notebook.
from pyspark.sql.functions import current_timestamp, col, sum as spark_sum

print("🔧 Applying Gold layer transformations...")

# Audit column for the Gold layer, populated from current_timestamp()
df_gold = df.withColumn("GoldLoadTimestamp", current_timestamp())

# Data quality checks for Gold layer
print("\n🔍 Gold layer data quality validation...")

# Duplicates: number of distinct CategoryId values that appear on more than one row
duplicate_count = df_gold.groupBy("CategoryId").count().filter(col("count") > 1).count()
if duplicate_count > 0:
    print(f"⚠️ Found {duplicate_count} duplicate CategoryIds")
else:
    print("✅ No duplicates found")

# Nulls: one aggregate pass that counts NULLs per key column
# (isNull() cast to int gives 1 for NULL, 0 otherwise)
null_row = df_gold.select(
    spark_sum(col("CategoryId").isNull().cast("int")).alias("null_ids"),
    spark_sum(col("CategoryName").isNull().cast("int")).alias("null_names"),
    spark_sum(col("IsActive").isNull().cast("int")).alias("null_isactive")
).collect()[0]

# SUM over zero rows yields NULL, which arrives here as None. Comparing
# None > 0 raises TypeError, so an empty Silver table would crash this cell.
# Normalize missing sums to 0 before comparing.
null_checks = {name: (value or 0) for name, value in null_row.asDict().items()}

if any(count > 0 for count in null_checks.values()):
    print(f"⚠️ Found nulls: CategoryId={null_checks['null_ids']}, CategoryName={null_checks['null_names']}, IsActive={null_checks['null_isactive']}")
else:
    print("✅ No nulls in key fields")

print("\n📖 Sample Gold data:")
df_gold.show(10, truncate=False)

In [None]:
# Write df_gold to the Gold table, then read it back to confirm the load
print(f"💾 Loading data to Gold table: {TARGET_FULL_PATH}")

try:
    # Replace the Gold Delta table (default lakehouse), including its schema
    (
        df_gold.write
        .format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
        .saveAsTable(TARGET_FULL_PATH)
    )
    print("✅ Data loaded successfully to Gold table")

    # Read the row count back from the table that was just written
    count_query = f"SELECT COUNT(*) as count FROM {TARGET_FULL_PATH}"
    result_count = spark.sql(count_query).first()["count"]
    print(f"📊 Records in Gold table: {result_count}")

    # Preview the first 10 stored rows, ordered by CategoryId
    print("\n📖 Sample from Gold table:")
    sample_query = f"SELECT * FROM {TARGET_FULL_PATH} ORDER BY CategoryId"
    spark.sql(sample_query).show(n=10, truncate=False)

    print("🎉 Silver to Gold data load complete!")

except Exception as load_error:
    # Report the failure, then re-raise so the notebook run is marked as failed
    print(f"❌ Error loading data to Gold table: {load_error!s}")
    raise