# Load Silver Table to Gold Table - Account

## Overview
Load Account data from Silver lakehouse table to Gold lakehouse table.

## Data Flow
- **Source**: Silver Lakehouse.finance.account (Silver lakehouse table)
- **Target**: Gold Lakehouse.finance.account (Gold lakehouse - attached as default)
- **Process**: Read Silver table, apply transformations, load to Gold Delta table


In [None]:
import pandas as pd
from pyspark.sql.types import *
from pyspark.sql.functions import col, sum as spark_sum, current_timestamp
import os

# ---------------------------------------------------------------------------
# Silver -> Gold configuration
# ---------------------------------------------------------------------------
# Source side: the Silver table is addressed by its absolute OneLake (abfss)
# path, built from the workspace, lakehouse, schema and table names below.
WORKSPACE_NAME = "Fabric_MAAG"
SOURCE_LAKEHOUSE_NAME = "maag_silver"
SOURCE_SCHEMA = "finance"
SOURCE_TABLE = "account"

SOURCE_TABLE_PATH = (
    f"abfss://{WORKSPACE_NAME}@onelake.dfs.fabric.microsoft.com/"
    f"{SOURCE_LAKEHOUSE_NAME}.Lakehouse/Tables/{SOURCE_SCHEMA}/{SOURCE_TABLE}"
)

# Target side: the Gold lakehouse is attached as the default lakehouse, so a
# schema-qualified table name is used instead of an absolute path.
TARGET_SCHEMA = "finance"
TARGET_TABLE = "account"
TARGET_FULL_PATH = ".".join((TARGET_SCHEMA, TARGET_TABLE))

# Banner describing the run.
print("üîÑ Loading Account from Silver to Gold")
print(f"üìÇ Source: {SOURCE_TABLE_PATH}")
print(f"üéØ Target: {TARGET_FULL_PATH}")
print("=" * 50)

# Load the Silver Delta table into a DataFrame; `df` is consumed by later cells.
df = (
    spark.read
    .format("delta")
    .load(SOURCE_TABLE_PATH)
)

# Summarise what was read (the count triggers a Spark action on the table).
print("‚úÖ Data loaded from Silver table")
print(f"üìä Records: {df.count()}")
print(f"üìã Columns: {df.columns}")

# Preview the first rows without truncating column values.
print("\nüìñ Sample data from Silver:")
df.show(10, truncate=False)

In [None]:
# --- Gold layer transformations and data quality ---
# Builds `df_gold` from the Silver DataFrame `df` by:
#   * stamping every row with a GoldLoadTimestamp audit column, and
#   * replacing NULL / blank (whitespace-only) CreatedBy values with a default.
# It then runs two advisory data-quality checks (duplicate AccountId values and
# NULLs in AccountId / AccountType). The checks only print their findings; they
# do not stop or alter the load.
from pyspark.sql.functions import when, trim

print(f"üîß Applying Gold layer transformations...")

# Add audit columns for Gold layer and set default for CreatedBy if blank or null
df_gold = df.withColumn("GoldLoadTimestamp", current_timestamp())\
    .withColumn("CreatedBy", when(trim(col("CreatedBy")).isNull() | (trim(col("CreatedBy")) == ""), "Sample script").otherwise(col("CreatedBy")))

# Data quality checks for Gold layer
print(f"\nüîç Gold layer data quality validation...")

# Check for duplicates: count the AccountId values that occur more than once
duplicate_count = df_gold.groupBy("AccountId").count().filter(col("count") > 1).count()
if duplicate_count > 0:
    print(f"‚ö†Ô∏è Found {duplicate_count} duplicate AccountId values")
else:
    print(f"‚úÖ No duplicates found")

# Check for nulls in key fields: sum a 0/1 flag per row for each key column
null_checks = df_gold.select(
    spark_sum(col("AccountId").isNull().cast("int")).alias("null_accountid"),
    spark_sum(col("AccountType").isNull().cast("int")).alias("null_accounttype")
).collect()[0]

# SUM over zero rows yields NULL (None in Python). Without normalising to 0,
# an empty Silver table would make the `> 0` comparison raise TypeError and
# abort the notebook inside what is meant to be an advisory check.
null_accountid = null_checks["null_accountid"] or 0
null_accounttype = null_checks["null_accounttype"] or 0

if null_accountid > 0 or null_accounttype > 0:
    print(f"‚ö†Ô∏è Found nulls: AccountId={null_accountid}, AccountType={null_accounttype}")
else:
    print(f"‚úÖ No nulls in key fields")

print(f"\nüìñ Sample Gold data:")
df_gold.show(10, truncate=False)

In [None]:
# --- Load data to Gold table ---
# Overwrites the Gold Delta table (data and schema) with `df_gold`, then reads
# the table back to report its row count and a sample ordered by AccountId.
# Any failure is reported and re-raised so the notebook run is marked failed.
print(f"üíæ Loading data to Gold table: {TARGET_FULL_PATH}")

try:
    # Configure the writer: Delta format, full overwrite, schema replaced too.
    gold_writer = (
        df_gold.write
        .format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
    )
    # The target name resolves against the default (Gold) lakehouse.
    gold_writer.saveAsTable(TARGET_FULL_PATH)

    print("‚úÖ Data loaded successfully to Gold table")

    # Verify the load by counting rows in the freshly written table.
    count_rows = spark.sql(f"SELECT COUNT(*) as count FROM {TARGET_FULL_PATH}").collect()
    result_count = count_rows[0]["count"]
    print(f"üìä Records in Gold table: {result_count}")

    # Preview the loaded Gold data in AccountId order.
    print("\nüìñ Sample from Gold table:")
    gold_sample = spark.sql(f"SELECT * FROM {TARGET_FULL_PATH} ORDER BY AccountId")
    gold_sample.show(10, truncate=False)

    print("üéâ Silver to Gold data load complete!")

except Exception as e:
    # Surface the error in the cell output, then propagate it unchanged.
    print(f"‚ùå Error loading data to Gold table: {e!s}")
    raise