# Lakehouse Architecture Demo

This notebook demonstrates key features of the Lakehouse Architecture:
- Bronze, Silver, and Gold layers
- Delta Lake features (Time Travel, ACID transactions)
- Data quality checks
- Querying and analytics


In [None]:
# Import required libraries
import sys
# Add the parent directory to the module search path so the project-local
# `config` package imported below can be resolved. The entry is relative, so it
# is resolved against the kernel's working directory.
# NOTE(review): presumably the notebook lives one level below the project root — confirm.
sys.path.append('../')

from config.spark_config import create_spark_session, get_table_paths
# NOTE(review): the wildcard import pulls the public pyspark.sql.functions names
# (e.g. `lit`) into the notebook namespace; functions such as `sum`, `min` and
# `max` shadow the Python built-ins of the same name after this line runs.
from pyspark.sql.functions import *


In [None]:
# Start the Spark session and look up the per-layer table paths
spark, base_path = create_spark_session(app_name="LakehouseDemo")
paths = get_table_paths(base_path)

# Report the base path and each layer's path, one per line
print(
    f"Base path: {base_path}",
    f"Bronze path: {paths['bronze']}",
    f"Silver path: {paths['silver']}",
    f"Gold path: {paths['gold']}",
    sep="\n",
)


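## Bronze Layer - Raw Data

The Bronze `transactions` table is loaded in the "Read Bronze transactions" cell further down; run that cell before any cell that uses `bronze_transactions`.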


## Phase 5: Delta Lake Features - Time Travel

Query historical versions of data using version number or timestamp.


In [None]:
# Time Travel: inspect the history of the Bronze transactions table
from scripts.delta_features.delta_lake_features import (
    get_table_history,
    time_travel_by_version,
)

bronze_transactions_path = f"{paths['bronze']}/transactions"

# Show version history
get_table_history(spark, bronze_transactions_path)

# Example (disabled): query version 0 (oldest/first load)
# historical_df = time_travel_by_version(spark, bronze_transactions_path, 0)
# historical_df.show(5)


## Schema Evolution

Add new columns without breaking existing queries. Delta Lake supports this through the `mergeSchema` write option, which adds columns present in the incoming DataFrame to the table schema.


In [None]:
# Schema evolution example - add new columns with defaults
# NOTE: this example is commented out and illustrative only. Before enabling it:
#   - `bronze_transactions` is defined by the "Read Bronze transactions" cell
#     further down, so that cell must be run first;
#   - the `save(...)` argument is a placeholder and must be replaced with a real
#     table path;
#   - mode("overwrite") replaces the existing contents of the target table.
# df_with_new_cols = bronze_transactions.withColumn("data_quality_score", lit(100))
# df_with_new_cols.write.format("delta").mode("overwrite").option("mergeSchema", "true").save(...)


In [None]:
# Load the Bronze-layer transactions Delta table
bronze_transactions = (
    spark.read
    .format("delta")
    .load(f"{paths['bronze']}/transactions")
)

# Print the schema, then a five-row preview
print("Bronze Transactions Schema:")
bronze_transactions.printSchema()

print("\nSample Records:")
bronze_transactions.show(n=5)
