In [0]:
%run ../.././start_up 

In [0]:
# Name the logger after what this notebook actually does (bronze -> silver for
# the patients table) so its log records are attributed to the right job.
# create_logger and pipeline_config are not defined in this notebook;
# presumably they come from the start_up notebook pulled in via %run.
logger = create_logger(notebook_name="patients_to_silver", log_level="DEBUG")
logger.info("🚀 Initializing patients_to_silver notebook")

# Bind frequently used config values to short names. Some of these are not
# read by the cells visible here; they are kept in case other cells rely on
# them.
catalog = pipeline_config["catalog"]
bronze_schema = pipeline_config["schemas"]["bronze"]
bronze_path = pipeline_config["paths"]["bronze_path"]
bronze_volume_path = pipeline_config["paths"]["bronze_volume_path"]
silver_schema = pipeline_config["schemas"]["silver"]
silver_path = pipeline_config["paths"]["silver_path"]
landing_schema = pipeline_config["schemas"]["landing"]
landing_path = pipeline_config["paths"]["landing_path"]
logs_schema = pipeline_config["schemas"]["logs"]

# Table handled by this notebook; drives the input path, the output table name
# and the lookups into column_mappings / table_config.
table_name = "patients"
logger.info("Extracted frequently used config values into variables")

In [0]:
# patients_to_silver.py
from pyspark.sql.functions import col, current_timestamp
from datetime import datetime

# Make sure the target schema exists before anything is written to it.
spark.sql(f"CREATE SCHEMA IF NOT EXISTS {catalog}.{silver_schema}")

# Table-specific source location and fully qualified target table.
input_path = f"{bronze_volume_path}/{table_name}"
output_table = f"{catalog}.{silver_schema}.{table_name}"

# Read the bronze CSV files. No schema is supplied and inference is not
# enabled, so typing is left entirely to the casts further down.
logger.info(f"Reading: {input_path}")
df = spark.read.format("csv").option("header", "true").load(input_path)

# Rename source columns to their silver names. Mappings whose source column is
# absent are skipped. df.columns is re-read on every pass on purpose: each
# rename changes the column set.
for old_col, new_col in column_mappings[table_name].items():
    if old_col in df.columns:
        df = df.withColumnRenamed(old_col, new_col)

# Stamp rows with the load time unless the source already carries the column.
if "ingestion_date" not in df.columns:
    df = df.withColumn("ingestion_date", current_timestamp())

# Enforce the configured data types. Casting never adds or removes columns, so
# the set of present columns is computed once instead of on every iteration.
column_types = table_config[table_name]["columns"]
present_columns = set(df.columns)

# Configured columns that are absent are still skipped (best effort), but the
# gap is surfaced so a drifting source file does not go unnoticed.
missing_columns = [name for name in column_types if name not in present_columns]
if missing_columns:
    logger.warning(
        f"Configured columns not found in {input_path}: {missing_columns}"
    )

for col_name, dtype in column_types.items():
    if col_name in present_columns:
        df = df.withColumn(col_name, col(col_name).cast(dtype))

# Replace the silver table with the freshly typed data. The outcome goes
# through the notebook logger, like the other status messages, so it lands in
# the same log sink.
logger.info(f"Writing: {output_table}")
df.write.format("delta").mode("overwrite").saveAsTable(output_table)
logger.info(f"✅ Silver table created: {output_table}")
