# In [0]:
from pyspark.sql.functions import input_file_name
from pyspark.sql import DataFrame
from pyspark.sql.functions import lit, current_timestamp
import re

# Folder where your raw files exist
landing_path = "/Volumes/leelastestdata/default/rawdata/landing/"

# Target UC catalog + schema
catalog = "leelastestdata"
schema = "default"

# Optional Delta write optimizations, currently disabled; uncomment to enable
#spark.conf.set("spark.databricks.delta.optimizeWrite.enabled", "true")
#spark.conf.set("spark.databricks.delta.autoCompact.enabled", "true")

# Supported file extensions mapped to the Spark reader used for each one.
# The lambdas defer the read until a file name actually matches.
readers = {
    ".csv": lambda path: spark.read.csv(path, header=True, inferSchema=True),
    ".json": lambda path: spark.read.json(path),
    ".parquet": lambda path: spark.read.parquet(path),
}

# Get list of all entries in landing folder
files = dbutils.fs.ls(landing_path)

for f in files:
    file_path = f.path
    file_name = f.name

    # Decide file format dynamically (case-insensitive extension match).
    # Done first so unsupported entries are skipped before any table work.
    lowered_name = file_name.lower()
    read_file = next(
        (reader for ext, reader in readers.items() if lowered_name.endswith(ext)),
        None,
    )
    if read_file is None:
        print(f"Skipping unsupported file → {file_name}")
        continue

    # remove extension → table name
    stem = re.sub(r"\.[^.]+$", "", lowered_name)
    # Collapse every run of characters outside [a-z0-9_] into "_": a dot left
    # in the stem (e.g. "my.data.csv") would otherwise turn the three-part
    # catalog.schema.table name below into a four-part one, and spaces or
    # hyphens would be passed unquoted to saveAsTable. Names that are already
    # plain identifiers come through unchanged.
    table_name = re.sub(r"[^a-z0-9_]+", "_", stem)
    if not table_name:
        # e.g. a file called just ".csv" leaves nothing to name the table with
        print(f"Skipping file with no usable table name → {file_name}")
        continue
    full_table_name = f"{catalog}.{schema}.{table_name}"

    print(f"\n=== Processing {file_name} → {full_table_name} ===")

    df = read_file(file_path)

    # Add audit metadata: where the rows came from and when they were loaded
    df = df.withColumn("source_file", lit(file_path)) \
           .withColumn("load_timestamp", current_timestamp())

    # Write to Unity Catalog as Delta table.
    # "append" adds to an existing table; mergeSchema lets new columns in the
    # incoming file be added to the table schema instead of failing the write.
    df.write.format("delta") \
        .mode("append") \
        .option("mergeSchema", "true") \
        .saveAsTable(full_table_name)

    print(f"✔ Successfully loaded → {full_table_name}")
