In [0]:
# Notebook parameters: an optional manual time window for the run.
# Both widgets default to an empty string.
dbutils.widgets.text(
    name="start_ts",
    defaultValue="",
    label="Manual Start(Backfill)",
)
dbutils.widgets.text(
    name="end_ts",
    defaultValue="",
    label="Manual End",
)

In [0]:
# MAGIC %load_ext autoreload
# MAGIC %autoreload 2

In [0]:
import sys
import os
import pyspark.sql.functions as f
from datetime import datetime
# Make the directory two levels above the current working directory importable
# (presumably where the `transformations` package lives -- confirm against the repo layout).
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
_project_root = os.path.abspath('../..')
if _project_root not in sys.path:
    sys.path.append(_project_root)

In [0]:
from transformations.customer_transforms import transform_customers, upsert_customer
from transformations.transform_utils import normalize_raw_schema, get_watermark, update_watermark

In [0]:
# Fetch the two widget values in order (start first, then end) and trim
# surrounding whitespace so a blank or whitespace-only entry becomes "".
start, end = (
    dbutils.widgets.get(widget_name).strip()
    for widget_name in ("start_ts", "end_ts")
)

In [0]:
# Catalog name used as the first part of the fully qualified table names
# built in this notebook ("<CATALOG>.bronze.<table>" and "<CATALOG>.silver.<table>").
CATALOG = "pei"

In [0]:
# Source table in the bronze schema; the same name is also passed to
# get_watermark / update_watermark as the watermark key.
raw_customer_table_name = "raw_customers"
# Target table in the silver schema that upsert_customer writes to.
enriched_customer_table_name = "customers_enriched"

In [0]:
# Columns, in order, that the enriched DataFrame is projected onto before
# it is handed to the upsert.
CUSTOMER_TARGET_COLUMNS = [
    "customer_id",
    "customer_name",
    "first_name",
    "last_name",
    "email",
    "phone",
    "address",
    "segment",
    "country",
    "city",
    "state",
    "postal_code",
    "region",
    "file_path",
    "ingestion_timestamp",
    "processing_timestamp",
]

In [0]:
# Customer enrichment run: read bronze rows inside a time window, normalize and
# transform them, project onto CUSTOMER_TARGET_COLUMNS, and upsert into silver.
#
# Window:   last_ts < ingestion_timestamp <= upper_bound
# Modes:    - incremental (start widget empty): lower bound comes from
#             get_watermark, and the watermark is updated after a successful upsert.
#           - backfill (start widget set): lower bound is the manual start and
#             the watermark is left untouched.
# Failure:  the error is logged and then re-raised so the cell (and any job or
#           parent notebook running it) is marked as failed rather than
#           completing silently.
try:
    # Any non-empty (already stripped) start value switches the run to backfill mode.
    is_backfill = bool(start)

    # Exclusive lower bound of the window.
    # presumably get_watermark returns the upper bound of the last successful run -- verify in transform_utils
    last_ts = start if is_backfill else get_watermark(spark, raw_customer_table_name)

    # Inclusive upper bound: the manual end if supplied, otherwise "now".
    # NOTE(review): datetime.now() is the driver's naive local time -- confirm it
    # matches the timezone used when ingestion_timestamp is written.
    upper_bound = end if end else datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    df_raw_customers = (
        spark.read.table(f"{CATALOG}.bronze.{raw_customer_table_name}")
        .filter(f.col("ingestion_timestamp") > last_ts)
        .filter(f.col("ingestion_timestamp") <= upper_bound)
    )

    if df_raw_customers.isEmpty():
        # Nothing in the window: skip the transform and leave the watermark as is.
        print("No new Customer data to process.")
    else:
        df_normalized = normalize_raw_schema(df_raw_customers)

        df_enriched = transform_customers(df_normalized)
        # Enforce the target column set and order before writing.
        df_enforced = df_enriched.select(*CUSTOMER_TARGET_COLUMNS)

        upsert_customer(
            spark,
            df_enforced,
            f"{CATALOG}.silver.{enriched_customer_table_name}",
        )

        # Only regular incremental runs advance the watermark, and only after
        # the upsert call has returned without raising.
        if not is_backfill:
            update_watermark(spark, raw_customer_table_name, upper_bound)

        print(f"Finished processing Customers up to {upper_bound}")
except Exception as e:
    print(f"FAILED: Customer Enrichment. Error: {str(e)}")
    # Re-raise with the original traceback; swallowing the error here would let
    # the notebook finish "successfully" after a failed run.
    raise