In [0]:
# Declare the two optional text widgets that bound a manual run.
# Both default to the empty string; creation order is start_ts, then end_ts.
for widget_name, widget_label in (
    ("start_ts", "Manual Start(Backfill)"),
    ("end_ts", "Manual End"),
):
    dbutils.widgets.text(widget_name, "", widget_label)

In [0]:
# MAGIC %load_ext autoreload
# MAGIC %autoreload 2

In [0]:
import sys
import os
import pyspark.sql.functions as f
from datetime import datetime
# Make the directory two levels above the working directory importable so the
# `transformations` package imported further down can be resolved.
# The membership guard keeps the cell idempotent: re-running it does not pile
# up duplicate entries on sys.path.
_project_root = os.path.abspath('../..')
if _project_root not in sys.path:
    sys.path.append(_project_root)

In [0]:
from transformations.product_transforms import transform_products, upsert_product
from transformations.transform_utils import normalize_raw_schema, get_watermark, update_watermark

In [0]:
# Read both widget values (start_ts first, then end_ts) and trim surrounding
# whitespace, so a blank or whitespace-only entry becomes the empty string.
start, end = (
    dbutils.widgets.get(widget_key).strip()
    for widget_key in ("start_ts", "end_ts")
)

In [0]:
# Unqualified table names; the catalog and schema are prefixed where they are used.
raw_product_table_name = 'raw_products'            # source table
enriched_product_table_name = 'products_enriched'  # target table

In [0]:
# Catalog that prefixes every fully qualified table name in this notebook.
CATALOG = 'pei'

In [0]:
# Columns (and their order) selected from the enriched frame before the upsert.
PRODUCT_TARGET_COLUMNS = [
    "product_id",
    "category",
    "sub_category",
    "product_name",
    "state",
    "price_per_product",
    "file_path",
    "ingestion_timestamp",
    "processing_timestamp",
]

In [0]:
# Product enrichment driver.
#
# Reads the bronze product rows whose ingestion_timestamp falls in the window
# (last_ts, upper_bound], normalizes and transforms them, restricts the result
# to PRODUCT_TARGET_COLUMNS and passes it to upsert_product for the silver table.
#
# Modes:
#   * start_ts widget set   -> manual backfill: the window starts at start_ts
#                              and the watermark is left untouched.
#   * start_ts widget empty -> incremental: the window starts at the value
#                              returned by get_watermark, and update_watermark
#                              is called with upper_bound after a successful upsert.
#
# Any exception is logged and then re-raised so the notebook run is reported as
# failed instead of finishing "successfully" with only a printed message.
try:
    # A non-empty start_ts switches the run into backfill mode.
    is_backfill = start != ""

    # Lower bound (exclusive) of the ingestion window.
    last_ts = start if is_backfill else get_watermark(spark, raw_product_table_name)

    # Upper bound (inclusive): the manual end if supplied, otherwise "now".
    # NOTE(review): datetime.now() is naive driver-local time — confirm it
    # matches the timezone used for ingestion_timestamp.
    # NOTE(review): with end_ts set but start_ts empty, the watermark is moved
    # to the manual end value below — confirm that is intended.
    upper_bound = end if end != "" else datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    df_raw_products = (
        spark.read.table(f"{CATALOG}.bronze.{raw_product_table_name}")
        .filter(f.col("ingestion_timestamp") > last_ts)
        .filter(f.col("ingestion_timestamp") <= upper_bound)
    )

    if df_raw_products.isEmpty():
        # Nothing in the window: skip the upsert and leave the watermark as is.
        print("No new Product data to process.")
    else:
        df_normalized = normalize_raw_schema(df_raw_products)
        df_enriched = transform_products(df_normalized)

        # Pin the output to the expected column set and order.
        df_enforced = df_enriched.select(*PRODUCT_TARGET_COLUMNS)

        upsert_product(
            spark,
            df_enforced,
            f"{CATALOG}.silver.{enriched_product_table_name}",
        )

        # Only incremental runs advance the watermark; backfills must not.
        if not is_backfill:
            update_watermark(spark, raw_product_table_name, upper_bound)

        print(f"Finished processing Products up to {upper_bound}")
except Exception as e:
    print(f"FAILED: Product Enrichment. Error: {e}")
    # Propagate the failure so the job/orchestrator sees this run as failed.
    raise