# Daily Load Orchestration Pipeline

This notebook orchestrates the full medallion architecture pipeline:
1. **Discovers** date folders in the landing zone dynamically
2. **Runs** Bronze → Silver → Gold notebooks for each date
3. **Validates** data integrity between layers

In [None]:
from src.formula1.formula1_constants import *
from src.formula1.formula1_utils import (
    get_date_folders,
    run_notebook_safely,
    validate_layer_transition
)

## Configuration

In [None]:
# Notebook paths, grouped by medallion layer.
# Each notebook name carries a numeric prefix; the lists below are in that order,
# and the orchestration loop runs them in list order.
_ENTITIES = (
    "circuits",
    "races",
    "constructors",
    "drivers",
    "results",
    "pit_stops",
    "lap_times",
    "qualifying",
)

_GOLD_REPORTS = (
    "race_results",
    "driver_standings",
    "constructor_standings",
)

# One ingest notebook per entity: 01_bronze/<n>.ingest_<entity>_file
BRONZE_NOTEBOOKS = [
    f"01_bronze/{position}.ingest_{entity}_file"
    for position, entity in enumerate(_ENTITIES, start=1)
]

# One processing notebook per entity: 02_silver/<n>.process_<entity>
SILVER_NOTEBOOKS = [
    f"02_silver/{position}.process_{entity}"
    for position, entity in enumerate(_ENTITIES, start=1)
]

# Presentation notebooks: 03_gold/<n>.<report>
GOLD_NOTEBOOKS = [
    f"03_gold/{position}.{report}"
    for position, report in enumerate(_GOLD_REPORTS, start=1)
]

# (bronze table, silver table) pairs checked by the validation step.
# Only the first four entities are validated.
VALIDATION_PAIRS = [
    (f"f1_bronze.{entity}", f"f1_silver.{entity}")
    for entity in _ENTITIES[:4]
]

## Step 1: Discover Date Folders

In [None]:
# Dynamically discover the date folders present in the landing zone.
# `landing_folder_path` is not defined in this notebook; presumably it comes from the
# star import of formula1_constants (verify against that module).
date_folders = get_date_folders(dbutils, landing_folder_path)
print(f"Found {len(date_folders)} date folders to process: {date_folders}")

# An empty discovery would otherwise pass silently through every later cell
# (nothing runs, the summary reports 0 failures), so call it out explicitly.
if not date_folders:
    print(f"⚠️ No date folders found under {landing_folder_path}; nothing will be processed.")

## Step 2: Run Pipeline for Each Date

In [None]:
# Track execution results.
# One entry per date: {"date": ..., "bronze": [...], "silver": [...], "gold": [...]},
# where each list item is {"notebook": <path>, "success": <flag from run_notebook_safely>}.
execution_log = []


def _run_layer(layer_label, notebooks, params):
    """Run one layer's notebooks in order, stopping at the first failure.

    Args:
        layer_label: Display name of the layer ("Bronze", "Silver", "Gold"),
            used only in the failure message.
        notebooks: Notebook paths to run, in execution order.
        params: Parameters passed to every notebook in the layer.

    Returns:
        (entries, layer_ok): `entries` holds one {"notebook", "success"} dict per
        notebook that was attempted; `layer_ok` is False as soon as one fails.
    """
    entries = []
    for notebook in notebooks:
        # Pass a copy so each notebook run receives its own parameter dict.
        success, _ = run_notebook_safely(dbutils, notebook, dict(params))
        entries.append({"notebook": notebook, "success": success})
        if not success:
            print(f"⚠️ {layer_label} layer failed at {notebook}, skipping remaining...")
            return entries, False
    return entries, True


for file_date in date_folders:
    print(f"\n{'='*50}")
    print(f"Processing date: {file_date}")
    print(f"{'='*50}")

    date_results = {"date": file_date, "bronze": [], "silver": [], "gold": []}

    # Layers in dependency order: (result key, banner emoji, label, notebooks, parameters).
    # Only the Bronze ingestion notebooks receive the data-source parameter.
    layers = (
        ("bronze", "🥉", "Bronze", BRONZE_NOTEBOOKS,
         {"p_file_date": file_date, "p_data_source": "ergast"}),
        ("silver", "🥈", "Silver", SILVER_NOTEBOOKS, {"p_file_date": file_date}),
        ("gold", "🥇", "Gold", GOLD_NOTEBOOKS, {"p_file_date": file_date}),
    )

    date_ok = True
    for key, medal, label, notebooks, params in layers:
        if not date_ok:
            # A downstream layer must not run on top of a failed upstream layer:
            # it would transform incomplete data for this date.
            print(f"\n⏭️ Skipping {label} Layer: an upstream layer failed for {file_date}")
            continue
        print(f"\n{medal} Running {label} Layer...")
        date_results[key], date_ok = _run_layer(label, notebooks, params)

    execution_log.append(date_results)
    if date_ok:
        print(f"\n✅ Finished processing date: {file_date}")
    else:
        print(f"\n❌ Finished processing date with failures: {file_date}")

## Step 3: Validate Layer Transitions

In [None]:
# Compare each Bronze table with its Silver counterpart.
# What validate_layer_transition checks and returns is defined in formula1_utils;
# the results are only collected here, in VALIDATION_PAIRS order.
print("\n📊 Validating Bronze → Silver transitions...")
validation_results = [
    validate_layer_transition(spark, bronze_table, silver_table)
    for bronze_table, silver_table in VALIDATION_PAIRS
]

## Execution Summary

In [None]:
# Summary
# A date is successful only if every notebook attempted for it, across all
# three layers, reported success.
total_dates = len(date_folders)
failed_dates = [
    log["date"]
    for log in execution_log
    if not all(nb["success"] for nb in log["bronze"] + log["silver"] + log["gold"])
]
successful_dates = len(execution_log) - len(failed_dates)

print(f"\n{'='*50}")
print("📈 PIPELINE EXECUTION SUMMARY")
print(f"{'='*50}")
print(f"Total dates processed: {total_dates}")
print(f"Successful: {successful_dates}")
# Discovered dates with no log entry (e.g. an interrupted run) also count as failed.
print(f"Failed: {total_dates - successful_dates}")

# Only show the success checkmark when nothing failed.
if successful_dates == total_dates:
    print("\n✅ All dates processed.")
else:
    print(f"\n⚠️ Completed with failures. Dates with failed notebooks: {failed_dates}")