In [0]:
# FRED Pipeline Orchestrator with Frequency Logic
# Notebook: 04_fred_orchestrator

from datetime import datetime
import time
from pyspark.sql import Row

# Take a single UTC snapshot and use it for both the banner and the schedule
# checks. The banner labels the date as UTC and the other timestamps in this
# notebook come from datetime.utcnow(), so the day/month tests must use the
# same clock rather than the driver's local time.
start_time = time.time()
today = datetime.utcnow()
month = today.month
day = today.day
print(f"[START] Orchestration started at: {today.date()} UTC")

# Track which pipelines actually ran
pipelines_ran = []

# Schedule table: (label recorded in pipelines_ran, notebook to run, due today?).
#   daily     - every run
#   monthly   - 1st day of the month
#   quarterly - 1st day of Jan, Apr, Jul, Oct
is_month_start = day == 1
is_quarter_start = is_month_start and month in (1, 4, 7, 10)
schedule = [
    ("daily", "01_fred_daily_pipeline", True),
    ("monthly", "02_fred_monthly_pipeline", is_month_start),
    ("quarterly", "03_fred_quarterly_pipeline", is_quarter_start),
]

for label, notebook, due in schedule:
    if not due:
        continue
    print(f"\n[RUNNING] {notebook}")
    # Each child notebook gets a 1800-second timeout. There is no error
    # handling here: anything raised by the call propagates and stops the
    # orchestrator before later pipelines run.
    dbutils.notebook.run(notebook, timeout_seconds=1800)
    pipelines_ran.append(label)

# === Completion ===
end_time = time.time()
total_duration = int(end_time - start_time)  # whole seconds, truncated
print(f"\n[COMPLETE] Pipelines finished in {total_duration} seconds at {datetime.utcnow()} UTC")

# === Log Metadata ===
from pyspark.sql.types import StructType, StructField, StringType, TimestampType, LongType

# Schema of the job-metadata log. Every column is declared nullable.
log_schema = StructType([
    StructField(column_name, column_type, True)
    for column_name, column_type in (
        ("job_name", StringType()),
        ("table_name", StringType()),
        ("run_date", TimestampType()),
        ("row_count", LongType()),
        ("status", StringType()),
        ("duration_sec", LongType()),
        ("frequency", StringType()),
    )
])

# One record describing this orchestrator run.
# NOTE: row_count holds the number of pipelines that ran (not a data row
# count), and frequency is always "daily" regardless of which pipelines ran.
run_record = Row(
    job_name="fred_etl_orchestrator",
    table_name="n/a",
    run_date=datetime.utcnow(),
    row_count=len(pipelines_ran),
    status="success",
    duration_sec=total_duration,
    frequency="daily",
)
log_data = [run_record]

log_df = spark.createDataFrame(log_data, schema=log_schema)

# Append the record to the Delta log, partitioned by the frequency column.
(
    log_df.write
    .format("delta")
    .mode("append")
    .partitionBy("frequency")
    .save("/mnt/fred/logs/job_metadata")
)

# === Register log table for Fabric ===
# Register the Delta location as a table only if it is not registered yet.
# Dropping and recreating the table on every run leaves a window in which the
# table does not exist for concurrent readers and discards any table-level
# metadata attached to it; the data itself lives at the Delta location and is
# picked up from there either way.
spark.sql(
    "CREATE TABLE IF NOT EXISTS log_fred_orchestrator "
    "USING DELTA LOCATION '/mnt/fred/logs/job_metadata'"
)