In [1]:
from datetime import date
from dateutil.relativedelta import relativedelta

In [3]:
# CONFIG
# Date range to generate SQL for. The generation loop starts its first monthly
# window at START_DATE and stops as soon as a window would start on or after
# END_DATE.
START_DATE = date(year=1987, month=10, day=1)
END_DATE = date(year=2008, month=5, day=1)

# Destination of the generated SQL script (relative to the notebook's working
# directory).
OUTPUT_SQL_FILE = "../sql/staging/02_build_daily_fact.sql"

# SQL TEMPLATE
# One section of the generated script, rendered once per date window via
# str.format(). Placeholders:
#   {year}, {month:02d}  - used only in the SQL comment banner (month is
#                          zero-padded to two digits).
#   {start_date}         - inclusive lower bound of the window (flight_date >=).
#   {end_date}           - exclusive upper bound of the window (flight_date <).
#
# Each section first deletes the rows of flights_fact_daily that fall inside
# the window and then inserts freshly aggregated rows for the same window, so
# the window's rows are replaced rather than appended to, followed by a COMMIT.
#
# The aggregation groups staging_flights by flight date, airline, origin
# airport and destination airport. The dimension lookups are inner JOINs, so
# staging rows whose airline_code / origin / dest has no match in the
# dimension tables do not contribute to the aggregates.
#
# NOTE(review): the INSERT has no explicit column list, so the SELECT column
# order must match the column order of flights_fact_daily - confirm against
# the table definition.
SQL_TEMPLATE = """
-- ============================
-- {year}-{month:02d}
-- ============================

DELETE FROM flights_fact_daily
WHERE flight_date >= '{start_date}'
  AND flight_date <  '{end_date}';

INSERT INTO flights_fact_daily
SELECT
    sf.flight_date,
    a.airline_id,
    ao.airport_id,
    ad.airport_id,

    COUNT(*) AS total_flights,
    SUM(sf.cancelled) AS cancelled_flights,
    SUM(sf.diverted) AS diverted_flights,
    AVG(sf.dep_delay) AS avg_dep_delay,
    AVG(sf.arr_delay) AS avg_arr_delay,
    SUM(sf.distance) AS total_distance

FROM staging_flights sf
JOIN airlines_dim a ON sf.airline_code = a.airline_code
JOIN airports_dim ao ON sf.origin = ao.airport_code
JOIN airports_dim ad ON sf.dest = ad.airport_code
WHERE sf.flight_date >= '{start_date}'
  AND sf.flight_date <  '{end_date}'
GROUP BY
    sf.flight_date,
    a.airline_id,
    ao.airport_id,
    ad.airport_id;

COMMIT;
"""

# GENERATE SQL FILE
# Walk the half-open range [START_DATE, END_DATE) in one-month steps and write
# one rendered SQL_TEMPLATE section per step, preceded by a small script header
# that selects the database and turns autocommit off (each section issues its
# own COMMIT).
current = START_DATE

# The encoding is passed explicitly so the generated script does not depend on
# the platform's default locale encoding.
with open(OUTPUT_SQL_FILE, "w", encoding="utf-8") as f:
    f.write("USE airline_analytics;\n")
    f.write("SET autocommit = 0;\n\n")

    while current < END_DATE:
        # Clamp the window to END_DATE: when END_DATE is not exactly a whole
        # number of months after START_DATE, an unclamped final window would
        # make the generated DELETE/INSERT touch rows on or after END_DATE.
        # For month-aligned ranges this is identical to a plain one-month step.
        window_end = min(current + relativedelta(months=1), END_DATE)

        f.write(
            SQL_TEMPLATE.format(
                year=current.year,
                month=current.month,
                start_date=current.isoformat(),
                end_date=window_end.isoformat(),
            )
        )

        # The next window starts exactly where this one ended, so windows are
        # contiguous and never overlap.
        current = window_end

print(f"SQL file generated successfully: {OUTPUT_SQL_FILE}")

SQL file generated successfully: ../sql/staging/02_build_daily_fact.sql
