In [1]:
import pandas as pd
import altair as alt

def _load_monthly_passengers(start_year, end_year, data_dir):
    """Read each year's cleaned CSV and return a list of monthly-total frames.

    Years whose file is missing are reported on stdout and skipped.
    """
    monthly_frames = []
    for year in range(start_year, end_year + 1):
        try:
            df = pd.read_csv(f"{data_dir}/flights_{year}_clean.csv")
        except FileNotFoundError:
            print(f"Missing file for {year} — skipping.")
            continue

        # Aggregate to monthly totals and stamp each row with the first day of
        # its month so the chart can use a temporal x axis.
        monthly = df.groupby('MONTH')['PASSENGERS'].sum().reset_index()
        monthly['YEAR'] = year
        monthly['date'] = pd.to_datetime(
            monthly['YEAR'].astype(str) + "-" + monthly['MONTH'].astype(str) + "-01"
        )
        monthly_frames.append(monthly)

    return monthly_frames


def plot_yearly_passenger_trends(start_year, end_year=2025, data_dir="clean_data"):
    """Plot monthly passenger totals with shaded crisis periods.

    Parameters
    ----------
    start_year : int
        First year to load (inclusive).
    end_year : int, default 2025
        Last year to load (inclusive).
    data_dir : str, default "clean_data"
        Directory holding ``flights_<year>_clean.csv`` files. Each file must
        provide ``MONTH`` and ``PASSENGERS`` columns.

    Returns
    -------
    alt.LayerChart
        Line chart of monthly passengers, overlaid with shaded bands and text
        labels for the crisis periods that overlap the loaded data.

    Raises
    ------
    ValueError
        If no yearly file could be loaded for the requested range.
    """
    all_data = _load_monthly_passengers(start_year, end_year, data_dir)

    # Combine all years
    if not all_data:
        raise ValueError("No data loaded.")
    data = pd.concat(all_data, ignore_index=True)

    # --------------------------
    # Crisis + recovery intervals
    # --------------------------
    highlight_periods = pd.DataFrame({
        'start': [
            '2001-01-01',  # 9/11
            '2008-01-01',  # financial crisis
            '2019-10-01'   # covid
        ],
        'end': [
            '2002-01-31',
            '2009-12-31',
            '2021-01-01'
        ],
        'event': [
            '9/11 Aftermath',
            '2008 Financial Crisis',
            'COVID-19 Impact'
        ]
    })

    highlight_periods['start'] = pd.to_datetime(highlight_periods['start'])
    highlight_periods['end']   = pd.to_datetime(highlight_periods['end'])

    # Keep only the periods that overlap the loaded data and clip them to it;
    # otherwise a band lying outside the data would stretch the time axis
    # (e.g. the 2001 band when start_year is 2010).
    data_start = data['date'].min()
    data_end = data['date'].max()
    overlaps = (
        (highlight_periods['end'] >= data_start)
        & (highlight_periods['start'] <= data_end)
    )
    highlight_periods = highlight_periods.loc[overlaps].copy()
    highlight_periods['start'] = highlight_periods['start'].clip(lower=data_start)
    highlight_periods['end'] = highlight_periods['end'].clip(upper=data_end)

    # Passenger line
    line = alt.Chart(data).mark_line(point=True).encode(
        x=alt.X('date:T', title='Date'),
        y=alt.Y('PASSENGERS:Q', title='Total Monthly Passengers'),
        color=alt.value('#1f77b4'),
        tooltip=[
            alt.Tooltip('date:T', title='Month'),
            alt.Tooltip('PASSENGERS:Q', title='Passengers', format=','),
        ]
    )

    layers = [line]
    if not highlight_periods.empty:
        # Shaded rectangles (no y encoding, so each band spans the full height)
        rects = alt.Chart(highlight_periods).mark_rect(
            opacity=0.25,
            color='red'
        ).encode(
            x='start:T',
            x2='end:T'
        )

        # Event labels pinned near the top edge of the plot (y is in pixels)
        labels = alt.Chart(highlight_periods).mark_text(
            align='left',
            baseline='bottom',
            dx=3,
            dy=3,
            fontSize=12,
            fontWeight='bold'
        ).encode(
            x='start:T',
            y=alt.value(10),
            text='event:N'
        )

        # Bands go underneath the line, labels on top.
        layers = [rects, line, labels]

    chart = alt.layer(*layers).properties(
        width=900,
        height=400,
        title="Monthly Passenger Trends Over Time"
    )

    return chart



In [2]:
# Render the monthly passenger trend, loading yearly files from 1999 onward.
plot_yearly_passenger_trends(start_year=1999)

In [24]:
import pandas as pd
import altair as alt
import glob
import os

# Lift Altair's default row limit so larger frames can be embedded in a chart.
alt.data_transformers.disable_max_rows()

# ----------------------------
# LOAD ALL CLEAN CSV FILES
# ----------------------------
files = sorted(glob.glob("clean_data/flights_*_clean.csv"))
if not files:
    # Without this guard pd.concat([]) fails with the opaque
    # "No objects to concatenate"; name the pattern that matched nothing.
    raise FileNotFoundError(
        "No files matching clean_data/flights_*_clean.csv were found."
    )

df_list = []
for f in files:
    # The year is the second "_"-separated token of the file name
    # (flights_<YEAR>_clean.csv).
    filename = os.path.basename(f)
    year = int(filename.split("_")[1])

    temp = pd.read_csv(f)
    temp["YEAR"] = year
    df_list.append(temp)

df = pd.concat(df_list, ignore_index=True)

# ----------------------------
# AGGREGATE BY YEAR × SEASON
# ----------------------------
# One row per (YEAR, SEASON): volume columns are summed, capacity_percent is
# averaged.
# NOTE(review): the mean of capacity_percent is unweighted across rows;
# PASSENGERS / SEATS would give the seat-weighted figure — confirm which one
# is intended.
grouped = df.groupby(["YEAR", "SEASON"], as_index=False).agg(
    PASSENGERS=("PASSENGERS", "sum"),
    SEATS=("SEATS", "sum"),
    DEPARTURES_PERFORMED=("DEPARTURES_PERFORMED", "sum"),
    capacity_percent=("capacity_percent", "mean"),
)

# Display order passed as `sort=` to the season encodings below.
season_order = ["Winter", "Spring", "Summer", "Fall"]

# ----------------------------
# BRUSH SELECTION
# ----------------------------
# Rectangular drag selection over both axes; attached to the scatter and used
# as the filter for the histogram.
brush = alt.selection_interval(encodings=["x", "y"])

# ----------------------------
# SCATTER PLOT (LEFT)
# ----------------------------
# One circle per (YEAR, SEASON) row of `grouped`: position = seats vs.
# passengers, colour = capacity_percent, size = departures, shape = season.
scatter = (
    alt.Chart(grouped)
    .mark_circle(opacity=0.75)
    .encode(
        x=alt.X("SEATS:Q", title="Total Seasonal Seats"),
        y=alt.Y("PASSENGERS:Q", title="Total Seasonal Passengers"),
        color=alt.Color(
            "capacity_percent:Q",
            title="Load Factor",
            scale=alt.Scale(scheme="viridis")
        ),
        size=alt.Size(
            "DEPARTURES_PERFORMED:Q",
            title="Departures Performed",
            scale=alt.Scale(range=[40, 900]),
            legend=alt.Legend(orient='left')   # place the size legend on the left
        ),
        shape=alt.Shape("SEASON:N", sort=season_order),
        tooltip=[
            "YEAR:O",
            "SEASON:N",
            alt.Tooltip("capacity_percent:Q", title="Load Factor", format=".2f"),
            # Thousands separators keep the large seasonal totals readable.
            alt.Tooltip("PASSENGERS:Q", title="Passengers", format=",.0f"),
            alt.Tooltip("SEATS:Q", title="Seats", format=",.0f"),
            alt.Tooltip("DEPARTURES_PERFORMED:Q", title="Departures Performed", format=",.0f"),
        ]
    )
    .add_params(brush)
    .properties(
        width=600,
        height=450,
        title="Seasonal Scatter — Select a Region"
    )
)

# ----------------------------
# HISTOGRAM (RIGHT) — With Legend + Tooltip
# ----------------------------
# Shared bin spec: the x axis and the tooltip must bin capacity_percent the
# same way. An un-binned field in the tooltip would become an extra group-by
# key and split every bar into per-value groups, so the tooltip count would no
# longer be the bar's count.
load_factor_bin = alt.Bin(maxbins=20)

# Stacked counts of the brushed (YEAR, SEASON) points per load-factor bin,
# coloured by season.
hist = (
    alt.Chart(grouped)
    .mark_bar(opacity=0.85)
    .encode(
        x=alt.X(
            "capacity_percent:Q",
            bin=load_factor_bin,
            title="Load Factor (capacity_percent)"
        ),
        y=alt.Y("count()", title="Number of Seasonal Points"),
        color=alt.Color(
            "SEASON:N",
            sort=season_order,
            title="Season",
            scale=alt.Scale(scheme="tableau10")
        ),
        tooltip=[
            alt.Tooltip("SEASON:N", title="Season"),
            alt.Tooltip("count()", title="Count of Points"),
            alt.Tooltip(
                "capacity_percent:Q",
                bin=load_factor_bin,
                title="Load Factor (Range)",
                format=".2f"
            )
        ]
    )
    # Only rows inside the scatter's brush are counted.
    .transform_filter(brush)
    .properties(
        width=300,
        height=450,
        title="Load Factor Distribution — Selected Region"
    )
)

# ----------------------------
# FINAL LINKED VIEW
# ----------------------------
# Scatter on the left, brush-filtered histogram on the right.
final_chart = alt.hconcat(scatter, hist)

# Bare trailing expression so the notebook renders the chart as the cell output.
final_chart
