# Difference-in-Differences (DiD) + Fare Elasticity Analysis
This notebook runs two panel regressions:
- One basic DiD model on log passengers
- One model including log fares for fare elasticity control
Uses `PanelOLS` with fixed effects (route + time).

## DiD Model - ULCC Entry Effect Only

In [2]:
import pandas as pd
import numpy as np
from linearmodels.panel import PanelOLS

# Load your cleaned and merged data
df = pd.read_csv("../data/processed/bigquery_output/did_table.csv")

# Create interaction term for DiD
df["DID"] = df["ULCC_TREATED"] * df["POST_ENTRY"]
df["log_passengers"] = np.log(df["DB1B_PASSENGERS"] + 1)

# Convert YEAR + QUARTER into a datetime for time index
df["time_index"] = pd.to_datetime(
    df["YEAR"].astype(str) + "-" + (df["QUARTER"] * 3).astype(str) + "-01"
)

# Set panel index: (entity = ROUTE_KEY, time = time_index)
df = df.set_index(["ROUTE_KEY", "time_index"])

# Run DiD PanelOLS
model = PanelOLS.from_formula(
    formula="log_passengers ~ DID + EntityEffects + TimeEffects",
    data=df,
    drop_absorbed=True,
    check_rank=False,
).fit(cov_type="robust")

# Show results
print(model.summary)

# Check if DID survived fixed effects
if "DID" in model.params:
    print(f"\n✅ DiD Estimate (Treatment x Post): {model.params['DID']:.4f}")
else:
    print(
        "\n⚠️ DiD term was absorbed by fixed effects. Consider simplifying or re-specifying the model."
    )

                          PanelOLS Estimation Summary                           
Dep. Variable:         log_passengers   R-squared:                        0.0021
Estimator:                   PanelOLS   R-squared (Between):              0.0064
No. Observations:              684124   R-squared (Within):               0.0025
Date:                Fri, Jun 27 2025   R-squared (Overall):              0.0090
Time:                        16:56:03   Log-likelihood                 -3.53e+05
Cov. Estimator:                Robust                                           
                                        F-statistic:                      1412.4
Entities:                       18408   P-value                           0.0000
Avg Obs:                       37.164   Distribution:                F(1,665656)
Min Obs:                       1.0000                                           
Max Obs:                       1402.0   F-statistic (robust):             1505.5
                            

## DiD + Fare Elasticity (Controlled)

In [3]:
# 📥 1. Load Data Again (for clean state)
df = pd.read_csv("../data/processed/bigquery_output/did_table.csv")

# 🧼 2. Basic Cleaning + Derived Columns
df["DID"] = df["ULCC_TREATED"] * df["POST_ENTRY"]
df["log_passengers"] = np.log(df["DB1B_PASSENGERS"] + 1)
df["log_fare"] = np.log(df["MARKET_FARE"] + 1)

# ⏱️ 3. Create time index (date-like for PanelOLS)
df["time_index"] = pd.to_datetime(
    df["YEAR"].astype(str) + "-" + (df["QUARTER"] * 3).astype(str) + "-01"
)

# 🧱 4. Set panel index (Entity = ROUTE_KEY, Time = time_index)
df = df.set_index(["ROUTE_KEY", "time_index"])

# ✅ 5. Run PanelOLS with Route & Time Fixed Effects
model = PanelOLS.from_formula(
    formula="log_passengers ~ log_fare + DID + EntityEffects + TimeEffects",
    data=df,
    drop_absorbed=True,
    check_rank=False,
).fit(cov_type="robust")

# 📊 6. Output Results
print("📊 Fare Elasticity with DiD (PanelOLS)")
print(model.summary)

# 📈 7. Interpret Coefficients
if "log_fare" in model.params:
    elasticity = model.params["log_fare"]
    print(
        f"\n✅ Fare Elasticity: {elasticity:.4f} → 1% fare drop leads to ~{abs(elasticity):.2f}% change in demand"
    )
else:
    print("\n⚠️ Fare variable was absorbed or missing.")

if "DID" in model.params:
    did_effect = model.params["DID"]
    percent_increase = (np.exp(did_effect) - 1) * 100
    print(
        f"✅ DiD Effect: {did_effect:.4f} → ~{percent_increase:.2f}% demand change after ULCC entry"
    )
else:
    print("\n⚠️ DID variable was absorbed or missing.")

📊 Fare Elasticity with DiD (PanelOLS)
                          PanelOLS Estimation Summary                           
Dep. Variable:         log_passengers   R-squared:                        0.0074
Estimator:                   PanelOLS   R-squared (Between):             -0.2933
No. Observations:              684124   R-squared (Within):              -0.0130
Date:                Fri, Jun 27 2025   R-squared (Overall):             -0.1719
Time:                        16:56:45   Log-likelihood                -3.512e+05
Cov. Estimator:                Robust                                           
                                        F-statistic:                      2478.1
Entities:                       18408   P-value                           0.0000
Avg Obs:                       37.164   Distribution:                F(2,665655)
Min Obs:                       1.0000                                           
Max Obs:                       1402.0   F-statistic (robust):          