# Data Collection


In [13]:
# Mount Drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
# Install Kaggle CLI
!pip install -q kaggle

In [28]:
# Create folder & download the dataset
import os

dataset_dir = '/content/drive/MyDrive/2025F/Cathay Hackathon'
os.makedirs(dataset_dir, exist_ok=True)

csv_path = os.path.join(dataset_dir, 'flights.csv')

if not os.path.exists(csv_path):
    print("Downloading flights.csv from Kaggle (≈ 570 MB)…")
    !kaggle datasets download -d usdot/flight-delays -f flights.csv -p {dataset_dir} --unzip
    print("Download finished!")
else:
    print(f"{csv_path} already exists – skipping download.")

/content/drive/MyDrive/2025F/Cathay Hackathon/flights.csv already exists – skipping download.


# Download Data and Display

In [46]:
# Without Drive mounting
print("Downloading flights.csv from Kaggle (≈ 570 MB)…")
!kaggle datasets download -d usdot/flight-delays -f flights.csv -p {dataset_dir} --unzip
print("Download finished!")

Downloading flights.csv from Kaggle (≈ 570 MB)…
Traceback (most recent call last):
  File "/usr/local/bin/kaggle", line 10, in <module>
    sys.exit(main())
             ^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/kaggle/cli.py", line 68, in main
    out = args.func(**command_args)
          ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/kaggle/api/kaggle_api_extended.py", line 1741, in dataset_download_cli
    with self.build_kaggle_client() as kaggle:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/kaggle/api/kaggle_api_extended.py", line 688, in build_kaggle_client
    username=self.config_values['username'],
             ~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^
KeyError: 'username'
Download finished!


In [47]:
# File verification

import pandas as pd
df = pd.read_csv(csv_path, nrows=5)
print("First 5 rows:")
display(df.head())
print(f"Total rows: {len(pd.read_csv(csv_path)):,}")

First 5 rows:


Unnamed: 0,YEAR,MONTH,DAY,DAY_OF_WEEK,AIRLINE,FLIGHT_NUMBER,TAIL_NUMBER,ORIGIN_AIRPORT,DESTINATION_AIRPORT,SCHEDULED_DEPARTURE,...,ARRIVAL_TIME,ARRIVAL_DELAY,DIVERTED,CANCELLED,CANCELLATION_REASON,AIR_SYSTEM_DELAY,SECURITY_DELAY,AIRLINE_DELAY,LATE_AIRCRAFT_DELAY,WEATHER_DELAY
0,2015,1,1,4,AS,98,N407AS,ANC,SEA,5,...,408,-22,0,0,,,,,,
1,2015,1,1,4,AA,2336,N3KUAA,LAX,PBI,10,...,741,-9,0,0,,,,,,
2,2015,1,1,4,US,840,N171US,SFO,CLT,20,...,811,5,0,0,,,,,,
3,2015,1,1,4,AA,258,N3HYAA,LAX,MIA,20,...,756,-9,0,0,,,,,,
4,2015,1,1,4,AS,135,N527AS,SEA,ANC,25,...,259,-21,0,0,,,,,,


  print(f"Total rows: {len(pd.read_csv(csv_path)):,}")


Total rows: 5,819,079


# Data Preprocessing

In [48]:
# Data Preprocessing
print("Loading CSV...")
df = pd.read_csv(csv_path, nrows=50000, low_memory=False)  # Remove nrows for full data

# KEEP ONLY NON-CANCELLED/NON-DIVERTED
df = df[(df['CANCELLED'] == 0) & (df['DIVERTED'] == 0)].copy()

# DROP EXACT DUPLICATE FLIGHT RECORDS
duplicate_cols = ['YEAR', 'MONTH', 'DAY', 'AIRLINE', 'FLIGHT_NUMBER',
                  'ORIGIN_AIRPORT', 'DESTINATION_AIRPORT', 'SCHEDULED_DEPARTURE']
print(f"Before dedupe: {len(df)} rows")
df = df.drop_duplicates(subset=duplicate_cols)
print(f"After dedupe:  {len(df)} rows")

Loading CSV...
Before dedupe: 48753 rows
After dedupe:  48753 rows


# Build and Train GNN model




In [67]:
import networkx as nx
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import random
from sklearn.preprocessing import StandardScaler
from collections import deque
# --------------------------------------------------------------
# 3. BUILD GRAPH & NODE FEATURES
# --------------------------------------------------------------
airports = list(set(df['ORIGIN_AIRPORT']) | set(df['DESTINATION_AIRPORT']))
airport_to_idx = {ap: i for i, ap in enumerate(airports)}
# Node features
avg_dep = df.groupby('ORIGIN_AIRPORT')['DEPARTURE_DELAY'].mean().reindex(airports).fillna(0)
avg_arr = df.groupby('DESTINATION_AIRPORT')['ARRIVAL_DELAY'].mean().reindex(airports).fillna(0)
avg_wdelay = df.groupby('ORIGIN_AIRPORT')['WEATHER_DELAY'].mean().reindex(airports).fillna(0)
out_cnt = df['ORIGIN_AIRPORT'].value_counts().reindex(airports).fillna(0)
scaler = StandardScaler()
node_features = scaler.fit_transform(np.column_stack([avg_dep, avg_arr, avg_wdelay, out_cnt])).astype(np.float32)
G = nx.MultiDiGraph()
for ap, feats in zip(airports, node_features):
    G.add_node(ap, features=feats)
def time_to_minutes(t):
    if pd.isna(t): return 0
    s = f"{int(t):04d}"
    return int(s[:2])*60 + int(s[2:])
def str_to_minutes(s):
    h, m = map(int, s.split(':'))
    return h*60 + m
def minutes_to_str(m):
    m = m % 1440
    return f"{m//60:02d}:{m%60:02d}"
# Add all flights as edges
for idx, row in df.iterrows():
    dep = time_to_minutes(row['SCHEDULED_DEPARTURE'])
    arr = time_to_minutes(row['SCHEDULED_ARRIVAL'])
    risk = row['WEATHER_DELAY'] if pd.notna(row['WEATHER_DELAY']) else 0
    # When adding edges:
    G.add_edge(
        row['ORIGIN_AIRPORT'], row['DESTINATION_AIRPORT'],
        key=idx,
        dep=dep, arr=arr,
        flight_num=row['FLIGHT_NUMBER'],
        airline=row['AIRLINE'],
        weather_risk=risk,
        dest=row['DESTINATION_AIRPORT'] # <--- ADD THIS
    )
adj = torch.tensor(nx.to_numpy_array(G, nodelist=airports), dtype=torch.float32)
features = torch.tensor(node_features, dtype=torch.float32)
# --------------------------------------------------------------
# 4. GNN MODEL
# --------------------------------------------------------------
class FlightGNN(nn.Module):
    def __init__(self, in_dim=4, hidden=64, out_dim=32):
        super().__init__()
        self.fc1 = nn.Linear(in_dim, hidden)
        self.fc2 = nn.Linear(hidden, out_dim)
        self.norm = nn.LayerNorm(hidden)
    def forward(self, x, adj):
        x = torch.mm(adj, x)
        x = F.relu(self.norm(self.fc1(x)))
        x = torch.mm(adj, x)
        x = self.fc2(x)
        return x
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = FlightGNN().to(device)
adj, features = adj.to(device), features.to(device)
# --------------------------------------------------------------
# 5. TRAIN ON REAL FLIGHT CHAINS
# --------------------------------------------------------------
def generate_sessions(n=3000, max_len=3):
    sessions = []
    for _ in range(n):
        row = df.sample(1).iloc[0]
        path = [row['ORIGIN_AIRPORT'], row['DESTINATION_AIRPORT']]
        cur = path[-1]
        arr = time_to_minutes(row['SCHEDULED_ARRIVAL'])
        for _ in range(random.randint(0, max_len-1)):
            cand = df[(df['ORIGIN_AIRPORT']==cur) &
                      (df['SCHEDULED_DEPARTURE'].apply(time_to_minutes) >= arr + 30)]
            if cand.empty: break
            nxt = cand.sample(1).iloc[0]
            path.append(nxt['DESTINATION_AIRPORT'])
            arr = time_to_minutes(nxt['SCHEDULED_ARRIVAL'])
        if len(path) >= 2:
            sessions.append(path)
    return sessions
sessions = generate_sessions()
train_s, _ = train_test_split(sessions, test_size=0.2)
optimizer = torch.optim.Adam(model.parameters(), lr=3e-3)
criterion = nn.CrossEntropyLoss()
def train_epoch():
    model.train()
    optimizer.zero_grad()
    embeds = model(features, adj)
    preds, targets = [], []
    for sess in train_s:
        if len(sess) < 2: continue
        for i in range(len(sess)-1):
            u = airport_to_idx[sess[i]]
            v = airport_to_idx[sess[i+1]]
            preds.append(embeds[u])
            targets.append(v)
    if not preds: return 0
    preds = torch.stack(preds)
    logits = preds @ embeds.t()
    loss = criterion(logits, torch.tensor(targets, device=device))
    loss.backward()
    optimizer.step()
    return loss.item() / len(preds)
print("Training GNN...")
for epoch in range(1, 81):
    loss = train_epoch()
    print(f"Epoch {epoch:2d} | Loss: {loss:.4f}")

Training GNN...
Epoch  1 | Loss: 1668.4201
Epoch  2 | Loss: 1047.1031
Epoch  3 | Loss: 627.5591
Epoch  4 | Loss: 370.2874
Epoch  5 | Loss: 227.4159
Epoch  6 | Loss: 155.5084
Epoch  7 | Loss: 124.0795
Epoch  8 | Loss: 115.4455
Epoch  9 | Loss: 118.8828
Epoch 10 | Loss: 126.3345
Epoch 11 | Loss: 132.3710
Epoch 12 | Loss: 134.8128
Epoch 13 | Loss: 134.0241
Epoch 14 | Loss: 130.8494
Epoch 15 | Loss: 125.2535
Epoch 16 | Loss: 116.7739
Epoch 17 | Loss: 105.5222
Epoch 18 | Loss: 92.3818
Epoch 19 | Loss: 78.4092
Epoch 20 | Loss: 64.3529
Epoch 21 | Loss: 50.8595
Epoch 22 | Loss: 38.8282
Epoch 23 | Loss: 29.2867
Epoch 24 | Loss: 22.8593
Epoch 25 | Loss: 19.4519
Epoch 26 | Loss: 18.3608
Epoch 27 | Loss: 18.6756
Epoch 28 | Loss: 19.5737
Epoch 29 | Loss: 20.4148
Epoch 30 | Loss: 20.7211
Epoch 31 | Loss: 20.2319
Epoch 32 | Loss: 18.9713
Epoch 33 | Loss: 17.1988
Epoch 34 | Loss: 15.2517
Epoch 35 | Loss: 13.4008
Epoch 36 | Loss: 11.7828
Epoch 37 | Loss: 10.4870
Epoch 38 | Loss: 9.4803
Epoch 39 | Loss:

# Testing

In [82]:
def recommend_two_hop(start, end, orig_dep, orig_arr, now_min, top_k=10):
    model.eval()
    with torch.no_grad():
        embeds = model(features, adj)

    MIN_BUFFER_MINUTES = 60
    earliest_dep = now_min + MIN_BUFFER_MINUTES

    s_idx = airport_to_idx[start]
    e_idx = airport_to_idx[end]

    candidates = []
    seen_routes = set()          # keep the original duplicate-filter

    # -----------------------------------------------------------------
    # 1. Build a *per-airport* dict:  airport → list of candidate dicts
    # -----------------------------------------------------------------
    per_airport = {}             # NEW

    for c in airports:
        if c in {start, end}:
            continue

        out_edges = [(k, d) for u, v, k, d in G.out_edges(start, keys=True, data=True) if v == c]
        in_edges  = [(k, d) for u, v, k, d in G.out_edges(c,    keys=True, data=True) if v == end]

        for key1, e1 in out_edges:
            dep = e1.get('dep', -1)
            if dep < earliest_dep:                     # buffer after “now”
                continue
            if e1.get('weather_risk', 1) > 0:
                continue

            arr1 = e1['arr'] + (1440 if e1['arr'] < e1['dep'] else 0)

            for key2, e2 in in_edges:
                if e2.get('weather_risk', 1) > 0:
                    continue
                if e2.get('dep', -1) < arr1 + 30:
                    continue

                arr2 = e2['arr'] + (1440 if e2['arr'] < e2['dep'] else 0)
                if (arr2 // 1440) != (orig_arr // 1440):
                    continue

                f1_id = (e1['airline'], e1['flight_num'], e1['dep'], e1['arr'])
                f2_id = (e2['airline'], e2['flight_num'], e2['dep'], e2['arr'])
                route_key = (f1_id, f2_id)
                if route_key in seen_routes:
                    continue
                seen_routes.add(route_key)

                # ---- build the flight dicts exactly as you did before ----
                flight1 = {
                    'airline': e1['airline'],
                    'number' : e1['flight_num'],
                    'dep_ap' : start,
                    'arr_ap' : c,
                    'dep_time': minutes_to_str(e1['dep']),
                    'arr_time': minutes_to_str(e1['arr']),
                }
                flight2 = {
                    'airline': e2['airline'],
                    'number' : e2['flight_num'],
                    'dep_ap' : c,
                    'arr_ap' : end,
                    'dep_time': minutes_to_str(e2['dep']),
                    'arr_time': minutes_to_str(e2['arr']),
                }

                cand = {
                    'path'          : [start, c, end],
                    'flights'       : [flight1, flight2],
                    'total_arr'     : arr2,
                    'layover_min'   : e2['dep'] - arr1,
                    'time_to_dep'   : dep - now_min,
                    'dep_diff_to_orig': dep - orig_dep,
                }

                # ---- store per airport ---------------------------------
                per_airport.setdefault(c, []).append(cand)

    # -----------------------------------------------------------------
    # 2. From each airport pick the *earliest-departure* candidate
    # -----------------------------------------------------------------
    best_per_airport = []
    for c, lst in per_airport.items():
        best = min(lst, key=lambda x: x['time_to_dep'])   # earliest dep
        best_per_airport.append(best)

    if not best_per_airport:
        return []

    # -----------------------------------------------------------------
    # 3. Sort by “time to departure” and keep at most top_k
    # -----------------------------------------------------------------
    top = sorted(best_per_airport,
             key=lambda x: x['total_arr'])[:top_k]
    return top

# --------------------------------------------------------------
# 6. USER INPUT – FIND ORIGINAL FLIGHT & PRINT RESULTS
# --------------------------------------------------------------
print("\n" + "="*70)
print("FLIGHT REBOOKING: 2-HOP ALTERNATIVES (SAME ARRIVAL DAY)")
print("="*70)
# ---- example (replace with real input) -------------------------
airline = 'AA'
flight_num = 123
year = 2015
month = 1
day = 1
now_str = '08:00'  # Replace with the actual current time in HH:MM format
now_min = str_to_minutes(now_str)
mask = (
    (df['AIRLINE'] == airline) &
    (df['FLIGHT_NUMBER'] == flight_num) &
    (df['YEAR'] == year) &
    (df['MONTH'] == month) &
    (df['DAY'] == day)
)
if mask.sum() == 0:
    print("Flight not found in the dataset.")
else:
    orig = df.loc[mask].iloc[0]
    A, B = orig['ORIGIN_AIRPORT'], orig['DESTINATION_AIRPORT']
    orig_dep = time_to_minutes(orig['SCHEDULED_DEPARTURE'])
    orig_arr = time_to_minutes(orig['SCHEDULED_ARRIVAL'])
    if orig_arr < orig_dep: orig_arr += 1440
    orig_time_to_now = orig_dep - now_min
    print(f"\nCurrent time: {now_str}")
    print(f"Time to original departure: {orig_time_to_now} minutes")
    print(f"\nOriginal: {airline}{flight_num} | {A} → {B}")
    print(f" Dep: {minutes_to_str(orig_dep)} | Arr: {minutes_to_str(orig_arr)}")
    alts = recommend_two_hop(A, B, orig_dep, orig_arr, now_min, top_k=10)
    if not alts:
        print("\nNo 2-hop alternatives found.")
    else:
        print(f"\nTop {len(alts)} Unique 2-Hop Routes (same arrival day, dep <= original, dep >= now, sorted by shortest time to dep):\n")
        for i, r in enumerate(alts, 1):
            f1, f2 = r['flights']
            print(f" {i}. {A} → {f1['arr_ap']} → {B}")
            print(f" Time to this alternative's departure: {r['time_to_dep']} minutes")
            print(f" 1 {f1['airline']}{f1['number']}")
            print(f" {f1['dep_ap']} {f1['dep_time']} → {f1['arr_ap']} {f1['arr_time']}")
            print(f" Layover: {r['layover_min']} min at {f1['arr_ap']}")
            print(f" 2 {f2['airline']}{f2['number']}")
            print(f" {f2['dep_ap']} {f2['dep_time']} → {f2['arr_ap']} {f2['arr_time']}")
            print(f" Arrives: {minutes_to_str(r['total_arr'] % 1440)} (same day)")
            print(" " + "─"*60)
        if alts:
            shortest_time = alts[0]['time_to_dep']
            print(f"\nShortest time to an alternative flight: {shortest_time} minutes")


FLIGHT REBOOKING: 2-HOP ALTERNATIVES (SAME ARRIVAL DAY)

Current time: 08:00
Time to original departure: 175 minutes

Original: AA123 | DFW → HNL
 Dep: 10:55 | Arr: 15:25

Top 8 Unique 2-Hop Routes (same arrival day, dep <= original, dep >= now, sorted by shortest time to dep):

 1. DFW → SFO → HNL
 Time to this alternative's departure: 60 minutes
 1 AA2293
 DFW 09:00 → SFO 10:45
 Layover: 40 min at SFO
 2 UA300
 SFO 11:25 → HNL 14:49
 Arrives: 14:49 (same day)
 ────────────────────────────────────────────────────────────
 2. DFW → SAN → HNL
 Time to this alternative's departure: 60 minutes
 1 AA2337
 DFW 09:00 → SAN 10:10
 Layover: 55 min at SAN
 2 AS895
 SAN 11:05 → HNL 15:19
 Arrives: 15:19 (same day)
 ────────────────────────────────────────────────────────────
 3. DFW → DEN → HNL
 Time to this alternative's departure: 60 minutes
 1 AA1471
 DFW 09:00 → DEN 10:05
 Layover: 85 min at DEN
 2 UA328
 DEN 11:30 → HNL 15:50
 Arrives: 15:50 (same day)
 ────────────────────────────────────

# Visualisation

In [107]:
# --------------------------------------------------------------
# 7. FULL VISUALISATIONS – TIMELINE + RED LINE + ENTIRE FLIGHT NETWORK
# --------------------------------------------------------------
import plotly.graph_objects as go
import networkx as nx
import pandas as pd
import numpy as np

# === 1. GANTT TIMELINE (Original + 2-Hop Alternatives) ===
fig1 = go.Figure()
palette = ['#1f77b4','#ff7f0e','#2ca02c','#d62728','#9467bd',
           '#8c564b','#e377c2','#7f7f7f','#bcbd22','#17becf']

def min_to_str(m):
    m = m % 1440
    return f"{m//60:02d}:{m%60:02d}"

# --- Original Flight ---
orig_start = orig_dep % 1440
orig_end   = orig_arr % 1440
if orig_end < orig_start: orig_end += 1440

fig1.add_trace(go.Bar(
    y=['Original AA123'],
    x=[orig_end - orig_start],
    base=orig_start,
    orientation='h',
    marker_color='red',
    name='Original',
    hovertemplate="<b>Original AA123</b><br>%{customdata[0]} to %{customdata[1]}<extra></extra>",
    customdata=[[min_to_str(orig_dep), min_to_str(orig_arr % 1440)]]
))

# --- 2-Hop Alternatives ---
for idx, r in enumerate(alts):
    col = palette[idx % len(palette)]
    f1, f2 = r['flights']
    dep1 = str_to_minutes(f1['dep_time'])
    arr1 = str_to_minutes(f1['arr_time'])
    dep2 = str_to_minutes(f2['dep_time'])
    arr2 = r['total_arr'] % 1440
    label = f"{idx+1}. {A} to {f1['arr_ap']} to {B}"

    # Leg 1
    fig1.add_trace(go.Bar(y=[label], x=[arr1 - dep1], base=dep1,
                          orientation='h', marker_color=col, showlegend=False,
                          hovertemplate=f"<b>{label}</b><br>Leg 1: {f1['dep_time']} to {f1['arr_time']}<extra></extra>"))
    # Layover
    fig1.add_trace(go.Bar(y=[label], x=[dep2 - arr1], base=arr1,
                          orientation='h', marker_color='lightgray',
                          hovertemplate=f"<b>Layover @ {f1['arr_ap']}</b><br>Buffer: {dep2 - arr1} min<extra></extra>"))
    # Leg 2
    fig1.add_trace(go.Bar(y=[label], x=[arr2 - dep2], base=dep2,
                          orientation='h', marker_color=col, marker_pattern_shape="/",
                          hovertemplate=f"<b>Leg 2</b><br>{f2['dep_time']} to {min_to_str(arr2)}<extra></extra>"))

# --- ONE RED VERTICAL LINE at original arrival ---
fig1.add_vline(
    x=orig_end,
    line=dict(color="red", width=3, dash="dot"),
    annotation_text=f"Original Arrival<br>{min_to_str(orig_arr % 1440)}",
    annotation_position="top",
    annotation_font_color="red"
)

fig1.update_layout(
    title="Rebooking Timeline (HKT, 15 Nov 2025)",
    xaxis=dict(
        title="Time of Day (HKT)",
        tickmode='array',
        tickvals=list(range(0, 1441, 120)),
        ticktext=[f"{h:02d}:00" for h in range(0, 25, 2)]
    ),
    yaxis_title="Route",
    barmode='stack',
    height=300 + max(len(alts), 1) * 60,
    legend=dict(title="Segment"),
    hovermode="y unified"
)
fig1.show()

# === 2. FULL FLIGHT NETWORK (All Airports & All Flights) ===
# Use the full G (MultiDiGraph) you already built
G_full = G.copy()  # Your full graph with ALL flights

# Downsample for performance (optional: show only top N busiest airports)
top_n = 100
if len(G_full.nodes) > top_n:
    degrees = dict(G_full.degree())
    top_airports = sorted(degrees, key=degrees.get, reverse=True)[:top_n]
    G_full = G_full.subgraph(top_airports).copy()

# Layout
pos = nx.spring_layout(G_full, k=0.8, iterations=50, seed=42)

# Edge traces
edge_x, edge_y = [], []
for edge in G_full.edges():
    x0, y0 = pos[edge[0]]
    x1, y1 = pos[edge[1]]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

fig_net = go.Figure()

fig_net.add_trace(go.Scatter(
    x=edge_x, y=edge_y,
    mode='lines',
    line=dict(width=0.5, color='#cccccc'),
    hoverinfo='none',
    showlegend=False
))

# Node traces
node_x = [pos[n][0] for n in G_full.nodes]
node_y = [pos[n][1] for n in G_full.nodes]
node_text = list(G_full.nodes)
node_size = [max(5, np.log1p(G_full.degree(n)) * 3) for n in G_full.nodes]
node_color = ['#ff6b6b' if n == A else '#51cf66' if n == B else '#339af0' for n in G_full.nodes]

fig_net.add_trace(go.Scatter(
    x=node_x, y=node_y,
    mode='markers+text',
    text=node_text,
    textposition="top center",
    marker=dict(size=node_size, color=node_color, line_width=1),
    hovertemplate="<b>%{text}</b><br>Degree: %{customdata}<extra></extra>",
    customdata=[G_full.degree(n) for n in G_full.nodes]
))

fig_net.update_layout(
    title=f"Full US Flight Network ({len(G_full.nodes)} Airports, {len(G_full.edges)} Flights)",
    showlegend=False,
    hovermode='closest',
    width=900, height=700,
    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    plot_bgcolor='white',
    font=dict(family="Arial", size=10)
)

fig_net.show()

print(f"\nGenerated: Full Timeline + Red Arrival Line + Entire Flight Network ({len(G_full.nodes)} nodes)")

# === 3. NETWORK – ONLY THE 10 RELEVANT AIRPORTS + GRAY BACKGROUND FLIGHTS ===
# -------------------------------------------------------------------------
# 1. Airports that appear in any of the 8 alternatives
hub_airports = [r['flights'][0]['arr_ap'] for r in alts]
focus_airports = [A, B] + list(set(hub_airports))           # A = DFW, B = HNL

# 2. Sub-graph with *exactly* these 10 airports
G_focus = G_full.subgraph(focus_airports).copy()

# 3. Layout – a bit more spacing for clarity
pos = nx.spring_layout(G_focus, k=2.8, iterations=150, seed=42)

# -------------------------------------------------------------------------
# 4. Helper key functions
def edge_key(data):
    return (data.get('airline'), data.get('flight_num'), data.get('dep'), data.get('arr'))

def alt_key(f):
    return (f['airline'], f['number'],
            str_to_minutes(f['dep_time']), str_to_minutes(f['arr_time']))

leg1_keys = {alt_key(r['flights'][0]) for r in alts}
leg2_keys = {alt_key(r['flights'][1]) for r in alts}

# -------------------------------------------------------------------------
# 5. Classify edges (MultiDiGraph → need keys=True)
edge_groups = {
    'background': [],   # gray – all other flights between the 10 airports
    'original'  : [],   # red dashed
    'leg1'      : [],   # blue
    'leg2'      : []    # green
}

for u, v, key, data in G_focus.edges(keys=True, data=True):
    ek = edge_key(data)

    if u == A and v == B and data.get('airline') == 'AA' and data.get('flight_num') == '123':
        edge_groups['original'].append((u, v, data))
    elif ek in leg1_keys:
        edge_groups['leg1'].append((u, v, data))
    elif ek in leg2_keys:
        edge_groups['leg2'].append((u, v, data))
    else:
        edge_groups['background'].append((u, v, data))

# -------------------------------------------------------------------------
# 6. Plotly figure – draw in order: background → leg1 → leg2 → original
fig = go.Figure()

# Order matters – background first
draw_order = ['background', 'leg1', 'leg2', 'original']

line_styles = {
    'background': dict(color='#bbbbbb', width=1),
    'original'  : dict(color='#d62728', width=5, dash='dash'),
    'leg1'      : dict(color='#1f77b4', width=4),
    'leg2'      : dict(color='#2ca02c', width=4)
}
legend_names = {
    'background': 'Other flights (same 10 airports)',
    'original'  : 'Original AA123',
    'leg1'      : 'Leg 1 (DFW → Hub)',
    'leg2'      : 'Leg 2 (Hub → HNL)'
}

for grp in draw_order:
    edges = edge_groups[grp]
    if not edges:
        continue

    xs, ys, hovers = [], [], []
    for u, v, data in edges:
        x0, y0 = pos[u]
        x1, y1 = pos[v]
        xs.extend([x0, x1, None])
        ys.extend([y0, y1, None])

        txt = (f"<b>{data.get('airline','?')}{data.get('flight_num','?')}</b><br>"
               f"{u} to {v}<br>"
               f"{minutes_to_str(data.get('dep',0))} to {minutes_to_str(data.get('arr',0))}")
        hovers.extend([txt, txt, None])

    fig.add_trace(go.Scatter(
        x=xs, y=ys,
        mode='lines',
        line=line_styles[grp],
        name=legend_names[grp],
        hoverinfo='text',
        text=hovers,
        showlegend=True
    ))

# -------------------------------------------------------------------------
# 7. Nodes
node_x = [pos[n][0] for n in G_focus.nodes]
node_y = [pos[n][1] for n in G_focus.nodes]

node_size  = [38 if n in {A, B} else 28 for n in G_focus.nodes]
node_color = ['#d62728' if n == A else '#2ca02c' if n == B else '#1f77b4' for n in G_focus.nodes]

fig.add_trace(go.Scatter(
    x=node_x, y=node_y,
    mode='markers+text',
    text=list(G_focus.nodes),
    textposition='middle center',
    textfont=dict(color='white', size=12, family='Arial'),
    marker=dict(size=node_size, color=node_color, line=dict(width=2, color='black')),
    hovertemplate="<b>%{text}</b><br>Out-degree: %{customdata}<extra></extra>",
    customdata=[G_focus.out_degree(n) for n in G_focus.nodes],
    name='Airports'
))

# -------------------------------------------------------------------------
# 8. Layout
fig.update_layout(
    title="<b>Rebooking Network – 10 Airports (DFW to Hubs to HNL)</b><br>"
          "<span style='font-size:13px'>Gray = all other flights between these airports</span>",
    showlegend=True,
    legend=dict(title="Segment", bgcolor="rgba(255,255,255,0.9)", bordercolor="gray", borderwidth=1),
    hovermode='closest',
    width=960, height=720,
    margin=dict(l=30, r=30, t=90, b=30),
    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    plot_bgcolor='white',
    font=dict(family="Arial", size=12)
)

fig.show()

# -------------------------------------------------------------------------
print("\nNetwork with **only** the 10 alternative-related airports")
print(f"   Airports : {len(G_focus.nodes)}  →  {', '.join(sorted(G_focus.nodes))}")
print(f"   Total flights : {len(G_focus.edges)}")
print(f"   Breakdown → background:{len(edge_groups['background'])}, "
      f"orig:{len(edge_groups['original'])}, leg1:{len(edge_groups['leg1'])}, leg2:{len(edge_groups['leg2'])}")


Generated: Full Timeline + Red Arrival Line + Entire Flight Network (100 nodes)



Network with **only** the 10 alternative-related airports
   Airports : 10  →  DEN, DFW, HNL, LAX, OGG, PHX, SAN, SEA, SFO, SLC
   Total flights : 3439
   Breakdown → background:3394, orig:0, leg1:22, leg2:23
