In [None]:
%%capture
# Environment setup cell. The %%capture cell magic above suppresses this
# cell's output and must remain the first line of the cell.
import sys

# When running on Google Colab (detected via the loaded 'google.colab'
# module), install Pyomo and the highspy package into the kernel's
# environment; pip's own output is redirected to /dev/null.
if 'google.colab' in sys.modules:
    %pip install pyomo >/dev/null 2>/dev/null
    %pip install highspy >/dev/null 2>/dev/null

# Name of the solver requested from Pyomo's SolverFactory in the next cell.
solver = 'appsi_highs'

In [None]:
import pyomo.environ as pyo
# Build the solver handle once; the optimization cell reuses it to solve the model.
SOLVER = pyo.SolverFactory(solver)

# Fail early, before any modelling work, if the requested solver cannot be used.
if not SOLVER.available():
    raise AssertionError(f"Solver {solver} is not available.")

import pandas as pd

In [None]:
import pandas as pd

# Read the per-(carrier, airport) prediction tables from disk.
delay_df = pd.read_csv("avg_predicted_delay_by_carrier_airport.csv")
cancel_df = pd.read_csv("avg_predicted_cancellation_risk.csv")

# Join the two tables on (carrier, airport) and switch to the column names
# used by the rest of the notebook.
renamed_columns = {
    "carrier": "airline",
    "airport": "origin",
    "avg_predicted_delay": "delay",
    "avg_predicted_cancel_prob": "cancel_risk",
}
df = delay_df.merge(cancel_df, on=["carrier", "airport"]).rename(columns=renamed_columns)

# One row per distinct (airline, origin, delay, cancel_risk) combination.
origin_columns = ["airline", "origin", "delay", "cancel_risk"]
origins = df[origin_columns].drop_duplicates()

# Pair every origin with every airport listed for the same airline: the
# right-hand side is the same table relabelled as "destination" data.
destinations = origins.rename(columns={
    "origin": "destination",
    "delay": "delay_dest",
    "cancel_risk": "cancel_risk_dest",
})
legs = origins.merge(destinations, on="airline")

# Drop legs that start and end at the same airport, then renumber the rows.
is_self_loop = legs["origin"] == legs["destination"]
legs = legs[~is_self_loop].reset_index(drop=True)

# String version of the row number, used as the leg key in the Pyomo model.
legs["index"] = legs.index.astype(str)

# Preview the first few legs.
legs.head()


Unnamed: 0,airline,origin,delay,cancel_risk,destination,delay_dest,cancel_risk_dest,index
0,AA,ATL,12.921324,0.075702,CLT,10.313781,0.087851,0
1,AA,ATL,12.921324,0.075702,DEN,12.605865,0.075124,1
2,AA,ATL,12.921324,0.075702,DFW,13.548542,0.109917,2
3,AA,ATL,12.921324,0.075702,EWR,17.178791,0.193884,3
4,AA,ATL,12.921324,0.075702,JFK,14.716888,0.106529,4


In [None]:
import numpy as np

# Region → list of airport codes assigned to that region.
airport_regions = {
    "West Coast": ["SFO", "OAK", "SJC", "LAX", "SEA"],
    "Rockies":    ["DEN", "PHX", "SLC"],
    "Midwest":    ["ORD", "DFW", "MSP", "STL"],
    "East Coast": ["JFK", "LGA", "EWR", "CLT"],
    "Southeast":  ["ATL"],
}

# Inverted lookup: airport code → region. If a code were listed under more
# than one region, the region appearing last in airport_regions would win.
airport_to_region = {
    code: region_name
    for region_name, codes in airport_regions.items()
    for code in codes
}

# Label both endpoints of every leg with their region; airport codes missing
# from airport_to_region map to NaN.
for endpoint in ("origin", "destination"):
    legs[f"{endpoint}_region"] = legs[endpoint].map(airport_to_region)

# Region-to-region base cost matrix. Each row lists the cost from the row's
# region to every region in region_order (same order as the columns).
region_order = ["West Coast", "Rockies", "Midwest", "East Coast", "Southeast"]
cost_matrix_rows = {
    "West Coast": [150, 200, 300, 600, 500],
    "Rockies":    [200, 100, 200, 400, 350],
    "Midwest":    [300, 200, 150, 200, 250],
    "East Coast": [600, 400, 200, 100, 150],
    "Southeast":  [500, 350, 250, 150, 100],
}

# Flatten the matrix into a {(origin_region, destination_region): cost} lookup.
region_costs = {
    (from_region, to_region): cost
    for from_region, row in cost_matrix_rows.items()
    for to_region, cost in zip(region_order, row)
}

# Lookup function for base cost
def lookup_cost(row, default_cost=999, costs=None):
    """Return the base cost for one leg from its region pair.

    Parameters
    ----------
    row : mapping
        Anything indexable by "origin_region" and "destination_region"
        (e.g. a DataFrame row passed by ``DataFrame.apply(..., axis=1)``).
    default_cost : number, optional
        Cost returned when the region pair is not in the table. Note that
        this fallback is silent, so a leg with an unmapped region is still
        given a price rather than flagged. Defaults to 999.
    costs : dict, optional
        ``{(origin_region, destination_region): cost}`` table to use.
        Defaults to the notebook-level ``region_costs``.

    Returns
    -------
    The table entry for the pair, or ``default_cost`` if there is none.
    """
    table = region_costs if costs is None else costs
    return table.get((row["origin_region"], row["destination_region"]), default_cost)

# Deterministic base cost of each leg, looked up from its region pair.
legs["estimated_cost"] = legs.apply(lookup_cost, axis=1)

# Seed NumPy's global generator so the sampled costs are reproducible.
np.random.seed(42)

# Cost is assumed to follow a normal distribution whose mean is the base cost
# and whose standard deviation is 5% of that mean.
legs["cost_mean"] = legs["estimated_cost"]
legs["cost_std"] = 0.05 * legs["cost_mean"]

# Draw one cost per leg, then floor every draw at $50.
drawn_costs = np.random.normal(loc=legs["cost_mean"], scale=legs["cost_std"])
legs["cost_sample"] = np.maximum(drawn_costs, 50)

legs

Unnamed: 0,airline,origin,delay,cancel_risk,destination,delay_dest,cancel_risk_dest,index,origin_region,destination_region,estimated_cost,cost_mean,cost_std,cost_sample
0,AA,ATL,12.921324,0.075702,CLT,10.313781,0.087851,0,Southeast,East Coast,150,150,7.5,153.725356
1,AA,ATL,12.921324,0.075702,DEN,12.605865,0.075124,1,Southeast,Rockies,350,350,17.5,347.580375
2,AA,ATL,12.921324,0.075702,DFW,13.548542,0.109917,2,Southeast,Midwest,250,250,12.5,258.096107
3,AA,ATL,12.921324,0.075702,EWR,17.178791,0.193884,3,Southeast,East Coast,150,150,7.5,161.422724
4,AA,ATL,12.921324,0.075702,JFK,14.716888,0.106529,4,Southeast,East Coast,150,150,7.5,148.243850
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1679,WN,STL,8.775019,0.070165,PHX,9.250295,0.040083,1679,Midwest,Rockies,200,200,10.0,195.683797
1680,WN,STL,8.775019,0.070165,SEA,10.470112,0.055785,1680,Midwest,West Coast,300,300,15.0,306.055951
1681,WN,STL,8.775019,0.070165,SFO,15.787549,0.141653,1681,Midwest,West Coast,300,300,15.0,299.637066
1682,WN,STL,8.775019,0.070165,SJC,8.285672,0.033636,1682,Midwest,West Coast,300,300,15.0,286.444472


In [None]:
from pyomo.environ import *
from pyomo.opt import SolverFactory, SolverStatus, TerminationCondition

# === User Inputs ===
print("🌍 Welcome to the Flight Itinerary Optimizer!")
origin_airport = input("📍 Enter your origin airport code (e.g., SFO): ").strip().upper()
destination_airport = input("🧭 Enter your destination airport code (e.g., LAX): ").strip().upper()
budget = float(input("💰 What's your maximum total budget in USD? "))

# === Objective Parameters ===
# Weight on each selected leg's cancellation risk in the objective.
lambda_penalty = 100
# Weight on each selected leg's delay-per-dollar term in the objective.
mu_penalty = 1

print(f"\n🧠 Objective: Minimize delay + λ({lambda_penalty}) × cancellation risk + μ({mu_penalty}) × delay/cost")
print(f"💸 Budget constraint: Max ${budget:.2f}")
print("\n🧮 Calculating the best itinerary for you... Please wait ✈️💨")

# === Pre-check ===
departs_from_origin = legs["origin"] == origin_airport
arrives_at_destination = legs["destination"] == destination_airport
reachable_legs = legs[departs_from_origin | arrives_at_destination]

# A route needs BOTH a leg leaving the origin AND a leg arriving at the
# destination, and the two airports must differ (with identical endpoints the
# flow balance below cannot be satisfied). Checking only "either one exists"
# lets an unknown airport through to the solver, where it shows up as a
# misleading budget error.
route_possible = bool(
    origin_airport != destination_airport
    and departs_from_origin.any()
    and arrives_at_destination.any()
)

if not route_possible:
    # Report and skip the optimization. exit() is deliberately not used here:
    # in a notebook it asks the kernel to shut down instead of just ending
    # this cell.
    print("🚫 No flights found to connect your selected airports.")
    print(f"(Checked: {origin_airport} ➝ {destination_airport})")
else:
    # === Pyomo Model ===
    # Per-leg data keyed by the string leg id used as the model's index.
    leg_data = legs.set_index("index")

    model = ConcreteModel()
    model.Legs = Set(initialize=legs["index"].tolist())

    model.delay = Param(model.Legs, initialize=leg_data["delay"].to_dict())
    model.cancel = Param(model.Legs, initialize=leg_data["cancel_risk"].to_dict())
    model.cost = Param(model.Legs, initialize=leg_data["cost_sample"].to_dict())
    # Airport codes are strings, so these Params must declare a non-numeric domain.
    model.origin = Param(model.Legs, initialize=leg_data["origin"].to_dict(), within=Any)
    model.destination = Param(model.Legs, initialize=leg_data["destination"].to_dict(), within=Any)

    # x[i] == 1 when leg i is part of the itinerary.
    model.x = Var(model.Legs, domain=Binary)

    # === Objective Function ===
    def obj_rule(m):
        """Sum over selected legs of delay + λ·cancel risk + μ·(delay / cost)."""
        return sum(
            m.x[i] * (
                m.delay[i] +
                lambda_penalty * m.cancel[i] +
                mu_penalty * (m.delay[i] / m.cost[i])
            )
            for i in m.Legs
        )
    model.obj = Objective(rule=obj_rule, sense=minimize)

    # === Flow Constraints ===
    # Sorted so the constraint order is the same on every run.
    airports = sorted(set(legs["origin"]).union(legs["destination"]))

    # Leg ids grouped by endpoint, built once so flow_rule does not rescan
    # every leg for every airport.
    legs_out = legs.groupby("origin")["index"].apply(list).to_dict()
    legs_in = legs.groupby("destination")["index"].apply(list).to_dict()

    # Required (outflow - inflow) per airport: +1 at the origin, -1 at the
    # destination, 0 everywhere else.
    net_outflow = {origin_airport: 1, destination_airport: -1}

    def flow_rule(m, ap):
        """Flow balance at airport `ap`."""
        inflow = sum(m.x[i] for i in legs_in.get(ap, []))
        outflow = sum(m.x[i] for i in legs_out.get(ap, []))
        return outflow - inflow == net_outflow.get(ap, 0)
    model.flow = Constraint(airports, rule=flow_rule)

    # Require at least one flight
    model.must_fly = Constraint(expr=sum(model.x[i] for i in model.Legs) >= 1)

    # Budget constraint
    def budget_rule(m):
        """Total sampled cost of the selected legs must not exceed the budget."""
        return sum(m.x[i] * m.cost[i] for i in m.Legs) <= budget
    model.budget = Constraint(rule=budget_rule)

    # === Solve ===
    try:
        results = SOLVER.solve(model)

        if (results.solver.status != SolverStatus.ok) or \
           (results.solver.termination_condition != TerminationCondition.optimal):
            raise RuntimeError("Solver did not find an optimal solution.")

        # Copy the solution back onto the legs table and keep the chosen legs.
        legs["selected"] = legs["index"].apply(lambda i: value(model.x[i]))
        route = legs[legs["selected"] > 0.5]

        total_delay = route['delay'].sum()
        total_cancel_risk = route['cancel_risk'].sum()
        total_cost = route['cost_sample'].sum()
        delay_per_dollar = (route['delay'] / route['cost_sample']).sum()

        print("\n✅ ✈️ Optimal Route Found!")
        print(route[["airline", "origin", "destination", "delay", "cancel_risk", "cost_sample"]])

        print("\n📊 Summary:")
        print(f"💸 Total cost (sampled): ${total_cost:.2f}")
        print(f"⏱️ Total predicted delay: {total_delay:.1f} minutes")
        print(f"⚠️ Total cancellation risk: {total_cancel_risk:.2f}")
        print(f"📉 Total delay-to-cost ratio (∑ delayᵢ / costᵢ): {delay_per_dollar:.3f}")

        # Savings (relative to average leg): compare against an itinerary with
        # the same number of legs, each having the average delay and risk.
        avg_delay = legs["delay"].mean()
        avg_cancel_risk = legs["cancel_risk"].mean()
        delay_saved = avg_delay * len(route) - total_delay
        cancel_risk_saved = avg_cancel_risk * len(route) - total_cancel_risk

        print(f"\n🎉 You saved approximately:")
        print(f"   ⏳ {delay_saved:.1f} fewer minutes of delay")
        print(f"   🚫 {cancel_risk_saved:.2f} lower cancellation risk")

    except RuntimeError as e:
        print("\n😬 Sorry, you're too cheap. 💸✋")
        print("Try raising your budget or lowering your standards. 😅")
        print(f"\n🔧 Error: {e}")

🌍 Welcome to the Flight Itinerary Optimizer!
📍 Enter your origin airport code (e.g., SFO): SFO
🧭 Enter your destination airport code (e.g., LAX): JFK
💰 What's your maximum total budget in USD? 800

🧠 Objective: Minimize delay + λ(100) × cancellation risk + μ(1) × delay/cost
💸 Budget constraint: Max $800.00

🧮 Calculating the best itinerary for you... Please wait ✈️💨

✅ ✈️ Optimal Route Found!
    airline origin destination      delay  cancel_risk  cost_sample
935      DL    SFO         JFK  11.880899      0.01719   584.938373

📊 Summary:
💸 Total cost (sampled): $584.94
⏱️ Total predicted delay: 11.9 minutes
⚠️ Total cancellation risk: 0.02
📉 Total delay-to-cost ratio (∑ delayᵢ / costᵢ): 0.020

🎉 You saved approximately:
   ⏳ 0.4 fewer minutes of delay
   🚫 0.06 lower cancellation risk


Assumptions:


Each flight leg is defined by a unique combination of airline, origin, and destination.

All combinations of origin-destination operated by the same airline are considered valid, excluding self-loops (i.e., flights that start and end at the same airport).

Predicted delays and predicted cancellation risks are taken from models trained on historical data.

These predictions are assumed to be accurate and static for optimization purposes.

The predicted delay and cancellation risk depend only on the airline and the origin airport,
and are constant across all destinations served from that origin.

This is a simplification — in reality, destination-specific weather, distance, or congestion could affect these risks.

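Flight prices are rough estimates: each leg's base price is set by its origin and destination regions (a coarse proxy for distance) and then perturbed with a small amount of random noise.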

Delay and cancellation risk act as proxies for quality or cost.

A monetary budget constraint is enforced on the total of the estimated per-leg costs.

The model does not enforce minimum layover times between connecting flights.

All transfers are assumed logistically feasible regardless of time between flights — another simplification.