# Matt's Code Playground

In [1]:
# Standard library imports
import os
import sys

# Data manipulation and analysis
import numpy as np
import pandas as pd

# Custom helper classes
# ModelHelper is imported from the custom `modelhelper` module. The single
# instance created here is shared by the later cells, which call it both to
# train/load the model and to run predictions.
from modelhelper import ModelHelper
model_helper = ModelHelper()

In [2]:
# Ask the helper for the 'random_forest' flight-delay model.
# Per the log output recorded for this cell, the helper still prepares and
# splits the dataset, but loads an already-trained model from disk when one
# exists rather than refitting it.
# NOTE(review): despite its name, `model` appears to be a 2-tuple of
# (fitted estimator, metrics dict) judging by the output shown when it is
# displayed — confirm against ModelHelper before using it as an estimator.
model = model_helper.train_flight_delay_model('random_forest')

INFO:modelhelper:Starting flight delay model training process...
INFO:modelhelper:Step 1: Fetching and preparing the dataset...
INFO:modelhelper:Using original saved flight dataset
INFO:modelhelper:Using saved flight dataset
INFO:modelhelper:Dataset shape: (3516114, 7)
INFO:modelhelper:Step 2: Preparing features and target...
INFO:modelhelper:Features being used: ['AIRLINE', 'ORIGIN', 'DAY_OF_YEAR', 'TOTAL_DELAY', 'DELAY_VARIANCE', 'HISTORICAL_DELAY']
INFO:modelhelper:Step 3: Splitting data into train/test sets (test_size=0.2)...
INFO:modelhelper:Training set shape: (2812891, 6)
INFO:modelhelper:Test set shape: (703223, 6)
INFO:modelhelper:Step 4: Preprocessing features...
INFO:modelhelper:Step 5: Training random_forest model...
INFO:modelhelper:Model successfully loaded from models\random_forest_flight_delay_model.pkl
INFO:modelhelper:Model random_forest already trained and loaded. Using the loaded model...
INFO:modelhelper:Step 6: Evaluating model performance...
INFO:modelhelper:
Mod

In [3]:
# Render what the training call returned. In the recorded output this is a
# tuple: the RandomForestClassifier followed by a dict with accuracy,
# precision, recall and f1 scores.
display(model)

# Scenario matrix used to spot-check predictions.
# Every entry is an (airline, 'YYYY-MM-DD' date, origin airport code) triple;
# entries are grouped by travel theme and the trailing comment on each row is
# the author's label for that scenario.
scenario_groups = {
    'Holiday Season Travel': [
        ('American Airlines Inc.', '2024-12-23', 'DFW'),   # Christmas Dallas hub
        ('Delta Air Lines Inc.', '2024-12-22', 'ATL'),     # Christmas Atlanta hub
        ('United Air Lines Inc.', '2024-12-24', 'ORD'),    # Christmas Eve Chicago
        ('Southwest Airlines Co.', '2024-12-26', 'MDW'),   # Post-Christmas Chicago
        ('JetBlue Airways', '2024-12-28', 'JFK'),          # New Year's NYC
    ],
    'Spring Festival Events': [
        # NOTE(review): Mardi Gras 2024 fell on Feb 13 — confirm this date is intended.
        ('Southwest Airlines Co.', '2024-03-08', 'MSY'),   # Mardi Gras New Orleans
        ('American Airlines Inc.', '2024-03-23', 'DCA'),   # Cherry Blossom DC
        ('Delta Air Lines Inc.', '2024-04-19', 'AUS'),     # Austin Music Festival
        # NOTE(review): the Derby is run in Louisville (SDF); CVG serves
        # Cincinnati/Northern Kentucky — confirm the intended airport.
        ('United Air Lines Inc.', '2024-05-03', 'CVG'),    # Kentucky Derby
        ('JetBlue Airways', '2024-05-15', 'SAN'),          # San Diego Spring Break
    ],
    'Summer Peak Travel': [
        ('Alaska Airlines Inc.', '2024-07-03', 'ANC'),     # Alaska Summer Peak
        ('Hawaiian Airlines Inc.', '2024-07-15', 'HNL'),   # Hawaii Summer Peak
        ('Spirit Air Lines', '2024-08-01', 'MCO'),         # Orlando Theme Parks
        ('Frontier Airlines Inc.', '2024-08-15', 'LAS'),   # Vegas Summer Heat
        ('American Airlines Inc.', '2024-08-30', 'MIA'),   # Labor Day Miami
    ],
    'Business Routes': [
        ('Delta Air Lines Inc.', '2024-09-16', 'SEA'),     # Seattle Tech Week
        ('United Air Lines Inc.', '2024-09-23', 'SFO'),    # SF Business Week
        ('American Airlines Inc.', '2024-10-01', 'BOS'),   # Boston Conference Season
        ('JetBlue Airways', '2024-10-15', 'IAD'),          # DC Political Season
        ('Southwest Airlines Co.', '2024-11-05', 'HOU'),   # Houston Energy Conference
    ],
    'Fall Sports Events': [
        ('American Airlines Inc.', '2024-09-08', 'PHL'),   # NFL Opening Weekend
        ('Delta Air Lines Inc.', '2024-10-25', 'STL'),     # World Series Potential
        ('United Air Lines Inc.', '2024-11-28', 'DTW'),    # Thanksgiving Football
        ('Southwest Airlines Co.', '2024-12-07', 'CLT'),   # College Football Playoff
    ],
    'Winter Weather Challenges': [
        ('JetBlue Airways', '2024-01-20', 'BUF'),          # Buffalo Snow Season
        ('United Air Lines Inc.', '2024-02-15', 'MSP'),    # Minneapolis Winter
        ('American Airlines Inc.', '2024-02-01', 'BDL'),   # Hartford Winter
        ('Delta Air Lines Inc.', '2024-01-10', 'PWM'),     # Portland ME Winter
        ('Southwest Airlines Co.', '2024-02-20', 'MKE'),   # Milwaukee Winter
    ],
    'Mid-January (Post-Holiday Lull)': [
        ('Southwest Airlines Co.', '2024-01-16', 'MDW'),   # Chicago Off-Peak
        ('Delta Air Lines Inc.', '2024-01-17', 'MSP'),     # Minnesota Quiet Period
        ('American Airlines Inc.', '2024-01-18', 'DFW'),   # Dallas Winter Off-Peak
        ('United Air Lines Inc.', '2024-01-22', 'EWR'),    # Newark Slow Period
        ('JetBlue Airways', '2024-01-23', 'BOS'),          # Boston Winter Off-Peak
    ],
    'Early February (Pre-Spring Break)': [
        ('Alaska Airlines Inc.', '2024-02-06', 'SEA'),     # Seattle Winter Weekday
        ('Spirit Air Lines', '2024-02-07', 'FLL'),         # Florida Off-Season
        ('Frontier Airlines Inc.', '2024-02-08', 'DEN'),   # Denver Mid-Week
        ('Hawaiian Airlines Inc.', '2024-02-13', 'HNL'),   # Hawaii Low Season
        ('American Airlines Inc.', '2024-02-14', 'ORD'),   # Chicago Mid-Week
    ],
    'Late September (Post-Summer)': [
        ('Delta Air Lines Inc.', '2024-09-17', 'ATL'),     # Atlanta Tuesday
        ('United Air Lines Inc.', '2024-09-18', 'IAH'),    # Houston Off-Peak
        ('Southwest Airlines Co.', '2024-09-24', 'BWI'),   # Baltimore Mid-Week
        ('JetBlue Airways', '2024-09-25', 'JFK'),          # NYC Quiet Period
        ('American Airlines Inc.', '2024-09-26', 'PHX'),   # Phoenix Shoulder Season
    ],
    'Early November (Pre-Holiday)': [
        ('United Air Lines Inc.', '2024-11-12', 'SFO'),    # SF Tuesday
        ('Delta Air Lines Inc.', '2024-11-13', 'DTW'),     # Detroit Off-Peak
        ('Southwest Airlines Co.', '2024-11-14', 'MCI'),   # Kansas City Quiet
        ('Alaska Airlines Inc.', '2024-11-19', 'PDX'),     # Portland Pre-Holiday
        ('Spirit Air Lines', '2024-11-12', 'LGA'),         # NYC Tuesday
    ],
    'Early December (Pre-Holiday Rush)': [
        ('American Airlines Inc.', '2024-12-03', 'CLT'),   # Charlotte Tuesday
        ('United Air Lines Inc.', '2024-12-04', 'DCA'),    # DC Mid-Week
        ('Delta Air Lines Inc.', '2024-12-10', 'SLC'),     # Salt Lake City Tuesday
        ('JetBlue Airways', '2024-12-11', 'TPA'),          # Tampa Off-Peak
    ],
    'Late January (Deep Winter)': [
        ('Southwest Airlines Co.', '2024-01-29', 'PIT'),   # Pittsburgh Winter
        ('American Airlines Inc.', '2024-01-30', 'CLE'),   # Cleveland Off-Peak
        ('United Air Lines Inc.', '2024-01-31', 'BNA'),    # Nashville Winter
    ],
}

# Flatten the groups, in declaration order, into the single list of triples
# that the prediction loop iterates over.
test_combinations = [
    combo
    for group in scenario_groups.values()
    for combo in group
]

# Feed each scenario to the helper and echo two lines per scenario: the
# predicted label, then the probability summary returned alongside it.
for combo in test_combinations:
    airline, date, origin = combo
    result = model_helper.predict(airline, date, origin)

    predicted_label = result['prediction']
    print(f"Airline: {airline}, Date: {date}, Origin: {origin} -> Prediction: {predicted_label}")

    probability_summary = result['probabilities']
    print(f"Probabilities: {probability_summary}")


(RandomForestClassifier(class_weight='balanced_subsample', max_depth=10,
                        min_samples_leaf=20, min_samples_split=50,
                        n_estimators=200, n_jobs=-1, oob_score=True,
                        random_state=32),
 {'accuracy': 0.7707796815519401,
  'precision': 0.7877001806991136,
  'recall': 0.7707796815519401,
  'f1': 0.7748977781464175})

INFO:modelhelper:Airline: American Airlines Inc., Airport: DFW, Total delay: 17.704347826086956


Airline: American Airlines Inc., Date: 2024-12-23, Origin: DFW -> Prediction: Delayed
Probabilities: Delayed: 45.14%, Early: 22.99%, On Time: 31.87%


INFO:modelhelper:Airline: Delta Air Lines Inc., Airport: ATL, Total delay: 13.839344262295082


Airline: Delta Air Lines Inc., Date: 2024-12-22, Origin: ATL -> Prediction: Delayed
Probabilities: Delayed: 45.16%, Early: 23.38%, On Time: 31.46%


INFO:modelhelper:Airline: United Air Lines Inc., Airport: ORD, Total delay: 27.585714285714282


Airline: United Air Lines Inc., Date: 2024-12-24, Origin: ORD -> Prediction: Delayed
Probabilities: Delayed: 45.14%, Early: 22.99%, On Time: 31.87%


INFO:modelhelper:Airline: Southwest Airlines Co., Airport: MDW, Total delay: 16.39230769230769


Airline: Southwest Airlines Co., Date: 2024-12-26, Origin: MDW -> Prediction: Delayed
Probabilities: Delayed: 45.16%, Early: 23.44%, On Time: 31.39%


INFO:modelhelper:Airline: JetBlue Airways, Airport: JFK, Total delay: 12.15


Airline: JetBlue Airways, Date: 2024-12-28, Origin: JFK -> Prediction: Delayed
Probabilities: Delayed: 45.14%, Early: 22.99%, On Time: 31.87%
