In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

In [11]:
# merged dataset
raw_df = pd.read_csv("2_raw_data/raw_merged/raw_merged_data.csv")

# Preview
print(raw_df.head())
print(raw_df.columns)

      FL_DATE CRS_DEP_TIME  DEP_TIME OP_UNIQUE_CARRIER  OP_CARRIER_FL_NUM  \
0  2023-01-01     06:10:00  06:05:00                DL                977   
1  2023-01-01     06:15:00  06:17:00                DL               2423   
2  2023-01-01     06:15:00  06:44:00                AA               1651   
3  2023-01-01     06:15:00  06:15:00                DL                820   
4  2023-01-01     06:15:00  06:13:00                DL                914   

  DEST  DEP_DEL15  CANCELLED CANCELLATION_CODE  CARRIER_DELAY  ...  NAS_DELAY  \
0  DTW          0          0               NaN              0  ...          0   
1  SLC          0          0               NaN              0  ...          0   
2  CLT          1          0               NaN              0  ...          0   
3  ATL          0          0               NaN              0  ...          0   
4  MSP          0          0               NaN              0  ...          0   

   SECURITY_DELAY  LATE_AIRCRAFT_DELAY  wind_dir_d

In [12]:
# Create a new binary target: 1 if delayed or cancelled, else 0
raw_df["TARGET"] = np.where((raw_df["DEP_DEL15"] == 1) | (raw_df["CANCELLED"] == 1), 1, 0)

# Check balance
print(raw_df["TARGET"].value_counts(normalize=True) * 100)

TARGET
0    89.337823
1    10.662177
Name: proportion, dtype: float64


## Undersample the Majority Class (Data Balancing)

In [13]:
# Separate majority and minority classes
majority_df = raw_df[raw_df["TARGET"] == 0]
minority_df = raw_df[raw_df["TARGET"] == 1]

# Undersample the majority class
majority_downsampled = majority_df.sample(n=len(minority_df), random_state=42)

# Combine balanced dataset
balanced_df = pd.concat([majority_downsampled, minority_df])

# Shuffle the rows
balanced_df = balanced_df.sample(frac=1, random_state=42).reset_index(drop=True)

# Check new balance
print(balanced_df["TARGET"].value_counts(normalize=True) * 100)

TARGET
1    50.0
0    50.0
Name: proportion, dtype: float64


## Split the Dataset

In [15]:
from sklearn.model_selection import train_test_split

#split: 70% Train, 15% Validation, 15% Test
train_df, temp_df = train_test_split(
    balanced_df,
    test_size=0.30,
    random_state=42,
    stratify=balanced_df["TARGET"]
)

val_df, test_df = train_test_split(
    temp_df,
    test_size=0.50,
    random_state=42,
    stratify=temp_df["TARGET"]
)

# Confirm split sizes
print("Train set size:", len(train_df))
print("Validation set size:", len(val_df))
print("Test set size:", len(test_df))

Train set size: 27704
Validation set size: 5937
Test set size: 5937


In [18]:
# Step 1: Select final features
features_to_use = [
    'DEP_TIME', 'DEP_DEL15', 'CANCELLED',               # Flight performance features
    'wind_dir_degrees', 'wind_speed_kt', 'temperature_c', 
    'dewpoint_c', 'visibility_statute_mi', 'altimeter_hpa',  # Weather features
    'OP_UNIQUE_CARRIER', 'DEST'                         
]

# 2: (X) and target (y)
X = train_df[features_to_use]
y = train_df['TARGET']

# 3: One-Hot Encode categorical variables
# This creates binary columns for each unique value
X = pd.get_dummies(X, columns=['OP_UNIQUE_CARRIER', 'DEST'])

# 4: Lets Do the same for validation and test sets
X_val = pd.get_dummies(val_df[features_to_use], columns=['OP_UNIQUE_CARRIER', 'DEST'])
X_test = pd.get_dummies(test_df[features_to_use], columns=['OP_UNIQUE_CARRIER', 'DEST'])

# 5: Align validation/test sets to match training columns
X_val = X_val.reindex(columns=X.columns, fill_value=0)
X_test = X_test.reindex(columns=X.columns, fill_value=0)

# 6: val/test
y_val = val_df['TARGET']
y_test = test_df['TARGET']

# Confirm final shapes of datasets
print("Training set shape:", X.shape)
print("Validation set shape:", X_val.shape)
print("Test set shape:", X_test.shape)

Training set shape: (27704, 92)
Validation set shape: (5937, 92)
Test set shape: (5937, 92)


## Feature Scaling before training model

In [45]:
# first select features to use, we are using weather and flight-related features for this model training
features_to_use = [
    'wind_dir_degrees', 'wind_speed_kt',
    'temperature_c', 'dewpoint_c', 'visibility_statute_mi', 'altimeter_hpa',
    'OP_UNIQUE_CARRIER', 'DEST'
]

In [46]:
# then split features from each dataset
# selected features from training, validation and test sets
X = train_df[features_to_use].copy()
X_val = val_df[features_to_use].copy()
X_test = test_df[features_to_use].copy()

In [47]:
# encode the categorical features 
# lets convert the OP_UNIQUE_CARRIER abd DEST to binary dummy variables
X = pd.get_dummies(X, columns=['OP_UNIQUE_CARRIER', 'DEST'])
X_val = pd.get_dummies(X_val, columns=['OP_UNIQUE_CARRIER', 'DEST'])
X_test = pd.get_dummies(X_test, columns=['OP_UNIQUE_CARRIER', 'DEST'])

# Align with training set columns
X_val = X_val.reindex(columns=X.columns, fill_value=0)
X_test = X_test.reindex(columns=X.columns, fill_value=0)

In [48]:
# Standardize values and scale
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

## Random Forest

In [49]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

#model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# train the model on the training data
rf_model.fit(X_scaled, y)

# make predictions on the validation set
val_preds = rf_model.predict(X_val_scaled)

# check performance
print("✅ Validation Set Performance:")
print(confusion_matrix(y_val, val_preds))
print(classification_report(y_val, val_preds))

✅ Validation Set Performance:
[[1890 1078]
 [1031 1938]]
              precision    recall  f1-score   support

           0       0.65      0.64      0.64      2968
           1       0.64      0.65      0.65      2969

    accuracy                           0.64      5937
   macro avg       0.64      0.64      0.64      5937
weighted avg       0.64      0.64      0.64      5937



### Conclusion
We trained a Random Forest model to predict flight delays and cancellations, and it achieved around 64% accuracy on the validation set

The model predicted both delayed/cancelled flights (1) and on-time flights (0) fairly evenly, with F1 scores of 0.64–0.65 for each class. This means the model is equally balanced in identifying delays and non-delays.

## Reduction in Operational Costs Due to Delays

## Correlation Between Weather and Delays