# Airline Delay Analysis

#### This coursework uses the U.S. Department of Transportation's Airline On-Time Statistics and Delay Causes dataset, which provides monthly records on domestic flights operated by major U.S. carriers. The dataset includes counts of on-time, delayed, canceled, and diverted flights, along with detailed delay-cause categories such as Air Carrier, Extreme Weather, NAS (National Aviation System), Late-Arriving Aircraft, and Security.

#### Using this dataset of over 400,000 flight records (2003-2025), the coursework develops a full machine learning pipeline for analyzing and predicting airline delays. The goal is to perform exploratory data analysis, engineer meaningful features, train regression and classification models, and deploy an interactive Streamlit application for real-time delay predictions. The project also aims to extract business insights that airlines and airports can use to improve operational efficiency and reduce delays.

## Student ID: 00016395



# Loading the dataset: shape and overview

In [None]:
import pandas as pd

# Read the raw delay dataset from the project-relative CSV into a DataFrame.
df = pd.read_csv('data/Airline_Delay_Cause.csv')
print("Dataset loaded sucessfully!!!")

# Quick sanity check on the load: (rows, columns).
print(f"Dataset shape: {df.shape}")


# Exploratory Data Analysis (EDA)

In [None]:
# Overview: dataset dimensions followed by a preview of the first records.
n_rows, n_cols = df.shape
print(f"Dataset shape: {n_rows} rows x {n_cols} columns")
print("=" * 40)
print("Dataset First 10 rows: ")
# Last expression of the cell, so the notebook renders the preview table.
df.head(10)


In [None]:
print('Checking datatypes: ')
# info() writes its per-column summary straight to stdout.
df.info()
print("=" * 40)

In [None]:
print('Summary for numerical columns:')
# Left as the final expression so the notebook renders the summary table.
df.describe()

In [None]:
print('Summary for categorical columns:')
# Restrict the summary to object-dtype (string) columns.
df.describe(include=['object'])

In [None]:
print('Checking missing values:')
# Per-column count of null entries.
df.isnull().sum()


In [None]:
print('Checking for duplicates:')
# Number of rows flagged by duplicated() as full-row repeats.
df.duplicated().sum()

### Distribution of arrival delay

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Histogram of `arr_delay` (100 bins) with a kernel-density overlay.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['arr_delay'], bins=100, kde=True, ax=ax)
ax.set_title('Distribution of Arrival Delay')
ax.set_xlabel('Minutes')
ax.set_ylabel('Count')
plt.show()

### Correlation Matrix

In [None]:
import numpy as np

# Pairwise correlations are only defined for numeric columns.
num_cols = list(df.select_dtypes(include=[np.number]).columns)
correlation_matrix = df[num_cols].corr()

plt.figure(figsize=(12, 10))
sns.heatmap(
    correlation_matrix,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    center=0,  # keep zero correlation at the neutral colour
)
plt.title('Correlation Matrix')
plt.tight_layout()
plt.show()


#### Average delay by type (cause of delay)

In [None]:
# One bar per delay cause, showing the column mean across all records.
delay_causes = ('carrier', 'weather', 'nas', 'security', 'late_aircraft')
delay_types = [f'{cause}_delay' for cause in delay_causes]
delay_means = df[delay_types].mean()

plt.bar(delay_means.index, delay_means.values)
plt.title('Average Delay by Type')
plt.xlabel('Delay Type')
plt.ylabel('Average Minutes')
plt.xticks(rotation=45)
plt.show()



#### Delays by carrier

In [None]:
# Keep only the ten carriers with the most records so the boxplot stays readable.
carrier_counts = df['carrier'].value_counts()
top_carriers = carrier_counts.nlargest(10).index
is_top_carrier = df['carrier'].isin(top_carriers)
df_top = df.loc[is_top_carrier]

plt.figure(figsize=(15, 6))
sns.boxplot(data=df_top, x='carrier', y='arr_delay')
plt.title('Arrival Delay Distribution by Carrier (Top 10)')
plt.xlabel('Carrier Code')
plt.ylabel('Delay Minutes')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

#### Average Delay by Month

In [None]:
# Mean of `arr_delay` over all records that share a calendar month.
monthly_delay = df.groupby('month')['arr_delay'].mean()

plt.figure(figsize=(10, 6))
plt.bar(
    monthly_delay.index,
    monthly_delay.values,
    color='orange',
    alpha=0.7,
)
plt.title('Average Delay by Month')
plt.xlabel('Month')
plt.ylabel('Average Delay (minutes)')
plt.xticks(range(1, 13))  # label every month 1..12
plt.show()

#### Cancellation and diversion graphs

In [None]:
# Monthly cancellation and diversion rates.
#
# A rate here is disrupted arrivals divided by total arrivals, matching the
# `cancellation_rate` feature built later in this notebook. Taking the plain
# mean of `arr_cancelled` / `arr_diverted` would plot the average *count* per
# record, which scales with route size and is not a rate.
monthly_totals = df.groupby('month')[['arr_flights', 'arr_cancelled', 'arr_diverted']].sum()

# A month with zero recorded flights has no defined rate; NaN avoids a
# division by zero and simply leaves that bar empty.
monthly_flights = monthly_totals['arr_flights'].replace(0, float('nan'))
cancellation_rate = monthly_totals['arr_cancelled'] / monthly_flights
diversion_rate = monthly_totals['arr_diverted'] / monthly_flights

plt.figure(figsize=(10, 5))

plt.subplot(1, 2, 1)
plt.bar(cancellation_rate.index, cancellation_rate.values)
plt.title('Cancellation Rate by Month')
plt.xlabel('Month')
plt.ylabel('Cancellation Rate')

plt.subplot(1, 2, 2)
plt.bar(diversion_rate.index, diversion_rate.values)
plt.title('Diversion Rate by Month')
plt.xlabel('Month')
plt.ylabel('Diversion Rate')

plt.tight_layout()
plt.show()

# Data Preparation


### Copy of dataset for preprocessing

In [None]:
# Work on a copy so the raw `df` used by the EDA cells stays untouched.
df_clean = df.copy()

### Handling Missing Values

In [None]:
# Null count per column, largest first, before any imputation is applied.
missing_before = df_clean.isnull().sum().sort_values(ascending=False)
print("Missing values before handling:")
print(missing_before)

For better performance of the upcoming models, I chose to fill missing values rather than drop the affected rows, using the following strategy:
- delay-minute and count columns are filled with 0 (no delay recorded)
- categorical columns are filled with 'Unknown'

In [None]:
delay_minutes_cols = [
    'carrier_delay', 'weather_delay', 'nas_delay',
    'security_delay', 'late_aircraft_delay',
    'arr_delay', 'arr_cancelled', 'arr_diverted',
]

# Zero-fill every listed column that actually exists in the frame.
present_delay_cols = [name for name in delay_minutes_cols if name in df_clean.columns]
if present_delay_cols:
    df_clean[present_delay_cols] = df_clean[present_delay_cols].fillna(0)

print('Data filled with 0 for delay minutes columns')

In [None]:
count_cols = [
    'carrier_ct', 'weather_ct', 'nas_ct', 'security_ct',
    'late_aircraft_ct', 'arr_del15', 'arr_flights',
]

# Zero-fill every listed count column that actually exists in the frame.
present_count_cols = [name for name in count_cols if name in df_clean.columns]
if present_count_cols:
    df_clean[present_count_cols] = df_clean[present_count_cols].fillna(0)

print('Data filled with 0 for count columns')

In [None]:
categorical_cols = ['carrier', 'carrier_name', 'airport', 'airport_name']

# Replace missing labels with the placeholder 'Unknown'.
for col in categorical_cols:
    if col not in df_clean.columns:
        continue
    if not df_clean[col].isnull().any():
        continue  # nothing to fill in this column
    df_clean[col] = df_clean[col].fillna('Unknown')

print('Data filled with Unknown for categorical columns')

In [None]:
# Null count per column, largest first, after imputation.
missing_after = df_clean.isnull().sum().sort_values(ascending=False)
print("\nMissing values after handling:")
print(missing_after)

### Checking for data errors (e.g. negative flight counts, more delayed flights than total flights)

In [None]:
# A flight count cannot be negative; floor any such value at zero.
if 'arr_flights' in df_clean.columns:
    negative_flights = df_clean['arr_flights'].lt(0).sum()
    if negative_flights:
        print(f"Found {negative_flights} rows with negative flight count. Setting to 0.")
        df_clean['arr_flights'] = df_clean['arr_flights'].clip(lower=0)

In [None]:
# Delayed arrivals can never exceed total arrivals; cap them row by row.
if {'arr_del15', 'arr_flights'}.issubset(df_clean.columns):
    impossible = df_clean['arr_del15'].gt(df_clean['arr_flights']).sum()
    if impossible > 0:
        print(f"Found {impossible} rows where delayed flights > total flights. Correcting...")
        capped = df_clean[['arr_del15', 'arr_flights']].min(axis=1)
        df_clean['arr_del15'] = capped

### Outliers

In [None]:
def handle_outliers(data, column):
    """Cap values of ``data[column]`` at the 1.5 * IQR fences, in place.

    Args:
        data: DataFrame holding the column; it is modified in place when at
            least one value lies outside the fences.
        column: Name of the numeric column to inspect.

    Returns:
        The number of values that were strictly below the lower fence or
        strictly above the upper fence before capping.
    """
    values = data[column]
    q1, q3 = values.quantile(0.25), values.quantile(0.75)
    spread = q3 - q1
    lower_bound = q1 - 1.5 * spread
    upper_bound = q3 + 1.5 * spread

    too_low = values.lt(lower_bound)
    too_high = values.gt(upper_bound)
    outliers = (too_low | too_high).sum()

    # Only rewrite the column when something actually needs capping.
    if outliers > 0:
        data[column] = values.clip(lower=lower_bound, upper=upper_bound)

    return outliers

outlier_cols = [
    'arr_delay',
    'carrier_delay',
    'weather_delay',
    'nas_delay',
    'late_aircraft_delay',
]

# Cap each listed column in turn and report only those that changed.
for col in outlier_cols:
    if col not in df_clean.columns:
        continue
    outliers_count = handle_outliers(df_clean, col)
    if outliers_count > 0:
        print(f"  - {col}: {outliers_count} outliers capped")

print("Outliers handled using IQR method!!!")

# Feature Engineering

A new binary target, `high_delay_month`, flags records whose delay rate (delayed arrivals divided by total arrivals) exceeds 25%.

In [None]:
# Share of arrivals that were delayed; a zero flight count is swapped for 1
# so the division is always defined.
safe_flights = df_clean['arr_flights'].replace(0, 1)
df_clean['delay_rate'] = df_clean['arr_del15'].div(safe_flights)
print("Delay rate statistics:")
print(df_clean['delay_rate'].describe())

# Binary target: 1 when more than a quarter of arrivals were delayed.
df_clean['high_delay_month'] = df_clean['delay_rate'].gt(0.25).astype(int)
print("Class distribution:")
print(df_clean['high_delay_month'].value_counts(normalize=True) * 100)

# Percentage of total delay minutes attributed to each cause.
delay_cause_cols = ['carrier_delay', 'weather_delay', 'nas_delay', 'security_delay', 'late_aircraft_delay']
total_delay_minutes = df_clean[delay_cause_cols].sum(axis=1)
has_delay_minutes = total_delay_minutes > 0

for col in delay_cause_cols:
    percentage_col = col.replace('_delay', '_percentage')
    share = df_clean[col] / total_delay_minutes * 100
    # Rows with no delay minutes at all get 0 instead of a 0/0 result.
    df_clean[percentage_col] = np.where(has_delay_minutes, share, 0)

In [None]:
# Calendar features derived from the month number.
month_values = df_clean['month']
df_clean['quarter'] = (month_values - 1) // 3 + 1

season_months = {
    'is_winter': [12, 1, 2],
    'is_summer': [6, 7, 8],
    'is_peak_travel': [6, 7, 8, 11, 12],
}
for flag_name, months in season_months.items():
    df_clean[flag_name] = month_values.isin(months).astype(int)


In [None]:
# Volume and disruption features built from the raw counts.
flights = df_clean['arr_flights']
cancelled = df_clean['arr_cancelled']

df_clean['flights_per_day'] = flights / 30  # Approximate daily flights
# Zero flight counts are swapped for 1 so the ratio is always defined.
df_clean['cancellation_rate'] = cancelled / flights.replace(0, 1)
df_clean['total_disruptions'] = cancelled + df_clean['arr_diverted']

In [None]:
def _volume_stats(key):
    """Return per-`key` flight volume, cancellation and diversion totals.

    Only volume-style columns are aggregated (no delay columns). Output
    columns are prefixed with `key` and rounded to two decimals.
    """
    grouped = df_clean.groupby(key)
    return grouped.agg(**{
        f'{key}_total_flights': ('arr_flights', 'sum'),
        f'{key}_avg_flights': ('arr_flights', 'mean'),
        f'{key}_total_cancelled': ('arr_cancelled', 'sum'),
        f'{key}_total_diverted': ('arr_diverted', 'sum'),
    }).round(2)


airport_stats = _volume_stats('airport')
carrier_stats = _volume_stats('carrier')

# Attach the aggregates to every record of the matching airport / carrier.
df_clean = df_clean.merge(airport_stats, left_on='airport', right_index=True, how='left')
df_clean = df_clean.merge(carrier_stats, left_on='carrier', right_index=True, how='left')

print("Feature engineering completed without data leakage!")
print(f"Dataset shape: {df_clean.shape}")

In [None]:
# Remove data leakage columns and redundant features
# Columns that must not reach the model: they either encode the delay
# outcome directly, were derived from the target, or duplicate an identifier.
leakage_cols = [
    # Direct delay indicators (CRITICAL TO REMOVE)
    'arr_del15', 'arr_delay',
    'carrier_delay', 'weather_delay', 'nas_delay', 'security_delay', 'late_aircraft_delay',
    'carrier_ct', 'weather_ct', 'nas_ct', 'security_ct', 'late_aircraft_ct',
    
    # Target-derived features (CRITICAL TO REMOVE)
    'delay_rate',  # Used to create target
    'carrier_percentage', 'weather_percentage', 'nas_percentage', 'security_percentage', 'late_aircraft_percentage',
    
    # Redundant features
    'carrier_name', 'airport_name'
]

# Everything except the leakage columns and the target is a model feature.
# (The cause-percentage columns are dropped too: they are listed above.)
excluded_cols = set(leakage_cols) | {'high_delay_month'}
clean_features = [col for col in df_clean.columns if col not in excluded_cols]
df_model = df_clean[clean_features + ['high_delay_month']].copy()

# Report the columns that were really present and dropped, not merely listed.
removed_cols = [col for col in leakage_cols if col in df_clean.columns]
print(f"Removed {len(removed_cols)} leakage columns")
print(f"Clean dataset shape: {df_model.shape}")
print("Remaining features:", clean_features)

### Feature and Target Preparation

In [None]:
# Prepare features and target
# Separate the feature matrix from the binary target.
X = df_model.drop('high_delay_month', axis=1)
y = df_model['high_delay_month']

print(f"Features shape: {X.shape}")
print(f"Target distribution:")
print(y.value_counts(normalize=True) * 100)

# Check class imbalance ratio.
# Counts are looked up with a default of 0 so a class that never occurs does
# not raise, and the minority/majority roles are derived from the counts
# instead of assuming class 1 is always the rarer one.
imbalance_ratio = y.value_counts()
no_delay_count = int(imbalance_ratio.get(0, 0))
delay_count = int(imbalance_ratio.get(1, 0))
larger_class = max(no_delay_count, delay_count)
minority_ratio = min(no_delay_count, delay_count) / larger_class if larger_class else float('nan')
print(f"Class imbalance ratio: {no_delay_count}:{delay_count} (No delay:Delay)")
print(f"Minority class is {minority_ratio:.2%} of majority class")

### Class Weight Method for Imbalance handling

In [None]:
# BEST PRACTICE: Proper class weight handling
from sklearn.utils.class_weight import compute_class_weight

# Calculate class weights
# 'balanced' weights for each class label found in y.
classes = np.unique(y)
class_weights = compute_class_weight('balanced', classes=classes, y=y)
class_weight_dict = dict(zip(classes, class_weights))

print(f"Calculated class weights: {class_weight_dict}")

# Balanced weighting gives the rarer class the larger weight, so read the
# minority/majority roles off the weights rather than assuming that class 1
# is the minority.
minority_weight = max(class_weight_dict.values())
majority_weight = min(class_weight_dict.values())
print(f"Minority class weight: {minority_weight:.3f}")
print(f"Majority class weight: {majority_weight:.3f}")
print(f"Weight ratio: {minority_weight / majority_weight:.2f}x")

### Encode Categorical Variables

In [None]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode every object-dtype feature, keeping one fitted encoder per
# column so the mapping can be reused.
categorical_cols = list(X.select_dtypes(include=['object']).columns)
X_encoded = X.copy()
label_encoders = {}

for col in categorical_cols:
    le = LabelEncoder()
    as_text = X_encoded[col].astype(str)
    X_encoded[col] = le.fit_transform(as_text)
    label_encoders[col] = le
    print(f"Encoded {col}: {len(le.classes_)} unique values")

print("Categorical encoding completed!")

### Scaling the features

In [None]:
from sklearn.preprocessing import StandardScaler

# Feature Scaling
# Standardise every numeric column of the encoded feature matrix.
# NOTE(review): the scaler is fitted on all rows of X_encoded, i.e. before
# any train/test split has been made.
numerical_cols = X_encoded.select_dtypes(include=[np.number]).columns
scaler = StandardScaler()
scaled_values = scaler.fit_transform(X_encoded[numerical_cols])

X_scaled = X_encoded.copy()
X_scaled[numerical_cols] = scaled_values

print("Feature scaling completed!")

### Data Splitting

In [None]:
# BEST PRACTICE: Stratified split to maintain class distribution
from sklearn.model_selection import train_test_split

# Split the *unscaled* encoded features so that the scaler can be fitted on
# the training rows only. Fitting it on the full dataset would let test-set
# means and variances leak into the training features.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded, y,
    test_size=0.2,
    random_state=42,
    stratify=y,  # CRITICAL: Maintain class distribution
)

# Learn the scaling parameters from the training split, then apply the same
# transform to the held-out split.
scaler = StandardScaler()
scale_cols = X_train.select_dtypes(include=[np.number]).columns
X_train = X_train.copy()
X_test = X_test.copy()
X_train[scale_cols] = scaler.fit_transform(X_train[scale_cols])
X_test[scale_cols] = scaler.transform(X_test[scale_cols])

print(f"Training set: {X_train.shape[0]:,} samples")
print(f"Test set: {X_test.shape[0]:,} samples")
print(f"Features: {X_train.shape[1]}")

# Verify stratification worked: both splits should show the same class mix.
train_dist = pd.Series(y_train).value_counts(normalize=True) * 100
test_dist = pd.Series(y_test).value_counts(normalize=True) * 100

print("\nClass distribution verification:")
for heading, dist in (("Training set:", train_dist), ("Test set:", test_dist)):
    print(heading)
    for idx, val in dist.items():
        status = "High Delay" if idx == 1 else "No High Delay"
        print(f"  {status}: {val:.2f}%")

# ML Models - Algorithms training

In [None]:
# Registries filled in by each training cell below, keyed by model display name.
models = {}          # fitted estimator
best_params = {}     # hyperparameters selected for the model
training_times = {}  # wall-clock fit time in seconds
cv_scores = {}       # mean cross-validated F1 on the training split

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
import time

print("Training Logistic Regression...")

# Search space for the regularisation strength and penalty type.
lr_param_grid = dict(
    C=[0.1, 1.0, 10.0],
    penalty=['l1', 'l2'],
    solver=['liblinear'],
)

lr_base = LogisticRegression(max_iter=2000, random_state=42, class_weight='balanced')

# Time the whole grid search (3-fold, F1-scored).
start_time = time.time()
lr_grid = GridSearchCV(
    estimator=lr_base,
    param_grid=lr_param_grid,
    scoring='f1',
    cv=3,
    n_jobs=-1,
    verbose=0,
)
lr_grid.fit(X_train, y_train)
lr_time = time.time() - start_time

# Record the winning configuration in the shared registries.
models['Logistic Regression'] = lr_grid.best_estimator_
best_params['Logistic Regression'] = lr_grid.best_params_
training_times['Logistic Regression'] = lr_time

# 5-fold F1 of the selected estimator on the training split.
lr_cv_scores = cross_val_score(lr_grid.best_estimator_, X_train, y_train, scoring='f1', cv=5)
lr_cv_mean = lr_cv_scores.mean()
cv_scores['Logistic Regression'] = lr_cv_mean

print(f"Best parameters: {lr_grid.best_params_}")
print(f"Training time: {lr_time:.2f} seconds")
print(f"CV F1-score: {lr_cv_mean:.4f} (+/- {lr_cv_scores.std() * 2:.4f})")

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
import time

print('Training Random Forest...')

# Search space for forest size and tree-growth limits.
rf_param_grid = dict(
    n_estimators=[100, 200],
    max_depth=[10, 15, 20],
    min_samples_split=[5, 10],
    min_samples_leaf=[2, 5],
)

rf_base = RandomForestClassifier(n_jobs=-1, random_state=42, class_weight='balanced')

# Time the whole grid search (3-fold, F1-scored).
start_time = time.time()
rf_grid = GridSearchCV(
    estimator=rf_base,
    param_grid=rf_param_grid,
    scoring='f1',
    cv=3,
    n_jobs=-1,
    verbose=0,
)
rf_grid.fit(X_train, y_train)
rf_time = time.time() - start_time

# Record the winning configuration in the shared registries.
models['Random Forest'] = rf_grid.best_estimator_
best_params['Random Forest'] = rf_grid.best_params_
training_times['Random Forest'] = rf_time

# 5-fold F1 of the selected estimator on the training split.
rf_cv_scores = cross_val_score(rf_grid.best_estimator_, X_train, y_train, scoring='f1', cv=5)
rf_cv_mean = rf_cv_scores.mean()
cv_scores['Random Forest'] = rf_cv_mean

print(f"Best parameters: {rf_grid.best_params_}")
print(f"Training time: {rf_time:.2f} seconds")
print(f"CV F1-score: {rf_cv_mean:.4f} (+/- {rf_cv_scores.std() * 2:.4f})")

In [None]:
# from sklearn.neighbors import KNeighborsClassifier

# knn_param_grid = {
#     'n_neighbors': [3, 5, 7, 9],
#     'weights': ['uniform', 'distance'],
#     'p': [1, 2]  # Manhattan vs Euclidean distance
# }

# knn_base = KNeighborsClassifier()

# start_time = time.time()
# knn_grid = GridSearchCV(
#     knn_base, knn_param_grid,
#     cv=3, scoring='f1',
#     n_jobs=-1, verbose=0
# )
# knn_grid.fit(X_train, y_train)
# knn_time = time.time() - start_time

# models['K-Nearest Neighbors'] = knn_grid.best_estimator_
# best_params['K-Nearest Neighbors'] = knn_grid.best_params_
# training_times['K-Nearest Neighbors'] = knn_time

# # Cross-validation score
# knn_cv_scores = cross_val_score(knn_grid.best_estimator_, X_train, y_train, cv=5, scoring='f1')
# cv_scores['K-Nearest Neighbors'] = knn_cv_scores.mean()

# print(f"Best parameters: {knn_grid.best_params_}")
# print(f"Training time: {knn_time:.2f} seconds")
# print(f"CV F1-score: {knn_cv_scores.mean():.4f} (+/- {knn_cv_scores.std() * 2:.4f})")

In [None]:
from sklearn.naive_bayes import GaussianNB

print('Training Naive Bayes...')

# No grid search for this model: a single timed fit.
nb_model = GaussianNB()
start_time = time.time()
nb_model.fit(X_train, y_train)
nb_time = time.time() - start_time

# Record the model in the shared registries.
models['Naive Bayes'] = nb_model
best_params['Naive Bayes'] = 'No Hyperparameters'
training_times['Naive Bayes'] = nb_time

# 5-fold F1 on the training split.
nb_cv_scores = cross_val_score(nb_model, X_train, y_train, scoring='f1', cv=5)
nb_cv_mean = nb_cv_scores.mean()
cv_scores['Naive Bayes'] = nb_cv_mean

print(f"Training time: {nb_time:.2f} seconds")
print(f"CV F1-score: {nb_cv_mean:.4f} (+/- {nb_cv_scores.std() * 2:.4f})")






In [None]:
from sklearn.tree import DecisionTreeClassifier

print('Training Decision Tree...')

# Search space for tree depth and split/leaf size limits.
dt_param_grid = dict(
    max_depth=[5, 10, 15, None],
    min_samples_split=[5, 10, 20],
    min_samples_leaf=[2, 5, 10],
)

dt_base = DecisionTreeClassifier(random_state=42, class_weight='balanced')

# Time the whole grid search (3-fold, F1-scored).
start_time = time.time()
dt_grid = GridSearchCV(
    estimator=dt_base,
    param_grid=dt_param_grid,
    scoring='f1',
    cv=3,
    n_jobs=-1,
    verbose=0,
)
dt_grid.fit(X_train, y_train)
dt_time = time.time() - start_time

# Record the winning configuration in the shared registries.
models['Decision Tree'] = dt_grid.best_estimator_
best_params['Decision Tree'] = dt_grid.best_params_
training_times['Decision Tree'] = dt_time

# 5-fold F1 of the selected estimator on the training split.
dt_cv_scores = cross_val_score(dt_grid.best_estimator_, X_train, y_train, scoring='f1', cv=5)
dt_cv_mean = dt_cv_scores.mean()
cv_scores['Decision Tree'] = dt_cv_mean

print(f"Best parameters: {dt_grid.best_params_}")
print(f"Training time: {dt_time:.2f} seconds")
print(f"CV F1-score: {dt_cv_mean:.4f} (+/- {dt_cv_scores.std() * 2:.4f})")


In [None]:
# Training Summary
# One summary line per trained model: mean CV F1 and wall-clock fit time.
banner = "=" * 60
print("\n" + banner)
print("TRAINING SUMMARY")
print(banner)
for model_name in models:
    mean_f1 = cv_scores[model_name]
    seconds = training_times[model_name]
    print(f"{model_name:20} | F1: {mean_f1:.4f} | Time: {seconds:.2f}s")

# Model Evaluation

In [None]:
# Model Evaluation
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
from sklearn.metrics import precision_recall_curve, f1_score, accuracy_score
import matplotlib.pyplot as plt

print("\n" + "=" * 80)
print("MODEL EVALUATION ON TEST SET")
print("=" * 80)

# Score every registered model on the held-out split.
test_results = {}

for name, model in models.items():
    # Hard labels plus the predicted probability of the positive class.
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]

    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_pred_proba)

    test_results[name] = dict(
        accuracy=accuracy,
        f1_score=f1,
        roc_auc=roc_auc,
        y_pred=y_pred,
        y_pred_proba=y_pred_proba,
    )

    print(f"\n{name} Results:")
    for metric_label, metric_value in (
        ("Accuracy", accuracy),
        ("F1-Score", f1),
        ("ROC-AUC", roc_auc),
    ):
        print(f"{metric_label}: {metric_value:.4f}")

# The best model is the one with the highest test-set F1.
best_model_name = max(test_results, key=lambda k: test_results[k]['f1_score'])
print(f"\nBest Model: {best_model_name}")
print(f"Best F1-Score: {test_results[best_model_name]['f1_score']:.4f}")

In [None]:
# Model Comparison Visualization
# One row per model, ordered from best to worst test-set F1.
comparison_columns = ['Model', 'Accuracy', 'F1-Score', 'ROC-AUC', 'Training Time (s)']
comparison_rows = [
    {
        'Model': model_name,
        'Accuracy': results['accuracy'],
        'F1-Score': results['f1_score'],
        'ROC-AUC': results['roc_auc'],
        'Training Time (s)': training_times[model_name],
    }
    for model_name, results in test_results.items()
]
comparison_df = pd.DataFrame(comparison_rows, columns=comparison_columns)
comparison_df = comparison_df.sort_values('F1-Score', ascending=False)

print("\n" + "=" * 80)
print("FINAL MODEL COMPARISON")
print("=" * 80)
print(comparison_df.to_string(index=False))

# 2x2 grid of bar charts, one metric per panel (filled row by row).
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

panels = [
    ('Accuracy', 'Model Accuracy Comparison', 'Accuracy'),
    ('F1-Score', 'Model F1-Score Comparison', 'F1-Score'),
    ('ROC-AUC', 'Model ROC-AUC Comparison', 'ROC-AUC'),
    ('Training Time (s)', 'Model Training Time Comparison', 'Training Time (seconds)'),
]
for panel_ax, (metric_col, panel_title, y_label) in zip(axes.flat, panels):
    panel_ax.bar(comparison_df['Model'], comparison_df[metric_col])
    panel_ax.set_title(panel_title)
    panel_ax.set_ylabel(y_label)
    panel_ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

In [None]:
# ROC Curves for all models
# Overlay the test-set ROC curve of every evaluated model.
plt.figure(figsize=(10, 8))

for name, results in test_results.items():
    fpr, tpr = roc_curve(y_test, results['y_pred_proba'])[:2]
    curve_label = f"{name} (AUC = {results['roc_auc']:.3f})"
    plt.plot(fpr, tpr, label=curve_label)

# Diagonal reference line for a random classifier.
plt.plot([0, 1], [0, 1], 'k--', label='Random')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves - All Models')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Detailed evaluation for best model
# Per-class report and confusion matrix for the model with the best test F1.
best_model = models[best_model_name]
y_pred_best = test_results[best_model_name]['y_pred']
class_labels = ['No High Delay', 'High Delay']

print(f"\nDetailed Evaluation for {best_model_name}")
print("=" * 50)

print("\nClassification Report:")
print(classification_report(y_test, y_pred_best, target_names=class_labels))

# Rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_test, y_pred_best)
plt.figure(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.title(f'Confusion Matrix - {best_model_name}')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()