# Error Analysis

- Load predictions from
  - 2-stage model (Daniel)
  - MLP regression --> classification (Ryan)
  - Multi tower (Maia)

# Imports

In [0]:
from pyspark.sql.functions import col
from pyspark.sql import Window
import pyspark.sql.functions as F
from pyspark.ml.feature import StringIndexer, OneHotEncoder, VectorAssembler
from pyspark.ml import Pipeline
from pyspark.ml.regression import RandomForestRegressor

from pyspark.ml.evaluation import RegressionEvaluator

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd


import random

import mlflow
print(mlflow.__version__)

import os

# Helper Functions and Blocks

In [0]:
def checkpoint_dataset(dataset, file_path):
    # Create base folder
    section = "2"
    number = "2"
    base_folder = f"dbfs:/student-groups/Group_{section}_{number}"
    dbutils.fs.mkdirs(base_folder)
    # Create subfolders if file_path contains directories
    full_path = f"{base_folder}/{file_path}.parquet"
    subfolder = "/".join(full_path.split("/")[:-1])
    dbutils.fs.mkdirs(subfolder)
    # Save dataset as a parquet file
    dataset.write.mode("overwrite").parquet(full_path)
    print(f"Checkpointed {file_path}")

In [0]:
# creat MLflow experiment

spark.conf.set("spark.databricks.mlflow.trackMLlib.enabled", "true")

RANDOM_SEED = 0
# Define experiment name with proper Databricks path
EXPERIMENT_NAME = "/Shared/team_2_2/mlflow-baseline"
# Create the experiment if it doesn't exist
try:
    experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
    if experiment is None:
        experiment_id = mlflow.create_experiment(EXPERIMENT_NAME)
        print(f"Created new experiment with ID: {experiment_id}")
    else:
        print(f"Using existing experiment: {experiment.name}")
    mlflow.set_experiment(EXPERIMENT_NAME)
except Exception as e:
    print(f"Error with experiment setup: {e}")
    # Fallback to default experiment in workspace
    mlflow.set_experiment(f"/Users/{dbutils.notebook.entry_point.getDbutils().notebook().getContext().userName().get()}/default")

# Load Predictions

In [0]:
# %fs ls dbfs:/student-groups/Group_2_2/5_year_custom_joined/nn_predictions_final/FINAL_TEST_OPTIMIZED_T36_9fea73a1
# FINAL_TEST_OPTIMIZED_T36_9fea73a1
# FINAL_VAL-OPTIMIZED_T36_93b24868


In [0]:
two_stage_5y_test = "dbfs:/student-groups/Group_2_2/2_stage_5y_preds/test_predictions.parquet"

mlp_5y_test = "dbfs:/student-groups/Group_2_2/5_year_custom_joined/fe_graph_and_holiday_nnfeat/training_splits/test_predictions"
mlp_train_val = "dbfs:/student-groups/Group_2_2/5_year_custom_joined/fe_graph_and_holiday_nnfeat/training_splits/train_val_predictions"

multi_tower_test = "dbfs:/student-groups/Group_2_2/5_year_custom_joined/nn_predictions_final/FINAL_TEST_OPTIMIZED_T36_9fea73a1"


two_stage_test_df = spark.read.parquet(two_stage_5y_test)

mlp_test_df = spark.read.parquet(mlp_5y_test)
mlp_train_val_df = spark.read.parquet(mlp_train_val)

multi_tower_test_df = spark.read.parquet(multi_tower_test)



In [0]:
multi_tower_test_df.columns

In [0]:
multi_tower_test_df.limit(100).display()

- Start with Maia's predictions
- Create a confusion matrix to see how the model is performing
- Dig deeper into false negatives (b/c we care about recall)
- Perturbing some delay numbers to get an idea of how that impacts the model outputs

In [0]:
# two_stage_test_df.columns
# mlp_test_df.columns
# mlp_test_df.describe('xgb_predicted_delay').show()

### Multi-tower - confusion matrix

In [0]:
multi_tower_test_2019_df = multi_tower_test_df.join(
    multi_tower_test_df,
    on='flight_uid',
    how='left'
)

# multi_tower_test_2019_df.show()

In [0]:
multi_tower_test_cm_df = multi_tower_test_df.stat.crosstab(
    'target_class',
    'pred_class_optimized'
)

multi_tower_test_cm_df.show()

In [0]:
from pyspark.mllib.evaluation import MulticlassMetrics

# Select the required columns, convert them to an RDD, and cast to float/double
predictionAndLabels = multi_tower_test_df.select( "pred_class_optimized", "target_class") \
                        .rdd \
                        .map(lambda row: (float(row[0]), float(row[1])))

# --- 3. Instantiate Metrics Class ---
# Instantiate metrics object
metrics = MulticlassMetrics(predictionAndLabels)

# --- 4. Get Overall Performance Metrics ---
print("\n--- Model Performance Metrics (Overall) ---")
print(f"1. Weighted Precision: {metrics.weightedPrecision:.4f}")
print(f"2. Weighted Recall:    {metrics.weightedRecall:.4f}")
print(f"3. Weighted F1-Score:  {metrics.weightedFMeasure():.4f}")
print(f"4. Accuracy:           {metrics.accuracy:.4f}")

# --- 5. Get Metrics by Class (for 'Delayed' class 1.0) ---
# For a binary classification like yours, it's critical to look at the positive class (1.0)
positive_class_label = 1.0

# Precision (for the Positive Class 1.0)
precision_1 = metrics.precision(positive_class_label)
print(f"\n--- Metrics for Positive Class (Delayed = 1) ---")
print(f"1. Precision (P): {precision_1:.4f}")

# Recall (Sensitivity)
recall_1 = metrics.recall(positive_class_label)
print(f"2. Recall (R):    {recall_1:.4f}")

# F1-Score (for the Positive Class 1.0)
f1_score_1 = metrics.fMeasure(positive_class_label)
print(f"3. F1-Score:      {f1_score_1:.4f}")


# --- 6. Calculate F2-Score Manually (Since MLlib doesn't have an F-beta method) ---
# F-beta requires the custom calculation based on the extracted P and R.
beta = 2
beta_sq = beta**2
f2_score = (1 + beta_sq) * (precision_1 * recall_1) / ((beta_sq * precision_1) + recall_1) \
           if ((beta_sq * precision_1) + recall_1) > 0 else 0.0

print(f"4. F2-Score:      {f2_score:.4f}")


# --- Optional: Display the Confusion Matrix (for validation) ---
print("\n--- Confusion Matrix  ---")
# The matrix is a NumPy array: rows are actual labels, columns are predicted labels
print(metrics.confusionMatrix().toArray())

In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

cm_pandas = multi_tower_test_cm_df.toPandas()
cm_np = cm_pandas.iloc[:, 1:].values

# Calculate the row sums (Actual Label totals) 
cm_row_sums = cm_np.sum(axis=1)[:, np.newaxis] 

# Calculate the row-normalized percentage matrix 
cm_percent = np.divide(cm_np, cm_row_sums, 
                       out=np.zeros_like(cm_np, dtype=float), 
                       where=cm_row_sums != 0) * 100

annot_labels = (np.asarray([f'{count:d}\n({perc:.1f}%)' 
                            for count, perc in zip(cm_np.flatten(), cm_percent.flatten())])
                            .reshape(cm_np.shape))

class_names = ['Delayed (1)', 'On-Time (0)' ]

cm_df = pd.DataFrame(cm_percent, 
                     index=class_names, 
                     columns=['On-Time (0)', 'Delayed (1)'])

cm_df_reversed = cm_df.iloc[::-1]

annot_labels_reversed = annot_labels[::-1]

# visualize
plt.figure(figsize=(8, 6))

gfy = sns.heatmap(cm_df_reversed,  
                  annot=annot_labels_reversed, 
                  fmt='s',            
                  cmap='Blues',       
                  cbar=True,          
                  linewidths=0.5,
                  linecolor='black')


gfy.set_ylim(len(cm_df_reversed), 0)

# Add titles and labels
plt.suptitle('Flight Delay Prediction Confusion Matrix (Normalized by Actual Class/Recall)', fontsize=14)
plt.title('Multi Tower Model (Test Set - 2019)', fontsize=14)
plt.ylabel('Actual Label (Row total = 100%)', fontsize=12)
plt.xlabel('Predicted Label', fontsize=12)

plt.show()

Results notes

In [0]:
mt_columns = [
 'flight_uid',
 'page_rank',
 'out_degree',
 'in_degree',
 'weighted_out_degree',
 'weighted_in_degree',
 'N_RUNWAYS',
 'betweenness_unweighted',
 'closeness',
 'betweenness',
 'avg_origin_dep_delay',
 'avg_dest_arr_delay',
 'avg_daily_route_flights',
 'avg_route_delay',
 'avg_hourly_flights',
 'QUARTER',
 'MONTH',
 'DAY_OF_MONTH',
 'DAY_OF_WEEK',
 'FL_DATE',
 'OP_UNIQUE_CARRIER',
 'OP_CARRIER_AIRLINE_ID',
 'OP_CARRIER',
 'TAIL_NUM',
 'HourlyDryBulbTemperature',
 'HourlyDewPointTemperature',
 'HourlyRelativeHumidity',
 'HourlyAltimeterSetting',
 'HourlyVisibility',
 'HourlyStationPressure',
 'HourlyWetBulbTemperature',
 'HourlyPrecipitation',
 'HourlyCloudCoverage',
 'HourlyCloudElevation',
 'HourlyWindSpeed',
 'utc_timestamp',
 'CRS_DEP_MINUTES',
 'origin_delays_4h',
 'prev_flight_delay_in_minutes',
 'prev_flight_delay',
 'delay_origin_7d',
 'delay_origin_carrier_7d',
 'route',
 'delay_route_7d',
 'flight_count_24h',
 'LANDING_TIME_DIFF_MINUTES',
 'AVG_ARR_DELAY_ORIGIN',
 'AVG_TAXI_OUT_ORIGIN',
 'IS_HOLIDAY',
 'IS_HOLIDAY_WINDOW',
 'AIRPORT_HUB_CLASS',
 'RATING',
 'AIRLINE_CATEGORY',
 'year',
 'dep_hour',
 'day_of_year',
 'dep_hour_sin',
 'dep_hour_cos',
 'dow_sin',
 'dow_cos',
 'doy_sin',
 'doy_cos',
 'HourlyVisibility_3h_change',
 'HourlyStationPressure_3h_change',
 'HourlyDryBulbTemperature_3h_change',
 'HourlyWindSpeed_3h_change',
 'HourlyPrecipitation_3h_change',
 'utc_ts_sec',
 'ground_flights_last_hour',
 'arrivals_last_hour'
]

In [0]:
mt_test_df = spark.read.parquet("dbfs:/student-groups/Group_2_2/5_year_custom_joined/fe_graph_and_holiday_nnfeat/training_splits/test.parquet/")

full = multi_tower_test_df.join(mt_test_df.select(mt_columns), 'flight_uid', how='left')

In [0]:
# Assume 1 = Delayed (Positive), 0 = On-Time (Negative)

mt_false_negatives = full.filter(F.col("pred_class_optimized") == 0).filter(F.col("target_class") == 1)
mt_false_positives = full.filter(F.col("pred_class_optimized") == 1).filter(F.col("target_class") == 0)
mt_true_negatives = full.filter(F.col("pred_class_optimized") == 0).filter(F.col("target_class") == 0)
mt_true_positives = full.filter(F.col("pred_class_optimized") == 1).filter(F.col("target_class") == 1)

#### Multi-tower Routes

In [0]:
# Get FN stats
fn_stats = mt_false_positives.groupBy('route').agg(
    F.count('*').alias('fn_count'),
    F.round(F.avg('target_delay'), 3).alias('avg_delay'),
    F.round(F.avg('pred_prob'), 3).alias('avg_pred_prob')
    
)

# Get TN stats
tn_stats = mt_true_negatives.groupBy('route').agg(
    F.count('*').alias('tn_count')
)

fp_stats = mt_false_positives.groupBy('route').agg(
    F.count('*').alias('fp_count')
)

tp_stats = mt_true_positives.groupBy('route').agg(
    F.count('*').alias('tp_count')
)

# Join them
route_analysis = fn_stats.join(
    tn_stats, on='route', how='left'
).join(
    fp_stats, on='route', how='left'
).join(
    tp_stats, on='route', how='left'
)

# Calculate FN rate and sort
route_analysis = route_analysis.withColumn(
    'fn_rate_pct',
    F.round((F.col('fn_count') / (F.col('fn_count') + F.col('tn_count'))) * 100, 2)
).orderBy(F.desc('fn_rate_pct'))

route_analysis = route_analysis.withColumn(
    'total_flights',
    F.round((F.col('fn_count') + F.col('tn_count') + F.col('fp_count') + F.col('tp_count')))
)

display(route_analysis)

In [0]:
route_top_15_err = route_analysis.limit(15)

route_top_15_err = route_top_15_err['route', 'fn_rate_pct', 'total_flights']

md_output = route_top_15_err.toPandas().to_markdown(index=False)
print(md_output)

In [0]:
route_analysis.describe().show()

Results
- Most routes are hub to smaller airport
  - Dallas (DFW) an AA hub and O'Hare (ORD) both UA and AA hub in particular
- 8 of 15 routes involve Hawaii
  - HNL (Honolulu), OGG (Maui), LIH (Lihui, HI)
  - long distance flights compound delays - weather, crew limitations, air traffic control issues between east and west coast
- in general, less popular routes with all but one having on average 470 flights, where the mean number of flights per route is over 1500.

#### previous flight delay (minutes)

In [0]:
# Get FN stats
fn_stats = mt_false_negatives.groupBy('prev_flight_delay_in_minutes').agg(
    F.count('*').alias('fn_count'),
    F.round(F.avg('target_delay'), 3).alias('avg_delay'),
    F.round(F.avg('pred_prob'), 3).alias('avg_pred_prob')
    
)

# Get TN stats
tn_stats = mt_true_negatives.groupBy('prev_flight_delay_in_minutes').agg(
    F.count('*').alias('tn_count')
)

fp_stats = mt_false_positives.groupBy('prev_flight_delay_in_minutes').agg(
    F.count('*').alias('fp_count')
)

tp_stats = mt_true_positives.groupBy('prev_flight_delay_in_minutes').agg(
    F.count('*').alias('tp_count')
)

# Join them
prev_delay_analysis = fn_stats.join(
    tn_stats, on='prev_flight_delay_in_minutes', how='left'
).join(
    fp_stats, on='prev_flight_delay_in_minutes', how='left'
).join(
    tp_stats, on='prev_flight_delay_in_minutes', how='left'
)

# Calculate TP rate
prev_delay_analysis = prev_delay_analysis.withColumn(
    'tp_rate_pct',
    F.round((F.col('tp_count') / (F.col('tp_count') + F.col('fp_count'))) * 100, 2 ) 
)

# Calculate TN rate
prev_delay_analysis = prev_delay_analysis.withColumn(
    'tn_rate_pct',
    F.round((F.col('tn_count') / (F.col('tn_count') + F.col('fn_count'))) * 100, 2 ) 
)

# Calculate FP rate
prev_delay_analysis = prev_delay_analysis.withColumn(
    'fp_rate_pct',
    F.round((F.col('fp_count') / (F.col('fp_count') + F.col('tn_count'))) * 100, 2 ) 
)

# Calculate FN rate and sort
prev_delay_analysis = prev_delay_analysis.withColumn(
    'fn_rate_pct',
    F.round((F.col('fn_count') / (F.col('fn_count') + F.col('tn_count'))) * 100, 2)
).orderBy(('prev_flight_delay_in_minutes'))



# route_analysis = route_analysis.withColumn(
#     'total_flights',
#     F.round((F.col('fn_count') + F.col('tn_count') + F.col('fp_count') + F.col('tp_count')))
# )

display(prev_delay_analysis)

In [0]:
df = prev_delay_analysis.toPandas()

In [0]:
# --- Filtering and Plotting Setup ---
x_col = 'prev_flight_delay_in_minutes'
max_x_limit = 650
step_size = 50

# Filter the data for the 0-120 minute range
df_filtered = df[(df[x_col] >= 0) & (df[x_col] <= max_x_limit)].copy()

# --- Plotting ---
plt.figure(figsize=(10, 6))

# Use line plots for clarity when plotting multiple series
# plt.plot(df_filtered[x_col], df_filtered['tp_rate_pct'], marker='o', linestyle='-', color='tab:red', label='True Positive Rate (TPR %)')
plt.plot(df_filtered[x_col], df_filtered['tn_rate_pct'], marker='s', linestyle='-', color='tab:green', label='True Negative Rate (TNR %)')
plt.plot(df_filtered[x_col], df_filtered['fn_rate_pct'], marker='D', linestyle='-', color='tab:blue', label='False Negative Rate (FNR %)')
# plt.plot(df_filtered[x_col], df_filtered['fp_rate_pct'], marker='^', linestyle='-', color='tab:orange', label='False Positive Rate (FPR %)')

# Titles and labels
plt.title(f'Model Classification Rates vs. Previous Flight Delay (0-{max_x_limit} Minutes)', fontsize=16)
plt.xlabel('Previous Flight Delay (Minutes)', fontsize=12)
plt.ylabel('Classification Rate (%)', fontsize=12)
plt.ylim(0, 100) # Set Y-axis from 0% to 100%
plt.grid(axis='both', linestyle='--', alpha=0.7)
# plt.legend(loc='lower right', ncol=2)
plt.legend(loc='lower right')

# Set x-axis range and ticks
plt.xlim(0, max_x_limit)
plt.xticks(range(0, max_x_limit + 1, step_size))

plt.tight_layout()

In [0]:
df['prev_flight_delay_in_minutes'].dtype

## 2-stage Error Analysis

#### 2 stage outputs
2-stage 5 year columns
- `low_pred`
- `high_pred`
- `high_pred_minus_low_pred`
- `features`: empty
- `rawPrediction`: empty
- `clf_prediction`: [0 - on time, 1 - delayed]
- `probability`: empty
- `final_prediction`:[0 - on time, 1 - delayed]
- `decision_source`: [quantile_low, quantile_high, classifier]
- `label_bin`: [0 - on time, 1 - delayed]

In [0]:
confusion_matrix_df = two_stage_test_df.stat.crosstab("label_bin", "final_prediction")
print("--- Confusion Matrix ---")

confusion_matrix_df.show()

In [0]:
from pyspark.mllib.evaluation import MulticlassMetrics

# Select the required columns, convert them to an RDD, and cast to float/double
predictionAndLabels = two_stage_test_df.select("final_prediction", "label_bin") \
                        .rdd \
                        .map(lambda row: (float(row[0]), float(row[1])))

# --- 3. Instantiate Metrics Class ---
# Instantiate metrics object
metrics = MulticlassMetrics(predictionAndLabels)

# --- 4. Get Overall Performance Metrics ---
print("\n--- Model Performance Metrics (Overall) ---")
print(f"1. Weighted Precision: {metrics.weightedPrecision:.4f}")
print(f"2. Weighted Recall:    {metrics.weightedRecall:.4f}")
print(f"3. Weighted F1-Score:  {metrics.weightedFMeasure():.4f}")
print(f"4. Accuracy:           {metrics.accuracy:.4f}")

# --- 5. Get Metrics by Class (for 'Delayed' class 1.0) ---
# For a binary classification like yours, it's critical to look at the positive class (1.0)
positive_class_label = 1.0

# Precision (for the Positive Class 1.0)
precision_1 = metrics.precision(positive_class_label)
print(f"\n--- Metrics for Positive Class (Delayed = 1) ---")
print(f"1. Precision (P): {precision_1:.4f}")

# Recall (Sensitivity)
recall_1 = metrics.recall(positive_class_label)
print(f"2. Recall (R):    {recall_1:.4f}")

# F1-Score (for the Positive Class 1.0)
f1_score_1 = metrics.fMeasure(positive_class_label)
print(f"3. F1-Score:      {f1_score_1:.4f}")


# --- 6. Calculate F2-Score Manually (Since MLlib doesn't have an F-beta method) ---
# F-beta requires the custom calculation based on the extracted P and R.
beta = 2
beta_sq = beta**2
f2_score = (1 + beta_sq) * (precision_1 * recall_1) / ((beta_sq * precision_1) + recall_1) \
           if ((beta_sq * precision_1) + recall_1) > 0 else 0.0

print(f"4. F2-Score:      {f2_score:.4f}")


# --- Optional: Display the Confusion Matrix (for validation) ---
print("\n--- Confusion Matrix  ---")
# The matrix is a NumPy array: rows are actual labels, columns are predicted labels
print(metrics.confusionMatrix().toArray())

In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


cm_np = metrics.confusionMatrix().toArray()

class_names = ['On-Time (0)', 'Delayed (1)']

# calculate percentages
cm_row_sums = cm_np.sum(axis=1)[:, np.newaxis]


cm_percent = np.divide(cm_np, cm_row_sums, 
                       out=np.zeros_like(cm_np, dtype=float), 
                       where=cm_row_sums != 0) * 100


annot_labels = (np.asarray([f'{count:.0f}\n({perc:.1f}%)' 
                            for count, perc in zip(cm_np.flatten(), cm_percent.flatten())])
                            .reshape(cm_np.shape))

cm_df = pd.DataFrame(cm_percent, 
                     index=class_names,      
                     columns=class_names)

two_stage_cm_df = cm_df

plt.figure(figsize=(8, 6))

gfy = sns.heatmap(cm_df,                        
            annot=annot_labels,                 
            fmt='s',                     
            cmap='Blues',   
            cbar=True,      
            linewidths=0.5, 
            linecolor='black')

gfy.set_ylim(cm_df.shape[0], 0)

# Add titles and labels
plt.suptitle('Flight Delay Prediction Confusion Matrix (Normalized by Actual Class/Recall)', fontsize=14)
plt.title('2-stage model (Test Set - 2019)', fontsize=14)
plt.ylabel('Actual Label (Row total = 100%)', fontsize=12)
plt.xlabel('Predicted Label', fontsize=12)

plt.show()

Result notes:
- data imbalance (on time 81.07%, delayed 18.93%)
- true positives 77%, good at catching actual delays
- high false positives (precision 34%) - it is wrong 65% of the time

In [0]:
# features used
two_stage_features_5y = [
    'avg_route_delay',
    'DAY_OF_WEEK',
    'LANDING_TIME_DIFF_MINUTES',
    'HourlyDryBulbTemperature',
    'origin_delays_4h',
    'CRS_DEP_MINUTES',
    'betweenness',
    'AIRLINE_CATEGORY',
    'N_RUNWAYS',
    'closeness',
    'DISTANCE',
    'YEAR',
    'delay_origin_carrier_7d',
    'RATING',
    'betweenness_unweighted',
    'CRS_ELAPSED_TIME',
    'AVG_ARR_DELAY_ORIGIN',
    'IS_HOLIDAY_WINDOW',
    'IS_HOLIDAY',
    'weighted_in_degree',
    'DAY_OF_MONTH',
    'QUARTER',
    'avg_dest_arr_delay',
    'HourlyDewPointTemperature',
    'in_degree',
    'prev_flight_delay_in_minutes',
    'MONTH',
    'avg_origin_dep_delay',
    'flight_count_24h',
    'avg_hourly_flights',
    'weighted_out_degree',
    'HourlyVisibility',
    'AIRPORT_HUB_CLASS',
    'HourlyPrecipitation',
    'HourlyCloudElevation',
    'prev_flight_delay',
    'HourlyStationPressure',
    'out_degree',
    'HourlyWetBulbTemperature',
    'HourlyAltimeterSetting',
    'carrier_vec',
    'HourlyRelativeHumidity',
    'AVG_TAXI_OUT_ORIGIN',
    'avg_daily_route_flights',
    'delay_route_7d',
    'HourlyCloudCoverage',
    'delay_origin_7d',
    'HourlyWindSpeed']

In [0]:
two_stage_test_df.columns

### What was the decision source for the predicted labels?

In [0]:
def analyze_confusion_component(df, component_name: str, condition: F.Column, feature_col: str = "decision_source"):
    """
    Analyzes the distribution of a specified feature column (e.g., decision_source) 
    within a single confusion matrix component (TP, TN, FP, or FN).
    
    Args:
        df: input df
        component_name: The name of the component (e.g., "False Positives").
        condition: The PySpark Column condition that filters for the component.
        feature_col: The column to analyze the distribution of
        
    Returns:
        A DataFrame showing the count and percentage of the feature_col 
        within the filtered component.
    """

    component_df = df.filter(condition)
    

    total_count = component_df.count()
    
    if total_count == 0:
        print(f"Warning: No records found for {component_name}.")
        return spark.createDataFrame([], [feature_col, "count", f"percentage_of_{component_name}"])

    analysis_df = component_df.groupBy(feature_col).count()
    

    analysis_df = analysis_df.withColumn(
        f"percentage_of_{component_name}",
        (F.col("count") / total_count) * 100
    ).orderBy(F.desc("count"))
    
    print(f"\n--- Analysis for {component_name} (Total Count: {total_count}) ---")
    analysis_df.show(20, truncate=False)
    
    return analysis_df


# Assume 1 = Delayed (Positive), 0 = On-Time (Negative)

TP_CONDITION = (F.col("label_bin") == 1) & (F.col("final_prediction") == 1)

TN_CONDITION = (F.col("label_bin") == 0) & (F.col("final_prediction") == 0)

FP_CONDITION = (F.col("label_bin") == 0) & (F.col("final_prediction") == 1)

FN_CONDITION = (F.col("label_bin") == 1) & (F.col("final_prediction") == 0)



# Correct delay prediction
tp_analysis = analyze_confusion_component(two_stage_test_df, "True Positives (TP)", TP_CONDITION)

# Correct on time prediction
tn_analysis = analyze_confusion_component(two_stage_test_df, "True Negatives (TN)", TN_CONDITION)

# False delay prediction
fn_analysis = analyze_confusion_component(two_stage_test_df, "False Negatives (FN)", FN_CONDITION)

# False on time prediction
fp_analysis = analyze_confusion_component(two_stage_test_df, "False Positives (FP)", FP_CONDITION)

In [0]:
display(tp_analysis)

In [0]:
tp_df = tp_analysis.withColumn("Category", F.lit("TP"))
tn_df = tn_analysis.withColumn("Category", F.lit("TN"))
fn_df = fn_analysis.withColumn("Category", F.lit("FN"))
fp_df = fp_analysis.withColumn("Category", F.lit("FP"))

# --- 2. Union the PySpark DataFrames ---

# Use unionAll to combine the four PySpark DataFrames into one large PySpark DF
combined_pyspark_df = tp_df.unionAll(tn_df).unionAll(fn_df).unionAll(fp_df)

# --- 3. Convert to a Single Pandas DataFrame ---

# Select and rename columns to match the visualization code
two_stage_source_df = combined_pyspark_df.select(
    "Category",
    F.col("decision_source").alias("DecisionSource"),
    F.col("count").alias("Count")
).toPandas()

# --- 4. Display the Consolidated DataFrame (Optional) ---
print("--- Consolidated Pandas DataFrame 'df_source' ---")
print(two_stage_source_df.head(8))
# print(f"\nTotal rows in consolidated DataFrame: {len(df_source)}")

In [0]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

df_source = two_stage_source_df

# Round the counts to integers for display
df_source['Count'] = df_source['Count'].round(0).astype(int)

# --- 2. CREATE STACKED BAR CHART ---
plt.figure(figsize=(10, 6))

# Use the 'Category' for the x-axis and 'Count' for the height, colored by 'DecisionSource'
sns.histplot(
    df_source,
    x='Category',
    weights='Count',
    hue='DecisionSource',
    multiple='stack',
    palette={'classifier': 'skyblue', 'quantile_high': 'green', 'quantile_low': 'red'},
    shrink=0.7 # Adjust bar width
)

# Add title and labels
plt.title('Source Contribution to Confusion Matrix Outcomes', fontsize=16)
plt.xlabel('Confusion Matrix Outcome', fontsize=14)
plt.ylabel('Total Count', fontsize=14)
plt.xticks(fontsize=12)

plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()

plt.show()

### Stacked MLP

In [0]:
from pyspark.mllib.evaluation import MulticlassMetrics

In [0]:
mlp_stack_test_path = "dbfs:/student-groups/Group_2_2/5_year_custom_joined/fe_graph_and_holiday_nnfeat/training_splits/mlp_classification_test_predictions"

mlp_stack_test_df = spark.read.parquet(mlp_stack_test_path)
# display(mlp_stack_test_df)

- target variable: `DEP_DEL15`
- predicted variable: `prediction`

In [0]:
mlp_stack_test_df.columns

In [0]:
mlp_stack_test_df = mlp_stack_test_df.join( mlp_stack_test_df.select(mlp_stack_test_df.columns), 'flight_uid', how='inner')

In [0]:
mlp_stack_test_df.printSchema()

In [0]:
mlp_stack_test_df.describe('prob_delay').show()

In [0]:
mlp_stack_test_df.describe('prediction').show()

In [0]:
confusion_matrix_df = mlp_stack_test_df.stat.crosstab("DEP_DEL15", "prediction")
display(confusion_matrix_df)

In [0]:
# Select the required columns, convert them to an RDD, and cast to float/double
predictionAndLabels_mlp = mlp_stack_test_df.select("prediction", "DEP_DEL15") \
                        .rdd \
                        .map(lambda row: (float(row[0]), float(row[1])))

# --- 3. Instantiate Metrics Class ---
# Instantiate metrics object
metrics = MulticlassMetrics(predictionAndLabels_mlp)

# --- 4. Get Overall Performance Metrics ---
print("\n--- Model Performance Metrics (Overall) ---")
print(f"1. Weighted Precision: {metrics.weightedPrecision:.4f}")
print(f"2. Weighted Recall:    {metrics.weightedRecall:.4f}")
print(f"3. Weighted F1-Score:  {metrics.weightedFMeasure():.4f}")
print(f"4. Accuracy:           {metrics.accuracy:.4f}")

# --- 5. Get Metrics by Class (for 'Delayed' class 1.0) ---
# For a binary classification like yours, it's critical to look at the positive class (1.0)
positive_class_label = 1.0

# Precision (for the Positive Class 1.0)
precision_1 = metrics.precision(positive_class_label)
print(f"\n--- Metrics for Positive Class (Delayed = 1) ---")
print(f"1. Precision (P): {precision_1:.4f}")

# Recall (Sensitivity)
recall_1 = metrics.recall(positive_class_label)
print(f"2. Recall (R):    {recall_1:.4f}")

# F1-Score (for the Positive Class 1.0)
f1_score_1 = metrics.fMeasure(positive_class_label)
print(f"3. F1-Score:      {f1_score_1:.4f}")


# --- 6. Calculate F2-Score Manually (Since MLlib doesn't have an F-beta method) ---
# F-beta requires the custom calculation based on the extracted P and R.
beta = 2
beta_sq = beta**2
f2_score = (1 + beta_sq) * (precision_1 * recall_1) / ((beta_sq * precision_1) + recall_1) \
           if ((beta_sq * precision_1) + recall_1) > 0 else 0.0

print(f"4. F2-Score:      {f2_score:.4f}")


# --- Optional: Display the Confusion Matrix (for validation) ---
print("\n--- Confusion Matrix  ---")
# The matrix is a NumPy array: rows are actual labels, columns are predicted labels
print(metrics.confusionMatrix().toArray())

## MK Error analysis
- Which airlines does the model predict best / worst for?
- Which airports show the highest prediction error?
- Are errors worse at certain times of day, days of week, seasons?
- Does the model systematically underpredict severe delays?

#### Which airlines does the model predict best / worst for?
Intepretation Guidance:
- High MAE: unpredictable airline operations
- Positive bias: model predicts delays higher than actual
- Negative bias: model underpredicts delays (dangerous)

In [0]:
# error column
test_error = mlp_test_df.withColumn(
    "error",
    F.col("xgb_predicted_delay") - F.col("DEP_DELAY")
).withColumn(
    "abs_error",
    F.abs(F.col("error"))

)

In [0]:
test_error.describe("error").show()

In [0]:
test_error.describe("abs_error").show()

In [0]:

############################
##### Error by Airline #####
############################

carrier_errors = test_error.groupBy("OP_CARRIER") \
    .agg(
        F.avg("abs_error").alias("mae"),
        F.avg("error").alias("bias"),
        F.count("*").alias("count")
    ) \
    .orderBy(F.desc("mae"))

display(carrier_errors)

In [0]:
carrier_errors_pd = carrier_errors.toPandas()

In [0]:

import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style="whitegrid")

plt.figure(figsize=(12,6))
sns.barplot(
    x="OP_CARRIER",
    y="mae",
    data=carrier_errors_pd,
    palette="flare"
)

plt.title("Mean Absolute Error by Airline (5 year) MLP")
plt.xlabel("Airline (OP_CARRIER)")
plt.ylabel("Mean Absolute Error (minutes)")
plt.xticks(rotation=45)  # rotate x-axis labels for readability
plt.show()


# Ankush

In [0]:
columns = [
 'flight_uid',
 'page_rank',
 'out_degree',
 'in_degree',
 'weighted_out_degree',
 'weighted_in_degree',
 'N_RUNWAYS',
 'betweenness_unweighted',
 'closeness',
 'betweenness',
 'avg_origin_dep_delay',
 'avg_dest_arr_delay',
 'avg_daily_route_flights',
 'avg_route_delay',
 'avg_hourly_flights',
 'QUARTER',
 'MONTH',
 'DAY_OF_MONTH',
 'DAY_OF_WEEK',
 'FL_DATE',
 'OP_UNIQUE_CARRIER',
 'OP_CARRIER_AIRLINE_ID',
 'OP_CARRIER',
 'TAIL_NUM',
 'HourlyDryBulbTemperature',
 'HourlyDewPointTemperature',
 'HourlyRelativeHumidity',
 'HourlyAltimeterSetting',
 'HourlyVisibility',
 'HourlyStationPressure',
 'HourlyWetBulbTemperature',
 'HourlyPrecipitation',
 'HourlyCloudCoverage',
 'HourlyCloudElevation',
 'HourlyWindSpeed',
 'utc_timestamp',
 'CRS_DEP_MINUTES',
 'origin_delays_4h',
 'prev_flight_delay_in_minutes',
 'prev_flight_delay',
 'delay_origin_7d',
 'delay_origin_carrier_7d',
 'route',
 'delay_route_7d',
 'flight_count_24h',
 'LANDING_TIME_DIFF_MINUTES',
 'AVG_ARR_DELAY_ORIGIN',
 'AVG_TAXI_OUT_ORIGIN',
 'IS_HOLIDAY',
 'IS_HOLIDAY_WINDOW',
 'AIRPORT_HUB_CLASS',
 'RATING',
 'AIRLINE_CATEGORY',
 'year',
 'dep_hour',
 'day_of_year',
 'dep_hour_sin',
 'dep_hour_cos',
 'dow_sin',
 'dow_cos',
 'doy_sin',
 'doy_cos',
 'HourlyVisibility_3h_change',
 'HourlyStationPressure_3h_change',
 'HourlyDryBulbTemperature_3h_change',
 'HourlyWindSpeed_3h_change',
 'HourlyPrecipitation_3h_change',
 'utc_ts_sec',
 'ground_flights_last_hour',
 'arrivals_last_hour'
]

In [0]:
confidence_threshold = 0.10
test_df = spark.read.parquet("dbfs:/student-groups/Group_2_2/5_year_custom_joined/fe_graph_and_holiday_nnfeat/training_splits/test.parquet/")

full = multi_tower_test_df.join(test_df.select(columns), 'flight_uid', how='left')

false_negatives = full.filter(F.col("pred_class_optimized") == 0).filter(F.col("target_class") == 1)
false_positives = full.filter(F.col("pred_class_optimized") == 1).filter(F.col("target_class") == 0)
true_negatives = full.filter(F.col("pred_class_optimized") == 0).filter(F.col("target_class") == 0)
true_positives = full.filter(F.col("pred_class_optimized") == 1).filter(F.col("target_class") == 1)



## Congestion

In [0]:
congestion_cols = ['origin_delays_4h', 'ground_flights_last_hour', 'arrivals_last_hour']

fn_stats = false_negatives.select(congestion_cols).describe()
tn_stats = true_negatives.select(congestion_cols).describe()

# Add prefixes to distinguish them
for col in congestion_cols:
    fn_stats = fn_stats.withColumnRenamed(col, f'FN_{col}')
    tn_stats = tn_stats.withColumnRenamed(col, f'TN_{col}')

comparison = fn_stats.join(tn_stats, on='summary', how='inner')
display(comparison)


## Weather

In [0]:


false_negatives.select(
    'HourlyVisibility_3h_change',
    'HourlyStationPressure_3h_change', 
    'HourlyDryBulbTemperature_3h_change',
    'HourlyWindSpeed_3h_change',
    'HourlyPrecipitation_3h_change',
    'target_delay'
).describe().display()

# And compare to true negatives
true_negatives.select(
    'HourlyVisibility_3h_change',
    'HourlyStationPressure_3h_change',
    'HourlyDryBulbTemperature_3h_change', 
    'HourlyWindSpeed_3h_change',
    'HourlyPrecipitation_3h_change'
).describe().display()

## Carrier Analysis

In [0]:
from pyspark.sql import functions as F

# Get FN stats
fn_stats = false_negatives.groupBy('OP_UNIQUE_CARRIER').agg(
    F.count('*').alias('fn_count'),
    F.round(F.avg('target_delay'), 3).alias('avg_delay'),
    F.round(F.avg('pred_prob'), 3).alias('avg_pred_prob')
)

# Get TN stats
tn_stats = true_negatives.groupBy('OP_UNIQUE_CARRIER').agg(
    F.count('*').alias('tn_count')
)

# Join them
carrier_analysis = fn_stats.join(tn_stats, on='OP_UNIQUE_CARRIER', how='left')

# Calculate FN rate and sort
carrier_analysis = carrier_analysis.withColumn(
    'fn_rate_pct',
    F.round((F.col('fn_count') / (F.col('fn_count') + F.col('tn_count'))) * 100, 2)
).orderBy(F.desc('fn_count'))

display(carrier_analysis)

**Analysis of False Negatives: When the Model Misses Delays**

We analyzed false negatives - cases where the model predicts no delay but the flight is actually delayed. This analysis is critical because we've optimized the model for recall: missing a delay prediction (a false negative) is more costly to our operations than a false alarm.

Our analysis revealed significant carrier-specific performance issues. When the model predicts "no delay," it's wrong 6-13% of the time, depending on the carrier. Budget and regional airlines show the highest error rates:

- Frontier: Wrong 12.76% of the time (nearly double the rate of major carriers)
- ExpressJet: Wrong 9.66% of the time  
- Mesa Airlines: Wrong 9.28% of the time

In contrast, the model performs better with legacy carriers:
- Delta: Wrong 6.13% of the time
- Southwest: Wrong 7.08% of the time

What makes this more concerning is the severity: when the model misses delays for budget and regional carriers, those delays average 80-90+ minutesâ€”indicating major operational disruptions rather than minor inconveniences.

The root cause appears to be that the model treats all carriers similarly (assigning comparable probability scores of 0.23-0.27), failing to distinguish that budget and regional airlines operate with tighter schedules and less operational buffer. When things go wrong for these carriers, delays cascade more severely, but the model hasn't learned this pattern.

# Feature breakdown
- look at multi-tower features
    - mutual information (MI)
    - information gain ratio (IGR)

In [0]:
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, when
from sklearn.feature_selection import mutual_info_classif

In [0]:
test_df.printSchema()

In [0]:
# list and sort features
categorical_cols = ['ORIGIN_AIRPORT_ID', 'DEST_AIRPORT_ID', 'TAIL_NUM', 'route','AIRPORT_HUB_CLASS', 'AIRLINE_CATEGORY', 'OP_CARRIER_AIRLINE_ID']

numerical_cols = ['N_RUNWAYS', 'RATING']

graph_cols = [
    'page_rank',
    'out_degree',
    'in_degree',
    'weighted_out_degree',
    'weighted_in_degree',
    'betweenness_unweighted',
    'closeness',
    'betweenness']

temporal_cols = [
    'CRS_DEP_MINUTES', 
    'IS_HOLIDAY',
    'ground_flights_last_hour', 
    'arrivals_last_hour',
    'avg_origin_dep_delay',
    'avg_dest_arr_delay',
    'avg_daily_route_flights',
    'avg_route_delay',
    'avg_hourly_flights',
    'origin_delays_4h',
    'prev_flight_delay_in_minutes',
    'prev_flight_delay',
    'IS_HOLIDAY_WINDOW',
    'delay_origin_7d',
    'delay_origin_carrier_7d',
    'delay_route_7d',
    'flight_count_24h',
    'dep_hour',
    'day_of_year',
    'dep_hour_sin',
    'dep_hour_cos',
    'dow_sin',
    'dow_cos',
    'doy_sin',
    'doy_cos']

weather_cols =[
    'HourlyDryBulbTemperature',
    'HourlyDewPointTemperature',
    'HourlyRelativeHumidity',
    'HourlyAltimeterSetting',
    'HourlyVisibility',
    'HourlyStationPressure',
    'HourlyWetBulbTemperature',
    'HourlyPrecipitation',
    'HourlyCloudCoverage',
    'HourlyCloudElevation',
    'HourlyWindSpeed'
]

In [0]:
print(len(numerical_cols))
print(len(graph_cols))
print(len(temporal_cols))
print(len(weather_cols))
print(len(categorical_cols))

In [0]:

mlflow.autolog(disable=True)

In [0]:
train_multitower_df = spark.read.parquet("dbfs:/student-groups/Group_2_2/5_year_custom_joined/fe_graph_and_holiday_nnfeat/training_splits/train.parquet/")


In [0]:
# 1. Label Encode categorical features
indexers = [
    StringIndexer(inputCol=c, outputCol=c + "_indexed", handleInvalid='keep')
    for c in categorical_cols
]
temp_df = train_multitower_df
for indexer in indexers:
    temp_df = indexer.fit(temp_df).transform(temp_df)

indexed_categorical_cols = [c + "_indexed" for c in categorical_cols]
all_features = indexed_categorical_cols + numerical_cols + graph_cols + temporal_cols + weather_cols

# 2. Select only the necessary columns (features + target)
# Convert Spark DataFrame to Pandas DataFrame on the driver node
# NOTE: This step is the bottleneck. Only use this if your dataset fits into memory on a single node.
pandas_df = temp_df.select(all_features + ['DEP_DELAY']).toPandas()

# Create X (Features) and Y (Target)
X = pandas_df[all_features].values  # Features matrix
Y = pandas_df['DEP_DELAY'].values    # Target array

In [0]:
X.shape

In [0]:
test_df.count()

In [0]:
# The mutual_info_classif function is typically run on the single-node Pandas data (X, Y)

# 1. Calculate MI scores
# n_neighbors=3 (default)
mi_scores = mutual_info_classif(
    X,
    Y,
    discrete_features=[X.shape[1] - len(numerical_cols)] 
)

# 2. Create a results DataFrame for comparison
results = pd.DataFrame({
    'Feature': all_features,
    'MI_Score': mi_scores
})

# 3. Rank the features
ranked_features = results.sort_values(by='MI_Score', ascending=False)

print("--- Ranked Feature Importance based on Mutual Information ---")
display(ranked_features)

Mutual importance for training features - train (2015-2018)
| \# | Feature | MI\_Score |
| :--- | :--- | :--- |
| 1 | HourlyAltimeterSetting | 2.8002939325422997 |
| 2 | prev\_flight\_delay\_in\_minutes | 0.11336009011833781 |
| 3 | route\_indexed | 0.0877265942207659 |
| 4 | avg\_route\_delay | 0.08739041228864863 |
| 5 | RATING | 0.08432725609042269 |
| 6 | OP\_CARRIER\_AIRLINE\_ID | 0.083282313244327 |
| 7 | TAIL\_NUM\_indexed | 0.07619558248381342 |
| 8 | avg\_daily\_route\_flights | 0.07437661271511775 |
| 9 | prev\_flight\_delay | 0.06886316995341701 |
| 10 | page\_rank | 0.048748496553829845 |
| 11 | weighted\_out\_degree | 0.04872314373307862 |
| 12 | weighted\_in\_degree | 0.048675670747392985 |
| 13 | betweenness\_unweighted | 0.047945557026980445 |
| 14 | flight\_count\_24h | 0.047218053696152396 |
| 15 | avg\_origin\_dep\_delay | 0.0455932888525572 |
| 16 | closeness | 0.045550862102127 |
| 17 | HourlyVisibility | 0.04533864598746096 |
| 18 | N\_RUNWAYS | 0.043330541760434826 |
| 19 | dow\_cos | 0.042989460122560886 |
| 20 | ORIGIN\_AIRPORT\_ID\_indexed | 0.04254309545083235 |
| 21 | out\_degree | 0.041375160267438815 |
| 22 | origin\_delays\_4h | 0.04056672779331816 |
| 23 | in\_degree | 0.040471647675563105 |
| 24 | delay\_origin\_carrier\_7d | 0.038586117313815116 |
| 25 | HourlyCloudElevation | 0.03708797255078444 |
| 26 | HourlyCloudCoverage | 0.037032407469152595 |
| 27 | HourlyStationPressure | 0.03549293723475433 |
| 28 | HourlyWetBulbTemperature | 0.034874260682618896 |
| 29 | HourlyDryBulbTemperature | 0.03397509984802749 |
| 30 | CRS\_DEP\_MINUTES | 0.033774722272511326 |
| 31 | dep\_hour | 0.03373940858799962 |
| 32 | AIRLINE\_CATEGORY\_indexed | 0.03325321332436104 |
| 33 | avg\_dest\_arr\_delay | 0.033029141284210084 |
| 34 | HourlyDewPointTemperature | 0.03293807732961174 |
| 35 | betweenness | 0.03241730338804594 |
| 36 | HourlyWindSpeed | 0.03226603678254314 |
| 37 | dep\_hour\_sin | 0.03159371167170555 |
| 38 | HourlyRelativeHumidity | 0.031196384619298634 |
| 39 | DEST\_AIRPORT\_ID\_indexed | 0.030910435184559937 |
| 40 | AIRPORT\_HUB\_CLASS\_indexed | 0.030860668726728768 |
| 41 | dow\_sin | 0.027092979730052313 |
| 42 | delay\_origin\_7d | 0.02595217467666977 |
| 43 | ground\_flights\_last\_hour | 0.02059656938924448 |
| 44 | avg\_hourly\_flights | 0.019106348503965087 |
| 45 | day\_of\_year | 0.014697552416505388 |
| 46 | HourlyPrecipitation | 0.014679587186027021 |
| 47 | doy\_sin | 0.014569783352691879 |
| 48 | dep\_hour\_cos | 0.0137394334246288 |
| 49 | delay\_route\_7d | 0.013313080960970503 |
| 50 | doy\_cos | 0.010357335143982738 |
| 51 | arrivals\_last\_hour | 0.008357375915882592 |
| 52 | IS\_HOLIDAY | 0.000003134918295621958 |
| 53 | IS\_HOLIDAY\_WINDOW | 0 |

Mutual importance for multi-tower training features - train (2015-2018)
| \# | Feature | MI\_Score | Feature Family |
| :--- | :--- | :--- |:--- |
| 1 | HourlyAltimeterSetting | 2.8002939325422997 | Weather
| 2 | prev\_flight\_delay\_in\_minutes | 0.11336009011833781 | Temporal 
| 3 | route\_indexed | 0.0877265942207659 | Categorical
| 4 | avg\_route\_delay | 0.08739041228864863 | Temporal
| 5 | RATING | 0.08432725609042269 | Numerical
| 6 | OP\_CARRIER\_AIRLINE\_ID | 0.083282313244327 | Categorical
| 7 | TAIL\_NUM\_indexed | 0.07619558248381342 | Categorical
| 8 | avg\_daily\_route\_flights | 0.07437661271511775 | Temporal
| 9 | prev\_flight\_delay | 0.06886316995341701 | Temporal
| 10 | page\_rank | 0.048748496553829845 | Graph
| 11 | weighted\_out\_degree | 0.04872314373307862 | Graph
| 12 | weighted\_in\_degree | 0.048675670747392985 | Graph
| 13 | betweenness\_unweighted | 0.047945557026980445 | Graph
| 14 | flight\_count\_24h | 0.047218053696152396 | Temporal
| 15 | avg\_origin\_dep\_delay | 0.0455932888525572 | Temporal

In [0]:
# Check if ANY value in the entire matrix X is NaN
if np.isnan(X).any():
    print("WARNING: NaN values found in the feature matrix (X).")
    
    # Check which columns have NaNs (Optional but helpful)
    nan_cols = [col for col, has_nan in zip(all_features, np.isnan(X).any(axis=0)) if has_nan]
    print(f"Columns with NaNs: {nan_cols}")
else:
    print("Success: No NaNs found in feature matrix (X).")

#### Mutual Importance - Test 2019

|Feature|MI_Score|
|---|---|
|HourlyAltimeterSetting|2.8083349322357227|
|prev_flight_delay_in_minutes|0.10879858801918196|
|route_indexed|0.09175029891133857|
|avg_route_delay|0.09055906131536418|
|TAIL_NUM_indexed|0.08199164087166899|
|avg_daily_route_flights|0.07782292888261466|
|OP_CARRIER_AIRLINE_ID|0.06949685871290168|
|RATING|0.06624835755394543|
|prev_flight_delay|0.06360856531719072|
|weighted_in_degree|0.0478152512439145|
|page_rank|0.04776523648540287|
|weighted_out_degree|0.04758373922179615|
|HourlyVisibility|0.04654793313309291|
|betweenness_unweighted|0.04648674653990703|
|closeness|0.04610142560445318|
|avg_origin_dep_delay|0.04569827218410971|
|ORIGIN_AIRPORT_ID_indexed|0.04561018481968393|
|out_degree|0.040099901462164134|
|origin_delays_4h|0.03867851050713167|
|in_degree|0.03830230138619495|
|HourlyStationPressure|0.037916333181540196|
|HourlyCloudElevation|0.03655269747308054|
|delay_origin_carrier_7d|0.0362647444761004|
|HourlyCloudCoverage|0.0362392793562476|
|HourlyWetBulbTemperature|0.03438703803935006|
|HourlyDryBulbTemperature|0.033158677337901565|
|dep_hour|0.03307315952863643|
|CRS_DEP_MINUTES|0.032810465349711215|
|HourlyWindSpeed|0.03274264874871413|
|HourlyDewPointTemperature|0.032539353468915344|
|HourlyRelativeHumidity|0.03165753351548428|
|avg_dest_arr_delay|0.031550912220193794|
|betweenness|0.031046630262261843|
|dep_hour_sin|0.030874666770480452|
|DEST_AIRPORT_ID_indexed|0.03083368915405238|
|delay_origin_7d|0.029019900401917376|
|flight_count_24h|0.027151977847219122|
|AIRLINE_CATEGORY_indexed|0.024445540824185485|
|N_RUNWAYS|0.023127139528960505|
|day_of_year|0.021766215616070284|
|doy_sin|0.021682019451673362|
|avg_hourly_flights|0.020577333555855226|
|AIRPORT_HUB_CLASS_indexed|0.019747316537505277|
|ground_flights_last_hour|0.019579292502759138|
|HourlyPrecipitation|0.017987522286777136|
|dow_cos|0.015091764321507384|
|dep_hour_cos|0.014846055299774008|
|doy_cos|0.012294921596183173|
|dow_sin|0.00973864873825736|
|delay_route_7d|0.00919839416170909|
|arrivals_last_hour|0.007229669577608178|
|IS_HOLIDAY_WINDOW|0.005493714398421723|
|IS_HOLIDAY|0|