In [27]:
# Install the notebook front-end packages (with lower version bounds) into the
# runtime before plotly is imported.
!pip install "jupyterlab>=3" "ipywidgets>=7.6"

import plotly.offline as pyo

# Initialise plotly's notebook mode so figures render inline.
# connected=True asks plotly to load plotly.js from its online CDN rather
# than embedding the library in the notebook.
pyo.init_notebook_mode(connected=True)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [28]:
from google.colab import files
import os

# pandas is pinned to the 2.0.0rc0 pre-release; imblearn is installed unpinned.
# NOTE(review): if another pandas version was already imported in this
# runtime, the pin may only take effect after a restart — confirm.
!pip install pandas==2.0.0rc0
!pip install imblearn

# `files` is imported a second time here (it is already imported above).
from google.colab import files, data_table
from google.colab.data_table import DataTable
import numpy as np
import plotly.figure_factory as ff
import pandas as pd
import plotly.graph_objects as go
from tqdm.notebook import trange, tqdm
from pprint import pprint
import pyarrow as pa

# Make DataFrame.plot(...) produce plotly figures.
pd.options.plotting.backend = "plotly"

# Render DataFrames with Colab's interactive data-table formatter.
data_table.enable_dataframe_formatter()

# Notebook-wide state that later cells fill in:
df_new = None       # the loaded dataset (assigned once filtered.ftr is read)
model_scores = {}   # metrics dicts keyed by a model/feature-selection label
plots = []          # HTML strings of the generated figures

class StopExecution(Exception):
    """Exception whose traceback hook renders nothing.

    Presumably raised to stop a cell without IPython printing a traceback —
    confirm against the call sites.
    """

    def _render_traceback_(self):
        """Produce no traceback output (always returns None)."""
        return None

# --- Notebook configuration ---------------------------------------------------
ROLLING_FRAME_SIZE = 5 # Size of frame for calculating previous average delay. 7 cannot be a factor!
TOP_N = 10 # Number of origin airports to consider
ROWS = 50 # Number of rows to display when showing tables
# Column limit for Colab's DataTable rendering. This is the only value that
# ever takes effect, so it is set exactly once.
DataTable.max_columns = 200
year_start, year_end = 2017, 2018
# When True, run_model only calls fig.show() for runs that use all features;
# every figure is still collected in `plots`.
LOW_MEMORY = True

# --- Load the dataset ---------------------------------------------------------
# Keep prompting for an upload until filtered.ftr exists in the working
# directory, then read it.
while "filtered.ftr" not in os.listdir():
    print("Upload filtered.ftr")
    files.upload()

df_new = pd.read_feather("filtered.ftr")

print("Done loading data.")

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Done loading data.


In [29]:
# Categorical columns to expand into one-hot indicator columns.
cat_columns_arr = ["OP_CARRIER", "ORIGIN", "DEST"]
df_new_cat = pd.get_dummies(df_new, columns=cat_columns_arr)

# Collect every column whose name contains "<category>_" for one of the
# categories above. This is a substring test, so the pre-existing
# OP_CARRIER_FL_NUM column is picked up along with the new dummy columns.
cat_columns_new_arr = list(
    filter(
        lambda name: any((prefix + "_") in name for prefix in cat_columns_arr),
        df_new_cat.columns,
    )
)

In [30]:
# Display the column index of the one-hot encoded frame.
df_new_cat.columns

Index(['FL_DATE', 'OP_CARRIER_FL_NUM', 'CRS_DEP_TIME', 'DEP_TIME', 'DEP_DELAY',
       'TAXI_OUT', 'WHEELS_OFF', 'WHEELS_ON', 'TAXI_IN', 'CRS_ARR_TIME',
       'ARR_TIME', 'ARR_DELAY', 'CRS_ELAPSED_TIME', 'ACTUAL_ELAPSED_TIME',
       'AIR_TIME', 'DISTANCE', 'DEP_DELAY_ADJ', 'DELAYED', 'ARR_DELAY_ADJ',
       'ARR_DELAYED', 'DATE_NORM', 'DEP_HOUR', 'FL_DATE_MONTH',
       'FL_DATE_MONTH_NORM', 'FL_DATE_DAYOFWEEK', 'CRS_DEP_TIME_ADJ',
       'CRS_ARR_TIME_ADJ', 'DEP_TIME_ADJ', 'ARR_TIME_ADJ', 'PREV_AVG_DELAY',
       'PREV_AVG_DELAY_ADJ', 'PREV_AVG_ARR_DELAY', 'PREV_AVG_ARR_DELAY_ADJ',
       'HOURS_SINCE_START', 'HOURS_SINCE_START_ARR', 'OBS_CODE_DEP',
       'OBS_CODE_ARR', 'wind_speed_dep', 'ceiling_height_dep',
       'vis_distance_dep', 'temp_c_dep', 'dew_point_c_dep', 'pressure_hpa_dep',
       'liquid_precip_rate_dep', 'wind_speed_arr', 'ceiling_height_arr',
       'vis_distance_arr', 'temp_c_arr', 'dew_point_c_arr', 'pressure_hpa_arr',
       'liquid_precip_rate_arr', 'OP_CARRIE

In [31]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test split; random_state=0 fixes the shuffle.
# NOTE(review): no `stratify=` argument is passed, so the class ratio of the
# target is not forced to match between the two splits — confirm this is intended.
train, test = train_test_split(df_new_cat, test_size=0.2, random_state=0)

In [32]:
# Candidate predictors: schedule, delay-history and weather columns, followed
# by the columns collected in cat_columns_new_arr.
X_features_arr = [
    "DATE_NORM", "CRS_DEP_TIME_ADJ", "CRS_ARR_TIME_ADJ", "TAXI_OUT", "WHEELS_OFF", "DEP_DELAY",
    "CRS_ELAPSED_TIME", "DISTANCE", "FL_DATE_DAYOFWEEK", "PREV_AVG_DELAY",
    "PREV_AVG_ARR_DELAY", "vis_distance_dep", "liquid_precip_rate_dep", "wind_speed_dep",
    "ceiling_height_dep", "temp_c_dep", "dew_point_c_dep", "pressure_hpa_dep",
    "vis_distance_arr", "liquid_precip_rate_arr", "wind_speed_arr",
    "ceiling_height_arr", "temp_c_arr", "dew_point_c_arr",
    "pressure_hpa_arr",
] + cat_columns_new_arr

# The flight-number column is not a predictor. It is filtered out rather than
# dropped with list.remove(), so this cell does not raise if the column is
# not present in cat_columns_new_arr.
X_features_arr = [name for name in X_features_arr if name != "OP_CARRIER_FL_NUM"]

# De-duplicated copy of the feature names. dict.fromkeys keeps first-seen
# order, so the result is identical in every kernel session (the iteration
# order of a set of strings is not).
X_features = list(dict.fromkeys(X_features_arr))

# Name of the target column.
y_feature_arr = "ARR_DELAYED"

# Design matrices are cast to float; targets are taken as stored.
X_train_arr = train[X_features_arr].astype(float)
X_test_arr = test[X_features_arr].astype(float)
y_train_arr = train[y_feature_arr]
y_test_arr = test[y_feature_arr]

In [33]:
from imblearn.under_sampling import RandomUnderSampler
from imblearn.combine import SMOTEENN
from collections import Counter

# Variant 1: random undersampling, applied to the training split only
# (the test split is never resampled in this cell).
random_sampler = RandomUnderSampler(random_state=0)
X_train_arr_s, y_train_arr_s = random_sampler.fit_resample(X_train_arr, y_train_arr)
print("Random undersampling\t\t", sorted(Counter(y_train_arr_s).items()))

# Variant 2: combined SMOTE + ENN resampling, again fitted on the original
# (not the undersampled) training split.
# NOTE(review): X_train_arr includes one-hot indicator columns, which SMOTE
# would treat like any other numeric column — confirm that is acceptable.
smote_enn = SMOTEENN(random_state=0)
X_train_arr_s_smote, y_train_arr_s_smote = smote_enn.fit_resample(X_train_arr, y_train_arr)
print("SMOTE/ENN under/oversampling\t", sorted(Counter(y_train_arr_s_smote).items()))

Random undersampling		 [(False, 10626), (True, 10626)]
SMOTE/ENN under/oversampling	 [(False, 21924), (True, 27900)]


In [34]:
# Display the columns of the training design matrix.
X_train_arr.columns

Index(['DATE_NORM', 'CRS_DEP_TIME_ADJ', 'CRS_ARR_TIME_ADJ', 'TAXI_OUT',
       'WHEELS_OFF', 'DEP_DELAY', 'CRS_ELAPSED_TIME', 'DISTANCE',
       'FL_DATE_DAYOFWEEK', 'PREV_AVG_DELAY', 'PREV_AVG_ARR_DELAY',
       'vis_distance_dep', 'liquid_precip_rate_dep', 'wind_speed_dep',
       'ceiling_height_dep', 'temp_c_dep', 'dew_point_c_dep',
       'pressure_hpa_dep', 'vis_distance_arr', 'liquid_precip_rate_arr',
       'wind_speed_arr', 'ceiling_height_arr', 'temp_c_arr', 'dew_point_c_arr',
       'pressure_hpa_arr', 'OP_CARRIER_9E', 'OP_CARRIER_AA', 'OP_CARRIER_B6',
       'OP_CARRIER_DL', 'OP_CARRIER_EV', 'OP_CARRIER_F9', 'OP_CARRIER_MQ',
       'OP_CARRIER_NK', 'OP_CARRIER_OH', 'OP_CARRIER_OO', 'OP_CARRIER_UA',
       'OP_CARRIER_VX', 'OP_CARRIER_WN', 'OP_CARRIER_YV', 'OP_CARRIER_YX',
       'ORIGIN_ATL', 'ORIGIN_CLT', 'ORIGIN_DEN', 'ORIGIN_DFW', 'ORIGIN_IAH',
       'ORIGIN_LAS', 'ORIGIN_LAX', 'ORIGIN_ORD', 'ORIGIN_PHX', 'ORIGIN_SFO',
       'DEST_ATL', 'DEST_CLT', 'DEST_DEN', 'DEST_DF

In [35]:
# Reference run: a ridge classifier fitted on the training split exactly as it
# is (no resampling) and scored on the held-out test split.

from sklearn.linear_model import RidgeClassifier
from sklearn.metrics import classification_report

model = RidgeClassifier().fit(X_train_arr, y_train_arr)
print(
    classification_report(
        y_test_arr,
        model.predict(X_test_arr),
        target_names=["NOT DELAYED", "DELAYED"],
    )
)

              precision    recall  f1-score   support

 NOT DELAYED       0.85      1.00      0.92      8920
     DELAYED       0.97      0.40      0.57      2626

    accuracy                           0.86     11546
   macro avg       0.91      0.70      0.74     11546
weighted avg       0.88      0.86      0.84     11546



In [36]:
# Same ridge classifier, now fitted on the randomly undersampled training data
# and scored on the (unchanged) test split.
from sklearn.metrics import roc_curve

model = RidgeClassifier().fit(X_train_arr_s, y_train_arr_s)

print(
    classification_report(
        y_test_arr,
        model.predict(X_test_arr),
        target_names=["NOT DELAYED", "DELAYED"],
    )
)

              precision    recall  f1-score   support

 NOT DELAYED       0.91      0.90      0.91      8920
     DELAYED       0.67      0.71      0.69      2626

    accuracy                           0.86     11546
   macro avg       0.79      0.80      0.80     11546
weighted avg       0.86      0.86      0.86     11546



In [37]:
# Same ridge classifier, now fitted on the SMOTE/ENN-resampled training data
# (combined over- and undersampling) and scored on the test split.

model = RidgeClassifier().fit(X_train_arr_s_smote, y_train_arr_s_smote)
print(
    classification_report(
        y_test_arr,
        model.predict(X_test_arr),
        target_names=["NOT DELAYED", "DELAYED"],
    )
)

              precision    recall  f1-score   support

 NOT DELAYED       0.92      0.83      0.87      8920
     DELAYED       0.57      0.77      0.65      2626

    accuracy                           0.81     11546
   macro avg       0.75      0.80      0.76     11546
weighted avg       0.84      0.81      0.82     11546



In [38]:
import warnings
from sklearn.preprocessing import RobustScaler
from plotly.subplots import make_subplots
from sklearn.metrics import roc_auc_score, confusion_matrix

# Install an "ignore" filter with no category or module restriction, i.e.
# every warning raised after this point is suppressed.
# NOTE(review): this also hides any warnings the estimators below would emit.
warnings.filterwarnings(action='ignore')

def serialize_model(model, **model_params):
    """Build a text label for a model class and its keyword arguments.

    Args:
        model: Any object with a ``__name__`` attribute (a class is passed
            by the callers in this notebook).
        **model_params: Keyword arguments to list after the name.

    Returns:
        ``" :<name>: "`` followed by the parameters rendered as
        ``"param: value"`` pairs joined with ``", "`` (nothing is appended
        when there are no parameters).
    """
    rendered_params = ", ".join(
        f"{param}: {value}" for param, value in model_params.items()
    )
    return f" :{model.__name__}: {rendered_params}"

def serialize_params(params):
    """Render a hyperparameter mapping as display text.

    Args:
        params: Mapping of parameter name to value; may be empty or None.

    Returns:
        ``"default"`` when ``params`` is falsy, otherwise the entries as
        ``"name: value"`` pairs joined with ``", "``.
    """
    if params:
        return ", ".join(f"{param}: {val}" for param, val in params.items())
    return "default"

# Hard-coded feature subsets that run_model re-evaluates after an
# all-features run; run_model labels them "1" and "2" (list index + 1).
# Every name in the first subset also appears in the second.
feature_sets = [
    # Subset "1": 20 columns.
    [
        'DEP_DELAY', 'CRS_ELAPSED_TIME', 'DISTANCE', 'TAXI_OUT', 
        'PREV_AVG_ARR_DELAY', 'PREV_AVG_DELAY', 'ORIGIN_SFO', 
        'ORIGIN_LAX', 'ORIGIN_ATL', 'DEST_SFO', 'OP_CARRIER_VX', 
        'OP_CARRIER_UA', 'DEST_LAX', 'ORIGIN_CLT', 'DEST_ATL', 
        'DEST_CLT', 'ORIGIN_ORD', 'ORIGIN_LAS', 'OP_CARRIER_WN', 
        'DATE_NORM'
    ],
    # Subset "2": 26 columns (subset "1" plus six more).
    [
        'DEP_DELAY', 'CRS_ELAPSED_TIME', 'DISTANCE', 'TAXI_OUT', 
        'PREV_AVG_ARR_DELAY', 'PREV_AVG_DELAY', 'ORIGIN_SFO', 
        'ORIGIN_LAX', 'ORIGIN_ATL', 'DEST_SFO', 'OP_CARRIER_VX', 
        'OP_CARRIER_UA', 'DEST_LAX', 'ORIGIN_CLT', 'DEST_ATL', 
        'DEST_CLT', 'ORIGIN_ORD', 'ORIGIN_LAS', 'OP_CARRIER_WN', 
        'DEST_LAS', 'WHEELS_OFF', 'DATE_NORM', 'CRS_ARR_TIME_ADJ', 
        'FL_DATE_DAYOFWEEK', 'vis_distance_dep', 'OP_CARRIER_9E'
    ]
]

def _positive_class_scores(estimator, X):
    """Return one score per row for the positive class, for ROC computations.

    Uses ``predict_proba`` (second column) when the estimator provides it and
    falls back to ``decision_function`` otherwise, so classifiers without
    probability estimates can still be evaluated.
    """
    if hasattr(estimator, "predict_proba"):
        return estimator.predict_proba(X)[:, 1]
    return estimator.decision_function(X)


def run_model(model, 
              model_scores, 
              scaler=None, 
              undersampling=True,
              smote=False,
              feature_selection=None, # options are None, '1', '2'
              features=None,
              **model_params):
    """Fit one classifier configuration, report on the test split, and plot it.

    The function reads the module-level training/test data prepared by earlier
    cells (``X_train_arr*``, ``y_train_arr*``, ``test``, ``y_test_arr``,
    ``X_features_arr``), appends the figure HTML to the module-level ``plots``
    list, and consults ``LOW_MEMORY`` and ``feature_sets``.

    Args:
        model: Estimator class; instantiated as ``model(**model_params)``.
        model_scores: Dict that collects the metrics of recorded runs.
        scaler: Scaler class (called with no arguments). Falls back to
            ``RobustScaler`` when None or not callable.
        undersampling: Train on the randomly undersampled data when True,
            on the un-resampled training data when False.
        smote: Train on the SMOTE/ENN data when True (overrides
            ``undersampling``).
        feature_selection: Label of the feature subset in use. None means
            "all features" and additionally triggers one follow-up run per
            entry of ``feature_sets`` when the ROC AUC exceeds 0.5.
        features: Column names to train on; defaults to ``X_features_arr``.
        **model_params: Hyperparameters forwarded to ``model``.

    Returns:
        ``model_scores``. An entry is added only for runs with
        ``undersampling=True`` and ``smote=False``.
    """
    # These two names are otherwise only in scope if earlier notebook cells
    # that import them have been executed.
    from sklearn.metrics import classification_report, roc_curve

    metrics = dict()

    if scaler is None or not callable(scaler):
        scaler = RobustScaler

    if features is None:
        features = X_features_arr

    # Choose the training data; `smote` takes precedence over `undersampling`.
    if smote:
        X_base, y_base, ous = X_train_arr_s_smote, y_train_arr_s_smote, "SMOTE ENN"
    elif undersampling:
        X_base, y_base, ous = X_train_arr_s, y_train_arr_s, "Random undersampling"
    else:
        X_base, y_base, ous = X_train_arr, y_train_arr, "No"

    # Fit the scaler on the training data only, then apply it to both splits.
    X_base_selected = X_base[features]
    transformer_arr = scaler().fit(X_base_selected)
    X_train_arr_trans = transformer_arr.transform(X_base_selected)
    X_test_arr_trans = transformer_arr.transform(test[features])

    reg_arr = model(**model_params)
    reg_arr.fit(X_train_arr_trans, y_base)
    y_pred_arr_test = reg_arr.predict(X_test_arr_trans)
    y_score_arr_test = _positive_class_scores(reg_arr, X_test_arr_trans)

    model_name = serialize_model(model, **model_params)

    metrics["ROC_AUC_score_TE"] = roc_auc_score(y_test_arr, y_score_arr_test)

    if feature_selection is None:
        fs = "all"
        metrics["features"] = "__all__"
    else:
        fs = feature_selection
        metrics["features"] = list(features)

    class_labels = ["NOT DELAYED", "DELAYED"]
    report = classification_report(y_test_arr, y_pred_arr_test, output_dict=True,
                                   target_names=class_labels)

    print(f"  MODEL  {model_name}  ".center(68, "-"))
    print("  CLASSIFICATION REPORT (test set)  ".center(68, "-"))
    print(f"  FEATURE SELECTION: {fs}  ".center(68, "-"))
    print(f"  OVER/UNDERSAMPLING: {ous}  ".center(68, "-"))
    print()
    print(classification_report(y_test_arr, y_pred_arr_test, digits=3,
                                target_names=class_labels))
    print("Area under the ROC curve:\t", metrics["ROC_AUC_score_TE"])
    print()

    metrics["Recall(ND)_TE"] = report["NOT DELAYED"]["recall"]
    metrics["F1(ND)_TE"] = report["NOT DELAYED"]["f1-score"]
    metrics["Recall(D)_TE"] = report["DELAYED"]["recall"]
    metrics["F1(D)_TE"] = report["DELAYED"]["f1-score"]
    metrics["Accuracy_TE"] = report["accuracy"]

    # Don't include in evaluation unless we're using undersampling
    # (Based on our observation that undersampling performs slightly better)
    if undersampling and not smote:
        model_scores[f"{model_name}__{fs}"] = metrics

    cm = confusion_matrix(y_test_arr, y_pred_arr_test)

    # Cell captions laid out like the confusion matrix: rows = actual class,
    # columns = predicted class.
    z_text = [["True Negative", "False Positive"],
              ["False Negative", "True Positive"]]

    fp, tp, _thresholds = roc_curve(y_test_arr, y_score_arr_test)

    # Left panel: confusion matrix heatmap. Right panel: ROC curve.
    fig = make_subplots(rows=1, cols=2)

    fig.add_trace(
        go.Heatmap(
            text=z_text,
            z=cm, x=['0', '1'], y=['0', '1'],
            texttemplate="<b>%{z}</b><br>(%{text})",
            colorscale='Blues',
            showscale=False
        ),
        row=1, col=1
    )

    fig.add_trace(go.Scatter(
        mode="lines",
        x=fp,
        y=tp,
        name="ROC curve",
    ), row=1, col=2)

    params = serialize_params(model_params)

    fig.update_layout(title=f"<b>Confusion matrix and ROC curve for {model.__name__}</b>"
                            f"<br><sup><b>Feature selection:</b> {fs}; "
                            f"{len(features)} features, "
                            f"<b>Hyperparameters:</b> {params}, "
                            f"<b>Undersampling:</b> {ous}</sup>")

    fig.update_xaxes(title_text="Predicted", row=1, col=1)
    fig.update_yaxes(title_text="Actual", row=1, col=1)
    fig.update_xaxes(title_text="False positive rate", row=1, col=2)
    fig.update_yaxes(title_text="True positive rate", row=1, col=2)

    # In low-memory mode only the all-features figure is displayed inline.
    if not LOW_MEMORY or feature_selection is None:
        fig.show()

    # Every tenth stored figure (indices 0, 10, 20, ...) carries its own copy
    # of plotly.js; the others are stored without it.
    plots.append(fig.to_html(include_plotlyjs=(len(plots) % 10 == 0)))

    # After a useful all-features run, repeat the same configuration once per
    # predefined feature subset, labelled "1", "2", ...
    if metrics["ROC_AUC_score_TE"] > 0.5 and feature_selection is None:
        for selector, subset in enumerate(feature_sets, start=1):
            model_scores = run_model(model, model_scores, scaler=scaler,
                feature_selection=str(selector), features=subset,
                undersampling=undersampling, smote=smote, **model_params)

    return model_scores

In [39]:
# Baseline: use the DELAYED column of the test split (DEP_DELAYED) directly as
# the prediction for ARR_DELAYED and record the resulting metrics.

report = classification_report(
    y_test_arr,
    test["DELAYED"],
    output_dict=True,
    target_names=["NOT DELAYED", "DELAYED"],
)

metrics = {
    "Recall(ND)_TE": report["NOT DELAYED"]["recall"],
    "F1(ND)_TE": report["NOT DELAYED"]["f1-score"],
    "Recall(D)_TE": report["DELAYED"]["recall"],
    "F1(D)_TE": report["DELAYED"]["f1-score"],
    "Accuracy_TE": report["accuracy"],
}

model_scores["BASELINE"] = metrics

In [40]:
from sklearn.linear_model import LogisticRegression

# Logistic regression trained on the un-resampled training split.
# run_model only records scores when undersampling=True and smote=False, so
# this run is printed and plotted but adds nothing to model_scores.
model_scores = run_model(LogisticRegression, model_scores, undersampling=False)

-----------------  MODEL   :LogisticRegression:   ------------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
---------------------  FEATURE SELECTION: all  ---------------------
---------------------  OVER/UNDERSAMPLING: No  ---------------------

              precision    recall  f1-score   support

 NOT DELAYED      0.928     0.979     0.953      8920
     DELAYED      0.913     0.743     0.819      2626

    accuracy                          0.926     11546
   macro avg      0.921     0.861     0.886     11546
weighted avg      0.925     0.926     0.923     11546

Area under the ROC curve:	 0.9432423778769736



-----------------  MODEL   :LogisticRegression:   ------------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
----------------------  FEATURE SELECTION: 1  ----------------------
---------------------  OVER/UNDERSAMPLING: No  ---------------------

              precision    recall  f1-score   support

 NOT DELAYED      0.933     0.979     0.956      8920
     DELAYED      0.913     0.763     0.831      2626

    accuracy                          0.930     11546
   macro avg      0.923     0.871     0.893     11546
weighted avg      0.929     0.930     0.927     11546

Area under the ROC curve:	 0.9493324772284057

-----------------  MODEL   :LogisticRegression:   ------------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
----------------------  FEATURE SELECTION: 2  ----------------------
---------------------  OVER/UNDERSAMPLING: No  ---------------------

              precision    recall  f1-score   support

 NOT DELAYED     

In [41]:
# Logistic regression trained on the randomly undersampled training split;
# this configuration is recorded in model_scores.
model_scores = run_model(LogisticRegression, model_scores, undersampling=True)

-----------------  MODEL   :LogisticRegression:   ------------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
---------------------  FEATURE SELECTION: all  ---------------------
------------  OVER/UNDERSAMPLING: Random undersampling  ------------

              precision    recall  f1-score   support

 NOT DELAYED      0.948     0.919     0.933      8920
     DELAYED      0.751     0.827     0.787      2626

    accuracy                          0.898     11546
   macro avg      0.849     0.873     0.860     11546
weighted avg      0.903     0.898     0.900     11546

Area under the ROC curve:	 0.938538468369086



-----------------  MODEL   :LogisticRegression:   ------------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
----------------------  FEATURE SELECTION: 1  ----------------------
------------  OVER/UNDERSAMPLING: Random undersampling  ------------

              precision    recall  f1-score   support

 NOT DELAYED      0.957     0.923     0.940      8920
     DELAYED      0.766     0.860     0.810      2626

    accuracy                          0.908     11546
   macro avg      0.862     0.891     0.875     11546
weighted avg      0.914     0.908     0.910     11546

Area under the ROC curve:	 0.9506203060802805

-----------------  MODEL   :LogisticRegression:   ------------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
----------------------  FEATURE SELECTION: 2  ----------------------
------------  OVER/UNDERSAMPLING: Random undersampling  ------------

              precision    recall  f1-score   support

 NOT DELAYED     

In [42]:
# Logistic regression trained on the SMOTE/ENN-resampled training split.
# Runs with smote=True are printed and plotted but not recorded in model_scores.
model_scores = run_model(LogisticRegression, model_scores, smote=True)

-----------------  MODEL   :LogisticRegression:   ------------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
---------------------  FEATURE SELECTION: all  ---------------------
-----------------  OVER/UNDERSAMPLING: SMOTE ENN  ------------------

              precision    recall  f1-score   support

 NOT DELAYED      0.943     0.920     0.931      8920
     DELAYED      0.749     0.811     0.778      2626

    accuracy                          0.895     11546
   macro avg      0.846     0.865     0.855     11546
weighted avg      0.899     0.895     0.896     11546

Area under the ROC curve:	 0.9338208976123552



-----------------  MODEL   :LogisticRegression:   ------------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
----------------------  FEATURE SELECTION: 1  ----------------------
-----------------  OVER/UNDERSAMPLING: SMOTE ENN  ------------------

              precision    recall  f1-score   support

 NOT DELAYED      0.960     0.911     0.935      8920
     DELAYED      0.741     0.871     0.801      2626

    accuracy                          0.901     11546
   macro avg      0.851     0.891     0.868     11546
weighted avg      0.910     0.901     0.904     11546

Area under the ROC curve:	 0.9508262067151869

-----------------  MODEL   :LogisticRegression:   ------------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
----------------------  FEATURE SELECTION: 2  ----------------------
-----------------  OVER/UNDERSAMPLING: SMOTE ENN  ------------------

              precision    recall  f1-score   support

 NOT DELAYED     

In [43]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree limited to depth 6, trained on the undersampled split.
# NOTE(review): no random_state is passed to the estimator, so results may
# differ between runs — consider pinning it as the split and samplers do.
model_scores = run_model(DecisionTreeClassifier, model_scores, undersampling=True, max_depth=6)

---------  MODEL   :DecisionTreeClassifier: max_depth: 6  ----------
----------------  CLASSIFICATION REPORT (test set)  ----------------
---------------------  FEATURE SELECTION: all  ---------------------
------------  OVER/UNDERSAMPLING: Random undersampling  ------------

              precision    recall  f1-score   support

 NOT DELAYED      0.948     0.922     0.935      8920
     DELAYED      0.757     0.827     0.791      2626

    accuracy                          0.900     11546
   macro avg      0.852     0.875     0.863     11546
weighted avg      0.904     0.900     0.902     11546

Area under the ROC curve:	 0.9341515638714613



---------  MODEL   :DecisionTreeClassifier: max_depth: 6  ----------
----------------  CLASSIFICATION REPORT (test set)  ----------------
----------------------  FEATURE SELECTION: 1  ----------------------
------------  OVER/UNDERSAMPLING: Random undersampling  ------------

              precision    recall  f1-score   support

 NOT DELAYED      0.948     0.924     0.936      8920
     DELAYED      0.764     0.829     0.795      2626

    accuracy                          0.903     11546
   macro avg      0.856     0.877     0.865     11546
weighted avg      0.906     0.903     0.904     11546

Area under the ROC curve:	 0.9372698079569942

---------  MODEL   :DecisionTreeClassifier: max_depth: 6  ----------
----------------  CLASSIFICATION REPORT (test set)  ----------------
----------------------  FEATURE SELECTION: 2  ----------------------
------------  OVER/UNDERSAMPLING: Random undersampling  ------------

              precision    recall  f1-score   support

 NOT DELAYED     

In [44]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with default hyperparameters, trained on the undersampled split.
# NOTE(review): no random_state is passed to the estimator, so results may
# differ between runs — consider pinning it as the split and samplers do.
model_scores = run_model(RandomForestClassifier, model_scores, undersampling=True)

---------------  MODEL   :RandomForestClassifier:   ----------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
---------------------  FEATURE SELECTION: all  ---------------------
------------  OVER/UNDERSAMPLING: Random undersampling  ------------

              precision    recall  f1-score   support

 NOT DELAYED      0.955     0.920     0.937      8920
     DELAYED      0.758     0.852     0.802      2626

    accuracy                          0.905     11546
   macro avg      0.857     0.886     0.870     11546
weighted avg      0.910     0.905     0.906     11546

Area under the ROC curve:	 0.9492665830484394



---------------  MODEL   :RandomForestClassifier:   ----------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
----------------------  FEATURE SELECTION: 1  ----------------------
------------  OVER/UNDERSAMPLING: Random undersampling  ------------

              precision    recall  f1-score   support

 NOT DELAYED      0.955     0.920     0.937      8920
     DELAYED      0.759     0.851     0.802      2626

    accuracy                          0.905     11546
   macro avg      0.857     0.886     0.870     11546
weighted avg      0.910     0.905     0.906     11546

Area under the ROC curve:	 0.9474502773233514

---------------  MODEL   :RandomForestClassifier:   ----------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
----------------------  FEATURE SELECTION: 2  ----------------------
------------  OVER/UNDERSAMPLING: Random undersampling  ------------

              precision    recall  f1-score   support

 NOT DELAYED     

In [45]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient boosting with default hyperparameters, trained on the undersampled split.
# NOTE(review): no random_state is passed to the estimator, so results may
# differ between runs — consider pinning it as the split and samplers do.
model_scores = run_model(GradientBoostingClassifier, model_scores, undersampling=True)

-------------  MODEL   :GradientBoostingClassifier:   --------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
---------------------  FEATURE SELECTION: all  ---------------------
------------  OVER/UNDERSAMPLING: Random undersampling  ------------

              precision    recall  f1-score   support

 NOT DELAYED      0.953     0.928     0.941      8920
     DELAYED      0.776     0.846     0.810      2626

    accuracy                          0.909     11546
   macro avg      0.865     0.887     0.875     11546
weighted avg      0.913     0.909     0.911     11546

Area under the ROC curve:	 0.9505381678216115



-------------  MODEL   :GradientBoostingClassifier:   --------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
----------------------  FEATURE SELECTION: 1  ----------------------
------------  OVER/UNDERSAMPLING: Random undersampling  ------------

              precision    recall  f1-score   support

 NOT DELAYED      0.953     0.928     0.940      8920
     DELAYED      0.776     0.844     0.809      2626

    accuracy                          0.909     11546
   macro avg      0.864     0.886     0.874     11546
weighted avg      0.913     0.909     0.910     11546

Area under the ROC curve:	 0.9500125726180759

-------------  MODEL   :GradientBoostingClassifier:   --------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
----------------------  FEATURE SELECTION: 2  ----------------------
------------  OVER/UNDERSAMPLING: Random undersampling  ------------

              precision    recall  f1-score   support

 NOT DELAYED     

In [46]:
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron with default hyperparameters, trained on the undersampled split.
# NOTE(review): no random_state is passed to the estimator, so results may
# differ between runs — consider pinning it as the split and samplers do.
model_scores = run_model(MLPClassifier, model_scores, undersampling=True)

--------------------  MODEL   :MLPClassifier:   --------------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
---------------------  FEATURE SELECTION: all  ---------------------
------------  OVER/UNDERSAMPLING: Random undersampling  ------------

              precision    recall  f1-score   support

 NOT DELAYED      0.956     0.912     0.933      8920
     DELAYED      0.741     0.856     0.794      2626

    accuracy                          0.899     11546
   macro avg      0.848     0.884     0.864     11546
weighted avg      0.907     0.899     0.902     11546

Area under the ROC curve:	 0.9454838899723017



--------------------  MODEL   :MLPClassifier:   --------------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
----------------------  FEATURE SELECTION: 1  ----------------------
------------  OVER/UNDERSAMPLING: Random undersampling  ------------

              precision    recall  f1-score   support

 NOT DELAYED      0.960     0.911     0.935      8920
     DELAYED      0.743     0.870     0.802      2626

    accuracy                          0.902     11546
   macro avg      0.851     0.891     0.868     11546
weighted avg      0.910     0.902     0.905     11546

Area under the ROC curve:	 0.9518500319331691

--------------------  MODEL   :MLPClassifier:   --------------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
----------------------  FEATURE SELECTION: 2  ----------------------
------------  OVER/UNDERSAMPLING: Random undersampling  ------------

              precision    recall  f1-score   support

 NOT DELAYED     

In [47]:
# Multi-layer perceptron trained on the SMOTE/ENN-resampled split.
# Runs with smote=True are printed and plotted but not recorded in model_scores.
model_scores = run_model(MLPClassifier, model_scores, smote=True)

--------------------  MODEL   :MLPClassifier:   --------------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
---------------------  FEATURE SELECTION: all  ---------------------
-----------------  OVER/UNDERSAMPLING: SMOTE ENN  ------------------

              precision    recall  f1-score   support

 NOT DELAYED      0.955     0.904     0.929      8920
     DELAYED      0.724     0.856     0.784      2626

    accuracy                          0.893     11546
   macro avg      0.840     0.880     0.857     11546
weighted avg      0.903     0.893     0.896     11546

Area under the ROC curve:	 0.9427174870815815



--------------------  MODEL   :MLPClassifier:   --------------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
----------------------  FEATURE SELECTION: 1  ----------------------
-----------------  OVER/UNDERSAMPLING: SMOTE ENN  ------------------

              precision    recall  f1-score   support

 NOT DELAYED      0.953     0.926     0.939      8920
     DELAYED      0.771     0.845     0.806      2626

    accuracy                          0.908     11546
   macro avg      0.862     0.885     0.873     11546
weighted avg      0.911     0.908     0.909     11546

Area under the ROC curve:	 0.946133525046192

--------------------  MODEL   :MLPClassifier:   --------------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
----------------------  FEATURE SELECTION: 2  ----------------------
-----------------  OVER/UNDERSAMPLING: SMOTE ENN  ------------------

              precision    recall  f1-score   support

 NOT DELAYED      

In [48]:
# Multi-layer perceptron with alpha=0.00005, trained on the undersampled split.
# The alpha value becomes part of the model label, so this run gets its own
# entries in model_scores.
model_scores = run_model(MLPClassifier, model_scores, undersampling=True, alpha=0.00005)

--------------  MODEL   :MLPClassifier: alpha: 5e-05  --------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
---------------------  FEATURE SELECTION: all  ---------------------
------------  OVER/UNDERSAMPLING: Random undersampling  ------------

              precision    recall  f1-score   support

 NOT DELAYED      0.953     0.894     0.923      8920
     DELAYED      0.703     0.850     0.769      2626

    accuracy                          0.884     11546
   macro avg      0.828     0.872     0.846     11546
weighted avg      0.896     0.884     0.888     11546

Area under the ROC curve:	 0.9404226107329603



--------------  MODEL   :MLPClassifier: alpha: 5e-05  --------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
----------------------  FEATURE SELECTION: 1  ----------------------
------------  OVER/UNDERSAMPLING: Random undersampling  ------------

              precision    recall  f1-score   support

 NOT DELAYED      0.955     0.932     0.943      8920
     DELAYED      0.787     0.849     0.817      2626

    accuracy                          0.913     11546
   macro avg      0.871     0.891     0.880     11546
weighted avg      0.916     0.913     0.915     11546

Area under the ROC curve:	 0.9520278416251421

--------------  MODEL   :MLPClassifier: alpha: 5e-05  --------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
----------------------  FEATURE SELECTION: 2  ----------------------
------------  OVER/UNDERSAMPLING: Random undersampling  ------------

              precision    recall  f1-score   support

 NOT DELAYED     

In [49]:
# Multi-layer perceptron with alpha=0.000025, trained on the undersampled split.
# The alpha value becomes part of the model label, so this run gets its own
# entries in model_scores.
model_scores = run_model(MLPClassifier, model_scores, undersampling=True, alpha=0.000025)

-------------  MODEL   :MLPClassifier: alpha: 2.5e-05  -------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
---------------------  FEATURE SELECTION: all  ---------------------
------------  OVER/UNDERSAMPLING: Random undersampling  ------------

              precision    recall  f1-score   support

 NOT DELAYED      0.948     0.922     0.935      8920
     DELAYED      0.756     0.828     0.791      2626

    accuracy                          0.900     11546
   macro avg      0.852     0.875     0.863     11546
weighted avg      0.904     0.900     0.902     11546

Area under the ROC curve:	 0.9369420660589688



-------------  MODEL   :MLPClassifier: alpha: 2.5e-05  -------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
----------------------  FEATURE SELECTION: 1  ----------------------
------------  OVER/UNDERSAMPLING: Random undersampling  ------------

              precision    recall  f1-score   support

 NOT DELAYED      0.956     0.926     0.941      8920
     DELAYED      0.773     0.855     0.812      2626

    accuracy                          0.910     11546
   macro avg      0.864     0.890     0.876     11546
weighted avg      0.914     0.910     0.911     11546

Area under the ROC curve:	 0.9535650309598052

-------------  MODEL   :MLPClassifier: alpha: 2.5e-05  -------------
----------------  CLASSIFICATION REPORT (test set)  ----------------
----------------------  FEATURE SELECTION: 2  ----------------------
------------  OVER/UNDERSAMPLING: Random undersampling  ------------

              precision    recall  f1-score   support

 NOT DELAYED     

In [50]:
# Build a frame from the collected scores, transpose it so each entry of
# model_scores becomes a row, and write it to class_scores.csv.
pd.DataFrame(model_scores).T.to_csv("class_scores.csv")

In [51]:
# Write the collected figure HTML to disk, ten figures per file
# (plot_class0.html, plot_class1.html, ...). Stepping through the list in
# strides of 10 means no empty trailing file is produced when the number of
# figures is an exact multiple of 10.
for idx, start in enumerate(range(0, len(plots), 10)):
    # Explicit UTF-8 so the output does not depend on the platform's default encoding.
    with open(f"plot_class{idx}.html", 'w', encoding="utf-8") as f:
        f.write("".join(plots[start:start + 10]))