In [1]:
!pip install "jupyterlab>=3" "ipywidgets>=7.6"

from plotly.offline import plot, iplot, init_notebook_mode

# Initialise plotly's notebook rendering for this session. Per plotly's
# documentation, connected=True loads plotly.js from a CDN instead of
# embedding the library in the notebook.
init_notebook_mode(connected=True)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting jupyterlab>=3
  Using cached jupyterlab-3.6.3-py3-none-any.whl (8.9 MB)
Collecting jupyter-ydoc~=0.2.3
  Using cached jupyter_ydoc-0.2.4-py3-none-any.whl (5.9 kB)
Collecting jupyterlab-server~=2.19
  Using cached jupyterlab_server-2.22.1-py3-none-any.whl (57 kB)
Collecting jupyter-server-ydoc~=0.8.0
  Using cached jupyter_server_ydoc-0.8.0-py3-none-any.whl (11 kB)
Collecting nbclassic
  Using cached nbclassic-0.5.5-py3-none-any.whl (10.0 MB)
Collecting jedi>=0.16
  Using cached jedi-0.18.2-py2.py3-none-any.whl (1.6 MB)
Collecting ypy-websocket<0.9.0,>=0.8.2
  Using cached ypy_websocket-0.8.4-py3-none-any.whl (10 kB)
Collecting jupyter-server-fileid<1,>=0.6.0
  Using cached jupyter_server_fileid-0.9.0-py3-none-any.whl (15 kB)
Collecting y-py<0.6.0,>=0.5.3
  Using cached y_py-0.5.9-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.6 MB)
Collecting requests>=2.28
  Using

In [2]:
from google.colab import files
import os

!pip install pandas==2.0.0rc0
!pip install plotly

from google.colab import files, data_table
from google.colab.data_table import DataTable
import numpy as np
import plotly.figure_factory as ff
import pandas as pd
import plotly.graph_objects as go
from tqdm.notebook import trange, tqdm
from pprint import pprint
import pyarrow as pa
import copy

# Make DataFrame/Series `.plot` calls use plotly as the plotting backend.
pd.options.plotting.backend = "plotly"

# NOTE(review): presumably turns on Colab's interactive DataFrame display — confirm.
data_table.enable_dataframe_formatter()
# Module-level placeholders; none of them is read by the cells visible here.
df_new = None
model_scores = {}
plots = []

class StopExecution(Exception):
    """Exception whose traceback-rendering hook produces nothing.

    NOTE(review): presumably raised to halt a cell without the notebook
    printing a traceback — confirm against the call sites.
    """

    def _render_traceback_(self):
        """Return ``None`` so that no traceback text is supplied."""
        return None

ROLLING_FRAME_SIZE = 5 # Size of frame for calculating previous average delay. 7 cannot be a factor!
TOP_N = 10 # Number of origin airports to consider
ROWS = 50 # Number of rows to display when showing tables
# Set exactly once: an assignment that is overwritten before anything reads
# it has no effect, so only the value that actually applies is kept.
# NOTE(review): presumably the cap on columns Colab's DataTable renders — confirm.
DataTable.max_columns = 200
year_start, year_end = 2017, 2018

# Keep prompting until each required score file is present in the current
# working directory. The check is repeated after every upload, so uploading
# a file under a different name simply prompts again.
for required_csv in ("class_scores.csv", "regr_scores.csv"):
    while required_csv not in os.listdir():
        print(f"Upload {required_csv}")
        files.upload()

def _read_score_table(csv_name):
    """Read a score CSV and index it by its ``"Unnamed: 0"`` column, renamed to ``"model"``.

    Args:
        csv_name: Path of the CSV file to read.

    Returns:
        A DataFrame indexed by ``"model"``.
    """
    raw_scores = pd.read_csv(csv_name)
    labelled = raw_scores.rename(columns={"Unnamed: 0": "model"})
    return labelled.set_index("model")


class_scores = _read_score_table("class_scores.csv")
regr_scores = _read_score_table("regr_scores.csv")

print("Done loading data.")

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Done loading data.


In [3]:
# Bar colour keyed by whether the row is the BASELINE entry.
bar_colors = {True: '#8a0900', False: '#028eb0'}

# Regression: keep the BASELINE row plus every row whose test-set R^2 is above 0.936.
# NOTE(review): the 0.936 cut-off is not explained in this notebook — confirm its rationale.
regr_is_baseline = regr_scores.index == "BASELINE"
regr_above_cutoff = regr_scores["R^2 score_TE"] > 0.936
filtered_regr = regr_scores[regr_above_cutoff | regr_is_baseline].copy()
filtered_regr["is_baseline"] = filtered_regr.index == "BASELINE"
filtered_regr["color"] = filtered_regr["is_baseline"].map(bar_colors)

# Classification: every row is kept; only the helper columns are added.
filtered_class = class_scores.copy()
filtered_class["is_baseline"] = filtered_class.index == "BASELINE"
filtered_class["color"] = filtered_class["is_baseline"].map(bar_colors)

# Literal substring -> replacement pairs used to shorten the index labels of
# `filtered_regr`. They are applied one after another, in the order listed.
regr_names = {
    # colon-delimited names
    ":LinearRegression:": "Lin,",
    ":KNeighborsRegressor:": "KN,",
    ":DecisionTreeRegressor:": "DTree,",
    ":RandomForestRegressor:": "RForest,",
    ":GradientBoostingRegressor:": "GBoost,",
    ":MLPRegressor:": "MLP,",
    ":Lasso:": "Lasso,",
    ":SGDRegressor:": "SGD,",
    ":HuberRegressor:": "Huber,",
    ":LinearSVR:": "LinSVR,",
    # double-underscore suffixes
    "__permutation_small": "__ps",
    "__permutation_big": "__pb",
    "__select_from_model": "__sfm",
    # remaining text fragments
    "squared_epsilon_insensitive": "sq_eps_ins",
    "dual: False": "",
}

# Build the shortened labels on a temporary and assign the index once.
regr_labels = filtered_regr.index
for name, abbr in regr_names.items():
    # regex=False: every key is matched as plain text, not as a pattern.
    regr_labels = regr_labels.str.replace(name, abbr, regex=False)
filtered_regr.index = regr_labels

# Literal substring -> replacement pairs used to shorten the index labels of
# `filtered_class`, applied in the order listed.
class_names = {
    ":LogisticRegression:": "Log,",
    ":DecisionTreeClassifier:": "DTree,",
    ":RandomForestClassifier:": "RForest,",
    ":GradientBoostingClassifier:": "GBoost,",
    # NOTE(review): unlike the other entries this replacement has no trailing
    # comma — confirm whether that is intentional.
    ":MLPClassifier:": "MLP",
}

# Build the shortened labels on a temporary and assign the index once.
class_labels = filtered_class.index
for name, abbr in class_names.items():
    # regex=False: every key is matched as plain text, not as a pattern.
    class_labels = class_labels.str.replace(name, abbr, regex=False)
filtered_class.index = class_labels

In [4]:
def plot_regr_scores(col):
    """Show a bar chart of one column of ``filtered_regr``.

    Rows are sorted ascending by ``col``; each bar takes its colour from the
    frame's ``"color"`` column. The y-axis is zoomed to slightly beyond the
    smallest and largest plotted value.

    Args:
        col: Name of the ``filtered_regr`` column to plot. The chart title is
            derived from this name by expanding a few abbreviations.
    """
    # sort_values already returns a new frame, so no extra copy is required.
    score_df = filtered_regr.sort_values(by=col)

    fig = go.Figure(
        go.Bar(
            x=score_df.index,
            y=score_df[col],
            marker_color=score_df["color"],
        )
    )

    # Column-name fragments and the readable text that replaces them in the title.
    replacements = {
        "Recall": "Recall score",
        "F1": "F1 score",
        "(ND)": " (Not delayed flights)",
        "(D)": " (Delayed flights)",
        "_TE": " – Test set"
    }

    # str is immutable and str.replace returns a new object, so `col` itself
    # is never modified by the loop below.
    title = col
    for fragment, expansion in replacements.items():
        title = title.replace(fragment, expansion)

    # Pad each bound by 2% of its magnitude, always moving away from the
    # data. Scaling by 0.98 / 1.02 directly would pull a negative bound
    # toward zero and cut off the bar it belongs to.
    padding = 0.02
    lowest = score_df[col].min()
    highest = score_df[col].max()
    fig.update_yaxes(
        range=(lowest - abs(lowest) * padding, highest + abs(highest) * padding)
    )
    fig.update_layout(title=title)

    fig.show()

# Render one chart per regression metric, in this order.
for regr_metric in (
    "R^2 score_TE",
    "R^2 score (adjusted)_TE",
    "Mean absolute error_TE",
    "Mean absolute error (adjusted)_TE",
):
    plot_regr_scores(regr_metric)

In [5]:
def plot_class_scores(col):
    """Show a bar chart of one column of ``filtered_class``.

    Rows are sorted ascending by ``col``; each bar takes its colour from the
    frame's ``"color"`` column. The y-axis is zoomed to slightly beyond the
    smallest and largest plotted value.

    Args:
        col: Name of the ``filtered_class`` column to plot. The chart title is
            derived from this name by expanding a few abbreviations.
    """
    # sort_values already returns a new frame, so no extra copy is required.
    score_df = filtered_class.sort_values(by=col)

    fig = go.Figure(
        go.Bar(
            x=score_df.index,
            y=score_df[col],
            marker_color=score_df["color"],
        )
    )

    # Column-name fragments and the readable text that replaces them in the title.
    replacements = {
        "Recall": "Recall score",
        "F1": "F1 score",
        "(ND)": " (Not delayed flights)",
        "(D)": " (Delayed flights)",
        "_TE": " – Test set"
    }

    # str is immutable and str.replace returns a new object, so `col` itself
    # is never modified by the loop below.
    title = col
    for fragment, expansion in replacements.items():
        title = title.replace(fragment, expansion)

    # Pad each bound by 2% of its magnitude, always moving away from the
    # data. Scaling by 0.98 / 1.02 directly would pull a negative bound
    # toward zero and cut off the bar it belongs to.
    padding = 0.02
    lowest = score_df[col].min()
    highest = score_df[col].max()
    fig.update_yaxes(
        range=(lowest - abs(lowest) * padding, highest + abs(highest) * padding)
    )
    fig.update_layout(title=title)

    fig.show()

# Render one chart per classification metric, in this order.
for class_metric in (
    "Recall(ND)_TE",
    "Recall(D)_TE",
    "F1(ND)_TE",
    "F1(D)_TE",
):
    plot_class_scores(class_metric)