# INTRODUCTION
This notebook performs linear regression on the pre-processed data from "1. daily_import_merge_engineer.ipynb". 

## Libraries

In [27]:
import os
import pandas as pd
pd.set_option('display.width', 600)
import numpy as np
import joblib

from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso, Ridge, ElasticNet
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error, mean_absolute_percentage_error   
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.exceptions import ConvergenceWarning
import warnings

from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer

## Import data & column groups

In [20]:
DAILY_DATA_PATH = "data.v3/daily"

# Daily flights + weather table written by "1. daily_import_merge_engineer.ipynb"
df = pd.read_parquet(os.path.join(DAILY_DATA_PATH, "daily_flights_and_weather_merged.parquet"))


# Flights column groups
flights_terminal_cols = ['flights_arr_A', 'flights_arr_B', 'flights_arr_C', 'flights_arr_D', 'flights_arr_E',
                         'flights_dep_A', 'flights_dep_B', 'flights_dep_C', 'flights_dep_D', 'flights_dep_E']

flights_non_terminal_cols = ['flights_total', 'flights_cancel', 'flights_delay', 'flights_ontime',
                             'flights_arr_ontime', 'flights_arr_delay', 'flights_arr_cancel',
                             'flights_dep_ontime', 'flights_dep_delay', 'flights_dep_cancel']

flights_percentage_cols = ['flights_cancel_pct', 'flights_delay_pct', 'flights_ontime_pct',
                            'flights_arr_delay_pct', 'flights_arr_ontime_pct', 'flights_arr_cancel_pct',
                            'flights_dep_delay_pct', 'flights_dep_ontime_pct', 'flights_dep_cancel_pct']

# Same-day targets, and the matching "<name>_next_day" forecast targets
flights_prediction_cols = flights_non_terminal_cols + flights_percentage_cols
flights_forecast_cols = [f"{col}_next_day" for col in flights_prediction_cols]

# Date column groups
date_cols = ['date', 'covid', 'ordinal_date', 'year', 'month', 'day_of_month', 'day_of_week', 'season', 'holiday', 'halloween', 'xmas_eve', 'new_years_eve', 'jan_2', 'jan_3', 'day_before_easter', 'days_until_xmas', 'days_until_thanksgiving', 'days_until_july_4th', 'days_until_labor_day', 'days_until_memorial_day']

# Weather column groups
weather_cols = ['wx_temperature_max', 'wx_temperature_min', 'wx_apcp', 'wx_prate', 'wx_asnow', 'wx_frozr', 'wx_vis', 'wx_gust', 'wx_maxref', 'wx_cape', 'wx_lftx', 'wx_wind_speed', 'wx_wind_direction']

# "<name>_sq" weather columns. The train/test split cell builds its feature list
# from `weather_cols_sq`, so this name must exist on a fresh kernel.
weather_cols_sq = [f"{col}_sq" for col in weather_cols]

# NOTE(review): kept for backward compatibility only. No visible cell uses the
# "_s2" names and the recorded outputs list "_sq" columns -- confirm these
# columns exist in the parquet file before relying on this list.
weather_cols_s2 = [f"{col}_s2" for col in weather_cols]

# Lag column groups: 1- to 7-day lags of cancelled, delayed and on-time flights
lag_cols = [f"flights_{status}_lag_{lag}"
            for status in ('cancel', 'delay', 'ontime')
            for lag in range(1, 8)]

# DATA PREPROCESSING

## Train Test Split
The `shuffle` argument of `train_test_split` drastically changes the linear model fit. If shuffle=False, the data are split sequentially into train, validation, and test sets (roughly 80:10:10; the two successive 10% splits used below give 81:9:10). The entire data set covers a little over 5 years, so shuffle=False means the last year is not used for training. Omitting the last year of data from the training set crushes the validation accuracy (r-squared, mean squared error), which is not surprising given the dramatic disruption of air traffic caused by the COVID-19 pandemic and the relatively recent recovery. 

Using shuffle=True leads to robust r-squared and MSE metrics that will likely be mostly retained in the test set. However, the accuracy of the model on future data will likely be lower as air-traffic is likely to continue changing for unpredictable reasons. Any forecasting model will likely require frequent training updates with current data to stay relevant.

"shuffle=True" is used for linear regression; it is the `train_test_split` default, which is why the code below does not pass it explicitly. "shuffle=False" should be used for time series models like recurrent neural networks that are designed to digest sequences of data, such as 7 day windows.

In [23]:
# Predictors: calendar features, raw and squared weather features, lagged flight counts
train_features = date_cols + weather_cols + weather_cols_sq + lag_cols

# Feature matrix (the raw 'date' column is excluded) and the full target table
X = df[train_features].drop(columns='date')
y = df[flights_prediction_cols + flights_forecast_cols]

print(X.columns.tolist())
print("\nTarget columns head\n", y.head())
print("\n\nTarget columns tail\n", y.tail())

# Both splits use the same hold-out fraction and seed
split_kwargs = dict(test_size=0.1, random_state=42)

# First hold out the test set ...
X_train_full, X_test, y_train_full, y_test = train_test_split(X, y, **split_kwargs)

# ... then carve a validation set out of what remains
X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, **split_kwargs)

# Report the size of each partition
for label, part in [
    ("\n\nX_train_full shape:", X_train_full),
    ("y_train_full shape:", y_train_full),
    ("X_train shape:", X_train),
    ("y_train shape:", y_train),
    ("X_Test shape:", X_test),
]:
    print(label, part.shape)


['covid', 'ordinal_date', 'year', 'month', 'day_of_month', 'day_of_week', 'season', 'holiday', 'halloween', 'xmas_eve', 'new_years_eve', 'jan_2', 'jan_3', 'day_before_easter', 'days_until_xmas', 'days_until_thanksgiving', 'days_until_july_4th', 'days_until_labor_day', 'days_until_memorial_day', 'wx_temperature_max', 'wx_temperature_min', 'wx_apcp', 'wx_prate', 'wx_asnow', 'wx_frozr', 'wx_vis', 'wx_gust', 'wx_maxref', 'wx_cape', 'wx_lftx', 'wx_wind_speed', 'wx_wind_direction', 'wx_temperature_max_sq', 'wx_temperature_min_sq', 'wx_apcp_sq', 'wx_prate_sq', 'wx_asnow_sq', 'wx_frozr_sq', 'wx_vis_sq', 'wx_gust_sq', 'wx_maxref_sq', 'wx_cape_sq', 'wx_lftx_sq', 'wx_wind_speed_sq', 'wx_wind_direction_sq', 'flights_cancel_lag_1', 'flights_cancel_lag_2', 'flights_cancel_lag_3', 'flights_cancel_lag_4', 'flights_cancel_lag_5', 'flights_cancel_lag_6', 'flights_cancel_lag_7', 'flights_delay_lag_1', 'flights_delay_lag_2', 'flights_delay_lag_3', 'flights_delay_lag_4', 'flights_delay_lag_5', 'flights_del

## Column transformers

In [24]:
# One-hot encode categoricals; unknown categories are ignored because some
# observed holidays may not be in the training data
categorical_tranformer = make_pipeline(OneHotEncoder(handle_unknown='ignore'))
# Standardize numeric features
numeric_transformer = make_pipeline(StandardScaler())

# Show how many columns of each dtype X contains
print(X.dtypes.value_counts())

# Partition the columns of X by dtype
categorical_dtypes = ['object', 'category']
numeric_dtypes = ['float64', 'float32', 'int32', 'int64']
categorical_cols = list(X.select_dtypes(include=categorical_dtypes).columns)
numeric_cols = list(X.select_dtypes(include=numeric_dtypes).columns)

# Every column must land in exactly one of the two groups
print(f"categorical columns: {categorical_cols}")
print(f"numeric columns: {numeric_cols}")
n_accounted = len(categorical_cols) + len(numeric_cols)
assert n_accounted == X_train_full.shape[1]
print("All columns are accounted for!")

# Preprocessing step shared by the linear regression pipelines
LR__transformer = ColumnTransformer(transformers=[
    ('cat', categorical_tranformer, categorical_cols),
    ('num', numeric_transformer, numeric_cols),
])

float64    39
object     11
float32     8
int64       6
int32       2
Name: count, dtype: int64
categorical columns: ['covid', 'month', 'day_of_week', 'season', 'holiday', 'halloween', 'xmas_eve', 'new_years_eve', 'jan_2', 'jan_3', 'day_before_easter']
numeric columns: ['ordinal_date', 'year', 'day_of_month', 'days_until_xmas', 'days_until_thanksgiving', 'days_until_july_4th', 'days_until_labor_day', 'days_until_memorial_day', 'wx_temperature_max', 'wx_temperature_min', 'wx_apcp', 'wx_prate', 'wx_asnow', 'wx_frozr', 'wx_vis', 'wx_gust', 'wx_maxref', 'wx_cape', 'wx_lftx', 'wx_wind_speed', 'wx_wind_direction', 'wx_temperature_max_sq', 'wx_temperature_min_sq', 'wx_apcp_sq', 'wx_prate_sq', 'wx_asnow_sq', 'wx_frozr_sq', 'wx_vis_sq', 'wx_gust_sq', 'wx_maxref_sq', 'wx_cape_sq', 'wx_lftx_sq', 'wx_wind_speed_sq', 'wx_wind_direction_sq', 'flights_cancel_lag_1', 'flights_cancel_lag_2', 'flights_cancel_lag_3', 'flights_cancel_lag_4', 'flights_cancel_lag_5', 'flights_cancel_lag_6', 'flights_cancel_

## Lasso regression

Lasso regression on all targets using GridSearchCV to tune alpha

In [5]:
# ConvergenceWarning and warnings come from the notebook's import cell.

# Candidate L1 penalty strengths tried for every target
param_grid = {'lasso__alpha': [.01, .1, 1, 10, 20]}

lasso_pipeline = make_pipeline(
    LR__transformer,
    Lasso(max_iter=10000)
)

grid_search = GridSearchCV(
    lasso_pipeline,
    param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    n_jobs=-1, # n_jobs=-1 means use all available CPU cores
    verbose=0
    )

lasso_models = {}        # "lasso_<target>" -> best fitted pipeline
convergence_issues = {}  # target -> best alpha that still failed to converge

# Fit lasso models for all targets
for target in y.columns.tolist():
    grid_search.fit(X_train, y_train[target])

    # Keep the refitted best pipeline and its alpha
    best_model = grid_search.best_estimator_
    best_alpha = best_model.named_steps['lasso'].get_params()['alpha']
    lasso_models[f"lasso_{target}"] = best_model

    # Refit the winner in this process with warnings recorded, so a
    # ConvergenceWarning for the selected alpha can be detected here.
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always", ConvergenceWarning)
        best_model.fit(X_train, y_train[target])
        if any(issubclass(warn.category, ConvergenceWarning) for warn in w):
            convergence_issues[target] = best_alpha

# Print convergence issues
if convergence_issues:
    print("Convergence issues:")
    for target, alpha in convergence_issues.items():
        print(f"{target} did not converge with alpha = {alpha}")
else:
    print("No convergence issues for the best alpha values of any target")

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

No convergence issues for the best alpha values of any target


Best lasso alpha, r-squared, and mean absolute error on validation set for each target

In [6]:
# Validation metrics for the best lasso model of every target.
# Rows are collected as dicts and turned into a DataFrame once, instead of
# growing the frame with pd.concat inside the loop.
lasso_records = []

for target in y.columns.tolist():
    model = lasso_models[f"lasso_{target}"]
    y_true = y_val[target]
    y_pred = model.predict(X_val)  # predict once, reuse for every metric

    # round(float(...)) works whether the metric returns a NumPy scalar or a
    # built-in float; ndigits=0 keeps MSE a float as before.
    # NOTE(review): MAPE is ~1e14-1e15 for the cancel targets in the recorded
    # output -- presumably days with zero cancellations; treat it as
    # unreliable for those targets.
    lasso_records.append({
        'TARGET': target,
        'ALPHA': model.named_steps['lasso'].get_params()['alpha'],
        'R2': round(float(r2_score(y_true, y_pred)), 3),
        'MAE': round(float(mean_absolute_error(y_true, y_pred)), 1),
        'MSE': round(float(mean_squared_error(y_true, y_pred)), 0),
        'MAPE': round(float(mean_absolute_percentage_error(y_true, y_pred)), 3),
    })

lasso_results = pd.DataFrame(lasso_records, columns=['TARGET', 'ALPHA', 'R2', 'MAE', 'MSE', 'MAPE'])

print("Validation Metrics for Lasso Regression:")
print(lasso_results)

Validation Metrics for Lasso Regression:
                    TARGET  ALPHA     R2    MAE      MSE          MAPE
0            flights_total   0.10  0.916   59.4   7972.0  3.900000e-02
1           flights_cancel   1.00  0.811   32.3   4628.0  6.339263e+15
2            flights_delay   1.00  0.350   92.2  23254.0  3.850000e-01
3           flights_ontime   0.10  0.671  121.6  33618.0  1.320000e-01
4       flights_arr_ontime   0.10  0.684   59.9   8427.0  1.280000e-01
5        flights_arr_delay   1.00  0.300   47.3   6232.0  3.810000e-01
6       flights_arr_cancel   1.00  0.822   14.7   1075.0  3.684885e+15
7       flights_dep_ontime   0.10  0.656   63.0   8642.0  1.370000e-01
8        flights_dep_delay   0.10  0.408   47.5   5688.0  4.190000e-01
9       flights_dep_cancel   1.00  0.791   16.6   1310.0  7.935274e+15
10      flights_cancel_pct   0.10  0.792    2.1     20.0  3.711407e+14
11       flights_delay_pct   0.10  0.263    5.1     79.0  2.900000e-01
12      flights_ontime_pct   0.10  0

## Ridge regression

Here I use ridge regression on a single target, flights_ontime, and rank the coefficients from most important to least important. Because the continuous features were standardized, coefficient magnitudes generally correspond to importance. A preview of the top 5 features yields no surprises, as Thanksgiving, Saturdays, Christmas Eve, and yesterday's on-time flights are expected to be strong predictors of flights_ontime (the weather feature wx_maxref also ranks in the top 5). 

In [7]:
# Fixed-alpha ridge model for a single target, flights_ontime
ridge_pipeline = make_pipeline(LR__transformer, Ridge(alpha=10))
ridge_pipeline.fit(X_train, y_train['flights_ontime'])

# Score on the validation set
y_pred_ontime = ridge_pipeline.predict(X_val)
print("R2 score:", r2_score(y_val['flights_ontime'], y_pred_ontime))

# Transformed feature names and their fitted coefficients
ridge_ontime_features = ridge_pipeline.named_steps['columntransformer'].get_feature_names_out()
ridge_ontime_coef = ridge_pipeline.named_steps['ridge'].coef_

# Rank features by absolute coefficient (largest first) and keep those above 0.1
ridge_ontime_df = (
    pd.DataFrame({'features': ridge_ontime_features, 'coefficients': ridge_ontime_coef})
    .assign(coefficients_abs=lambda frame: frame['coefficients'].abs())
    .sort_values(by='coefficients_abs', ascending=False)
    .loc[lambda frame: frame['coefficients_abs'] > .1]
)

print("Ridge coefficients:\n", ridge_ontime_df)

R2 score: 0.6686829700384376
Ridge coefficients:
                       features  coefficients  coefficients_abs
38   cat__holiday_Thanksgiving   -181.891491        181.891491
70              num__wx_maxref    -86.768167         86.768167
89   num__flights_ontime_lag_1     69.218829         69.218829
16   cat__day_of_week_Saturday    -64.810300         64.810300
44           cat__xmas_eve_yes    -60.865488         60.865488
..                         ...           ...               ...
52  cat__day_before_easter_yes     -1.002268          1.002268
51   cat__day_before_easter_no      1.002268          1.002268
64                num__wx_apcp     -0.941440          0.941440
45       cat__new_years_eve_no      0.240445          0.240445
46      cat__new_years_eve_yes     -0.240445          0.240445

[96 rows x 3 columns]


## Ridge regression on all targets using grid search CV to tune alpha

In [8]:
# Candidate L2 penalty strengths tried for every target
ridge_alphas = [.01, .1, 1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
param_grid = {'ridge__alpha': ridge_alphas}

ridge_pipeline = make_pipeline(LR__transformer, Ridge(max_iter=10000))

# 5-fold grid search on negative MSE; n_jobs=-1 uses all available CPU cores
grid_search = GridSearchCV(
    ridge_pipeline,
    param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
    verbose=0,
)

ridge_models = {}        # "ridge_<target>" -> best fitted pipeline
convergence_issues = {}  # target -> best alpha that still failed to converge

# One independent search per target column
for target in y.columns:
    target_values = y_train[target]
    grid_search.fit(X_train, target_values)

    # Keep the refitted winner and remember its alpha
    best_model = grid_search.best_estimator_
    best_alpha = best_model.named_steps['ridge'].get_params()['alpha']
    ridge_models[f"ridge_{target}"] = best_model

    # Refit the winner while recording warnings to spot non-convergence
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always", ConvergenceWarning)
        best_model.fit(X_train, target_values)
        did_not_converge = any(
            issubclass(item.category, ConvergenceWarning) for item in caught
        )
        if did_not_converge:
            convergence_issues[target] = best_alpha

# Summarise convergence problems, if any
if not convergence_issues:
    print("No convergence issues for the best alpha values of any target")
else:
    print("Convergence issues:")
    for target, alpha in convergence_issues.items():
        print(f"{target} did not converge with alpha = {alpha}")

No convergence issues for the best alpha values of any target


Get alpha, r-squared, and MAE for the best ridge fit for each target

In [9]:
# Validation metrics for the best ridge model of every target.
# Rows are collected as dicts and turned into a DataFrame once; the column
# list now names every metric that is actually reported (including MSE and MAPE).
ridge_records = []

for target in y.columns.tolist():
    model = ridge_models[f"ridge_{target}"]
    y_true = y_val[target]
    y_pred = model.predict(X_val)  # predict once, reuse for every metric

    # round(float(...)) works whether the metric returns a NumPy scalar or a
    # built-in float; ndigits=0 keeps MSE a float as before.
    ridge_records.append({
        'TARGET': target,
        'ALPHA': model.named_steps['ridge'].get_params()['alpha'],
        'R2': round(float(r2_score(y_true, y_pred)), 3),
        'MAE': round(float(mean_absolute_error(y_true, y_pred)), 3),
        'MSE': round(float(mean_squared_error(y_true, y_pred)), 0),
        'MAPE': round(float(mean_absolute_percentage_error(y_true, y_pred)), 3),
    })

ridge_results = pd.DataFrame(ridge_records, columns=['TARGET', 'ALPHA', 'R2', 'MAE', 'MSE', 'MAPE'])

print(ridge_results)

                    TARGET  ALPHA     R2      MAE      MSE          MAPE
0            flights_total    0.1  0.916   60.873   8004.0  4.000000e-02
1           flights_cancel  100.0  0.814   32.343   4547.0  6.167185e+15
2            flights_delay   30.0  0.370   91.431  22542.0  3.860000e-01
3           flights_ontime    1.0  0.672  121.471  33538.0  1.320000e-01
4       flights_arr_ontime    1.0  0.678   60.743   8576.0  1.300000e-01
5        flights_arr_delay   30.0  0.324   46.997   6018.0  4.010000e-01
6       flights_arr_cancel  100.0  0.828   15.355   1040.0  4.137612e+15
7       flights_dep_ontime    1.0  0.655   62.930   8649.0  1.370000e-01
8        flights_dep_delay   30.0  0.414   47.084   5636.0  4.120000e-01
9       flights_dep_cancel  100.0  0.788   17.482   1330.0  8.958675e+15
10      flights_cancel_pct  100.0  0.800    2.112     19.0  3.602348e+14
11       flights_delay_pct   60.0  0.278    5.081     78.0  2.920000e-01
12      flights_ontime_pct   50.0  0.590    5.929  

## Linear regression prediction with elastic net regularization (L1 and L2)

In [28]:
# Base pipeline. The alpha / l1_ratio given here are placeholders: both are
# overridden by the values sampled from `search_spaces` during the search.
elastic_net_pipeline = make_pipeline(
    LR__transformer,
    ElasticNet(alpha=10, 
               l1_ratio=0.5,
               max_iter=10000))

# NOTE(review): the recorded results show ALPHA = 0.1 (the upper bound of this
# range) for every listed target -- consider widening the alpha range.
search_spaces = {
    'elasticnet__alpha': Real(1e-6, 1e-1, prior='log-uniform'),
    'elasticnet__l1_ratio': Real(0.1, 0.9, prior='uniform')
}

bayes_search = BayesSearchCV(
    elastic_net_pipeline,
    search_spaces,
    n_iter=100,
    cv=5,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
    verbose=0,
    random_state=42  # seed the search so reruns sample the same candidates
)

elastic_net_models = {}  # "elastic_net_<target>" -> best fitted pipeline
convergence_issues = {}  # target -> (alpha, l1_ratio) that failed to converge
models_dir = "models/flights_ontime/elastic_net"
os.makedirs(models_dir, exist_ok=True)

for target in flights_prediction_cols:
    bayes_search.fit(X_train, y_train[target])

    # Keep the refitted best pipeline and its hyperparameters
    best_model = bayes_search.best_estimator_
    best_alpha = best_model.named_steps['elasticnet'].get_params()['alpha']
    best_l1_ratio = best_model.named_steps['elasticnet'].get_params()['l1_ratio']
    elastic_net_models[f"elastic_net_{target}"] = best_model

    # Refit the winner in this process with warnings recorded, so a
    # ConvergenceWarning for the selected hyperparameters can be detected here.
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always", ConvergenceWarning)
        best_model.fit(X_train, y_train[target])
        if any(issubclass(warn.category, ConvergenceWarning) for warn in w):
            convergence_issues[target] = (best_alpha, best_l1_ratio)

# Print convergence issues
if convergence_issues:
    print("Convergence issues:")
    for target, alpha_l1_ratio in convergence_issues.items():
        print(f"{target} did not converge with alpha = {alpha_l1_ratio[0]} and l1_ratio = {alpha_l1_ratio[1]}")
else:
    print("No convergence issues for the best alpha and l1_ratio values of any target")

# Save best elastic net models (one joblib file per target)
for target, model in elastic_net_models.items():
    model_path = os.path.join(models_dir, f"{target}.joblib")
    joblib.dump(model, model_path)
    print(f"Saved {target} model to {model_path}")

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

No convergence issues for the best alpha and l1_ratio values of any target
Saved elastic_net_flights_total model to models/flights_ontime/elastic_net/elastic_net_flights_total.joblib
Saved elastic_net_flights_cancel model to models/flights_ontime/elastic_net/elastic_net_flights_cancel.joblib
Saved elastic_net_flights_delay model to models/flights_ontime/elastic_net/elastic_net_flights_delay.joblib
Saved elastic_net_flights_ontime model to models/flights_ontime/elastic_net/elastic_net_flights_ontime.joblib
Saved elastic_net_flights_arr_ontime model to models/flights_ontime/elastic_net/elastic_net_flights_arr_ontime.joblib
Saved elastic_net_flights_arr_delay model to models/flights_ontime/elastic_net/elastic_net_flights_arr_delay.joblib
Saved elastic_net_flights_arr_cancel model to models/flights_ontime/elastic_net/elastic_net_flights_arr_cancel.joblib
Saved elastic_net_flights_dep_ontime model to models/flights_ontime/elastic_net/elastic_net_flights_dep_ontime.joblib
Saved elastic_net_f

In [30]:
# Validation metrics for the best elastic net model of every same-day target.
# Rows are collected as dicts and turned into a DataFrame once, instead of
# growing the frame with pd.concat inside the loop.
elastic_net_records = []

for target in flights_prediction_cols:
    model = elastic_net_models[f"elastic_net_{target}"]
    enet_params = model.named_steps['elasticnet'].get_params()
    y_true = y_val[target]
    y_pred = model.predict(X_val)  # predict once, reuse for every metric

    # round(float(...)) works whether the metric returns a NumPy scalar or a
    # built-in float; ndigits=0 keeps MAE / MSE floats as before.
    elastic_net_records.append({
        'TARGET': target,
        'ALPHA': enet_params['alpha'],
        'L1L2': enet_params['l1_ratio'],
        'R2': round(float(r2_score(y_true, y_pred)), 2),
        'MAE': round(float(mean_absolute_error(y_true, y_pred)), 0),
        'MSE': round(float(mean_squared_error(y_true, y_pred)), 0),
        'MAPE': round(float(mean_absolute_percentage_error(y_true, y_pred)), 2),
    })

elastic_net_results = pd.DataFrame(
    elastic_net_records,
    columns=['TARGET', 'ALPHA', 'L1L2', 'R2', 'MAE', 'MSE', 'MAPE'],
)

# Create "model_output" directory
os.makedirs("model_output/flights_ontime", exist_ok=True)

# Save results to a csv file
elastic_net_results.to_csv("model_output/flights_ontime/elastic_net_results.csv", index=False)

print(elastic_net_results)
# print(temp)

                    TARGET     ALPHA      L1L2    R2    MAE      MSE          MAPE
0            flights_total  0.100000  0.100000  0.88   67.0  11111.0  4.000000e-02
1           flights_cancel  0.100000  0.100000  0.81   34.0   4568.0  5.523666e+15
2            flights_delay  0.100000  0.100000  0.35   91.0  23298.0  3.700000e-01
3           flights_ontime  0.100000  0.100000  0.66  124.0  34539.0  1.300000e-01
4       flights_arr_ontime  0.100000  0.100000  0.67   62.0   8697.0  1.300000e-01
5        flights_arr_delay  0.100000  0.100000  0.31   47.0   6145.0  3.900000e-01
6       flights_arr_cancel  0.100000  0.900000  0.82   16.0   1067.0  4.137099e+15
7       flights_dep_ontime  0.100000  0.100000  0.64   64.0   8995.0  1.400000e-01
8        flights_dep_delay  0.100000  0.100000  0.39   48.0   5905.0  4.000000e-01
9       flights_dep_cancel  0.100000  0.457459  0.79   19.0   1304.0  8.592318e+15
10      flights_cancel_pct  0.100000  0.164854  0.81    2.0     19.0  3.310475e+14
11  