In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import sparse
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from xgboost import XGBRegressor
from sklearn.linear_model import Ridge, LinearRegression
from lightgbm import LGBMRegressor
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from catboost import CatBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor


RSEED = 42

## Load the dataset 

In [2]:
df_train_prepro = pd.read_csv('data/data_all_col_upto_duration_ratio.csv')
df_train_prepro.dtypes

departure_point                object
arrival_point                  object
departure_time                 object
arrival_time                   object
flight_status                  object
aircraft_code                  object
target                        float64
duration                      float64
dep_hour                        int64
dep_day                         int64
dep_month                       int64
dep_dayofweek                   int64
dep_quarter                     int64
dep_season                      int64
dep_is_weekend                  int64
dep_time_of_day                object
arr_hour                        int64
arr_day                         int64
arr_month                       int64
arr_dayofweek                   int64
arr_quarter                     int64
arr_season                      int64
arr_is_weekend                  int64
arr_time_of_day                object
route                          object
departure_date                 object
arrival_date

# Define target and feature columns

In [3]:
y = df_train_prepro['target']
X = df_train_prepro.drop(columns=['target'])

# Define the numerical and categorical features

In [4]:
num_col = ['duration','dep_temp', 'dep_precip', 'dep_wind', 'arr_temp',
       'arr_precip', 'arr_wind', 'holiday_length', 'num_passenger_year', 
       'distance_km', 'expected_duration', 'delay_relative_to_expected', 
       'duration_ratio', 'dep_lat', 'dep_long', 'arr_lat', 'arr_long']
cat_col = ['departure_point', 'arrival_point', 'flight_status', 'aircraft_code','dep_hour',
       'dep_day', 'dep_month', 'dep_dayofweek', 'dep_quarter', 'dep_season',
       'dep_is_weekend', 'dep_time_of_day', 'arr_hour', 'arr_day', 'arr_month',
       'arr_dayofweek', 'arr_quarter', 'arr_season', 'arr_is_weekend',
       'arr_time_of_day', 'route', 'is_holiday', 'Country', 'City', 'aircraft_model']

# Scale numeric / Encode (sparse) categorical columns

sparse - efficiently store and process large feature matrices with lots of zeros.

In [5]:
# Use sparse output for OneHotEncoder to save memory
encoder = OneHotEncoder(handle_unknown='ignore')
scaler = StandardScaler()

# Fit and transform categorical columns (sparse matrix)
X_cat_sparse = encoder.fit_transform(X[cat_col])

# Scale only the numerical columns and convert to float32
X_num_scaled = scaler.fit_transform(X[num_col]).astype(np.float32)

# Convert sparse matrix to float32 and combine with numerical features
X_encoded_scaled = sparse.hstack([X_num_scaled, X_cat_sparse.astype(np.float32)]).tocsr()

# Split the encoded and scaled data
X_train_1, X_train_2, y_train_1, y_train_2 = train_test_split(
    X_encoded_scaled, y, stratify=y, test_size=0.2, random_state=RSEED
)

In [6]:
X_encoded_scaled

<Compressed Sparse Row sparse matrix of dtype 'float32'
	with 4189374 stored elements and shape (99747, 1421)>

# Split raw train data for CatBoost

CatBoost takes raw unencoded data so the train data needs to be split seperately

In [6]:
# Get indices for the split
train_idx, test_idx = train_test_split(
    np.arange(len(X)), stratify=y, test_size=0.2, random_state=RSEED
)

X_train_1_raw = X.iloc[train_idx]
X_train_2_raw = X.iloc[test_idx]
y_train_1_raw = y.iloc[train_idx]
y_train_2_raw = y.iloc[test_idx]

# Get categorical column indices for CatBoost
cat_features_idx = [X.columns.get_loc(col) for col in cat_col]

# Create Base and Meta model - BASE & HYPE

In [86]:
# Base models - BASE
random_forest_default = RandomForestRegressor(random_state=RSEED)
xgb_default = XGBRegressor(random_state=RSEED)
ridge_default = Ridge(random_state=RSEED)
knn_default = KNeighborsRegressor()
lgbm_default = LGBMRegressor(random_state=RSEED)
catboost_default = CatBoostRegressor(random_state=RSEED)
adaboost_default = AdaBoostRegressor(random_state=RSEED)
gbr_default = GradientBoostingRegressor(random_state=RSEED)
svr_default = SVR()

In [7]:
# Base models - HYPE
random_forest_hype = RandomForestRegressor(max_depth=30, 
                                      max_features='auto', 
                                      min_samples_leaf=2,
                                      min_samples_split=20, 
                                      n_estimators=300, 
                                      random_state=RSEED)
xgb_hype = XGBRegressor(objective='reg:squarederror',  
                   colsample_bytree=0.5111, 
                   gamma=3.6609, 
                   learning_rate=0.0583, 
                   max_depth=10, 
                   n_estimators=266, 
                   reg_lambda=9.6965, 
                   subsample=0.5241,
                   random_state=RSEED)
ridge_hype = Ridge(alpha=1.5, 
                   random_state=RSEED, 
                   solver="sag")
knn_hype = KNeighborsRegressor(weights='distance', 
                               p=1, 
                               n_neighbors=28)
lgbm_hype = LGBMRegressor(subsample=0.8, 
                     reg_lambda=1.0, 
                     reg_alpha=1.0, 
                     num_leaves=63, 
                     n_estimators=300, 
                     max_depth=-1, 
                     learning_rate=0.05, 
                     colsample_bytree=1.0,
                     random_state=RSEED)
catboost_hype = CatBoostRegressor(random_strength=10, 
                             learning_rate=0.1, 
                             l2_leaf_reg=9, 
                             iterations=500, 
                             depth=8, 
                             border_count=64, 
                             bagging_temperature=0.5,
                             random_state=RSEED)
adaboost_hype = AdaBoostRegressor(estimator=XGBRegressor(max_depth=5),
                                  learning_rate=0.1,
                                  n_estimators=50,
                                  random_state=RSEED)
gbr_hype = GradientBoostingRegressor(n_estimators=200, 
                                learning_rate=0.1, 
                                max_depth=7,
                                subsample=0.8, 
                                random_state=RSEED)
svr_hype = SVR(C=41.54172090104322, 
               epsilon=0.001,
               gamma=0.0020588729828704562)

In [87]:
# Meta-model to combine all the base models - BASE
meta_xgb_base = XGBRegressor(objective='reg:squarederror', random_state=42)

In [8]:
# Meta-model to combine all the base models - HYPE
meta_xgb_hype = XGBRegressor(objective='reg:squarederror', random_state=42)

In [34]:
meta_xgb_hype_2 = XGBRegressor(
    objective='reg:squarederror',
    random_state=42,
    n_estimators=200,       # fewer trees
    max_depth=2,            # shallower trees
    learning_rate=0.05,     # slower learning
    subsample=0.7,          # row subsampling
    colsample_bytree=0.7,   # feature subsampling
    reg_alpha=1.0,          # L1 regularization
    reg_lambda=2.0          # L2 regularization
)

# Fit all the base estimators on the 1st half of the train dataset - BASE

In [None]:
#Fit Random Forest
rf_model_base = random_forest_default.fit(X_train_1, y_train_1) 

  warn(


KeyboardInterrupt: 

In [88]:
#Fit XGBoost
xgb_model_base = xgb_default.fit(X_train_1, y_train_1)

In [89]:
# Fit Ridge Convert sparse matrix to dense for Ridge regression
ridge_model_base = ridge_default.fit(X_train_1.toarray(), y_train_1)

In [90]:
# fit KNN KNeighborsRegressor does not support sparse input, so convert to dense
knn_model_base = knn_default.fit(X_train_1, y_train_1)

In [91]:
# Fit LightGBM
lgbm_model_base = lgbm_default.fit(X_train_1, y_train_1)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008637 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4623
[LightGBM] [Info] Number of data points in the train set: 79797, number of used features: 811
[LightGBM] [Info] Start training from score 9.881625


In [92]:
# Fit CatBoost
# Drop datetime columns that CatBoost cannot handle
datetime_cols = ['departure_time', 'arrival_time', 'departure_date', 'arrival_date']
X_train_1_raw_catboost = X_train_1_raw.drop(columns=datetime_cols)

# Update categorical feature indices for the new dataframe
cat_features_idx_catboost = [X_train_1_raw_catboost.columns.get_loc(col) for col in cat_col if col in X_train_1_raw_catboost.columns]

catboost_model_base = catboost_default.fit(X_train_1_raw_catboost, y_train_1_raw, cat_features=cat_features_idx_catboost)

Learning rate set to 0.081789
0:	learn: 54.7082281	total: 150ms	remaining: 2m 30s
1:	learn: 53.9514531	total: 294ms	remaining: 2m 26s
2:	learn: 53.2698197	total: 424ms	remaining: 2m 20s
3:	learn: 52.6731685	total: 550ms	remaining: 2m 17s
4:	learn: 52.1610881	total: 666ms	remaining: 2m 12s
5:	learn: 51.7082094	total: 777ms	remaining: 2m 8s
6:	learn: 51.3218166	total: 889ms	remaining: 2m 6s
7:	learn: 50.9967758	total: 1s	remaining: 2m 4s
8:	learn: 50.7097222	total: 1.12s	remaining: 2m 3s
9:	learn: 50.4391436	total: 1.24s	remaining: 2m 2s
10:	learn: 50.2134803	total: 1.35s	remaining: 2m 1s
11:	learn: 50.0089747	total: 1.53s	remaining: 2m 6s
12:	learn: 49.8225619	total: 1.65s	remaining: 2m 5s
13:	learn: 49.6639278	total: 1.78s	remaining: 2m 5s
14:	learn: 49.5232444	total: 1.89s	remaining: 2m 4s
15:	learn: 49.4027255	total: 2s	remaining: 2m 3s
16:	learn: 49.2982936	total: 2.12s	remaining: 2m 2s
17:	learn: 49.1508121	total: 2.25s	remaining: 2m 2s
18:	learn: 49.0552864	total: 2.37s	remaining:

In [93]:
# Fit AdaBoost on encoded/scaled training data
adaboost_model_base = adaboost_default.fit(X_train_1, y_train_1)

In [94]:
# Fit GradientBoostingRegressor on encoded/scaled training data
gbr_model_base = gbr_default.fit(X_train_1, y_train_1)

In [None]:
# Fit support vector regression - SVR does not support sparse input, so convert to dense
svr_model_base = svr_default.fit(X_train_1.toarray(), y_train_1)

# Fit Base Models - HYPE

In [24]:
#Fit Random Forest
rf_model_hype = random_forest_hype.fit(X_train_1, y_train_1) 

  warn(


In [10]:
#Fit XGBoost
xgb_model_hype = xgb_hype.fit(X_train_1, y_train_1)

In [11]:
# Fit Ridge Convert sparse matrix to dense for Ridge regression
ridge_model_hype = ridge_hype.fit(X_train_1.toarray(), y_train_1)

In [12]:
# fit KNN KNeighborsRegressor does not support sparse input, so convert to dense
knn_model_hype = knn_hype.fit(X_train_1, y_train_1)

In [13]:
# Fit LightGBM
lgbm_model_hype = lgbm_hype.fit(X_train_1, y_train_1)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.010113 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4623
[LightGBM] [Info] Number of data points in the train set: 79797, number of used features: 811
[LightGBM] [Info] Start training from score 9.881625


In [14]:
# Fit CatBoost
# Drop datetime columns that CatBoost cannot handle
datetime_cols = ['departure_time', 'arrival_time', 'departure_date', 'arrival_date']
X_train_1_raw_catboost = X_train_1_raw.drop(columns=datetime_cols)

# Update categorical feature indices for the new dataframe
cat_features_idx_catboost = [X_train_1_raw_catboost.columns.get_loc(col) for col in cat_col if col in X_train_1_raw_catboost.columns]

catboost_model_hype = catboost_hype.fit(X_train_1_raw_catboost, y_train_1_raw, cat_features=cat_features_idx_catboost)

0:	learn: 54.6808402	total: 308ms	remaining: 2m 33s
1:	learn: 54.0533590	total: 430ms	remaining: 1m 47s
2:	learn: 53.4530870	total: 624ms	remaining: 1m 43s
3:	learn: 53.0848117	total: 714ms	remaining: 1m 28s
4:	learn: 52.6362303	total: 878ms	remaining: 1m 26s
5:	learn: 52.1177579	total: 1.15s	remaining: 1m 34s
6:	learn: 51.6509189	total: 1.42s	remaining: 1m 40s
7:	learn: 51.3283908	total: 1.69s	remaining: 1m 44s
8:	learn: 51.0125624	total: 2.02s	remaining: 1m 50s
9:	learn: 50.7386141	total: 2.29s	remaining: 1m 51s
10:	learn: 50.4492883	total: 2.6s	remaining: 1m 55s
11:	learn: 50.1943303	total: 2.92s	remaining: 1m 58s
12:	learn: 50.1323188	total: 3s	remaining: 1m 52s
13:	learn: 49.9641197	total: 3.25s	remaining: 1m 52s
14:	learn: 49.8004158	total: 3.52s	remaining: 1m 53s
15:	learn: 49.6615079	total: 3.81s	remaining: 1m 55s
16:	learn: 49.5450352	total: 4.04s	remaining: 1m 54s
17:	learn: 49.4443183	total: 4.24s	remaining: 1m 53s
18:	learn: 49.2848728	total: 4.62s	remaining: 1m 56s
19:	lea

In [15]:
# Fit AdaBoost on encoded/scaled training data
adaboost_model_hype = adaboost_hype.fit(X_train_1, y_train_1)

In [16]:
# Fit GradientBoostingRegressor on encoded/scaled training data
gbr_model_hype = gbr_hype.fit(X_train_1, y_train_1)

In [17]:
# Fit support vector regression - SVR does not support sparse input, so convert to dense
svr_model_hype = svr_hype.fit(X_train_1.toarray(), y_train_1)

# Predict the values from the base estimators with the second half of the train dataset - BASE - WITHOUT HYPERPARAMETERS

In [35]:
#rf_pred_base = rf_model_base.predict(X_train_2)
xgb_pred_base = xgb_model_base.predict(X_train_2)
ridge_pred_base = ridge_model_base.predict(X_train_2)
knn_pred_base = knn_model_base.predict(X_train_2)
lgbm_pred_base = lgbm_model_base.predict(X_train_2)
catboost_pred_base = catboost_model_base.predict(X_train_2_raw.drop(columns=datetime_cols))
adaboost_pred_base = adaboost_model_base.predict(X_train_2)
gbr_pred_base = gbr_model_base.predict(X_train_2)
#svr_pred_base = svr_model_base.predict(X_train_2.toarray())

# Combine base model predictions for meta-model input
combine_X_pred_test_base = pd.concat([
	#pd.DataFrame(rf_pred_base),
	pd.DataFrame(xgb_pred_base),
	pd.DataFrame(ridge_pred_base),
    pd.DataFrame(knn_pred_base),
    pd.DataFrame(lgbm_pred_base),
    pd.DataFrame(catboost_pred_base),
    pd.DataFrame(adaboost_pred_base),
    pd.DataFrame(gbr_pred_base),
    #pd.DataFrame(svr_pred_base)
], axis=1)

# Predict the values from the base estimators with the second half of the train dataset - HYPE - WITH HYPERPARAMETERS


In [43]:
rf_pred_hype = rf_model_hype.predict(X_train_2)

In [25]:
rf_pred_hype = rf_model_hype.predict(X_train_2)
xgb_pred_hype = xgb_model_hype.predict(X_train_2)
ridge_pred_hype = ridge_model_hype.predict(X_train_2)
knn_pred_hype = knn_model_hype.predict(X_train_2)
lgbm_pred_hype = lgbm_model_hype.predict(X_train_2)
catboost_pred_hype = catboost_model_hype.predict(X_train_2_raw.drop(columns=datetime_cols))
adaboost_pred_hype = adaboost_model_hype.predict(X_train_2)
gbr_pred_hype = gbr_model_hype.predict(X_train_2)
svr_pred_hype = svr_model_hype.predict(X_train_2.toarray())

# Combine base model predictions for meta-model input
combine_X_pred_test_hype = pd.concat([
	pd.DataFrame(rf_pred_hype),
	pd.DataFrame(xgb_pred_hype),
	pd.DataFrame(ridge_pred_hype),
    pd.DataFrame(knn_pred_hype),
    pd.DataFrame(lgbm_pred_hype),
    pd.DataFrame(catboost_pred_hype),
    pd.DataFrame(adaboost_pred_hype),
    pd.DataFrame(gbr_pred_hype),
    pd.DataFrame(svr_pred_hype)
], axis=1)

# Fit the final estimator on the combined probabilities and target values - BASE

In [36]:
# Ensure unique column names for stacking features
combine_X_pred_test_base.columns = [f'base_model_{i}' for i in range(combine_X_pred_test_base.shape[1])]

meta_xgb_base.fit(combine_X_pred_test_base, y_train_2)

# Fit the final estimator on the combined probabilities and target values - HYPE

In [26]:
# Ensure unique column names for stacking features
combine_X_pred_test_hype.columns = [f'hype_model_{i}' for i in range(combine_X_pred_test_hype.shape[1])]

meta_xgb_hype.fit(combine_X_pred_test_hype, y_train_2)

In [35]:
# Ensure unique column names for stacking features
combine_X_pred_test_hype.columns = [f'hype_model_{i}' for i in range(combine_X_pred_test_hype.shape[1])]

meta_xgb_hype_2.fit(combine_X_pred_test_hype, y_train_2)

# Predict / Evalute - Base

In [37]:
y_pred_base = meta_xgb_base.predict(combine_X_pred_test_base)

In [38]:
# Ensure predictions are non-negative
y_pred_base[y_pred_base < 0] = 0
y_train_2 = y_train_2.clip(lower=0)

In [39]:
mse_base = mean_squared_error(y_train_2, y_pred_base)
r2_base = r2_score(y_train_2, y_pred_base)
rmse_base = np.sqrt(mean_squared_error(y_train_2, y_pred_base))
print(f'Base Mean Squared Error: {mse_base}')
print(f'Base R2 Score: {r2_base}')
print(f"Base Stacking RMSE: {rmse_base:.2f}")

Base Mean Squared Error: 483.0663271584569
Base R2 Score: 0.6388529377661651
Base Stacking RMSE: 21.98


# Predict / Evalute - Hype

In [27]:
y_pred_hype = meta_xgb_hype.predict(combine_X_pred_test_hype)

In [28]:
# Ensure predictions are non-negative
y_pred_hype[y_pred_hype < 0] = 0
y_train_2 = y_train_2.clip(lower=0)

In [29]:
mse_hype = mean_squared_error(y_train_2, y_pred_hype)
r2_hype = r2_score(y_train_2, y_pred_hype)
rmse_hype = np.sqrt(mean_squared_error(y_train_2, y_pred_hype))
print(f'Hype Mean Squared Error: {mse_hype}')
print(f'Hype R2 Score: {r2_hype}')
print(f"Hype Stacking RMSE: {rmse_hype:.2f}")

Hype Mean Squared Error: 427.0761105267586
Hype R2 Score: 0.6807119975133393
Hype Stacking RMSE: 20.67


# HYPE 2

In [36]:
y_pred_hype_2 = meta_xgb_hype_2.predict(combine_X_pred_test_hype)

In [37]:
# Ensure predictions are non-negative
y_pred_hype_2[y_pred_hype_2 < 0] = 0
y_train_2 = y_train_2.clip(lower=0)

In [38]:
mse_hype_2 = mean_squared_error(y_train_2, y_pred_hype_2)
r2_hype_2 = r2_score(y_train_2, y_pred_hype_2)
rmse_hype_2 = np.sqrt(mean_squared_error(y_train_2, y_pred_hype_2))
print(f'Hype 2 Mean Squared Error: {mse_hype_2}')
print(f'Hype 2 R2 Score: {r2_hype_2}')
print(f"Hype 2 Stacking RMSE: {rmse_hype_2:.2f}")

Hype 2 Mean Squared Error: 867.4674958686329
Hype 2 R2 Score: 0.3514693115557733
Hype 2 Stacking RMSE: 29.45


# Evaluate individual RMSE - BASE

In [40]:
#rf_pred_base[rf_pred_base < 0] = 0
xgb_pred_base[xgb_pred_base < 0] = 0
ridge_pred_base[ridge_pred_base < 0] = 0
knn_pred_base[knn_pred_base < 0] = 0
lgbm_pred_base[lgbm_pred_base < 0] = 0
catboost_pred_base[catboost_pred_base < 0] = 0
adaboost_pred_base[adaboost_pred_base < 0] = 0
gbr_pred_base[gbr_pred_base < 0] = 0
#svr_pred_base[svr_pred_base < 0] = 0

#print(f"Base RF RMSE: {np.sqrt(mean_squared_error(y_train_2, rf_pred_base))}")
print(f"Base XGB RMSE: {np.sqrt(mean_squared_error(y_train_2, xgb_pred_base))}")
print(f"Base Ridge RMSE: {np.sqrt(mean_squared_error(y_train_2, ridge_pred_base))}")
print(f"Base KNN RMSE: {np.sqrt(mean_squared_error(y_train_2, knn_pred_base))}")
print(f"Base LGBM RMSE: {np.sqrt(mean_squared_error(y_train_2, lgbm_pred_base))}")
print(f"Base CatBoost RMSE: {np.sqrt(mean_squared_error(y_train_2, catboost_pred_base))}")
print(f"Base AdaBoost RMSE: {np.sqrt(mean_squared_error(y_train_2, adaboost_pred_base))}")
print(f"Base GBR RMSE: {np.sqrt(mean_squared_error(y_train_2, gbr_pred_base))}")
#print(f"Base SVR RMSE: {np.sqrt(mean_squared_error(y_train_2, svr_pred_base))}")

Base XGB RMSE: 32.954982359331474
Base Ridge RMSE: 33.783243796145044
Base KNN RMSE: 35.269071102612024
Base LGBM RMSE: 32.98349148167305
Base CatBoost RMSE: 31.02505216694233
Base AdaBoost RMSE: 35.91112986771845
Base GBR RMSE: 34.12870180471963


# Evaluate individual RMSE - HYPE

In [30]:
rf_pred_hype[rf_pred_hype < 0] = 0
xgb_pred_hype[xgb_pred_hype < 0] = 0
ridge_pred_hype[ridge_pred_hype < 0] = 0
knn_pred_hype[knn_pred_hype < 0] = 0
lgbm_pred_hype[lgbm_pred_hype < 0] = 0
catboost_pred_hype[catboost_pred_hype < 0] = 0
adaboost_pred_hype[adaboost_pred_hype < 0] = 0
gbr_pred_hype[gbr_pred_hype < 0] = 0
svr_pred_hype[svr_pred_hype < 0] = 0

print(f"Hype RF RMSE: {np.sqrt(mean_squared_error(y_train_2, rf_pred_hype))}")
print(f"Hype XGB RMSE: {np.sqrt(mean_squared_error(y_train_2, xgb_pred_hype))}")
print(f"Hype Ridge RMSE: {np.sqrt(mean_squared_error(y_train_2, ridge_pred_hype))}")
print(f"Hype KNN RMSE: {np.sqrt(mean_squared_error(y_train_2, knn_pred_hype))}")
print(f"Hype LGBM RMSE: {np.sqrt(mean_squared_error(y_train_2, lgbm_pred_hype))}")
print(f"Hype CatBoost RMSE: {np.sqrt(mean_squared_error(y_train_2, catboost_pred_hype))}")
print(f"Hype AdaBoost RMSE: {np.sqrt(mean_squared_error(y_train_2, adaboost_pred_hype))}")
print(f"Hype GBR RMSE: {np.sqrt(mean_squared_error(y_train_2, gbr_pred_hype))}")
print(f"Hype SVR RMSE: {np.sqrt(mean_squared_error(y_train_2, svr_pred_hype))}")

Hype RF RMSE: 32.570904283801404
Hype XGB RMSE: 32.31704108028705
Hype Ridge RMSE: 33.808832744329884
Hype KNN RMSE: 33.51490459288633
Hype LGBM RMSE: 32.49031939296803
Hype CatBoost RMSE: 31.013358548717434
Hype AdaBoost RMSE: 33.044839419054654
Hype GBR RMSE: 32.68357137689161
Hype SVR RMSE: 34.5023580266961


# Predict Test Data with meta-model

In [None]:
encoded_train_df = pd.DataFrame(X_encoded_scaled.toarray(), columns=encoder.get_feature_names_out(cat_col))

In [51]:
encoded_test_data = pd.read_csv('data/encoded_test_data.csv')

In [52]:
# Convert DataFrame to numpy array, then to sparse CSR matrix
X_encoded_test_sparse = sparse.csr_matrix(encoded_test_data.values)

In [53]:
encoded_test_df = pd.DataFrame(X_encoded_test_sparse.toarray(), columns=encoded_test_data.columns)

In [54]:
# Ensure test DataFrame has same columns as train DataFrame after one-hot encoding

def align_test_to_train(train_df, test_df):
    # Add missing columns to test_df, fill with 0
    missing_cols = set(train_df.columns) - set(test_df.columns)
    for col in missing_cols:
        test_df[col] = 0

    # Remove extra columns from test_df
    extra_cols = set(test_df.columns) - set(train_df.columns)
    test_df = test_df.drop(columns=extra_cols)

    # Reorder columns to match train_df
    test_df = test_df[train_df.columns]
    return test_df

# Convert sparse matrices to DataFrames using correct column names
encoded_cat_cols = encoder.get_feature_names_out(cat_col)
all_feature_names = num_col + list(encoded_cat_cols)

# Convert sparse matrices to DataFrames using correct column names
train_df = pd.DataFrame(X_encoded_scaled.toarray(), columns=all_feature_names)
test_df = pd.DataFrame(X_encoded_test_sparse.toarray(), columns=encoded_test_data.columns)

aligned_test_df = align_test_to_train(train_df, test_df)

  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[col] = 0
  test_df[co

In [None]:
# Prepare raw test data for CatBoost prediction
# 1. Load your raw test data (not encoded)
raw_test_data = pd.read_csv('data/test_data_with_everything_final.csv')  # Update path/filename as needed

# 2. Drop datetime columns that CatBoost cannot handle
datetime_cols = ['departure_time', 'arrival_time', 'departure_date', 'arrival_date']
raw_test_data_catboost = raw_test_data.drop(columns=datetime_cols)

# 3. Get categorical feature indices for CatBoost
cat_features_idx_catboost = [raw_test_data_catboost.columns.get_loc(col) for col in cat_col if col in raw_test_data_catboost.columns]

In [None]:
# Make sure test columns match training columns exactly
raw_test_data_catboost = raw_test_data_catboost[X_train_1_raw_catboost.columns]

# Ensure categorical columns are string type
for col in cat_col:
    if col in raw_test_data_catboost.columns:
        raw_test_data_catboost[col] = raw_test_data_catboost[col].astype(str)

In [None]:
# Generate base model predictions for the test set
rf_pred_test = rf_model_hype.predict(aligned_test_df)
xgb_pred_test = xgb_model_hype.predict(aligned_test_df)
ridge_pred_test = ridge_model_hype.predict(aligned_test_df.to_numpy())
knn_pred_test = knn_model_hype.predict(aligned_test_df)
lgbm_pred_test = lgbm_model_hype.predict(aligned_test_df)
catboost_pred_test = catboost_model_hype.predict(raw_test_data_catboost)
adaboost_pred_test = adaboost_model_hype.predict(aligned_test_df)
gbr_pred_test = gbr_model_hype.predict(aligned_test_df)
svr_pred_test = svr_model_hype.predict(aligned_test_df.to_numpy())

# Combine predictions into a DataFrame with correct column names
combine_X_pred_test = pd.DataFrame({
	'hype_model_0': rf_pred_test,
	'hype_model_1': xgb_pred_test,
	'hype_model_2': ridge_pred_test,
	'hype_model_3': knn_pred_test,
	'hype_model_4': lgbm_pred_test,
	'hype_model_5': catboost_pred_test,
	'hype_model_5': adaboost_pred_test,
	'hype_model_6': gbr_pred_test,
	'hype_model_7': svr_pred_test
})



In [70]:
# Ensure combine_X_pred_test has all 9 base model predictions with unique column names
combine_X_pred_test = pd.DataFrame({
	'hype_model_0': rf_pred_test,
	'hype_model_1': xgb_pred_test,
	'hype_model_2': ridge_pred_test,
	'hype_model_3': knn_pred_test,
	'hype_model_4': lgbm_pred_test,
	'hype_model_5': catboost_pred_test,  # CatBoost predictions (raw_test_data_catboost)
	'hype_model_6': adaboost_pred_test,
	'hype_model_7': gbr_pred_test,
	'hype_model_8': svr_pred_test
})

In [None]:
# Generate base model predictions for the test set
rf_pred_test = rf_model_hype.predict(aligned_test_df)
xgb_pred_test = xgb_model_hype.predict(aligned_test_df)
ridge_pred_test = ridge_model_hype.predict(aligned_test_df.to_numpy())
knn_pred_test = knn_model_hype.predict(aligned_test_df)
lgbm_pred_test = lgbm_model_hype.predict(aligned_test_df)
catboost_pred_test = catboost_model_hype.predict(raw_test_data_catboost)
# CatBoost expects raw features, not encoded; skip or use raw test if available
adaboost_pred_test = adaboost_model_hype.predict(aligned_test_df)
gbr_pred_test = gbr_model_hype.predict(aligned_test_df)
svr_pred_test = svr_model_hype.predict(aligned_test_df.to_numpy())

# Combine predictions into a DataFrame with correct column names
combine_X_pred_test = pd.DataFrame({
	'hype_model_0': rf_pred_test,
	'hype_model_1': xgb_pred_test,
	'hype_model_2': ridge_pred_test,
	'hype_model_3': knn_pred_test,
	'hype_model_4': lgbm_pred_test,
	'hype_model_5': catboost_pred_test, # Only if you have raw test data for CatBoost
	'hype_model_5': adaboost_pred_test,
	'hype_model_6': gbr_pred_test,
	'hype_model_7': svr_pred_test
})



CatBoostError: Bad value for num_feature[non_default_doc_idx=0,feature_idx=4]="TU 32AIMF": Cannot convert 'TU 32AIMF' to float

In [None]:
# Generate base model predictions for the test set
rf_pred_test = rf_model_hype.predict(aligned_test_df)
xgb_pred_test = xgb_model_hype.predict(aligned_test_df)
ridge_pred_test = ridge_model_hype.predict(aligned_test_df.to_numpy())
knn_pred_test = knn_model_hype.predict(aligned_test_df)
lgbm_pred_test = lgbm_model_hype.predict(aligned_test_df)
catboost_pred_test = catboost_model_hype.predict(raw_test_data_catboost, cat_features=cat_features_idx_catboost)
# CatBoost expects raw features, not encoded; skip or use raw test if available
adaboost_pred_test = adaboost_model_hype.predict(aligned_test_df)
gbr_pred_test = gbr_model_hype.predict(aligned_test_df)
svr_pred_test = svr_model_hype.predict(aligned_test_df.to_numpy())

# Combine predictions into a DataFrame with correct column names
combine_X_pred_test = pd.DataFrame({
	'hype_model_0': rf_pred_test,
	'hype_model_1': xgb_pred_test,
	'hype_model_2': ridge_pred_test,
	'hype_model_3': knn_pred_test,
	'hype_model_4': lgbm_pred_test,
	'hype_model_5': catboost_pred_test, # Only if you have raw test data for CatBoost
	'hype_model_5': adaboost_pred_test,
	'hype_model_6': gbr_pred_test,
	'hype_model_7': svr_pred_test
})

# If you have CatBoost predictions for test, insert them at the correct position



TypeError: CatBoostRegressor.predict() got an unexpected keyword argument 'cat_features'

In [71]:
# Predict using the meta-model
y_pred_test = meta_xgb_hype.predict(combine_X_pred_test)

In [73]:
# Ensure predictions are non-negative
y_pred_test[y_pred_test < 0] = 0
y_train_2 = y_train_2.clip(lower=0)

In [75]:
y_pred_test_df = pd.DataFrame(y_pred_test, columns=['target'])

In [None]:
first_test_df = pd.read_csv('data/Test.csv')
first_test_df = first_test_df[['ID']]

Unnamed: 0,ID
0,test_id_0
1,test_id_1
2,test_id_2
3,test_id_3
4,test_id_4
...,...
9328,test_id_9328
9329,test_id_9329
9330,test_id_9330
9331,test_id_9331


In [82]:
final_submission_df = pd.concat([first_test_df, y_pred_test_df], axis=1)
final_submission_df.to_csv('data/flight_delay_final_test_submission.csv', index=False)

In [83]:
final_submission_df

Unnamed: 0,ID,target
0,test_id_0,14.608800
1,test_id_1,96.567245
2,test_id_2,94.432655
3,test_id_3,16.899582
4,test_id_4,24.557573
...,...,...
9328,test_id_9328,100.799919
9329,test_id_9329,14.658327
9330,test_id_9330,29.645308
9331,test_id_9331,29.645308


# Evaluate the final meta model

In [None]:
mse = mean_squared_error(y_train_2, y_pred)
r2 = r2_score(y_train_2, y_pred)
rmse = np.sqrt(mean_squared_error(y_train_2, y_pred))
print(f'Mean Squared Error: {mse}')
print(f'R2 Score: {r2}')
print(f"Stacking RMSE: {rmse:.2f}")

# Evaluate the individual base models

# Get feature importances from each base model

In [None]:
# Random Forest feature importances
rf_importances = rf_model.feature_importances_

# XGBoost feature importances
xgb_importances = xgb_model.feature_importances_

# KNN does not provide feature importances, so skip it

# Ridge feature coefficients (absolute value for importance)
ridge_importances = np.abs(ridge_model.coef_)

# LightGBM feature importances
lgbm_importances = lgbm_model.feature_importances_

# CatBoost feature importances
catboost_importances = catboost.get_feature_importance()

# AdaBoost feature importances (using the base estimator's feature importances)
adaboost_importances = adaboost_model.feature_importances_

# Gradient Boosting feature importances
gbr_importances = gbr_model.feature_importances_

# Support Vector Regression does not provide feature importances, so skip it

# Get encoded categorical column names
encoded_cat_cols = encoder.get_feature_names_out(cat_col)

# Feature names
feature_names = num_col + list(encoded_cat_cols)

# Create a DataFrame for each model's importances
importances_df = pd.DataFrame({
    'feature': feature_names,
    'RandomForest': rf_importances,
    'XGBoost': xgb_importances,
    'Ridge': ridge_importances,
    'LightGBM': lgbm_importances,
    #'CatBoost': catboost_importances,
    'AdaBoost': adaboost_importances,
    'GradientBoosting': gbr_importances
})

# Show top 15 features by average importance across models
importances_df['avg_importance'] = importances_df[[
    'RandomForest',
    'XGBoost', 
    'Ridge', 
    'LightGBM',
    #'CatBoost',
    'AdaBoost',
    'GradientBoosting'
    ]].mean(axis=1)
importances_df.sort_values('avg_importance', ascending=False)

In [None]:
aligned_test_df

# Random Forrest - Hyperparameters Randomised Search

In [None]:
# Create Random Forest model
rf = RandomForestRegressor(random_state=42)

# Define parameter grid for Random Forest
rf_param_grid = {
    'n_estimators': [100, 200, 300, 500], # More trees can improve performance, but increase training time
    'max_depth': [None, 5, 10, 20, 30, 50],     # None means nodes are expanded until all leaves are pure
    'min_samples_split': [2, 5, 10, 20],        # Minimum samples required to split a node
    'min_samples_leaf': [1, 2, 4, 8],                  # Minimum samples required at a leaf node
    'max_features': ['auto', 'sqrt', 'log2'],          # Number of features to consider at each split
    'bootstrap': [True, False]                         # Whether bootstrap samples are used
}

# Randomized Search CV
rf_random_search = RandomizedSearchCV(
    estimator=rf,
    param_distributions=rf_param_grid,
    n_iter=50,  # Number of parameter settings sampled
    cv=3,
    scoring='neg_root_mean_squared_error',
    verbose=2,
    random_state=42,
    n_jobs=-1
)

# Fit to training data
rf_random_search.fit(X_train_1, y_train_1)

# Best estimator
best_rf = rf_random_search.best_estimator_

Fitting 3 folds for each of 50 candidates, totalling 150 fits


  warn(


In [8]:
best_rf

# CatBoost - Hyperparameters Randomised Search

In [None]:
catboost_param_grid = {
    'iterations': [100, 300, 500],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'depth': [4, 6, 8, 10],
    'l2_leaf_reg': [1, 3, 5, 7, 9],
    'border_count': [32, 64, 128],
    'bagging_temperature': [0, 0.5, 1, 2],
    'random_strength': [1, 2, 5, 10]
}

catboost = CatBoostRegressor(random_state=42, verbose=0)

catboost_search = RandomizedSearchCV(
    estimator=catboost,
    param_distributions=catboost_param_grid,
    n_iter=30,  # Number of parameter settings sampled
    cv=3,
    scoring='neg_root_mean_squared_error',
    verbose=2,
    random_state=42,
    n_jobs=-1
)

# Drop datetime columns that CatBoost cannot handle
datetime_cols = ['departure_time', 'arrival_time', 'departure_date', 'arrival_date']
X_train_1_raw_catboost = X_train_1_raw.drop(columns=datetime_cols)

# Update categorical feature indices for the new dataframe
cat_features_idx_catboost = [X_train_1_raw_catboost.columns.get_loc(col) for col in cat_col if col in X_train_1_raw_catboost.columns]

catboost_search.fit(X_train_1_raw_catboost, y_train_1_raw, cat_features=cat_features_idx_catboost)

best_catboost = catboost_search.best_estimator_
print("Best CatBoost parameters:", catboost_search.best_params_)

Fitting 3 folds for each of 30 candidates, totalling 90 fits
Best CatBoost parameters: {'random_strength': 10, 'learning_rate': 0.1, 'l2_leaf_reg': 9, 'iterations': 500, 'depth': 8, 'border_count': 64, 'bagging_temperature': 0.5}


# LGBMRegressor - Hyperparameters Randomised Search

In [12]:
lgbm_param_grid = {
    'n_estimators': [100, 200, 300, 500],
    'max_depth': [3, 5, 7, 10, -1],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'num_leaves': [15, 31, 63, 127],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0],
    'reg_alpha': [0, 0.1, 0.5, 1.0],
    'reg_lambda': [0, 0.1, 0.5, 1.0]
}

lgbm = LGBMRegressor(random_state=RSEED)

lgbm_search = RandomizedSearchCV(
    estimator=lgbm,
    param_distributions=lgbm_param_grid,
    n_iter=30,
    cv=3,
    scoring='neg_root_mean_squared_error',
    verbose=2,
    random_state=RSEED,
    n_jobs=-1
)

lgbm_search.fit(X_train_1, y_train_1)

best_lgbm = lgbm_search.best_estimator_
print("Best LGBM parameters:", lgbm_search.best_params_)

Fitting 3 folds for each of 30 candidates, totalling 90 fits
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.014500 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4623
[LightGBM] [Info] Number of data points in the train set: 79797, number of used features: 811
[LightGBM] [Info] Start training from score 9.881625
Best LGBM parameters: {'subsample': 0.8, 'reg_lambda': 1.0, 'reg_alpha': 1.0, 'num_leaves': 63, 'n_estimators': 300, 'max_depth': -1, 'learning_rate': 0.05, 'colsample_bytree': 1.0}


# KNN - Hyperparameters Randomised Search

In [7]:
knn_param_grid = {
    'n_neighbors': [3, 5, 7, 9, 11, 15, 21, 25, 31, 41],
    'weights': ['uniform', 'distance'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'leaf_size': [10, 20, 30, 40, 50, 60],
    'p': [1, 2],  # 1: Manhattan, 2: Euclidean
    'metric': ['minkowski', 'euclidean', 'manhattan']
}

knn = KNeighborsRegressor()

knn_search = RandomizedSearchCV(
    estimator=knn,
    param_distributions=knn_param_grid,
    n_iter=30,
    cv=3,
    scoring='neg_root_mean_squared_error',
    verbose=2,
    random_state=RSEED,
    n_jobs=-1
)

# KNN does not support sparse input, so convert to dense
knn_search.fit(X_train_1.toarray(), y_train_1)

best_knn = knn_search.best_estimator_
print("Best KNN parameters:", knn_search.best_params_)

Fitting 3 folds for each of 30 candidates, totalling 90 fits


KeyboardInterrupt: 

In [42]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.base import clone
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor

def get_oof_predictions(models: dict, X, y, n_splits=5, random_state=42, shuffle=True):
    """
    Compute out-of-fold (OOF) predictions for a dict of models.

    Parameters
    ----------
    models : dict[str, estimator]
        Mapping of {name: sklearn-like regressor}.
    X : array-like or DataFrame
    y : array-like

    Returns
    -------
    oof_df : DataFrame (n_samples x n_models)
        OOF predictions for each model.
    fold_preds : dict[str, list[np.ndarray]]
        List of per-fold validation predictions in the original sample order indices.
    rmses : dict[str, float]
        OOF RMSE per model.
    cv_indices : list[tuple[np.ndarray, np.ndarray]]
        List of (train_idx, val_idx) per fold.
    """
    kf = KFold(n_splits=n_splits, shuffle=shuffle, random_state=random_state)

    oof = {name: np.zeros(len(y), dtype=float) for name in models}
    fold_preds = {name: [] for name in models}
    cv_indices = []

    for fold, (tr_idx, va_idx) in enumerate(kf.split(X)):
        cv_indices.append((tr_idx, va_idx))
        X_tr, X_va = (X[tr_idx], X[va_idx]) if not hasattr(X, "iloc") else (X.iloc[tr_idx], X.iloc[va_idx])
        y_tr, y_va = y[tr_idx], y[va_idx]

        for name, est in models.items():
            est_fold = clone(est)
            est_fold.fit(X_tr, y_tr)
            p = est_fold.predict(X_va)
            oof[name][va_idx] = p
            fold_preds[name].append(p)

    oof_df = pd.DataFrame(oof, index=(X.index if hasattr(X, "index") else None))
    rmses = {name: float(np.sqrt(mean_squared_error(y, oof_df[name]))) for name in oof_df.columns}
    return oof_df, fold_preds, rmses, cv_indices

def correlation_matrix(pred_df: pd.DataFrame):
    """Return the Pearson correlation matrix of columns (models)."""
    return pred_df.corr()

def fit_meta_xgb(oof_preds: pd.DataFrame, y, params=None):
    """Fit a regularized XGB meta-model on OOF predictions and return it."""
    default_params = dict(
        objective="reg:squarederror",
        random_state=42,
        n_estimators=200,
        max_depth=2,
        learning_rate=0.05,
        subsample=0.7,
        colsample_bytree=0.7,
        reg_alpha=1.0,
        reg_lambda=2.0,
        n_jobs=-1,
    )
    if params:
        default_params.update(params)
    meta = XGBRegressor(**default_params)
    meta.fit(oof_preds, y)
    return meta


In [None]:
# --- Stacking diagnostics & meta-model training ---
from stacking_diagnostics import get_oof_predictions, correlation_matrix, fit_meta_xgb
import pandas as pd

# Fix Ridge solver for sparse input
ridge_default = Ridge(random_state=42, solver='sag')
ridge_hype = Ridge(alpha=1.5, random_state=42, solver='sag')

# 1) Define your base and tuned model dicts using the names from your notebook
# Example (EDIT to match your variable names):
base_models = {
    "RF": random_forest_default,
    "XGB": xgb_default,
    "Ridge": ridge_default,
    "KNN": knn_default,
    "LGBM": lgbm_default,
#     "CatBoost": catboost_default,
    "AdaBoost": adaboost_default,
    "GBR": gbr_default,
    "SVR": svr_default,
 }
tuned_models = {
    "RF": random_forest_hype,
    "XGB": xgb_hype,
    "Ridge": ridge_hype,
    "KNN": knn_hype,
    "LGBM": lgbm_hype,
#     "CatBoost": catboost_hype,
    "AdaBoost": adaboost_hype,
    "GBR": gbr_hype,
    "SVR": svr_hype,
 }

# 2) Choose your feature matrix and target vector (EDIT to match your variables)
X_final = X_encoded_scaled
y_final = y

# 3) OOF predictions for base models
oof_base, fold_preds_base, rmses_base, cv_idx = get_oof_predictions(base_models, X_final, y_final, n_splits=5, random_state=42)
# 4) OOF predictions for tuned models
oof_tuned, fold_preds_tuned, rmses_tuned, _ = get_oof_predictions(tuned_models, X_final, y_final, n_splits=5, random_state=42)

# 5) Correlation matrices
corr_base = correlation_matrix(oof_base)
corr_tuned = correlation_matrix(oof_tuned)

# 6) RMSE tables
rmse_base_df = pd.DataFrame.from_dict(rmses_base, orient='index', columns=['OOF_RMSE']).sort_values('OOF_RMSE')
rmse_tuned_df = pd.DataFrame.from_dict(rmses_tuned, orient='index', columns=['OOF_RMSE']).sort_values('OOF_RMSE')

display(corr_base) 
display(corr_tuned)
display(rmse_base_df)
display(rmse_tuned_df)

# 7) Fit regularized meta-XGBoost on the OOF predictions
meta_base = fit_meta_xgb(oof_base, y_final)     # meta-model over base learners
meta_tuned = fit_meta_xgb(oof_tuned, y_final)   # meta-model over tuned learners

# Optional: evaluate meta-models with cross-validated meta-OOM predictions or a holdout set if you have one.


TypeError: cg() got an unexpected keyword argument 'tol'

In [85]:
# Ridge - Hyperparameters Randomised Search
from sklearn.linear_model import Ridge

ridge_param_grid = {
    'alpha': [0.01, 0.1, 0.5, 1.0, 1.5, 2.0, 5.0, 10.0, 20.0],
    'solver': ['auto', 'sag', 'lsqr', 'sparse_cg', 'cholesky', 'lbfgs'],
    'fit_intercept': [True, False],
    'tol': [1e-4, 1e-3]
}

ridge = Ridge(random_state=RSEED)

ridge_search = RandomizedSearchCV(
    estimator=ridge,
    param_distributions=ridge_param_grid,
    n_iter=30,
    cv=3,
    scoring='neg_root_mean_squared_error',
    verbose=2,
    random_state=RSEED,
    n_jobs=-1
)

# Ridge does not support sparse input, so convert to dense
ridge_search.fit(X_train_1.toarray(), y_train_1)

best_ridge = ridge_search.best_estimator_
print("Best Ridge parameters:", ridge_search.best_params_)

Fitting 3 folds for each of 30 candidates, totalling 90 fits


OSError: [WinError 1450] Insufficient system resources exist to complete the requested service