# Modelling Selected Parameters at 0:00 + Day Phase

## RidgeCV - Hyperparameter Tuning

In [5]:
import pandas as pd
import os

In [None]:
# Read the raw training CSV and push it through the project's preprocessing
# pipeline (per the original note, this also standardizes the data).
from pipelines import pipeline

data_file = os.path.join(
    '..', '..', '..', '..',
    'data', 'raw', 'train.csv',
)
# The first CSV column becomes the index. The frame is handed straight to the
# pipeline so `train_data` only ever refers to the transformed result.
train_data = pipeline.fit_transform(
    pd.read_csv(data_file, index_col=0, low_memory=False)
)
train_data.head()

In [2]:
# Split the transformed frame: 'bg+1:00' is the prediction target,
# every remaining column is used as a feature.
y = train_data['bg+1:00']
X = train_data.drop('bg+1:00', axis=1)

In [3]:
# Grid-search RidgeCV settings with 5-fold cross-validation.
from sklearn.linear_model import RidgeCV
# KFold and GridSearchCV were previously used without being imported, so this
# cell only worked when they happened to be left over in the kernel namespace.
from sklearn.model_selection import GridSearchCV, KFold

# Candidate settings. Each 'alphas' entry is itself a list of regularization
# strengths that RidgeCV selects from internally during its own fit.
# NOTE(review): y looks like a single target column, in which case
# 'alpha_per_target' presumably has no effect -- confirm.
param_grid = {
    'alpha_per_target': [True, False],
    'alphas': [[0.1, 1.0, 10.0], [0.1, 0.5, 1.0], [0.1, 0.5, 1.0, 5.0], [0.1, 0.5, 1.0, 5.0, 10.0]],
    'fit_intercept': [True, False]
}

# Shuffled folds with a fixed seed so the search is reproducible.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
ridge = RidgeCV()
grid_search = GridSearchCV(estimator=ridge, param_grid=param_grid, cv=kf, scoring='neg_mean_squared_error', n_jobs=-1)

grid_search.fit(X=X, y=y)

# Get the best parameters and best score.
# The scorer is *negated* MSE, so the best score is the one closest to zero.
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best parameters found: ", best_params)
print("Best score: ", best_score)

Best parameters found:  {'alpha_per_target': True, 'alphas': [0.1, 1.0, 10.0], 'fit_intercept': True}
Best score:  -4.650405998790541


In [4]:
# Create the model with the best parameters, fit it, and save it to disk.
import joblib

ridge = RidgeCV(**best_params)
# Fit on the full training set before persisting: previously the estimator was
# dumped unfitted, so a loaded 'ridge.model.pkl' could not predict.
# A fitted estimator can still be re-fit or cloned by downstream code.
ridge.fit(X, y)
joblib.dump(ridge, 'ridge.model.pkl')

['ridge.model.pkl']