# Hyperparameter Tuning


In [1]:
import pandas as pd

from data import load_data_selected_features
from pipelines_selected_features import pipeline

# Name of the target column; everything else in the frames is a feature.
TARGET_COLUMN = 'bg+1:00'

train_data, additional_train_data, test_data = load_data_selected_features()

# Fit the preprocessing pipeline once on both training sets together.
# verify_integrity=True makes the concat raise if the two frames share index
# labels; the label-based split below would otherwise silently return
# duplicated / mixed rows.
all_train_data = pipeline.fit_transform(
    pd.concat([train_data, additional_train_data], verify_integrity=True)
)

# Split the transformed rows back into the two training sets by index label.
# test_data is not transformed in this cell.
# NOTE(review): assumes the pipeline returns a DataFrame that keeps the input
# index -- confirm; .loc raises KeyError for labels that are missing.
train_data = all_train_data.loc[train_data.index]
additional_train_data = all_train_data.loc[additional_train_data.index]

X_train = train_data.drop(columns=[TARGET_COLUMN])
y_train = train_data[TARGET_COLUMN]

X_additional_train = additional_train_data.drop(columns=[TARGET_COLUMN])
y_additional_train = additional_train_data[TARGET_COLUMN]

### Tune KNeighborsRegressor

In [None]:
from sklearn.neighbors import KNeighborsRegressor
from skopt.space import Integer, Categorical
from model_hyperparameter_tuning import tune_hyperparameters

# Search space handed to tune_hyperparameters.
# Review notes (space left unchanged so results stay comparable):
# - 'p' is the Minkowski power parameter, so it only has an effect when
#   metric is 'minkowski'; 'euclidean' is the same as 'minkowski' with p=2.
# - 'leaf_size' is passed to the BallTree/KDTree index; it affects speed and
#   memory rather than the predictions.
search_space = {
    'n_neighbors': Integer(3, 10),
    'weights': Categorical(['uniform', 'distance']),
    'p': Categorical([1, 2]),
    'metric': Categorical(['minkowski', 'euclidean', 'chebyshev']),
    'leaf_size': Integer(10, 50),
}

model = KNeighborsRegressor(n_jobs=2)

# Only the additional training set is used for this model; the primary
# train slots are filled with empty placeholders. Empty slices of the real
# data are used instead of pd.DataFrame() / pd.Series() so the placeholders
# keep the same columns, name and dtype. A dtype-less pd.Series() is what
# presumably triggers the pandas warning on the helper's
# pd.concat([y_train, y_additional_train]) line.
X_train_placeholder = X_additional_train.iloc[:0]
y_train_placeholder = y_additional_train.iloc[:0]

best_estimator, best_params = tune_hyperparameters(
    model,
    search_space,
    X_train_placeholder,
    y_train_placeholder,
    X_additional_train,
    y_additional_train,
    num_iter=20,
    n_splits=2,
)

16:27:01 - Start tuning KNeighborsRegressor
16:27:01 - Parameters: {'n_neighbors': Integer(low=3, high=10, prior='uniform', transform='identity'), 'weights': Categorical(categories=('uniform', 'distance'), prior=None), 'p': Categorical(categories=(1, 2), prior=None), 'metric': Categorical(categories=('minkowski', 'euclidean', 'chebyshev'), prior=None), 'leaf_size': Integer(low=10, high=50, prior='uniform', transform='identity')}
16:27:01 - Fitting the model
Fitting 2 folds for each of 1 candidates, totalling 2 fits


  y_all_train = pd.concat([y_train, y_additional_train])


Fitting 2 folds for each of 1 candidates, totalling 2 fits
Fitting 2 folds for each of 1 candidates, totalling 2 fits


In [None]:
import os
import joblib

# Persist the tuned estimator as models/<EstimatorClass>.best_estimator.pkl.
best_estimator_file_name = f'{model.__class__.__name__}.best_estimator.pkl'

# joblib.dump does not create missing parent directories, so make sure the
# output folder exists before writing (no-op when it is already there).
os.makedirs('models', exist_ok=True)
joblib.dump(best_estimator, os.path.join('models', best_estimator_file_name))