# Hyperparameter optimization - Grid search
A grid search implemented by hand for learning purposes, without using any hyperparameter-optimization library.

In [93]:
# Importing libraries
import numpy as np
import pandas as pd
from time import time

from xgboost import XGBRegressor
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import mean_squared_error
from sklearn import preprocessing
from itertools import product

print ("Import done")

Import done


In [94]:
# Read the fold-annotated training set, the test set and the submission template
df = pd.read_csv("../input/train_folds.csv")
df_test = pd.read_csv("../input/test.csv")
sample_submission = pd.read_csv("../input/sample_submission.csv")

# Feature columns are everything except the row id, the label and the fold marker
useful_features = [
    name for name in df.columns
    if name not in ("id", "target", "kfold")
]
# Columns are split into categorical / numerical by the substring in their name
object_cols = [name for name in useful_features if 'cat' in name]
numerical_cols = [name for name in useful_features if 'cont' in name]

# Keep only the feature columns of the test set
df_test = df_test[useful_features]

print ("Data loaded")

Data loaded


In [95]:
# Candidate values for every hyperparameter; the search space is the
# Cartesian product of these lists.

param_grid = {
    "n_estimators": [1000, 500, 50],
    "max_depth": [3, 5, 7, 11, 13],
    "learning_rate": [1, 0.5, 0.01, 0.005],
    "subsample": [.5, .6, .7, .8, .9, 1.0],
    "reg_alpha": [1.0, 30.0, 50.0],
}

# Parallel views of the grid: names[i] is the parameter whose candidates are values[i]
param_names = list(param_grid)
param_values = list(param_grid.values())

# One tuple per combination, with entries ordered like param_names
param_combinations = list(product(*param_values))
total_combinations = len(param_combinations)
print("Number of combinations:" ,total_combinations)
param_grid

Number of combinations: 1080


{'n_estimators': [1000, 500, 50],
 'max_depth': [3, 5, 7, 11, 13],
 'learning_rate': [1, 0.5, 0.01, 0.005],
 'subsample': [0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
 'reg_alpha': [1.0, 30.0, 50.0]}

## Train the model and search for best hyperparameters

In [98]:
# Exhaustive grid search: every hyperparameter combination is scored with
# 5-fold cross-validation and the one with the lowest mean validation RMSE
# is kept.
# Reads df, useful_features, object_cols, numerical_cols, param_names and
# param_combinations from the cells above; leaves best_score and best_params
# behind for the cells below.


def _prepare_fold(fold):
    """Build the encoded and scaled train/validation split for one fold.

    Rows whose ``kfold`` value equals ``fold`` form the validation set and all
    other rows form the training set. The ordinal encoder and the scaler are
    fitted on the training part only and then applied to the validation part.

    Returns:
        tuple: ``(xtrain, ytrain, xvalid, yvalid)``.
    """
    train_rows = df[df.kfold != fold].reset_index(drop=True)
    valid_rows = df[df.kfold == fold].reset_index(drop=True)

    # .copy() gives independent frames, so the column assignments below do not
    # trigger pandas' SettingWithCopyWarning
    xtrain = train_rows[useful_features].copy()
    xvalid = valid_rows[useful_features].copy()

    # categorical columns -> ordinal codes
    ordinal_encoder = preprocessing.OrdinalEncoder()
    xtrain[object_cols] = ordinal_encoder.fit_transform(xtrain[object_cols])
    xvalid[object_cols] = ordinal_encoder.transform(xvalid[object_cols])

    # numerical columns -> zero mean / unit variance
    scaler = preprocessing.StandardScaler()
    xtrain[numerical_cols] = scaler.fit_transform(xtrain[numerical_cols])
    xvalid[numerical_cols] = scaler.transform(xvalid[numerical_cols])

    return xtrain, train_rows.target, xvalid, valid_rows.target


# The preprocessing depends only on the fold, never on the hyperparameters, so
# it is done once here instead of once per combination. This keeps all five
# prepared splits in memory for the duration of the search.
n_folds = 5  # folds 0..4 of the "kfold" column are used
fold_data = [_prepare_fold(fold) for fold in range(n_folds)]

num_iteration = 0
best_score = np.inf  # any finite score beats this
best_params = {}
n_rounds = len(param_combinations)
search_start = time()

# iterate over every combination of hyperparameters
for num_iteration, param_combination in enumerate(param_combinations, start=1):
    # hyperparameters for this round
    actual_params = dict(zip(param_names, param_combination))
    print("Round",num_iteration," / ",n_rounds)

    start_time = time()
    scores = []
    print("Fold: ",end=" ")

    for fold, (xtrain, ytrain, xvalid, yvalid) in enumerate(fold_data):
        # NOTE(review): tree_method='gpu_hist' is reported as deprecated by
        # recent XGBoost releases - confirm against the installed version.
        model = XGBRegressor(random_state=fold,
                             n_jobs=-1,               # use all CPU cores
                             tree_method='gpu_hist',  # train on the GPU
                             eval_metric='rmse',
                             **actual_params)         # this round's hyperparameters

        model.fit(xtrain, ytrain)
        preds_valid = model.predict(xvalid)
        # sqrt of the MSE instead of squared=False, which newer scikit-learn
        # releases no longer accept
        rmse = np.sqrt(mean_squared_error(yvalid, preds_valid))
        scores.append(rmse)

        print(fold,end =" ")

    time_elapsed = time() - start_time
    mean_scores = np.mean(scores)

    print ("\n\nMean RMSE: ",mean_scores)

    if mean_scores < best_score:
        best_score = mean_scores
        best_params = actual_params.copy()

    print ("Best RMSE: ",best_score)
    print ("Actual params: ",actual_params)
    print ("Best params:   ",best_params)

    # Round times vary a lot with n_estimators / max_depth, so the remaining
    # time is extrapolated from the average round so far rather than from the
    # last round alone.
    rounds_left = n_rounds - num_iteration
    avg_round_time = (time() - search_start) / num_iteration
    print("Time elapsed %.2f min, estimated remaining time %.2f min\n"
          % (time_elapsed / 60, rounds_left * avg_round_time / 60))



Round 1  /  1
Fold:  0 1 2 3 4 

Mean MSE:  0.7735654713952858
Best MSE:  0.7735654713952858
Actual params:  {'n_estimators': 1000, 'max_depth': 3, 'learning_rate': 1, 'subsample': 0.5, 'reg_alpha': 1.0}
Best params:    {'n_estimators': 1000, 'max_depth': 3, 'learning_rate': 1, 'subsample': 0.5, 'reg_alpha': 1.0}
Time elapsed 0.36 min, estimated remaining time 0.00 min



## Train and predict with best hyperparameters

In [99]:
# Retrain one model per fold with the best hyperparameters found by the grid
# search, score it on the held-out fold and predict the test set.
# final_predictions collects one test-prediction array per fold.
final_predictions = []
scores = []
for fold in range(5):

    train_rows = df[df.kfold != fold].reset_index(drop=True)
    valid_rows = df[df.kfold == fold].reset_index(drop=True)
    xtest = df_test.copy()

    ytrain = train_rows.target
    yvalid = valid_rows.target

    # .copy() gives independent frames, so the column assignments below do not
    # trigger pandas' SettingWithCopyWarning
    xtrain = train_rows[useful_features].copy()
    xvalid = valid_rows[useful_features].copy()

    # categorical columns -> ordinal codes (fitted on the training part only)
    ordinal_encoder = preprocessing.OrdinalEncoder()
    xtrain[object_cols] = ordinal_encoder.fit_transform(xtrain[object_cols])
    xvalid[object_cols] = ordinal_encoder.transform(xvalid[object_cols])
    xtest[object_cols] = ordinal_encoder.transform(xtest[object_cols])

    # numerical columns -> zero mean / unit variance (fitted on the training part only)
    scaler = preprocessing.StandardScaler()
    xtrain[numerical_cols] = scaler.fit_transform(xtrain[numerical_cols])
    xvalid[numerical_cols] = scaler.transform(xvalid[numerical_cols])
    xtest[numerical_cols] = scaler.transform(xtest[numerical_cols])

    # NOTE(review): tree_method='gpu_hist' is reported as deprecated by recent
    # XGBoost releases - confirm against the installed version.
    model = XGBRegressor(random_state=fold,
                         n_jobs=-1,               # use all CPU cores
                         tree_method='gpu_hist',  # train on the GPU
                         eval_metric='rmse',
                         **best_params)           # best hyperparameters from the search

    model.fit(xtrain, ytrain)
    preds_valid = model.predict(xvalid)
    test_preds = model.predict(xtest)
    final_predictions.append(test_preds)
    # sqrt of the MSE instead of squared=False, which newer scikit-learn
    # releases no longer accept
    rmse = np.sqrt(mean_squared_error(yvalid, preds_valid))
    print(fold,rmse)
    scores.append(rmse)


# the reported value is the mean of the per-fold RMSEs
print("RMSE final: ", np.mean(scores))

0 0.7739620540952099
1 0.7716795570097768
2 0.773534112818326
3 0.7736063901363198
4 0.7750452429167959
MSE final:  0.7735654713952858


In [100]:
# One column per fold model; the final prediction is the row-wise average
preds = np.column_stack(final_predictions).mean(axis=1)

In [101]:
# Put the averaged predictions into the submission template and write it out.
# Bracket assignment always creates or overwrites the "target" column;
# attribute assignment would only set a Python attribute (leaving the CSV
# without predictions) if the template had no "target" column yet.
sample_submission["target"] = preds
sample_submission.to_csv("submission.csv", index=False)