# GridSearchCV - odabir najboljih parametara

In [1]:
import numpy as np
import pandas as pd

In [2]:
def rmse(y_pred, y_target):
    """Return the root-mean-squared error between predictions and targets.

    Both inputs are converted to float NumPy arrays and flattened to 1-D
    before being compared, so a column vector of shape (n, 1) and a flat
    vector of shape (n,) are treated alike.

    Parameters
    ----------
    y_pred : array-like
        Predicted values (NumPy array, list, pandas Series, ...).
    y_target : array-like
        Reference values, with the same number of elements as ``y_pred``.

    Returns
    -------
    numpy.float64
        ``sqrt(mean((y_pred - y_target) ** 2))``.
    """
    # np.asarray lets callers pass lists or Series (which have no .reshape);
    # casting to float avoids wrap-around when subtracting unsigned integers.
    pred = np.asarray(y_pred, dtype=float).reshape(-1)
    target = np.asarray(y_target, dtype=float).reshape(-1)
    return np.sqrt(np.mean(np.square(pred - target)))

In [3]:
# Read the training table (new_train.csv) from the Kaggle input directory.
df = pd.read_csv(filepath_or_buffer='/kaggle/input/housepricesnew/new_train.csv')

In [4]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Target: SalePrice on a log1p scale, so errors computed later are in log space.
y = np.log1p(df['SalePrice'].values)

# Features: every remaining column, as a plain NumPy array.
x = df.drop(columns='SalePrice').values

`GridSearchCV` je metoda koja prima parametre koje treba testirati, radi unakrsnu validaciju za svaku kombinaciju parametara te odabire najbolje parametre za model. Mi ćemo raditi unakrsnu validaciju s `cv=5`.

## XGBRegressor

In [5]:
from xgboost import XGBRegressor


# Search space for XGBRegressor. Single-element lists pin a value; the six
# two-element lists give 2**6 = 64 parameter combinations.
xgb_parameters = dict(
    colsample_bytree=[0.4],
    gamma=[0],
    min_child_weight=[0, 1],
    learning_rate=[0.05, 0.01],
    max_depth=[3, 5],
    n_estimators=[10000],
    reg_alpha=[0, 0.1],
    reg_lambda=[0, 0.1],
    subsample=[0.6, 0.8],
)

# Base estimator: fixed seed for repeatability; 'gpu_hist' requests
# XGBoost's GPU histogram tree builder.
model_xgb = XGBRegressor(seed=42, tree_method='gpu_hist')

## LGBMRegressor

In [6]:
from lightgbm import LGBMRegressor


# Search space for LGBMRegressor. learning_rate is pinned; every other entry
# is varied, giving 3 * 2**10 = 3072 parameter combinations.
# NOTE(review): `subsample` appears to be a LightGBM alias of
# `bagging_fraction`; if so, searching both doubles the grid without adding
# distinct models -- confirm against the LightGBM parameter docs.
lgb_parameters = {
    'num_leaves': [4, 5],
    'learning_rate': [0.05],
    'n_estimators': [750, 1000, 5000],
    'max_bin': [50, 60],
    'bagging_fraction': [0.7, 0.8],
    'bagging_freq': [4, 5],
    'feature_fraction': [0.2, 0.3],
    'feature_fraction_seed': [8, 9],
    'bagging_seed': [8, 9],
    'min_data_in_leaf': [6, 7],
    'min_sum_hessian_in_leaf': [10, 11],
    'subsample': [0.6, 0.7],
}

# `tree_method='gpu_hist'` was removed here: it is an XGBoost argument, not a
# LightGBM one, so it only added a spurious unknown parameter to the model.
# NOTE(review): if GPU training is wanted, LightGBM selects it through its
# own device parameter and needs a GPU-enabled build -- confirm before use.
model_lgb = LGBMRegressor(objective='regression')

## SVR

In [7]:
from sklearn.svm import SVR

# Search space for SVR: 50 evenly spaced values for each of C and epsilon,
# i.e. 50 * 50 = 2500 parameter combinations.
svr_parameters = dict(
    C=np.linspace(0.01, 2, 50),
    epsilon=np.linspace(0.01, 0.99, 50),
)

# Base estimator with library defaults; C and epsilon are set by the search.
model_svr = SVR()

## ElasticNet

In [8]:
from sklearn.linear_model import ElasticNet






# Search space for ElasticNet: 40 alpha values x 50 l1_ratio values,
# i.e. 2000 parameter combinations.
enet_parameters = dict(
    alpha=np.linspace(0.0001, 2, 40),
    l1_ratio=np.linspace(0.001, 1.0, 50),
)

# Base estimator with library defaults; alpha and l1_ratio are set by the search.
model_enet = ElasticNet()

## GridSearchCV

In [9]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV


def _fit_grid(model, param_grid):
    """Build and fit a ``MinMaxScaler -> GridSearchCV(model)`` pipeline on (x, y).

    Parameters
    ----------
    model : estimator
        Unfitted regressor to tune.
    param_grid : dict
        Mapping of parameter name to the list/array of values to try.

    Returns
    -------
    sklearn.pipeline.Pipeline
        The fitted pipeline; step ``[1]`` is the fitted ``GridSearchCV``,
        exposing ``best_params_`` and ``best_score_``.
    """
    # `iid` is deliberately not passed: it was deprecated in scikit-learn 0.22
    # and removed in 0.24, where passing it raises a TypeError. Versions that
    # know the 'neg_root_mean_squared_error' scorer already behave like
    # `iid=False` by default.
    search = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        cv=5,
        n_jobs=6,
        verbose=True,
        scoring='neg_root_mean_squared_error',
        refit=True,
    )
    # NOTE(review): the scaler is fitted on all rows before the search runs,
    # so every validation fold has already been seen by MinMaxScaler. Putting
    # the scaler inside the searched estimator would avoid that, but it would
    # change the `*_grid[1]` access used by the cells below.
    return make_pipeline(MinMaxScaler(), search).fit(x, y)


# Same order as before: XGBoost, LightGBM, SVR, ElasticNet.
xgb_grid = _fit_grid(model_xgb, xgb_parameters)
lgb_grid = _fit_grid(model_lgb, lgb_parameters)
svr_grid = _fit_grid(model_svr, svr_parameters)
enet_grid = _fit_grid(model_enet, enet_parameters)

# Last expression of the cell: display the fitted ElasticNet pipeline.
enet_grid

Fitting 5 folds for each of 64 candidates, totalling 320 fits


[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:  8.2min
[Parallel(n_jobs=6)]: Done 188 tasks      | elapsed: 40.5min
[Parallel(n_jobs=6)]: Done 320 out of 320 | elapsed: 89.4min finished


Fitting 5 folds for each of 3072 candidates, totalling 15360 fits


[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    8.6s
[Parallel(n_jobs=6)]: Done 188 tasks      | elapsed:  1.7min
[Parallel(n_jobs=6)]: Done 438 tasks      | elapsed:  4.0min
[Parallel(n_jobs=6)]: Done 788 tasks      | elapsed:  7.2min
[Parallel(n_jobs=6)]: Done 1238 tasks      | elapsed: 11.6min
[Parallel(n_jobs=6)]: Done 1788 tasks      | elapsed: 17.8min
[Parallel(n_jobs=6)]: Done 2438 tasks      | elapsed: 24.6min
[Parallel(n_jobs=6)]: Done 3188 tasks      | elapsed: 33.2min
[Parallel(n_jobs=6)]: Done 4038 tasks      | elapsed: 43.2min
[Parallel(n_jobs=6)]: Done 4988 tasks      | elapsed: 53.6min
[Parallel(n_jobs=6)]: Done 6038 tasks      | elapsed: 65.7min
[Parallel(n_jobs=6)]: Done 7188 tasks      | elapsed: 78.6min
[Parallel(n_jobs=6)]: Done 8438 tasks      | elapsed: 92.9min
[Parallel(n_jobs=6)]: Done 9788 tasks      | elapsed: 109.7min
[Parallel(n_jobs=6)]: Done 11238 tasks      | elapsed: 127.0

Fitting 5 folds for each of 2500 candidates, totalling 12500 fits


[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:   11.3s
[Parallel(n_jobs=6)]: Done 188 tasks      | elapsed:   21.8s
[Parallel(n_jobs=6)]: Done 462 tasks      | elapsed:   39.5s
[Parallel(n_jobs=6)]: Done 908 tasks      | elapsed:  1.1min
[Parallel(n_jobs=6)]: Done 1454 tasks      | elapsed:  1.6min
[Parallel(n_jobs=6)]: Done 2322 tasks      | elapsed:  2.5min
[Parallel(n_jobs=6)]: Done 3394 tasks      | elapsed:  3.4min
[Parallel(n_jobs=6)]: Done 4646 tasks      | elapsed:  4.5min
[Parallel(n_jobs=6)]: Done 6066 tasks      | elapsed:  5.8min
[Parallel(n_jobs=6)]: Done 7654 tasks      | elapsed:  7.2min
[Parallel(n_jobs=6)]: Done 9442 tasks      | elapsed:  8.8min
[Parallel(n_jobs=6)]: Done 11376 tasks      | elapsed: 10.7min
[Parallel(n_jobs=6)]: Done 12500 out of 12500 | elapsed: 11.6min finished


Fitting 5 folds for each of 2000 candidates, totalling 10000 fits


[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:   10.0s
[Parallel(n_jobs=6)]: Done 188 tasks      | elapsed:   37.6s
[Parallel(n_jobs=6)]: Done 864 tasks      | elapsed:   50.2s
[Parallel(n_jobs=6)]: Done 2264 tasks      | elapsed:   59.1s
[Parallel(n_jobs=6)]: Done 4064 tasks      | elapsed:  1.2min
[Parallel(n_jobs=6)]: Done 6264 tasks      | elapsed:  1.4min
[Parallel(n_jobs=6)]: Done 8864 tasks      | elapsed:  1.6min
[Parallel(n_jobs=6)]: Done 9989 out of 10000 | elapsed:  1.7min remaining:    0.1s
[Parallel(n_jobs=6)]: Done 10000 out of 10000 | elapsed:  1.7min finished


Pipeline(steps=[('minmaxscaler', MinMaxScaler()),
                ('gridsearchcv',
                 GridSearchCV(cv=5, estimator=ElasticNet(), iid=False, n_jobs=6,
                              param_grid={'alpha': array([1.00000000e-04, 5.13794872e-02, 1.02658974e-01, 1.53938462e-01,
       2.05217949e-01, 2.56497436e-01, 3.07776923e-01, 3.59056410e-01,
       4.10335897e-01, 4.61615385e-01, 5.12894872e-01, 5.64174359e-01,
       6.15453846e-01, 6.66...
       0.4087551 , 0.42914286, 0.44953061, 0.46991837, 0.49030612,
       0.51069388, 0.53108163, 0.55146939, 0.57185714, 0.5922449 ,
       0.61263265, 0.63302041, 0.65340816, 0.67379592, 0.69418367,
       0.71457143, 0.73495918, 0.75534694, 0.77573469, 0.79612245,
       0.8165102 , 0.83689796, 0.85728571, 0.87767347, 0.89806122,
       0.91844898, 0.93883673, 0.95922449, 0.97961224, 1.        ])},
                              scoring='neg_root_mean_squared_error',
                              verbose=True))])

In [10]:
# Show the winning parameter set of each search, one per line
# (pipeline step [1] is the fitted GridSearchCV).
print(
    *(fitted[1].best_params_ for fitted in (xgb_grid, lgb_grid, svr_grid, enet_grid)),
    sep='\n',
)

{'colsample_bytree': 0.4, 'gamma': 0, 'learning_rate': 0.01, 'max_depth': 3, 'min_child_weight': 0, 'n_estimators': 10000, 'reg_alpha': 0.1, 'reg_lambda': 0.1, 'subsample': 0.6}
{'bagging_fraction': 0.8, 'bagging_freq': 5, 'bagging_seed': 9, 'feature_fraction': 0.2, 'feature_fraction_seed': 9, 'learning_rate': 0.05, 'max_bin': 60, 'min_data_in_leaf': 6, 'min_sum_hessian_in_leaf': 10, 'n_estimators': 1000, 'num_leaves': 4, 'subsample': 0.6}
{'C': 0.9034693877551021, 'epsilon': 0.01}
{'alpha': 0.0001, 'l1_ratio': 1.0}


In [11]:
# best_score_ holds the negated RMSE (scoring='neg_root_mean_squared_error'),
# so flip the sign to report a positive cross-validated RMSE per model.
for _label, _fitted in (
    ('XGBRegressor score:', xgb_grid),
    ('LGBMRegressor score:', lgb_grid),
    ('SVR score:', svr_grid),
    ('ElasticNet score:', enet_grid),
):
    print(_label, -_fitted[1].best_score_)

XGBRegressor score: 0.1112651947924915
LGBMRegressor score: 0.11386677929547775
SVR score: 0.12035748269311575
ElasticNet score: 0.11436090794260745
