In [2]:
from copy import deepcopy
import numpy as np
%matplotlib inline
import pandas as pd
from sklearn.model_selection import train_test_split as tts
from sklearn.model_selection import KFold, cross_validate 
from xgboost import XGBRegressor
import xgboost as xgb
from hyperopt import hp, fmin, tpe, Trials
from hyperopt.early_stop import no_progress_loss
import joblib 
import warnings 
warnings.filterwarnings("ignore") 
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)
import function

In [3]:
df2_backup = pd.read_csv("data_model.csv", index_col='Unnamed: 0')
df2 = deepcopy(df2_backup)
X, y = df2.iloc[:, :-1], df2.iloc[:, -1]
Xtrain, Xtest, Ytrain, Ytest = tts(X, y, test_size=0.3, random_state=10)
for i in [Xtrain, Xtest]:
    i.index = range(i.shape[0])
data_xgb = xgb.DMatrix(X, label=y)
dtrain = xgb.DMatrix(Xtrain,Ytrain)
dtest = xgb.DMatrix(Xtest,Ytest)

In [4]:
def hyperopt_objective(params):
    paramsforxgb = {"eta":params["eta"]
                    ,"booster":params["booster"]
                    ,"colsample_bytree":params["colsample_bytree"]
                    ,"colsample_bynode":params["colsample_bynode"]
                    ,"gamma":params["gamma"]
                    ,"lambda":params["lambda"]
                    ,"min_child_weight":params["min_child_weight"]
                    ,"max_depth":int(params["max_depth"])
                    ,"objective":params["objective"]
                    ,"rate_drop":params["rate_drop"]
                    ,"nthread":14
                    ,"verbosity":0
                    ,"seed":1412}
    xgb.set_config(verbosity=0)
    result = xgb.cv(params,dtrain, seed=1412, metrics=("rmse")
                    ,num_boost_round=int(params["num_boost_round"]))
    return result.iloc[-1,2]

In [5]:
param_grid_simple = {'num_boost_round': hp.quniform("num_boost_round",20,120,10)
                     ,"eta": hp.quniform("eta",0.05,1.05,0.05)
                     ,"booster":hp.choice("booster",["gbtree","dart"])
                     ,"colsample_bytree":hp.quniform("colsample_bytree",0.5,1,0.1)
                     ,"colsample_bynode":hp.quniform("colsample_bynode",0.5,1,0.1)
                     ,"gamma":hp.quniform("gamma",0,40,5)
                     ,"lambda":hp.quniform("lambda",0,1.5,0.2)
                     ,"min_child_weight":hp.quniform("min_child_weight",0,50,2)
                     ,"max_depth":hp.choice("max_depth",[*range(2,30,2)])
                     ,"objective":hp.choice("objective",["reg:squarederror","reg:squaredlogerror"])
                     ,"rate_drop":hp.quniform("rate_drop",0.1,1,0.1)
                    }

In [6]:
def param_hyperopt(max_evals=100):

    trials = Trials()

    early_stop_fn = no_progress_loss(30)

    params_best = fmin(hyperopt_objective
                       , space = param_grid_simple
                       , algo = tpe.suggest
                       , max_evals = max_evals
                       , trials = trials
                       , verbose = True
                       , early_stop_fn = early_stop_fn
                      )
    print("\n","\n","best params: ", params_best,
          "\n")
    return params_best, trials

In [7]:
params_best, trials = param_hyperopt(100)

 96%|█████████▌| 96/100 [01:08<00:02,  1.40trial/s, best loss: 0.03843060628943832] 

 
 best params:  {'booster': 0, 'colsample_bynode': 0.8, 'colsample_bytree': 0.7000000000000001, 'eta': 0.25, 'gamma': 0.0, 'lambda': 0.4, 'max_depth': 5, 'min_child_weight': 28.0, 'num_boost_round': 70.0, 'objective': 0, 'rate_drop': 0.9} 



In [9]:
params = {'colsample_bynode': 0.48
               , 'colsample_bytree': 0.8
               ,'eta': 0.2
               , 'gamma': 0
               , 'lambda': 0.05
               , 'max_depth': 5
               , 'min_child_weight': 2
               , "booster":"gbtree"
               , 'objective': "reg:squarederror"}
reg_xgb = xgb.train(params,dtrain,num_boost_round=98)
y_pred_train = reg_xgb.predict(dtrain)
y_pred_test = reg_xgb.predict(dtest)

In [10]:
xgb_skl = XGBRegressor(colsample_bynode=0.5
               , colsample_bytree=0.75
               ,learning_rate=0.2
               , gamma=0
               , reg_lambda=0.44
               , max_depth=5
               , min_child_weight=2
               , booster="gbtree"
               , objective="reg:squarederror"
               ,n_estimators=140).fit(Xtrain,Ytrain)
function.eval_regressor(xgb_skl, Xtrain, Ytrain, Xtest, Ytest)

r2_train: 0.9980645356882348
r2_test: 0.9877207148017144
mse_train: 0.00011108460320074727
mse_test: 0.0007035371708260911
rmse_train: 0.010539668078300533
rmse_test: 0.026524275123480587


In [None]:
joblib.dump(xgb_skl, "xgb.dat")