 # Age Prediction for Scenario 2

In [1]:
# Suppressing Warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
#Importing all necessary libraries
import pandas as pd
import numpy as np
import s3fs
from matplotlib import pyplot as plt 
%matplotlib inline
import seaborn as sns

In [3]:
#loading datasets
scenario_2=pd.read_parquet("s3://capstonemlc/Final_data/scenario_2.parquet")

#first 5 rows
scenario_2.head()

Unnamed: 0,device_id,brand,model,gender,age
0,398514470209561000,Huawei,è£è€€ç•…çŽ©4X,M,68
1,5805880616488060000,others,è¶…çº§æ‰‹æœº1,M,39
2,-1889893391998300000,Huawei,è£è€€3Xç•…çŽ©ç‰ˆ,M,22
3,3422421754497040000,samsung,Galaxy Note 3,M,27
4,3221871111026990000,Huawei,è£è€€ç•…çŽ©4X,M,29


# Data preprocessing

In [4]:
# converting type of columns to 'category'
scenario_2['brand'] = scenario_2['brand'].astype('category')
scenario_2['model'] = scenario_2['model'].astype('category')
scenario_2['gender'] = scenario_2['gender'].astype('category')

In [5]:
#Basic info dataframe
scenario_2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51335 entries, 0 to 51334
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   device_id  51335 non-null  int64   
 1   brand      51335 non-null  category
 2   model      51335 non-null  category
 3   gender     51335 non-null  category
 4   age        51335 non-null  int64   
dtypes: category(3), int64(2)
memory usage: 1008.5 KB


# Encoding

In [6]:
# generate binary values using get_dummies
scenario_2= pd.get_dummies(scenario_2, columns=["brand"], prefix=["brand"] )
scenario_2= pd.get_dummies(scenario_2, columns=["model"], prefix=["model"] )
scenario_2= pd.get_dummies(scenario_2, columns=["gender"], prefix=["gender"] )

In [7]:
#first 5 rows
scenario_2.head()

Unnamed: 0,device_id,age,brand_Coolpad,brand_Gionee,brand_HTC,brand_Huawei,brand_Meizu,brand_OPPO,brand_Xiaomi,brand_lenovo,...,model_è¶…çº§æ‰‹æœº1 Pro,model_é’æ˜¥ç‰ˆ,model_é­…è“,model_é­…è“2,model_é­…è“metal,model_éº¦èŠ’4,model_é»„é‡‘æ–—å£«A8,model_é”‹å°š,gender_F,gender_M
0,398514470209561000,68,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,5805880616488060000,39,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,-1889893391998300000,22,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,3422421754497040000,27,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,3221871111026990000,29,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


# Split data

In [8]:
#Loading dataset
train_test_split=pd.read_csv("s3://capstonemlc/train_test_split.csv")

### Train data

In [9]:
#extracting the train dataset
train_split = train_test_split[train_test_split['train_test_flag'] == "train"]

In [10]:
#frist 5 rows
train_split.head()

Unnamed: 0,device_id,gender,age,group,train_test_flag
0,-7548291590301750000,M,33,M32+,train
1,6943568600617760000,M,37,M32+,train
2,5441349705980020000,M,40,M32+,train
3,-5393876656119450000,M,33,M32+,train
4,4543988487649880000,M,53,M32+,train


In [11]:
#making a list of train device ids
train_devideId_values = train_split.device_id.values

### Test data

In [12]:
#extracting test data
test_split = train_test_split[train_test_split['train_test_flag'] == "test"]

In [13]:
#first 5 rows
test_split.head()

Unnamed: 0,device_id,gender,age,group,train_test_flag
17481,2948104315232910000,F,65,F32+,test
17482,8231243155939480000,F,47,F32+,test
17483,-3994292212856080000,F,31,F25-32,test
17484,7217910398487470000,M,29,M25-32,test
17485,8642523170587800000,F,31,F25-32,test


In [14]:
#making a list of teat device ids 
test_deviveID_values = test_split.device_id.values

### Train-test datasets

In [15]:
#splitting scenario_2 into train and test sets
scenario2_train = scenario_2.loc[scenario_2['device_id'].isin(train_devideId_values)]
scenario2_test = scenario_2.loc[scenario_2['device_id'].isin(test_deviveID_values)]

In [16]:
#Print the size (rows and columns) in all the data frames
print("scenario2_train = ",scenario2_train.shape," Rows = ",scenario2_train.shape[0]," Columns = ",scenario2_train.shape[1])
print("scenario2_test = ",scenario2_test.shape," Rows = ",scenario2_test.shape[0]," Columns = ",scenario2_test.shape[1])

scenario2_train =  (41067, 166)  Rows =  41067  Columns =  166
scenario2_test =  (10268, 166)  Rows =  10268  Columns =  166


In [17]:
#first 5 rows
scenario2_train.head()

Unnamed: 0,device_id,age,brand_Coolpad,brand_Gionee,brand_HTC,brand_Huawei,brand_Meizu,brand_OPPO,brand_Xiaomi,brand_lenovo,...,model_è¶…çº§æ‰‹æœº1 Pro,model_é’æ˜¥ç‰ˆ,model_é­…è“,model_é­…è“2,model_é­…è“metal,model_éº¦èŠ’4,model_é»„é‡‘æ–—å£«A8,model_é”‹å°š,gender_F,gender_M
0,398514470209561000,68,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,5805880616488060000,39,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,-1889893391998300000,22,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,3422421754497040000,27,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,3221871111026990000,29,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [18]:
#first 5 rows
scenario2_test.head()

Unnamed: 0,device_id,age,brand_Coolpad,brand_Gionee,brand_HTC,brand_Huawei,brand_Meizu,brand_OPPO,brand_Xiaomi,brand_lenovo,...,model_è¶…çº§æ‰‹æœº1 Pro,model_é’æ˜¥ç‰ˆ,model_é­…è“,model_é­…è“2,model_é­…è“metal,model_éº¦èŠ’4,model_é»„é‡‘æ–—å£«A8,model_é”‹å°š,gender_F,gender_M
12773,2602209329720390000,36,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
12774,4999143280669700000,24,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
12775,-1458857306549150000,33,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
12776,-7217261791343240000,20,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,1
12777,-8003569692105950000,27,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [19]:
#Train dataset

X_train= scenario2_train.drop(['age'], axis=1)
y_train = scenario2_train[['age']]

In [20]:
#frist 5 rows
X_train.head()

Unnamed: 0,device_id,brand_Coolpad,brand_Gionee,brand_HTC,brand_Huawei,brand_Meizu,brand_OPPO,brand_Xiaomi,brand_lenovo,brand_others,...,model_è¶…çº§æ‰‹æœº1 Pro,model_é’æ˜¥ç‰ˆ,model_é­…è“,model_é­…è“2,model_é­…è“metal,model_éº¦èŠ’4,model_é»„é‡‘æ–—å£«A8,model_é”‹å°š,gender_F,gender_M
0,398514470209561000,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,5805880616488060000,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,1
2,-1889893391998300000,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,3422421754497040000,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,3221871111026990000,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [21]:
#dropping device_id column
X_train= X_train.drop(X_train.iloc[:,0:1], axis=1)

In [22]:
#frist 5 rows
X_train.head()

Unnamed: 0,brand_Coolpad,brand_Gionee,brand_HTC,brand_Huawei,brand_Meizu,brand_OPPO,brand_Xiaomi,brand_lenovo,brand_others,brand_samsung,...,model_è¶…çº§æ‰‹æœº1 Pro,model_é’æ˜¥ç‰ˆ,model_é­…è“,model_é­…è“2,model_é­…è“metal,model_éº¦èŠ’4,model_é»„é‡‘æ–—å£«A8,model_é”‹å°š,gender_F,gender_M
0,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,1
2,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,1
4,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [23]:
#first 5 rows
y_train.head()

Unnamed: 0,age
0,68
1,39
2,22
3,27
4,29


In [24]:
#Test dataset
X_test= scenario2_test.drop(['age'], axis=1)
y_test = scenario2_test[['age']]

In [25]:
#frist 5 rows
X_test.head()

Unnamed: 0,device_id,brand_Coolpad,brand_Gionee,brand_HTC,brand_Huawei,brand_Meizu,brand_OPPO,brand_Xiaomi,brand_lenovo,brand_others,...,model_è¶…çº§æ‰‹æœº1 Pro,model_é’æ˜¥ç‰ˆ,model_é­…è“,model_é­…è“2,model_é­…è“metal,model_éº¦èŠ’4,model_é»„é‡‘æ–—å£«A8,model_é”‹å°š,gender_F,gender_M
12773,2602209329720390000,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
12774,4999143280669700000,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
12775,-1458857306549150000,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
12776,-7217261791343240000,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
12777,-8003569692105950000,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [26]:
#dropping device-id column
X_test= X_test.drop(X_test.iloc[:,0:1], axis=1)

In [27]:
#frist 5 rows
X_test.head()

Unnamed: 0,brand_Coolpad,brand_Gionee,brand_HTC,brand_Huawei,brand_Meizu,brand_OPPO,brand_Xiaomi,brand_lenovo,brand_others,brand_samsung,...,model_è¶…çº§æ‰‹æœº1 Pro,model_é’æ˜¥ç‰ˆ,model_é­…è“,model_é­…è“2,model_é­…è“metal,model_éº¦èŠ’4,model_é»„é‡‘æ–—å£«A8,model_é”‹å°š,gender_F,gender_M
12773,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,1,0
12774,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,1
12775,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,1,0
12776,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1
12777,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,1


In [28]:
#first 5 rows
y_test.head()

Unnamed: 0,age
12773,36
12774,24
12775,33
12776,20
12777,27


# Model Building 

## Linear regression

##### The target variable "age" is a continuous variable,  so as a starting point and for simplicity, linear regression algorithm is considered. The modelling can also be done using multiclass logistic regression algorithm. Since most of the variables are categorical, logistic regression may get more complicated to execute. So to avoid the complexity and ease of understanding the metrics and execution the basic classic linear regression algorithm is opted for modelling.

### Training model

In [91]:
#Making a list of columns of X_train
col=list(X_train.columns)

In [92]:
#importing libraries
import statsmodels.api as sm

# Linear regression model
X_train_sm = sm.add_constant(X_train)
lr1 = sm.GLS(y_train,X_train_sm)
res = lr1.fit()
res.summary()

0,1,2,3
Dep. Variable:,age,R-squared:,0.035
Model:,GLS,Adj. R-squared:,0.031
Method:,Least Squares,F-statistic:,9.207
Date:,"Sun, 16 Jan 2022",Prob (F-statistic):,8.930000000000001e-208
Time:,20:37:48,Log-Likelihood:,-151610.0
No. Observations:,41067,AIC:,303500.0
Df Residuals:,40905,BIC:,304900.0
Df Model:,161,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,5.919e+11,1.18e+12,0.501,0.617,-1.73e+12,2.91e+12
brand_Coolpad,-3.722e+11,9e+11,-0.414,0.679,-2.14e+12,1.39e+12
brand_Gionee,-3.722e+11,9e+11,-0.414,0.679,-2.14e+12,1.39e+12
brand_HTC,-3.722e+11,9e+11,-0.414,0.679,-2.14e+12,1.39e+12
brand_Huawei,-3.722e+11,9e+11,-0.414,0.679,-2.14e+12,1.39e+12
brand_Meizu,-3.722e+11,9e+11,-0.414,0.679,-2.14e+12,1.39e+12
brand_OPPO,-3.722e+11,9e+11,-0.414,0.679,-2.14e+12,1.39e+12
brand_Xiaomi,-3.722e+11,9e+11,-0.414,0.679,-2.14e+12,1.39e+12
brand_lenovo,-3.722e+11,9e+11,-0.414,0.679,-2.14e+12,1.39e+12

0,1,2,3
Omnibus:,10767.931,Durbin-Watson:,2.015
Prob(Omnibus):,0.0,Jarque-Bera (JB):,27636.714
Skew:,1.431,Prob(JB):,0.0
Kurtosis:,5.822,Cond. No.,1130000000000000.0


In [93]:
#Recursive feature elimination
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
linreg = LinearRegression()
rfe = RFE(linreg)
rfe = rfe.fit(X_train, y_train.values.ravel())
print(rfe.support_)
print(rfe.ranking_)

[False False False False False False False False False False False  True
  True False  True False  True  True False  True False False False  True
 False False False  True  True False  True False False False  True  True
 False False False False False False False False False False False False
 False False False False False  True  True False False False False  True
  True  True  True  True  True False  True  True  True  True  True False
 False  True False False False False  True  True  True  True  True  True
  True  True  True  True False  True  True  True  True False  True  True
  True  True  True  True  True  True  True  True False  True  True  True
  True  True  True  True  True  True  True  True  True False  True  True
 False False False  True  True  True  True False False False False  True
 False  True  True False  True False False  True  True  True False False
 False  True False  True False False False  True False  True False False
  True  True  True False False False False False]
[

In [94]:
#List of X_train columns with their respecive rfe support and rankings
list(zip(X_train.columns, rfe.support_, rfe.ranking_))

[('brand_Coolpad', False, 76),
 ('brand_Gionee', False, 79),
 ('brand_HTC', False, 78),
 ('brand_Huawei', False, 74),
 ('brand_Meizu', False, 83),
 ('brand_OPPO', False, 81),
 ('brand_Xiaomi', False, 80),
 ('brand_lenovo', False, 75),
 ('brand_others', False, 77),
 ('brand_samsung', False, 73),
 ('brand_vivo', False, 82),
 ('model_1107', True, 1),
 ('model_45rggt2', True, 1),
 ('model_8720L', False, 50),
 ('model_900tr2', True, 1),
 ('model_900tr2123', False, 13),
 ('model_A1', True, 1),
 ('model_A31', True, 1),
 ('model_A320t', False, 52),
 ('model_A33', True, 1),
 ('model_A788t', False, 70),
 ('model_A850', False, 3),
 ('model_Ascend G7', False, 64),
 ('model_Ascend P6', True, 1),
 ('model_Ascend P7', False, 56),
 ('model_Ascend P8', False, 54),
 ('model_B199', False, 59),
 ('model_Desire 816', True, 1),
 ('model_F103', True, 1),
 ('model_F2', False, 7),
 ('model_Find 5', True, 1),
 ('model_Find 7', False, 8),
 ('model_G610S', False, 36),
 ('model_G610T', False, 55),
 ('model_G620-L7

In [95]:
#X_train columns with rfe support
col_rfe = X_train.columns[rfe.support_]

In [96]:
#X_train columns without rfe support
X_train.columns[~rfe.support_]

Index(['brand_Coolpad', 'brand_Gionee', 'brand_HTC', 'brand_Huawei',
       'brand_Meizu', 'brand_OPPO', 'brand_Xiaomi', 'brand_lenovo',
       'brand_others', 'brand_samsung', 'brand_vivo', 'model_8720L',
       'model_900tr2123', 'model_A320t', 'model_A788t', 'model_A850',
       'model_Ascend G7', 'model_Ascend P7', 'model_Ascend P8', 'model_B199',
       'model_F2', 'model_Find 7', 'model_G610S', 'model_G610T',
       'model_Galaxy A7', 'model_Galaxy A8', 'model_Galaxy Grand',
       'model_Galaxy Grand 2', 'model_Galaxy Grand DUOS',
       'model_Galaxy Grand Prime', 'model_Galaxy Mega 5.8',
       'model_Galaxy Mega 6.3', 'model_Galaxy Mega Plus', 'model_Galaxy Note',
       'model_Galaxy Note 2', 'model_Galaxy Note 3', 'model_Galaxy Note 4',
       'model_Galaxy S2', 'model_Galaxy S3', 'model_Galaxy S4',
       'model_Galaxy S5', 'model_Galaxy S6 Edge+', 'model_Galaxy Trend 3',
       'model_Galaxy Trend DUOS', 'model_Galaxy Win', 'model_MI One Plus',
       'model_Mate 7', 'mod

In [97]:
#retraining and fitting Linear regresion model
X_train_sm = sm.add_constant(X_train[col_rfe])
lr_final = sm.GLS(y_train,X_train_sm)
res_final = lr_final.fit()
res_final.summary()

0,1,2,3
Dep. Variable:,age,R-squared:,0.024
Model:,GLS,Adj. R-squared:,0.022
Method:,Least Squares,F-statistic:,12.36
Date:,"Sun, 16 Jan 2022",Prob (F-statistic):,3.1e-158
Time:,20:38:44,Log-Likelihood:,-151840.0
No. Observations:,41067,AIC:,303800.0
Df Residuals:,40984,BIC:,304600.0
Df Model:,82,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,32.0814,0.060,535.678,0.000,31.964,32.199
model_1107,-4.9528,1.169,-4.235,0.000,-7.245,-2.661
model_45rggt2,-1.6130,0.638,-2.530,0.011,-2.863,-0.363
model_900tr2,-1.9214,1.130,-1.700,0.089,-4.136,0.293
model_A1,-1.6528,1.169,-1.413,0.158,-3.945,0.639
model_A31,-3.9192,0.929,-4.217,0.000,-5.741,-2.098
model_A33,-1.5397,1.412,-1.091,0.275,-4.307,1.227
model_Ascend P6,-2.0393,0.711,-2.866,0.004,-3.434,-0.645
model_Desire 816,-1.5012,0.856,-1.754,0.079,-3.179,0.176

0,1,2,3
Omnibus:,10846.954,Durbin-Watson:,2.014
Prob(Omnibus):,0.0,Jarque-Bera (JB):,27888.393
Skew:,1.441,Prob(JB):,0.0
Kurtosis:,5.828,Cond. No.,38.4


In [98]:
# Getting the predicted values on the train set
y_train_pred =res_final.predict(X_train_sm)
y_train_pred[:10]

0    32.081364
1    30.388889
2    32.081364
3    32.081364
4    32.081364
5    32.081364
6    32.081364
7    32.081364
8    29.520849
9    32.081364
dtype: float64

In [99]:
#reshaping the predictions array
y_train_pred = y_train_pred.values.reshape(-1)
y_train_pred[:10]

array([32.08136434, 30.38888889, 32.08136434, 32.08136434, 32.08136434,
       32.08136434, 32.08136434, 32.08136434, 29.52084857, 32.08136434])

In [100]:
#changing predictions array into dataframe
y_train_pred_final = pd.DataFrame({'age':y_train.values.reshape(-1), 'age_pred':y_train_pred})
y_train_pred_final.head()

Unnamed: 0,age,age_pred
0,68,32.081364
1,39,30.388889
2,22,32.081364
3,27,32.081364
4,29,32.081364


In [101]:
#Changing data type
y_train_pred_final['age_pred']=y_train_pred_final['age_pred'].astype('int')

In [102]:
y_train_pred_final.head()

Unnamed: 0,age,age_pred
0,68,32
1,39,30
2,22,32
3,27,32
4,29,32


In [103]:
#Taking a 25% sample of train data
y_train_lr=y_train_pred_final.sample(frac =.25)

In [104]:
#Calculating the Precetage Population Distribution (PPD)
y_train_lr['percent_population_dist']=y_train_lr['age']-y_train_lr['age_pred']/y_train_lr['age']*100

In [105]:
# importing r2_score module
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

# predicting the accuracy score
score=r2_score(y_train,y_train_pred)
print('r2 socre is',score)
print('rmse is',np.sqrt(mean_squared_error(y_train,y_train_pred)))
print('percentage population distribution is ', y_train_lr['percent_population_dist'].mean())

r2 socre is 0.024134220798985417
rmse is 9.761612262303995
percentage population distribution is  -76.60289181340326


##### As we can see the RFE has not given any better result, so all the initial fearures are considered to predict on test data

### Test data

In [106]:
#first 5 rows
X_test.head()

Unnamed: 0,brand_Coolpad,brand_Gionee,brand_HTC,brand_Huawei,brand_Meizu,brand_OPPO,brand_Xiaomi,brand_lenovo,brand_others,brand_samsung,...,model_è¶…çº§æ‰‹æœº1 Pro,model_é’æ˜¥ç‰ˆ,model_é­…è“,model_é­…è“2,model_é­…è“metal,model_éº¦èŠ’4,model_é»„é‡‘æ–—å£«A8,model_é”‹å°š,gender_F,gender_M
12773,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,1,0
12774,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,1
12775,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,1,0
12776,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1
12777,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,1


In [107]:
#Adding constant to X_test data for linear regression
X_test_sm = sm.add_constant(X_test)

In [108]:
#Predictions on test data
y_test_pred = res.predict(X_test_sm)

In [109]:
y_test_pred[:10]

12773    33.697021
12774    33.733887
12775    32.317139
12776    28.928467
12777    32.315430
12778    32.485596
12779    31.614746
12780    34.983643
12781    32.310791
12782    30.400879
dtype: float64

In [110]:
#changing test predictio array into dataframe
y_pred_final = pd.DataFrame({'age':y_test.values.reshape(-1), 'age_pred':y_test_pred})
y_pred_final.head()

Unnamed: 0,age,age_pred
12773,36,33.697021
12774,24,33.733887
12775,33,32.317139
12776,20,28.928467
12777,27,32.31543


In [111]:
#Changing data type
y_pred_final['age_pred']=y_pred_final['age_pred'].astype('int')

In [112]:
y_pred_final.head()

Unnamed: 0,age,age_pred
12773,36,33
12774,24,33
12775,33,32
12776,20,28
12777,27,32


In [113]:
#taking 25% sample of test data
y_test_lr=y_pred_final.sample(frac =.25)

In [114]:
#Calcualting PPD
y_test_lr['percent_population_dist']=y_test_lr['age']-y_test_lr['age_pred']/y_test_lr['age']*100

In [115]:
# predicting the accuracy score
score=r2_score(y_test,y_test_pred)
print('r2 socre is',score)
print('rmse is',np.sqrt(mean_squared_error(y_test,y_test_pred)))
print('percentage population distribution is ', y_test_lr['percent_population_dist'].mean())

r2 socre is 0.028786845175493125
rmse is 9.604128690156704
percentage population distribution is  -75.7729899752642


#### The results of Linear regression is pretty poor considering the low r squared value and less rmse value.
#### We can increase the model performance with XGBoost Regressor

## XGBoost regression

In [116]:
#Import required libraries
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV

In [117]:
#get X_train, y_train
X_train= scenario2_train.drop(['age'], axis=1)
X_train= X_train.drop(X_train.iloc[:,0:1], axis=1)
y_train = scenario2_train[['age']]

In [118]:
#get X_test, y_test
X_test= scenario2_test.drop(['age'], axis=1)
X_test= X_test.drop(X_test.iloc[:,0:1], axis=1)
y_test = scenario2_test[['age']]

In [119]:
# A parameter grid for XGBoost
params = {
        'min_child_weight': [1, 5, 10],
        'gamma': [0.5, 1, 1.5, 2, 5],
        'subsample': [0.6, 0.8, 1.0],
        'colsample_bytree': [0.6, 0.8, 1.0],
        'max_depth': [3, 4, 5],
        'n_estimators': range(60, 360, 40),
        'learning_rate': [0.1, 0.01, 0.05]
 }

##### Splitting the parameters grid into two for ease of execution of GridSearchCv

In [120]:
# A parameter grid for XGBoost
params1 = {
        'min_child_weight': [1, 5],
        'gamma': [0.5, 1, 1.5],
        'subsample': [0.6, 0.8],
        'colsample_bytree': [0.6, 0.8],
        'max_depth': [3, 4],
        'n_estimators': range(60, 140, 40),
        'learning_rate': [0.1, 0.01]
 }

In [121]:
#Initializing the XGBRegressor object
estimator = XGBRegressor(objective= 'reg:squarederror',eval_metric='rmse', seed=42)

In [122]:
#Initializing GrisSearchCV object for XGBoost regressor
reg = GridSearchCV(estimator=estimator, 
                   param_grid=params1,
                   scoring='r2',
                   n_jobs = 1,
                   verbose=50000)

In [123]:
#Fitting the model
reg.fit(X_train, y_train)

Fitting 5 folds for each of 192 candidates, totalling 960 fits
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[CV] colsample_bytree=0.6, gamma=0.5, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=0.5, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.6, score=0.029, total=   7.1s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    7.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=0.5, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=0.5, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.6, score=0.029, total=   7.3s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   14.4s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=0.5, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.6, ga

[CV]  colsample_bytree=0.6, gamma=0.5, learning_rate=0.1, max_depth=3, min_child_weight=5, n_estimators=60, subsample=0.6, score=0.028, total=  16.1s
[Parallel(n_jobs=1)]: Done  24 out of  24 | elapsed:  5.9min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=0.5, learning_rate=0.1, max_depth=3, min_child_weight=5, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=0.5, learning_rate=0.1, max_depth=3, min_child_weight=5, n_estimators=60, subsample=0.6, score=0.024, total=  12.5s
[Parallel(n_jobs=1)]: Done  25 out of  25 | elapsed:  6.1min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=0.5, learning_rate=0.1, max_depth=3, min_child_weight=5, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=0.5, learning_rate=0.1, max_depth=3, min_child_weight=5, n_estimators=60, subsample=0.8, score=0.029, total=   9.2s
[Parallel(n_jobs=1)]: Done  26 out of  26 | elapsed:  6.2min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=0.5, learning_rate=0.1, max_depth

[CV]  colsample_bytree=0.6, gamma=0.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.8, score=0.030, total=  22.5s
[Parallel(n_jobs=1)]: Done  48 out of  48 | elapsed: 11.8min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=0.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=0.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.8, score=0.027, total=  19.5s
[Parallel(n_jobs=1)]: Done  49 out of  49 | elapsed: 12.1min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=0.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=0.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.8, score=0.026, total=  14.3s
[Parallel(n_jobs=1)]: Done  50 out of  50 | elapsed: 12.3min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=0.5, learning_rate=0.1, max_depth

[CV]  colsample_bytree=0.6, gamma=0.5, learning_rate=0.1, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.6, score=0.031, total=  28.2s
[Parallel(n_jobs=1)]: Done  72 out of  72 | elapsed: 18.7min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=0.5, learning_rate=0.1, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=0.5, learning_rate=0.1, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.6, score=0.030, total=  19.1s
[Parallel(n_jobs=1)]: Done  73 out of  73 | elapsed: 19.0min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=0.5, learning_rate=0.1, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=0.5, learning_rate=0.1, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.6, score=0.027, total=  17.5s
[Parallel(n_jobs=1)]: Done  74 out of  74 | elapsed: 19.3min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=0.5, learning_rate=0.1, max_

[CV]  colsample_bytree=0.6, gamma=0.5, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=100, subsample=0.8, score=-1.291, total=  16.0s
[Parallel(n_jobs=1)]: Done  96 out of  96 | elapsed: 25.3min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=0.5, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=0.5, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=100, subsample=0.8, score=-1.236, total=  11.9s
[Parallel(n_jobs=1)]: Done  97 out of  97 | elapsed: 25.5min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=0.5, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=0.5, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=100, subsample=0.8, score=-1.280, total=  12.4s
[Parallel(n_jobs=1)]: Done  98 out of  98 | elapsed: 25.7min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=0.5, learning_rate=0

[CV]  colsample_bytree=0.6, gamma=0.5, learning_rate=0.01, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.8, score=-1.276, total=  27.0s
[Parallel(n_jobs=1)]: Done 120 out of 120 | elapsed: 30.8min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=0.5, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=0.5, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.6, score=-2.906, total=  21.9s
[Parallel(n_jobs=1)]: Done 121 out of 121 | elapsed: 31.2min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=0.5, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=0.5, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.6, score=-2.788, total=  11.3s
[Parallel(n_jobs=1)]: Done 122 out of 122 | elapsed: 31.3min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=0.5, learning_rate=0.01,

[CV]  colsample_bytree=0.6, gamma=0.5, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=60, subsample=0.6, score=-2.803, total=  17.0s
[Parallel(n_jobs=1)]: Done 144 out of 144 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=0.5, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=0.5, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=60, subsample=0.6, score=-2.881, total=  14.8s
[Parallel(n_jobs=1)]: Done 145 out of 145 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=0.5, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=0.5, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=60, subsample=0.8, score=-2.904, total=   8.0s
[Parallel(n_jobs=1)]: Done 146 out of 146 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=0.5, learning_rate=0.01, 

[CV]  colsample_bytree=0.6, gamma=1, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.8, score=0.029, total=   7.3s
[Parallel(n_jobs=1)]: Done 168 out of 168 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.8, score=0.027, total=  12.0s
[Parallel(n_jobs=1)]: Done 169 out of 169 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.8, score=0.024, total=  12.4s
[Parallel(n_jobs=1)]: Done 170 out of 170 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1, learning_rate=0.1, max_depth=3, min_chil

[CV]  colsample_bytree=0.6, gamma=1, learning_rate=0.1, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.6, score=0.030, total=  12.6s
[Parallel(n_jobs=1)]: Done 192 out of 192 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1, learning_rate=0.1, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=1, learning_rate=0.1, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.6, score=0.030, total=  15.2s
[Parallel(n_jobs=1)]: Done 193 out of 193 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1, learning_rate=0.1, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=1, learning_rate=0.1, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.6, score=0.027, total=  21.5s
[Parallel(n_jobs=1)]: Done 194 out of 194 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1, learning_rate=0.1, max_depth=3, min

[CV]  colsample_bytree=0.6, gamma=1, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8, score=0.029, total=  14.1s
[Parallel(n_jobs=1)]: Done 216 out of 216 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8, score=0.031, total=  14.5s
[Parallel(n_jobs=1)]: Done 217 out of 217 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8, score=0.030, total=  19.4s
[Parallel(n_jobs=1)]: Done 218 out of 218 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1, learning_rate=0.1, max_depth=4, min

[CV]  colsample_bytree=0.6, gamma=1, learning_rate=0.1, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.8, score=0.026, total=  17.2s
[Parallel(n_jobs=1)]: Done 240 out of 240 | elapsed:  1.6min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=1, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.6, score=-2.907, total=  10.3s
[Parallel(n_jobs=1)]: Done 241 out of 241 | elapsed:  1.8min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=1, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.6, score=-2.790, total=   9.3s
[Parallel(n_jobs=1)]: Done 242 out of 242 | elapsed:  1.9min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1, learning_rate=0.01, max_depth=3, 

[CV]  colsample_bytree=0.6, gamma=1, learning_rate=0.01, max_depth=3, min_child_weight=5, n_estimators=60, subsample=0.6, score=-2.804, total=   8.4s
[Parallel(n_jobs=1)]: Done 264 out of 264 | elapsed:  6.6min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1, learning_rate=0.01, max_depth=3, min_child_weight=5, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=1, learning_rate=0.01, max_depth=3, min_child_weight=5, n_estimators=60, subsample=0.6, score=-2.882, total=   9.0s
[Parallel(n_jobs=1)]: Done 265 out of 265 | elapsed:  6.7min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1, learning_rate=0.01, max_depth=3, min_child_weight=5, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1, learning_rate=0.01, max_depth=3, min_child_weight=5, n_estimators=60, subsample=0.8, score=-2.906, total=   8.8s
[Parallel(n_jobs=1)]: Done 266 out of 266 | elapsed:  6.9min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1, learning_rate=0.01, max_depth=3,

[CV]  colsample_bytree=0.6, gamma=1, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.8, score=-2.866, total=   9.3s
[Parallel(n_jobs=1)]: Done 288 out of 288 | elapsed: 11.7min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.8, score=-2.803, total=  12.2s
[Parallel(n_jobs=1)]: Done 289 out of 289 | elapsed: 11.9min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.8, score=-2.881, total=   9.9s
[Parallel(n_jobs=1)]: Done 290 out of 290 | elapsed: 12.0min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1, learning_rate=0.01, max_depth=4,

[CV]  colsample_bytree=0.6, gamma=1, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.6, score=-1.233, total=  18.4s
[Parallel(n_jobs=1)]: Done 312 out of 312 | elapsed: 18.4min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=1, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.6, score=-1.278, total=  15.1s
[Parallel(n_jobs=1)]: Done 313 out of 313 | elapsed: 18.6min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=1, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.6, score=-1.235, total=  15.9s
[Parallel(n_jobs=1)]: Done 314 out of 314 | elapsed: 18.9min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1, learning_rate=0.01, max_dep

[CV]  colsample_bytree=0.6, gamma=1.5, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=100, subsample=0.8, score=0.029, total=  17.7s
[Parallel(n_jobs=1)]: Done 336 out of 336 | elapsed: 24.2min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1.5, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1.5, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=100, subsample=0.8, score=0.031, total=  12.2s
[Parallel(n_jobs=1)]: Done 337 out of 337 | elapsed: 24.4min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1.5, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1.5, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=100, subsample=0.8, score=0.030, total=  15.7s
[Parallel(n_jobs=1)]: Done 338 out of 338 | elapsed: 24.7min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1.5, learning_rate=0.1, max_

[CV]  colsample_bytree=0.6, gamma=1.5, learning_rate=0.1, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.8, score=0.025, total=  11.6s
[Parallel(n_jobs=1)]: Done 360 out of 360 | elapsed: 29.2min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.6, score=0.029, total=  11.1s
[Parallel(n_jobs=1)]: Done 361 out of 361 | elapsed: 29.4min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.6, score=0.030, total=  16.0s
[Parallel(n_jobs=1)]: Done 362 out of 362 | elapsed: 29.6min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1.5, learning_rate=0.1, max_dept

[CV]  colsample_bytree=0.6, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=5, n_estimators=60, subsample=0.6, score=0.028, total=   8.2s
[Parallel(n_jobs=1)]: Done 384 out of 384 | elapsed: 35.7min remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=5, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=5, n_estimators=60, subsample=0.6, score=0.025, total= -3589.7s
[Parallel(n_jobs=1)]: Done 385 out of 385 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=5, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=5, n_estimators=60, subsample=0.8, score=0.030, total=   9.3s
[Parallel(n_jobs=1)]: Done 386 out of 386 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1.5, learning_rate=0.1, max_dep

[CV]  colsample_bytree=0.6, gamma=1.5, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.8, score=-2.868, total=   6.8s
[Parallel(n_jobs=1)]: Done 408 out of 408 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1.5, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1.5, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.8, score=-2.804, total=   6.8s
[Parallel(n_jobs=1)]: Done 409 out of 409 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1.5, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1.5, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.8, score=-2.882, total=   7.0s
[Parallel(n_jobs=1)]: Done 410 out of 410 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1.5, learning_rate=0.01, 

[CV]  colsample_bytree=0.6, gamma=1.5, learning_rate=0.01, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.6, score=-1.236, total=  15.6s
[Parallel(n_jobs=1)]: Done 432 out of 432 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1.5, learning_rate=0.01, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=1.5, learning_rate=0.01, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.6, score=-1.280, total=  11.8s
[Parallel(n_jobs=1)]: Done 433 out of 433 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1.5, learning_rate=0.01, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=1.5, learning_rate=0.01, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.6, score=-1.237, total=  12.3s
[Parallel(n_jobs=1)]: Done 434 out of 434 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1.5, learning_rate=0

[CV]  colsample_bytree=0.6, gamma=1.5, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8, score=-1.289, total=  14.5s
[Parallel(n_jobs=1)]: Done 456 out of 456 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1.5, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1.5, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8, score=-1.233, total=  14.8s
[Parallel(n_jobs=1)]: Done 457 out of 457 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1.5, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1.5, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8, score=-1.279, total=  23.2s
[Parallel(n_jobs=1)]: Done 458 out of 458 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.6, gamma=1.5, learning_rate=0

[CV]  colsample_bytree=0.6, gamma=1.5, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.8, score=-1.274, total=  15.2s
[Parallel(n_jobs=1)]: Done 480 out of 480 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=0.5, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=0.5, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.6, score=0.028, total=  13.0s
[Parallel(n_jobs=1)]: Done 481 out of 481 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=0.5, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=0.5, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.6, score=0.029, total=  15.8s
[Parallel(n_jobs=1)]: Done 482 out of 482 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=0.5, learning_rate=0.1, max_de

[CV]  colsample_bytree=0.8, gamma=0.5, learning_rate=0.1, max_depth=3, min_child_weight=5, n_estimators=60, subsample=0.6, score=0.028, total=   8.3s
[Parallel(n_jobs=1)]: Done 504 out of 504 | elapsed:  4.0min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=0.5, learning_rate=0.1, max_depth=3, min_child_weight=5, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=0.5, learning_rate=0.1, max_depth=3, min_child_weight=5, n_estimators=60, subsample=0.6, score=0.025, total=   8.4s
[Parallel(n_jobs=1)]: Done 505 out of 505 | elapsed:  4.2min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=0.5, learning_rate=0.1, max_depth=3, min_child_weight=5, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=0.5, learning_rate=0.1, max_depth=3, min_child_weight=5, n_estimators=60, subsample=0.8, score=0.029, total=   7.9s
[Parallel(n_jobs=1)]: Done 506 out of 506 | elapsed:  4.3min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=0.5, learning_rate=0.1, max_depth

[CV]  colsample_bytree=0.8, gamma=0.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.8, score=0.030, total=   9.5s
[Parallel(n_jobs=1)]: Done 528 out of 528 | elapsed:  9.3min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=0.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=0.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.8, score=0.027, total=   9.9s
[Parallel(n_jobs=1)]: Done 529 out of 529 | elapsed:  9.5min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=0.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=0.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.8, score=0.025, total=  14.5s
[Parallel(n_jobs=1)]: Done 530 out of 530 | elapsed:  9.7min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=0.5, learning_rate=0.1, max_depth

[CV]  colsample_bytree=0.8, gamma=0.5, learning_rate=0.1, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.6, score=0.031, total=  20.9s
[Parallel(n_jobs=1)]: Done 552 out of 552 | elapsed: 16.1min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=0.5, learning_rate=0.1, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=0.5, learning_rate=0.1, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.6, score=0.030, total=  26.4s
[Parallel(n_jobs=1)]: Done 553 out of 553 | elapsed: 16.6min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=0.5, learning_rate=0.1, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=0.5, learning_rate=0.1, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.6, score=0.027, total=  32.7s
[Parallel(n_jobs=1)]: Done 554 out of 554 | elapsed: 17.1min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=0.5, learning_rate=0.1, max_

[CV]  colsample_bytree=0.8, gamma=0.5, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=100, subsample=0.8, score=-1.291, total=  15.5s
[Parallel(n_jobs=1)]: Done 576 out of 576 | elapsed: 22.3min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=0.5, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=0.5, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=100, subsample=0.8, score=-1.235, total=  12.8s
[Parallel(n_jobs=1)]: Done 577 out of 577 | elapsed: 22.5min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=0.5, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=0.5, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=100, subsample=0.8, score=-1.279, total=  11.7s
[Parallel(n_jobs=1)]: Done 578 out of 578 | elapsed: 22.7min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=0.5, learning_rate=0

[CV]  colsample_bytree=0.8, gamma=0.5, learning_rate=0.01, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.8, score=-1.276, total=  13.0s
[Parallel(n_jobs=1)]: Done 600 out of 600 | elapsed: 27.5min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=0.5, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=0.5, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.6, score=-2.905, total=   9.8s
[Parallel(n_jobs=1)]: Done 601 out of 601 | elapsed: 27.7min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=0.5, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=0.5, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.6, score=-2.787, total=   9.8s
[Parallel(n_jobs=1)]: Done 602 out of 602 | elapsed: 27.8min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=0.5, learning_rate=0.01,

[CV]  colsample_bytree=0.8, gamma=0.5, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=60, subsample=0.6, score=-2.802, total=  14.6s
[Parallel(n_jobs=1)]: Done 624 out of 624 | elapsed: 33.6min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=0.5, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=0.5, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=60, subsample=0.6, score=-2.880, total=  10.9s
[Parallel(n_jobs=1)]: Done 625 out of 625 | elapsed: 33.8min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=0.5, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=0.5, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=60, subsample=0.8, score=-2.903, total=   9.5s
[Parallel(n_jobs=1)]: Done 626 out of 626 | elapsed: 33.9min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=0.5, learning_rate=0.01, 

[CV]  colsample_bytree=0.8, gamma=1, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.8, score=0.029, total=   7.4s
[Parallel(n_jobs=1)]: Done 648 out of 648 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.8, score=0.028, total=   7.4s
[Parallel(n_jobs=1)]: Done 649 out of 649 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.8, score=0.024, total=   7.5s
[Parallel(n_jobs=1)]: Done 650 out of 650 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1, learning_rate=0.1, max_depth=3, min_chil

[CV]  colsample_bytree=0.8, gamma=1, learning_rate=0.1, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.6, score=0.030, total=  13.3s
[Parallel(n_jobs=1)]: Done 672 out of 672 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1, learning_rate=0.1, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=1, learning_rate=0.1, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.6, score=0.030, total=  13.3s
[Parallel(n_jobs=1)]: Done 673 out of 673 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1, learning_rate=0.1, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=1, learning_rate=0.1, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.6, score=0.028, total=  13.5s
[Parallel(n_jobs=1)]: Done 674 out of 674 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1, learning_rate=0.1, max_depth=3, min

[CV]  colsample_bytree=0.8, gamma=1, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8, score=0.029, total=  15.3s
[Parallel(n_jobs=1)]: Done 696 out of 696 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8, score=0.031, total=  15.6s
[Parallel(n_jobs=1)]: Done 697 out of 697 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8, score=0.031, total=  15.6s
[Parallel(n_jobs=1)]: Done 698 out of 698 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1, learning_rate=0.1, max_depth=4, min

[CV]  colsample_bytree=0.8, gamma=1, learning_rate=0.1, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.8, score=0.026, total=  16.3s
[Parallel(n_jobs=1)]: Done 720 out of 720 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.6, score=-2.906, total=   8.3s
[Parallel(n_jobs=1)]: Done 721 out of 721 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.6, score=-2.789, total=   8.2s
[Parallel(n_jobs=1)]: Done 722 out of 722 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_depth=3, 

[CV]  colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_depth=3, min_child_weight=5, n_estimators=60, subsample=0.6, score=-2.803, total=   8.2s
[Parallel(n_jobs=1)]: Done 744 out of 744 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_depth=3, min_child_weight=5, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_depth=3, min_child_weight=5, n_estimators=60, subsample=0.6, score=-2.882, total=   8.2s
[Parallel(n_jobs=1)]: Done 745 out of 745 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_depth=3, min_child_weight=5, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_depth=3, min_child_weight=5, n_estimators=60, subsample=0.8, score=-2.905, total=   7.7s
[Parallel(n_jobs=1)]: Done 746 out of 746 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_depth=3,

[CV]  colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.8, score=-2.865, total=   9.6s
[Parallel(n_jobs=1)]: Done 768 out of 768 | elapsed:  2.1min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.8, score=-2.802, total=   9.5s
[Parallel(n_jobs=1)]: Done 769 out of 769 | elapsed:  2.3min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.8, score=-2.881, total=  13.2s
[Parallel(n_jobs=1)]: Done 770 out of 770 | elapsed:  2.5min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_depth=4,

[CV]  colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.6, score=-1.232, total=  17.1s
[Parallel(n_jobs=1)]: Done 792 out of 792 | elapsed:  7.4min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.6, score=-1.277, total=  17.2s
[Parallel(n_jobs=1)]: Done 793 out of 793 | elapsed:  7.7min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.6, score=-1.234, total=  17.0s
[Parallel(n_jobs=1)]: Done 794 out of 794 | elapsed:  8.0min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1, learning_rate=0.01, max_dep

[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=100, subsample=0.8, score=0.029, total=  12.5s
[Parallel(n_jobs=1)]: Done 816 out of 816 | elapsed: 12.3min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=100, subsample=0.8, score=0.032, total=  12.5s
[Parallel(n_jobs=1)]: Done 817 out of 817 | elapsed: 12.5min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=3, min_child_weight=1, n_estimators=100, subsample=0.8, score=0.030, total=  12.5s
[Parallel(n_jobs=1)]: Done 818 out of 818 | elapsed: 12.7min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_

[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.8, score=0.025, total=  12.4s
[Parallel(n_jobs=1)]: Done 840 out of 840 | elapsed: 16.6min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.6, score=0.030, total=  10.2s
[Parallel(n_jobs=1)]: Done 841 out of 841 | elapsed: 16.8min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=60, subsample=0.6, score=0.030, total=  10.2s
[Parallel(n_jobs=1)]: Done 842 out of 842 | elapsed: 16.9min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_dept

[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=5, n_estimators=60, subsample=0.6, score=0.028, total=  10.2s
[Parallel(n_jobs=1)]: Done 864 out of 864 | elapsed: 21.7min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=5, n_estimators=60, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=5, n_estimators=60, subsample=0.6, score=0.026, total=  10.2s
[Parallel(n_jobs=1)]: Done 865 out of 865 | elapsed: 21.8min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=5, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=5, n_estimators=60, subsample=0.8, score=0.029, total=   9.6s
[Parallel(n_jobs=1)]: Done 866 out of 866 | elapsed: 22.0min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth

[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.8, score=-2.867, total=   7.7s
[Parallel(n_jobs=1)]: Done 888 out of 888 | elapsed: 26.4min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.8, score=-2.803, total=   7.7s
[Parallel(n_jobs=1)]: Done 889 out of 889 | elapsed: 26.5min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.01, max_depth=3, min_child_weight=1, n_estimators=60, subsample=0.8, score=-2.882, total=   7.5s
[Parallel(n_jobs=1)]: Done 890 out of 890 | elapsed: 26.7min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.01, 

[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.01, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.6, score=-1.235, total=  13.6s
[Parallel(n_jobs=1)]: Done 912 out of 912 | elapsed: 30.6min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.01, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.01, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.6, score=-1.279, total=  13.3s
[Parallel(n_jobs=1)]: Done 913 out of 913 | elapsed: 30.8min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.01, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.01, max_depth=3, min_child_weight=5, n_estimators=100, subsample=0.6, score=-1.235, total=  13.3s
[Parallel(n_jobs=1)]: Done 914 out of 914 | elapsed: 31.0min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0

[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8, score=-1.288, total=  15.6s
[Parallel(n_jobs=1)]: Done 936 out of 936 | elapsed: 35.6min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8, score=-1.232, total= -3584.3s
[Parallel(n_jobs=1)]: Done 937 out of 937 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.01, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8, score=-1.278, total=  15.7s
[Parallel(n_jobs=1)]: Done 938 out of 938 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate

[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.8, score=-1.235, total=  15.8s
[Parallel(n_jobs=1)]: Done 959 out of 959 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.01, max_depth=4, min_child_weight=5, n_estimators=100, subsample=0.8, score=-1.274, total=  15.7s
[Parallel(n_jobs=1)]: Done 960 out of 960 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 960 out of 960 | elapsed:    0.0s finished


GridSearchCV(estimator=XGBRegressor(base_score=None, booster=None,
                                    colsample_bylevel=None,
                                    colsample_bynode=None,
                                    colsample_bytree=None,
                                    enable_categorical=False,
                                    eval_metric='rmse', gamma=None, gpu_id=None,
                                    importance_type=None,
                                    interaction_constraints=None,
                                    learning_rate=None, max_delta_step=None,
                                    max_depth=None, min_child_weight=None,
                                    missing=nan, monotone_cons...
                                    random_state=None, reg_alpha=None,
                                    reg_lambda=None, scale_pos_weight=None,
                                    seed=42, subsample=None, tree_method=None,
                                    validate

In [124]:
print("Best parameters:", reg.best_params_)
print("Highest r2: ", reg.best_score_)

Best parameters: {'colsample_bytree': 0.8, 'gamma': 1.5, 'learning_rate': 0.1, 'max_depth': 4, 'min_child_weight': 1, 'n_estimators': 100, 'subsample': 0.8}
Highest r2:  0.028910062889182585


In [125]:
# A parameter grid for XGBoost
params2 = {
        'min_child_weight': [10],
        'gamma': [ 2, 5],
        'subsample': [ 1.0],
        'colsample_bytree': [ 1.0],
        'max_depth': [ 5],
        'n_estimators': range(140, 360, 40),
        'learning_rate': [ 0.05]
 }

In [126]:
#Initializing GrisSearchCV object for XGBoost regressor
reg2 = GridSearchCV(estimator=estimator, 
                   param_grid=params2,
                   scoring='r2', 
                   n_jobs = 1 ,
                   verbose=50000)

In [127]:
#fitting the model
reg2.fit(X_train, y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[CV] colsample_bytree=1.0, gamma=2, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=140, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=2, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=140, subsample=1.0, score=0.029, total=  26.1s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   26.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=2, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=140, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=2, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=140, subsample=1.0, score=0.031, total=  26.7s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   52.7s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=2, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=140, subsample=1.0 
[CV]  colsample_bytree=1.0,

[CV]  colsample_bytree=1.0, gamma=2, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=300, subsample=1.0, score=0.026, total=  57.4s
[Parallel(n_jobs=1)]: Done  24 out of  24 | elapsed: 16.5min remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=2, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=300, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=2, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=300, subsample=1.0, score=0.026, total=  56.3s
[Parallel(n_jobs=1)]: Done  25 out of  25 | elapsed: 17.4min remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=2, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=340, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=2, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=340, subsample=1.0, score=0.028, total= 1.1min
[Parallel(n_jobs=1)]: Done  26 out of  26 | elapsed: 18.5min remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=2, learning_rate=0.05, max_d

[CV]  colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=260, subsample=1.0, score=0.030, total=  49.3s
[Parallel(n_jobs=1)]: Done  48 out of  48 | elapsed: 33.7min remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=260, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=260, subsample=1.0, score=0.027, total=  48.9s
[Parallel(n_jobs=1)]: Done  49 out of  49 | elapsed: 34.5min remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=260, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=260, subsample=1.0, score=0.026, total=  48.5s
[Parallel(n_jobs=1)]: Done  50 out of  50 | elapsed: 35.4min remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_d

GridSearchCV(estimator=XGBRegressor(base_score=None, booster=None,
                                    colsample_bylevel=None,
                                    colsample_bynode=None,
                                    colsample_bytree=None,
                                    enable_categorical=False,
                                    eval_metric='rmse', gamma=None, gpu_id=None,
                                    importance_type=None,
                                    interaction_constraints=None,
                                    learning_rate=None, max_delta_step=None,
                                    max_depth=None, min_child_weight=None,
                                    missing=nan, monotone_cons...
                                    random_state=None, reg_alpha=None,
                                    reg_lambda=None, scale_pos_weight=None,
                                    seed=42, subsample=None, tree_method=None,
                                    validate

In [128]:
print("Best parameters:", reg2.best_params_)
print("Highest r2: ", reg2.best_score_)

Best parameters: {'colsample_bytree': 1.0, 'gamma': 5, 'learning_rate': 0.05, 'max_depth': 5, 'min_child_weight': 10, 'n_estimators': 180, 'subsample': 1.0}
Highest r2:  0.028536142906736627


##### Reruunning the Gridsearch for getting the best of the two GridSearches done above

In [129]:
# A parameter grid for XGBoost
params3 = {
        'min_child_weight': [1,10],
        'gamma': [1.5, 5],
        'subsample': [ 0.8,1.0],
        'colsample_bytree': [ 0.8, 1.0],
        'max_depth': [4, 5],
        'n_estimators': range(100, 180, 40),
        'learning_rate': [0.1, 0.05]
 }

In [130]:
#Initializing GrisSearchCV object for XGBoost regressor
reg3 = GridSearchCV(estimator=estimator, 
                   param_grid=params3,
                   scoring='r2', 
                   n_jobs = 1 ,
                   verbose=50000)

In [131]:
#Fitting the model
reg3.fit(X_train, y_train)

Fitting 5 folds for each of 128 candidates, totalling 640 fits
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8, score=0.029, total=  13.8s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   13.7s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8, score=0.031, total=  16.4s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   30.1s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.

[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=10, n_estimators=100, subsample=0.8, score=0.027, total=  24.6s
[Parallel(n_jobs=1)]: Done  24 out of  24 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=10, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=10, n_estimators=100, subsample=0.8, score=0.026, total=  19.4s
[Parallel(n_jobs=1)]: Done  25 out of  25 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=10, n_estimators=100, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=10, n_estimators=100, subsample=1.0, score=0.029, total=  14.4s
[Parallel(n_jobs=1)]: Done  26 out of  26 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.1,

[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=5, min_child_weight=1, n_estimators=100, subsample=1.0, score=0.030, total=  15.9s
[Parallel(n_jobs=1)]: Done  48 out of  48 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=5, min_child_weight=1, n_estimators=100, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=5, min_child_weight=1, n_estimators=100, subsample=1.0, score=0.027, total=  20.1s
[Parallel(n_jobs=1)]: Done  49 out of  49 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=5, min_child_weight=1, n_estimators=100, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=5, min_child_weight=1, n_estimators=100, subsample=1.0, score=0.026, total=  28.0s
[Parallel(n_jobs=1)]: Done  50 out of  50 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_

[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=5, min_child_weight=10, n_estimators=140, subsample=0.8, score=0.030, total=  25.7s
[Parallel(n_jobs=1)]: Done  72 out of  72 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=5, min_child_weight=10, n_estimators=140, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=5, min_child_weight=10, n_estimators=140, subsample=0.8, score=0.030, total=  25.6s
[Parallel(n_jobs=1)]: Done  73 out of  73 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=5, min_child_weight=10, n_estimators=140, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.1, max_depth=5, min_child_weight=10, n_estimators=140, subsample=0.8, score=0.026, total=  30.1s
[Parallel(n_jobs=1)]: Done  74 out of  74 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.1,

[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.05, max_depth=4, min_child_weight=1, n_estimators=140, subsample=1.0, score=0.029, total=  30.3s
[Parallel(n_jobs=1)]: Done  96 out of  96 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.05, max_depth=4, min_child_weight=1, n_estimators=140, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.05, max_depth=4, min_child_weight=1, n_estimators=140, subsample=1.0, score=0.031, total=  18.3s
[Parallel(n_jobs=1)]: Done  97 out of  97 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.05, max_depth=4, min_child_weight=1, n_estimators=140, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.05, max_depth=4, min_child_weight=1, n_estimators=140, subsample=1.0, score=0.029, total=  17.7s
[Parallel(n_jobs=1)]: Done  98 out of  98 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.05

[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.05, max_depth=4, min_child_weight=10, n_estimators=140, subsample=1.0, score=0.028, total=  21.2s
[Parallel(n_jobs=1)]: Done 119 out of 119 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.05, max_depth=4, min_child_weight=10, n_estimators=140, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.05, max_depth=4, min_child_weight=10, n_estimators=140, subsample=1.0, score=0.025, total=  25.6s
[Parallel(n_jobs=1)]: Done 120 out of 120 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.05, max_depth=5, min_child_weight=1, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.05, max_depth=5, min_child_weight=1, n_estimators=100, subsample=0.8, score=0.029, total=  35.8s
[Parallel(n_jobs=1)]: Done 121 out of 121 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0

[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=100, subsample=0.8, score=0.031, total=  36.8s
[Parallel(n_jobs=1)]: Done 142 out of 142 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=100, subsample=0.8, score=0.029, total=  25.0s
[Parallel(n_jobs=1)]: Done 143 out of 143 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1.5, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=100, subsample=0.8, score=0.028, total=  19.3s
[Parallel(n_jobs=1)]: Done 144 out of 144 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=1.5, learning_rate

[CV]  colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8, score=0.026, total=  15.6s
[Parallel(n_jobs=1)]: Done 165 out of 165 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=100, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=100, subsample=1.0, score=0.029, total=  17.2s
[Parallel(n_jobs=1)]: Done 166 out of 166 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=100, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=100, subsample=1.0, score=0.031, total=  19.9s
[Parallel(n_jobs=1)]: Done 167 out of 167 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=4, min

[CV]  colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=4, min_child_weight=10, n_estimators=100, subsample=1.0, score=0.028, total=  18.4s
[Parallel(n_jobs=1)]: Done 189 out of 189 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=4, min_child_weight=10, n_estimators=100, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=4, min_child_weight=10, n_estimators=100, subsample=1.0, score=0.026, total=  24.2s
[Parallel(n_jobs=1)]: Done 190 out of 190 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=4, min_child_weight=10, n_estimators=140, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=4, min_child_weight=10, n_estimators=140, subsample=0.8, score=0.029, total=  29.7s
[Parallel(n_jobs=1)]: Done 191 out of 191 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=4

[CV]  colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=5, min_child_weight=1, n_estimators=140, subsample=0.8, score=0.030, total=  25.9s
[Parallel(n_jobs=1)]: Done 213 out of 213 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=5, min_child_weight=1, n_estimators=140, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=5, min_child_weight=1, n_estimators=140, subsample=0.8, score=0.026, total=  30.1s
[Parallel(n_jobs=1)]: Done 214 out of 214 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=5, min_child_weight=1, n_estimators=140, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=5, min_child_weight=1, n_estimators=140, subsample=0.8, score=0.026, total=  47.9s
[Parallel(n_jobs=1)]: Done 215 out of 215 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=5, min

[CV]  colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=5, min_child_weight=10, n_estimators=140, subsample=1.0, score=0.031, total=  22.5s
[Parallel(n_jobs=1)]: Done 237 out of 237 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=5, min_child_weight=10, n_estimators=140, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=5, min_child_weight=10, n_estimators=140, subsample=1.0, score=0.030, total=  24.4s
[Parallel(n_jobs=1)]: Done 238 out of 238 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=5, min_child_weight=10, n_estimators=140, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=5, min_child_weight=10, n_estimators=140, subsample=1.0, score=0.026, total=  22.0s
[Parallel(n_jobs=1)]: Done 239 out of 239 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=5, learning_rate=0.1, max_depth=5

[CV]  colsample_bytree=0.8, gamma=5, learning_rate=0.05, max_depth=4, min_child_weight=10, n_estimators=100, subsample=0.8, score=0.029, total=  30.1s
[Parallel(n_jobs=1)]: Done 261 out of 261 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=5, learning_rate=0.05, max_depth=4, min_child_weight=10, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=5, learning_rate=0.05, max_depth=4, min_child_weight=10, n_estimators=100, subsample=0.8, score=0.030, total=  17.9s
[Parallel(n_jobs=1)]: Done 262 out of 262 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=5, learning_rate=0.05, max_depth=4, min_child_weight=10, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=5, learning_rate=0.05, max_depth=4, min_child_weight=10, n_estimators=100, subsample=0.8, score=0.029, total=  17.7s
[Parallel(n_jobs=1)]: Done 263 out of 263 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=5, learning_rate=0.05, max_d

[CV]  colsample_bytree=0.8, gamma=5, learning_rate=0.05, max_depth=5, min_child_weight=1, n_estimators=100, subsample=0.8, score=0.026, total=  20.0s
[Parallel(n_jobs=1)]: Done 285 out of 285 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=5, learning_rate=0.05, max_depth=5, min_child_weight=1, n_estimators=100, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=5, learning_rate=0.05, max_depth=5, min_child_weight=1, n_estimators=100, subsample=1.0, score=0.029, total=  19.0s
[Parallel(n_jobs=1)]: Done 286 out of 286 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=5, learning_rate=0.05, max_depth=5, min_child_weight=1, n_estimators=100, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=5, learning_rate=0.05, max_depth=5, min_child_weight=1, n_estimators=100, subsample=1.0, score=0.030, total=  26.0s
[Parallel(n_jobs=1)]: Done 287 out of 287 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=5, learning_rate=0.05, max_depth=

[CV]  colsample_bytree=0.8, gamma=5, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=100, subsample=1.0, score=0.027, total=  29.9s
[Parallel(n_jobs=1)]: Done 309 out of 309 | elapsed:  1.5min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=5, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=100, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=5, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=100, subsample=1.0, score=0.025, total=  24.2s
[Parallel(n_jobs=1)]: Done 310 out of 310 | elapsed:  1.9min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=5, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=140, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=5, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=140, subsample=0.8, score=0.029, total=  25.6s
[Parallel(n_jobs=1)]: Done 311 out of 311 | elapsed:  2.4min remaining:    0.0s
[CV] colsample_bytree=0.8, gamma=5, learning_rate=0.05, max_d

[CV]  colsample_bytree=1.0, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=140, subsample=0.8, score=0.030, total=  26.0s
[Parallel(n_jobs=1)]: Done 333 out of 333 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=140, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=140, subsample=0.8, score=0.027, total=  27.9s
[Parallel(n_jobs=1)]: Done 334 out of 334 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=140, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=140, subsample=0.8, score=0.026, total=  37.2s
[Parallel(n_jobs=1)]: Done 335 out of 335 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=1.5, learning_rate=0.1, max_

[CV]  colsample_bytree=1.0, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=10, n_estimators=140, subsample=1.0, score=0.031, total=  53.0s
[Parallel(n_jobs=1)]: Done 357 out of 357 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=10, n_estimators=140, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=10, n_estimators=140, subsample=1.0, score=0.030, total=  31.5s
[Parallel(n_jobs=1)]: Done 358 out of 358 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=10, n_estimators=140, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=1.5, learning_rate=0.1, max_depth=4, min_child_weight=10, n_estimators=140, subsample=1.0, score=0.027, total=  28.2s
[Parallel(n_jobs=1)]: Done 359 out of 359 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=1.5, learning_rate=0.1,

[CV]  colsample_bytree=1.0, gamma=1.5, learning_rate=0.1, max_depth=5, min_child_weight=10, n_estimators=100, subsample=0.8, score=0.029, total=  23.6s
[Parallel(n_jobs=1)]: Done 381 out of 381 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=1.5, learning_rate=0.1, max_depth=5, min_child_weight=10, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=1.5, learning_rate=0.1, max_depth=5, min_child_weight=10, n_estimators=100, subsample=0.8, score=0.031, total=  30.7s
[Parallel(n_jobs=1)]: Done 382 out of 382 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=1.5, learning_rate=0.1, max_depth=5, min_child_weight=10, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=1.5, learning_rate=0.1, max_depth=5, min_child_weight=10, n_estimators=100, subsample=0.8, score=0.030, total=  42.0s
[Parallel(n_jobs=1)]: Done 383 out of 383 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=1.5, learning_rate=0.1,

[CV]  colsample_bytree=1.0, gamma=1.5, learning_rate=0.05, max_depth=4, min_child_weight=1, n_estimators=100, subsample=0.8, score=0.025, total=  31.0s
[Parallel(n_jobs=1)]: Done 405 out of 405 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=1.5, learning_rate=0.05, max_depth=4, min_child_weight=1, n_estimators=100, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=1.5, learning_rate=0.05, max_depth=4, min_child_weight=1, n_estimators=100, subsample=1.0, score=0.028, total=  15.8s
[Parallel(n_jobs=1)]: Done 406 out of 406 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=1.5, learning_rate=0.05, max_depth=4, min_child_weight=1, n_estimators=100, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=1.5, learning_rate=0.05, max_depth=4, min_child_weight=1, n_estimators=100, subsample=1.0, score=0.030, total=  14.6s
[Parallel(n_jobs=1)]: Done 407 out of 407 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=1.5, learning_rate=0.05

[CV]  colsample_bytree=1.0, gamma=1.5, learning_rate=0.05, max_depth=4, min_child_weight=10, n_estimators=100, subsample=1.0, score=0.028, total=  14.0s
[Parallel(n_jobs=1)]: Done 428 out of 428 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=1.5, learning_rate=0.05, max_depth=4, min_child_weight=10, n_estimators=100, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=1.5, learning_rate=0.05, max_depth=4, min_child_weight=10, n_estimators=100, subsample=1.0, score=0.027, total=  16.2s
[Parallel(n_jobs=1)]: Done 429 out of 429 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=1.5, learning_rate=0.05, max_depth=4, min_child_weight=10, n_estimators=100, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=1.5, learning_rate=0.05, max_depth=4, min_child_weight=10, n_estimators=100, subsample=1.0, score=0.024, total=  15.1s
[Parallel(n_jobs=1)]: Done 430 out of 430 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=1.5, learning_rate

[CV]  colsample_bytree=1.0, gamma=1.5, learning_rate=0.05, max_depth=5, min_child_weight=1, n_estimators=140, subsample=0.8, score=0.029, total=  50.0s
[Parallel(n_jobs=1)]: Done 451 out of 451 | elapsed:  5.0min remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=1.5, learning_rate=0.05, max_depth=5, min_child_weight=1, n_estimators=140, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=1.5, learning_rate=0.05, max_depth=5, min_child_weight=1, n_estimators=140, subsample=0.8, score=0.031, total=  30.6s
[Parallel(n_jobs=1)]: Done 452 out of 452 | elapsed:  5.5min remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=1.5, learning_rate=0.05, max_depth=5, min_child_weight=1, n_estimators=140, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=1.5, learning_rate=0.05, max_depth=5, min_child_weight=1, n_estimators=140, subsample=0.8, score=0.030, total= -3566.8s
[Parallel(n_jobs=1)]: Done 453 out of 453 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=1.5, learning_rate=0.

[CV]  colsample_bytree=1.0, gamma=1.5, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=140, subsample=0.8, score=0.028, total=  52.5s
[Parallel(n_jobs=1)]: Done 474 out of 474 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=1.5, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=140, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=1.5, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=140, subsample=0.8, score=0.026, total=  35.7s
[Parallel(n_jobs=1)]: Done 475 out of 475 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=1.5, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=140, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=1.5, learning_rate=0.05, max_depth=5, min_child_weight=10, n_estimators=140, subsample=1.0, score=0.029, total=  25.0s
[Parallel(n_jobs=1)]: Done 476 out of 476 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=1.5, learning_rate

[CV]  colsample_bytree=1.0, gamma=5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=140, subsample=1.0, score=0.030, total=  21.1s
[Parallel(n_jobs=1)]: Done 498 out of 498 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=140, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=140, subsample=1.0, score=0.027, total=  28.6s
[Parallel(n_jobs=1)]: Done 499 out of 499 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=140, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=5, learning_rate=0.1, max_depth=4, min_child_weight=1, n_estimators=140, subsample=1.0, score=0.026, total=  31.5s
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=5, learning_rate=0.1, max_depth=4, min

[CV]  colsample_bytree=1.0, gamma=5, learning_rate=0.1, max_depth=5, min_child_weight=1, n_estimators=100, subsample=0.8, score=0.031, total=  23.5s
[Parallel(n_jobs=1)]: Done 522 out of 522 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=5, learning_rate=0.1, max_depth=5, min_child_weight=1, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=5, learning_rate=0.1, max_depth=5, min_child_weight=1, n_estimators=100, subsample=0.8, score=0.030, total=  24.7s
[Parallel(n_jobs=1)]: Done 523 out of 523 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=5, learning_rate=0.1, max_depth=5, min_child_weight=1, n_estimators=100, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=5, learning_rate=0.1, max_depth=5, min_child_weight=1, n_estimators=100, subsample=0.8, score=0.027, total=  25.4s
[Parallel(n_jobs=1)]: Done 524 out of 524 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=5, learning_rate=0.1, max_depth=5, min

[CV]  colsample_bytree=1.0, gamma=5, learning_rate=0.1, max_depth=5, min_child_weight=10, n_estimators=100, subsample=1.0, score=0.029, total=  19.9s
[Parallel(n_jobs=1)]: Done 546 out of 546 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=5, learning_rate=0.1, max_depth=5, min_child_weight=10, n_estimators=100, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=5, learning_rate=0.1, max_depth=5, min_child_weight=10, n_estimators=100, subsample=1.0, score=0.031, total=  21.3s
[Parallel(n_jobs=1)]: Done 547 out of 547 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=5, learning_rate=0.1, max_depth=5, min_child_weight=10, n_estimators=100, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=5, learning_rate=0.1, max_depth=5, min_child_weight=10, n_estimators=100, subsample=1.0, score=0.030, total=  22.8s
[Parallel(n_jobs=1)]: Done 548 out of 548 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=5, learning_rate=0.1, max_depth=5

[CV]  colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_depth=4, min_child_weight=1, n_estimators=100, subsample=1.0, score=0.024, total=  17.8s
[Parallel(n_jobs=1)]: Done 570 out of 570 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_depth=4, min_child_weight=1, n_estimators=140, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_depth=4, min_child_weight=1, n_estimators=140, subsample=0.8, score=0.029, total=  29.3s
[Parallel(n_jobs=1)]: Done 571 out of 571 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_depth=4, min_child_weight=1, n_estimators=140, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_depth=4, min_child_weight=1, n_estimators=140, subsample=0.8, score=0.031, total=  28.4s
[Parallel(n_jobs=1)]: Done 572 out of 572 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_depth=

[CV]  colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_depth=4, min_child_weight=10, n_estimators=140, subsample=0.8, score=0.028, total=  29.5s
[Parallel(n_jobs=1)]: Done 594 out of 594 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_depth=4, min_child_weight=10, n_estimators=140, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_depth=4, min_child_weight=10, n_estimators=140, subsample=0.8, score=0.026, total=  33.2s
[Parallel(n_jobs=1)]: Done 595 out of 595 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_depth=4, min_child_weight=10, n_estimators=140, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_depth=4, min_child_weight=10, n_estimators=140, subsample=1.0, score=0.029, total=  25.9s
[Parallel(n_jobs=1)]: Done 596 out of 596 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_d

[CV]  colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_depth=5, min_child_weight=1, n_estimators=140, subsample=1.0, score=0.030, total=  27.4s
[Parallel(n_jobs=1)]: Done 618 out of 618 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_depth=5, min_child_weight=1, n_estimators=140, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_depth=5, min_child_weight=1, n_estimators=140, subsample=1.0, score=0.027, total=  26.7s
[Parallel(n_jobs=1)]: Done 619 out of 619 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_depth=5, min_child_weight=1, n_estimators=140, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_depth=5, min_child_weight=1, n_estimators=140, subsample=1.0, score=0.026, total=  26.7s
[Parallel(n_jobs=1)]: Done 620 out of 620 | elapsed:    0.0s remaining:    0.0s
[CV] colsample_bytree=1.0, gamma=5, learning_rate=0.05, max_depth=

GridSearchCV(estimator=XGBRegressor(base_score=None, booster=None,
                                    colsample_bylevel=None,
                                    colsample_bynode=None,
                                    colsample_bytree=None,
                                    enable_categorical=False,
                                    eval_metric='rmse', gamma=None, gpu_id=None,
                                    importance_type=None,
                                    interaction_constraints=None,
                                    learning_rate=None, max_delta_step=None,
                                    max_depth=None, min_child_weight=None,
                                    missing=nan, monotone_cons...
                                    random_state=None, reg_alpha=None,
                                    reg_lambda=None, scale_pos_weight=None,
                                    seed=42, subsample=None, tree_method=None,
                                    validate

In [132]:
print("Best parameters:", reg3.best_params_)
print("Highest r2: ", reg3.best_score_)

Best parameters: {'colsample_bytree': 0.8, 'gamma': 5, 'learning_rate': 0.1, 'max_depth': 4, 'min_child_weight': 10, 'n_estimators': 100, 'subsample': 0.8}
Highest r2:  0.028999067412264323


In [133]:
#initialiazing the XGBoost regressor object with the best parameters
xgb = XGBRegressor(n_estimators=100,subsample=0.8,min_child_weight=10,\
                          max_depth=4,learning_rate=0.1,gamma=5,colsample_bytree=0.8,\
                          objective= 'reg:squarederror',eval_metric='rmse', seed=42)

In [134]:
#fitting the model
xgb.fit(X_train, y_train)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=0.8, enable_categorical=False,
             eval_metric='rmse', gamma=5, gpu_id=-1, importance_type=None,
             interaction_constraints='', learning_rate=0.1, max_delta_step=0,
             max_depth=4, min_child_weight=10, missing=nan,
             monotone_constraints='()', n_estimators=100, n_jobs=2,
             num_parallel_tree=1, predictor='auto', random_state=42,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=42,
             subsample=0.8, tree_method='exact', validate_parameters=1,
             verbosity=None)

In [135]:
# make predictions for train data
y_train_pred = xgb.predict(X_train)

In [136]:
#prediction
y_train_pred[:10]

array([31.716421, 30.300241, 31.228897, 32.19927 , 31.716421, 31.076078,
       32.959766, 31.098967, 28.875238, 32.27652 ], dtype=float32)

In [143]:
#predictions on train data as a dataframe
y_train_pred_final = pd.DataFrame({'age':y_train.values.reshape(-1), 'age_pred':y_train_pred})
y_train_pred_final.head()

Unnamed: 0,age,age_pred
0,68,31.716421
1,39,30.300241
2,22,31.228897
3,27,32.199268
4,29,31.716421


In [144]:
#Taking a 25% sample of train data
y_train_xgb=y_train_pred_final.sample(frac =.25)

In [145]:
#Calaculating PPD
y_train_xgb['percent_population_dist']=y_train_xgb['age']-y_train_xgb['age_pred']/y_train_xgb['age']*100

In [146]:
# predicting the scores
score=r2_score(y_train,y_train_pred)
print('r2 socre is',score)
print('rmse is',np.sqrt(mean_squared_error(y_train,y_train_pred)))
print('percentage population distribution is ', y_train_xgb['percent_population_dist'].mean())

r2 socre is 0.03632619123445979
rmse is 9.70044229720062
percentage population distribution is  -77.8899785668994


In [147]:
# make predictions for test data
y_pred = xgb.predict(X_test)

In [148]:
y_pred[:10]

array([32.98785 , 33.200344, 32.27884 , 29.139194, 31.025675, 32.959766,
       31.41874 , 33.4206  , 31.640905, 30.625786], dtype=float32)

In [149]:
#predictions on test data as a dataframe
y_pred_final = pd.DataFrame({'age':y_test.values.reshape(-1), 'age_pred':y_pred})
y_pred_final.head()

Unnamed: 0,age,age_pred
0,36,32.98785
1,24,33.200344
2,33,32.278839
3,20,29.139194
4,27,31.025675


In [150]:
#changing the data type
y_pred_final['age_pred']=y_pred_final['age_pred'].astype('int')

In [151]:
#first 5 rows
y_pred_final.head()

Unnamed: 0,age,age_pred
0,36,32
1,24,33
2,33,32
3,20,29
4,27,31


In [152]:
#Taking 25% of test data as a random sample
y_test_xgb=y_pred_final.sample(frac =.25)

In [153]:
#Calculating PPD
y_test_xgb['percent_population_dist']=y_test_xgb['age']-y_test_xgb['age_pred']/y_test_xgb['age']*100

In [154]:
# predicting the accuracy score
score=r2_score(y_test,y_pred)
print('r2 socre is',score)
print('rmse is',np.sqrt(mean_squared_error(y_test,y_pred)))
print('percentage population distribution is ', y_test_xgb['percent_population_dist'].mean())

r2 socre is 0.030552723506562462
rmse is 9.595393512509075
percentage population distribution is  -77.15758274180621


##### These scores are higher compared to linear regression but still very poor values of r squared and rmse.

# Stacking Model

In [155]:
#get X_train, y_train
X_train= scenario2_train.drop(['age'], axis=1)
X_train= X_train.drop(X_train.iloc[:,0:1], axis=1)
y_train = scenario2_train[['age']]

In [157]:
#get X_test, y_test
X_test= scenario2_test.drop(scenario2_test.iloc[:,2:3], axis=1)
X_test= X_test.drop(X_test.iloc[:,0:1], axis=1)
y_test = scenario2_test[['age']]

Let's find the best parameters for random forest

In [158]:
#Importing libraries
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor

In [159]:
#initializing Random Forest Regression object
rfr=RandomForestRegressor(random_state=42)

In [160]:
#Parameter grid for random forest
param_grid = { 
    'n_estimators': [60,100,140,180,220],
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth' : [4,5,6,7,8],
}

In [161]:
#Running GridSearchCV to find the best potamaters for RF REgressor
CV_rfr = GridSearchCV(estimator=rfr, param_grid=param_grid, scoring='r2', cv= 5)
CV_rfr.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=RandomForestRegressor(random_state=42),
             param_grid={'max_depth': [4, 5, 6, 7, 8],
                         'max_features': ['auto', 'sqrt', 'log2'],
                         'n_estimators': [60, 100, 140, 180, 220]},
             scoring='r2')

In [162]:
print("Best parameters:", CV_rfr.best_params_)
print("Highest r2: ", CV_rfr.best_score_)

Best parameters: {'max_depth': 8, 'max_features': 'auto', 'n_estimators': 220}
Highest r2:  0.027592360158567298


In [163]:
#Build random forest model with best parameters
rfr1=RandomForestRegressor(random_state=42, max_features='auto', n_estimators= 220, max_depth=8)

In [164]:
#fitting the model
rfr1.fit(X_train, y_train)

RandomForestRegressor(max_depth=8, n_estimators=220, random_state=42)

In [165]:
#making predictions
pred=rfr1.predict(X_test)

In [166]:
# predicting the accuracy score
score=r2_score(y_test,pred)
print('r2 socre is',score)
print('rmse is',np.sqrt(mean_squared_error(y_test,pred)))

r2 socre is 0.028933706098585255
rmse is 9.603402523854967


In [167]:
#importing libraries
from sklearn import preprocessing
from mlxtend.regressor import StackingCVRegressor
from sklearn.linear_model import LinearRegression
from sklearn import model_selection

In [168]:
# x,y to np 
X_train_np = X_train.values
y_train_np = y_train.values

In [169]:
#initializing linear regression object for  stacking  
lr1 = LinearRegression()

#Initializing StackingCVRegressor object 
stack = StackingCVRegressor(regressors=[lr1, rfr1],meta_regressor= xgb, use_features_in_secondary=True)

In [170]:
#running each model and finding its accuracy scores
for lr, label in zip([lr1, rfr1, xgb], 
                      ['lr', 
                       'Random Forest', 
                       'xgb']):

    scores = model_selection.cross_val_score(lr, X_train, y_train, cv=3, scoring='r2')
    print("Accuracy: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label))

Accuracy: 0.02 (+/- 0.00) [lr]
Accuracy: 0.03 (+/- 0.00) [Random Forest]
Accuracy: 0.03 (+/- 0.00) [xgb]


In [171]:
# Fit on train data 
slr_fit = stack.fit(X_train_np, y_train_np.reshape(-1))

In [189]:
#prediction on train data
train_pred = slr_fit.predict(X_train)

In [190]:
#predictions on train data as a dataframe
y_train_pred = pd.DataFrame({'age':y_train.values.reshape(-1), 'age_pred':train_pred})
y_train_pred.head()

Unnamed: 0,age,age_pred
0,68,32.030941
1,39,29.880993
2,22,31.225714
3,27,32.438637
4,29,32.030941


In [193]:
#Taking a 25% train data sample
y_train_slr=y_train_pred_final.sample(frac =.25)

In [194]:
#Calcualting PPD
y_train_slr['percent_population_dist']=y_train_slr['age']-y_train_slr['age_pred']/y_train_slr['age']*100

In [195]:
# predicting the accuracy score
score=r2_score(y_train,train_pred)
print('r2 socre is',score)
print('rmse is',np.sqrt(mean_squared_error(y_train, train_pred)))
print('percentage population distribution is ', y_train_slr['percent_population_dist'].mean())

r2 socre is 0.03731226331069881
rmse is 9.695478074163377
percentage population distribution is  -77.42583732847524


In [203]:
#Prediction on test data
test_pred = slr_fit.predict(X_test)

In [204]:
test_pred[:10]

array([32.64378 , 32.975334, 29.832712, 28.60604 , 29.737932, 32.97491 ,
       30.48729 , 33.428013, 30.033407, 28.928804], dtype=float32)

In [205]:
#predictions on train data as a dataframe
y_test_pred = pd.DataFrame({'age':y_test.values.reshape(-1), 'age_pred':test_pred})
y_test_pred.head()

Unnamed: 0,age,age_pred
0,36,32.64378
1,24,32.975334
2,33,29.832712
3,20,28.606039
4,27,29.737932


In [206]:
#Taking 25% of test data
y_test_slr=y_test_pred.sample(frac =.25)

In [207]:
#Calculating PPD
y_test_slr['percent_population_dist']=y_test_slr['age']-y_test_slr['age_pred']/y_test_slr['age']*100

In [208]:
# predicting the accuracy score
score=r2_score(y_test,test_pred)
print('r2 socre is',score)
print('rmse is',np.sqrt(mean_squared_error(y_test, test_pred)))
print('percentage population distribution is ', y_test_slr['percent_population_dist'].mean())

r2 socre is 0.0027685307455556085
rmse is 9.731923358925679
percentage population distribution is  -72.2743992569547
