In [1]:
import xgboost as xgb
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import explained_variance_score
import numpy as np

In [8]:
X = pd.read_csv('/home/giuseppe/Downloads/CCPP/Folds.csv')
X.head(15)

Unnamed: 0,AT,V,AP,RH,PE
0,8.34,40.77,1010.84,90.01,480.48
1,23.64,58.49,1011.4,74.2,445.75
2,29.74,56.9,1007.15,41.91,438.76
3,19.07,49.69,1007.22,76.79,453.09
4,11.8,40.66,1017.13,97.2,464.43
5,13.97,39.16,1016.05,84.6,470.96
6,22.1,71.29,1008.2,75.38,442.35
7,14.47,41.76,1021.98,78.41,464.0
8,31.25,69.51,1010.25,36.83,428.77
9,6.77,38.18,1017.8,81.13,484.31


In [9]:
y = X['PE']
X = X.drop('PE', axis = 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [10]:
xgb.XGBRegressor()

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

In [44]:
xgb_model = xgb.XGBRegressor()
xgb_model.fit(X_train, y_train)
predictions = xgb_model.predict(X_test)
r2 = explained_variance_score(predictions, y_test)
print(r2)
print(1 / (1 - r2))

0.9434299735993638
17.677205821292546


In [37]:
def score(params):
    xgb_model = xgb.XGBRegressor(gamma = params['gamma'],
                                 max_depth = params['max_depth'],
                                 min_child_weight = params['min_child_weight'],
                                 learning_rate = params['learning_rate'])
    xgb_model.fit(X_train, y_train)
    predictions = xgb_model.predict(X_test)
    r2 = explained_variance_score(predictions, y_test)
    loss = 1 - r2
    print(1 / loss)
    return {'loss': loss, 'status': STATUS_OK}

def optimize(space):
    best = fmin(score, space, algo = tpe.suggest, 
                max_evals = 50)
    return best

In [38]:
space = {
    'gamma': hp.quniform('gamma', 0.5, 1, 0.05),
    'learning_rate': hp.quniform('learning_rate', 0.025, 0.5, 0.025),
    'max_depth':  hp.choice('max_depth', np.arange(1, 14, dtype = int)),
    'min_child_weight': hp.quniform('min_child_weight', 1, 6, 1)
}
best = optimize(space)
best

19.257813555087125
29.371416769377127
25.61431798999916
17.45401819865663
30.42127471529552
18.77488659765708
14.056666811389139
21.229009335357862
14.709385211988597
29.025595724092934
17.70809564241969
29.960727269216704
18.52424043722557
27.319878511061646
18.73517117914144
29.63705755693586
22.235507851224387
27.528304946265337
16.116118107296472
22.621093999975116
29.30349998610726
28.824836062756887
30.286524690197076
27.964024076070782
28.666161778723033
22.68778292804917
29.744120230286885
28.47704135927469
29.738462447452996
20.101870480407424
23.99758889516673
29.275699204940373
27.310265376346422
24.559114511750973
28.642758697503083
27.966414163421184
10.12722488440535
24.274997755436328
29.539580248798625
14.676371283080943
29.709171658095197
30.05284698194466
20.945737994715913
26.596586418262987
25.977794670300575
29.442771946119176
14.610260735912666
23.026861285201715
23.70512088019572
27.880067423657785


{'gamma': 0.9, 'learning_rate': 0.2, 'max_depth': 10, 'min_child_weight': 4.0}

In [39]:
space = {
    'gamma': hp.quniform('gamma', 0.5, 1, 0.05),
    'learning_rate': 0.1,
    'max_depth':  3,
    'min_child_weight': 1
}
best = optimize(space)
best

17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726
17.677707715370726


{'gamma': 0.5}

In [40]:
space = {
    'gamma': 0,
    'learning_rate': hp.quniform('learning_rate', 0.025, 0.5, 0.025),
    'max_depth':  3,
    'min_child_weight': 1
}
best = optimize(space)
best

17.035687390723854
17.677205821292546
24.032584478081997
20.750628570306016
21.036553545354973
20.754512983412766
22.56227166978151
20.750628570306016
22.56227166978151
18.041339549189846
17.677205821292546
22.56227166978151
16.161080617707952
16.161080617707952
24.032584478081997
20.621357601498577
20.750628570306016
23.16501299757314
24.032584478081997
22.56227166978151
21.750840600901146
22.287384121863894
24.25806696801042
24.25806696801042
24.25806696801042
22.287384121863894
21.907794016626514
24.25806696801042
19.188323056739115
24.25806696801042
22.499377312134044
23.16501299757314
19.188323056739115
21.907794016626514
21.036553545354973
23.16501299757314
22.499377312134044
19.188323056739115
21.750840600901146
24.032584478081997
22.499377312134044
20.621357601498577
18.041339549189846
23.16501299757314
22.287384121863894
20.754512983412766
21.907794016626514
17.035687390723854
22.56227166978151
24.25806696801042


{'learning_rate': 0.5}

In [41]:
space = {
    'gamma': 0,
    'learning_rate': 0.1,
    'max_depth':  hp.choice('max_depth', np.arange(1, 14, dtype=int)),
    'min_child_weight': 1
}
best = optimize(space)
best

29.579461078771754
29.579461078771754
30.52212501829393
24.585803831118852
30.52212501829393
23.10240207219613
29.579461078771754
21.476520201099397
27.5812887945868
14.245235957246791
19.393364500422635
16.116118107296472
30.52212501829393
21.476520201099397
29.579461078771754
27.5812887945868
30.52212501829393
23.10240207219613
29.551034052658164
14.245235957246791
30.52212501829393
17.677205821292546
27.189541738454285
28.9497195361232
30.52212501829393
30.52212501829393
30.52212501829393
19.393364500422635
16.116118107296472
17.677205821292546
29.551034052658164
24.585803831118852
27.189541738454285
28.9497195361232
30.52212501829393
30.52212501829393
24.585803831118852
23.10240207219613
21.476520201099397
29.579461078771754
27.5812887945868
30.52212501829393
14.245235957246791
19.393364500422635
16.116118107296472
30.52212501829393
30.52212501829393
28.9497195361232
29.551034052658164
21.476520201099397


{'max_depth': 12}

In [42]:
space = {
    'gamma': 0,
    'learning_rate': 0.1,
    'max_depth':  3,
    'min_child_weight': hp.quniform('min_child_weight', 1, 6, 1)
}
best = optimize(space)
best

17.629152707232883
17.677205821292546
17.629152707232883
17.57507899790063
17.57507899790063
17.70112230542156
17.629152707232883
17.70112230542156
17.598195670002546
17.53721792566778
17.57507899790063
17.598195670002546
17.57507899790063
17.70112230542156
17.629152707232883
17.677205821292546
17.598195670002546
17.57507899790063
17.70112230542156
17.57507899790063
17.70112230542156
17.677205821292546
17.70112230542156
17.70112230542156
17.677205821292546
17.70112230542156
17.677205821292546
17.629152707232883
17.70112230542156
17.629152707232883
17.677205821292546
17.629152707232883
17.677205821292546
17.70112230542156
17.70112230542156
17.629152707232883
17.677205821292546
17.70112230542156
17.629152707232883
17.70112230542156
17.677205821292546
17.629152707232883
17.53721792566778
17.598195670002546
17.677205821292546
17.70112230542156
17.57507899790063
17.629152707232883
17.70112230542156
17.629152707232883


{'min_child_weight': 2.0}

In [43]:
space = {
    'gamma': hp.quniform('gamma', 0.5, 1, 0.05),
    'learning_rate': hp.quniform('learning_rate', 0.025, 0.5, 0.025),
    'max_depth':  12,
    'min_child_weight': hp.quniform('min_child_weight', 1, 6, 1)
}
best = optimize(space)
best

28.6079902638914
26.036969786195034
29.23575575959322
29.751005832451312
29.107372968932886
27.212244985705134
29.300961465522782
30.324840842981107
26.765022328650854
26.137939623174205
27.741258776203086
27.17006472100223
24.725063747213408
28.111510417381062
26.863903361035558
30.065054160916773
29.469152690358943
29.777320352892644
28.64749155044564
28.032340451072464
10.12722488440535
30.713925191687352
10.12722488440535
29.419157737879488
30.370166298437542
30.443538677799125
28.90592122832711
28.919451493222862
30.336952624419762
28.621915972038256
24.16836167908716
31.06688517926498
30.71575571482342
27.743575413012877
30.756120033519444
31.327680222886894
31.327680222886894
31.14478023690131
23.58807831737575
29.903452314902125
28.992194981758743
30.56308372063744
30.571719889165138
29.645523504702
29.683434567197075
27.827490729241415
29.979261914594744
25.486153348377186
24.223764796421587
27.614175091700027


{'gamma': 0.9500000000000001,
 'learning_rate': 0.15000000000000002,
 'min_child_weight': 1.0}