In [46]:
import pandas as pd
import numpy as np
import math

from sklearn.preprocessing import normalize, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import GradientBoostingRegressor

from hyperopt import fmin, tpe, hp, STATUS_OK,Trials

In [47]:
def go_back(look_back, df):
    #Create previous Day price column 
    
    for i in range(look_back):
        name = 'Previous_Day_Price' + str(i)
        df[name] = df['Weighted_Price'].shift(+1+i)
        df.dropna(how='any', inplace=True)
        
    return df

In [55]:
df = pd.read_csv('fulldata.csv')
#Drop np.nan
df.dropna(how='any', inplace=True)

df = df[['Weighted_Price', 'Polarity_Textblob','Trend_Values', 'Polarity_Vader']]

#Go back
look_back = 1
df = go_back(look_back, df)

df = df.values

# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
df = scaler.fit_transform(df)

dataset = np.delete(df,(0), axis=1)
labels = np.delete(df,(1,2,3,4,5,6,7,8,9), axis=1).ravel()

x_train, x_test, y_train, y_test = train_test_split(dataset, labels, test_size=0.2, shuffle=False)

x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.25, shuffle = False)
    
eval_set = [(x_val, y_val)]



In [56]:
def hyperopt_train_test(params):

    clf = GradientBoostingRegressor(**params)
    clf.fit(x_train, y_train)
    
    prediction = clf.predict(x_train)
    print (math.sqrt(mean_squared_error(prediction,y_train)) )
   
    return math.sqrt(mean_squared_error(prediction,y_train))

best = 200
def f(params):
    global best
    acc = hyperopt_train_test(params)
    if acc < best:
        best = acc
        print ('new best:', best, params)
    return {'loss': acc, 'status': STATUS_OK}

In [57]:
GradientBoosting4space = {
    
    'max_depth': hp.quniform('max_depth',1,20,1),
    'min_weight_fraction_leaf': hp.uniform('min_weight_fraction_leaf', 0, 0.5),
}

trials = Trials()
best = fmin(f, GradientBoosting4space , algo=tpe.suggest, max_evals=1000, trials=trials)
print ('best:')
print (best)

0.07818685245212083
new best: 0.07818685245212083 {'max_depth': 2.0, 'min_weight_fraction_leaf': 0.27027291759556665}
0.10157273550623389
0.15564579482562813
0.03864242003297227
new best: 0.03864242003297227 {'max_depth': 17.0, 'min_weight_fraction_leaf': 0.1422833170479566}
0.021518715604332113
new best: 0.021518715604332113 {'max_depth': 10.0, 'min_weight_fraction_leaf': 0.07623849509948388}
0.142989655213902
0.015485055853808424
new best: 0.015485055853808424 {'max_depth': 20.0, 'min_weight_fraction_leaf': 0.03698708749347979}
0.0704232688060674
0.03589425036540674
0.053184218007799926
0.1568707528001499
0.01739009527256161
0.026976690026729224
0.0493968028019686
0.01574509536167155
0.10772394421108887
0.017449833243120067
0.04404990781428339
0.12604053424966682
0.002290335173058317
new best: 0.002290335173058317 {'max_depth': 12.0, 'min_weight_fraction_leaf': 0.011398188498971118}
0.0030187468696174454
0.0003154595739849803
new best: 0.0003154595739849803 {'max_depth': 8.0, 'min_we

0.10540771782523381
0.004699108151573155
0.13450972789179058
0.01949587573967523
0.04659209766701705
0.012322898413282166
0.0016042614329603675
0.018630984073619888
0.006577572091621138
0.0003154537449312517
0.020855328021936643
0.016681747830074447
0.023073963473141605
0.03642501838093132
0.012618122580663834
0.00289279795368825
0.04120799026328142
0.051892034799967034
0.03204592773413069
0.020815755255247034
0.025944886769567094
0.05887835642035272
0.017683409547124388
0.00548184432111157
0.018698266317896686
0.0133067901384235
0.00031345095435922213
0.01951801549297227
0.03806662199127477
0.010468932029042774
0.016855414615919044
0.035511101861309166
0.0012601608293121907
0.022532424708502453
0.0003750116239294139
0.00992839314595902
0.14497006941776666
0.023427508869894142
0.019621648749050875
0.0032985777921209253
0.15017233327965768
0.028248552251874094
0.07111470531815829
0.017380944603988438
0.0030787720752853717
0.01490989833937063
0.006239377916100257
0.0009315086626322771
0.

0.0003154595739849804
0.022567460154283032
0.019031656174490267
0.0022903351730583174
0.0003134509543592223
0.013464172888657241
0.0065775720916211354
0.019495875739675237
0.0168489741402953
0.0047073294217196415
0.0003136922561452148
0.011585888814001606
0.0366782193026363
0.002413286275718318
0.01877749256293724
0.02824855225187409
0.015568807912228519
0.05412717013801839
0.021544986603683265
0.022854035520713162
0.0003149893894598372
0.012322898413282166
0.017678554077066193
0.007069660561298213
0.019612879427161375
0.0021198132785514925
0.014909898339370628
0.00031498938945983743
0.01005662095636133
0.018575244990226485
0.016848974140295295
0.04061607931559744
0.002119813278551485
0.00031345095435922197
0.005541822513457386
0.0003134509543592223
0.0009032633613226442
0.010459316751883439
0.0009315086626322808
0.006667341478012361
0.015058059787861596
0.0021198132785514847
0.011619298106936292
0.00031345095435922224
0.0003149893894598371
0.005514507426020849
0.01572245355073897
0.00