In [9]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

In [2]:
vbc = pd.read_csv("../data/vb_data_3_categZone.csv")
vb = pd.read_csv("../data/vb_data_3_numZone.csv")
print(len(vb), 'lines loaded')

146050 lines loaded


In [3]:
X = vb.drop(['Season', 'GameID', 'PlayerTeam', 'PlayerName', 'RewardDistance', 'RewardValue'], axis=1)
cols = [col for col in list(X.columns) if X[col].dtype == 'object']
X = pd.get_dummies(data=X, columns = cols)
Y = vb.RewardValue
print(len(X.columns), 'columns in dataframe')

204 columns in dataframe


In [4]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(X)
X_n = scaler.transform(X)

In [10]:
# define base model
def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(360, input_dim=204, kernel_initializer='normal', activation='relu'))
    model.add(Dense(120, input_dim=160, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal', activation='tanh'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

from keras.callbacks import EarlyStopping
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)

# evaluate model
estimator = KerasRegressor(build_fn=baseline_model, epochs=15, batch_size=64, verbose=0)
kfold = KFold(n_splits=5)

In [10]:
import time
t = time.time()
for k in range(3,20):
    estimator = KerasRegressor(build_fn=baseline_model, epochs=k, batch_size=64, verbose=0)
    results = cross_val_score(estimator, X_n, Y, cv=kfold)
    print("Result for %d epochs: %.4f (%.2f) MSE" % (k, results.mean(), results.std()))
    #print(results)
    print("Time elapsed:", time.time()-t)

Result for 3 epochs: -0.6233 (0.01) MSE
Time elapsed: 68.32886552810669
Result for 4 epochs: -0.6174 (0.01) MSE
Time elapsed: 171.3441665172577
Result for 5 epochs: -0.6166 (0.01) MSE
Time elapsed: 302.524534702301
Result for 6 epochs: -0.6157 (0.01) MSE
Time elapsed: 474.5486605167389
Result for 7 epochs: -0.6153 (0.01) MSE
Time elapsed: 688.9392251968384
Result for 8 epochs: -0.6180 (0.01) MSE
Time elapsed: 946.8766977787018
Result for 9 epochs: -0.6179 (0.01) MSE
Time elapsed: 1235.9549872875214
Result for 10 epochs: -0.6170 (0.01) MSE
Time elapsed: 1578.1987149715424
Result for 11 epochs: -0.6180 (0.01) MSE
Time elapsed: 1981.4478669166565
Result for 12 epochs: -0.6179 (0.01) MSE
Time elapsed: 2457.8616428375244
Result for 13 epochs: -0.6184 (0.01) MSE
Time elapsed: 3016.3553421497345
Result for 14 epochs: -0.6204 (0.01) MSE
Time elapsed: 3657.088270187378
Result for 15 epochs: -0.6190 (0.01) MSE
Time elapsed: 4401.62668132782
Result for 16 epochs: -0.6200 (0.01) MSE
Time elapsed: 

In [12]:
from sklearn import metrics
for k in range(3,20):
    model = baseline_model()
    model.fit(X_n,Y,epochs=k, shuffle=True, batch_size=64, verbose=0)
    yp = model.predict(X_n)
    print(k, metrics.mean_squared_error(Y,yp))

3 0.614845633050413
4 0.6090885249445158
5 0.6098673522528847
6 0.6087576471036178
7 0.6078304307774619
8 0.606563356283146
9 0.6039002186094222
10 0.6017533567241319
11 0.6016242715165601
12 0.6069657611780048
13 0.604716299466489
14 0.5994722546370327
15 0.6001953628257938
16 0.5993999326150183
17 0.5983801387013684
18 0.5979715852133953
19 0.5957619439576445


In [15]:
estimator.fit(X_n,Y,epochs=20,verbose=1,shuffle=True, validation_split=0.1)

Train on 131445 samples, validate on 14605 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x28fa0f75278>

In [12]:
from sklearn import metrics
yp = estimator.predict(X_n)
print(metrics.mean_squared_error(Y,yp))

0.5914095700594837


In [13]:
import pickle
pickle.dump(yp, open('../output/nn_regr_test.pkl', 'wb'))