This notebook trains a neural network (NN) to predict the `winner` label from existing records stored in the `predictor` collection of an Oracle database.

In [83]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import json
import os
import pandas as pd
import oracledb
from sklearn.model_selection import train_test_split
import tensorflow as tf

In [84]:
import yaml
from pathlib import Path
# Current user's home directory as a plain string.
# NOTE(review): `home` is not referenced by any cell visible in this notebook — confirm it is still needed.
home = str(Path.home())

def process_yaml(path="../config.yaml"):
    """Load a YAML configuration file and return its parsed content.

    Args:
        path: Location of the YAML file. Defaults to ``"../config.yaml"``,
            which is resolved relative to the current working directory.

    Returns:
        The object produced by ``yaml.safe_load`` for the file.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Explicit UTF-8 so the result does not depend on the platform's default encoding.
    with open(path, encoding="utf-8") as file:
        return yaml.safe_load(file)

class OracleJSONDatabaseThickConnection:
    """Thin wrapper around an ``oracledb`` connection pool for SODA collections.

    Helper methods that acquire a connection internally give it back to the
    pool in a ``finally`` block, so a failing database call does not leak a
    pooled connection.
    """

    def __init__(self, data=None):
        """Initialise the Oracle client library and create the connection pool.

        Args:
            data: Configuration mapping. Must provide ``data['INSTANT_CLIENT']``
                and ``data['db']['username']``, ``data['db']['password']``,
                ``data['db']['dsn']``. When omitted (or ``None``) the mapping
                is loaded with ``process_yaml()`` at call time rather than at
                class-definition time.
        """
        if data is None:
            data = process_yaml()
        oracledb.init_oracle_client(lib_dir=data['INSTANT_CLIENT'])
        db_config = data['db']
        self.pool = oracledb.create_pool(
            user=db_config['username'],
            password=db_config['password'],
            dsn=db_config['dsn'],
            min=1,
            max=4,
            increment=1,
            getmode=oracledb.POOL_GETMODE_WAIT,
        )
        print('Connection successful.')

    def close_pool(self):
        """Close the connection pool."""
        self.pool.close()
        print('Connection pool closed.')

    def insert(self, collection_name, json_object_to_insert):
        """Insert one document into a SODA collection (created if necessary).

        Args:
            collection_name: Name passed to ``createCollection``.
            json_object_to_insert: Document passed to ``insertOne``.

        Returns:
            ``1`` on success, ``-1`` when the insert raises
            ``oracledb.IntegrityError``.
        """
        connection = self.pool.acquire()
        try:
            connection.autocommit = True
            soda = connection.getSodaDatabase()
            x_collection = soda.createCollection(collection_name)
            try:
                x_collection.insertOne(json_object_to_insert)
                print('[DBG] INSERT {} OK'.format(json_object_to_insert))
            except oracledb.IntegrityError as e:
                print('[DBG] INSERT {} ERR: {} '.format(json_object_to_insert, e))
                return -1
            return 1
        finally:
            # Runs on success, on the -1 path and on unexpected errors alike.
            self.pool.release(connection)

    def delete(self, collection_name, on_column, on_value):
        """Remove every document matching ``{on_column: on_value}``.

        Args:
            collection_name: Name passed to ``createCollection``.
            on_column: Key of the query-by-example filter.
            on_value: Value the key must equal.
        """
        connection = self.pool.acquire()
        try:
            connection.autocommit = True
            soda = connection.getSodaDatabase()
            x_collection = soda.createCollection(collection_name)
            qbe = {on_column: on_value}
            x_collection.find().filter(qbe).remove()
        finally:
            self.pool.release(connection)

    def get_connection(self):
        """Acquire a pooled connection with autocommit enabled.

        The caller owns the connection and should hand it back with
        ``close_connection``.
        """
        connection = self.pool.acquire()
        connection.autocommit = True
        return connection

    def close_connection(self, conn_object):
        """Release a connection obtained from ``get_connection`` back to the pool."""
        self.pool.release(conn_object)

    def get_collection_names(self):
        """Return the result of ``getCollectionNames(startName=None, limit=0)``."""
        connection = self.pool.acquire()
        try:
            connection.autocommit = True
            return connection.getSodaDatabase().getCollectionNames(startName=None, limit=0)
        finally:
            self.pool.release(connection)

    def open_collection(self, collection_name):
        """Open an existing SODA collection and return the collection object.

        Exactly one connection is acquired and it is deliberately NOT released
        here: the returned collection is used by callers after this method
        returns, so the connection it was opened on has to stay checked out.
        NOTE(review): presumably the connection goes back to the pool once the
        collection object is garbage-collected — confirm against the driver.

        Args:
            collection_name: Name passed to ``openCollection``.
        """
        connection = self.pool.acquire()
        return connection.getSodaDatabase().openCollection(collection_name)

In [85]:
# Open the pooled database connection and show which SODA collections exist.
db = OracleJSONDatabaseThickConnection()
collection_names = db.get_collection_names()
print(collection_names)

Connection successful.
['1v1_model', 'match', 'match_detail', 'matchups', 'predictor', 'predictor_liveclient', 'summoner']


In [86]:
# Read documents from the 'predictor' collection into a list of their contents.
data = db.open_collection('predictor')
all_data = []
i = 0
for i, doc in enumerate(data.find().getCursor(), start=1):
    all_data.append(doc.getContent())
    # The check runs after appending, so reading stops at 100001 documents.
    if i > 100000:
        break

print('Data length: {}'.format(len(all_data)))

Data length: 100001


In [87]:
# Build the frame directly from the list of document dicts. Serialising to a
# JSON string only to parse it back with pd.read_json is wasted work, and
# passing a literal JSON string to read_json is deprecated in recent pandas.
df = pd.DataFrame(all_data)

df.head(5)

Unnamed: 0,winner,trueDamageTaken,totalGold,magicResist,timeEnemySpentControlled,ccReduction,goldPerSecond,spellVamp,timestamp,bonusArmorPenPercent,...,physicalDamageDone,attackSpeed,magicDamageTaken,magicDamageDoneToChampions,healthMax,armor,xp,totalDamageDone,level,armorPen
0,0,0,1334,42,6705,0,0,0,240060,0,...,5088,123,165,58,767,37,1453,6574,4,0
1,0,70,1097,30,5054,0,0,0,240060,0,...,4697,117,119,53,659,38,822,4750,3,0
2,0,0,1507,30,13840,0,0,0,240060,0,...,865,112,88,410,851,45,963,1711,3,0
3,1,0,1169,41,10571,5,0,0,240060,0,...,4559,105,308,0,766,37,1075,4559,3,0
4,1,0,1313,42,43737,0,24,0,240060,0,...,9185,119,550,0,762,44,1190,11795,4,0


In [88]:
# Display the column names of the loaded frame.
df.columns

Index(['winner', 'trueDamageTaken', 'totalGold', 'magicResist',
       'timeEnemySpentControlled', 'ccReduction', 'goldPerSecond', 'spellVamp',
       'timestamp', 'bonusArmorPenPercent', 'magicPenPercent', 'participantId',
       'armorPenPercent', 'physicalDamageTaken', 'magicPen', 'powerMax',
       'identifier', 'totalDamageTaken', 'power', 'attackDamage', 'lifesteal',
       'abilityPower', 'powerRegen', 'trueDamageDoneToChampions',
       'jungleMinionsKilled', 'physicalDamageDoneToChampions', 'movementSpeed',
       'bonusMagicPenPercent', 'cooldownReduction',
       'totalDamageDoneToChampions', 'minionsKilled', 'healthRegen', 'health',
       'trueDamageDone', 'magicDamageDone', 'physicalDamageDone',
       'attackSpeed', 'magicDamageTaken', 'magicDamageDoneToChampions',
       'healthMax', 'armor', 'xp', 'totalDamageDone', 'level', 'armorPen'],
      dtype='object')

In [89]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
df.describe()

Unnamed: 0,winner,trueDamageTaken,totalGold,magicResist,timeEnemySpentControlled,ccReduction,goldPerSecond,spellVamp,timestamp,bonusArmorPenPercent,...,physicalDamageDone,attackSpeed,magicDamageTaken,magicDamageDoneToChampions,healthMax,armor,xp,totalDamageDone,level,armorPen
count,100001.0,100001.0,100001.0,100001.0,100001.0,100001.0,100001.0,100001.0,100001.0,100001.0,...,100001.0,100001.0,100001.0,100001.0,100001.0,100001.0,100001.0,100001.0,100001.0,100001.0
mean,0.499435,414.9644,5367.99752,45.03744,103458.4,6.518115,16.332257,0.076339,867697.5,0.0,...,28631.072959,141.885171,2935.98423,2724.485435,1373.901351,68.98102,5912.04605,49200.524065,8.397946,0.0
std,0.500002,723.207542,3926.266767,21.065325,140543.7,12.510065,34.592667,1.184294,560966.6,0.0,...,40271.852413,45.22791,3481.799732,4754.082157,614.55394,37.755724,4624.107962,52616.841484,4.424767,0.0
min,0.0,0.0,500.0,7.0,0.0,-30.0,0.0,0.0,0.0,0.0,...,0.0,56.0,0.0,0.0,340.0,17.0,0.0,0.0,1.0,0.0
25%,0.0,0.0,2061.0,33.0,10834.0,0.0,0.0,0.0,420099.0,0.0,...,3077.0,112.0,401.0,98.0,888.0,45.0,2019.0,8725.0,5.0,0.0
50%,0.0,150.0,4704.0,38.0,54581.0,0.0,0.0,0.0,840234.0,0.0,...,10046.0,125.0,1703.0,829.0,1292.0,59.0,5165.0,29384.0,9.0,0.0
75%,1.0,509.0,7927.0,47.0,140229.0,10.0,10.0,0.0,1260467.0,0.0,...,38959.0,156.0,4273.0,3098.0,1729.0,81.0,9020.0,76705.0,12.0,0.0
max,1.0,18181.0,23396.0,492.0,2520831.0,77.0,233.0,37.0,2760816.0,0.0,...,368403.0,621.0,35552.0,66384.0,5793.0,693.0,26694.0,391159.0,18.0,0.0


In [90]:
# Drop columns we don't want as model inputs. errors='ignore' keeps this cell
# re-runnable after `df` has already been re-bound without these columns.
df = df.drop(columns=['identifier', 'participantId', 'timestamp'], errors='ignore')

label = 'winner'

# Features are every remaining column except the label; the label is selected
# (not popped) so `df` is not mutated and the cell can be executed again.
X = df.drop(columns=[label])
y = df[label]

# Hold out 20% of the rows; the fixed random_state makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(X, y, random_state=1, test_size=0.2)

print(X_train.columns)

Index(['trueDamageTaken', 'totalGold', 'magicResist',
       'timeEnemySpentControlled', 'ccReduction', 'goldPerSecond', 'spellVamp',
       'bonusArmorPenPercent', 'magicPenPercent', 'armorPenPercent',
       'physicalDamageTaken', 'magicPen', 'powerMax', 'totalDamageTaken',
       'power', 'attackDamage', 'lifesteal', 'abilityPower', 'powerRegen',
       'trueDamageDoneToChampions', 'jungleMinionsKilled',
       'physicalDamageDoneToChampions', 'movementSpeed',
       'bonusMagicPenPercent', 'cooldownReduction',
       'totalDamageDoneToChampions', 'minionsKilled', 'healthRegen', 'health',
       'trueDamageDone', 'magicDamageDone', 'physicalDamageDone',
       'attackSpeed', 'magicDamageTaken', 'magicDamageDoneToChampions',
       'healthMax', 'armor', 'xp', 'totalDamageDone', 'level', 'armorPen'],
      dtype='object')


In [96]:
# Seed TensorFlow so weight initialisation and shuffling are repeatable.
tf.random.set_seed(1)

# `winner` is a 0/1 label (see df.describe()), so this is binary
# classification: the output unit uses a sigmoid to produce a probability and
# is trained with binary cross-entropy rather than a linear output with MSE.
# NOTE(review): the input columns have very different scales and are fed in
# unscaled — consider normalising them before training.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid', kernel_initializer='normal'),
])

loss_fn = tf.keras.losses.BinaryCrossentropy()

model.compile(optimizer='adam',
              loss=loss_fn,
              metrics=['accuracy'])

# Keep the History object so the training curves can be inspected later,
# instead of leaving its repr as the cell output.
history = model.fit(X_train, Y_train, epochs=100, batch_size=128)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x22a4ec7d3a0>

In [None]:
# NOTE(review): `TabularPredictor` and `train` are not imported or assigned in
# any cell visible in this notebook, so this unexecuted cell would raise
# NameError as written — confirm whether it is a leftover from an earlier version.
# NOTE(review): `save_path` is an absolute, machine-specific path.
save_path = '/home/ubuntu/models/autogluon_trained_models'  # specifies folder to store trained models
predictor = TabularPredictor(label=label, path=save_path).fit(train, time_limit=600)

In [None]:
# NOTE(review): `test` is not assigned in any cell visible in this notebook — confirm where it should come from.
y_test = test[label]  # values to predict
test_data_nolabel = test.drop(columns=[label])  # drop only the label column so predictions are made without the answers
test_data_nolabel.head(5)

In [None]:
# Reload the predictor from `save_path`, predict on the unlabeled test frame
# and score the predictions against `y_test`.
# NOTE(review): `TabularPredictor` is not imported in any cell visible in this notebook.
predictor = TabularPredictor.load(save_path)

y_pred = predictor.predict(test_data_nolabel)
print("Predictions:  \n", y_pred)
perf = predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True)


In [None]:
# NOTE(review): depends on `predictor` and `test`, which are only produced by the unexecuted cells above — confirm.
predictor.leaderboard(test, silent=False)

In [None]:
# NOTE(review): depends on `predictor` and `test`, which are only produced by the unexecuted cells above — confirm.
predictor.feature_importance(test)

In [None]:
# Marker printed when the notebook has run through to the last cell.
print('Finished')