In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from numpy import array
from numpy import argmax
from numpy import array_equal
from IPython import display


from keras.utils import to_categorical
from keras.models import Model
from keras.layers import Input
from keras.layers import LSTM
from keras.layers import Dense

from xgboost import XGBClassifier
from sklearn.tree import export_graphviz
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest,f_classif
from sklearn.metrics import plot_confusion_matrix, classification_report
from sklearn.ensemble import RandomForestClassifier

In [18]:
events_dict = {
    'BLUE: first_blood': 0,
    'BLUE: dragon': 1,
    'BLUE: herald': 2,
    'BLUE: first_tower_top': 3,
    'BLUE: first_tower_mid': 4,
    'BLUE: first_tower_bot': 5,
    'BLUE: second_tower_top': 6,
    'BLUE: second_tower_mid': 7,
    'BLUE: second_tower_bot': 8,
    'BLUE: third_tower_top': 9,
    'BLUE: third_tower_mid': 10,
    'BLUE: third_tower_bot': 11,
    'BLUE: inhibitor_top': 12,
    'BLUE: inhibitor_mid': 13,
    'BLUE: inhibitor_bot': 14,
    'BLUE: baron': 15,
    'BLUE: elder_dragon': 16,
    'BLUE: nexus_tower': 17,
    'BLUE: nexus': 18,
    'RED: first_blood': 19,
    'RED: dragon': 20,
    'RED: herald': 21,
    'RED: first_tower_top': 22,
    'RED: first_tower_mid': 23,
    'RED: first_tower_bot': 24,
    'RED: second_tower_top': 25,
    'RED: second_tower_mid': 26,
    'RED: second_tower_bot': 27,
    'RED: third_tower_top': 28,
    'RED: third_tower_mid': 29,
    'RED: third_tower_bot': 30,
    'RED: inhibitor_top': 31,
    'RED: inhibitor_mid': 32,
    'RED: inhibitor_bot': 33,
    'RED: baron': 34,
    'RED: elder_dragon': 35,
    'RED: nexus_tower': 36,
    'RED: nexus': 37
}

n_in = 3
df = pd.read_csv('data/clean-one-line.csv')
df = df.drop('golId', axis=1)
data = df.values.tolist()

for game in data:
    for i, s in enumerate(game):
        if s in events_dict:
            game[i] = events_dict.get(s)

sequence = [b for b in data if not(isinstance(b, float))]
sequence = [[x for x in y if not np.isnan(x)] for y in sequence]

In [19]:
def splitSequence(games, n_steps_in, n_steps_out=1):
    X, Y = [], []
    
    for sequence in games:
        for i in range(len(sequence)):
            # find the end of this pattern
            end_ix = i + n_steps_in
            out_end_ix = end_ix + n_steps_out
            # check if we are beyond the sequence
            if out_end_ix > len(sequence):
                break
            # gather input and output parts of the pattern
            seq_x, seq_y = sequence[i:end_ix], sequence[end_ix:out_end_ix]
            X.append(seq_x)
            Y.append(seq_y)
	
    return X, Y

In [20]:
x_train, x_test = train_test_split(sequence,test_size=0.15,random_state=42,shuffle=False)
len(x_train)

2868

In [21]:
train, y_train = splitSequence(x_train, n_in)
test, y_test = splitSequence(x_test, n_in)

In [22]:
df_test = pd.DataFrame({'event_1': [],
                   'event_2': [],
                   'event_3': [],
                   'target': []})

df_train = pd.DataFrame({'event_1': [],
                   'event_2': [],
                   'event_3': [],
                   'target': []})

In [23]:
for index,test_row in enumerate(test):
    event_1 = test_row[0]
    event_2 = test_row[1]
    event_3 = test_row[2]
    df_test.loc[len(df_test.index)] = [event_1, event_2, event_3, y_test[index][0]]

y_test = df_test['target'].copy()
X_test = df_test.drop(['target'],axis=1)
y_test.head()

0    1.0
1    3.0
2    4.0
3    2.0
4    6.0
Name: target, dtype: float64

In [24]:
for index, train_row in enumerate(train):
    event_1 = train_row[0]
    event_2 = train_row[1]
    event_3 = train_row[2]
    df_train.loc[len(df_train.index)] = [event_1, event_2, event_3, y_train[index][0]]

y_train = df_train['target'].copy()
X_train = df_train.drop(['target'],axis=1)

In [25]:
len(X_train)

60302

In [26]:
len(X_test)

10713

In [27]:
clf = RandomForestClassifier(n_estimators=295, min_samples_leaf=4,min_samples_split=10,max_depth=5, criterion='gini', bootstrap=True)
clf.fit(X_train, y_train)

RandomForestClassifier(max_depth=5, min_samples_leaf=4, min_samples_split=10,
                       n_estimators=295)

In [28]:
preds = clf.predict(X_test)

In [29]:
print(f'training model score: {clf.score(X_train, y_train)}')
print(f'test model score: {clf.score(X_test, y_test)}')
print(classification_report(y_test, preds))

training model score: 0.2530927664090743
test model score: 0.25035004200504063
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00        10
         1.0       0.14      0.54      0.22       929
         2.0       0.00      0.00      0.00       309
         3.0       0.00      0.00      0.00       446
         4.0       0.00      0.00      0.00       430
         5.0       0.00      0.00      0.00       452
         6.0       0.00      0.00      0.00       237
         7.0       0.00      0.00      0.00       318
         8.0       0.00      0.00      0.00       272
         9.0       0.00      0.00      0.00       109
        10.0       0.00      0.00      0.00       250
        11.0       0.00      0.00      0.00       167
        12.0       0.00      0.00      0.00       103
        13.0       0.00      0.00      0.00       261
        14.0       0.00      0.00      0.00       164
        15.0       0.00      0.00      0.00       362
  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [30]:
pd.DataFrame(clf.feature_importances_, index=X_train.columns).sort_values(by=0, ascending=False)

Unnamed: 0,0
event_2,0.38873
event_3,0.373589
event_1,0.237681


In [32]:
from sklearn.tree import export_graphviz

estimator = clf.estimators_[5]


# Export as dot file
export_graphviz(estimator, out_file='tree.dot', 
                feature_names = ['event_1','event_2','event_3'],
                class_names = ['0','1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36','37','38'],
                rounded = True, proportion = False, 
                precision = 2, filled = True)

In [33]:
import os
os.environ["PATH"] += os.pathsep + 'C:/Program Files/Graphviz/bin/'
os.system('dot -Tpng tree.dot -o random.png')

0