In [3]:
import numpy as np
import pandas as pd
from numpy import array
from numpy import argmax
from numpy import array_equal

from keras.utils import to_categorical
from keras.models import Model
from keras.layers import Input
from keras.layers import LSTM
from keras.layers import Dense

from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest,f_classif
from sklearn.metrics import accuracy_score, precision_score,recall_score,f1_score

In [4]:
# Integer code for every "<TEAM>: <event>" label.
# Codes are assigned by enumeration: all BLUE events first (0-18), then the
# same events for RED (19-37), in the order listed below.
events_dict = {
    label: code
    for code, label in enumerate(
        f'{team}: {event}'
        for team in ('BLUE', 'RED')
        for event in (
            'first_blood',
            'dragon',
            'herald',
            'first_tower_top',
            'first_tower_mid',
            'first_tower_bot',
            'second_tower_top',
            'second_tower_mid',
            'second_tower_bot',
            'third_tower_top',
            'third_tower_mid',
            'third_tower_bot',
            'inhibitor_top',
            'inhibitor_mid',
            'inhibitor_bot',
            'baron',
            'elder_dragon',
            'nexus_tower',
            'nexus',
        )
    )
}

# Read the event table and drop the 'golId' column
# (presumably a game identifier; it is not used below).
df = pd.read_csv('data/clean-one-line.csv').drop('golId', axis=1)

# Each row is treated as one game. Cells matching a key of events_dict are
# replaced by their integer code; every other cell is kept unchanged.
data = [
    [events_dict.get(cell, cell) for cell in game]
    for game in df.values.tolist()
]

# Flatten all games into one long list, in row order.
# NOTE: games are simply concatenated, so sliding windows built from this
# list can span the end of one game and the start of the next.
labelsArray = [cell for game in data for cell in game]

# Discard float entries (presumably the NaN padding of shorter rows - TODO confirm).
sequence = [code for code in labelsArray if not isinstance(code, float)]

# Number of consecutive events used as model input.
n_in = 6

In [5]:
def splitSequence(sequence, n_steps_in, n_steps_out=1):
    """Cut a sequence into overlapping (input window, following window) pairs.

    A window starts at every position ``i`` for which
    ``i + n_steps_in + n_steps_out`` does not run past the end of ``sequence``.

    Args:
        sequence: Sliceable sequence supporting ``len()``.
        n_steps_in: Length of each input window.
        n_steps_out: Length of the slice that immediately follows each
            input window (default 1).

    Returns:
        Tuple ``(X, y)`` of two equally long lists: ``X[k]`` is
        ``sequence[k:k + n_steps_in]`` and ``y[k]`` is the next
        ``n_steps_out`` items.
    """
    total = len(sequence)
    span = n_steps_in + n_steps_out
    # A start index must be a valid position and leave room for a full span.
    n_windows = min(total, total - span + 1)
    starts = range(n_windows)
    X = [sequence[start:start + n_steps_in] for start in starts]
    y = [sequence[start + n_steps_in:start + span] for start in starts]
    return X, y

In [6]:
# Hold out the last 20% of the event sequence; shuffle=False keeps the
# original order so the test part comes strictly after the training part.
x_train, x_test = train_test_split(
    sequence,
    shuffle=False,
    test_size=0.20,
)
len(x_train)

16044

In [7]:
# Sliding windows over the training part: n_in events in, the next event out.
train, y_train = splitSequence(x_train, n_steps_in=n_in)
print(train[:5])
y_train[:5]

[[2, 20, 1, 2, 3, 20], [20, 1, 2, 3, 20, 4], [1, 2, 3, 20, 4, 5], [2, 3, 20, 4, 5, 23], [3, 20, 4, 5, 23, 24]]


[[4], [5], [23], [24], [20]]

In [8]:
# Sliding windows over the held-out part: n_in events in, the next event out.
test, y_test = splitSequence(x_test, n_steps_in=n_in)
print(test[:5])
y_test[:5]

[[27, 23, 34, 20, 25, 30], [23, 34, 20, 25, 30, 26], [34, 20, 25, 30, 26, 33], [20, 25, 30, 26, 33, 20], [25, 30, 26, 33, 20, 34]]


[[26], [33], [20], [34], [29]]

In [9]:
# Two independent, empty frames with the same layout:
# six event columns followed by the target column.
df_test, df_train = (
    pd.DataFrame({
        column: []
        for column in [
            'event_1',
            'event_2',
            'event_3',
            'event_4',
            'event_5',
            'event_6',
            'target',
        ]
    })
    for _ in range(2)
)

In [10]:
# Build the test frame in one shot: one row per window, holding its six input
# events followed by the event that comes right after it (y_test[i][0]).
# This replaces a row-by-row `.loc[len(df)]` append, which re-allocated the
# frame on every insert and appended duplicate rows when the cell was re-run.
test_rows = [
    list(window[:6]) + [next_event[0]]
    for window, next_event in zip(test, y_test)
]
# Reuse the column layout of the existing (empty) frame; float dtype matches
# what the incremental append used to produce.
df_test = pd.DataFrame(test_rows, columns=df_test.columns, dtype=float)

In [11]:
# Separate the feature columns from the target column of the test frame.
X_test = df_test.drop(columns=['target'])
y_test = df_test['target'].copy()
df_test

Unnamed: 0,event_1,event_2,event_3,event_4,event_5,event_6,target
0,27.0,23.0,34.0,20.0,25.0,30.0,26.0
1,23.0,34.0,20.0,25.0,30.0,26.0,33.0
2,34.0,20.0,25.0,30.0,26.0,33.0,20.0
3,20.0,25.0,30.0,26.0,33.0,20.0,34.0
4,25.0,30.0,26.0,33.0,20.0,34.0,29.0
...,...,...,...,...,...,...,...
4000,8.0,24.0,15.0,6.0,11.0,9.0,12.0
4001,24.0,15.0,6.0,11.0,9.0,12.0,14.0
4002,15.0,6.0,11.0,9.0,12.0,14.0,17.0
4003,6.0,11.0,9.0,12.0,14.0,17.0,17.0


In [12]:
# Build the training frame in one shot: one row per window, holding its six
# input events followed by the event that comes right after it (y_train[i][0]).
# This replaces a row-by-row `.loc[len(df)]` append, which re-allocated the
# frame on every insert and appended duplicate rows when the cell was re-run.
train_rows = [
    list(window[:6]) + [next_event[0]]
    for window, next_event in zip(train, y_train)
]
# Reuse the column layout of the existing (empty) frame; float dtype matches
# what the incremental append used to produce.
df_train = pd.DataFrame(train_rows, columns=df_train.columns, dtype=float)

In [13]:
# Separate the feature columns from the target column of the training frame.
X_train = df_train.drop(columns=['target'])
y_train = df_train['target'].copy()
y_train

0         4.0
1         5.0
2        23.0
3        24.0
4        20.0
         ... 
16033     2.0
16034     4.0
16035     5.0
16036    22.0
16037     1.0
Name: target, Length: 16038, dtype: float64

In [14]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC, SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier

# Fixed seed so the randomised estimators give the same scores on every
# Restart & Run All; without it the results table changes from run to run.
SEED = 42

# Candidate classifiers, keyed by the label shown in the results table.
# LogisticRegression and SVC are left unseeded: per the scikit-learn docs their
# random_state only matters for the sag/saga/liblinear solvers and for
# probability=True respectively, neither of which is selected here.
models = {
    'Logistic Regression': LogisticRegression(max_iter=5000),
    'Support Vector Machine (Linear Kernel)': LinearSVC(max_iter=10000, random_state=SEED),
    'Support Vector Machine (RBF Kernel)': SVC(),
    'Decission Tree': DecisionTreeClassifier(random_state=SEED),
    'Adaboost': AdaBoostClassifier(random_state=SEED),
    'Random Forest': RandomForestClassifier(random_state=SEED),
    'Gradient Boosting Classifier': GradientBoostingClassifier(random_state=SEED)
}

# Stand-alone gradient-boosting model, fitted separately from the dict above.
# NOTE(review): not referenced by the cells visible here - confirm it is still needed.
GBCmodel = GradientBoostingClassifier(random_state=SEED)
GBCmodel.fit(X_train, y_train)

# Fit every candidate on the same training windows.
for name, model in models.items():
    model.fit(X_train, y_train)
    print(name + ' trained')

Logistic Regression trained




Support Vector Machine (Linear Kernel) trained
Support Vector Machine (RBF Kernel) trained
Decission Tree trained
Adaboost trained
Random Forest trained
Gradient Boosting Classifier trained


In [15]:
# Score every fitted model on the held-out windows.
# NOTE: for a single-label multiclass target, micro-averaged precision, recall
# and F1 are all equal to accuracy, so the four columns carry the same number.
# Use average='macro' or 'weighted' if per-class behaviour is of interest.
scores_list = []

for name, model in models.items():
    # Predict once and reuse the result for all four metrics
    # (previously predict() ran four times per model).
    y_pred = model.predict(X_test)
    scores_list.append({
        'Model': name,
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred, average='micro'),
        'Recall': recall_score(y_test, y_pred, average='micro'),
        'F1-Score': f1_score(y_test, y_pred, average='micro')
    })
scores = pd.DataFrame(scores_list)

In [16]:
# Show the per-model metrics table (one row per entry of `models`).
scores

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,Logistic Regression,0.152559,0.152559,0.152559,0.152559
1,Support Vector Machine (Linear Kernel),0.109114,0.109114,0.109114,0.109114
2,Support Vector Machine (RBF Kernel),0.221223,0.221223,0.221223,0.221223
3,Decission Tree,0.261174,0.261174,0.261174,0.261174
4,Adaboost,0.180025,0.180025,0.180025,0.180025
5,Random Forest,0.314107,0.314107,0.314107,0.314107
6,Gradient Boosting Classifier,0.378027,0.378027,0.378027,0.378027
