In [1]:
import numpy as np
import pandas as pd
from numpy import array
from numpy import argmax
from numpy import array_equal

from keras.utils import to_categorical
from keras.models import Model
from keras.layers import Input
from keras.layers import LSTM
from keras.layers import Dense

from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest,f_classif
from sklearn.metrics import accuracy_score, precision_score,recall_score,f1_score

In [2]:
# Integer code for every event label found in the input CSV.
# Codes 0-18 are the BLUE-side events; codes 19-37 are the same events,
# in the same order, for the RED side.
events_dict = {
    'BLUE: first_blood': 0,
    'BLUE: dragon': 1,
    'BLUE: herald': 2,
    'BLUE: first_tower_top': 3,
    'BLUE: first_tower_mid': 4,
    'BLUE: first_tower_bot': 5,
    'BLUE: second_tower_top': 6,
    'BLUE: second_tower_mid': 7,
    'BLUE: second_tower_bot': 8,
    'BLUE: third_tower_top': 9,
    'BLUE: third_tower_mid': 10,
    'BLUE: third_tower_bot': 11,
    'BLUE: inhibitor_top': 12,
    'BLUE: inhibitor_mid': 13,
    'BLUE: inhibitor_bot': 14,
    'BLUE: baron': 15,
    'BLUE: elder_dragon': 16,
    'BLUE: nexus_tower': 17,
    'BLUE: nexus': 18,
    'RED: first_blood': 19,
    'RED: dragon': 20,
    'RED: herald': 21,
    'RED: first_tower_top': 22,
    'RED: first_tower_mid': 23,
    'RED: first_tower_bot': 24,
    'RED: second_tower_top': 25,
    'RED: second_tower_mid': 26,
    'RED: second_tower_bot': 27,
    'RED: third_tower_top': 28,
    'RED: third_tower_mid': 29,
    'RED: third_tower_bot': 30,
    'RED: inhibitor_top': 31,
    'RED: inhibitor_mid': 32,
    'RED: inhibitor_bot': 33,
    'RED: baron': 34,
    'RED: elder_dragon': 35,
    'RED: nexus_tower': 36,
    'RED: nexus': 37
}

# Length of every input window produced by splitSequence.
# NOTE(review): the later cells copy only positions 0-5 of each window into
# event_1..event_6, while the target is the event right after the full
# n_in-long window. The n_in - 6 events in between are never used as
# features -- confirm this gap is intended.
n_in = 19
df = pd.read_csv('data/clean-one-line.csv')
df = df.drop('golId', axis=1)

# One list per CSV row (one game); cells hold event labels or NaN.
data = df.values.tolist()

# Fail early, with the offending labels, instead of letting an unmapped
# string reach np.isnan below and die with an opaque TypeError.
unknown_labels = sorted({
    s for game in data for s in game
    if isinstance(s, str) and s not in events_dict
})
if unknown_labels:
    raise ValueError(f'Unknown event labels in input data: {unknown_labels}')

# Replace every event label by its integer code, in place.
for game in data:
    for i, s in enumerate(game):
        if s in events_dict:
            game[i] = events_dict.get(s)

# Drop the NaN cells (presumably padding for games with fewer events --
# TODO confirm) so each game keeps only its real events.
sequence = [[x for x in game if not np.isnan(x)] for game in data]

In [3]:
def splitSequence(games, n_steps_in, n_steps_out=1):
    """Cut every game into sliding (input window, output window) pairs.

    For each start offset in a game, the input is the next ``n_steps_in``
    items and the output is the ``n_steps_out`` items that follow it. Only
    offsets where both windows fit entirely inside the game are kept, so a
    game shorter than ``n_steps_in + n_steps_out`` contributes nothing.

    Args:
        games: iterable of sliceable sequences (one per game).
        n_steps_in: number of items in each input window.
        n_steps_out: number of items in each output window (default 1).

    Returns:
        Tuple ``(X, Y)`` of two equally long lists; ``X[k]`` is an input
        window and ``Y[k]`` the matching output window. Windows from all
        games are concatenated in game order.
    """
    X, Y = [], []
    span = n_steps_in + n_steps_out

    for game in games:
        # Number of start offsets whose input + output windows still fit,
        # never more than one per element of the game.
        n_windows = min(len(game), len(game) - span + 1)
        for start in range(n_windows):
            split = start + n_steps_in
            X.append(game[start:split])
            Y.append(game[split:start + span])

    return X, Y

In [4]:
# Split whole games (not individual windows) into train and test sets.
# shuffle=False keeps the original order: the first 85% of games go to
# training and the last 15% to testing.
x_train, x_test = train_test_split(
    sequence,
    test_size=0.15,
    shuffle=False,
    random_state=42,
)
len(x_train)

2792

In [5]:
# Sliding windows over the training games; each y_train entry is a one-item list.
train, y_train = splitSequence(x_train, n_steps_in=n_in)

In [6]:
# Sliding windows over the held-out games; each y_test entry is a one-item list.
test, y_test = splitSequence(x_test, n_steps_in=n_in)

In [7]:
# Column layout shared by both frames: six event features, then the label.
window_columns = [f'event_{position}' for position in range(1, 7)] + ['target']

# Empty frames that the following cells fill with one row per window.
df_test = pd.DataFrame({column: [] for column in window_columns})

df_train = pd.DataFrame({column: [] for column in window_columns})

In [8]:
# Build every row first and create the frame once. Appending with
# df_test.loc[len(df_test.index)] re-allocates the frame on every row
# (quadratic) and duplicates rows if the cell is run a second time.
# Each row holds the first six events of a window followed by its target,
# i.e. the single event stored in the matching y_test entry.
# Must run while y_test is still the list returned by splitSequence
# (a later cell rebinds it to a Series).
test_rows = [
    [window[position] for position in range(6)] + [label[0]]
    for window, label in zip(test, y_test)
]
# dtype=float matches the float64 columns the row-by-row fill produced.
df_test = pd.DataFrame(test_rows, columns=df_test.columns, dtype=float)

In [9]:
# Separate the label column from the six event features.
# Note: y_test is rebound here from the list of windows to a Series.
X_test = df_test.drop(columns=['target'])
y_test = df_test['target'].copy()
df_test

Unnamed: 0,event_1,event_2,event_3,event_4,event_5,event_6,target
0,19.0,20.0,2.0,3.0,1.0,6.0,17.0
1,20.0,2.0,3.0,1.0,6.0,24.0,10.0
2,2.0,3.0,1.0,6.0,24.0,5.0,17.0
3,3.0,1.0,6.0,24.0,5.0,22.0,18.0
4,19.0,21.0,1.0,24.0,2.0,20.0,37.0
...,...,...,...,...,...,...,...
2438,20.0,20.0,4.0,34.0,27.0,25.0,31.0
2439,20.0,4.0,34.0,27.0,25.0,6.0,17.0
2440,4.0,34.0,27.0,25.0,6.0,16.0,26.0
2441,34.0,27.0,25.0,6.0,16.0,15.0,16.0


In [10]:
# Build every row first and create the frame once. Appending with
# df_train.loc[len(df_train.index)] re-allocates the frame on every row
# (quadratic) and duplicates rows if the cell is run a second time.
# Each row holds the first six events of a window followed by its target,
# i.e. the single event stored in the matching y_train entry.
# Must run while y_train is still the list returned by splitSequence
# (a later cell rebinds it to a Series).
train_rows = [
    [window[position] for position in range(6)] + [label[0]]
    for window, label in zip(train, y_train)
]
# dtype=float matches the float64 columns the row-by-row fill produced.
df_train = pd.DataFrame(train_rows, columns=df_train.columns, dtype=float)

In [11]:
# Separate the label column from the six event features.
# Note: y_train is rebound here from the list of windows to a Series.
X_train = df_train.drop(columns=['target'])
y_train = df_train['target'].copy()
y_train

0        11.0
1        10.0
2         9.0
3        14.0
4        13.0
         ... 
14326    17.0
14327    20.0
14328    15.0
14329    12.0
14330    18.0
Name: target, Length: 14331, dtype: float64

In [12]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC, SVC, SVR, LinearSVR, NuSVR
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier

# Seed passed to every estimator so that a fresh run reproduces the same
# fitted models and scores (same value as the train/test split above).
RANDOM_STATE = 42

# Candidate classifiers, keyed by the display name used in the score table.
# NOTE(review): the features are the integer codes from events_dict, fed in
# as plain numbers; the linear models therefore treat the codes as ordered
# magnitudes. Consider one-hot encoding -- confirm before comparing models.
models = {
    'Logistic Regression': LogisticRegression(max_iter=5000, random_state=RANDOM_STATE),
    'Support Vector Machine (Linear Kernel)': LinearSVC(max_iter=15000, random_state=RANDOM_STATE),
    'Support Vector Machine (RBF Kernel)': SVC(random_state=RANDOM_STATE),
    # 'Support Vector Machine (Regression)': SVR(),
    # 'Support Vector Machine (Linear Regression)': LinearSVR(max_iter=10000),
    # 'Support Vector Machine (NuSVR)': NuSVR(),
    'Decission Tree': DecisionTreeClassifier(random_state=RANDOM_STATE),
    'Adaboost': AdaBoostClassifier(random_state=RANDOM_STATE),
    'Random Forest': RandomForestClassifier(random_state=RANDOM_STATE),
    'Gradient Boosting Classifier': GradientBoostingClassifier(random_state=RANDOM_STATE),
    # 'XGBoost': XGBClassifier()
}


# Fit each classifier on the training windows and report progress.
for name, model in models.items():
    model.fit(X_train, y_train)
    print(name + ' trained')

Logistic Regression trained




Support Vector Machine (Linear Kernel) trained
Support Vector Machine (RBF Kernel) trained
Decission Tree trained
Adaboost trained
Random Forest trained
Gradient Boosting Classifier trained


In [13]:
# One record per model; turned into the `scores` table at the end.
scores_list = []

for name, model in models.items():
    print(name)
    # Predict once per model and reuse the result for all four metrics
    # (previously model.predict(X_test) ran four times per model).
    predictions = model.predict(X_test)
    # NOTE(review): with average='micro' on a single-label multiclass
    # target, precision, recall and F1 are all equal to accuracy, so the
    # four columns carry the same number. Consider 'macro' or 'weighted'.
    scores_list.append({
        'Model': name,
        'Accuracy': accuracy_score(y_test, predictions),
        'Precision': precision_score(y_test, predictions, average='micro'),
        'Recall': recall_score(y_test, predictions, average='micro'),
        'F1-Score': f1_score(y_test, predictions, average='micro')
    })
scores = pd.DataFrame(scores_list)

Logistic Regression
Support Vector Machine (Linear Kernel)
Support Vector Machine (RBF Kernel)
Decission Tree
Adaboost
Random Forest
Gradient Boosting Classifier


In [14]:
# Display the per-model metric table assembled in the previous cell.
scores

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,Logistic Regression,0.213262,0.213262,0.213262,0.213262
1,Support Vector Machine (Linear Kernel),0.212853,0.212853,0.212853,0.212853
2,Support Vector Machine (RBF Kernel),0.204666,0.204666,0.204666,0.204666
3,Decission Tree,0.103971,0.103971,0.103971,0.103971
4,Adaboost,0.160868,0.160868,0.160868,0.160868
5,Random Forest,0.136717,0.136717,0.136717,0.136717
6,Gradient Boosting Classifier,0.198936,0.198936,0.198936,0.198936
