In [12]:
import pandas as pd
import numpy as np
import math
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest,f_classif
from sklearn.metrics import accuracy_score, precision_score,recall_score,f1_score

In [13]:
# Integer encoding of the '<TEAM>: <event>' labels.  Codes are assigned
# team by team in the order listed below, so BLUE events take 0..18 and
# RED events take 19..37.
EVENT_TEAMS = ('BLUE', 'RED')
EVENT_NAMES = (
    'first_blood',
    'dragon',
    'herald',
    'first_tower_top',
    'first_tower_mid',
    'first_tower_bot',
    'second_tower_top',
    'second_tower_mid',
    'second_tower_bot',
    'third_tower_top',
    'third_tower_mid',
    'third_tower_bot',
    'inhibitor_top',
    'inhibitor_mid',
    'inhibitor_bot',
    'baron',
    'elder_dragon',
    'nexus_tower',
    'nexus',
)

_event_labels = [
    f'{team}: {event}' for team in EVENT_TEAMS for event in EVENT_NAMES
]
events_dict = {label: code for code, label in enumerate(_event_labels)}

# Load the event table; the 'golId' column is dropped before encoding.
df = pd.read_csv('data/clean-one-line.csv').drop('golId', axis=1)

# One list per CSV row.  Cells that are known event labels are replaced by
# their integer code; every other cell is kept exactly as it was read.
data = [
    [events_dict.get(cell, cell) for cell in row]
    for row in df.values.tolist()
]

# Flatten the rows, one after another, into a single list.
labelsArray = [cell for row in data for cell in row]

# Discard float cells (presumably the NaN padding of shorter rows - TODO
# confirm); whatever remains forms the event stream.
sequence = [cell for cell in labelsArray if not isinstance(cell, float)]

# Window length passed to splitSequence when building the samples.
n_in = 6

In [14]:
def splitSequence(sequence, n_steps_in, n_steps_out=1):
    """Cut a sequence into overlapping (input window, output window) pairs.

    A window of ``n_steps_in`` items is slid over ``sequence`` one position
    at a time; each window is paired with the ``n_steps_out`` items that
    immediately follow it.  Sliding stops as soon as the output window
    would run past the end of the sequence.

    Args:
        sequence: Any object supporting ``len()`` and slicing.
        n_steps_in: Number of items in each input window.
        n_steps_out: Number of items in each output window (default 1).

    Returns:
        A tuple ``(X, y)`` of two equally long lists: ``X[k]`` is the k-th
        input slice and ``y[k]`` the slice that follows it.  Both lists are
        empty when the sequence is too short for a single pair.
    """
    total = len(sequence)
    span = n_steps_in + n_steps_out
    inputs, targets = [], []
    start = 0
    # Advance while the combined input + output span still fits.
    while start < total and start + span <= total:
        split = start + n_steps_in
        inputs.append(sequence[start:split])
        targets.append(sequence[split:start + span])
        start += 1
    return inputs, targets

In [15]:
# Order-preserving hold-out: with shuffle=False the leading 85 % of the
# event stream becomes the training part and the trailing 15 % the test part.
x_train, x_test = train_test_split(
    sequence,
    test_size=0.15,
    shuffle=False,
    random_state=42,
)
len(x_train)

29714

In [16]:
# Slide a window of n_in events over the training stream; each window is
# paired with the single event that follows it.
train, y_train = splitSequence(x_train, n_steps_in=n_in)
print(train[:5])
y_train[:5]

[[19, 1, 2, 1, 3, 24], [1, 2, 1, 3, 24, 2], [2, 1, 3, 24, 2, 23], [1, 3, 24, 2, 23, 6], [3, 24, 2, 23, 6, 1]]


[[2], [23], [6], [1], [5]]

In [17]:
# Same windowing for the held-out stream: n_in events in, one event out.
test, y_test = splitSequence(x_test, n_steps_in=n_in)
print(test[:5])
y_test[:5]

[[18, 0, 21, 1, 24, 20], [0, 21, 1, 24, 20, 21], [21, 1, 24, 20, 21, 22], [1, 24, 20, 21, 22, 5], [24, 20, 21, 22, 5, 23]]


[[21], [22], [5], [23], [26]]

In [18]:
# Column layout shared by the train and test frames: one column per event of
# the input window (n_in of them) followed by the event to predict.
window_columns = [f'event_{k}' for k in range(1, n_in + 1)] + ['target']

# Build the whole test frame in a single constructor call.  Adding rows one
# at a time with .loc enlarges the frame on every insert, which becomes very
# slow for thousands of windows.  Each y_test entry is a one-element list, so
# its first item is the target.  Codes are stored as float64 (dtype=float).
df_test = pd.DataFrame(
    [list(window) + [target[0]] for window, target in zip(test, y_test)],
    columns=window_columns,
    dtype=float,
)

# Empty frame with the same layout; the training rows are filled in by a
# later cell.
df_train = pd.DataFrame(columns=window_columns, dtype=float)

In [19]:
# Separate the feature columns from the label column.
# NOTE: y_test is rebound here from the list of target windows to a Series,
# so the cell that builds df_test needs the windowing cell re-run first if it
# is ever executed again.
X_test = df_test.drop(columns=['target'])
y_test = df_test['target'].copy()
df_test

Unnamed: 0,event_1,event_2,event_3,event_4,event_5,event_6,target
0,18.0,0.0,21.0,1.0,24.0,20.0,21.0
1,0.0,21.0,1.0,24.0,20.0,21.0,22.0
2,21.0,1.0,24.0,20.0,21.0,22.0,5.0
3,1.0,24.0,20.0,21.0,22.0,5.0,23.0
4,24.0,20.0,21.0,22.0,5.0,23.0,26.0
...,...,...,...,...,...,...,...
5233,4.0,24.0,6.0,20.0,34.0,26.0,29.0
5234,24.0,6.0,20.0,34.0,26.0,29.0,32.0
5235,6.0,20.0,34.0,26.0,29.0,32.0,36.0
5236,20.0,34.0,26.0,29.0,32.0,36.0,36.0


In [20]:
# Build the training frame in a single constructor call instead of adding
# ~30k rows one at a time with .loc, which enlarges the frame on every
# insert.  Assigning a fresh frame (rather than appending to an existing one)
# also means re-running this cell cannot duplicate rows.  Each y_train entry
# is a one-element list, so its first item is the target.  Codes are stored
# as float64 (dtype=float).
df_train = pd.DataFrame(
    [list(window) + [target[0]] for window, target in zip(train, y_train)],
    columns=[f'event_{k}' for k in range(1, n_in + 1)] + ['target'],
    dtype=float,
)

In [21]:
# Separate the feature columns from the label column.
# NOTE: y_train is rebound here from the list of target windows to a Series.
X_train = df_train.drop(columns=['target'])
y_train = df_train['target'].copy()
y_train

0         2.0
1        23.0
2         6.0
3         1.0
4         5.0
         ... 
29703    10.0
29704    13.0
29705     8.0
29706    17.0
29707    17.0
Name: target, Length: 29708, dtype: float64

In [22]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import plot_confusion_matrix, classification_report

# One-vs-rest logistic regression over the six event-code features.
# C is the inverse regularisation strength, so C=1e12 makes the penalty
# negligible; max_iter is raised so the solver has room to converge.
# NOTE(review): the event_* features are integer event codes fed in as plain
# numbers; a one-hot encoding may suit a linear model better - confirm.
lrl = LogisticRegression(multi_class='ovr', C=1e12, max_iter=5000)

model_ovr = lrl.fit(X_train, y_train)

y_pred = model_ovr.predict(X_test)

print(f'training model score: {model_ovr.score(X_train, y_train)}')
print(f'test model score: {model_ovr.score(X_test, y_test)}')
# Many classes are never predicted, which leaves their precision undefined.
# zero_division=0 reports those as 0 (the same value the default shows)
# without emitting an UndefinedMetricWarning for each average.
print(classification_report(y_test, y_pred, zero_division=0))

training model score: 0.15221489161168708
test model score: 0.13631156930126
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00       102
         1.0       0.09      0.44      0.15       436
         2.0       0.12      0.07      0.09       225
         3.0       0.00      0.00      0.00       199
         4.0       0.00      0.00      0.00       176
         5.0       0.21      0.04      0.07       201
         6.0       0.00      0.00      0.00       120
         7.0       0.00      0.00      0.00       138
         8.0       0.00      0.00      0.00       125
         9.0       0.00      0.00      0.00        58
        10.0       0.00      0.00      0.00       104
        11.0       0.00      0.00      0.00        74
        12.0       0.00      0.00      0.00        49
        13.0       0.00      0.00      0.00       114
        14.0       0.00      0.00      0.00        75
        15.0       0.00      0.00      0.00       152
    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
