In [1]:
import pandas as pd
import numpy as np
import math
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest,f_classif
from sklearn.metrics import accuracy_score, precision_score,recall_score,f1_score

In [2]:
# Integer code for every event label. Labels have the form '<TEAM>: <event>';
# the 19 BLUE events take codes 0-18 and the same 19 events for RED take 19-37,
# in the same order.
events_dict = {
    f'{team}: {event}': code
    for code, (team, event) in enumerate(
        (team, event)
        for team in ('BLUE', 'RED')
        for event in (
            'first_blood',
            'dragon',
            'herald',
            'first_tower_top',
            'first_tower_mid',
            'first_tower_bot',
            'second_tower_top',
            'second_tower_mid',
            'second_tower_bot',
            'third_tower_top',
            'third_tower_mid',
            'third_tower_bot',
            'inhibitor_top',
            'inhibitor_mid',
            'inhibitor_bot',
            'baron',
            'elder_dragon',
            'nexus_tower',
            'nexus',
        )
    )
}

# Window length passed to splitSequence when the train/test windows are built.
n_in = 19

df = pd.read_csv('data/clean-one-line.csv')
df = df.drop('golId', axis=1)

# One list per CSV row (game). Cells that are known event labels are replaced
# by their integer code; anything else (e.g. NaN padding) is kept as-is.
data = [
    [events_dict.get(cell, cell) for cell in game]
    for game in df.values.tolist()
]

# Drop the NaN cells so each game is just its list of event codes.
# np.isnan raises TypeError on a non-numeric cell, i.e. on any label that is
# missing from events_dict.
sequence = [[code for code in game if not np.isnan(code)] for game in data]

In [3]:
def splitSequence(games, n_steps_in, n_steps_out=1):
    """Cut every game into sliding (input, output) windows.

    For each start position in a game, the next ``n_steps_in`` items form the
    input window and the ``n_steps_out`` items right after it form the output
    window. Windows never span two games, and a start position is used only
    if the whole input+output span fits inside the game.

    Parameters
    ----------
    games : iterable of sequences
        One sliceable sequence (e.g. a list of event codes) per game.
    n_steps_in : int
        Number of items in each input window.
    n_steps_out : int, default 1
        Number of items in each output window.

    Returns
    -------
    (X, Y) : tuple of lists
        ``X[k]`` is the k-th input window and ``Y[k]`` the matching output
        window, both as slices of the originating game.
    """
    X, Y = [], []
    span = n_steps_in + n_steps_out

    for game in games:
        total = len(game)
        start = 0
        # Slide one step at a time while the full span still fits in the game.
        while start < total and start + span <= total:
            split_at = start + n_steps_in
            X.append(game[start:split_at])
            Y.append(game[split_at:start + span])
            start += 1

    return X, Y

In [4]:
# Hold out the last 15% of the games, unshuffled, so whole games land on one
# side of the split. Despite the names, x_train / x_test are lists of games
# (event-code sequences), not feature matrices.
# NOTE(review): random_state presumably has no effect with shuffle=False — confirm.
x_train, x_test = train_test_split(
    sequence,
    shuffle=False,
    test_size=0.15,
    random_state=42,
)
len(x_train)

2462

In [5]:
# Sliding windows over the training games: n_in input events, 1 target event.
train, y_train = splitSequence(games=x_train, n_steps_in=n_in)

In [6]:
# Sliding windows over the held-out games: n_in input events, 1 target event.
test, y_test = splitSequence(games=x_test, n_steps_in=n_in)

In [7]:
# Shared layout of the train and test frames: six event features plus the label.
event_columns = [f'event_{k}' for k in range(1, 7)] + ['target']

# df_train is only declared here (empty, same columns); it is filled with the
# training windows further down the notebook.
df_train = pd.DataFrame({name: [] for name in event_columns})

# Build the whole test frame in a single constructor call instead of growing
# it one row at a time with df.loc[len(df.index)] = ..., which re-allocates
# the frame on every insert.
# Each row is the first six events of a window followed by the single target
# event that splitSequence paired with that window.
# dtype=float keeps the float64 columns seen in the recorded output.
# NOTE(review): windows are n_in = 19 events long but only events 1-6 are used
# as features, so the target is the 20th event and events 7-19 are ignored.
# Confirm this is intended.
df_test = pd.DataFrame(
    [
        [window[k] for k in range(6)] + [y_test[idx][0]]
        for idx, window in enumerate(test)
    ],
    columns=event_columns,
    dtype=float,
)

In [8]:
# Separate the label column from the feature columns of the test frame.
# Note: this rebinds y_test from the list of target windows to a Series.
X_test = df_test.drop(columns=['target'])
y_test = df_test['target'].copy()
df_test

Unnamed: 0,event_1,event_2,event_3,event_4,event_5,event_6,target
0,19.0,1.0,2.0,1.0,3.0,24.0,11.0
1,1.0,2.0,1.0,3.0,24.0,2.0,10.0
2,2.0,1.0,3.0,24.0,2.0,23.0,9.0
3,1.0,3.0,24.0,2.0,23.0,6.0,14.0
4,3.0,24.0,2.0,23.0,6.0,1.0,13.0
...,...,...,...,...,...,...,...
14756,5.0,8.0,1.0,2.0,3.0,1.0,18.0
14757,19.0,2.0,20.0,3.0,20.0,2.0,33.0
14758,2.0,20.0,3.0,20.0,2.0,24.0,36.0
14759,20.0,3.0,20.0,2.0,24.0,5.0,36.0


In [9]:
# Build the training frame in a single constructor call instead of appending
# one row at a time with df.loc[len(df.index)] = ... . Besides being much
# faster, this makes the cell idempotent: appending to an existing df_train
# duplicated every training row each time the cell was re-run.
# Each row is the first six events of a window followed by the single target
# event that splitSequence paired with that window.
# dtype=float keeps the float64 columns seen in the recorded output.
# NOTE(review): windows are n_in = 19 events long but only events 1-6 are used
# as features. Confirm this is intended.
df_train = pd.DataFrame(
    [
        [window[k] for k in range(6)] + [y_train[idx][0]]
        for idx, window in enumerate(train)
    ],
    columns=[f'event_{k}' for k in range(1, 7)] + ['target'],
    dtype=float,
)

In [10]:
# Separate the label column from the feature columns of the training frame.
# Note: this rebinds y_train from the list of target windows to a Series.
X_train = df_train.drop(columns=['target'])
y_train = df_train['target'].copy()
y_train

0        11.0
1        10.0
2         9.0
3        14.0
4        13.0
         ... 
14756    18.0
14757    33.0
14758    36.0
14759    36.0
14760    37.0
Name: target, Length: 14761, dtype: float64

In [11]:
from sklearn.linear_model import LogisticRegression
# NOTE(review): plot_confusion_matrix is not used in this cell and was removed
# in scikit-learn 1.2 (replaced by ConfusionMatrixDisplay). This import fails
# on newer versions; drop it if no later cell needs it.
from sklearn.metrics import plot_confusion_matrix, classification_report

# One-vs-rest logistic regression over the event-code features. C is the
# inverse regularisation strength, so C=1e12 leaves the fit essentially
# unregularised.
# NOTE(review): the features are integer event codes used as plain numeric
# inputs, although the codes are arbitrary labels. Consider one-hot encoding.
lrl = LogisticRegression(multi_class='ovr', C=1e12, max_iter=5000)

model_ovr = lrl.fit(X_train, y_train)

y_pred = model_ovr.predict(X_test)

# NOTE(review): the recorded output shows identical train and test scores,
# which suggests X_train / X_test held the same data when the cell last ran.
# Re-check after Restart & Run All.
print(f'training model score: {model_ovr.score(X_train, y_train)}')
print(f'test model score: {model_ovr.score(X_test, y_test)}')
# zero_division=0 reports 0.0 for classes that are never predicted (the same
# numbers as the default) without an UndefinedMetricWarning for each of them.
print(classification_report(y_test, y_pred, zero_division=0))

training model score: 0.21021611001964635
test model score: 0.21021611001964635
              precision    recall  f1-score   support

         1.0       0.00      0.00      0.00       370
         3.0       0.00      0.00      0.00        16
         4.0       0.00      0.00      0.00        36
         5.0       0.00      0.00      0.00        16
         6.0       0.00      0.00      0.00       156
         7.0       0.00      0.00      0.00       142
         8.0       0.00      0.00      0.00       116
         9.0       0.00      0.00      0.00       267
        10.0       0.00      0.00      0.00       384
        11.0       0.00      0.00      0.00       249
        12.0       0.00      0.00      0.00       346
        13.0       0.00      0.00      0.00       643
        14.0       0.00      0.00      0.00       425
        15.0       0.00      0.00      0.00       301
        16.0       0.00      0.00      0.00        93
        17.0       0.20      0.67      0.31      2201
 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
