# Import data
Training data

In [1]:
# Reload imported modules before each cell runs, so edits to the local helper
# modules (utils, transformers) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from utils import get_train_data_only
# Load the training split: a teams table plus one player table per side.
# NOTE(review): presumably df_teams has one row per game and df_BLUE / df_RED
# one row per player (five per game) — confirm against utils.
df_teams, df_BLUE, df_RED = get_train_data_only()

In [3]:
# Row counts of the three training tables (teams, blue side, red side)
tuple(len(frame) for frame in (df_teams, df_BLUE, df_RED))

(30465, 152325, 152325)

## Rebuild the data into a df that simulates the one we will receive from the web


In [4]:
import numpy as np 
import pandas as pd
from tqdm import tqdm

In [5]:
def rearrange_df(df):
    """Pivot a long per-player table into one row per game.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``champion_id``, ``game_id`` and ``role``,
        where ``role`` takes the values TOP, JGL, BOT, MID and SUP.

    Returns
    -------
    pandas.DataFrame
        Columns ``TOP, game_id, JGL, BOT, MID, SUP``; each role column holds
        the ``champion_id`` picked for that role. All joins are inner joins on
        ``game_id``, so a game missing any of the five roles is dropped.
    """
    def _role_picks(role):
        # Rows of a single role, with champion_id renamed to the role name.
        picks = df.loc[df.role == role, ['champion_id', 'game_id']]
        return picks.rename(columns={'champion_id': role})

    # Same join tree as before: (TOP + JGL) with (BOT + MID), then SUP.
    top_jgl = _role_picks('TOP').merge(_role_picks('JGL'), on='game_id')
    bot_mid = _role_picks('BOT').merge(_role_picks('MID'), on='game_id')
    four_roles = top_jgl.merge(bot_mid, on='game_id')
    return four_roles.merge(_role_picks('SUP'), on='game_id')

In [6]:
# One wide row per game for each side, then join both sides on the game id.
# pandas' default merge suffixes tag blue columns with _x and red ones with _y.
df_blue, df_red = (rearrange_df(side) for side in (df_BLUE, df_RED))
data = df_blue.merge(df_red, on='game_id')

In [7]:
# Side that won: expose the teams table's `id` under the name `game_id` so it
# can be joined with the champion table. `assign` returns a new frame, so the
# object handed back by the loader is not mutated in place and the cell can be
# re-run safely.
df_teams = df_teams.assign(game_id=df_teams['id'])

In [8]:
# Attach the winning side to every game. Blue-side champion columns carry the
# _x suffix, red-side champion columns the _y suffix.
winners = df_teams[['winner', 'game_id']]
full_data = data.merge(winners, on='game_id')
full_data

Unnamed: 0,TOP_x,game_id,JGL_x,BOT_x,MID_x,SUP_x,TOP_y,JGL_y,BOT_y,MID_y,SUP_y,winner
0,223,OPL/2016 Season/Split 1/Scoreboards_4_2,421,236,112,412,117,36,18,99,48,BLUE
1,245,Latin America Cup/LAN/2016 Season/Closing Cup/...,421,51,112,43,57,104,15,26,201,BLUE
2,57,OPL/2016 Season/Split 2/Scoreboards/Week 2_2_3,203,67,268,201,48,60,81,112,412,RED
3,245,OPL/2016 Season/Split 2/Scoreboards/Week 2_2_2,154,236,41,432,57,203,81,112,48,RED
4,245,OPL/2016 Season/Split 2/Scoreboards/Week 2_2_1,421,51,13,201,57,203,81,4,432,RED
...,...,...,...,...,...,...,...,...,...,...,...,...
30460,516,Liga Nexo/2020 Season/G2 Arctic Showmatch/Scor...,141,21,84,875,41,113,523,268,40,BLUE
30461,420,Liga Nexo/2020 Season/Split 1/Scoreboards/Week...,245,134,58,432,516,48,21,161,350,RED
30462,24,Liga Nexo/2020 Season/Split 1/Scoreboards/Week...,104,134,57,44,41,30,21,516,143,BLUE
30463,62,Liga Nexo/2020 Season/Split 1/Scoreboards/Week...,59,523,61,40,58,154,21,134,143,BLUE


# Machine Learning

## Pipeline with logistic regression

In [9]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
#from sklearn.compose import make_column_transformer
from sklearn.compose import ColumnTransformer
from sklearn.compose import make_column_selector
from sklearn import set_config; set_config(display='diagram')

from transformers import SynergyFeature, RoleFeature, ChampionWinrateFeature

# Column groups: one champion column per role, _x for blue and _y for red
team_roles = ['TOP', 'JGL', 'BOT', 'MID', 'SUP']
blue_cols = [f'{role}_x' for role in team_roles]
red_cols = [f'{role}_y' for role in team_roles]

# Team-level features, computed separately for each side
team_steps = [
    ('synergy_blue', SynergyFeature(), blue_cols),
    ('synergy_red', SynergyFeature(), red_cols),
    ('winrate_blue', ChampionWinrateFeature(), blue_cols),
    ('winrate_red', ChampionWinrateFeature(), red_cols),
]

# Role-level features: each one receives the blue and red pick of one role
role_steps = [
    (f'{role}_rate', RoleFeature(role), [f'{role}_x', f'{role}_y'])
    for role in ('TOP', 'SUP', 'MID', 'BOT', 'JGL')
]

# Parallelize column transformers: every step gets its own column subset
preproc = ColumnTransformer(team_steps + role_steps)

# Add model
pipe = make_pipeline(preproc, LogisticRegression(solver='liblinear'))
pipe

In [10]:
from sklearn.preprocessing import LabelEncoder

# Encode the winner labels as integers (LabelEncoder numbers its classes in
# sorted order, so with BLUE/RED labels BLUE -> 0 and RED -> 1) and keep only
# the champion columns as features.
y_train = LabelEncoder().fit_transform(full_data.winner)
X_train = full_data.drop(columns=['game_id', 'winner'])

In [11]:
# Train pipeline: fits the column transformers and the logistic regression
# on the full training set.
pipe.fit(X_train,y_train)

In [12]:
from sklearn.model_selection import cross_val_score

# Cross validate pipeline: mean accuracy over 2 folds
fold_scores = cross_val_score(pipe, X_train, y_train, cv=2, scoring='accuracy')
fold_scores.mean()

0.5946294763522068

In [13]:
import joblib

# Persist the pipeline to disk so it can be reused outside this notebook.
# NOTE(review): the pipeline holds custom classes from `transformers`, so that
# module presumably has to be importable wherever the file is loaded — confirm.
joblib.dump(pipe, 'model_logit.joblib')

['model_logit.joblib']

# Use the test data

## Import the test data

In [14]:
# Import the test split through the utils file and report its row counts
test_teams, test_BLUE, test_RED = get_train_data_only(train_data=False, test_data=True)
tuple(map(len, (test_teams, test_BLUE, test_RED)))

(4771, 23855, 23855)

In [15]:
# Reshape each side to one row per game, then join blue (_x) and red (_y)
# on the game id so the frame has the columns the pipeline expects.
test_blue, test_red = map(rearrange_df, (test_BLUE, test_RED))
test_data = test_blue.merge(test_red, on='game_id')
test_data

Unnamed: 0,TOP_x,game_id,JGL_x,BOT_x,MID_x,SUP_x,TOP_y,JGL_y,BOT_y,MID_y,SUP_y
0,516,UK League Championship/2021 Season/Spring Seas...,104,523,112,412,79,876,21,18,201
1,516,LFL Division 2/2021 Season/Spring Season/Score...,104,360,112,526,41,76,145,126,89
2,98,LPLOL/2021 Season/Spring Season/Scoreboards_4_1,2,523,4,412,516,876,145,777,53
3,58,Esports Balkan League/Season 8/Scoreboards_3_1,163,145,3,497,79,113,523,38,201
4,58,Belgian League/2021 Season/Spring Split/Scoreb...,876,81,112,57,79,76,145,777,526
...,...,...,...,...,...,...,...,...,...,...,...
4766,150,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,245,222,64,201,98,56,145,517,89
4767,54,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,25,222,112,111,164,121,81,96,40
4768,516,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,64,145,13,201,164,876,67,61,526
4769,54,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,56,145,84,526,150,77,96,134,117


In [16]:
# Get the side that won: expose the teams table's `id` under the name
# `game_id` so it can be joined with the champion table. `assign` builds a new
# frame instead of writing a column into the loader's object, which avoids a
# SettingWithCopyWarning if that object is a slice of a larger table and keeps
# the cell safe to re-run.
test_teams = test_teams.assign(game_id=test_teams['id'])
test_teams[['winner', 'game_id']].head()

Unnamed: 0,winner,game_id
27183,BLUE,UK League Championship/2021 Season/Spring Seas...
27184,RED,LFL Division 2/2021 Season/Spring Season/Score...
27194,BLUE,LPLOL/2021 Season/Spring Season/Scoreboards_4_1
27181,BLUE,Esports Balkan League/Season 8/Scoreboards_3_1
27180,RED,Belgian League/2021 Season/Spring Split/Scoreb...


In [17]:
# Attach the winning side to every test game
test_winners = test_teams[['winner', 'game_id']]
full_test_data = test_data.merge(test_winners, on='game_id')
full_test_data

Unnamed: 0,TOP_x,game_id,JGL_x,BOT_x,MID_x,SUP_x,TOP_y,JGL_y,BOT_y,MID_y,SUP_y,winner
0,516,UK League Championship/2021 Season/Spring Seas...,104,523,112,412,79,876,21,18,201,BLUE
1,516,LFL Division 2/2021 Season/Spring Season/Score...,104,360,112,526,41,76,145,126,89,RED
2,98,LPLOL/2021 Season/Spring Season/Scoreboards_4_1,2,523,4,412,516,876,145,777,53,BLUE
3,58,Esports Balkan League/Season 8/Scoreboards_3_1,163,145,3,497,79,113,523,38,201,BLUE
4,58,Belgian League/2021 Season/Spring Split/Scoreb...,876,81,112,57,79,76,145,777,526,RED
...,...,...,...,...,...,...,...,...,...,...,...,...
4766,150,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,245,222,64,201,98,56,145,517,89,BLUE
4767,54,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,25,222,112,111,164,121,81,96,40,BLUE
4768,516,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,64,145,13,201,164,876,67,61,526,BLUE
4769,54,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,56,145,84,526,150,77,96,134,117,BLUE


In [18]:
# Distinct game patches present in the test split, in ascending order
np.unique(test_teams['patch'])

array([11.1, 11.2, 11.3, 11.4, 11.5, 11.6, 11.7, 11.8, 11.9],
      dtype=float32)

## Test the data

In [19]:
# Encode the test labels with the mapping learned from the TRAINING labels.
# Fitting a fresh encoder on the test labels would only give the same integer
# codes if both splits happened to contain exactly the same label set; fitting
# on the training labels guarantees the codes match what the model was trained
# on, and raises on a label never seen in training instead of silently
# renumbering the classes.
y_test = LabelEncoder().fit(full_data.winner).transform(full_test_data.winner)
# Keep only the champion columns as features, as was done for X_train.
X_test = full_test_data.drop(['game_id', 'winner'], axis=1)

In [20]:
# Accuracy of the fitted pipeline on the held-out test split.
# NOTE(review): this hold-out accuracy (~0.68) is higher than the 2-fold
# cross-validation accuracy (~0.59) — worth checking whether the custom
# transformers use statistics that are not restricted to the training fold.
pipe.score(X_test, y_test)

0.6814085097463844

In [21]:
from sklearn import metrics

# Predict the winner of every test game, then print the overall accuracy
# followed by the per-class precision / recall / f1 report.
predicted = pipe.predict(X_test)
for summary in (
    metrics.accuracy_score(y_test, predicted),
    metrics.classification_report(y_test, predicted),
):
    print(summary)

0.6814085097463844
              precision    recall  f1-score   support

           0       0.66      0.86      0.74      2583
           1       0.74      0.47      0.58      2188

    accuracy                           0.68      4771
   macro avg       0.70      0.67      0.66      4771
weighted avg       0.69      0.68      0.67      4771

