# Import data
Training data

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from utils import get_train_data_only
# Load the training split: a games table plus one table per side
# (presumably one row per player, given the 5x row counts shown below — TODO confirm).
df_teams, df_BLUE, df_RED = get_train_data_only()

In [3]:
# Number of rows in each of the three training tables
tuple(len(frame) for frame in (df_teams, df_BLUE, df_RED))

(28186, 140930, 140930)

In [4]:
# Modelling table: game identifier, patch/year metadata and the target column
df_teams_ML = df_teams.loc[:, ['id', 'patch', 'year', 'winner']]
df_teams_ML.head()

Unnamed: 0,id,patch,year,winner
25053,OPL/2020 Season/Split 2 Playoffs/Scoreboards_2_3,10.16,2020,BLUE
29321,Baltic Masters/2021 Season/Spring Playoffs/Sco...,11.4,2021,BLUE
24714,NLC/2020 Season/Summer Playoffs/Scoreboards_5_1,10.15,2020,BLUE
20355,OPL/2020 Season/Split 1/Scoreboards/Week 9_1_1,10.6,2020,BLUE
30462,LCK CL/2021 Season/Spring Season/Scoreboards/W...,11.5,2021,BLUE


In [5]:
import numpy as np 
import pandas as pd
from tqdm import tqdm

# Feature Engineering

## Feature 1: Synergy between champions of the same team (rate of victory for a pair of champions playing together)

In [6]:
from build_rate_matrix import synergy_matrix
# Build the champion-pair win-rate ("synergy") matrix from both sides' tables.
# This is slow: the two progress bars below took roughly 3 min and 1.5 min.
champions_won_percentage_imputed = synergy_matrix(df_BLUE, df_RED)

100%|██████████████████████████████████████████████████████████████| 154/154 [03:10<00:00,  1.24s/it]
100%|██████████████████████████████████████████████████████████████| 154/154 [01:39<00:00,  1.54it/s]


In [7]:
# Sanity check: a division by zero would leave +/-inf entries, so count them (expected 0)
np.isinf(champions_won_percentage_imputed).to_numpy().sum()

0

In [8]:
# Count the NaN entries still present in the imputed matrix (0 means no gaps are left)
np.isnan(champions_won_percentage_imputed).values.sum()

0

In [12]:
from transformers import SynergyFeature

# Instantiate the blue-side synergy transformer.
# NOTE(review): `synergy_feature` is not used by any later cell visible in this notebook, and this
# cell's execution count (12) is out of sequence — confirm whether it is meant to replace the
# `pair_wise_synergy` calls below.
synergy_feature = SynergyFeature('blue', df_BLUE)

#### Status note: the notebook runs without errors up to this point; the cells below still need to be fixed.

In [9]:
# Pairwise synergy features for BOTH sides, joined onto the modelling table by game id.
# The red side is added here as well so that the training table carries the same feature columns
# as the test and evaluation tables built further down (they merge blue and red).
# NOTE(review): `pair_wise_synergy` is never imported or defined in this notebook (this cell raised
# NameError); bring it into scope from the module that defines it — TODO confirm which one.
df_blue = pair_wise_synergy(df_BLUE, champions_won_percentage_imputed, 'blue')
df_blue['id'] = df_blue.index
df_teams_ML = pd.merge(df_teams_ML, df_blue, on='id', how='inner')

df_red = pair_wise_synergy(df_RED, champions_won_percentage_imputed, 'red')
df_red['id'] = df_red.index
df_teams_ML = pd.merge(df_teams_ML, df_red, on='id', how='inner')

NameError: name 'pair_wise_synergy' is not defined

In [None]:
# Preview the training table after the synergy merge
df_teams_ML.head()

## Feature 2: Win rate of each champion against the opposing team's champion in the same role

In [None]:
# Join blue rows with red rows of the same game (suffix _x = blue table, _y = red table),
# then keep only the columns that describe a champion-vs-champion matchup.
df_BLUE_RED = df_BLUE.merge(df_RED, on='game_id')
df_role = df_BLUE_RED.loc[:, ['champion_id_x', 'role_x', 'role_y', 'champion_id_y', 'win_x', 'game_id']]

In [None]:
# Occurrences of every (blue champion, blue role, red role, red champion, blue win flag) combination
outcome_counts = df_role[['champion_id_x', 'role_x', 'role_y', 'champion_id_y', 'win_x']].value_counts()
champion_vs_champion = outcome_counts.to_frame()
champion_vs_champion

In [None]:
# Occurrences of every (blue champion, blue role, red role, red champion) matchup, whatever the outcome
matchup_counts = df_BLUE_RED[['champion_id_x', 'role_x', 'role_y', 'champion_id_y']].value_counts()
total_champion_vs_champion = matchup_counts.to_frame()
total_champion_vs_champion

In [None]:
# Share of each recorded outcome within its matchup: the count of every
# (champion_id_x, role_x, role_y, champion_id_y, win_x) combination divided by the count of the
# same matchup regardless of outcome (a fraction between 0 and 1, not a percentage).
# NOTE(review): this relies on pandas aligning the 5-level index with the 4-level index on their
# shared levels — confirm the result contains no unexpected NaN rows.
rate_champion_vs_champion = champion_vs_champion.div(total_champion_vs_champion)
rate_champion_vs_champion

In [None]:
# Second name for the rate table. A `global` declaration has no effect at cell (module) level,
# so a plain assignment binds the same notebook-level name.
rate_role = rate_champion_vs_champion

In [None]:
def get_vs_rate(id_x, role_x, role_y, id_y, rates=None):
    """Return the recorded win share of champion ``id_x`` against champion ``id_y``.

    Parameters
    ----------
    id_x, id_y
        Champion ids of the first (``_x``) and second (``_y``) player.
    role_x, role_y
        Roles of those two players.
    rates : optional
        Table whose index is (champion id x, role x, role y, champion id y, win flag).
        Defaults to the notebook-level ``rate_champion_vs_champion``.

    Returns
    -------
    float
        The share stored under the ``True`` win flag when present; otherwise one minus the
        share stored under ``False``; ``0.5`` when the matchup is absent from the table.
        The result is always a plain float, never a one-element Series.
    """

    def _as_float(value):
        # Looking up a full index key on a one-column frame yields a one-element Series;
        # on a Series it yields a scalar. Collapse both to a float.
        return float(value.iloc[0] if hasattr(value, 'iloc') else value)

    if rates is None:
        rates = rate_champion_vs_champion

    matchup = (id_x, role_x, role_y, id_y)
    try:
        return _as_float(rates.loc[matchup + (True,)])
    except KeyError:
        pass  # no recorded win for this matchup; try the recorded losses

    try:
        return 1 - _as_float(rates.loc[matchup + (False,)])
    except KeyError:
        # Matchup never seen: fall back to an uninformative 50 %
        return 0.5

In [None]:
# Keep only the matchups where both players have the same role. `.copy()` makes this an
# independent frame, so adding a column below does not trigger pandas' SettingWithCopyWarning.
df_same_role = df_role[df_role['role_x'] == df_role['role_y']].copy()
df_same_role['same_role_win_rate'] = df_same_role.apply(
    lambda z: get_vs_rate(z.champion_id_x, z.role_x, z.role_y, z.champion_id_y),
    axis=1)
df_same_role

In [None]:
roles = ['TOP', 'JGL', 'MID', 'BOT', 'SUP']

# Add one column per role holding that role's same-role win rate, joined by game id.
# A loop-local name is used instead of re-binding `df_role`, so the matchup table built earlier
# stays intact and the cells above can be re-run; `rename` replaces the in-place column edits
# on a filtered slice, which pandas flags with SettingWithCopyWarning.
for role in roles:
    role_rate = (
        df_same_role.loc[df_same_role['role_x'] == role, ['game_id', 'same_role_win_rate']]
        .rename(columns={'game_id': 'id', 'same_role_win_rate': role})
    )
    df_teams_ML = pd.merge(df_teams_ML, role_rate, on='id', how='inner')

df_teams_ML

## Feature 3: Simple averages

# Machine Learning

## Pipeline with logistic regression

In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.compose import make_column_transformer
from sklearn.compose import make_column_selector
from sklearn import set_config; set_config(display='diagram')


# Impute then scale the numerical variables
num_transformer = make_pipeline(
    SimpleImputer(strategy='mean'),
    MinMaxScaler())

# Encode categorical variables as dense one-hot columns.
# The `sparse` keyword was renamed to `sparse_output` in scikit-learn 1.2 and removed in 1.4,
# so try the new spelling first and fall back to the old one on older installations.
# (handle_unknown='ignore' is left disabled, as before.)
try:
    cat_transformer = OneHotEncoder(sparse_output=False)
except TypeError:
    cat_transformer = OneHotEncoder(sparse=False)

# Parallelize "num_transformer" and the one-hot encoder over their respective column types;
# columns matched by neither selector are passed through unchanged.
preproc = make_column_transformer(
    (num_transformer, make_column_selector(dtype_include=['float64'])),
    (cat_transformer, make_column_selector(dtype_include=['object', 'bool'])),
    remainder='passthrough')

# Add the model
pipe = make_pipeline(preproc, LogisticRegression(solver='liblinear'))
pipe

In [None]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode the winner labels and keep only the engineered feature columns
y_train = LabelEncoder().fit_transform(df_teams_ML.winner)
X_train = df_teams_ML.drop(columns=['id', 'winner', 'patch', 'year'])

In [None]:
# Train the pipeline (preprocessing + logistic regression) on the full training table
pipe.fit(X_train,y_train)

In [None]:
from sklearn.model_selection import cross_val_score

# Cross validate the pipeline: mean accuracy over 20 folds.
# NOTE(review): the win-rate features in X_train were computed from the outcomes of all training
# games before this split, so each validation fold has already influenced its own features;
# the resulting accuracy is likely optimistic — compare against the held-out test score.
cross_val_score(pipe, X_train, y_train, cv=20, scoring='accuracy').mean()

# Use the test data

## Import the test data

In [None]:
# Load the test split through the same utils helper (the keyword flags select the split)
test_teams, test_BLUE, test_RED = get_train_data_only(train_data=False, test_data=True)
tuple(len(frame) for frame in (test_teams, test_BLUE, test_RED))

In [None]:
# Test modelling table: game identifier, patch/year metadata and the target column
test_teams_ML = test_teams.loc[:, ['id', 'patch', 'year', 'winner']]
test_teams_ML.head(2)

## Synergy feature

In [None]:
# Blue-side pairwise synergy for the test games, keyed by game id and joined onto the test table
test_blue = pair_wise_synergy(test_BLUE, champions_won_percentage_imputed, 'blue')
test_blue['id'] = test_blue.index
test_teams_ML = test_teams_ML.merge(test_blue, on='id', how='inner')

In [None]:
# Red-side pairwise synergy for the test games, keyed by game id and joined onto the test table
test_red = pair_wise_synergy(test_RED, champions_won_percentage_imputed, 'red')
test_red['id'] = test_red.index
test_teams_ML = test_teams_ML.merge(test_red, on='id', how='inner')

In [None]:
# Preview the test table after both synergy merges
test_teams_ML.head(2)

## Same-role win rate

In [None]:
# Join blue rows with red rows of the same test game (suffix _x = blue table, _y = red table)
test_blue_red = test_BLUE.merge(test_RED, on='game_id')
test_role = test_blue_red.loc[:, ['champion_id_x', 'role_x', 'role_y', 'champion_id_y', 'win_x', 'game_id']]
test_role.head(2)

In [None]:
# Keep only the matchups where both players have the same role. `.copy()` makes this an
# independent frame, so adding a column below does not trigger pandas' SettingWithCopyWarning.
# The rates looked up by get_vs_rate are the ones computed from the training games.
test_same_role = test_role[test_role['role_x'] == test_role['role_y']].copy()
test_same_role['same_role_win_rate'] = test_same_role.apply(
    lambda z: get_vs_rate(z.champion_id_x, z.role_x, z.role_y, z.champion_id_y),
    axis=1)
test_same_role.head(2)

In [None]:
roles = ['TOP', 'JGL', 'MID', 'BOT', 'SUP']

# Add one column per role holding that role's same-role win rate, joined by game id.
# A loop-local name is used instead of re-binding `test_role`, so the matchup table built earlier
# stays intact and the cells above can be re-run; `rename` replaces the in-place column edits
# on a filtered slice, which pandas flags with SettingWithCopyWarning.
for role in roles:
    role_rate = (
        test_same_role.loc[test_same_role['role_x'] == role, ['game_id', 'same_role_win_rate']]
        .rename(columns={'game_id': 'id', 'same_role_win_rate': role})
    )
    test_teams_ML = pd.merge(test_teams_ML, role_rate, on='id', how='inner')

test_teams_ML.head(2)

## Test the data

In [None]:
# Encode the test labels with the mapping learned from the TRAINING labels, so each class keeps
# the integer code the model was fitted on even if the test split lacks one of the classes.
y_test = LabelEncoder().fit(df_teams_ML.winner).transform(test_teams_ML.winner)
X_test = test_teams_ML.drop(['id', 'winner', 'patch', 'year'], axis=1)

In [None]:
# Score of the fitted pipeline on the test split
pipe.score(X_test, y_test)

In [None]:
from sklearn import metrics

# Predict on the test features, then print the overall accuracy followed by the per-class report
predicted = pipe.predict(X_test)
test_accuracy = metrics.accuracy_score(y_test, predicted)
print(test_accuracy)
test_report = metrics.classification_report(y_test, predicted)
print(test_report)

# Evaluate data

## Import the evaluate data

In [None]:
# Load the evaluation split through the same utils helper (the keyword flags select the split)
eval_teams, eval_BLUE, eval_RED = get_train_data_only(train_data=False, evaluate_data=True)
tuple(len(frame) for frame in (eval_teams, eval_BLUE, eval_RED))

In [None]:
# Evaluation modelling table: game identifier, patch/year metadata and the target column
eval_teams_ML = eval_teams.loc[:, ['id', 'patch', 'year', 'winner']]
eval_teams_ML.head(5)

## Synergy Feature

In [None]:
# Blue-side pairwise synergy for the evaluation games, keyed by game id and joined onto the table
eval_blue = pair_wise_synergy(eval_BLUE, champions_won_percentage_imputed, 'blue')
eval_blue['id'] = eval_blue.index
eval_teams_ML = eval_teams_ML.merge(eval_blue, on='id', how='inner')

In [None]:
# Red-side pairwise synergy for the evaluation games, keyed by game id and joined onto the table
eval_red = pair_wise_synergy(eval_RED, champions_won_percentage_imputed, 'red')
eval_red['id'] = eval_red.index
eval_teams_ML = eval_teams_ML.merge(eval_red, on='id', how='inner')

In [None]:
# Display the evaluation table after both synergy merges
eval_teams_ML

## Same-role win rate

In [None]:
# Join blue rows with red rows of the same evaluation game (suffix _x = blue table, _y = red table)
eval_blue_red = eval_BLUE.merge(eval_RED, on='game_id')
eval_role = eval_blue_red.loc[:, ['champion_id_x', 'role_x', 'role_y', 'champion_id_y', 'win_x', 'game_id']]
eval_role.head(2)

In [None]:
# Keep only the matchups where both players have the same role. `.copy()` makes this an
# independent frame, so adding a column below does not trigger pandas' SettingWithCopyWarning.
# The rates looked up by get_vs_rate are the ones computed from the training games.
eval_same_role = eval_role[eval_role['role_x'] == eval_role['role_y']].copy()
eval_same_role['same_role_win_rate'] = eval_same_role.apply(
    lambda z: get_vs_rate(z.champion_id_x, z.role_x, z.role_y, z.champion_id_y),
    axis=1)
eval_same_role.head(5)

In [None]:
roles = ['TOP', 'JGL', 'MID', 'BOT', 'SUP']

# Add one column per role holding that role's same-role win rate, joined by game id.
# A loop-local name is used instead of re-binding `eval_role`, so the matchup table built earlier
# stays intact and the cells above can be re-run; `rename` replaces the in-place column edits
# on a filtered slice, which pandas flags with SettingWithCopyWarning.
for role in roles:
    role_rate = (
        eval_same_role.loc[eval_same_role['role_x'] == role, ['game_id', 'same_role_win_rate']]
        .rename(columns={'game_id': 'id', 'same_role_win_rate': role})
    )
    eval_teams_ML = pd.merge(eval_teams_ML, role_rate, on='id', how='inner')

eval_teams_ML.head(2)

In [None]:
# Encode the evaluation labels with the mapping learned from the TRAINING labels, so each class
# keeps the integer code the model was fitted on even if this split lacks one of the classes.
y_eval = LabelEncoder().fit(df_teams_ML.winner).transform(eval_teams_ML.winner)
X_eval = eval_teams_ML.drop(['id', 'winner', 'patch', 'year'], axis=1)

In [None]:
# Show the model's predictions next to the encoded true winners for the evaluation games
pipe.predict(X_eval), y_eval