# Import data
Training data

In [1]:
%load_ext autoreload
%autoreload 2

In [6]:
from utils import get_train_data_only
df_teams, df_BLUE, df_RED = get_train_data_only()

In [7]:
#How many data points we have
len(df_teams), len(df_BLUE), len(df_RED)

(28254, 141270, 141270)

In [8]:
#Teams DataFrame used for Machine Learning
df_teams_ML = df_teams[['id', 'patch', 'year', 'winner']]
df_teams_ML.head()

Unnamed: 0,id,patch,year,winner
31563,LMS/2016 Season/Spring Season/Scoreboards_3_2,5.24,2015,BLUE
31596,LJL/2016 Season/Spring Season/Scoreboards_1_2,5.24,2015,RED
31595,LJL/2016 Season/Spring Season/Scoreboards_1_1,5.24,2015,RED
31588,CBLOL/2016 Season/Split 1/Scoreboards_2_2,5.24,2015,BLUE
31587,CBLOL/2016 Season/Split 1/Scoreboards_2_1,5.24,2015,RED


In [9]:
import numpy as np 
import pandas as pd
from tqdm import tqdm

# Feature Engineering

## Feature 1: Synergy between champions of the same team (rate of victory for a pair of champions playing together)

In [10]:
df_BLUE_lite_synergy = df_BLUE[['champion_id', 'win', 'game_id']]
df_RED_lite_synergy = df_RED[['champion_id', 'win', 'game_id']]

#Dosen't matter if it was the red or blue team so let's concat the data!
df_result = pd.concat([df_BLUE_lite_synergy, df_RED_lite_synergy])

#include a column for the outcome
df_result['outcome'] = df_result['win']*1

#reset the index to the game_id
df_result.index = df_result.game_id

In [11]:
# get the unique game_id and champion_id played
game_id_unique = np.unique(df_result.game_id)
champions_id_unique = np.unique(df_result.champion_id)

In [12]:
#create two dataframes
##one to populate the number of games played by each champion pair
champions_play_together = pd.DataFrame(np.zeros([len(champions_id_unique), len(champions_id_unique)]), columns=champions_id_unique, index=champions_id_unique)

##one to populate te number of games won by each champion pair
champions_won_together = pd.DataFrame(np.zeros([len(champions_id_unique), len(champions_id_unique)]), columns=champions_id_unique, index=champions_id_unique)

In [13]:
for champion_a in tqdm(champions_id_unique):
    champions_played_together_list = list(df_result.loc[df_result[df_result.champion_id == champion_a].index]['champion_id'])
    for champion_b in champions_played_together_list:
        if champion_a == champion_b:
            continue
        else:
            champions_play_together.loc[champion_a][champion_b] += 1

100%|█████████████████████████████████████████| 152/152 [04:02<00:00,  1.60s/it]


In [14]:
for champ_a in tqdm(champions_id_unique):
    champ_played_together_won_list = list(df_result.loc[df_result[(df_result.champion_id == champ_a) & (df_result.outcome == 1)].index]['champion_id'])
    for champ_b in champ_played_together_won_list:
        if champ_a == champ_b:
            continue
        else:
            champions_won_together.loc[champ_a][champ_b] += 1

100%|█████████████████████████████████████████| 152/152 [02:00<00:00,  1.26it/s]


In [15]:
champions_won_percentage = champions_won_together.div(champions_play_together)

In [16]:
#Impute the missing values (champions that never played together)
from sklearn.impute import SimpleImputer

impute_nan = SimpleImputer(missing_values=np.nan, strategy='constant', fill_value=0.5)
champions_won_percentage_imputed = pd.DataFrame(impute_nan.fit(champions_won_percentage).transform(champions_won_percentage), columns=champions_id_unique, index=champions_id_unique)
champions_won_percentage_imputed

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,497,498,516,517,518,523,555,777,875,876
1,0.500000,0.250000,0.000000,0.500000,1.000000,1.000000,0.250000,0.500000,0.500000,1.000000,...,0.500000,0.500000,0.000000,0.333333,0.000000,1.000000,0.000000,1.000000,0.000000,0.500000
2,0.500000,0.500000,0.505682,0.542553,0.642857,0.533040,0.565789,0.536278,0.576923,0.491803,...,0.515284,0.480287,0.496994,0.532203,0.556250,0.477273,0.514851,0.333333,0.497076,0.800000
3,0.333333,0.485795,0.500000,0.373626,0.459854,0.477352,0.500000,0.526829,0.627907,0.440000,...,0.481013,0.494828,0.500000,0.511194,0.485714,0.479290,0.465753,0.625000,0.503030,0.419355
4,0.500000,0.531915,0.659341,0.500000,0.250000,0.400000,0.594595,0.620000,0.538462,0.294118,...,0.504762,0.546667,0.565714,0.613208,0.500000,0.570681,0.658537,0.000000,0.625767,0.487805
5,1.000000,0.375000,0.423358,0.500000,0.500000,0.471311,0.527132,0.470588,0.428571,0.714286,...,0.424528,0.460674,0.400000,0.481013,0.358974,0.375000,0.540984,0.666667,0.222222,0.500000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
523,0.000000,0.500000,0.550296,0.575916,0.125000,0.551020,0.592417,0.500000,0.531250,0.459459,...,0.525974,0.597561,0.530988,0.578947,0.416667,0.500000,0.600000,0.333333,0.529740,0.461538
555,0.333333,0.495050,0.383562,0.341463,0.557377,0.544118,0.510204,0.474074,0.689655,0.571429,...,0.460000,0.453947,0.479592,0.424051,0.478261,0.200000,0.500000,0.500000,0.388889,0.666667
777,0.500000,0.666667,0.625000,1.000000,1.000000,0.500000,0.000000,0.500000,0.000000,0.400000,...,0.555556,1.000000,0.500000,0.666667,0.500000,1.000000,0.500000,0.500000,0.600000,0.666667
875,1.000000,0.561404,0.509091,0.496933,0.666667,0.523810,0.571429,0.564706,0.470588,0.585366,...,0.489474,0.508929,0.526652,0.490196,0.733333,0.552045,0.333333,0.300000,0.500000,0.516129


In [17]:
#check that there is no division by zero
np.isinf(champions_won_percentage_imputed).values.sum()

0

In [18]:
#check how many values have been imputed
np.isnan(champions_won_percentage_imputed).values.sum(), np.isnan(champions_won_percentage).values.sum()

(0, 1482)

In [19]:
def get_synergy(x,y, df_synergy_matrix):
    try:
        x = df_synergy_matrix.loc[x][y]
    except KeyError:
        x = 0.5
    return x

In [20]:
"""This function is needed to transform the rest of the data"""

def pair_wise_synergy(df, df_synergy_matrix, name):
    #get the mean value of the pairwise combination of champions synergy
    result_Synergy = df[['game_id', 'champion_id']].groupby(['game_id']).aggregate({
        'champion_id': lambda z: [get_synergy(x,y, df_synergy_matrix) for x in z for y in z if x != y]
    })
    result_Synergy['mean_synergy_'+str(name)] = result_Synergy.champion_id.apply(lambda x: sum(x)/len(x))
    return result_Synergy.drop('champion_id', axis=1)

In [21]:
#Get the synergy of the Blue team champions

df_blue = pair_wise_synergy(df_BLUE, champions_won_percentage_imputed, 'blue')
df_blue['id'] = df_blue.index
df_teams_ML =pd.merge(df_teams_ML, df_blue, on='id', how='inner')

In [22]:
#Get the synergy of the Red team champions

df_red = pair_wise_synergy(df_RED, champions_won_percentage_imputed, 'red')
df_red['id'] = df_red.index
df_teams_ML = pd.merge(df_teams_ML, df_red, on='id', how='inner')

In [23]:
df_teams_ML.head()

Unnamed: 0,id,patch,year,winner,mean_synergy_blue,mean_synergy_red
0,LMS/2016 Season/Spring Season/Scoreboards_3_2,5.24,2015,BLUE,0.502761,0.480432
1,LJL/2016 Season/Spring Season/Scoreboards_1_2,5.24,2015,RED,0.498885,0.508034
2,LJL/2016 Season/Spring Season/Scoreboards_1_1,5.24,2015,RED,0.489649,0.517847
3,CBLOL/2016 Season/Split 1/Scoreboards_2_2,5.24,2015,BLUE,0.498627,0.503166
4,CBLOL/2016 Season/Split 1/Scoreboards_2_1,5.24,2015,RED,0.481985,0.486419


## Feature 2: Win rate of champions against the same ROLE of the other team's champion

In [24]:
df_BLUE_RED = pd.merge(left=df_BLUE, right=df_RED, left_on= 'game_id', right_on= 'game_id')
df_role = df_BLUE_RED[['champion_id_x', 'role_x', 'role_y', 'champion_id_y', 'win_x', 'game_id']]

In [25]:
#times that a given champion won or lost against another champion
champion_vs_champion = pd.DataFrame(df_role[['champion_id_x', 'role_x', 'role_y', 'champion_id_y', 'win_x']].value_counts())
champion_vs_champion

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,0
champion_id_x,role_x,role_y,champion_id_y,win_x,Unnamed: 5_level_1
12,SUP,SUP,201,True,372
201,SUP,SUP,12,True,337
412,SUP,SUP,201,True,318
201,SUP,SUP,12,False,317
12,SUP,SUP,201,False,307
...,...,...,...,...,...
84,MID,SUP,1,True,1
84,MID,SUP,9,True,1
84,MID,SUP,16,True,1
84,MID,SUP,22,False,1


In [26]:
#times that a given champion played against another champion by role
total_champion_vs_champion = pd.DataFrame(df_BLUE_RED[['champion_id_x', 'role_x', 'role_y', 'champion_id_y']].value_counts())
total_champion_vs_champion

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,0
champion_id_x,role_x,role_y,champion_id_y,Unnamed: 4_level_1
12,SUP,SUP,201,679
201,SUP,SUP,12,654
201,SUP,SUP,412,574
412,SUP,SUP,201,555
201,SUP,BOT,81,522
...,...,...,...,...
114,TOP,BOT,63,1
58,MID,MID,91,1
58,MID,MID,61,1
163,MID,TOP,38,1


In [27]:
#percentage of times that a champion has lost or won against another champion
rate_champion_vs_champion = champion_vs_champion.div(total_champion_vs_champion)
rate_champion_vs_champion

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,0
champion_id_x,role_x,role_y,champion_id_y,win_x,Unnamed: 5_level_1
1,MID,BOT,21,False,1.0
1,MID,BOT,51,False,1.0
1,MID,BOT,235,True,1.0
1,MID,BOT,429,False,1.0
1,MID,BOT,498,False,0.5
...,...,...,...,...,...
876,TOP,SUP,111,True,1.0
876,TOP,SUP,432,False,1.0
876,TOP,TOP,58,False,1.0
876,TOP,TOP,126,False,0.5


In [28]:
global rate_role
rate_role = rate_champion_vs_champion

In [29]:
def get_vs_rate(id_x, role_x, role_y, id_y):
    try:
      x = rate_champion_vs_champion.loc[id_x, role_x, role_y, id_y, True]
    except KeyError:
        try:
          x = 1-rate_champion_vs_champion.loc[id_x, role_x, role_y, id_y, False]
        except KeyError:
          x = 0.5
    return x

In [30]:
df_same_role = df_role[df_role['role_x'] == df_role['role_y']]
df_same_role['same_role_win_rate'] = df_same_role.apply(lambda z: 
              get_vs_rate(z.champion_id_x, z.role_x, z.role_y, z.champion_id_y),
              axis=1)
df_same_role

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_same_role['same_role_win_rate'] = df_same_role.apply(lambda z:


Unnamed: 0,champion_id_x,role_x,role_y,champion_id_y,win_x,game_id,same_role_win_rate
0,13,TOP,TOP,54,True,LMS/2016 Season/Spring Season/Scoreboards_3_2,1.000000
6,421,JGL,JGL,203,True,LMS/2016 Season/Spring Season/Scoreboards_3_2,0.547170
12,81,MID,MID,38,True,LMS/2016 Season/Spring Season/Scoreboards_3_2,1.000000
18,429,BOT,BOT,18,True,LMS/2016 Season/Spring Season/Scoreboards_3_2,0.507692
24,412,SUP,SUP,12,True,LMS/2016 Season/Spring Season/Scoreboards_3_2,0.510288
...,...,...,...,...,...,...,...
706325,516,TOP,TOP,875,False,LCL/2020 Season/Spring Season/Scoreboards_2_1,0.519481
706331,59,JGL,JGL,64,False,LCL/2020 Season/Spring Season/Scoreboards_2_1,0.474654
706337,142,MID,MID,131,False,LCL/2020 Season/Spring Season/Scoreboards_2_1,0.166667
706343,81,BOT,BOT,523,False,LCL/2020 Season/Spring Season/Scoreboards_2_1,0.528226


In [31]:
roles = ['TOP', 'JGL', 'MID', 'BOT', 'SUP']

for role in roles:
    df_role = df_same_role[df_same_role.role_x == role][['game_id', 'same_role_win_rate']]
    df_role[role] = df_role['same_role_win_rate']
    df_role['id'] = df_role['game_id']
    df_role.drop(['game_id', 'same_role_win_rate'], axis=1, inplace=True)
    df_teams_ML = pd.merge(df_teams_ML, df_role, on='id', how='inner')

df_teams_ML

Unnamed: 0,id,patch,year,winner,mean_synergy_blue,mean_synergy_red,TOP,JGL,MID,BOT,SUP
0,LMS/2016 Season/Spring Season/Scoreboards_3_2,5.24,2015,BLUE,0.502761,0.480432,1.000000,0.547170,1.000000,0.507692,0.510288
1,LJL/2016 Season/Spring Season/Scoreboards_1_2,5.24,2015,RED,0.498885,0.508034,0.444444,0.579832,0.600000,0.250000,0.370370
2,LJL/2016 Season/Spring Season/Scoreboards_1_1,5.24,2015,RED,0.489649,0.517847,0.500000,0.463158,0.500000,0.477551,0.615385
3,CBLOL/2016 Season/Split 1/Scoreboards_2_2,5.24,2015,BLUE,0.498627,0.503166,1.000000,0.444444,0.409091,0.508475,0.400000
4,CBLOL/2016 Season/Split 1/Scoreboards_2_1,5.24,2015,RED,0.481985,0.486419,0.000000,0.527108,0.333333,0.200000,0.515789
...,...,...,...,...,...,...,...,...,...,...,...
28249,VCS/2020 Season/Spring Season/Scoreboards/Week...,10.30,2020,BLUE,0.489887,0.494892,0.437500,0.508772,0.833333,0.553846,0.547253
28250,VCS/2020 Season/Spring Season/Scoreboards/Week...,10.30,2020,RED,0.500072,0.494267,0.531915,0.544000,0.472222,0.583333,0.515789
28251,VCS/2020 Season/Spring Season/Scoreboards/Week...,10.30,2020,RED,0.508416,0.496037,0.333333,0.557769,0.555556,0.600000,0.409091
28252,LCK/2020 Season/Spring Season/Scoreboards/Week...,10.30,2020,BLUE,0.513824,0.482004,0.531915,0.555556,0.596330,0.488889,0.516129


## Feature 3: Simple averages

# Machine Learning

## Pipe with logit regression

In [32]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.compose import make_column_transformer
from sklearn.compose import make_column_selector
from sklearn import set_config; set_config(display='diagram')


# Impute then Scale for numerical variables: 
num_transformer = make_pipeline(
                    SimpleImputer(strategy = 'mean'),
                    MinMaxScaler())

# Encode categorical variables
cat_transformer = OneHotEncoder(sparse = False)#, handle_unknown='ignore')

# Paralellize "num_transformer" and "One hot encoder"
preproc = make_column_transformer(
    (num_transformer, make_column_selector(dtype_include=['float64'])),
    (cat_transformer, make_column_selector(dtype_include=['object','bool'])),
    remainder='passthrough')

#add model
pipe = make_pipeline(preproc, LogisticRegression(solver='liblinear'))
pipe

In [33]:
from sklearn.preprocessing import LabelEncoder

y_train = LabelEncoder().fit(df_teams_ML.winner).transform(df_teams_ML.winner)
X_train = df_teams_ML.drop(['id', 'winner', 'patch', 'year'], axis=1)

In [34]:
# Train pipeline
pipe.fit(X_train,y_train)

In [35]:
from sklearn.model_selection import cross_val_score

# Cross validate pipeline
cross_val_score(pipe, X_train, y_train, cv=20, scoring='accuracy').mean()

0.7492087084919676

# Use the test data

## Import the test data

In [36]:
#Import the data through the utils file
test_teams, test_BLUE, test_RED = get_train_data_only(train_data = False, test_data = True)
len(test_teams), len(test_BLUE), len(test_RED)

(7059, 35295, 35295)

In [54]:
#Teams DataFrame used for Machine Learning
test_teams_ML = test_teams[['id', 'patch', 'year', 'winner']]
test_teams_ML.head(2)

Unnamed: 0,id,patch,year,winner
19070,LCK/2020 Season/Spring Season/Scoreboards/Week...,10.3,2020,BLUE
19099,LLA/2020 Season/Opening Season/Scoreboards/Wee...,10.3,2020,BLUE


## Synergy feature

In [55]:
#Get the synergy of the Blue team champions
test_blue = pair_wise_synergy(test_BLUE, champions_won_percentage_imputed, 'blue')
test_blue['id'] = test_blue.index
test_teams_ML =pd.merge(test_teams_ML, test_blue, on='id', how='inner')

In [56]:
#Get the synergy of the Red team champions
test_red = pair_wise_synergy(test_RED, champions_won_percentage_imputed, 'red')
test_red['id'] = test_red.index
test_teams_ML =pd.merge(test_teams_ML, test_red, on='id', how='inner')

In [57]:
test_teams_ML.head(2)

Unnamed: 0,id,patch,year,winner,mean_synergy_blue,mean_synergy_red
0,LCK/2020 Season/Spring Season/Scoreboards/Week...,10.3,2020,BLUE,0.488413,0.500616
1,LLA/2020 Season/Opening Season/Scoreboards/Wee...,10.3,2020,BLUE,0.485404,0.477892


## Role rate winrate

In [58]:
test_blue_red = pd.merge(left=test_BLUE, right=test_RED, left_on= 'game_id', right_on= 'game_id')
test_role = test_blue_red[['champion_id_x', 'role_x', 'role_y', 'champion_id_y', 'win_x', 'game_id']]
test_role.head(2)

Unnamed: 0,champion_id_x,role_x,role_y,champion_id_y,win_x,game_id
0,82,TOP,TOP,266,True,LCK/2020 Season/Spring Season/Scoreboards/Week...
1,82,TOP,JGL,113,True,LCK/2020 Season/Spring Season/Scoreboards/Week...


In [59]:
test_same_role = test_role[test_role['role_x'] == test_role['role_y']]
test_same_role['same_role_win_rate'] = test_same_role.apply(lambda z: 
              get_vs_rate(z.champion_id_x, z.role_x, z.role_y, z.champion_id_y),
              axis=1)
test_same_role.head(2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_same_role['same_role_win_rate'] = test_same_role.apply(lambda z:


Unnamed: 0,champion_id_x,role_x,role_y,champion_id_y,win_x,game_id,same_role_win_rate
0,82,TOP,TOP,266,True,LCK/2020 Season/Spring Season/Scoreboards/Week...,0.295455
6,59,JGL,JGL,113,True,LCK/2020 Season/Spring Season/Scoreboards/Week...,0.558989


In [60]:
roles = ['TOP', 'JGL', 'MID', 'BOT', 'SUP']

for role in roles:
    test_role = test_same_role[test_same_role.role_x == role][['game_id', 'same_role_win_rate']]
    test_role[role] = test_role['same_role_win_rate']
    test_role['id'] = test_role['game_id']
    test_role.drop(['game_id', 'same_role_win_rate'], axis=1, inplace=True)
    test_teams_ML = pd.merge(test_teams_ML, test_role, on='id', how='inner')

test_teams_ML.head(2)

Unnamed: 0,id,patch,year,winner,mean_synergy_blue,mean_synergy_red,TOP,JGL,MID,BOT,SUP
0,LCK/2020 Season/Spring Season/Scoreboards/Week...,10.3,2020,BLUE,0.488413,0.500616,0.295455,0.558989,0.588235,0.550725,0.5
1,LLA/2020 Season/Opening Season/Scoreboards/Wee...,10.3,2020,BLUE,0.485404,0.477892,0.375,0.474654,0.6,0.583333,0.536585


## Test the data

In [61]:
y_test = LabelEncoder().fit(test_teams_ML.winner).transform(test_teams_ML.winner)
X_test = test_teams_ML.drop(['id', 'winner', 'patch', 'year'], axis=1)

In [81]:
pipe.score(X_test, y_test)

0.5092789346932993

In [63]:
from sklearn import metrics

predicted = pipe.predict(X_test)
print(metrics.accuracy_score(y_test, predicted))
print(metrics.classification_report(y_test, predicted)) 

0.5092789346932993
              precision    recall  f1-score   support

           0       0.55      0.49      0.52      3809
           1       0.47      0.53      0.50      3250

    accuracy                           0.51      7059
   macro avg       0.51      0.51      0.51      7059
weighted avg       0.51      0.51      0.51      7059



# Evaluate data

## Import the evaluate data

In [67]:
#Import the data through the utils file
eval_teams, eval_BLUE, eval_RED = get_train_data_only(train_data = False, evaluate_data = True)
len(eval_teams), len(eval_BLUE), len(eval_RED)

(5, 25, 25)

In [69]:
#Teams DataFrame used for Machine Learning
eval_teams_ML = eval_teams[['id', 'patch', 'year', 'winner']]
eval_teams_ML.head(5)

Unnamed: 0,id,patch,year,winner
31455,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,11.9,2021,BLUE
31446,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,11.9,2021,RED
31445,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,11.9,2021,RED
31441,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,11.9,2021,RED
31507,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,11.9,2021,BLUE


## Synergy Feature

In [70]:
#Get the synergy of the Blue team champions
eval_blue = pair_wise_synergy(eval_BLUE, champions_won_percentage_imputed, 'blue')
eval_blue['id'] = eval_blue.index
eval_teams_ML =pd.merge(eval_teams_ML, eval_blue, on='id', how='inner')

In [71]:
#Get the synergy of the Red team champions
eval_red = pair_wise_synergy(eval_RED, champions_won_percentage_imputed, 'red')
eval_red['id'] = eval_red.index
eval_teams_ML =pd.merge(eval_teams_ML, eval_red, on='id', how='inner')

In [72]:
eval_teams_ML

Unnamed: 0,id,patch,year,winner,mean_synergy_blue,mean_synergy_red
0,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,11.9,2021,BLUE,0.482098,0.436252
1,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,11.9,2021,RED,0.48685,0.492741
2,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,11.9,2021,RED,0.481386,0.482032
3,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,11.9,2021,RED,0.451278,0.531786
4,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,11.9,2021,BLUE,0.508897,0.508237


## Role rate winrate

In [73]:
eval_blue_red = pd.merge(left=eval_BLUE, right=eval_RED, left_on= 'game_id', right_on= 'game_id')
eval_role = eval_blue_red[['champion_id_x', 'role_x', 'role_y', 'champion_id_y', 'win_x', 'game_id']]
eval_role.head(2)

Unnamed: 0,champion_id_x,role_x,role_y,champion_id_y,win_x,game_id
0,54,TOP,TOP,150,True,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...
1,54,TOP,JGL,77,True,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...


In [76]:
eval_same_role = eval_role[eval_role['role_x'] == eval_role['role_y']]
eval_same_role['same_role_win_rate'] = eval_same_role.apply(lambda z: 
              get_vs_rate(z.champion_id_x, z.role_x, z.role_y, z.champion_id_y),
              axis=1)
eval_same_role.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eval_same_role['same_role_win_rate'] = eval_same_role.apply(lambda z:


Unnamed: 0,champion_id_x,role_x,role_y,champion_id_y,win_x,game_id,same_role_win_rate
0,54,TOP,TOP,150,True,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,0.75
6,56,JGL,JGL,77,True,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,0.5
12,84,MID,MID,134,True,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,0.472222
18,145,BOT,BOT,96,True,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,0.473684
24,526,SUP,SUP,117,True,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,0.5


In [77]:
roles = ['TOP', 'JGL', 'MID', 'BOT', 'SUP']

for role in roles:
    eval_role = eval_same_role[eval_same_role.role_x == role][['game_id', 'same_role_win_rate']]
    eval_role[role] = eval_role['same_role_win_rate']
    eval_role['id'] = eval_role['game_id']
    eval_role.drop(['game_id', 'same_role_win_rate'], axis=1, inplace=True)
    eval_teams_ML = pd.merge(eval_teams_ML, eval_role, on='id', how='inner')

eval_teams_ML.head(2)

Unnamed: 0,id,patch,year,winner,mean_synergy_blue,mean_synergy_red,TOP,JGL,MID,BOT,SUP
0,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,11.9,2021,BLUE,0.482098,0.436252,0.75,0.5,0.472222,0.473684,0.5
1,Magyar Nemzeti E-sport Bajnokság/Season 3/Scor...,11.9,2021,RED,0.48685,0.492741,0.5,0.5,0.518519,0.363636,0.45283


In [78]:
y_eval = LabelEncoder().fit(eval_teams_ML.winner).transform(eval_teams_ML.winner)
X_eval = eval_teams_ML.drop(['id', 'winner', 'patch', 'year'], axis=1)

In [85]:
pipe.predict(X_eval), y_eval

(array([0, 1, 0, 1, 0]), array([0, 1, 1, 1, 0]))