# Kaggle League of Legends competition - Neural Network Models

## Team: Elden Ring

<img src="https://eldenring.wiki.fextralife.com/file/Elden-Ring/mirel_pastor_of_vow.jpg" alt="PRAISE DOG" style="width:806px;height:600px;"/>

#### PRAISE THE DOG!

## How to Win at League of Legends?

### Uninstall LoL and [install Dota 2](https://store.steampowered.com/app/570/Dota_2/), EZ. (just kidding, both games are great. Volvo pls gib patch.)

<img src = "https://static.wikia.nocookie.net/dota2_gamepedia/images/7/78/Keyart_phoenix.jpg/revision/latest/" alt="SKREE CAW CAW IM A BIRD" style="width:800px;height:497px;">

In [159]:
import pandas as pd
import numpy as np
import datetime as dt
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score, KFold

from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.preprocessing import MinMaxScaler

Gradually the list of files to read increased as I kept adding variables and complexities to models that could handle that.

In [2]:
X_train_original = pd.read_csv('../data/participants_train.csv')
X_test_original = pd.read_csv('../data/participants_test.csv')
y_train_original = pd.read_csv('../data/train_winners.csv')

champion_mastery = pd.read_csv('../data/champion_mastery.csv')
champion = pd.read_json('../data/champion.json')

team_positions = pd.read_csv('../data/teamPositions.csv')

train_last_frame_values = pd.read_csv('../data/train_last_frame_values.csv')
test_last_frame_values = pd.read_csv('../data/test_last_frame_values.csv')

training_events = pd.read_csv('../data/training_events.csv')
testing_events = pd.read_csv('../data/testing_events.csv')

submission = pd.read_csv('../data/sample_submission.csv')

In [3]:
# function that converts values to negative (for the second team, teamId 200)
# it leaves the first team values, teamId 100 intact

def convert_team_values(df, col_names):
        
    for col in col_names:
        df[col] = np.where(df['teamId'] == 200,
                            -1* df[col],
                                df[col])
        
    return

In [4]:
# transformation needed on the dataframes (see the file prediction_models)
vars = ['wards_placed', 'wards_killed', 'turretplates_destroyed', 'elite_monsters_killed']

convert_team_values(training_events, vars)
convert_team_values(testing_events, vars)

training_events = training_events.groupby('matchId')[vars].sum()
testing_events = testing_events.groupby('matchId')[vars].sum()

champion_data = pd.json_normalize(champion['data'])
champion_data['key'] = champion_data['key'].astype(int)

champion_types= champion_data.explode('tags').pivot_table(values='id', index='key', columns='tags', aggfunc='count').fillna(0).reset_index()

In [5]:
# to be used later to measure the accuracy!
kfold = KFold(n_splits = 10, shuffle = True, random_state = 42)

# this is to extract the column that is needed for training
y_train = y_train_original['winner']

## Using Neural Networks to Improve the Logreg Predictions!

In [6]:
variables = ['summonerLevel', 'championLevel','championPoints', 
             'Assassin', 'Fighter', 'Mage', 'Marksman', 'Support', 'Tank',
            'info.attack', 'info.defense', 'info.magic', 'info.difficulty',
            'stats.hpregenperlevel',	'stats.mpregen', 'stats.mpregenperlevel',	'stats.crit',	'stats.critperlevel',
            'stats.attackdamage', 'stats.attackdamageperlevel', 'stats.attackspeedperlevel',	'stats.attackspeed',
            'final_gold', 'final_xp', 'final_abilityhaste', 'final_abilitypower', 'final_armor', 'final_armorpen',
            'final_armorpenpercent', 'final_atkdmg', 'final_bns_armorpenpercent', 'final_bns_magicpenpercent', 'final_ccreduction',
            'final_cdreduction', 'final_remaining_health', 'final_health', 'final_healthrgn', 'final_lifesteal', 'final_mppen',
            'final_mgpenpercent', 'final_mgres', 'final_ms', 'final_omnivamp', 'final_physicalvamp', 'final_power', 'final_powermax',
            'final_powerregen', 'final_spellvamp', 'final_currentgold', 'final_magicdmgdone', 'final_magicdmgdonetochamps', 'final_magicdmgtaken',
            'final_physdmgdone', 'final_physdmgdonetochamps', 'final_physdmgtaken', 'final_dmgdone', 'final_dmgdonetochamps', 'final_dmgtaken', 
            'final_truedmgdone', 'final_truedmgdonetochamps', 'final_truedmgtaken', 'final_goldpersec', 'final_jungleminionskilled', 'final_lvl',
            'final_minionskilled', 'final_jungleminionskilled', 'final_jungleminionskilled', 'final_jungleminionskilled', 'final_enemycontrolled'                  
             ]

X_train = pd.merge(X_train_original, team_positions, how='inner', on=['matchId', 'participantId'])
X_train = pd.merge(X_train, champion_data, how='inner', left_on='championId', right_on='key')
X_train = pd.merge(X_train, champion_types, how='inner', left_on='championId', right_on='key')
X_train = pd.merge(X_train, train_last_frame_values, how='inner', on=['matchId', 'participantId'])
X_train = pd.merge(X_train, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)

X_train = X_train.sort_values(['matchId', 'participantId'], ascending = [True, True]).reset_index(drop=True)


convert_team_values(X_train, variables)

X_train_perlane = X_train.groupby(['matchId', 'teamPosition'])[['final_gold']].sum().pivot_table(values='final_gold', index='matchId', columns='teamPosition').reset_index().drop(columns=0)

for lane in ['BOTTOM', 'JUNGLE', 'MIDDLE', 'TOP', 'UTILITY']:
  X_train_perlane[f'{lane}'] = np.where(X_train_perlane[f'{lane}'] >= 0, 1, -1)

X_train = (
    X_train
    .groupby(['matchId'])[variables]
    .sum()
    .reset_index()
)

X_train = pd.merge(X_train, X_train_perlane, how='inner', on='matchId').reset_index(drop = True)
X_train = pd.merge(X_train, training_events, how='inner', on='matchId').reset_index(drop = True)

In [7]:
X_train.head()

Unnamed: 0,matchId,summonerLevel,championLevel,championPoints,Assassin,Fighter,Mage,Marksman,Support,Tank,...,final_enemycontrolled,BOTTOM,JUNGLE,MIDDLE,TOP,UTILITY,wards_placed,wards_killed,turretplates_destroyed,elite_monsters_killed
0,0,682,0.0,-605428.0,-1.0,-1.0,2.0,0.0,0.0,-2.0,...,67664,-1,-1,1,-1,-1,-64,-1,-2,-1
1,1,628,8.0,1356027.0,3.0,0.0,0.0,0.0,-1.0,-1.0,...,-61783,1,1,1,-1,1,1,0,-1,-1
2,2,1049,1.0,-273911.0,-2.0,1.0,0.0,0.0,0.0,0.0,...,-132630,1,-1,-1,-1,-1,4,1,-2,1
3,3,-1027,-3.0,-287667.0,1.0,1.0,1.0,-1.0,-1.0,0.0,...,-39616,1,1,-1,-1,1,4,0,2,0
4,4,1612,7.0,503668.0,0.0,0.0,2.0,-1.0,0.0,-1.0,...,16629,-1,1,-1,1,1,4,0,-1,-1


In [8]:
pipeline_neuralnetwork = Pipeline(
    steps = [
        ('scaler', MinMaxScaler()),
        ('nn', MLPClassifier(verbose = True,
                             hidden_layer_sizes = (100, 100, 100, 100),
                             activation = 'tanh',
                             max_iter = 10000,
                             alpha=0.05))
    ]
)

In [9]:
pipeline_neuralnetwork.fit(X_train, y_train)

Iteration 1, loss = 0.65727970
Iteration 2, loss = 0.61403113
Iteration 3, loss = 0.61605887
Iteration 4, loss = 0.62173213
Iteration 5, loss = 0.60096988
Iteration 6, loss = 0.59451160
Iteration 7, loss = 0.59471611
Iteration 8, loss = 0.59364611
Iteration 9, loss = 0.59688439
Iteration 10, loss = 0.59102111
Iteration 11, loss = 0.58756731
Iteration 12, loss = 0.59510892
Iteration 13, loss = 0.58998631
Iteration 14, loss = 0.58541963
Iteration 15, loss = 0.58294796
Iteration 16, loss = 0.58535731
Iteration 17, loss = 0.58652887
Iteration 18, loss = 0.58085443
Iteration 19, loss = 0.57894982
Iteration 20, loss = 0.57995711
Iteration 21, loss = 0.58401510
Iteration 22, loss = 0.57831234
Iteration 23, loss = 0.57767067
Iteration 24, loss = 0.57559296
Iteration 25, loss = 0.57490748
Iteration 26, loss = 0.57927736
Iteration 27, loss = 0.57238270
Iteration 28, loss = 0.57627896
Iteration 29, loss = 0.57043315
Iteration 30, loss = 0.57277950
Iteration 31, loss = 0.57232347
Iteration 32, los

In [10]:
accuracy_score(
    y_true = y_train,
    y_pred = pipeline_neuralnetwork.predict(X_train)
)

0.725625

In [11]:
summoner_cv_scores = cross_val_score(
    estimator = pipeline_neuralnetwork,
    X = X_train,
    y = y_train,
    cv = kfold
)

print(summoner_cv_scores)
print(np.mean(summoner_cv_scores))

Iteration 1, loss = 0.66646843
Iteration 2, loss = 0.62959231
Iteration 3, loss = 0.61642563
Iteration 4, loss = 0.61104296
Iteration 5, loss = 0.60275513
Iteration 6, loss = 0.59776119
Iteration 7, loss = 0.60205821
Iteration 8, loss = 0.59823554
Iteration 9, loss = 0.59391677
Iteration 10, loss = 0.59442807
Iteration 11, loss = 0.59049584
Iteration 12, loss = 0.59265938
Iteration 13, loss = 0.59102323
Iteration 14, loss = 0.58738650
Iteration 15, loss = 0.58705233
Iteration 16, loss = 0.58641518
Iteration 17, loss = 0.59130026
Iteration 18, loss = 0.58164870
Iteration 19, loss = 0.57932351
Iteration 20, loss = 0.58228079
Iteration 21, loss = 0.58372173
Iteration 22, loss = 0.58742920
Iteration 23, loss = 0.57940665
Iteration 24, loss = 0.58405909
Iteration 25, loss = 0.58878568
Iteration 26, loss = 0.57689643
Iteration 27, loss = 0.57845166
Iteration 28, loss = 0.57799100
Iteration 29, loss = 0.57608896
Iteration 30, loss = 0.57440819
Iteration 31, loss = 0.57395092
Iteration 32, los

In [12]:
variables = ['summonerLevel', 'championLevel','championPoints', 
             'Assassin', 'Fighter', 'Mage', 'Marksman', 'Support', 'Tank',
            'info.attack', 'info.defense', 'info.magic', 'info.difficulty',
            'stats.hpregenperlevel',	'stats.mpregen', 'stats.mpregenperlevel',	'stats.crit',	'stats.critperlevel',
            'stats.attackdamage', 'stats.attackdamageperlevel', 'stats.attackspeedperlevel',	'stats.attackspeed',
            'final_gold', 'final_xp', 'final_abilityhaste', 'final_abilitypower', 'final_armor', 'final_armorpen',
            'final_armorpenpercent', 'final_atkdmg', 'final_bns_armorpenpercent', 'final_bns_magicpenpercent', 'final_ccreduction',
            'final_cdreduction', 'final_remaining_health', 'final_health', 'final_healthrgn', 'final_lifesteal', 'final_mppen',
            'final_mgpenpercent', 'final_mgres', 'final_ms', 'final_omnivamp', 'final_physicalvamp', 'final_power', 'final_powermax',
            'final_powerregen', 'final_spellvamp', 'final_currentgold', 'final_magicdmgdone', 'final_magicdmgdonetochamps', 'final_magicdmgtaken',
            'final_physdmgdone', 'final_physdmgdonetochamps', 'final_physdmgtaken', 'final_dmgdone', 'final_dmgdonetochamps', 'final_dmgtaken', 
            'final_truedmgdone', 'final_truedmgdonetochamps', 'final_truedmgtaken', 'final_goldpersec', 'final_jungleminionskilled', 'final_lvl',
            'final_minionskilled', 'final_jungleminionskilled', 'final_jungleminionskilled', 'final_jungleminionskilled', 'final_enemycontrolled'                  
             ]

X_test = pd.merge(X_test_original, team_positions, how='inner', on=['matchId', 'participantId'])
X_test = pd.merge(X_test, champion_data, how='inner', left_on='championId', right_on='key')
X_test = pd.merge(X_test, champion_types, how='inner', left_on='championId', right_on='key')
X_test = pd.merge(X_test, test_last_frame_values, how='inner', on=['matchId', 'participantId'])
X_test = pd.merge(X_test, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)

X_test = X_test.sort_values(['matchId', 'participantId'], ascending = [True, True]).reset_index(drop=True)


convert_team_values(X_test, variables)

X_test_perlane = X_test.groupby(['matchId', 'teamPosition'])[['final_gold']].sum().pivot_table(values='final_gold', index='matchId', columns='teamPosition').reset_index().drop(columns=0)

for lane in ['BOTTOM', 'JUNGLE', 'MIDDLE', 'TOP', 'UTILITY']:
  X_test_perlane[f'{lane}'] = np.where(X_test_perlane[f'{lane}'] >= 0, 1, -1)

X_test = (
    X_test
    .groupby(['matchId'])[variables]
    .sum()
    .reset_index()
)

X_test = pd.merge(X_test, X_test_perlane, how='inner', on='matchId').reset_index(drop = True)
X_test = pd.merge(X_test, testing_events, how='inner', on='matchId').reset_index(drop = True)

In [13]:
y_pred = pipeline_neuralnetwork.predict(X_test)

In [14]:
submission['winner'] = y_pred
submission.head()

Unnamed: 0,matchId,winner
0,8000,100
1,8001,200
2,8002,200
3,8003,200
4,8004,200


In [15]:
#submission.to_csv('../data/submission_neural_network_hidden_layers4x100_alpha005_activtahn_2023_03_30.csv', index=False)

### Attempt at Gradient Boosting

In [130]:
from sklearn.ensemble import GradientBoostingClassifier

In [17]:
pd.get_dummies(X_train_original, prefix='Champion')

Unnamed: 0,matchId,teamId,participantId,summonerId,summonerLevel,championId,Champion_Aatrox,Champion_Ahri,Champion_Akali,Champion_Akshan,...,Champion_Yone,Champion_Yorick,Champion_Yuumi,Champion_Zac,Champion_Zed,Champion_Zeri,Champion_Ziggs,Champion_Zilean,Champion_Zoe,Champion_Zyra
0,0,100,1,0,303,82,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,100,2,1,616,517,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,100,3,2,667,127,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,100,4,3,860,51,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,100,5,4,325,25,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79995,7999,200,6,13979,595,83,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
79996,7999,200,7,39643,38,106,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
79997,7999,200,8,5570,498,34,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
79998,7999,200,9,10228,733,29,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [18]:
X_train = X_train_original

X_train = pd.get_dummies(X_train, prefix='Champion')
champions_encoded = list(X_train.drop(columns=['matchId', 'teamId', 'participantId', 'summonerId', 'summonerLevel', 'championId']).columns.values)

variables = ['summonerLevel', 'championLevel','championPoints', 
            # 'Assassin', 'Fighter', 'Mage', 'Marksman', 'Support', 'Tank',
            # 'info.attack', 'info.defense', 'info.magic', 'info.difficulty',
            # 'stats.hpregenperlevel',	'stats.mpregen', 'stats.mpregenperlevel',	'stats.crit',	'stats.critperlevel',
            # 'stats.attackdamage', 'stats.attackdamageperlevel', 'stats.attackspeedperlevel',	'stats.attackspeed',
            'final_gold', 'final_xp', 'final_abilityhaste', 'final_abilitypower', 'final_armor', 'final_armorpen',
            'final_armorpenpercent', 'final_atkdmg', 'final_bns_armorpenpercent', 'final_bns_magicpenpercent', 'final_ccreduction',
            'final_cdreduction', 'final_remaining_health', 'final_health', 'final_healthrgn', 'final_lifesteal', 'final_mppen',
            'final_mgpenpercent', 'final_mgres', 'final_ms', 'final_omnivamp', 'final_physicalvamp', 'final_power', 'final_powermax',
            'final_powerregen', 'final_spellvamp', 'final_currentgold', 'final_magicdmgdone', 'final_magicdmgdonetochamps', 'final_magicdmgtaken',
            'final_physdmgdone', 'final_physdmgdonetochamps', 'final_physdmgtaken', 'final_dmgdone', 'final_dmgdonetochamps', 'final_dmgtaken', 
            'final_truedmgdone', 'final_truedmgdonetochamps', 'final_truedmgtaken', 'final_goldpersec', 'final_jungleminionskilled', 'final_lvl',
            'final_minionskilled', 'final_jungleminionskilled', 'final_jungleminionskilled', 'final_jungleminionskilled', 'final_enemycontrolled'                  
             ] + champions_encoded

X_train = pd.merge(X_train, team_positions, how='inner', on=['matchId', 'participantId'])

X_train = pd.merge(X_train, champion_data, how='inner', left_on='championId', right_on='key')
X_train = pd.merge(X_train, champion_types, how='inner', left_on='championId', right_on='key')
X_train = pd.merge(X_train, train_last_frame_values, how='inner', on=['matchId', 'participantId'])
X_train = pd.merge(X_train, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)

X_train = X_train.sort_values(['matchId', 'participantId'], ascending = [True, True]).reset_index(drop=True)


convert_team_values(X_train, variables)

X_train_perlane = X_train.groupby(['matchId', 'teamPosition'])[['final_gold']].sum().pivot_table(values='final_gold', index='matchId', columns='teamPosition').reset_index().drop(columns=0)

for lane in ['BOTTOM', 'JUNGLE', 'MIDDLE', 'TOP', 'UTILITY']:
  X_train_perlane[f'{lane}'] = np.where(X_train_perlane[f'{lane}'] >= 0, 1, -1)

X_train = (
    X_train
    .groupby(['matchId'])[variables]
    .sum()
    .reset_index()
)

X_train = pd.merge(X_train, X_train_perlane, how='inner', on='matchId').reset_index(drop = True)
X_train = pd.merge(X_train, training_events, how='inner', on='matchId').reset_index(drop = True)

In [19]:
gbr = Pipeline(
    steps = [
        #('scaler', MinMaxScaler()),
        ('gb', GradientBoostingClassifier(n_estimators = 1000, learning_rate=0.01))
    ]
)

gbr.fit(X_train, y_train)

In [20]:
accuracy_score(
    y_true = y_train,
    y_pred = gbr.predict(X_train)
)

0.76125

In [21]:
print(classification_report(y_train, gbr.predict(X_train)))

              precision    recall  f1-score   support

         100       0.76      0.77      0.77      4071
         200       0.76      0.75      0.76      3929

    accuracy                           0.76      8000
   macro avg       0.76      0.76      0.76      8000
weighted avg       0.76      0.76      0.76      8000



In [22]:
importances = pd.DataFrame({
    'variable': gbr.feature_names_in_,
    'importance': gbr['gb'].feature_importances_
})

importances.sort_values('importance', ascending = False).head(20)

Unnamed: 0,variable,importance
4,final_gold,0.610688
221,elite_monsters_killed,0.045315
5,final_xp,0.043786
2,championLevel,0.033521
61,Champion_AurelionSol,0.019381
17,final_health,0.015361
37,final_dmgdone,0.013412
1,summonerLevel,0.012571
213,BOTTOM,0.010613
58,Champion_Annie,0.009574


In [23]:
X_test = X_test_original

X_test = pd.get_dummies(X_test, prefix='Champion')
champions_encoded = list(X_test.drop(columns=['matchId', 'teamId', 'participantId', 'summonerId', 'summonerLevel', 'championId']).columns.values)

variables = ['summonerLevel', 'championLevel','championPoints', 
            # 'Assassin', 'Fighter', 'Mage', 'Marksman', 'Support', 'Tank',
            # 'info.attack', 'info.defense', 'info.magic', 'info.difficulty',
            # 'stats.hpregenperlevel',	'stats.mpregen', 'stats.mpregenperlevel',	'stats.crit',	'stats.critperlevel',
            # 'stats.attackdamage', 'stats.attackdamageperlevel', 'stats.attackspeedperlevel',	'stats.attackspeed',
            'final_gold', 'final_xp', 'final_abilityhaste', 'final_abilitypower', 'final_armor', 'final_armorpen',
            'final_armorpenpercent', 'final_atkdmg', 'final_bns_armorpenpercent', 'final_bns_magicpenpercent', 'final_ccreduction',
            'final_cdreduction', 'final_remaining_health', 'final_health', 'final_healthrgn', 'final_lifesteal', 'final_mppen',
            'final_mgpenpercent', 'final_mgres', 'final_ms', 'final_omnivamp', 'final_physicalvamp', 'final_power', 'final_powermax',
            'final_powerregen', 'final_spellvamp', 'final_currentgold', 'final_magicdmgdone', 'final_magicdmgdonetochamps', 'final_magicdmgtaken',
            'final_physdmgdone', 'final_physdmgdonetochamps', 'final_physdmgtaken', 'final_dmgdone', 'final_dmgdonetochamps', 'final_dmgtaken', 
            'final_truedmgdone', 'final_truedmgdonetochamps', 'final_truedmgtaken', 'final_goldpersec', 'final_jungleminionskilled', 'final_lvl',
            'final_minionskilled', 'final_jungleminionskilled', 'final_jungleminionskilled', 'final_jungleminionskilled', 'final_enemycontrolled'                  
             ] + champions_encoded

X_test = pd.merge(X_test, team_positions, how='inner', on=['matchId', 'participantId'])
X_test = pd.merge(X_test, champion_data, how='inner', left_on='championId', right_on='key')
X_test = pd.merge(X_test, champion_types, how='inner', left_on='championId', right_on='key')
X_test = pd.merge(X_test, test_last_frame_values, how='inner', on=['matchId', 'participantId'])
X_test = pd.merge(X_test, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)

X_test = X_test.sort_values(['matchId', 'participantId'], ascending = [True, True]).reset_index(drop=True)


convert_team_values(X_test, variables)

X_test_perlane = X_test.groupby(['matchId', 'teamPosition'])[['final_gold']].sum().pivot_table(values='final_gold', index='matchId', columns='teamPosition').reset_index().drop(columns=0)

for lane in ['BOTTOM', 'JUNGLE', 'MIDDLE', 'TOP', 'UTILITY']:
  X_test_perlane[f'{lane}'] = np.where(X_test_perlane[f'{lane}'] >= 0, 1, -1)

X_test = (
    X_test
    .groupby(['matchId'])[variables]
    .sum()
    .reset_index()
)

X_test = pd.merge(X_test, X_test_perlane, how='inner', on='matchId').reset_index(drop = True)
X_test = pd.merge(X_test, testing_events, how='inner', on='matchId').reset_index(drop = True)

In [24]:
y_pred = gbr.predict(X_test)

In [25]:
submission['winner'] = y_pred
submission.head()

Unnamed: 0,matchId,winner
0,8000,100
1,8001,200
2,8002,200
3,8003,200
4,8004,200


In [26]:
# best submission so far 7.08 on kaggle !
#submission.to_csv('../data/submission_gradientboosting_n1000_learn001_2023_04_01.csv', index=False)

# submission with this (all champs) is 7.06
#submission.to_csv('../data/submission_gradientboosting_all_champions_n1000_learn001_2023_04_01.csv', index=False)

## Adding winrates and ban rates

This was once competition was over but I was curious to see how much the predictions could have been imporved with this added information.

In [28]:
match_patch = pd.read_csv('../data/match_patch.csv')
#champion_wr_stats = pd.read_csv('../data/champion_wr_stats.csv')
champion_wr_stats = pd.read_csv('../data/champion_wr_stats_all.csv')
X_train_original = pd.read_csv('../data/participants_train.csv')

In [29]:
champion_wr_stats['Name'] = champion_wr_stats['Name'].str.lower()

# unfortunately can't match on 'teamPosition' and 'Role' due to some missing
champion_wr_stats = champion_wr_stats.drop(columns=['Trend','Tier', 'Role']).groupby(['Name', 'Patch_Version']).max().reset_index()

In [132]:
X_train = X_train_original

X_train['championName'] = X_train['championName'].str.lower()
# additional name changes to match
X_train['championName'] = X_train['championName'].str.replace('monkeyking', 'wukong')
X_train['championName'] = X_train['championName'].str.replace('renata', 'renataglasc')
X_train['championName'] = X_train['championName'].str.replace('drmundo', 'dr.mundo')

variables = ['summonerLevel', 'championLevel','championPoints', 
            'Assassin', 'Fighter', 'Mage', 'Marksman', 'Support', 'Tank',
            'info.attack', 'info.defense', 'info.magic', 'info.difficulty',
            'stats.hpregenperlevel',	'stats.mpregen', 'stats.mpregenperlevel',	'stats.crit',	'stats.critperlevel',
            'stats.attackdamage', 'stats.attackdamageperlevel', 'stats.attackspeedperlevel',	'stats.attackspeed',
            'final_gold', 'final_xp', 'final_abilityhaste', 'final_abilitypower', 'final_armor', 'final_armorpen',
            'final_armorpenpercent', 'final_atkdmg', 'final_bns_armorpenpercent', 'final_bns_magicpenpercent', 'final_ccreduction',
            'final_cdreduction', 'final_remaining_health', 'final_health', 'final_healthrgn', 'final_lifesteal', 'final_mppen',
            'final_mgpenpercent', 'final_mgres', 'final_ms', 'final_omnivamp', 'final_physicalvamp', 'final_power', 'final_powermax',
            'final_powerregen', 'final_spellvamp', 'final_currentgold', 'final_magicdmgdone', 'final_magicdmgdonetochamps', 'final_magicdmgtaken',
            'final_physdmgdone', 'final_physdmgdonetochamps', 'final_physdmgtaken', 'final_dmgdone', 'final_dmgdonetochamps', 'final_dmgtaken', 
            'final_truedmgdone', 'final_truedmgdonetochamps', 'final_truedmgtaken', 'final_goldpersec', 'final_jungleminionskilled', 'final_lvl',
            'final_minionskilled', 'final_enemycontrolled',
            'Score', 'Win %', 'Pick %', 'Ban %', 'KDA'                  
             ]

X_train = pd.merge(X_train, team_positions, how='inner', on=['matchId', 'participantId'])
X_train = pd.merge(X_train, match_patch, how='inner', on='matchId')
X_train = pd.merge(X_train, champion_wr_stats, how='inner', left_on=['championName', 'patch_version'], right_on=['Name', 'Patch_Version'])
X_train = pd.merge(X_train, champion_data, how='inner', left_on='championId', right_on='key')
X_train = pd.merge(X_train, champion_types, how='inner', left_on='championId', right_on='key')
X_train = pd.merge(X_train, train_last_frame_values, how='inner', on=['matchId', 'participantId'])
X_train = pd.merge(X_train, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)

X_train = X_train.sort_values(['matchId', 'participantId'], ascending = [True, True]).reset_index(drop=True)


convert_team_values(X_train, variables)

X_train_perlane = X_train.groupby(['matchId', 'teamPosition'])[['final_gold']].sum().pivot_table(values='final_gold', index='matchId', columns='teamPosition').reset_index().drop(columns=0)

for lane in ['BOTTOM', 'JUNGLE', 'MIDDLE', 'TOP', 'UTILITY']:
  X_train_perlane[f'{lane}'] = np.where(X_train_perlane[f'{lane}'] >= 0, 1, -1)

X_train = (
    X_train
    .groupby(['matchId'])[variables]
    .sum()
    .reset_index()
)

X_train = pd.merge(X_train, X_train_perlane, how='inner', on='matchId').reset_index(drop = True)
X_train = pd.merge(X_train, training_events, how='inner', on='matchId').reset_index(drop = True)

In [133]:
gbr_wr = Pipeline(
    steps = [
        #('scaler', MinMaxScaler()),
        ('gb', GradientBoostingClassifier(n_estimators = 1000, learning_rate=0.01))
    ]
)

gbr_wr.fit(X_train, y_train)

## Finally, attempting XGBoost

In [157]:
from xgboost import XGBClassifier

from sklearn.model_selection import GridSearchCV

In [135]:
# for xgboost need 1 and 0 instead of 100 and 200
y_train_modified = y_train.copy()

y_train_modified = y_train_modified.replace(100, 0)
y_train_modified = y_train_modified.replace(200, 1)

In [177]:
bst = XGBClassifier(n_estimators=1000,
                    max_depth=6,
                    learning_rate=0.001,
                    subsample=0.8,
                    # reg_lambda=2,
                    # reg_alpha=1,
                    objective='binary:logistic')

bst.fit(X_train, y_train_modified)


In [178]:
accuracy_score(
    y_true = y_train_modified,
    y_pred = bst.predict(X_train)
)

0.787125

In [179]:
confusion_matrix(y_train_modified, bst.predict(X_train))

array([[3232,  839],
       [ 864, 3065]])

In [180]:
print(classification_report(y_train_modified, bst.predict(X_train)))

              precision    recall  f1-score   support

           0       0.79      0.79      0.79      4071
           1       0.79      0.78      0.78      3929

    accuracy                           0.79      8000
   macro avg       0.79      0.79      0.79      8000
weighted avg       0.79      0.79      0.79      8000



In [138]:
# importances = pd.DataFrame({
#     'variable': bst.feature_names_in_,
#     'importance': bst.feature_importances_
# })

# importances.sort_values('importance', ascending = False).head(20)

In [139]:
# param_test = {
# #  'max_depth':range(3,10,2),
# #  'learning_rate': [0.05, 0.09, 0.1, 0.3]
#    'n_estimators': [100, 300, 500, 700 ,1000]
# }

# gsearch = GridSearchCV(estimator = XGBClassifier(max_depth=9, learning_rate=0.1, objective= 'binary:logistic'),
#                        param_grid = param_test,
#                         scoring='roc_auc',
#                         n_jobs=4, cv=5)
# gsearch.fit(X_train,y_train_modified)

# print( gsearch.best_params_, gsearch.best_score_)

In [141]:
accuracy_score(
    y_true = y_train,
    y_pred = gbr_wr.predict(X_train)
)

0.763625

In [41]:
print(classification_report(y_train, gbr_wr.predict(X_train)))

              precision    recall  f1-score   support

         100       0.76      0.78      0.77      4071
         200       0.76      0.75      0.76      3929

    accuracy                           0.76      8000
   macro avg       0.76      0.76      0.76      8000
weighted avg       0.76      0.76      0.76      8000



In [42]:
importances = pd.DataFrame({
    'variable': gbr_wr.feature_names_in_,
    'importance': gbr_wr['gb'].feature_importances_
})

importances.sort_values('importance', ascending = False).head(20)

Unnamed: 0,variable,importance
23,final_gold,0.592997
71,Win %,0.049038
83,elite_monsters_killed,0.046754
24,final_xp,0.046214
2,championLevel,0.035903
36,final_health,0.01488
56,final_dmgdone,0.013568
75,BOTTOM,0.01047
1,summonerLevel,0.010141
74,KDA,0.009496


In [181]:
X_test_original = pd.read_csv('../data/participants_test.csv')

In [182]:
X_test = X_test_original

X_test['championName'] = X_test['championName'].str.lower()
# additional name changes to match
X_test['championName'] = X_test['championName'].str.replace('monkeyking', 'wukong')
X_test['championName'] = X_test['championName'].str.replace('renata', 'renataglasc')
X_test['championName'] = X_test['championName'].str.replace('drmundo', 'dr.mundo')


variables = ['summonerLevel', 'championLevel','championPoints', 
            'Assassin', 'Fighter', 'Mage', 'Marksman', 'Support', 'Tank',
            'info.attack', 'info.defense', 'info.magic', 'info.difficulty',
            'stats.hpregenperlevel',	'stats.mpregen', 'stats.mpregenperlevel',	'stats.crit',	'stats.critperlevel',
            'stats.attackdamage', 'stats.attackdamageperlevel', 'stats.attackspeedperlevel',	'stats.attackspeed',
            'final_gold', 'final_xp', 'final_abilityhaste', 'final_abilitypower', 'final_armor', 'final_armorpen',
            'final_armorpenpercent', 'final_atkdmg', 'final_bns_armorpenpercent', 'final_bns_magicpenpercent', 'final_ccreduction',
            'final_cdreduction', 'final_remaining_health', 'final_health', 'final_healthrgn', 'final_lifesteal', 'final_mppen',
            'final_mgpenpercent', 'final_mgres', 'final_ms', 'final_omnivamp', 'final_physicalvamp', 'final_power', 'final_powermax',
            'final_powerregen', 'final_spellvamp', 'final_currentgold', 'final_magicdmgdone', 'final_magicdmgdonetochamps', 'final_magicdmgtaken',
            'final_physdmgdone', 'final_physdmgdonetochamps', 'final_physdmgtaken', 'final_dmgdone', 'final_dmgdonetochamps', 'final_dmgtaken', 
            'final_truedmgdone', 'final_truedmgdonetochamps', 'final_truedmgtaken', 'final_goldpersec', 'final_jungleminionskilled', 'final_lvl',
            'final_minionskilled', 'final_enemycontrolled',
            'Score', 'Win %', 'Pick %', 'Ban %', 'KDA'                     
             ]

X_test = pd.merge(X_test, team_positions, how='inner', on=['matchId', 'participantId'])
X_test = pd.merge(X_test, match_patch, how='inner', on='matchId')
X_test = pd.merge(X_test, champion_wr_stats, how='inner', left_on=['championName', 'patch_version'], right_on=['Name', 'Patch_Version'])
X_test = pd.merge(X_test, champion_data, how='inner', left_on='championId', right_on='key')
X_test = pd.merge(X_test, champion_types, how='inner', left_on='championId', right_on='key')
X_test = pd.merge(X_test, test_last_frame_values, how='inner', on=['matchId', 'participantId'])
X_test = pd.merge(X_test, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)

X_test = X_test.sort_values(['matchId', 'participantId'], ascending = [True, True]).reset_index(drop=True)


convert_team_values(X_test, variables)

X_test_perlane = X_test.groupby(['matchId', 'teamPosition'])[['final_gold']].sum().pivot_table(values='final_gold', index='matchId', columns='teamPosition').reset_index().drop(columns=0)

for lane in ['BOTTOM', 'JUNGLE', 'MIDDLE', 'TOP', 'UTILITY']:
  X_test_perlane[f'{lane}'] = np.where(X_test_perlane[f'{lane}'] >= 0, 1, -1)

X_test = (
    X_test
    .groupby(['matchId'])[variables]
    .sum()
    .reset_index()
)

X_test = pd.merge(X_test, X_test_perlane, how='inner', on='matchId').reset_index(drop = True)
X_test = pd.merge(X_test, testing_events, how='inner', on='matchId').reset_index(drop = True)

In [45]:
y_pred = gbr_wr.predict(X_test)

In [183]:
y_pred_modified = bst.predict(X_test)

y_pred_modified[y_pred_modified == 0] = 100
y_pred_modified[y_pred_modified == 1] = 200

In [184]:
#submission['winner'] = y_pred
submission['winner'] = y_pred_modified
submission.head()

Unnamed: 0,matchId,winner
0,8000,100
1,8001,200
2,8002,200
3,8003,200
4,8004,200


In [185]:
#submission.to_csv('../data/submission_gradientboosting_champwinrate_n1000_learn001_2023_04_15.csv', index=False)
#submission.to_csv('../data/submission_gradientboosting_champwinrate_n1000_learn001_2023_04_30.csv', index=False)
#submission.to_csv('../data/submission_gradientboosting_champ_all_winrate_n1000_learn001_2023_04_30.csv', index=False)
#submission.to_csv('../data/submission_xgboost_n1000_mdepth10_learnrate_0001_2023_04_30.csv', index=False)
#submission.to_csv('../data/submission_xgboost_n500_mdepth8_learnrate_00001_2023_04_30.csv', index=False)
#submission.to_csv('../data/submission_xgboost_default_params_2023_04_30.csv', index=False)

# this last prediction is the best one so far! 0.727 prediction on the private leaderboard; beats out the gradientboosting
# submission.to_csv('../data/submission_xgboost_n1000_mdepth6_learnrate_0001_subsamp08_2023_04_30.csv', index=False)