# Kaggle League of Legends competition - ML Models

## Team: Elden Ring

<img src="https://eldenring.wiki.fextralife.com/file/Elden-Ring/mirel_pastor_of_vow.jpg" alt="PRAISE DOG" style="width:806px;height:600px;"/>

#### PRAISE THE DOG!

## How to Win at League of Legends?

### Uninstall LoL and [install Dota 2](https://store.steampowered.com/app/570/Dota_2/), EZ.

<img src = "https://static.wikia.nocookie.net/dota2_gamepedia/images/7/78/Keyart_phoenix.jpg/revision/latest/" alt="SKREE CAW CAW IM A BIRD" style="width:800px;height:497px;">

In [126]:
import pandas as pd
import numpy as np
import datetime as dt
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, KFold

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [2]:
# Competition tables: participant rows for the train and test matches, plus the train winners.
X_train_original = pd.read_csv('../data/participants_train.csv')
X_test_original = pd.read_csv('../data/participants_test.csv')
y_train_original = pd.read_csv('../data/train_winners.csv')

# Auxiliary tables that later cells merge onto the participant rows.
champion_mastery = pd.read_csv('../data/champion_mastery.csv')
champion = pd.read_json('../data/champion.json')
team_positions = pd.read_csv('../data/teamPositions.csv')

# Submission template; its 'winner' column is overwritten with predictions further down.
# NOTE(review): the saved submission.head() output shows an 'Unnamed: 0' column, so this CSV
# seems to carry a saved index that gets written back out with the predictions -- confirm
# that this matches the expected submission format.
submission = pd.read_csv('../data/sample_submission.csv')

In [3]:
def convert_team_values(df, col_names):
    """Negate the given columns for the second team (teamId 200), in place.

    Rows of any other teamId (the first team, 100) are left untouched, so
    summing a converted column per match afterwards yields
    "first team minus second team".

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``teamId`` column. It is modified in place.
    col_names : str or iterable of str
        Name(s) of the numeric column(s) to flip. A single string is treated
        as one column name rather than being iterated character by character.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, returned so the call can be chained. Callers
        that ignore the return value keep working exactly as before.

    Raises
    ------
    KeyError
        If ``teamId`` or one of ``col_names`` is not a column of ``df``.
    """
    if isinstance(col_names, str):
        col_names = [col_names]

    # the mask does not depend on the column being converted, so build it once
    is_second_team = df['teamId'] == 200

    for col in col_names:
        df[col] = np.where(is_second_team, -1 * df[col], df[col])

    return df

In [4]:
# to be used later to measure the accuracy!
# 10 folds, shuffled with a fixed seed; the same splitter object is passed as cv= to every
# cross_val_score call below.
kfold = KFold(n_splits = 10, shuffle = True, random_state = 42)

In [5]:
# this is to extract the column that is needed for training
# NOTE(review): the feature frames built below drop matchId (reset_index(drop=True)) and are
# paired with y_train purely by row position, so this presumably relies on train_winners.csv
# being ordered by matchId -- confirm.
y_train = y_train_original['winner']

## Formulating Sample Submission as LogReg

To replicate the sample submission, I take the maximum `summonerLevel` for each match and `teamId` combination.

In [6]:
# variables to fit on
variables = ['summonerLevel']

# find the max Summoner Level per each team
# (groupby builds a new frame, so X_train_original itself is never modified; no copy is needed)
X_train = X_train_original.groupby(['matchId', 'teamId'])[variables].max().reset_index()

# mark them as positive (first team) or negative (second team), to compare the values
convert_team_values(X_train, variables)

# finally, add the + and - values per match: the result is
# "max level of team 100 minus max level of team 200", one row per match
X_train = X_train.groupby('matchId')[variables].sum().reset_index(drop = True)

In [7]:
# Baseline: logistic regression (default settings) on the feature frame built in the previous cell.
logreg = LogisticRegression().fit(X_train, y_train)

In [8]:
print(logreg.intercept_)
print(logreg.coef_)
# feature value at which intercept + coef * x = 0, i.e. where the predicted class flips
print(- logreg.intercept_[0] / logreg.coef_[0])

[-0.03635984]
[[-9.10448626e-05]]
[-399.36181513]


In [9]:
# in-sample predictions: the model labels the same rows it was fitted on
y_predictions = logreg.predict(X_train)
y_predictions

array([100, 100, 100, ..., 100, 200, 100])

In [10]:
# fraction of training matches whose winner the baseline predicts correctly (in-sample)
accuracy_score(y_train, y_predictions)

0.50925

> NOTE: successfully replicated, with slightly better prediction, 50.9% with respect to 50.4%

In [11]:
# accuracy per fold for the baseline, using the shared kfold splitter
base_cv_scores = cross_val_score(logreg, X_train, y_train, cv = kfold)

print(base_cv_scores)
print(base_cv_scores.mean())

[0.5     0.50125 0.5     0.47375 0.5225  0.51125 0.5     0.50375 0.52875
 0.505  ]
0.504625


## Now LogReg with Champion mastery added

In [12]:
# Left join keeps every participant row; fillna(0) then zeroes every missing value in the merged
# frame, so players without a mastery record for their champion get 0 in the mastery columns.
X_train = pd.merge(X_train_original, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)

> NOTE: found out champion lvl is actually more indicative

In [13]:
variables = ['summonerLevel', 'championLevel']

# Rebuild the merged frame here instead of consuming the X_train left behind by an earlier cell:
# after the final groupby below X_train no longer has matchId/teamId, so this cell could not be
# re-run on its own otherwise.
X_train = pd.merge(X_train_original, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)

# per team: highest summonerLevel and summed championLevel
X_train = X_train.groupby(['matchId', 'teamId'])[variables].agg({'summonerLevel': 'max', 'championLevel': 'sum'}).reset_index()

# team 200 values become negative, so the per-match sum is "team 100 minus team 200"
convert_team_values(X_train, variables)

X_train = X_train.groupby('matchId')[variables].sum().reset_index(drop = True)

In [14]:
# logistic regression on the summonerLevel / championLevel team differences
logreg_mastery = LogisticRegression().fit(X_train, y_train)

In [15]:
# training-set accuracy of the mastery model (scored on the rows it was fitted on)
accuracy_score(y_train, logreg_mastery.predict(X_train))

0.5525

In [16]:
# accuracy per fold for the mastery model, using the shared kfold splitter
mastery_cv_scores = cross_val_score(logreg_mastery, X_train, y_train, cv = kfold)

print(mastery_cv_scores)
print(mastery_cv_scores.mean())

[0.56625 0.57125 0.5325  0.545   0.545   0.55375 0.5475  0.53875 0.5525
 0.55875]
0.5511250000000001


In [17]:
# per-class precision / recall / f1, computed on the training rows themselves
print(classification_report(y_train, logreg_mastery.predict(X_train)))

              precision    recall  f1-score   support

         100       0.56      0.60      0.58      4071
         200       0.55      0.51      0.53      3929

    accuracy                           0.55      8000
   macro avg       0.55      0.55      0.55      8000
weighted avg       0.55      0.55      0.55      8000



In [18]:
# scikit-learn convention: rows are the true labels, columns the predicted labels
print(confusion_matrix(y_train, logreg_mastery.predict(X_train)))

[[2425 1646]
 [1934 1995]]


## Looking into other Champion info

In [19]:
# Flatten the records under the 'data' column of champion.json into a flat frame; later cells
# read dotted columns such as 'info.attack' and 'stats.hp' from it.
champion_data = pd.json_normalize(champion['data'])
# cast so that 'key' can be joined against the participants' championId
champion_data['key'] = champion_data['key'].astype(int)

In [20]:
# column -> how it is collapsed to one value per team; the feature list is derived from it
team_aggregations = {
    'summonerLevel': 'max',
    'championLevel': 'sum',
    'info.attack': 'sum',
    'info.defense': 'sum',
    'info.magic': 'sum',
    'info.difficulty': 'sum',
}
variables = list(team_aggregations)

participants = pd.merge(X_train_original, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)
# NOTE(review): inner join -- a participant whose championId has no matching 'key' in
# champion_data is dropped without warning; confirm that no rows are lost here
participants = pd.merge(participants, champion_data, how='inner', left_on='championId', right_on='key')
participants = participants.sort_values(['matchId', 'participantId'], ascending = [True, True]).reset_index(drop=True)

team_totals = participants.groupby(['matchId', 'teamId'])[variables].agg(team_aggregations).reset_index()

# team 200 values become negative, so the per-match sum below is "team 100 minus team 200"
convert_team_values(team_totals, variables)

X_train = team_totals.groupby('matchId')[variables].sum().reset_index(drop = True)

In [21]:
# logistic regression on the level differences plus the summed champion 'info.*' differences
logreg_mastery_champion = LogisticRegression().fit(X_train, y_train)

In [22]:
# training-set accuracy of the mastery + champion-info model
accuracy_score(y_train, logreg_mastery_champion.predict(X_train))

0.564625

In [23]:
# accuracy per fold for the mastery + champion-info model, using the shared kfold splitter
mastery_champion_cv_scores = cross_val_score(logreg_mastery_champion, X_train, y_train, cv = kfold)

print(mastery_champion_cv_scores)
print(mastery_champion_cv_scores.mean())

[0.57625 0.5725  0.5275  0.5825  0.5625  0.5775  0.555   0.545   0.555
 0.56125]
0.5615


In [24]:
## this was done on 3/24/2023 for submission!

# column -> how it is collapsed to one value per team; the feature list is derived from it
team_aggregations = {
    'summonerLevel': 'max',
    'championLevel': 'sum',
    'info.attack': 'sum',
    'info.defense': 'sum',
    'info.magic': 'sum',
    'info.difficulty': 'sum',
}
variables = list(team_aggregations)

test_participants = pd.merge(X_test_original, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)
# NOTE(review): inner join -- a test participant whose championId has no matching 'key' in
# champion_data is dropped without warning; confirm that no rows are lost here
test_participants = pd.merge(test_participants, champion_data, how='inner', left_on='championId', right_on='key')
test_participants = test_participants.sort_values(['matchId', 'participantId'], ascending = [True, True]).reset_index(drop=True)

test_team_totals = test_participants.groupby(['matchId', 'teamId'])[variables].agg(team_aggregations).reset_index()

# team 200 values become negative, so the per-match sum below is "team 100 minus team 200"
convert_team_values(test_team_totals, variables)

X_test = test_team_totals.groupby('matchId')[variables].sum().reset_index(drop = True)

In [25]:
# predicted winner (a teamId label) for every test match, in X_test row order
y_pred = logreg_mastery_champion.predict(X_test)

In [26]:
# y_pred is assigned by row position. X_test comes out of groupby('matchId'), i.e. in ascending
# matchId order, so this assumes the submission template is ordered the same way -- TODO confirm.
submission['winner'] = y_pred
submission.head()

Unnamed: 0,matchId,winner
0,8000,100
1,8001,100
2,8002,100
3,8003,100
4,8004,200


In [27]:
# index=False omits the row index; every column of the frame is still written
submission.to_csv('../data/submission_summonerlvl_champmastery_2023_03_24.csv', index=False)

Follow-up: also use the champion `stats.*` columns, in addition to the `info.*` ones.

In [108]:
# column -> how it is collapsed to one value per team; the feature list is derived from it.
# Only three champion stats are enabled (hp, attackdamage, attackspeed). The other stats.*
# candidates were left disabled: hpperlevel, mp, mpperlevel, movespeed, armor, armorperlevel,
# spellblock, spellblockperlevel, attackrange, hpregen, hpregenperlevel, mpregen,
# mpregenperlevel, crit, critperlevel, attackdamageperlevel, attackspeedperlevel.
team_aggregations = {
    'summonerLevel': 'max',
    'championLevel': 'sum',
    'info.attack': 'sum',
    'info.defense': 'sum',
    'info.magic': 'sum',
    'info.difficulty': 'sum',
    'stats.hp': 'sum',
    'stats.attackdamage': 'sum',
    'stats.attackspeed': 'sum',
}
variables = list(team_aggregations)

participants = pd.merge(X_train_original, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)
# NOTE(review): inner join -- a participant whose championId has no matching 'key' in
# champion_data is dropped without warning; confirm that no rows are lost here
participants = pd.merge(participants, champion_data, how='inner', left_on='championId', right_on='key')
participants = participants.sort_values(['matchId', 'participantId'], ascending = [True, True]).reset_index(drop=True)

team_totals = participants.groupby(['matchId', 'teamId'])[variables].agg(team_aggregations).reset_index()

# team 200 values become negative, so the per-match sum below is "team 100 minus team 200"
convert_team_values(team_totals, variables)

X_train = team_totals.groupby('matchId')[variables].sum().reset_index(drop = True)

In [109]:
# same model family as before, now also fed the three summed champion stats differences
logreg_mastery_champion_stats = LogisticRegression().fit(X_train, y_train)

In [110]:
# training-set accuracy of the model that also uses the champion stats
accuracy_score(y_train, logreg_mastery_champion_stats.predict(X_train))

0.5645

## Another idea: looking at individual players summoner

So instead of taking the max, treat each player's summoner level as an individual variable.

In [28]:
# one row per match and one column per participantId, holding that player's summonerLevel
# (pivot_table's default aggregation is the mean, should a matchId/participantId pair repeat)
X_train = X_train_original.pivot_table(values='summonerLevel', index='matchId', columns='participantId').reset_index(drop=True)

In [29]:
# one coefficient per participant column of the pivoted frame
logreg_summoner = LogisticRegression().fit(X_train, y_train)

In [30]:
# training-set accuracy of the per-player summoner-level model
accuracy_score(y_train, logreg_summoner.predict(X_train))

0.514125

In [31]:
# accuracy per fold for the per-player summoner-level model, using the shared kfold splitter
summoner_cv_scores = cross_val_score(logreg_summoner, X_train, y_train, cv = kfold)

print(summoner_cv_scores)
print(summoner_cv_scores.mean())

[0.5     0.5     0.505   0.47375 0.50375 0.495   0.5     0.51125 0.52375
 0.48875]
0.5001249999999999


It looks like accounting for each individual summoner's level does not help; however, let's try to account for the difference between players in the same role. For now this uses only the summoner level, but eventually it will use the champion level instead.

In [32]:
variables= ['summonerLevel']

by_player = pd.merge(X_train_original, team_positions, how='inner', on=['matchId', 'participantId'])

# team 200 levels turn negative, so the sum per (match, position) is "team 100 minus team 200"
# (presumably one player per team in each teamPosition -- verify against the data)
convert_team_values(by_player, variables)
lane_diff = by_player.groupby(['matchId', 'teamPosition'])[variables].sum().reset_index()

# +1 when team 100 is ahead in the lane, -1 when it is behind
# NOTE(review): because of ">=", a tied lane is credited to team 100 -- confirm this is intended
lane_diff['lane_won'] = np.where(lane_diff[variables[0]] >= 0, 1, -1)

# net number of lanes "won" by team 100 in each match
X_train = lane_diff.groupby('matchId')[['lane_won']].sum().reset_index(drop=True)

In [33]:
# reuses the name logreg_summoner: from here on it refers to the lane-comparison model
logreg_summoner = LogisticRegression().fit(X_train, y_train)

In [34]:
# training-set accuracy of the lane-comparison model
accuracy_score(y_train, logreg_summoner.predict(X_train))

0.508875

In [35]:
# accuracy per fold for the lane-comparison model, using the shared kfold splitter
summoner_cv_scores = cross_val_score(logreg_summoner, X_train, y_train, cv = kfold)

print(summoner_cv_scores)
print(summoner_cv_scores.mean())

[0.5025  0.5525  0.49875 0.46875 0.5125  0.50875 0.515   0.4975  0.525
 0.5075 ]
0.5088750000000001


Trying the same approach, but also comparing champion mastery lane by lane.

In [36]:
variables = ['summonerLevel', 'championLevel']

by_player = pd.merge(X_train_original, team_positions, how='inner', on=['matchId', 'participantId'])
# left join + fillna(0): players without a mastery record count as 0
by_player = pd.merge(by_player, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)

# team 200 values turn negative, so the sum per (match, position) is "team 100 minus team 200"
convert_team_values(by_player, variables)

X_train = by_player.groupby(['matchId', 'teamPosition'])[variables].sum().reset_index()

In [37]:
summoner_diff = X_train[variables[0]]
champion_diff = X_train[variables[1]]

# summoner level: +1 if team 100 is ahead or tied in the lane, -1 otherwise (no tie value)
X_train['lane_summon_won'] = np.where(summoner_diff >= 0, 1, -1)

# champion level: +1 / 0 / -1 for team 100 ahead / tied / behind
conditions = [champion_diff > 0, champion_diff == 0, champion_diff < 0]
values = [1, 0, -1]

X_train['lane_champion_won'] = np.select(conditions, values)

In [38]:
# net lane results per match for each of the two signals
# NOTE(review): not re-runnable on its own -- afterwards X_train no longer has a matchId column
X_train = X_train.groupby('matchId')[['lane_summon_won', 'lane_champion_won']].sum().reset_index(drop=True)

In [39]:
# lane-comparison model on the two net lane counts (the name logreg_summoner is reused again)
logreg_summoner = LogisticRegression().fit(X_train, y_train)

In [40]:
# training-set accuracy of the two-signal lane-comparison model
accuracy_score(y_train, logreg_summoner.predict(X_train))

0.543625

In [41]:
# accuracy per fold for the two-signal lane-comparison model, using the shared kfold splitter
summoner_cv_scores = cross_val_score(logreg_summoner, X_train, y_train, cv = kfold)

print(summoner_cv_scores)
print(summoner_cv_scores.mean())

[0.55875 0.53875 0.5325  0.5425  0.5375  0.55375 0.53125 0.5175  0.55875
 0.54   ]
0.541125


In [42]:
# Timing scaffold: nothing runs between the two timestamps, so the printed value is only the
# overhead of the two now() calls. Put the code to be timed between `start` and `end`.
start = dt.datetime.now().timestamp()
end = dt.datetime.now().timestamp()
print(end - start)

2.5987625122070312e-05


## Using Frames

In [131]:
# per-participant values taken from the last frame; merged on matchId / participantId below
last_frame_values = pd.read_csv('../data/last_frame_values.csv')

In [113]:
# NOTE(review): the saved output of this cell lists only final_gold / final_xp, while a later
# cell reads many more final_* columns from the same frame -- the CSV was presumably regenerated
# between runs (execution counts are out of order); re-run to refresh this preview.
last_frame_values.head()

Unnamed: 0,matchId,participantId,final_gold,final_xp
0,0,1,2741,4141
1,0,2,3045,2861
2,0,3,3545,4712
3,0,4,2789,2894
4,0,5,2026,2551


In [122]:
# column -> how it is collapsed to one value per team; the feature list is derived from it
# (the champion 'info.*' features are left out of this experiment)
team_aggregations = {
    'summonerLevel': 'max',
    'championLevel': 'sum',
    'final_gold': 'sum',
    'final_xp': 'sum',
}
variables = list(team_aggregations)

# NOTE(review): inner join -- participants without a last-frame row are dropped without warning
participants = pd.merge(X_train_original, last_frame_values, how='inner', on=['matchId', 'participantId'])
participants = pd.merge(participants, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)
participants = participants.sort_values(['matchId', 'participantId'], ascending = [True, True]).reset_index(drop=True)

team_totals = participants.groupby(['matchId', 'teamId'])[variables].agg(team_aggregations).reset_index()

# team 200 values become negative, so the per-match sum below is "team 100 minus team 200"
convert_team_values(team_totals, variables)

X_train = team_totals.groupby('matchId')[variables].sum().reset_index(drop = True)

In [123]:
# logistic regression on the level differences plus the last-frame gold / xp differences
logreg_lastframe = LogisticRegression().fit(X_train, y_train)

In [124]:
# training-set accuracy of the last-frame model
accuracy_score(y_train, logreg_lastframe.predict(X_train))

0.7075

In [125]:
# accuracy per fold for the last-frame model (the result still lands in summoner_cv_scores)
summoner_cv_scores = cross_val_score(logreg_lastframe, X_train, y_train, cv = kfold)

print(summoner_cv_scores)
print(summoner_cv_scores.mean())

[0.72625 0.6975  0.7075  0.68875 0.70375 0.745   0.70125 0.68875 0.7275
 0.67375]
0.7060000000000001


## Using Frames and Normalizing

In [143]:
# last-frame columns; each one is summed over a team's players
frame_columns = [
    'final_gold', 'final_xp', 'final_armor', 'final_atkdmg', 'final_atkspd',
    'final_health', 'final_healthrgn', 'final_lifesteal', 'final_mgpen', 'final_mgres', 'final_ms',
    'final_dmgdone', 'final_dmgtaken', 'final_truedmgdone', 'final_truedmgtaken', 'final_lvl',
]

# column -> how it is collapsed to one value per team; the feature list is derived from it
# (the champion 'info.*' features are left out of this experiment)
team_aggregations = {'summonerLevel': 'max', 'championLevel': 'sum'}
team_aggregations.update({col: 'sum' for col in frame_columns})
variables = list(team_aggregations)

# NOTE(review): inner join -- participants without a last-frame row are dropped without warning
participants = pd.merge(X_train_original, last_frame_values, how='inner', on=['matchId', 'participantId'])
participants = pd.merge(participants, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)
participants = participants.sort_values(['matchId', 'participantId'], ascending = [True, True]).reset_index(drop=True)

team_totals = participants.groupby(['matchId', 'teamId'])[variables].agg(team_aggregations).reset_index()

# team 200 values become negative, so the per-match sum below is "team 100 minus team 200"
convert_team_values(team_totals, variables)

X_train = team_totals.groupby('matchId')[variables].sum().reset_index(drop = True)

In [144]:
# standardize every feature, then fit the logistic regression on the scaled values
pipeline_lastframe = Pipeline([
    ('scaler', StandardScaler()),
    ('logreg', LogisticRegression()),
    # note: more steps can be added here ...
])

In [145]:
# fits the scaler and the classifier together on the training features
pipeline_lastframe.fit(X_train, y_train)

In [146]:
# training-set accuracy of the scaled last-frame pipeline
accuracy_score(y_train, pipeline_lastframe.predict(X_train))

0.70975

In [147]:
# accuracy per fold for the scaled pipeline (the result still lands in summoner_cv_scores)
summoner_cv_scores = cross_val_score(pipeline_lastframe, X_train, y_train, cv = kfold)

print(summoner_cv_scores)
print(summoner_cv_scores.mean())

[0.7275  0.69625 0.70875 0.705   0.70125 0.74125 0.7     0.69625 0.73
 0.67   ]
0.707625


Attempting the test!

In [148]:
# last-frame values for the test matches; the next cell reads the same final_* columns from it
test_last_frame_values = pd.read_csv('../data/test_last_frame_values.csv')

In [149]:
# last-frame columns; each one is summed over a team's players
frame_columns = [
    'final_gold', 'final_xp', 'final_armor', 'final_atkdmg', 'final_atkspd',
    'final_health', 'final_healthrgn', 'final_lifesteal', 'final_mgpen', 'final_mgres', 'final_ms',
    'final_dmgdone', 'final_dmgtaken', 'final_truedmgdone', 'final_truedmgtaken', 'final_lvl',
]

# column -> how it is collapsed to one value per team; the feature list is derived from it
# (the champion 'info.*' features are left out of this experiment)
team_aggregations = {'summonerLevel': 'max', 'championLevel': 'sum'}
team_aggregations.update({col: 'sum' for col in frame_columns})
variables = list(team_aggregations)

# NOTE(review): inner join -- test participants without a last-frame row are dropped without
# warning, which would change the team sums (or the number of predicted matches)
test_participants = pd.merge(X_test_original, test_last_frame_values, how='inner', on=['matchId', 'participantId'])
test_participants = pd.merge(test_participants, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)
test_participants = test_participants.sort_values(['matchId', 'participantId'], ascending = [True, True]).reset_index(drop=True)

test_team_totals = test_participants.groupby(['matchId', 'teamId'])[variables].agg(team_aggregations).reset_index()

# team 200 values become negative, so the per-match sum below is "team 100 minus team 200"
convert_team_values(test_team_totals, variables)

X_test = test_team_totals.groupby('matchId')[variables].sum().reset_index(drop = True)

In [150]:
# predicted winner (a teamId label) for every test match, in X_test row order
y_pred = pipeline_lastframe.predict(X_test)

In [151]:
# y_pred is assigned by row position. X_test comes out of groupby('matchId'), i.e. in ascending
# matchId order, so this assumes the submission template is ordered the same way -- TODO confirm.
submission['winner'] = y_pred
submission.head()

Unnamed: 0,matchId,winner
0,8000,100
1,8001,100
2,8002,200
3,8003,200
4,8004,200


In [152]:
# index=False omits the row index; every column of the frame is still written
submission.to_csv('../data/submission_including_frames_2023_03_25.csv', index=False)