# Kaggle League of Legends competition - ML Models

## Team: Elden Ring

<img src="https://eldenring.wiki.fextralife.com/file/Elden-Ring/mirel_pastor_of_vow.jpg" alt="PRAISE DOG" style="width:806px;height:600px;"/>

#### PRAISE THE DOG!

## How to Win at League of Legends?

### Uninstall LoL and [install Dota 2](https://store.steampowered.com/app/570/Dota_2/), EZ. (just kidding, both games are great. Volvo pls gib patch.)

<img src = "https://static.wikia.nocookie.net/dota2_gamepedia/images/7/78/Keyart_phoenix.jpg/revision/latest/" alt="SKREE CAW CAW IM A BIRD" style="width:800px;height:497px;">

In [166]:
import pandas as pd
import numpy as np
import datetime as dt
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, KFold

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import PolynomialFeatures

In [167]:
X_train_original = pd.read_csv('../data/participants_train.csv')
X_test_original = pd.read_csv('../data/participants_test.csv')
y_train_original = pd.read_csv('../data/train_winners.csv')

champion_mastery = pd.read_csv('../data/champion_mastery.csv')
champion = pd.read_json('../data/champion.json')
team_positions = pd.read_csv('../data/teamPositions.csv')

submission = pd.read_csv('../data/sample_submission.csv')

In [3]:
# function that converts values to negative (for the second team, teamId 200)
# it leaves the first team values, teamId intact

def convert_team_values(df, col_names):
    
    
    for col in col_names:
        df[col] = np.where(df['teamId'] == 200,
                            -1* df[col],
                                df[col])
        
    return

In [4]:
# to be used later to measure the accuracy!
kfold = KFold(n_splits = 10, shuffle = True, random_state = 42)

In [5]:
# this is to extract the column that is needed for training
y_train = y_train_original['winner']

## Formulating Sample Submission as LogReg

In order to replicate it, I will take the max from each SummonerLevel per match & teamId combo.

In [6]:
# variables to fit on
variables = ['summonerLevel']

# first copy the original data to not accidentally change it
X_train = X_train_original

# find the max Summoner Level per each team
X_train = X_train.groupby(['matchId', 'teamId'])[variables].max().reset_index()

# mark them as positive (first team) or negative (second team), to compare the values
convert_team_values(X_train, variables)

# finally, see which team has max summoner level (by adding the + and - from before)
X_train = X_train.groupby('matchId')[variables].sum().reset_index(drop = True)

In [7]:
logreg = LogisticRegression().fit(X_train, y_train)

In [8]:
y_predictions = logreg.predict(X_train)
y_predictions

array([100, 100, 100, ..., 100, 200, 100])

In [9]:
accuracy_score(
    y_true = y_train,
    y_pred = y_predictions
)

0.50925

In [10]:
base_cv_scores = cross_val_score(
    estimator = logreg,
    X = X_train,
    y = y_train,
    cv = kfold
)

print(base_cv_scores)
print(np.mean(base_cv_scores))

[0.5     0.50125 0.5     0.47375 0.5225  0.51125 0.5     0.50375 0.52875
 0.505  ]
0.504625


> NOTE: successfully replicated!

## Now LogReg with Champion mastery added

In [11]:
X_train = pd.merge(X_train_original, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)

> NOTE: found out champion lvl is actually more indicative

In [12]:
variables = ['summonerLevel', 'championLevel']

X_train = X_train.groupby(['matchId', 'teamId'])[variables].agg({'summonerLevel': 'max', 'championLevel': 'sum'}).reset_index()

convert_team_values(X_train, variables)

X_train = X_train.groupby('matchId')[variables].sum().reset_index(drop = True)

In [13]:
logreg_mastery = LogisticRegression().fit(X_train, y_train)

In [14]:
accuracy_score(
    y_true = y_train,
    y_pred = logreg_mastery.predict(X_train)
)

0.5525

In [15]:
mastery_cv_scores = cross_val_score(
    estimator = logreg_mastery,
    X = X_train,
    y = y_train,
    cv = kfold
)

print(mastery_cv_scores)
print(np.mean(mastery_cv_scores))

[0.56625 0.57125 0.5325  0.545   0.545   0.55375 0.5475  0.53875 0.5525
 0.55875]
0.5511250000000001


In [16]:
print(classification_report(y_train, logreg_mastery.predict(X_train)))

              precision    recall  f1-score   support

         100       0.56      0.60      0.58      4071
         200       0.55      0.51      0.53      3929

    accuracy                           0.55      8000
   macro avg       0.55      0.55      0.55      8000
weighted avg       0.55      0.55      0.55      8000



In [17]:
print(confusion_matrix(y_train, logreg_mastery.predict(X_train)))

[[2425 1646]
 [1934 1995]]


## Looking into other Champion info

In [18]:
champion_data = pd.json_normalize(champion['data'])
champion_data['key'] = champion_data['key'].astype(int)

In [19]:
variables = ['summonerLevel', 'championLevel', 'info.attack', 'info.defense', 'info.magic', 'info.difficulty']

X_train = pd.merge(X_train_original, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)
X_train = pd.merge(X_train, champion_data, how='inner', left_on='championId', right_on='key')

X_train = X_train.sort_values(['matchId', 'participantId'], ascending = [True, True]).reset_index(drop=True)

X_train = (
    X_train
    .groupby(['matchId', 'teamId'])[variables]
    .agg({'summonerLevel': 'max',
          'championLevel': 'sum',
          'info.attack': 'sum',
          'info.defense': 'sum',
          'info.magic': 'sum',
          'info.difficulty': 'sum'})
    .reset_index()
)


convert_team_values(X_train, variables)

X_train = X_train.groupby('matchId')[variables].sum().reset_index(drop = True)

In [20]:
logreg_mastery_champion = LogisticRegression().fit(X_train, y_train)

In [21]:
accuracy_score(
    y_true = y_train,
    y_pred = logreg_mastery_champion.predict(X_train)
)

0.564625

In [22]:
mastery_champion_cv_scores = cross_val_score(
    estimator = logreg_mastery_champion,
    X = X_train,
    y = y_train,
    cv = kfold
)

print(mastery_champion_cv_scores)
print(np.mean(mastery_champion_cv_scores))

[0.57625 0.5725  0.5275  0.5825  0.5625  0.5775  0.555   0.545   0.555
 0.56125]
0.5615


In [23]:
## this was done on 3/24/2023 for submission!

variables = ['summonerLevel', 'championLevel', 'info.attack', 'info.defense', 'info.magic', 'info.difficulty']

X_test = pd.merge(X_test_original, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)
X_test = pd.merge(X_test, champion_data, how='inner', left_on='championId', right_on='key')

X_test = X_test.sort_values(['matchId', 'participantId'], ascending = [True, True]).reset_index(drop=True)

X_test = (
    X_test
    .groupby(['matchId', 'teamId'])[variables]
    .agg({'summonerLevel': 'max',
          'championLevel': 'sum',
          'info.attack': 'sum',
          'info.defense': 'sum',
          'info.magic': 'sum',
          'info.difficulty': 'sum'}).reset_index()
)


convert_team_values(X_test, variables)

X_test = X_test.groupby('matchId')[variables].sum().reset_index(drop = True)

In [24]:
y_pred = logreg_mastery_champion.predict(X_test)

In [25]:
submission['winner'] = y_pred
submission.head()

Unnamed: 0,matchId,winner
0,8000,100
1,8001,100
2,8002,100
3,8003,100
4,8004,200


In [26]:
#submission.to_csv('../data/submission_summonerlvl_champmastery_2023_03_24.csv', index=False)

follow-up to use stats too (other than info)

In [27]:
variables = ['summonerLevel', 'championLevel', 'info.attack', 'info.defense', 'info.magic', 'info.difficulty', 'stats.hp', 'stats.attackdamage','stats.attackspeed',
            'stats.hpperlevel', 'stats.mp', 'stats.mpperlevel', 'stats.movespeed', 'stats.armor', 'stats.armorperlevel', 'stats.spellblock', 'stats.spellblockperlevel',
            'stats.attackrange', 'stats.hpregen', 'stats.hpregenperlevel', 'stats.mpregen', 'stats.mpregenperlevel', 'stats.crit', 'stats.critperlevel',
            'stats.attackdamageperlevel', 'stats.attackspeedperlevel']

X_train = pd.merge(X_train_original, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)
X_train = pd.merge(X_train, champion_data, how='inner', left_on='championId', right_on='key')

X_train = X_train.sort_values(['matchId', 'participantId'], ascending = [True, True]).reset_index(drop=True)

X_train = (
    X_train
    .groupby(['matchId', 'teamId'])[variables]
    .agg({'summonerLevel': 'max',
          'championLevel': 'sum',
          'info.attack': 'sum',
          'info.defense': 'sum',
          'info.magic': 'sum',
          'info.difficulty': 'sum',
          'stats.hp': 'sum',
          'stats.hpperlevel': 'sum',
          'stats.mp': 'sum',
          'stats.mpperlevel': 'sum',
          'stats.movespeed': 'sum',
          'stats.armor': 'sum',
          'stats.armorperlevel': 'sum',
          'stats.spellblock': 'sum',
          'stats.spellblockperlevel': 'sum',
          'stats.attackrange': 'sum',
          'stats.hpregen': 'sum',
          'stats.hpregenperlevel': 'sum',
          'stats.mpregen': 'sum',
          'stats.mpregenperlevel': 'sum',
          'stats.crit': 'sum',
          'stats.critperlevel': 'sum',
          'stats.attackdamage': 'sum',
          'stats.attackdamageperlevel': 'sum',
          'stats.attackspeedperlevel': 'sum',
          'stats.attackspeed': 'sum'
          })
    .reset_index()
)


convert_team_values(X_train, variables)

X_train = X_train.groupby('matchId')[variables].sum().reset_index(drop = True)

In [28]:
pipeline_mastery_champion_stats = Pipeline(
    steps = [
        ('scaler', StandardScaler()),
        ('logreg', LogisticRegression())
        # note: more steps can be added here ...
    ]
)

In [29]:
pipeline_mastery_champion_stats.fit(X_train, y_train)

In [30]:
accuracy_score(
    y_true = y_train,
    y_pred = pipeline_mastery_champion_stats.predict(X_train)
)

0.57

In [31]:
mastery_champion_stats_cv_scores = cross_val_score(
    estimator = pipeline_mastery_champion_stats,
    X = X_train,
    y = y_train,
    cv = kfold
)

print(mastery_champion_stats_cv_scores)
print(np.mean(mastery_champion_stats_cv_scores))

[0.57    0.5575  0.52375 0.5425  0.54625 0.5725  0.55875 0.5625  0.58
 0.5675 ]
0.558125


It appears that adding the hero stats (pre-game) doesn't add much information beyond what is contained in the simple info!

## Another idea: looking at individual players summoner

so instead of taking the max, trating each as an individual variable

In [32]:
X_train = X_train_original.pivot_table(values='summonerLevel', index='matchId', columns='participantId').reset_index(drop=True)

In [33]:
logreg_summoner = LogisticRegression().fit(X_train, y_train)

In [34]:
accuracy_score(
    y_true = y_train,
    y_pred = logreg_summoner.predict(X_train)
)

0.514125

In [35]:
summoner_cv_scores = cross_val_score(
    estimator = logreg_summoner,
    X = X_train,
    y = y_train,
    cv = kfold
)

print(summoner_cv_scores)
print(np.mean(summoner_cv_scores))

[0.5     0.5     0.505   0.47375 0.50375 0.495   0.5     0.51125 0.52375
 0.48875]
0.5001249999999999


looks like accounting for individual summoner's levels does not matter; however, let's try and account for the difference in same roles. Currently using just the summoner number, but eventially will use the champion level instead.

In [36]:
variables= ['summonerLevel']

X_train = pd.merge(X_train_original, team_positions, how='inner', on=['matchId', 'participantId'])

convert_team_values(X_train, variables)

X_train = X_train.groupby(['matchId', 'teamPosition'])[variables].sum().reset_index()

X_train['lane_won'] = np.where(X_train[variables[0]] >= 0, 1, -1)

X_train = X_train.groupby('matchId')[['lane_won']].sum().reset_index(drop=True)

In [37]:
logreg_summoner = LogisticRegression().fit(X_train, y_train)

In [38]:
accuracy_score(
    y_true = y_train,
    y_pred = logreg_summoner.predict(X_train)
)

0.508875

In [39]:
summoner_cv_scores = cross_val_score(
    estimator = logreg_summoner,
    X = X_train,
    y = y_train,
    cv = kfold
)

print(summoner_cv_scores)
print(np.mean(summoner_cv_scores))

[0.5025  0.5525  0.49875 0.46875 0.5125  0.50875 0.515   0.4975  0.525
 0.5075 ]
0.5088750000000001


Trying to do the same, but using champion mastery per comparing the lanes

In [40]:
variables = ['summonerLevel', 'championLevel']

X_train = pd.merge(X_train_original, team_positions, how='inner', on=['matchId', 'participantId'])

X_train = pd.merge(X_train, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)

convert_team_values(X_train, variables)

X_train = X_train.groupby(['matchId', 'teamPosition'])[variables].sum().reset_index()

In [41]:
X_train['lane_summon_won'] = np.where(X_train[variables[0]] >= 0, 1, -1)

conditions = [
    X_train[variables[1]] > 0,
    X_train[variables[1]] == 0,
    X_train[variables[1]] < 0
]

values = [1, 0, -1]

X_train['lane_champion_won'] = np.select(conditions, values)

In [42]:
X_train = X_train.groupby('matchId')[['lane_summon_won', 'lane_champion_won']].sum().reset_index(drop=True)

In [43]:
logreg_summoner = LogisticRegression().fit(X_train, y_train)

In [44]:
accuracy_score(
    y_true = y_train,
    y_pred = logreg_summoner.predict(X_train)
)

0.543625

In [45]:
summoner_cv_scores = cross_val_score(
    estimator = logreg_summoner,
    X = X_train,
    y = y_train,
    cv = kfold
)

print(summoner_cv_scores)
print(np.mean(summoner_cv_scores))

[0.55875 0.53875 0.5325  0.5425  0.5375  0.55375 0.53125 0.5175  0.55875
 0.54   ]
0.541125


In [46]:
start = dt.datetime.now().timestamp()
end = dt.datetime.now().timestamp()
print(end - start)

3.504753112792969e-05


## Using Frames

In [169]:
train_last_frame_values = pd.read_csv('../data/train_last_frame_values.csv')

In [170]:
train_last_frame_values.head()

Unnamed: 0,matchId,participantId,final_gold,final_xp,final_armor,final_atkdmg,final_atkspd,final_health,final_healthrgn,final_lifesteal,final_mgpen,final_mgres,final_ms,final_dmgdone,final_dmgtaken,final_truedmgdone,final_truedmgtaken,final_enemycontrolled,final_lvl
0,0,1,2741,4141,112,84,115,1525,26,0,7,43,335,3446,3965,0,0,11961,8
1,0,2,3045,2861,53,72,238,1234,25,0,0,42,516,2471,10014,16,68,153462,6
2,0,3,3545,4712,56,70,107,1325,20,0,0,37,350,2700,3835,359,583,58119,8
3,0,4,2789,2894,51,114,125,1082,11,0,0,35,350,2112,2445,0,54,8159,6
4,0,5,2026,2551,44,69,106,1102,14,0,0,35,360,1138,1376,0,235,86831,6


In [49]:
variables = ['summonerLevel', 'championLevel',  'final_gold', 'final_xp']

#'info.attack', 'info.defense', 'info.magic', 'info.difficulty',

X_train = pd.merge(X_train_original, train_last_frame_values, how='inner', on=['matchId', 'participantId'])
X_train = pd.merge(X_train, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)
#X_train = pd.merge(X_train, champion_data, how='inner', left_on='championId', right_on='key')

X_train = X_train.sort_values(['matchId', 'participantId'], ascending = [True, True]).reset_index(drop=True)

X_train = (
    X_train
    .groupby(['matchId', 'teamId'])[variables]
    .agg({'summonerLevel': 'max',
          'championLevel': 'sum',
        #   'info.attack': 'sum',
        #   'info.defense': 'sum',
        #   'info.magic': 'sum',
        #   'info.difficulty': 'sum',
          'final_gold' : 'sum',
          'final_xp': 'sum'})
    .reset_index()
)


convert_team_values(X_train, variables)

X_train = X_train.groupby('matchId')[variables].sum().reset_index(drop = True)

In [50]:
logreg_lastframe = LogisticRegression().fit(X_train, y_train)

In [51]:
accuracy_score(
    y_true = y_train,
    y_pred = logreg_lastframe.predict(X_train)
)

0.7075

In [52]:
summoner_cv_scores = cross_val_score(
    estimator = logreg_lastframe,
    X = X_train,
    y = y_train,
    cv = kfold
)

print(summoner_cv_scores)
print(np.mean(summoner_cv_scores))

[0.72625 0.6975  0.7075  0.68875 0.70375 0.745   0.70125 0.68875 0.7275
 0.67375]
0.7060000000000001


## Using Frames and Normalizing

In [53]:
variables = ['summonerLevel', 'championLevel',  'final_gold', 'final_xp', 'final_armor', 'final_atkdmg', 'final_atkspd',
             'final_health', 'final_healthrgn', 'final_lifesteal', 'final_mgpen', 'final_mgres', 'final_ms',
             'final_dmgdone', 'final_dmgtaken', 'final_truedmgdone', 'final_truedmgtaken', 'final_lvl'
             ]

# found out later these are the important ones:
# variables = ['summonerLevel', 'championLevel', 'final_gold', 'final_xp', 'final_armor', 'final_health', 'final_atkdmg']


X_train = pd.merge(X_train_original, train_last_frame_values, how='inner', on=['matchId', 'participantId'])
X_train = pd.merge(X_train, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)

X_train = X_train.sort_values(['matchId', 'participantId'], ascending = [True, True]).reset_index(drop=True)

X_train = (
    X_train
    .groupby(['matchId', 'teamId'])[variables]
    .agg({'summonerLevel': 'max',
          'championLevel': 'sum',
          'final_gold' : 'sum',
          'final_xp': 'sum',
          'final_armor': 'sum',
          'final_atkdmg': 'sum',
          'final_atkspd': 'sum',
          'final_health': 'sum',
          'final_healthrgn': 'sum',
          'final_lifesteal': 'sum',
          'final_mgpen': 'sum',
          'final_mgres': 'sum',
          'final_ms': 'sum',
          'final_dmgdone': 'sum',
          'final_dmgtaken': 'sum',
          'final_truedmgdone': 'sum',
          'final_truedmgtaken': 'sum',
          'final_lvl': 'sum'})
    .reset_index()
)


convert_team_values(X_train, variables)

X_train = X_train.groupby('matchId')[variables].sum().reset_index(drop = True)

In [54]:
pipeline_lastframe = Pipeline(
    steps = [
        ('scaler', StandardScaler()),
        ('logreg', LogisticRegression())
        # note: more steps can be added here ...
    ]
)

In [55]:
pipeline_lastframe.fit(X_train, y_train)

In [56]:
accuracy_score(
    y_true = y_train,
    y_pred = pipeline_lastframe.predict(X_train)
)

0.70975

In [57]:
summoner_cv_scores = cross_val_score(
    estimator = pipeline_lastframe,
    X = X_train,
    y = y_train,
    cv = kfold
)

print(summoner_cv_scores)
print(np.mean(summoner_cv_scores))

[0.7275  0.69625 0.70875 0.705   0.70125 0.74125 0.7     0.69625 0.73
 0.67   ]
0.707625


Attempting the test!

In [58]:
test_last_frame_values = pd.read_csv('../data/test_last_frame_values.csv')

In [59]:
variables = ['summonerLevel', 'championLevel',  'final_gold', 'final_xp', 'final_armor', 'final_atkdmg', 'final_atkspd',
             'final_health', 'final_healthrgn', 'final_lifesteal', 'final_mgpen', 'final_mgres', 'final_ms',
             'final_dmgdone', 'final_dmgtaken', 'final_truedmgdone', 'final_truedmgtaken', 'final_lvl',
             ]

#'info.attack', 'info.defense', 'info.magic', 'info.difficulty',

X_test = pd.merge(X_test_original, test_last_frame_values, how='inner', on=['matchId', 'participantId'])
X_test = pd.merge(X_test, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)

X_test = X_test.sort_values(['matchId', 'participantId'], ascending = [True, True]).reset_index(drop=True)

X_test = (
    X_test
    .groupby(['matchId', 'teamId'])[variables]
    .agg({'summonerLevel': 'max',
          'championLevel': 'sum',
          'final_gold' : 'sum',
          'final_xp': 'sum',
          'final_armor': 'sum',
          'final_atkdmg': 'sum',
          'final_atkspd': 'sum',
          'final_health': 'sum',
          'final_healthrgn': 'sum',
          'final_lifesteal': 'sum',
          'final_mgpen': 'sum',
          'final_mgres': 'sum',
          'final_ms': 'sum',
          'final_dmgdone': 'sum',
          'final_dmgtaken': 'sum',
          'final_truedmgdone': 'sum',
          'final_truedmgtaken': 'sum',
          'final_lvl': 'sum'})
    .reset_index()
)


convert_team_values(X_test, variables)

X_test = X_test.groupby('matchId')[variables].sum().reset_index(drop = True)

In [60]:
y_pred = pipeline_lastframe.predict(X_test)

In [61]:
submission['winner'] = y_pred
submission.head()

Unnamed: 0,matchId,winner
0,8000,100
1,8001,100
2,8002,200
3,8003,200
4,8004,200


In [62]:
# This submission produced 70.4% on the Kaggle test set!!
# submission.to_csv('../data/submission_including_frames_2023_03_25.csv', index=False)

## Possibly Final idea: comparing the lanes individually and interaction terms

In [63]:
# champion types data make predictions worse!!
champion_types= champion_data.explode('tags').pivot_table(values='id', index='key', columns='tags', aggfunc='count').fillna(0).reset_index()

In [214]:
variables = ['championLevel','final_gold', 'final_xp', 'final_armor', 'final_health', 'final_atkdmg',
             #'Assassin', 'Fighter', 'Mage', 'Marksman', 'Support', 'Tank'
             ]

X_train = pd.merge(X_train_original, team_positions, how='inner', on=['matchId', 'participantId'])
#X_train = pd.merge(X_train, champion_types, how='inner', left_on='championId', right_on='key')
X_train = pd.merge(X_train, train_last_frame_values, how='inner', on=['matchId', 'participantId'])
X_train = pd.merge(X_train, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)

X_train = X_train.sort_values(['matchId', 'participantId'], ascending = [True, True]).reset_index(drop=True)

# below doesn't make the model better
# X_train[['Assassin', 'Fighter', 'Mage', 'Marksman', 'Support', 'Tank']] = X_train[['Assassin', 'Fighter', 'Mage', 'Marksman', 'Support', 'Tank']].multiply(X_train['championLevel'], axis='index')

convert_team_values(X_train, variables)

X_train_perlane = X_train.groupby(['matchId', 'teamPosition'])[['final_gold']].sum().pivot_table(values='final_gold', index='matchId', columns='teamPosition').reset_index().drop(columns=0)
X_train_perlane['BOTTOM'] = np.where(X_train_perlane['BOTTOM'] >= 0, 1, -1)
X_train_perlane['JUNGLE'] = np.where(X_train_perlane['JUNGLE'] >= 0, 1, -1)
X_train_perlane['MIDDLE'] = np.where(X_train_perlane['MIDDLE'] >= 0, 1, -1)
X_train_perlane['TOP'] = np.where(X_train_perlane['TOP'] >= 0, 1, -1)
X_train_perlane['UTILITY'] = np.where(X_train_perlane['UTILITY'] >= 0, 1, -1)

X_train = (
    X_train
    .groupby(['matchId'])[variables]
    .agg({ 'championLevel': 'sum', 
           'final_gold' : 'sum', 
           'final_xp': 'sum', # interchangable with final_lvl
           'final_armor': 'sum', 
           'final_health': 'sum',
           'final_atkdmg': 'sum',
        #    'Assassin': 'sum',
        #    'Fighter': 'sum',
        #    'Mage': 'sum',
        #    'Marksman': 'sum',
        #    'Support': 'sum',
        #    'Tank': 'sum'
          })
    .reset_index()
)

X_train = pd.merge(X_train, X_train_perlane, how='inner', on='matchId').reset_index(drop = True)


In [215]:
X_train.head()

Unnamed: 0,matchId,championLevel,final_gold,final_xp,final_armor,final_health,final_atkdmg,BOTTOM,JUNGLE,MIDDLE,TOP,UTILITY
0,0,0.0,-3393,-1614,27,228,-170,-1,-1,1,-1,-1
1,1,8.0,2846,-258,-61,-778,48,1,1,1,-1,1
2,2,1.0,-973,-630,-25,-587,29,1,-1,-1,-1,-1
3,3,-3.0,4102,1366,47,480,1,1,1,-1,-1,1
4,4,7.0,1391,1070,4,475,-130,-1,1,-1,1,1


In [216]:
pipeline_lastframe_lanes = Pipeline(
    steps = [
        #('pf', PolynomialFeatures(interaction_only = True, include_bias = False)),
        ('scaler', StandardScaler()),
        #('logreg', LogisticRegression(penalty = 'l1', solver = 'saga', max_iter = 10000, C=0.05)) # this is to regularize and get some polynomial features to 0
        ('logreg', LogisticRegression()) 
    ]
)

In [211]:
# used this below to find out best C value is 0.05

# from sklearn.model_selection import GridSearchCV

# gs = GridSearchCV(estimator = pipeline_lastframe_lanes, 
#                  param_grid = {'logreg__C': [1, 0.5, 0.1, 0.05, 0.01]},
#                  scoring = 'accuracy')

# gs.fit(X_train, y_train)

# gs.best_params_

In [217]:
pipeline_lastframe_lanes.fit(X_train, y_train)

In [218]:
accuracy_score(
    y_true = y_train,
    y_pred = pipeline_lastframe_lanes.predict(X_train)
)

0.715875

In [219]:
summoner_cv_scores = cross_val_score(
    estimator = pipeline_lastframe_lanes,
    X = X_train,
    y = y_train,
    cv = kfold
)

print(summoner_cv_scores)
print(np.mean(summoner_cv_scores))

[0.71625 0.7075  0.71    0.70625 0.72375 0.7425  0.70125 0.70375 0.72625
 0.67375]
0.711125


And now to put this to test..

In [220]:
variables = ['championLevel','final_gold', 'final_xp', 'final_armor', 'final_health', 'final_atkdmg',
             #'Assassin', 'Fighter', 'Mage', 'Marksman', 'Support', 'Tank'
             ]

X_test = pd.merge(X_test_original, team_positions, how='inner', on=['matchId', 'participantId'])
#X_test = pd.merge(X_test, champion_types, how='inner', left_on='championId', right_on='key')
X_test = pd.merge(X_test, test_last_frame_values, how='inner', on=['matchId', 'participantId'])
X_test = pd.merge(X_test, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)

X_test = X_test.sort_values(['matchId', 'participantId'], ascending = [True, True]).reset_index(drop=True)

# below doesn't make the model better
# X_train[['Assassin', 'Fighter', 'Mage', 'Marksman', 'Support', 'Tank']] = X_train[['Assassin', 'Fighter', 'Mage', 'Marksman', 'Support', 'Tank']].multiply(X_train['championLevel'], axis='index')

convert_team_values(X_test, variables)

X_test_perlane = X_test.groupby(['matchId', 'teamPosition'])[['final_gold']].sum().pivot_table(values='final_gold', index='matchId', columns='teamPosition').reset_index().drop(columns=0)
X_test_perlane['BOTTOM'] = np.where(X_test_perlane['BOTTOM'] >= 0, 1, -1)
X_test_perlane['JUNGLE'] = np.where(X_test_perlane['JUNGLE'] >= 0, 1, -1)
X_test_perlane['MIDDLE'] = np.where(X_test_perlane['MIDDLE'] >= 0, 1, -1)
X_test_perlane['TOP'] = np.where(X_test_perlane['TOP'] >= 0, 1, -1)
X_test_perlane['UTILITY'] = np.where(X_test_perlane['UTILITY'] >= 0, 1, -1)

X_test = (
    X_test
    .groupby(['matchId'])[variables]
    .agg({ 'championLevel': 'sum', 
           'final_gold' : 'sum', 
           'final_xp': 'sum', # interchangable with final_lvl
           'final_armor': 'sum', 
           'final_health': 'sum',
           'final_atkdmg': 'sum',
        #    'Assassin': 'sum',
        #    'Fighter': 'sum',
        #    'Mage': 'sum',
        #    'Marksman': 'sum',
        #    'Support': 'sum',
        #    'Tank': 'sum'
          })
    .reset_index()
)

X_test = pd.merge(X_test, X_test_perlane, how='inner', on='matchId').reset_index(drop = True)


In [221]:
y_pred = pipeline_lastframe_lanes.predict(X_test)

In [222]:
submission['winner'] = y_pred
submission.head()

Unnamed: 0,matchId,winner
0,8000,100
1,8001,100
2,8002,200
3,8003,100
4,8004,200


In [223]:
# submission.to_csv('../data/submission_including_frames_lanes_2023_03_26.csv', index=False)
# submission.to_csv('../data/submission_including_frames_lanes_herotypes_2023_03_26.csv', index=False)
# submission.to_csv('../data/submission_including_frames_lanes_interactions_2023_03_28.csv', index=False)
# submission.to_csv('../data/submission_including_frames_lanes_interactions_reg_2023_03_28.csv', index=False)

## Ok one more attempt to better determine won/lost lane

In [136]:
variables = ['championLevel','final_gold', 'final_xp', 'final_armor', 'final_health', 'final_atkdmg']

X_train = pd.merge(X_train_original, team_positions, how='inner', on=['matchId', 'participantId'])
X_train = pd.merge(X_train, train_last_frame_values, how='inner', on=['matchId', 'participantId'])
X_train = pd.merge(X_train, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)

X_train = X_train.sort_values(['matchId', 'participantId'], ascending = [True, True]).reset_index(drop=True)

convert_team_values(X_train, variables)

# attempting better "lane won" conditions
X_train_lane_gold = X_train.groupby(['matchId', 'teamPosition'])[['final_gold']].sum().pivot_table(values='final_gold', index='matchId', columns='teamPosition').reset_index().drop(columns=0)
X_train_lane_xp = X_train.groupby(['matchId', 'teamPosition'])[['final_xp']].sum().pivot_table(values='final_xp', index='matchId', columns='teamPosition').reset_index().drop(columns=0)


X_train_perlane = pd.merge(X_train_lane_gold, X_train_lane_xp, how='inner', on='matchId')

for lane in ['BOTTOM', 'JUNGLE', 'MIDDLE', 'TOP', 'UTILITY']:

    conditions = [
        (X_train_perlane[f'{lane}_x'] > 0) & (X_train_perlane[f'{lane}_y'] > 0),
        (X_train_perlane[f'{lane}_x'] > 0) & (X_train_perlane[f'{lane}_y'] < 0),
        (X_train_perlane[f'{lane}_x'] < 0) & (X_train_perlane[f'{lane}_y'] > 0),
        (X_train_perlane[f'{lane}_x'] < 0) & (X_train_perlane[f'{lane}_y'] < 0)
    ]

    values = [1, 0, 0, -1]

    X_train_perlane[f'{lane}'] = np.select(conditions, values)

X_train_perlane = X_train_perlane[['matchId', 'BOTTOM', 'JUNGLE', 'MIDDLE', 'TOP', 'UTILITY']]

X_train = (
    X_train
    .groupby(['matchId'])[variables]
    .agg({ 'championLevel': 'sum', 
           'final_gold' : 'sum', 
           'final_xp': 'sum', # interchangable with final_lvl
           'final_armor': 'sum', 
           'final_health': 'sum',
           'final_atkdmg': 'sum'
          })
    .reset_index()
)

X_train = pd.merge(X_train, X_train_perlane, how='inner', on='matchId').reset_index(drop = True)


In [128]:
pipeline_lastframe_lanes = Pipeline(
    steps = [
        ('scaler', StandardScaler()),
        ('logreg', LogisticRegression())
    ]
)

In [129]:
pipeline_lastframe_lanes.fit(X_train, y_train)

In [130]:
accuracy_score(
    y_true = y_train,
    y_pred = pipeline_lastframe_lanes.predict(X_train)
)

0.710875

In [131]:
summoner_cv_scores = cross_val_score(
    estimator = pipeline_lastframe_lanes,
    X = X_train,
    y = y_train,
    cv = kfold
)

print(summoner_cv_scores)
print(np.mean(summoner_cv_scores))

[0.7125  0.69375 0.7275  0.71625 0.7175  0.7375  0.68375 0.69625 0.72125
 0.67625]
0.7082499999999999


## Just testing ...

In [171]:
variables = ['final_gold', 'final_xp', 'final_armor', 'final_health', 'final_atkdmg', 'final_enemycontrolled'
             ]

X_train = pd.merge(X_train_original, team_positions, how='inner', on=['matchId', 'participantId'])
X_train = pd.merge(X_train, train_last_frame_values, how='inner', on=['matchId', 'participantId'])


convert_team_values(X_train, variables)

#X_train = X_train.groupby(['matchId', 'teamPosition'])[['final_gold']].sum().pivot_table(values='final_gold', index='matchId', columns='teamPosition').reset_index()

X_train = (
    X_train
    .groupby(['matchId'])[variables]
    .agg({ 'final_gold' : 'sum', 
           'final_xp': 'sum', # interchangable with final_lvl
           'final_armor': 'sum', 
           'final_health': 'sum',
           'final_atkdmg': 'sum',
           'final_enemycontrolled': 'sum'
          })
    .reset_index()
).reset_index(drop = True)

#X_train = pd.merge(X_train, X_train_perlane, how='inner', on='matchId').reset_index(drop = True)


In [172]:
X_train.head()

Unnamed: 0,matchId,final_gold,final_xp,final_armor,final_health,final_atkdmg,final_enemycontrolled
0,0,-3393,-1614,27,228,-170,67664
1,1,2846,-258,-61,-778,48,-61783
2,2,-973,-630,-25,-587,29,-132630
3,3,4102,1366,47,480,1,-39616
4,4,1391,1070,4,475,-130,16629


In [173]:
pipeline_lastframe_lanes = Pipeline(
    steps = [
        ('scaler', StandardScaler()),
        ('logreg', LogisticRegression())
    ]
)

In [174]:
pipeline_lastframe_lanes.fit(X_train, y_train)

In [175]:
summoner_cv_scores = cross_val_score(
    estimator = pipeline_lastframe_lanes,
    X = X_train,
    y = y_train,
    cv = kfold
)

print(summoner_cv_scores)
print(np.mean(summoner_cv_scores))

[0.71875 0.69375 0.72    0.7025  0.705   0.74625 0.68625 0.7075  0.72125
 0.67625]
0.7077499999999999


## Trying to add wards

In [232]:
training_events = pd.read_csv('../data/training_events.csv')

In [233]:
vars = ['wards_placed', 'wards_killed', 'turretplates_destroyed', 'elite_monsters_killed']

convert_team_values(training_events, vars)

training_events = training_events.groupby('matchId')[vars].sum()

In [267]:
variables = ['championLevel','final_gold', 'final_xp', 'final_armor', 'final_health', 'final_atkdmg',
             'final_atkspd', 'final_health', 'final_healthrgn', 'final_lifesteal', 'final_mgpen', 'final_mgres', 'final_ms',
             'final_dmgdone', 'final_dmgtaken', 'final_truedmgdone', 'final_truedmgtaken', 'final_lvl',
#             'Assassin', 'Fighter', 'Mage', 'Marksman', 'Support', 'Tank'
             ]

X_train = pd.merge(X_train_original, team_positions, how='inner', on=['matchId', 'participantId'])
#X_train = pd.merge(X_train, champion_types, how='inner', left_on='championId', right_on='key')
X_train = pd.merge(X_train, train_last_frame_values, how='inner', on=['matchId', 'participantId'])
X_train = pd.merge(X_train, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)

X_train = X_train.sort_values(['matchId', 'participantId'], ascending = [True, True]).reset_index(drop=True)

# below doesn't make the model better
# X_train[['Assassin', 'Fighter', 'Mage', 'Marksman', 'Support', 'Tank']] = X_train[['Assassin', 'Fighter', 'Mage', 'Marksman', 'Support', 'Tank']].multiply(X_train['championLevel'], axis='index')

convert_team_values(X_train, variables)

X_train_perlane = X_train.groupby(['matchId', 'teamPosition'])[['final_gold']].sum().pivot_table(values='final_gold', index='matchId', columns='teamPosition').reset_index().drop(columns=0)
X_train_perlane['BOTTOM'] = np.where(X_train_perlane['BOTTOM'] >= 0, 1, -1)
X_train_perlane['JUNGLE'] = np.where(X_train_perlane['JUNGLE'] >= 0, 1, -1)
X_train_perlane['MIDDLE'] = np.where(X_train_perlane['MIDDLE'] >= 0, 1, -1)
X_train_perlane['TOP'] = np.where(X_train_perlane['TOP'] >= 0, 1, -1)
X_train_perlane['UTILITY'] = np.where(X_train_perlane['UTILITY'] >= 0, 1, -1)

X_train = (
    X_train
    .groupby(['matchId'])[variables]
    .agg({ 'championLevel': 'sum', 
           'final_gold' : 'sum', 
           'final_xp': 'sum', # interchangable with final_lvl
           'final_armor': 'sum', 
           'final_health': 'sum',
           'final_atkdmg': 'sum',
          #  'Assassin': 'sum',
          #  'Fighter': 'sum',
          #  'Mage': 'sum',
          #  'Marksman': 'sum',
          #  'Support': 'sum',
          #  'Tank': 'sum',
           'final_atkspd': 'sum',
           'final_health': 'sum',
           'final_healthrgn': 'sum',
           'final_lifesteal': 'sum',
           'final_mgpen': 'sum',
           'final_mgres': 'sum',
           'final_ms': 'sum',
            'final_dmgdone': 'sum',
            'final_dmgtaken': 'sum',
            'final_truedmgdone': 'sum',
            'final_truedmgtaken': 'sum',
            'final_lvl': 'sum'
          })
    .reset_index()
)

X_train = pd.merge(X_train, X_train_perlane, how='inner', on='matchId').reset_index(drop = True)
X_train = pd.merge(X_train, training_events, how='inner', on='matchId').reset_index(drop = True)

In [268]:
X_train.head()

Unnamed: 0,matchId,championLevel,final_gold,final_xp,final_armor,final_health,final_atkdmg,final_atkspd,final_healthrgn,final_lifesteal,...,final_lvl,BOTTOM,JUNGLE,MIDDLE,TOP,UTILITY,wards_placed,wards_killed,turretplates_destroyed,elite_monsters_killed
0,0,0.0,-3393,-1614,27,12308,-170,23,2,-16,...,-1,-1,-1,1,-1,-1,-64,-1,-2,-1
1,1,8.0,2846,-258,-61,12050,48,103,-95,9,...,0,1,1,1,-1,1,1,0,-1,-1
2,2,1.0,-973,-630,-25,12413,29,-58,13,1,...,-2,1,-1,-1,-1,-1,4,1,-2,1
3,3,-3.0,4102,1366,47,12000,1,162,31,8,...,2,1,1,-1,-1,1,4,0,2,0
4,4,7.0,1391,1070,4,12109,-130,-88,82,0,...,3,-1,1,-1,1,1,4,0,-1,-1


In [269]:
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.preprocessing import MinMaxScaler

In [270]:
pipeline_neuralnetwork = Pipeline(
    steps = [
        ('scaler', MinMaxScaler()),
        ('nn', MLPClassifier(verbose = True,
                             hidden_layer_sizes = (100, 100, 100),
                             activation = 'tanh',
                             max_iter = 10000))
    ]
)

In [236]:
pipeline_frames_lanes = Pipeline(
    steps = [
        #('pf', PolynomialFeatures(interaction_only = True, include_bias = False)),
        ('scaler', StandardScaler()),
        #('logreg', LogisticRegression(penalty = 'l1', solver = 'saga', max_iter = 10000, C=0.05)) # this is to regularize and get some polynomial features to 0
        ('logreg', LogisticRegression()) 
    ]
)

In [271]:
pipeline_neuralnetwork.fit(X_train, y_train)

Iteration 1, loss = 0.61019271
Iteration 2, loss = 0.57854907
Iteration 3, loss = 0.57442100
Iteration 4, loss = 0.56679515
Iteration 5, loss = 0.56763913
Iteration 6, loss = 0.56368981
Iteration 7, loss = 0.56219946
Iteration 8, loss = 0.56332839
Iteration 9, loss = 0.56215489
Iteration 10, loss = 0.56610935
Iteration 11, loss = 0.56167408
Iteration 12, loss = 0.55947760
Iteration 13, loss = 0.56029098
Iteration 14, loss = 0.56653601
Iteration 15, loss = 0.56097927
Iteration 16, loss = 0.56052132
Iteration 17, loss = 0.56196663
Iteration 18, loss = 0.55892603
Iteration 19, loss = 0.55792073
Iteration 20, loss = 0.55591075
Iteration 21, loss = 0.55837006
Iteration 22, loss = 0.55788586
Iteration 23, loss = 0.55736543
Iteration 24, loss = 0.55785622
Iteration 25, loss = 0.56030719
Iteration 26, loss = 0.56172630
Iteration 27, loss = 0.55619708
Iteration 28, loss = 0.55774606
Iteration 29, loss = 0.55577476
Iteration 30, loss = 0.55607030
Iteration 31, loss = 0.55631671
Iteration 32, los

In [272]:
accuracy_score(
    y_true = y_train,
    y_pred = pipeline_neuralnetwork.predict(X_train)
)

0.7145

In [273]:
summoner_cv_scores = cross_val_score(
    estimator = pipeline_neuralnetwork,
    X = X_train,
    y = y_train,
    cv = kfold
)

print(summoner_cv_scores)
print(np.mean(summoner_cv_scores))

Iteration 1, loss = 0.60183325
Iteration 2, loss = 0.57448732
Iteration 3, loss = 0.56759126
Iteration 4, loss = 0.56525413
Iteration 5, loss = 0.56537085
Iteration 6, loss = 0.57082294
Iteration 7, loss = 0.55989042
Iteration 8, loss = 0.56099043
Iteration 9, loss = 0.56102401
Iteration 10, loss = 0.56193262
Iteration 11, loss = 0.56056259
Iteration 12, loss = 0.55767112
Iteration 13, loss = 0.56382595
Iteration 14, loss = 0.56081993
Iteration 15, loss = 0.55811175
Iteration 16, loss = 0.55773554
Iteration 17, loss = 0.56049518
Iteration 18, loss = 0.55725971
Iteration 19, loss = 0.55803488
Iteration 20, loss = 0.55555016
Iteration 21, loss = 0.55607114
Iteration 22, loss = 0.55609477
Iteration 23, loss = 0.55446387
Iteration 24, loss = 0.55434388
Iteration 25, loss = 0.55502594
Iteration 26, loss = 0.55309927
Iteration 27, loss = 0.55703565
Iteration 28, loss = 0.55305037
Iteration 29, loss = 0.55487363
Iteration 30, loss = 0.55496022
Iteration 31, loss = 0.55222023
Iteration 32, los

To train the above on testing ...

In [261]:
testing_events = pd.read_csv('../data/testing_events.csv')

In [262]:
vars = ['wards_placed', 'wards_killed', 'turretplates_destroyed', 'elite_monsters_killed']

convert_team_values(testing_events, vars)

testing_events = testing_events.groupby('matchId')[vars].sum()

In [263]:
variables = ['championLevel','final_gold', 'final_xp', 'final_armor', 'final_health', 'final_atkdmg',
             'final_atkspd', 'final_health', 'final_healthrgn', 'final_lifesteal', 'final_mgpen', 'final_mgres', 'final_ms',
             'final_dmgdone', 'final_dmgtaken', 'final_truedmgdone', 'final_truedmgtaken', 'final_lvl',
             'Assassin', 'Fighter', 'Mage', 'Marksman', 'Support', 'Tank'
             ]

X_test = pd.merge(X_test_original, team_positions, how='inner', on=['matchId', 'participantId'])
X_test = pd.merge(X_test, champion_types, how='inner', left_on='championId', right_on='key')
X_test = pd.merge(X_test, test_last_frame_values, how='inner', on=['matchId', 'participantId'])
X_test = pd.merge(X_test, champion_mastery, how='left', on=['summonerId', 'championId']).fillna(0)

X_test = X_test.sort_values(['matchId', 'participantId'], ascending = [True, True]).reset_index(drop=True)


convert_team_values(X_test, variables)

X_test_perlane = X_test.groupby(['matchId', 'teamPosition'])[['final_gold']].sum().pivot_table(values='final_gold', index='matchId', columns='teamPosition').reset_index().drop(columns=0)
X_test_perlane['BOTTOM'] = np.where(X_test_perlane['BOTTOM'] >= 0, 1, -1)
X_test_perlane['JUNGLE'] = np.where(X_test_perlane['JUNGLE'] >= 0, 1, -1)
X_test_perlane['MIDDLE'] = np.where(X_test_perlane['MIDDLE'] >= 0, 1, -1)
X_test_perlane['TOP'] = np.where(X_test_perlane['TOP'] >= 0, 1, -1)
X_test_perlane['UTILITY'] = np.where(X_test_perlane['UTILITY'] >= 0, 1, -1)

X_test = (
    X_test
    .groupby(['matchId'])[variables]
    .agg({ 'championLevel': 'sum', 
           'final_gold' : 'sum', 
           'final_xp': 'sum', # interchangable with final_lvl
           'final_armor': 'sum', 
           'final_health': 'sum',
           'final_atkdmg': 'sum',
           'Assassin': 'sum',
           'Fighter': 'sum',
           'Mage': 'sum',
           'Marksman': 'sum',
           'Support': 'sum',
           'Tank': 'sum',
           'final_atkspd': 'sum',
           'final_health': 'sum',
           'final_healthrgn': 'sum',
           'final_lifesteal': 'sum',
           'final_mgpen': 'sum',
           'final_mgres': 'sum',
           'final_ms': 'sum',
            'final_dmgdone': 'sum',
            'final_dmgtaken': 'sum',
            'final_truedmgdone': 'sum',
            'final_truedmgtaken': 'sum',
            'final_lvl': 'sum'
          })
    .reset_index()
)

X_test = pd.merge(X_test, X_test_perlane, how='inner', on='matchId').reset_index(drop = True)
X_test = pd.merge(X_test, testing_events, how='inner', on='matchId').reset_index(drop = True)

In [264]:
y_pred = pipeline_neuralnetwork.predict(X_test)

In [265]:
submission['winner'] = y_pred
submission.head()

Unnamed: 0,matchId,winner
0,8000,200
1,8001,100
2,8002,200
3,8003,200
4,8004,200


In [266]:
#
#submission.to_csv('../data/submission_first_neural_network_2023_03_28.csv', index=False)