In [80]:
import pandas as pd
pd.options.display.max_rows = 100

df = pd.read_csv(
    "data/OraclesElixir/2024_LoL_esports_match_data_from_OraclesElixir.csv",
    dtype={"url": "str"}
)

rows, cols = df.shape
print(f"The CSV file has {rows} rows and {cols} columns.")

print(len(df.columns.tolist()))
print(df.columns.tolist())


The CSV file has 117648 rows and 161 columns.
161
['gameid', 'datacompleteness', 'url', 'league', 'year', 'split', 'playoffs', 'date', 'game', 'patch', 'participantid', 'side', 'position', 'playername', 'playerid', 'teamname', 'teamid', 'champion', 'ban1', 'ban2', 'ban3', 'ban4', 'ban5', 'pick1', 'pick2', 'pick3', 'pick4', 'pick5', 'gamelength', 'result', 'kills', 'deaths', 'assists', 'teamkills', 'teamdeaths', 'doublekills', 'triplekills', 'quadrakills', 'pentakills', 'firstblood', 'firstbloodkill', 'firstbloodassist', 'firstbloodvictim', 'team kpm', 'ckpm', 'firstdragon', 'dragons', 'opp_dragons', 'elementaldrakes', 'opp_elementaldrakes', 'infernals', 'mountains', 'clouds', 'oceans', 'chemtechs', 'hextechs', 'dragons (type unknown)', 'elders', 'opp_elders', 'firstherald', 'heralds', 'opp_heralds', 'void_grubs', 'opp_void_grubs', 'firstbaron', 'barons', 'opp_barons', 'firsttower', 'towers', 'opp_towers', 'firstmidtower', 'firsttothreetowers', 'turretplates', 'opp_turretplates', 'inhib

In [81]:
for i, col in enumerate(df.columns):
    print(f"Column {i}: {col}")
    print(df[col].describe())
    if df[col].dtype == 'object' and df[col].nunique() <= 20:
        print("Unique values:", df[col].unique())
    print("------------------------------------")

Column 0: gameid
count               117648
unique                9804
top       LOLTMNT02_194401
freq                    12
Name: gameid, dtype: object
------------------------------------
Column 1: datacompleteness
count       117648
unique           2
top       complete
freq        100956
Name: datacompleteness, dtype: object
Unique values: ['partial' 'complete']
------------------------------------
Column 2: url
count                                            16692
unique                                             551
top       https://lpl.qq.com/es/stats.shtml?bmid=10934
freq                                                60
Name: url, dtype: object
------------------------------------
Column 3: league
count     117648
unique        51
top          LPL
freq        8604
Name: league, dtype: object
------------------------------------
Column 4: year
count    117648.000000
mean       2024.035292
std           0.194213
min        2023.000000
25%        2024.000000
50%        2024.00

In [82]:
team_rows = df[df['position'] == 'team'].copy()
player_rows = df[df['position'] != 'team']

positions = ['top', 'jng', 'mid', 'bot', 'sup']

for pos in positions:
    champ_col = (
        player_rows[player_rows['position'] == pos]
        .loc[:, ['gameid', 'side', 'champion']]
        .rename(columns={'champion': f'{pos}_champ'})
    )
    
    team_rows = team_rows.merge(champ_col, on=['gameid', 'side'], how='left')
df = team_rows


Dropping Data

In [83]:
columns_to_drop = (
    df.columns[1:11]  # Metadata columns
    .union(df.columns[12:18])  # Additional metadata columns
    .union(df.columns[30:43])  # End game data columns
    .union(df.columns[50:57])  # Drake-related columns
    .union(df.columns[40:43])  # Individual data columns
    .union(pd.Index([df.columns[78]]))  # Specific column (xpat15)
    .union(pd.Index([df.columns[91]]))  # Specific column (opp_deathsat15)
    .union(pd.Index([df.columns[95]]))  # Specific column (bot_champ)
    .union(pd.Index([df.columns[28]]))  # Specific column (firstbaron)
    .union(df.columns[131:161])  # Data after 20 minutes
)

df.drop(columns=columns_to_drop, axis=1, inplace=True)



In [84]:
df.head()

Unnamed: 0,gameid,side,ban1,ban2,ban3,ban4,ban5,pick1,pick2,pick3,pick4,pick5,result,team kpm,ckpm,firstdragon,dragons,opp_dragons,elementaldrakes,opp_elementaldrakes,elders,opp_elders,firstherald,heralds,opp_heralds,void_grubs,opp_void_grubs,firstbaron,barons,opp_barons,firsttower,towers,opp_towers,firstmidtower,firsttothreetowers,turretplates,opp_turretplates,inhibitors,opp_inhibitors,damagetochampions,dpm,damagetakenperminute,damagemitigatedperminute,wardsplaced,wpm,wardskilled,wcpm,controlwardsbought,visionscore,vspm,totalgold,earnedgold,earned gpm,goldspent,gspd,gpr,minionkills,monsterkills,monsterkillsownjungle,monsterkillsenemyjungle,cspm,goldat10,xpat10,csat10,opp_goldat10,opp_xpat10,opp_csat10,golddiffat10,xpdiffat10,csdiffat10,killsat10,assistsat10,deathsat10,opp_killsat10,opp_assistsat10,opp_deathsat10,goldat15,xpat15,csat15,opp_goldat15,opp_xpat15,opp_csat15,golddiffat15,xpdiffat15,csdiffat15,killsat15,assistsat15,deathsat15,opp_killsat15,opp_assistsat15,opp_deathsat15,top_champ,jng_champ,mid_champ,bot_champ,sup_champ
0,10660-10660_game_1,Blue,Akali,Nocturne,K'Sante,Lee Sin,Wukong,Kalista,Senna,Orianna,Maokai,Aatrox,0,0.0954,0.6045,,2.0,3.0,,,,,,,,,,,0.0,2.0,,2.0,9.0,,,,,0.0,1.0,41801,1329.8303,2574.9735,,97,3.0859,59,1.877,33,250,7.9533,49907,29292,931.877,47512,-0.131637,,,167,127.0,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Aatrox,Maokai,Orianna,Kalista,Senna
1,10660-10660_game_1,Red,Poppy,Ashe,Neeko,Vi,Jarvan IV,Renata Glasc,Varus,LeBlanc,Rell,Rumble,1,0.509,0.6045,,3.0,2.0,,,,,,,,,,,2.0,0.0,,9.0,2.0,,,,,1.0,0.0,56942,1811.5164,1917.0414,,122,3.8812,49,1.5589,56,277,8.8123,61737,41122,1308.2291,54207,0.131637,,,213,121.0,29.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Rumble,Rell,LeBlanc,Varus,Renata Glasc
2,10660-10660_game_2,Blue,Nocturne,Udyr,Renata Glasc,Nautilus,Lee Sin,Neeko,Bel'Veth,Kennen,Senna,Tahm Kench,0,0.0942,0.6279,,0.0,4.0,,,,,,,,,,,0.0,1.0,,2.0,9.0,,,,,0.0,1.0,54422,1708.697,2539.529,,88,2.763,47,1.4757,39,236,7.4097,49552,28682,900.5338,46730,-0.141117,,,166,126.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Kennen,Bel'Veth,Neeko,Senna,Tahm Kench
3,10660-10660_game_2,Red,Poppy,Ashe,Rumble,Tristana,Lucian,Kalista,Jax,LeBlanc,Rell,Jarvan IV,1,0.5338,0.6279,,4.0,0.0,,,,,,,,,,,1.0,0.0,,9.0,2.0,,,,,1.0,0.0,62898,1974.8195,2978.6499,,116,3.6421,56,1.7582,49,314,9.8587,63623,42753,1342.3234,53825,0.141117,,,211,144.0,16.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Jax,Jarvan IV,LeBlanc,Kalista,Rell
4,10660-10660_game_3,Blue,Rell,Nocturne,Tristana,Jarvan IV,Rumble,Neeko,Caitlyn,Lux,Jax,Bel'Veth,1,0.9517,1.0876,,2.0,1.0,,,,,,,,,,,1.0,0.0,,10.0,0.0,,,,,2.0,0.0,60633,2747.719,2341.2236,,60,2.719,35,1.5861,23,162,7.3414,51091,36208,1640.8459,42299,0.371884,,,153,69.0,38.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Jax,Bel'Veth,Neeko,Caitlyn,Lux


In [85]:
# Print the dataframe information
# df.info()

# Print the dataframe description
df.describe()

# Print the count of null values in each column
print("Null values in each column:")
null_counts = df.isnull().sum()
null_columns = null_counts[null_counts > 0]
print(null_columns)

# Print the count of unique values in each column
print("Unique values in each column:")
print(df.nunique())


Null values in each column:
ban1                         1232
ban2                         1204
ban3                         1217
ban4                         1260
ban5                         1303
pick1                        1850
pick2                        1850
pick3                        1850
pick4                        1850
pick5                        1850
firstdragon                  1166
elementaldrakes              2782
opp_elementaldrakes          2782
elders                       2782
opp_elders                   2782
firstherald                  2782
heralds                      1166
opp_heralds                  1166
void_grubs                   1166
opp_void_grubs               1166
firstbaron                   2782
firsttower                   1166
firstmidtower                2784
firsttothreetowers           2782
turretplates                 2782
opp_turretplates             2782
damagemitigatedperminute     2782
gpr                          2782
minionkills         