In [1]:
import src.lol_utils as lol
import pandas as pd

Fetching our dataframe from SQL and assigning it to df.

In [2]:
df = lol.fetch_from_sql(schema='soloq', table_name='lol_analytics')

In [3]:
from src.lol_utils import champion_list

# perfect we have the data! now let's do some pre-processing

# detect bad rows:
bad_nulls = df[(df['blue_team'].apply(len) == 0) | (df['red_team'].apply(len) == 0)]
bad_counts = df[(df['blue_team'].apply(len) != 5) | (df['red_team'].apply(len) != 5)]
# duplicates
df['blue_dup'] = df['blue_team'].apply(lambda L: len(set(L)) != len(L))
df['red_dup']  = df['red_team'].apply(lambda L: len(set(L)) != len(L))

# unknown champs: compare against canonical set
canonical = set(champion_list)  # fill with your champion list
df['blue_unknown'] = df['blue_team'].apply(lambda L: [c for c in L if c not in canonical])
df['red_unknown']  = df['red_team'].apply(lambda L: [c for c in L if c not in canonical])

In [5]:
# clean up bad rows where blue_unknown or red_unknown is non-empty, or blue_dup/red_dup is True, or team_parsed length != 5
clean_df = df[
    (df['blue_dup'] == False) &
    (df['red_dup'] == False) &
    (df['blue_unknown'].apply(len) == 0) &
    (df['red_unknown'].apply(len) == 0) &
    (df['blue_team'].apply(len) == 5) &
    (df['red_team'].apply(len) == 5)
].copy()

for i in range(5):
    clean_df[f'blue_{i}'] = clean_df['blue_team'].apply(lambda x: x[i])
    clean_df[f'red_{i}'] = clean_df['red_team'].apply(lambda x: x[i])

# One-hot encode all slots
clean_df = pd.get_dummies(
    clean_df,
    columns=[f'blue_{i}' for i in range(5)] + [f'red_{i}' for i in range(5)],
    prefix=[f'blue_{i}' for i in range(5)] + [f'red_{i}' for i in range(5)]
)

# Encode target variable: blue win = 1, red win = 0
clean_df['target'] = (clean_df['winner'] == 'blue').astype(int)

In [7]:
# so far all we have done is hot encode the target and slot encode the champions

# however, we can create our own unique features based on champion combinations (e.g., synergy between champions on the same team, counter-picks between teams, etc.)

from itertools import combinations, product

# We'll use the cleaned DataFrame with parsed teams and target
df = clean_df.copy()

# Calculate champion win rates
champ_winrates = lol.calculate_champion_winrates(df)

# Add synergy and counter scores to the DataFrame
df = lol.add_synergy_and_counter_scores(df, champ_winrates)

# Display the updated DataFrame
print(df[['synergy_blue', 'synergy_red', 'counter_delta']].head())

   synergy_blue  synergy_red  counter_delta
0    595.370955   495.227722     248.346592
1    569.217287   422.713292     325.170588
2    455.054420   480.219949     -82.514662
3    489.568753   658.786821    -422.811189
4    636.037126   533.436868     254.370648


In [8]:
df

Unnamed: 0,blue_team,red_team,winner,blue_dup,red_dup,blue_unknown,red_unknown,blue_0_Aatrox,blue_0_Akali,blue_0_Akshan,...,red_4_Yasuo,red_4_Yuumi,red_4_Zac,red_4_Zilean,red_4_Zoe,red_4_Zyra,target,synergy_blue,synergy_red,counter_delta
0,"[Rumble, MasterYi, Riven, Senna, Maokai]","[Aurora, Volibear, Yasuo, Caitlyn, Morgana]",blue,False,False,[],[],False,False,False,...,False,False,False,False,False,False,1,595.370955,495.227722,248.346592
1,"[Neeko, Jax, Sylas, Ezreal, Sona]","[Riven, Zac, Zed, Jhin, Karma]",red,False,False,[],[],False,False,False,...,False,False,False,False,False,False,0,569.217287,422.713292,325.170588
2,"[Riven, Zed, Akali, Corki, Blitzcrank]","[Renekton, Ekko, Vayne, Yasuo, Rakan]",red,False,False,[],[],False,False,False,...,False,False,False,False,False,False,0,455.054420,480.219949,-82.514662
3,"[KSante, Shaco, Taliyah, Ezreal, Bard]","[Riven, Gwen, Akshan, Yunara, Zoe]",blue,False,False,[],[],False,False,False,...,False,False,False,False,True,False,1,489.568753,658.786821,-422.811189
4,"[Yorick, Udyr, Pyke, Corki, Bard]","[Irelia, Rengar, KogMaw, Lucian, Anivia]",blue,False,False,[],[],False,False,False,...,False,False,False,False,False,False,1,636.037126,533.436868,254.370648
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4384,"[Rumble, Karthus, Irelia, Xayah, Nautilus]","[Ornn, MonkeyKing, Ryze, Kaisa, Alistar]",red,False,False,[],[],False,False,False,...,False,False,False,False,False,False,0,474.015110,497.675671,-57.669879
4385,"[Nocturne, Morgana, Sion, Corki, Pyke]","[Camille, Jax, Galio, Kaisa, Neeko]",blue,False,False,[],[],False,False,False,...,False,False,False,False,False,False,1,460.258046,637.949984,-445.187368
4386,"[Aurora, LeeSin, Ryze, Corki, Alistar]","[Jayce, Zed, Neeko, Sivir, Nautilus]",blue,False,False,[],[],False,False,False,...,False,False,False,False,False,False,1,575.995747,505.832610,175.197979
4387,"[Ambessa, Sylas, Yone, Corki, Pantheon]","[Renekton, Olaf, Ryze, Sivir, Rakan]",red,False,False,[],[],False,False,False,...,False,False,False,False,False,False,0,441.513982,570.561959,-319.392158


In [9]:
# Calculate champion presence (side-agnostic) and add as columns to df
for champ in lol.champion_list:
    df[f'{champ}_presence'] = df.apply(
        lambda row: int(champ in row['blue_team'] or champ in row['red_team']),
        axis=1
    )

# Only use champion presence and updated extra columns for features
champ_presence_cols = [f'{champ}_presence' for champ in lol.champion_list]
extra_cols = ['synergy_blue', 'synergy_red', 'counter_delta']  # Use only the new delta column
feature_cols = champ_presence_cols + [col for col in extra_cols if col in df.columns]

# Convert boolean columns in feature_cols to integers
for col in feature_cols:
    if df[col].dtype == 'bool':
        df[col] = df[col].astype(int)

# Prepare X and y for modeling
X = df[feature_cols]
y = df['target']

  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{champ}_presence'] = df.apply(
  df[f'{cham

In [10]:
df.to_pickle('cleaned_features.pkl')