In [38]:
import pandas as pd

def aggregate_team(df):
    """
    Aggregate the rows of each team in each game into a single team-level row.

    Rows are grouped by ``gameid``, ``teamid`` and ``side``. Grouping keeps
    missing keys (``dropna=False``) so that a team whose ``teamid`` is NaN is
    not silently discarded, and ``side`` is part of the key so that two such
    teams in the same game stay separate.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``gameid``, ``teamid``, ``side``, ``result`` and every
        stat / draft column listed in the body of this function.

    Returns
    -------
    pandas.DataFrame
        One row per (gameid, teamid, side) with the columns, in order:
        ``gameid``, ``teamid``, the averaged stats, the summed stats,
        ``pick1..pick5``, ``ban1..ban5``, ``side``, ``result``.

    Raises
    ------
    KeyError
        If any required column is missing from ``df``.
    """
    # Per-row stats that are averaged over the rows of a team.
    mean_cols = ['goldat10', 'xpat10', 'csat10',
                 'goldat15', 'xpat15', 'csat15',
                 'goldat20', 'xpat20', 'csat20',
                 'damagetochampions', 'dpm', 'cspm']

    # Event counters that are summed over the rows of a team.
    # NOTE(review): every row of the group is summed. If df also holds a
    # per-team summary row next to the player rows, these totals would be
    # double-counted — confirm against the data schema.
    sum_cols = ['kills', 'deaths', 'assists',
                'dragons', 'heralds', 'barons',
                'towers', 'inhibitors']

    # Draft columns.
    pick_cols = ['pick1', 'pick2', 'pick3', 'pick4', 'pick5']
    ban_cols = ['ban1', 'ban2', 'ban3', 'ban4', 'ban5']

    agg_funcs = {col: 'mean' for col in mean_cols}
    agg_funcs.update({col: 'sum' for col in sum_cols})
    # 'first' picks the first non-null value of the group.
    agg_funcs.update({col: 'first' for col in pick_cols + ban_cols})
    # Expected to be the same value on every row of a team, so 'max' keeps it.
    agg_funcs['result'] = 'max'

    group_keys = ['gameid', 'teamid', 'side']

    # Fail early with one readable message instead of a deep pandas error.
    missing = [col for col in group_keys + list(agg_funcs) if col not in df.columns]
    if missing:
        raise KeyError(f"aggregate_team: missing required columns: {missing}")

    team_df = (
        df.groupby(group_keys, dropna=False)
          .agg(agg_funcs)
          .reset_index()
    )

    # Restore the historical column layout (side and result at the end).
    ordered_cols = (['gameid', 'teamid'] + mean_cols + sum_cols
                    + pick_cols + ban_cols + ['side', 'result'])
    return team_df[ordered_cols]


def build_game_level_df(team_df):
    """
    Pivot a team-level frame into a game-level frame.

    Rows whose ``side`` is "blue" and rows whose ``side`` is "red"
    (case-insensitive) are joined on ``gameid`` with an inner merge, so only
    games that have both sides are kept. Every column except ``gameid`` is
    emitted twice, suffixed ``_blue`` and ``_red``.

    Parameters
    ----------
    team_df : pandas.DataFrame
        Must contain at least ``gameid``, ``side`` and ``result``.

    Returns
    -------
    pandas.DataFrame
        ``gameid``, the blue-side columns, the red-side columns, and a final
        ``result`` column copied from ``result_blue``.
    """
    # Normalise the side label once so the comparison ignores case.
    side_lower = team_df["side"].str.lower()

    # Suffix every column (including gameid) so the two halves never collide.
    blue_rows = team_df.loc[side_lower == "blue"].add_suffix("_blue")
    red_rows = team_df.loc[side_lower == "red"].add_suffix("_red")

    paired = blue_rows.merge(red_rows, left_on="gameid_blue", right_on="gameid_red")

    # Both key columns carry the same value after the merge: keep one, unsuffixed.
    paired = (
        paired
        .drop(columns=["gameid_red"])
        .rename(columns={"gameid_blue": "gameid"})
    )

    # Label column: the blue side's result.
    return paired.assign(result=paired["result_blue"])



In [39]:
def select_features(team_df, stage='draft'):
    """
    Select the feature columns available at a given moment of the game.

    Stages:
    - 'draft' : identifiers, result and draft columns only
    - '10'    : draft + stats at 10 minutes
    - '15'    : draft + stats at 15 minutes + dragons/heralds/towers
    - '20'    : draft + stats at 20 minutes + all objective counters

    The draft columns are ``champion`` when that column exists; otherwise the
    ``pick1..pick5`` columns present in ``team_df`` are used, which is the
    layout produced by ``aggregate_team``.

    Parameters
    ----------
    team_df : pandas.DataFrame
        Frame containing ``gameid``, ``teamid``, ``result``, the draft
        column(s) and the stat columns of the requested stage.
    stage : str or int, default 'draft'
        One of 'draft', '10', '15', '20' (integers 10/15/20 are accepted).

    Returns
    -------
    pandas.DataFrame
        The selected columns, in the order ids → draft → stage stats.

    Raises
    ------
    ValueError
        If ``stage`` is not a supported value.
    KeyError
        If no draft column is present, or a required column is missing.
    """
    # NOTE(review): dragons/heralds/barons/towers/inhibitors are not
    # time-sliced by name; if they hold end-of-game totals, using them at
    # stage '15'/'20' leaks later information — confirm against the data.
    stage_cols = {
        'draft': [],
        '10': ['goldat10', 'xpat10', 'csat10'],
        '15': ['goldat15', 'xpat15', 'csat15', 'dragons', 'heralds', 'towers'],
        '20': ['goldat20', 'xpat20', 'csat20',
               'dragons', 'heralds', 'barons', 'towers', 'inhibitors'],
    }

    stage_key = str(stage)
    if stage_key not in stage_cols:
        raise ValueError("stage doit être 'draft', '10', '15' ou '20'")

    # Prefer the historical 'champion' column; fall back to the pick columns
    # so the function also works on aggregate_team output.
    if 'champion' in team_df.columns:
        draft_cols = ['champion']
    else:
        draft_cols = [col for col in ('pick1', 'pick2', 'pick3', 'pick4', 'pick5')
                      if col in team_df.columns]
        if not draft_cols:
            raise KeyError("select_features: no draft column found "
                           "(expected 'champion' or 'pick1'..'pick5')")

    return team_df[['gameid', 'teamid', 'result'] + draft_cols + stage_cols[stage_key]]


In [None]:
# Load the raw match data from the CSV export.
# NOTE(review): the saved output of this cell echoes this read_csv line, which
# looks like the tail of a pandas parsing warning — confirm, and if it is a
# mixed-dtype warning consider passing explicit dtypes.
df = pd.read_csv("data/2024_lol.csv")

# Aggregate to one row per team and game, then pivot to one row per game.
team_df = aggregate_team(df)
game_df = build_game_level_df(team_df)
# Sanity check: show the first rows of the game-level frame.
print(game_df.head())


  df = pd.read_csv("data/2024_lol.csv")


               gameid                              teamid_blue  goldat10_blue  \
0  10660-10660_game_1  oe:team:a9145b7711873f53e610fbba0493484            NaN   
1  10660-10660_game_2  oe:team:a9145b7711873f53e610fbba0493484            NaN   
2  10660-10660_game_3  oe:team:a9145b7711873f53e610fbba0493484            NaN   
3  10660-10660_game_4  oe:team:8516ca63facc91286d6c00212ca945e            NaN   
4  10661-10661_game_1  oe:team:b42d0f31f2727621b963042276817c4            NaN   

   xpat10_blue  csat10_blue  goldat15_blue  xpat15_blue  csat15_blue  \
0          NaN          NaN            NaN          NaN          NaN   
1          NaN          NaN            NaN          NaN          NaN   
2          NaN          NaN            NaN          NaN          NaN   
3          NaN          NaN            NaN          NaN          NaN   
4          NaN          NaN            NaN          NaN          NaN   

   goldat20_blue  xpat20_blue  ...  pick4_red  pick5_red  ban1_red  ban2_red  \


In [52]:
# Inspect the first game: the wide frame is truncated by head(), so print
# each column of row 0 on its own "column: value" line.
line = game_df.iloc[0]
# Print every element of the row, in column order.
for col in game_df.columns:
    print(f"{col}: {line[col]}")

gameid: 10660-10660_game_1
teamid_blue: oe:team:a9145b7711873f53e610fbba0493484
goldat10_blue: nan
xpat10_blue: nan
csat10_blue: nan
goldat15_blue: nan
xpat15_blue: nan
csat15_blue: nan
goldat20_blue: nan
xpat20_blue: nan
csat20_blue: nan
damagetochampions_blue: 13933.666666666666
dpm_blue: 443.2767833333334
cspm_blue: 6.63626
kills_blue: 6
deaths_blue: 32
assists_blue: 14
dragons_blue: 2.0
heralds_blue: 0.0
barons_blue: 0.0
towers_blue: 2.0
inhibitors_blue: 0.0
pick1_blue: Kalista
pick2_blue: Senna
pick3_blue: Orianna
pick4_blue: Maokai
pick5_blue: Aatrox
ban1_blue: Akali
ban2_blue: Nocturne
ban3_blue: K'Sante
ban4_blue: Lee Sin
ban5_blue: Wukong
side_blue: Blue
result_blue: 0
teamid_red: oe:team:8516ca63facc91286d6c00212ca945e
goldat10_red: nan
xpat10_red: nan
csat10_red: nan
goldat15_red: nan
xpat15_red: nan
csat15_red: nan
goldat20_red: nan
xpat20_red: nan
csat20_red: nan
damagetochampions_red: 18980.833333333332
dpm_red: 603.8440999999999
cspm_red: 6.99894
kills_red: 32
deaths_red

In [46]:
# Number of rows in the game-level frame (one row per game with both sides present).
len(game_df)

9428