In [15]:
import pandas as pd
import numpy as np

# Load the general (per-player, per-map) data of each region's kickoff event.
# Forward slashes are used on purpose: the previous backslash separators formed the
# invalid escape sequence '\g' (a warning on recent Python versions) and only resolved
# as a directory separator on Windows.
general_pacific = pd.read_csv('champions-tour-2024-pacific-kickoff_data/general_data_champions-tour-2024-pacific-kickoff.csv')
general_americas = pd.read_csv('champions-tour-2024-americas-kickoff_data/general_data_champions-tour-2024-americas-kickoff.csv')
general_emea = pd.read_csv('champions-tour-2024-emea-kickoff_data/general_data_champions-tour-2024-emea-kickoff.csv')


In [111]:
def parse_value(x, index, default):
    """
        Read one number out of a cell that stores several newline-separated values.

        Parameter:
            x : raw cell content, expected to be a string of newline-separated numbers
            index : position of the wanted value once the cell is split on newlines
            default : value returned when the wanted entry is absent or blank

        Return:
            The selected entry converted to float, or `default` when the cell is not a
            string, has fewer than `index + 1` entries, or the entry is empty.
    """

    # Non-string cells (e.g. the NaN pandas produces for an empty CSV field) hold no value.
    if not isinstance(x, str):
        return default

    split_values = x.strip().split('\n')
    if index >= len(split_values):
        return default

    # Strip the entry so a whitespace-only value counts as missing instead of crashing float().
    token = split_values[index].strip()
    return float(token) if token else default

def parse_hs_value(x, index, default):
    """
        Read one percentage out of a cell that stores several newline-separated values.

        Parameter:
            x : raw cell content, expected to be a string of newline-separated percentages
                (each entry normally ends with '%')
            index : position of the wanted value once the cell is split on newlines
            default : value returned when the wanted entry is absent or blank

        Return:
            The selected entry, without its trailing '%', converted to float; `default` when
            the cell is not a string, has fewer than `index + 1` entries, or the entry is empty.
    """

    # Non-string cells (e.g. the NaN pandas produces for an empty CSV field) hold no value.
    if not isinstance(x, str):
        return default

    split_values = x.strip().split('\n')
    if index >= len(split_values):
        return default

    token = split_values[index].strip()
    # Only drop the last character when it really is the percent sign; slicing blindly
    # would silently eat a digit from a value written without '%'.
    if token.endswith('%'):
        token = token[:-1].strip()
    return float(token) if token else default

def _parse_side_stat(cell, index, default, unit=''):
    """Return the `index`-th newline-separated number of `cell`, or `default` when it is missing."""
    # Non-string cells (e.g. the NaN pandas produces for an empty CSV field) hold no value.
    if not isinstance(cell, str):
        return default

    tokens = cell.strip().split('\n')
    if index >= len(tokens):
        return default

    token = tokens[index].strip()
    # Drop the unit marker (e.g. '%') only when it is actually present.
    if unit and token.endswith(unit):
        token = token[:-len(unit)].strip()
    return float(token) if token else default


def general_feature_creation_for_teams(general, list_feature=('R', 'ACS', 'K', 'D', 'ADR', 'HS%', 'FK')):

    """
        Function that creates a dataframe of the average/std features for a region with the general data. Individual feature only.

        Every feature cell is read as up to three newline-separated numbers: entry 0 is the
        overall value, entry 1 the attack value and entry 2 the defense value. 'HS%' entries
        carry a trailing '%' that is removed before conversion. A missing or blank entry is
        replaced by the region-wide mean of the feature's overall value.

        Parameter:
            general : dataframe from the scraper general_data_scraper; must contain a
                      'Team Name' column and one column per entry of list_feature
            list_feature : list of feature to compute

        Return:
            Dataframe indexed by team name (in order of first appearance in `general`). For each
            feature it holds, in this order, the columns avrg_<feature>_per_team,
            std_<feature>_per_team and the same pair suffixed with _atk and _dfs, where
            <feature> is the lower-cased feature name. Standard deviations are population
            standard deviations (ddof=0).
    """

    team_names = general['Team Name']
    # Column-name suffix for entry 0 (overall), 1 (attack) and 2 (defense) of each cell.
    side_suffixes = ('', '_atk', '_dfs')

    feature_columns = {}
    for feature_name in list_feature:
        unit = '%' if feature_name == 'HS%' else ''
        raw_cells = general[feature_name]

        # Region-wide mean of the overall value; unreadable cells become NaN and are
        # skipped by .mean(), so a single empty cell no longer aborts the computation.
        overall = raw_cells.apply(lambda cell: _parse_side_stat(cell, 0, np.nan, unit)).astype(float)
        default = overall.mean()

        for index, suffix in enumerate(side_suffixes):
            side_values = raw_cells.apply(
                lambda cell: _parse_side_stat(cell, index, default, unit)
            ).astype(float)
            # One groupby per side instead of re-filtering the frame for every team;
            # sort=False keeps teams in order of first appearance (deterministic rows).
            per_team = side_values.groupby(team_names, sort=False)
            column_stem = f'{feature_name.lower()}_per_team{suffix}'
            feature_columns[f'avrg_{column_stem}'] = per_team.mean()
            # ddof=0 matches np.std, which the per-team statistics were computed with before.
            feature_columns[f'std_{column_stem}'] = per_team.std(ddof=0)

    team_features = pd.DataFrame(feature_columns)
    # Keep the index unnamed, as it was when rows were inserted one team at a time.
    team_features.index.name = None
    return team_features
        

# Build the per-team feature table of every region from its general-data frame.
df_pacific, df_americas, df_emea = (
    general_feature_creation_for_teams(region_general)
    for region_general in (general_pacific, general_americas, general_emea)
)

In [112]:
# NOTE(review): no top-level `df` is assigned in the cells visible here (the `df` inside
# general_feature_creation_for_teams is local to that function) — presumably it was created
# in another cell; confirm this cell still runs after Restart & Run All.
df

Unnamed: 0,avrg_r_per_team,std_r_per_team,avrg_r_per_team_atk,std_r_per_team_atk,avrg_r_per_team_dfs,std_r_per_team_dfs,avrg_acs_per_team,std_acs_per_team,avrg_acs_per_team_atk,std_acs_per_team_atk,...,avrg_hs%_per_team_atk,std_hs%_per_team_atk,avrg_hs%_per_team_dfs,std_hs%_per_team_dfs,avrg_fk_per_team,std_fk_per_team,avrg_fk_per_team_atk,std_fk_per_team_atk,avrg_fk_per_team_dfs,std_fk_per_team_dfs
Paper Rex,1.024889,0.287012,0.986,0.374305,1.05,0.464878,201.222222,50.676266,200.177778,72.785923,...,27.311111,10.778213,28.244444,14.968345,2.044444,1.67273,1.0,1.054093,1.044444,1.074049
BLEED,0.9456,0.237168,0.9596,0.347292,0.9324,0.284988,190.28,55.184433,195.16,71.295823,...,27.92,11.188995,24.92,9.769012,2.48,2.22926,1.2,1.523155,1.28,1.183892
DetonatioN FocusMe,0.881,0.225896,0.9,0.317852,0.8775,0.287313,182.05,50.927866,186.45,75.739339,...,32.25,10.113728,33.5,16.460559,2.0,1.67332,1.1,1.260952,0.9,0.888819
Rex Regum Qeon,0.936667,0.196949,0.799333,0.376634,1.041667,0.350971,192.4,44.091269,179.766667,73.78242,...,30.766667,11.856597,28.7,11.144954,1.933333,1.481741,0.833333,0.933928,1.1,1.220656
Gen.G,1.0361,0.287937,0.9912,0.410252,1.0902,0.422028,206.65,57.412782,201.86,81.377641,...,30.3,14.003214,28.33,13.544781,2.2,1.777639,1.02,1.183047,1.18,1.227844
ZETA DIVISION,0.943636,0.314685,0.923818,0.462131,0.962545,0.462924,190.654545,59.094752,185.927273,84.618256,...,30.618577,15.3237,27.636759,13.063999,1.836364,1.856327,0.945455,1.285134,0.890909,1.154796
Team Secret,1.095818,0.285541,1.106,0.384091,1.110909,0.535487,205.0,57.698905,206.2,73.431081,...,27.254545,11.510024,23.145455,14.00314,2.2,2.066178,1.036364,1.278687,1.163636,1.411171
DRX,1.065667,0.193161,1.148667,0.238757,0.996333,0.277627,203.566667,45.469905,204.066667,53.71836,...,33.066667,13.358975,30.466667,10.781878,2.533333,1.874981,1.233333,1.202313,1.3,1.159023
Global Esports,0.856857,0.225854,0.881429,0.346807,0.794286,0.312449,174.714286,48.892635,176.628571,79.814816,...,30.143478,13.11073,29.257143,13.582221,1.914286,2.102671,1.085714,1.421698,0.828571,0.970588
Talon Esports,0.979333,0.39017,1.019,0.468226,0.935667,0.495908,192.466667,71.08058,190.666667,83.600372,...,29.766667,17.517008,22.733333,10.23697,1.866667,1.765094,0.833333,1.067187,1.033333,1.224291


In [42]:
# Overall death count (first newline-separated entry of 'D') of every row, grouped by Pacific team.
_pacific_deaths_by_team = {
    team: general_pacific.loc[general_pacific['Team Name'] == team, 'D'].apply(
        lambda cell: int(cell.split('\n')[0])
    )
    for team in set(general_pacific['Team Name'])
}

# Mean and standard deviation (np.std) of those death counts for each team.
avrg_deaths_per_team = {team: np.mean(deaths) for team, deaths in _pacific_deaths_by_team.items()}
std_deaths_per_team = {team: np.std(deaths) for team, deaths in _pacific_deaths_by_team.items()}

In [54]:
# Show the raw 'FK' column of the Pacific data as loaded (newline-separated values per cell).
general_pacific.loc[:, 'FK']

0      8\n3\n5
1      1\n0\n1
2      3\n2\n1
3      3\n2\n1
4      0\n0\n0
        ...   
455    5\n5\n0
456    3\n2\n1
457    1\n0\n1
458    3\n1\n2
459    1\n0\n1
Name: FK, Length: 460, dtype: object