In [None]:
# Load data into dfs
import pandas as pd

# Load the four source tables from v2_data/ into DataFrames.

# matches with outcome
matches = pd.read_csv(filepath_or_buffer="v2_data/matches.csv")

# each players stats, broken down by hero
player_hero_stats = pd.read_csv(filepath_or_buffer="v2_data/player_hero_stats_full8.12.25.csv")

# aggregated player stats as totals (total kills, deaths, matches)
player_stats = pd.read_csv(filepath_or_buffer="v2_data/player_stats_with_retries.csv")

# player <> match <> hero data
players = pd.read_csv(filepath_or_buffer="v2_data/players.csv")

# Preview the first rows of each table, in load order.
print(f"Matches csv: \n\n{matches.head()}")
print(f"Player Hero Stats csv: \n\n{player_hero_stats.head()}")
print(f"Player Stats csv: \n\n{player_stats.head()}")
print(f"Players csv: \n\n{players.head()}")

In [None]:
# Attach each account's aggregated totals (player_stats) to the player+match rows
# (players). Left join: every players row is kept even without a stats match.
player_match_stats = pd.merge(
    left=players,
    right=player_stats,
    how="left",
    on="account_id",
)
print(f"Player Match Stats csv: \n\n{player_match_stats.head()}")

In [None]:
# Rename columns so per-match stats (pm_*) can be told apart from the player's
# aggregated totals (p_total_*).
# DataFrame.rename returns a NEW frame; the result must be assigned back,
# otherwise player_match_stats (and the CSV written from it) keeps the old names.
# Keys that are not present in the frame are silently ignored, so re-running
# this cell is harmless.
player_match_stats = player_match_stats.rename(columns={
    "kills": "pm_kills",
    "deaths": "pm_deaths",
    "assists": "pm_assists",
    "damage_per_min": "pm_damage_per_min",
    "denies": "pm_denies",
    "net_worth": "pm_net_worth",
    "win": "pm_win",
    'matches_played': 'p_total_matches_played',
    'total_kills': 'p_total_kills',
    'total_deaths': 'p_total_deaths',
    'total_assists': 'p_total_assists',
    'avg_kd': 'p_total_avg_kd',
    'win_rate': 'p_total_win_rate',
    'total_time_played': 'p_total_time_played'
})
player_match_stats.head()

In [None]:
# Persist the player+match table to disk (the DataFrame index is written too).
player_match_stats_csv = "v2_data/player_match_stats8.12.csv"
player_match_stats.to_csv(player_match_stats_csv)

In [None]:
# Scratch copy of the per-hero stats for experimenting with player_hero features;
# a deep copy, so edits to it do not touch player_hero_stats.
test_player_hero = player_hero_stats.copy(deep=True)

In [None]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler

def create_player_hero_stats(ph_stats_base):
    """
    Add per-hero baselines and player-vs-hero ratio features to player/hero stats.

    No rows are aggregated away: the result has one row per input row. For each
    row a kill/death ratio is computed, then for four stats the mean over all
    rows sharing the same ``hero_id`` is attached (``h_*`` columns) together with
    the row's value divided by that mean (``ph_*_ratio`` columns). Finally the
    raw stat columns are prefixed with ``ph_``.

    Parameters
    ----------
    ph_stats_base : pandas.DataFrame
        Must contain the columns ``hero_id``, ``kills``, ``deaths``, ``assists``,
        ``damage_per_min`` and ``time_played``. A ``wins`` column is renamed to
        ``ph_wins`` when present. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of the input with the added columns ``ph_total_kd``,
        ``h_total_kd``, ``ph_kd_ratio``, ``h_avg_total_time_played``,
        ``ph_time_played_ratio``, ``h_total_damage_per_min``,
        ``ph_damage_per_min_ratio``, ``h_total_assists``, ``ph_assists_ratio``,
        and with the raw stat columns renamed to ``ph_*``.

    Notes
    -----
    A ratio is NaN when the hero mean is 0 and the row's value is 0 (0 / 0).
    """
    ph_stats = ph_stats_base.copy()

    # Kill/death ratio per row; rows with zero deaths are assigned 0 rather than
    # the inf/NaN the raw division would produce.
    ph_stats['ph_total_kd'] = np.where(
        ph_stats['deaths'] == 0,
        0,
        ph_stats['kills'] / ph_stats['deaths'],
    )

    # (row-level stat, per-hero mean column, row / hero-mean ratio column).
    # Every ratio is player value over hero mean, so > 1 always means
    # "above the average for this hero".
    hero_baselines = [
        ('ph_total_kd', 'h_total_kd', 'ph_kd_ratio'),
        ('time_played', 'h_avg_total_time_played', 'ph_time_played_ratio'),
        ('damage_per_min', 'h_total_damage_per_min', 'ph_damage_per_min_ratio'),
        ('assists', 'h_total_assists', 'ph_assists_ratio'),
    ]
    for stat_col, hero_col, ratio_col in hero_baselines:
        # transform("mean") broadcasts the group mean back onto every row.
        ph_stats[hero_col] = ph_stats.groupby('hero_id')[stat_col].transform("mean")
        ph_stats[ratio_col] = ph_stats[stat_col] / ph_stats[hero_col]

    # rename() returns a new frame, so the result has to be captured for the
    # ph_ prefixes to reach the caller.
    ph_stats = ph_stats.rename(columns={
        "wins": "ph_wins",
        "kills": "ph_kills",
        "deaths": "ph_deaths",
        "assists": "ph_assists",
        "damage_per_min": "ph_damage_per_min",
        'time_played': 'ph_time_played'
    })

    return ph_stats

In [None]:
# testing different aggregations
# import numpy as np
# def test(df):
#     """
#     Create player hero stats by aggregating the player_hero_stats DataFrame.
#     """


#     return df

# test_player_hero = test(test_player_hero)
# test_player_hero.head(30)

In [None]:
# Build the player/hero feature table from the raw per-hero stats, then preview it.
calc_player_hero_stats = create_player_hero_stats(ph_stats_base=player_hero_stats)
calc_player_hero_stats.head(n=10)

In [None]:
# Summary statistics for the engineered player/hero columns; a quick check for
# inf/NaN values or extreme ranges in the ratio features.
calc_player_hero_stats.describe()

Currently, stat names are duplicated across the source tables. We need to ensure every stat column is prefixed so that it reads as `p_stat`, `ph_stat`, or `h_stat`.

In [None]:
# Left-join the player/hero features onto the player+match rows, matching on
# both hero_id and account_id; rows without a player/hero match are kept.
merged_stats = pd.merge(
    left=player_match_stats,
    right=calc_player_hero_stats,
    how="left",
    on=["hero_id", "account_id"],
)
merged_stats.head(n=11)

In [None]:
# Persist the merged feature table to disk (the DataFrame index is written too).
merged_stats_csv = "v2_data/merged_stats8.12_v2.csv"
merged_stats.to_csv(merged_stats_csv)

In [None]:
# NOTE(review): DataFrame.groupby() called with neither `by` nor `level` raises
# TypeError, so this cell cannot run as written; the grouping key(s) still have
# to be supplied here.
# NOTE(review): `tean_stats` looks like a typo for `team_stats` — confirm before renaming.
tean_stats = merged_stats.groupby()