In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
# load data
df = pd.read_csv("parsed_data.csv")

# Column selection: every position, at the single "10" time snapshot.
# Further snapshots can be added to `times` if the CSV carries matching
# columns (TODO confirm which snapshots exist in parsed_data.csv).
positions = ["1", "2", "3", "4", "5"]
times = ["10"]

# statistic columns
player_stats_params = ["networth", "xpm", "kills"]
stats_key = "position_{pos}_{param}_{time}"

# Team-neutral stat column names, ordered position -> param -> time.
player_stats_columns = [
    stats_key.format(pos=pos, param=param, time=time)
    for pos in positions
    for param in player_stats_params
    for time in times
]

# Rename maps: team-prefixed CSV column -> team-neutral column.
radiant_columns = {f"radiant_{col}": col for col in player_stats_columns}
dire_columns = {f"dire_{col}": col for col in player_stats_columns}


def _team_stats(matches, winner, column_map):
    """Select one team's stat columns from the rows won by `winner`.

    Parameters
    ----------
    matches : pd.DataFrame
        Rows to filter; must contain a "winner" column and every key of
        `column_map`.
    winner : str
        Value of the "winner" column to keep.
    column_map : dict
        Maps the team-prefixed column names to select onto the team-neutral
        names they are renamed to.

    Returns
    -------
    pd.DataFrame
        The selected rows (original index kept) with only the renamed
        stat columns.
    """
    rows = matches[matches["winner"] == winner]
    return rows[list(column_map)].rename(columns=column_map)


# Split the rows into two random, disjoint halves (fixed seed): winning-team
# stats are taken from one half and losing-team stats from the other, so no
# row contributes to both samples.
winner_half = df.sample(frac=0.5, random_state=0)
loser_half = df.drop(winner_half.index)

# Winners: keep the stats of whichever team won the row, drop the opponent.
radiant_winner = _team_stats(winner_half, "radiant", radiant_columns)
dire_winner = _team_stats(winner_half, "dire", dire_columns)
winner_data = pd.concat([radiant_winner, dire_winner], axis=0)

# Losers: radiant lost where dire won, and vice versa.
radiant_loser = _team_stats(loser_half, "dire", radiant_columns)
dire_loser = _team_stats(loser_half, "radiant", dire_columns)
loser_data = pd.concat([radiant_loser, dire_loser], axis=0)

### Basic statistics

In [None]:
# Summary statistics of the winning teams' stat columns.
# Left as the cell's last expression so the notebook renders it as a table.
winner_data.describe(include="all")

In [None]:
# Summary statistics of the losing teams' stat columns.
# Left as the cell's last expression so the notebook renders it as a table.
loser_data.describe(include="all")

**Skewness**
- Zero: symmetric (e.g. a normal distribution)
- Positive: right skewed
- Negative: left skewed

**Kurtosis** (excess kurtosis, as returned by pandas `kurt()`)
- Zero: tails comparable to a normal distribution
- Positive: fat tails, sharp peak
- Negative: thin tails, flat peak

In [None]:
# Skewness and kurtosis of every winner stat column, one row per column.
# pandas' kurt() uses Fisher's definition, so a normal distribution scores 0.
skewness = winner_data.skew().rename("skewness")
kurtosis = winner_data.kurt().rename("kurtosis")
winner_skew_kurt = pd.concat([skewness, kurtosis], axis=1)

# Last expression of the cell: rendered as a table by the notebook.
winner_skew_kurt

In [None]:
# Skewness and kurtosis of every loser stat column, one row per column.
# pandas' kurt() uses Fisher's definition, so a normal distribution scores 0.
skewness = loser_data.skew().rename("skewness")
kurtosis = loser_data.kurt().rename("kurtosis")
loser_skew_kurt = pd.concat([skewness, kurtosis], axis=1)

# Last expression of the cell: rendered as a table by the notebook.
loser_skew_kurt

### Plots

In [None]:
# Deep copies: the histograms read from these, so any normalisation added to
# this cell later would not alter winner_data / loser_data.
winner_copy = winner_data.copy(deep=True)
loser_copy = loser_data.copy(deep=True)

# (panel title, frame) pairs in left-to-right column order.
panels = [("Winning Team", winner_copy), ("Losing Team", loser_copy)]

# One figure per (time, param): one row per position, winners on the left and
# losers on the right.
for time in times:
    for param in player_stats_params:
        # squeeze=False keeps `axs` two-dimensional even for a single position.
        fig, axs = plt.subplots(
            len(positions),
            len(panels),
            figsize=(20, 6 * len(positions)),
            squeeze=False,
        )
        for idx, pos in enumerate(positions):
            col = stats_key.format(pos=pos, param=param, time=time)

            for jdx, (title, frame) in enumerate(panels):
                ax = axs[idx, jdx]
                sns.histplot(frame[col], ax=ax)
                ax.set_title(title)
                # dashed red line marks the mean of the plotted column
                ax.axvline(
                    frame[col].mean(),
                    color="red",
                    linestyle="dashed",
                    linewidth=2,
                )

        fig.tight_layout()
        plt.show()
        # release the figure so repeated runs do not accumulate open figures
        plt.close(fig)


# for idx, col in enumerate(player_stats_columns):
# logarithmic normalization for skewed data


# winner_copy[col] = np.log(winner_copy[col] + 1)
# loser_copy[col] = np.log(loser_copy[col] + 1)



# standard normalization
# winner_copy[col] = (
#     winner_copy[col] - winner_copy[col].mean()
# ) / winner_copy[col].std()
# loser_copy[col] = (loser_copy[col] - loser_copy[col].mean()) / loser_copy[
#     col
# ].std()

# plot winner and loser data side by side