# Hall of Shame

This notebook is a simple analysis of the worst players in the game.

Players are ranked on the following metrics:
- Long match duration
- High death count
- High deaths per minute
- Low win rate

In [None]:
import os
from json import loads

import pandas as pd
from tqdm import tqdm

## Set up parameters

In [None]:
# Intended match-count threshold for the player filter step.
minimum_matches = 50
# Number of rows kept in each "worst players" ranking.
n_worsts = 50
# Path of the raw matches CSV that is read in chunks.
raw_data = "./raw_matches_2024_0710.csv"
# Directory that receives every CSV this notebook writes.
output_dir = "./study"
# Prefix used in the names of the output files.
date = "20240911"
# Number of CSV rows loaded per chunk when reading `raw_data`.
chunk_size = 20000

## Load the raw data and create a dataframe

In [None]:
# With `chunksize` set, read_csv does not load the file at once; it returns
# an iterator that yields DataFrames of up to `chunk_size` rows each.
dfs = pd.read_csv(filepath_or_buffer=raw_data, chunksize=chunk_size)

In [None]:
# Flatten the raw matches into one row per (match, participant) and save the
# resulting table so later steps can be re-run without re-parsing the JSON.
#
# NOTE: `dfs` is a chunked reader and can only be consumed once; re-run the
# loading cell before re-running this one, otherwise `data` stays empty.

# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(output_dir, exist_ok=True)

data = []
for df in dfs:
    for match in tqdm(df.match_data):
        # Each `match_data` cell holds one match as a JSON string.
        info = loads(match)["info"]
        duration = info["gameDuration"]
        # The match duration is shared by every participant of the match.
        data.extend(
            [
                duration,
                part["deaths"],
                part["summonerName"],
                part["summonerId"],
                part["win"],
            ]
            for part in info["participants"]
        )
user_df = pd.DataFrame(data, columns=["duration", "death", "name", "id", "win"])
user_df.to_csv(f"{output_dir}/{date}-user.csv", index=False)

## Filter out the users with fewer than 50 matches

In [None]:
# Keep only the rows of players who appear in at least `minimum_matches`
# matches. The threshold comes from the parameter cell instead of a
# hard-coded 50, and the comparison is inclusive so that a player with
# exactly the minimum number of matches is kept.
match_counts = user_df.id.value_counts()
valid_ids = match_counts[match_counts >= minimum_matches].index
valid_df = user_df[user_df.id.isin(valid_ids)]

## Map each summoner ID to its last-seen name

In [None]:
# One row per summoner id; when an id occurs with several names, the name
# from its last row in `user_df` is the one that is kept.
name_df = user_df[["name", "id"]].drop_duplicates(subset="id", keep="last")
name_df = name_df.reset_index(drop=True)

# Plain dict lookup: summoner id -> name.
name_map = name_df.set_index("id")["name"].to_dict()

## Compute the per-player means and the deaths per minute

In [None]:
# Per-player averages of deaths, match duration and win flag (the mean of
# the win flag is the win rate).
mean_df = valid_df.groupby("id")[["death", "duration", "win"]].mean()

# Mean deaths divided by mean duration, scaled by 60.
# NOTE(review): this is deaths per minute only if `duration` is in seconds - confirm.
mean_df["death_per_min"] = mean_df["death"] * 60 / mean_df["duration"]

# Metrics that get their own ranking.
columns = ["death", "death_per_min", "duration", "win"]

## Get the top... worst (:<) players for each column

In [None]:
# Rank the players on every metric in `columns` and write the `n_worsts`
# worst of each ranking to its own CSV file.

# Number of matches per player, shown next to each metric in the output.
count_df = valid_df.groupby(["id"]).count()
mean_df["n_matches"] = count_df.win

# Metrics where a LOWER value is worse: these are sorted ascending so the
# worst players come first. Every other metric is sorted descending.
lower_is_worse = {"win"}

for col in columns:
    worst = (
        mean_df.sort_values(by=[col], ascending=col in lower_is_worse)
        .reset_index()[[col, "n_matches", "id"]]
        .iloc[:n_worsts]
    )
    # assign() returns a new frame, so the name column is never written into
    # a slice of another frame (avoids pandas' SettingWithCopyWarning).
    rank_df = worst.assign(name=worst["id"].map(name_map))
    rank_df[["name", col, "n_matches"]].to_csv(
        f"{output_dir}/{date}-{col}.csv", index=False
    )