# Summary Stats

In [None]:
from os import makedirs
from os.path import join, isdir
import numpy as np
import pandas as pd

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import markplotlib

In [None]:
from modules import load_xft

## load data

In [None]:
# Relative locations used by this notebook.
#   cleandir: directory the competition results file is read from.
#   plotdir:  not used in the cells visible here; presumably where figures
#             for this analysis are written — confirm against later cells.
cleandir, plotdir = (
    join("..", "data", "clean"),
    join("..", "plots", "score-modeling"),
)

In [None]:
# Load the competition results table from the cleaned-data directory.
results_path = join(cleandir, "competition_results.parquet")
# NOTE(review): the meaning of the second argument (500) is defined by
# load_xft.load_competition_results and is not visible here — confirm.
xft = load_xft.load_competition_results(results_path, 500)

# Tidy the frame in place: order rows by index and discard the "gender" column.
xft.sort_index(inplace=True)
xft.drop(columns=["gender"], inplace=True)

# Display ten randomly drawn rows as a quick look at the data.
xft.sample(10)

## summaries

In [None]:
# Keep only the athlete attributes and the overall rank, then narrow the rows
# in two steps using positional MultiIndex selection:
#   1. rows whose third index level equals 1 (selecting by a scalar removes
#      that level from the result);
#   2. rows whose (new) third index level is "Men" or "Women".
# NOTE(review): what the level equal to 1 represents is not visible here —
# confirm against load_xft.
df = (
    xft[["height", "weight", "age", "overallRank"]]
    .loc[:, :, 1]
    .loc[:, :, ["Men", "Women"]]
)
df

In [None]:
def _mean_of_best(group, count):
    """Return the column means of the `count` best-ranked rows of `group`.

    Rows are ordered by ascending "overallRank" and the first `count` of them
    are averaged; a group with fewer rows is averaged in full.
    """
    ranked = group.sort_values("overallRank")
    return ranked.head(count).mean()


# One group per combination of the first three index levels of df.
g = df.groupby(level=[0, 1, 2])

# For each cutoff, average the best-ranked rows of every group, then stack the
# per-cutoff tables under a new outermost index level named "top".
means_by_cutoff = {}
for cutoff in (5, 10, 20, 50, 100, 500):
    means_by_cutoff[cutoff] = g.apply(_mean_of_best, count=cutoff)
top = pd.concat(means_by_cutoff, names=["top"])

# Rescale height and weight.
# NOTE(review): 3.28 and 2.2 look like rounded metre->foot and kg->lb
# conversion factors — confirm the source units.
for column, factor in {"height": 3.28, "weight": 2.2}.items():
    top[column] *= factor

# Store the cutoff level as strings so it can be selected (and later used as a
# categorical plot hue) by label such as "5" or "10".
cutoff_labels = top.index.levels[0].astype(str)
top.index = top.index.set_levels(cutoff_labels, level=0)
top

In [None]:
# Imported here so this cell is self-contained.
from matplotlib.ticker import MaxNLocator, StrMethodFormatter

# --- What to plot -----------------------------------------------------------
comp = "games"        # value selected on the competition index level of `top`
division = "Women"    # value selected on the division index level of `top`
target = "weight"     # column of `top` drawn on the y-axis
tops = pd.IndexSlice[["5", "10", "20"]]  # cutoff labels to draw, one line each
palette = "Oranges"

# --- Draw -------------------------------------------------------------------
fig, ax = plt.subplots()
sns.lineplot(
    # Selecting comp/division by scalar drops those levels, so reset_index()
    # leaves "top" and "year" available as plain columns.
    data=top.loc[tops, :, comp, division].reset_index(),
    x="year",
    y=target,
    hue="top",
    linewidth=1.5,
    palette=palette,
    ax=ax,
)
ax.set_title(
    f"Average {target.title()} of Top {division.title()}'s {comp.title()} Finishers"
)
# Place the legend outside the axes, to the right, vertically centred.
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5), title="Top Athletes")

# Show whole-number tick labels on the x-axis (assumes "year" is numeric —
# the previous code called int() on the tick values).
# The previous approach, set_xticklabels() on the current tick values, froze
# the label text without freezing the tick positions: matplotlib warns about
# that, and the labels go stale or misalign when ticks are recomputed (e.g. by
# tight_layout, resizing or saving). It also truncated fractional ticks, which
# could produce duplicate labels. A locator/formatter pair keeps positions and
# labels in sync.
ax.xaxis.set_major_locator(MaxNLocator(integer=True))
ax.xaxis.set_major_formatter(StrMethodFormatter("{x:.0f}"))
fig.tight_layout()