In [None]:
import altair as alt
import polars as pl
from alltime_athletics_python.io import download_data, import_running_only_events
from camminapy.plot.altair_config import altair_theme

from alltime_athletics_viz.show import show_df

# Turn off Altair's maximum-row check so larger frames can be passed to charts.
alt.data_transformers.disable_max_rows()
# NOTE(review): presumably activates the shared camminapy Altair theme for all
# charts below — confirm in camminapy.plot.altair_config.
altair_theme()

In [None]:
# NOTE(review): the download guard below is disabled, so this cell assumes the
# data is already present under "../data". Before re-enabling it: `os` is not
# imported in the import cell, and the guard checks "data" while the load below
# reads "../data" — confirm which directory download_data() writes to.
# if not os.path.exists("data"):
#     download_data()
df = import_running_only_events("../data")

# World Records

In [None]:
# Top-ranked (rank == 1) result of every standard event, shown as one row per
# event with the female and male name/result side by side.
is_top_standard_result = (pl.col("rank") == 1) & (
    pl.col("event type") == "standard"
)

top_results = df.filter(is_top_standard_result).select(
    ["event", "name", "result", "sex"]
)

# Pivot "sex" into columns; the final select fixes the column order.
top_results_by_sex = top_results.pivot(
    values=["name", "result"],
    index="event",
    columns="sex",
    aggregate_function="first",
)

show_df(
    top_results_by_sex.select(
        [
            "event",
            "name_sex_female",
            "result_sex_female",
            "name_sex_male",
            "result_sex_male",
        ]
    )
)

In [None]:
# Build the record progression per (sex, event): every result that matched or
# beat the best "result seconds" seen so far, in chronological order.
# Events whose name contains "walk" and non-standard events are excluded.
record_progression = (
    df.filter(~pl.col("event").str.contains("walk"))
    .filter(pl.col("event type") == "standard")
    # Chronological order inside each event, so the running minimum below
    # follows the timeline.
    .sort("sex", "distance", "event", "date of event")
    .with_columns(
        pl.col("result seconds")
        .cummin()
        .over("sex", "event")
        .alias("world record time")
    )
    # Keep only results equal to the running best at the time they were set.
    .filter(pl.col("result seconds") == pl.col("world record time"))
    # Several results with the same time collapse to the first (earliest) one.
    .groupby("sex", "event", "result seconds", maintain_order=True)
    .first()
    .with_columns(
        (
            100
            * pl.col("result seconds")
            / pl.col("result seconds").min().over("sex", "event")
        ).alias("percent of wr")
    )
)

# Date at which every progression line is closed off in the chart.
progression_end_date = "2023-06-07"

# Synthetic end rows: a copy of each rank-1 row moved to the end date and
# tagged with rank == -1 so later cells can tell it apart from real results.
progression_endpoints = record_progression.filter(pl.col("rank") == 1).with_columns(
    [
        pl.lit(progression_end_date)
        .str.strptime(pl.Date, format="%Y-%m-%d")
        .alias("date of event"),
        pl.lit(-1).cast(pl.Int64).alias("rank"),
    ]
)

# Real results first, synthetic endpoints appended; "sex" is title-cased.
world_records = pl.concat([record_progression, progression_endpoints]).with_columns(
    pl.col("sex").apply(lambda s: s.title())
)

In [None]:
# Two legend-bound point selections on "event":
# - legend_selection drives line/point opacity (default `empty` behaviour);
# - legend_selection_empty (empty=False) drives the name labels, so labels are
#   only drawn once an event has actually been picked in the legend.
legend_selection = alt.selection_point(fields=["event"], bind="legend")
legend_selection_empty = alt.selection_point(
    fields=["event"], bind="legend", empty=False
)

# Colour/legend order follows the "distance" column instead of the default sort.
events_by_distance = (
    world_records.sort("distance")["event"].unique(maintain_order=True).to_list()
)

year_axis = (
    alt.X("date of event:T")
    .scale(domain=("1950-01-01", "2026-01-01"))
    .title("Year")
)
percent_axis = (
    alt.Y("percent of wr:Q")
    .scale(domain=(100, 110))
    .axis(values=list(range(100, 120, 2)))
    .title("Time in % of current WR")
)
event_color = alt.Color("event:N", sort=events_by_distance).scale(scheme="dark2")
selected_opacity = alt.condition(legend_selection, alt.value(1), alt.value(0))

base = (
    alt.Chart(world_records.to_pandas())
    .encode(
        x=year_axis,
        y=percent_axis,
        color=event_color,
        opacity=selected_opacity,
    )
    .properties(width=1400, height=500)
    .add_params(legend_selection)
    .add_params(legend_selection_empty)
)

# Rows with rank <= 0 are the synthetic line endpoints: they extend the step
# line but get no marker and no label.
base_no_endpoint = base.transform_filter(alt.datum["rank"] > 0)

label_opacity = alt.condition(
    legend_selection_empty, alt.value(0.9), alt.value(0.0)
)
text = base_no_endpoint.encode(text="name:N", opacity=label_opacity)

step_lines = base.mark_line(interpolate="step-after", clip=True, strokeWidth=3)
record_markers = base_no_endpoint.mark_point(filled=True, clip=True, size=100)
name_labels = text.mark_text(
    clip=True, fontSize=14, angle=270 + 45, align="left", dx=15
)

# One facet row per sex, each with its own x scale.
(
    alt.layer(step_lines, record_markers, name_labels)
    .facet(
        row=alt.Row("sex:N").title("").header(labelAngle=0),
        title="World Record Progression",
    )
    .resolve_scale(x="independent")
)

In [None]:
# For each male event, find the record that stood the longest.
(
    world_records
    # Drop the synthetic rank == -1 endpoint rows: they are chart helpers, not
    # results, and would otherwise receive a negative duration.
    .filter(pl.col("rank") > 0)
    .with_columns(
        # A record is valid until the next row of the same (sex, event).
        # The last row has no successor and is closed at the same end date
        # that the endpoint rows in `world_records` carry.
        pl.col("date of event")
        .shift_and_fill(
            periods=-1,
            fill_value=pl.lit("2023-06-07").str.strptime(pl.Date, format="%Y-%m-%d"),
        )
        .over("sex", "event")
        .alias("wr valid until")
    )
    .with_columns(
        (pl.col("wr valid until") - pl.col("date of event")).alias("wr duration")
    )
    .filter(pl.col("sex") == "Male")
    .drop("has hurdles", "event type", "distance", "on track")
    # Keep only the longest-standing record of each event.
    .filter(pl.col("wr duration") == pl.col("wr duration").max().over("sex", "event"))
    .sort("wr duration")
)

In [None]:
# Names that occur more than 30 times among the first 100 rows of each
# (sex, event) group, in ascending order of occurrences.
first_100_per_event = df.groupby("sex", "event").head(100)
rows_per_name = first_100_per_event.groupby("name").count()

rows_per_name.sort("count").filter(pl.col("count") > 30)

In [None]:
# For every standard, non-"walk" event: the name occurring most often among the
# first 20 rows of each (sex, event) group, female and male side by side.
standard_non_walk = (pl.col("event type") == "standard") & ~pl.col(
    "event"
).str.contains("walk")

first_20_per_event = (
    df.filter(standard_non_walk)
    .sort("distance")
    .groupby("sex", "event", maintain_order=True)
    .head(20)
)

rows_per_name = first_20_per_event.groupby(
    "sex", "event", "name", maintain_order=True
).count()

# After the ascending sort, the last row of each group has the highest count.
most_frequent_name = (
    rows_per_name.sort("count").groupby("sex", "event", maintain_order=True).last()
)

most_frequent_name.pivot(
    values=["name", "count"],
    index="event",
    columns="sex",
    aggregate_function="first",
).select(
    [
        "event",
        "name_sex_female",
        "count_sex_female",
        "name_sex_male",
        "count_sex_male",
    ]
)

In [None]:
# Names with more than three entries in the record progression.
# The rank == -1 rows are synthetic chart endpoints copied from each rank-1
# row; they are dropped first so those athletes are not counted twice.
(
    world_records.filter(pl.col("rank") > 0)
    .groupby("name")
    .count()
    .sort("count")
    .filter(pl.col("count") > 3)
)

In [None]:
# Stacked bars: number of progression entries per calendar year, coloured by
# "distance type". Synthetic endpoint rows (rank <= 0) are left out.
records_per_year = (
    world_records.with_columns(
        pl.col("date of event").dt.year().alias("year of event")
    )
    .filter(pl.col("rank") > 0)
    .groupby("year of event", "distance type", maintain_order=True)
    .count()
)

# Only label every tenth year on the x axis.
decade_ticks = list(range(1950, 2030, 10))

alt.Chart(records_per_year.to_pandas()).mark_bar().encode(
    x=alt.X("year of event:N").axis(values=decade_ticks),
    y="count:Q",
    color="distance type:N",
)

In [None]:
# Display the assembled progression frame for inspection; it still contains
# the synthetic rank == -1 endpoint rows added for the step chart.
world_records