In [None]:
import glob
import os

import altair as alt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import polars as pl
from vega_datasets import data

from automatic_climb_detection import logger

# Lift Altair's default row limit so the charts below can be built directly
# from the full, un-downsampled `df`.
alt.data_transformers.disable_max_rows()

In [None]:
def resample_dataframe(
    df: pl.DataFrame,
    interpolation_column: str,
    interpolation_step: float,
) -> pl.DataFrame:
    """Resample a dataframe onto an evenly spaced grid of one numeric column.

    A grid is built with ``np.arange`` from the minimum to the maximum of
    `interpolation_column`. Because ``np.arange`` is half-open, the maximum
    itself is not part of the grid. The grid is merged with the observed rows,
    gaps in the other columns are filled with ``DataFrame.interpolate`` and
    finally only the grid rows are kept.

    NOTE(review): ``DataFrame.interpolate`` appears to interpolate linearly
    over row position rather than weighting by the spacing of
    `interpolation_column`; confirm that this approximation is acceptable for
    unevenly sampled input.

    Parameters
    ----------
    df : pl.DataFrame
        The dataframe to interpolate.
    interpolation_column : str
        Which numeric column to use for the interpolation points.
    interpolation_step : float
        Step between the newly created interpolation points.

    Returns
    -------
    pl.DataFrame
        One row per grid point, sorted by `interpolation_column`, holding
        `interpolation_column` followed by the other input columns with their
        values interpolated onto the grid.
    """
    # Aggregate only the column we need instead of every column of the frame.
    axis = df[interpolation_column]
    interpolation_points = pl.DataFrame(
        {
            interpolation_column: np.arange(
                axis.min(),
                axis.max(),
                step=interpolation_step,
            )
        }
    )

    # Union of grid points and observed points, each exactly once. This is
    # built with concat + unique + left join rather than an outer join so the
    # result does not depend on whether `how="outer"` coalesces the key column.
    # NOTE(review): that coalescing behaviour is believed to differ between
    # polars releases; confirm against the installed version.
    all_points = (
        pl.concat([interpolation_points, df.select(interpolation_column)])
        .unique()
        .sort(interpolation_column)
    )

    # Attach the observed values; grid-only rows are null until interpolated.
    filled = (
        all_points.join(df, on=[interpolation_column], how="left")
        .sort(interpolation_column)
        .interpolate()
    )

    # Keep only the rows that sit on the grid.
    return interpolation_points.join(
        filled,
        on=[interpolation_column],
        how="left",
    ).sort(interpolation_column)


def resample_dataframe_grouped(
    df: pl.DataFrame,
    interpolation_column: str,
    interpolation_step: float,
    group_column: str,
) -> pl.DataFrame:
    """Resample each group of a dataframe separately and stack the results.

    The frame is split into one sub-frame per distinct value of
    `group_column`, every sub-frame is passed to `resample_dataframe`, and the
    resampled pieces are concatenated in order of first appearance of each
    group.

    Parameters
    ----------
    df : pl.DataFrame
        The dataframe to interpolate.
    interpolation_column : str
        Which numeric column to use for the interpolation points.
    interpolation_step : float
        Step between the newly created interpolation points.
    group_column : str
        The column over which to group.

    Returns
    -------
    pl.DataFrame
        The per-group results of `resample_dataframe`, concatenated
        vertically. Each group gets its own grid, spanning that group's own
        range of `interpolation_column`.
    """
    # `partition_by` returns the per-group sub-frames directly. It is used
    # instead of iterating `DataFrame.groupby`, which was renamed to
    # `group_by` in later polars releases.
    # NOTE(review): `group_column` stays inside each sub-frame, so it is
    # passed through the interpolation like any other column; confirm.
    group_frames = df.partition_by(group_column, maintain_order=True)

    return pl.concat(
        [
            resample_dataframe(
                group_frame,
                interpolation_column=interpolation_column,
                interpolation_step=interpolation_step,
            )
            for group_frame in group_frames
        ]
    )

In [None]:
# Load every stage CSV and tag its rows with the stage number, which is parsed
# from the second "-"-separated token of the file's base name.
stage_frames = []
for file in glob.glob("../data/TourDeFrance2022/*.csv"):
    dfi = pd.read_csv(file, index_col=0)
    dfi["Stage"] = int(os.path.basename(file).split("-")[1])
    stage_frames.append(dfi)

# Fail with a clear message instead of an opaque error further down when the
# data directory is missing or empty.
if not stage_frames:
    raise FileNotFoundError(
        "No stage CSV files found matching ../data/TourDeFrance2022/*.csv"
    )

# Concatenate once: growing a frame inside the loop is quadratic, and seeding
# it with an empty DataFrame relies on pandas ignoring empty frames when it
# infers the result dtypes.
df = pd.concat(stage_frames).sort_values(["Stage", "distance"])

# Per-stage resampling onto a regular "distance" grid.
downsampledf = resample_dataframe_grouped(
    pl.DataFrame(df),
    interpolation_column="distance",
    interpolation_step=1000,  # in the units of "distance" (TODO confirm units)
    group_column="Stage",
).to_pandas()

In [None]:
# World base layer: the "countries" feature of the vega_datasets world_110m
# TopoJSON, drawn as light grey shapes with white borders.
# NOTE(review): `background` is rebound by the Europe map cell below before it
# is used in any layer, so this cell looks unused; confirm before removing.
world = data.world_110m.url

background = (
    alt.Chart(alt.topo_feature(world, "countries"))
    .mark_geoshape(fill="#ddd", stroke="#fff", strokeWidth=1)
)

In [None]:
# Europe base layer, read from the "continent_Europe_subunits" object of a
# remote TopoJSON file.
url = "https://raw.githubusercontent.com/deldersveld/topojson/master/continents/europe.json"
data_map = alt.topo_feature(url, "continent_Europe_subunits")

background = (
    alt.Chart(data_map)
    .mark_geoshape(fill="#ddd", stroke="#fff", strokeWidth=1)
)

# One small filled point per row of `df`, positioned by its coordinates and
# coloured by stage.
chart = (
    alt.Chart(df)
    .mark_point(filled=True, size=10, clip=True)
    .encode(
        latitude="latitude:Q",
        longitude="longitude:Q",
        color=alt.Color("Stage:N", scale=alt.Scale(scheme="tableau10")),
    )
)

# Route points on top of the base map; the last expression is the cell output.
(
    alt.layer(background, chart)
    .project(scale=2000, translate=[300, 2500])
    .properties(width=800, height=900)
)

In [None]:
# Stage picker: a dropdown listing every stage in `df`, bound to a
# single-value selection that starts on the first listed stage.
stages = df["Stage"].unique()
dropdown = alt.binding_select(options=stages)
select = alt.selection_single(
    name="Selector ",
    fields=["Stage"],
    bind=dropdown,
    init={"Stage": stages[0]},
)

# Interval brush: points inside the brushed region stay opaque, all others
# are faded.
brush = alt.selection_interval()

# Shared definition for both panels: points of the selected stage only,
# coloured by stage, with both selections attached.
base = (
    alt.Chart(df)
    .mark_point(filled=True, size=10, clip=True)
    .encode(
        color=alt.Color("Stage:N", scale=alt.Scale(scheme="tableau10")),
        opacity=alt.condition(brush, alt.value(1.0), alt.value(0.1)),
    )
    .properties(width=500, height=200)
    .resolve_scale(y="independent", x="independent")
    .add_selection(brush, select)
    .transform_filter(select)
)

# Left panel: altitude against distance.
chart_elevation = base.encode(
    x=alt.X("distance", scale=alt.Scale(zero=False)),
    y=alt.Y("altitude", scale=alt.Scale(zero=False)),
)

# Right panel: route outline from the raw coordinates.
# NOTE(review): latitude is on x and longitude on y, which draws the route
# transposed relative to a conventional map; confirm this is intended.
chart_map = base.encode(
    x=alt.X("latitude", sort=None, scale=alt.Scale(zero=False)),
    y=alt.Y("longitude", scale=alt.Scale(zero=False)),
)

# Side-by-side panels; the last expression is the cell output.
chart_elevation | chart_map

In [None]:
# Overlay the original and the resampled altitude profile of one stage to
# check the resampling visually.
fig, ax = plt.subplots()
stage = 2
for label, frame in (("original", df), ("resampled", downsampledf)):
    # Select the stage's rows once per frame instead of re-evaluating the
    # mask for every plotted column.
    stage_rows = frame.loc[frame["Stage"] == stage]
    ax.plot(stage_rows["distance"], stage_rows["altitude"], "-x", label=label)

# Label the figure so the two series can be told apart and the plot stands
# on its own.
ax.set(
    xlabel="distance",
    ylabel="altitude",
    title=f"Stage {stage}: original vs. resampled altitude",
)
ax.legend();