In [None]:
import polars as pl
import seaborn as sns

In [None]:
# Years to load: range(2021, 2024, 2) yields 2021 and 2023.
years = range(2021, 2024, 2)

# Location of each year's summary-statistics CSV, keyed by year.
filepaths = dict(
    (
        y,
        rf"\\Store\gis\Data\Observed\Streets\Speed\CMP-floating_car_run\{y}\floating_car-speed-summary_stats.csv",
    )
    for y in years
)

# One DataFrame per year, read from the path recorded in `filepaths`.
dfs = {y: pl.read_csv(path) for y, path in filepaths.items()}

In [None]:
# Key columns used to join the per-year tables and to carry through into
# the derived (wide / diff) tables.
wide_index_cols = [
    "cmp_segid",
    "period",
    "direction",
]


def make_wide_df(dfs, years, index_cols):
    """Build a wide table with one set of speed columns per year.

    Parameters
    ----------
    dfs : mapping of year -> DataFrame
        Each frame must contain ``index_cols`` plus ``avg_speed``,
        ``std_speed``, ``min_speed`` and ``max_speed``.
    years : indexable sequence of years
        Keys into ``dfs``. ``years[0]`` supplies the starting set of keys,
        so the sequence must be non-empty and support indexing.
    index_cols : list of str
        Columns that identify a row; used as the join key.

    Returns
    -------
    DataFrame
        ``index_cols`` followed, for every year ``y``, by
        ``avg_speed-y``, ``std_speed-y``, ``min_speed-y``, ``max_speed-y``
        and ``std/avg_speed-y``. Frames are combined with
        ``DataFrame.join``'s default strategy (inner join in polars), so a
        key missing from any year is dropped.
    """
    speed_cols = ["avg_speed", "std_speed", "min_speed", "max_speed"]

    def _columns_for(year):
        # Key columns plus this year's statistics, renamed with a "-<year>"
        # suffix so that columns from different years do not collide.
        # NOTE(review): `Expr.suffix` is deprecated in newer polars releases
        # in favour of `Expr.name.suffix` -- confirm the installed version.
        ratio = pl.col("std_speed") / pl.col("avg_speed")
        return dfs[year].select(
            index_cols,
            pl.col(speed_cols).suffix(f"-{year}"),
            ratio.alias(f"std/avg_speed-{year}"),
        )

    # Start from the first year's keys, then attach every year in turn
    # (the first year included).
    wide = dfs[years[0]].select(index_cols)
    for year in years:
        wide = wide.join(_columns_for(year), on=index_cols)
    return wide


def make_long_df(dfs, years):
    """Stack the per-year tables into one long table.

    Each frame ``dfs[y]`` gets a literal ``year`` column holding ``y``; the
    frames are concatenated in the order given by ``years``, and a
    ``std/avg_speed`` column (``std_speed`` divided by ``avg_speed``) is
    added to the result.
    """
    tagged_frames = [
        dfs[year].with_columns(pl.lit(year).alias("year")) for year in years
    ]
    stacked = pl.concat(tagged_frames)
    speed_ratio = pl.col("std_speed") / pl.col("avg_speed")
    return stacked.with_columns(speed_ratio.alias("std/avg_speed"))


def calc_diff(wide_df, comparison_year, base_year, index_cols=None):
    """Compute the change in average speed between two years.

    Parameters
    ----------
    wide_df : DataFrame
        Wide table containing ``avg_speed-<comparison_year>`` and
        ``avg_speed-<base_year>`` columns plus the key columns.
    comparison_year, base_year
        Year labels used to build the column names; the difference is
        ``comparison_year`` minus ``base_year``.
    index_cols : list of str, optional
        Key columns to keep in the output. Defaults to the module-level
        ``wide_index_cols``.

    Returns
    -------
    DataFrame
        The key columns, ``avg_speed-diff`` (absolute difference) and
        ``avg_speed-diff-pct`` (difference as a percentage of the base
        year's average speed).
    """
    if index_cols is None:
        index_cols = wide_index_cols

    base_speed = pl.col(f"avg_speed-{base_year}")
    comparison_speed = pl.col(f"avg_speed-{comparison_year}")

    return wide_df.with_columns(
        (comparison_speed - base_speed).alias("avg_speed-diff")
    ).select(
        index_cols,
        "avg_speed-diff",
        # The denominator follows `base_year` rather than a fixed year, so
        # the percentage is always relative to the base the caller chose.
        (pl.col("avg_speed-diff") / base_speed * 100).alias(
            "avg_speed-diff-pct"
        ),
    )

In [None]:
# Two layouts of the same per-year data: one row per key with a column set
# per year (wide), and one row per key and year (long).
wide_df = make_wide_df(dfs=dfs, years=years, index_cols=wide_index_cols)
long_df = make_long_df(dfs=dfs, years=years)

In [None]:
# Change in average speed: 2023 minus 2021.
diff_df = calc_diff(wide_df, comparison_year=2023, base_year=2021)

In [None]:
# Wide table with the 2023-vs-2021 difference columns appended; unlike
# `diff_df`, every column of `wide_df` is kept.
df = (
    wide_df
    .with_columns(
        # Absolute change in average speed.
        (pl.col("avg_speed-2023") - pl.col("avg_speed-2021")).alias(
            "avg_speed-diff"
        )
    )
    .with_columns(
        # Change as a percentage of the 2021 average speed.
        (pl.col("avg_speed-diff") / pl.col("avg_speed-2021") * 100).alias(
            "avg_speed-diff-pct"
        )
    )
)

In [None]:
# Distribution of average speed, one hue per year.
sns.histplot(data=long_df, x="avg_speed", hue="year")

In [None]:
# Distribution of the speed standard deviation, one hue per year.
sns.histplot(data=long_df, x="std_speed", hue="year")

In [None]:
# Distribution of std_speed / avg_speed, one hue per year.
sns.histplot(data=long_df, x="std/avg_speed", hue="year")

In [None]:
# Distribution of the absolute change in average speed.
sns.histplot(data=diff_df, x="avg_speed-diff")

In [None]:
# Distribution of the percentage change in average speed.
sns.histplot(data=diff_df, x="avg_speed-diff-pct")

In [None]:
# Wide table plus the diff columns, sorted by percentage change in average
# speed (default ascending order, so the most negative change comes first).
(
    wide_df
    .join(diff_df, on=wide_index_cols)
    .sort("avg_speed-diff-pct")
)

In [None]:
# The ten rows with the highest 2023 std/avg speed ratio.
(
    wide_df
    .join(diff_df, on=wide_index_cols)
    .sort("std/avg_speed-2023", descending=True)
    .head(10)
)