In [None]:
from pathlib import Path

import altair as alt
import numpy as np
import pandas as pd
import polars as pl

In [None]:
# CMP monitoring cycles compared in this notebook: 2019, 2021 and 2023
years = range(2019, 2024, 2)

# Input CSV per monitoring year. The paths are spelled out rather than generated
# from one template, because the 2019 file sits in a different folder
# ("LOS Monitoring 2021\Auto_LOS") and uses a different file name
# ("..._Auto_Speeds_Reliability.csv") than the 2021 and 2023 files.
filepaths = {
    2019: r"Q:\CMP\LOS Monitoring 2021\Auto_LOS\CMP2019_Auto_Speeds_Reliability.csv",
    2021: r"Q:\CMP\LOS Monitoring 2023\Auto_LOS_and_Reliability\CMP2021_Auto_LOS_and_Reliability.csv",
    2023: r"Q:\CMP\LOS Monitoring 2023\Auto_LOS_and_Reliability\CMP2023_Auto_LOS_and_Reliability.csv",
}
# Output folder for the exported figures and the CSV export
figs_dir = Path(r"Q:\CMP\reports\CMPSF 2023\Draft\figures\multimodal_performance\speed")

# Colour scheme for the peak periods, given as one (label, colour) pair per
# period and unzipped into the parallel domain/range lists used by the charts.
color_ampm_domain, color_ampm_range = map(
    list, zip(("AM", "#8cb7c9"), ("PM", "#d3d655"))
)

# Keep only the officially defined CMP segments, i.e. segment IDs 1-245
# (hence the exclusive upper bound of 246).
cmp_segid_filter = pl.col("cmp_segid") < 246

In [None]:
# Read, for every monitoring year, only the columns used in this notebook.
dfs = {
    y: pl.read_csv(
        filepaths[y],
        columns=["cmp_segid", "year", "source", "period", "avg_speed"],
    )
    for y in years
}
# long: all years stacked on top of each other, restricted by cmp_segid_filter
df_long = pl.concat(list(dfs.values())).filter(cmp_segid_filter)
# wide: each year as a separate column to allow scatter chart plotting
df_wide = df_long.pivot(
    # "source" is deliberately left out of the index (i.e. ignored)
    index=["cmp_segid", "period"],
    columns="year",
    values="avg_speed",
    # no aggregation is applied; presumably each (segment, period, year) has a
    # single record -- TODO confirm
    aggregate_function=None,
    separator="-",
    # NOTE: the original author expected this rename to be unnecessary after a
    # pivot; why it is still required is unresolved.
).rename(
    {
        "period": "peak period",
        # The pivot names each new column after its year value (as a string).
        # Derive the mapping from `years` so that adding a monitoring year only
        # requires touching the config cell.
        **{str(y): f"avg speed ({y})" for y in years},
    }
)

In [None]:
# TODO: merge in a DataFrame of CMP segment names, and show the segment name in the tooltip instead of the ID

In [None]:
# Shared x/y limits of the charts: from 0 up to the highest recorded average
# speed, rounded up to a multiple of 10.
max_speed_recorded = df_long.select(pl.col("avg_speed").max()).item()
max_speed_chart = 10 * np.ceil(max_speed_recorded / 10)
scale_domain = (0, max_speed_chart)

In [None]:
def plot_scatter(df_wide, x_year, y_year):
    """Plot the average speeds of one year against those of another and save the chart.

    Every row of ``df_wide`` becomes one point, coloured by peak period, with a
    tooltip listing the segment ID, the peak period and both speeds. A
    semi-transparent grey diagonal running between the corners of the chart
    limits is layered with the points.

    Parameters
    ----------
    df_wide
        Wide table holding the columns ``"cmp_segid"``, ``"peak period"`` and
        ``"avg speed (<year>)"`` for both requested years.
    x_year, y_year
        Years shown on the x and y axis respectively; they select the
        ``"avg speed (<year>)"`` columns and appear in the axis titles and in
        the output file names.

    Returns
    -------
    The layered chart with ``.interactive()`` applied.

    Notes
    -----
    Reads the module-level ``scale_domain``, ``color_ampm_domain``,
    ``color_ampm_range`` and ``figs_dir``. As a side effect, writes
    ``speed_scatter-<x_year>-<y_year>.html`` and ``.png`` into ``figs_dir``;
    both files are saved from the chart before ``.interactive()`` is applied.
    """
    x_col = f"avg speed ({x_year})"
    y_col = f"avg speed ({y_year})"

    # same limits on both axes, so that the diagonal marks "no change"
    axis_scale = alt.Scale(domain=scale_domain)
    points = (
        alt.Chart(df_wide)
        .mark_circle(size=20)
        .encode(
            x=alt.X(
                f"{x_col}:Q",
                title=f"average speed ({x_year})",
                scale=axis_scale,
            ),
            y=alt.Y(
                f"{y_col}:Q",
                title=f"average speed ({y_year})",
                scale=axis_scale,
            ),
            color=alt.Color(
                "peak period:N",
                scale=alt.Scale(domain=color_ampm_domain, range=color_ampm_range),
            ),
            tooltip=["cmp_segid:O", "peak period:N", f"{x_col}:Q", f"{y_col}:Q"],
        )
    )

    # two-point frame: the lower-left and upper-right corners of the chart limits
    corners = pd.DataFrame({x_col: scale_domain, y_col: scale_domain})
    diagonal = (
        alt.Chart(corners)
        .mark_line(color="grey", opacity=0.5)
        .encode(x=alt.X(x_col), y=alt.Y(y_col))
    )

    layered = alt.layer(points, diagonal)
    layered.save(figs_dir / f"speed_scatter-{x_year}-{y_year}.html", scale_factor=2)
    layered.save(figs_dir / f"speed_scatter-{x_year}-{y_year}.png", scale_factor=2)
    return layered.interactive()

In [None]:
# Export the wide (one column per year) table next to the saved figures.
df_wide.write_csv(figs_dir.joinpath("speed_scatter-multi_year.csv"))

In [None]:
# 2019 (x axis) vs 2023 (y axis); the returned chart is the cell output
plot_scatter(df_wide, x_year=2019, y_year=2023)

In [None]:
# 2021 (x axis) vs 2023 (y axis); the returned chart is the cell output
plot_scatter(df_wide, x_year=2021, y_year=2023)