In [None]:
from pathlib import Path

import altair as alt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import polars as pl
import seaborn as sns

In [None]:
# Survey years covered by this notebook: every second year, 2015 through 2023.
years = range(2015, 2024, 2)

# Directory that receives the exported figures and their backing CSVs.
figs_dir = r"Q:\CMP\reports\CMPSF 2023\Draft\figures\multimodal_performance\cmp_counts"

# Shared color encoding for the two peak periods (label -> hex color, by position).
color_ampm_domain = ["a.m.", "p.m."]
color_ampm_range = ["#8cb7c9", "#d3d655"]

# One parsed intersection-totals CSV per survey year.
csv_filepaths = {
    year: rf"Q:\Data\Observed\Streets\Counts\CMP\{year}\intersection\parsed-intersection-totals.csv"
    for year in years
}

# Load every year's CSV, using the first column of each file as the index.
dfs = {
    year: pd.read_csv(csv_filepaths[year], index_col=0)
    for year in years
}

In [None]:
def calc_diffs(dfs, comparison_year, base_year):
    """Compare the counts of two years, element by element.

    Parameters
    ----------
    dfs : mapping of year -> DataFrame
        Count tables keyed by year; the two selected tables are combined
        element-wise, so they should share index and columns.
    comparison_year, base_year : keys of ``dfs``
        The year being examined and the year it is measured against.

    Returns
    -------
    (pct_diff, sd_diff)
        ``pct_diff`` is the change relative to the base year, in percent.
        ``sd_diff`` is the change expressed in base-year standard deviations,
        where the counts are assumed to be Poisson so that sd = sqrt(count).
    """
    base = dfs[base_year]
    change = dfs[comparison_year] - base
    pct_diff = change / base * 100
    # Poisson assumption: the variance equals the mean, so sd is sqrt(base).
    sd_diff = change / np.sqrt(base)
    return pct_diff, sd_diff


def heatmap(df, title=None, ax=None, annot=True):
    """Draw a diverging heatmap of signed percentage values.

    The color scale is fixed to the range -100..100 and centered on 0, so
    values outside that range saturate at the ends of the "BrBG" palette.

    Parameters
    ----------
    df : 2-D data accepted by ``seaborn.heatmap``
        The values to plot (for example a percent-difference table).
    title : str, optional
        Title for the axes that are drawn on. No title is set when omitted.
    ax : matplotlib Axes, optional
        Axes to draw into. When omitted, seaborn picks the axes and the
        figure is shown before this function returns. When supplied, the
        caller owns the figure and is responsible for showing it.
    annot : bool or array-like, default True
        Forwarded to ``seaborn.heatmap``; cell annotations are formatted
        without decimals.
    """
    drawn_ax = sns.heatmap(
        df,
        ax=ax,
        annot=annot,
        cmap="BrBG",
        fmt=".0f",
        vmin=-100,
        vmax=100,
        center=0,
    )
    # Title the axes that were actually drawn on. plt.title() would target
    # pyplot's "current" axes, which is not necessarily the `ax` passed in.
    if title is not None:
        drawn_ax.set_title(title)
    # A caller that passes its own axes is composing a larger figure; showing
    # it here would flush that figure before the remaining panels are drawn.
    if ax is None:
        plt.show()


def calc_diffs_and_plot(dfs, comparison_year, base_year):
    """Plot the percent change in counts between two years as a heatmap.

    Parameters
    ----------
    dfs : mapping of year -> DataFrame
        Count tables keyed by year, as expected by ``calc_diffs``.
    comparison_year, base_year : keys of ``dfs``
        The change is measured from ``base_year`` to ``comparison_year``.
    """
    # Only the percent change is drawn. The standard-deviation result was used
    # for a one-off sanity check (a heatmap annotated with how many standard
    # deviations each change represents); it looked fine, so that second plot
    # is no longer produced.
    pct_diff, _sd_diff = calc_diffs(dfs, comparison_year, base_year)
    chart_title = f"% change from {base_year} to {comparison_year}"
    heatmap(pct_diff, title=chart_title)


# def calc_diffs_and_plot(dfs, comparison_years, base_years):
#     fig, axs = plt.subplots(1, len(comparison_years))
#     for i, (comparison_year, base_year) in enumerate(
#         zip(comparison_years, base_years)
#     ):
#         pct_diff, _ = calc_diffs(dfs, comparison_year, base_year)
#         heatmap(
#             pct_diff,
#             title=f"% change from {base_year} to {comparison_year}",
#             ax=axs[i],
#         )
#     plt.show()

In [None]:
# Sanity check: every survey year must cover the same set of count locations.
# All tables must have one common shape, and each year's index must equal
# the index of the first year.
df_shapes = [dfs[year].shape for year in years]
df_locations = {year: dfs[year].index for year in years}
assert len(set(df_shapes)) == 1
assert all(
    df_locations[years[0]].equals(locations)
    for locations in df_locations.values()
)

In [None]:
# Heatmap of the % change from 2017 to 2019.
calc_diffs_and_plot(dfs, comparison_year=2019, base_year=2017)

In [None]:
# Column totals of every year's table, arranged with one row per year.
sums_wide = (
    pd.DataFrame({year: dfs[year].sum() for year in years})
    .T
    .rename_axis("year")
)
sums_wide

In [None]:
# Calculate percent change for body text in CMP report analysis


def pct_change(sums_wide_df, comparison_year, base_year):
    """Return the percent change of each column between two rows.

    Parameters
    ----------
    sums_wide_df : DataFrame
        Table whose index labels include both years.
    comparison_year, base_year : index labels of ``sums_wide_df``
        The change is measured from ``base_year`` to ``comparison_year``.

    Returns
    -------
    Series
        ``(comparison / base - 1) * 100`` for every column.
    """
    ratio = sums_wide_df.loc[comparison_year] / sums_wide_df.loc[base_year]
    return (ratio - 1) * 100


# One row per comparison (label: comparison year vs. base year), one column
# per column of sums_wide.
pd.concat(
    {
        label: pct_change(sums_wide, comparison_year, base_year)
        for label, comparison_year, base_year in [
            ("21to23", 2023, 2021),
            ("19to23", 2023, 2019),
            ("19to21", 2021, 2019),
        ]
    },
    axis=1,
).T

In [None]:
# Reshape the wide per-year totals into long form: one row per
# (year, original column), with the column name split into a mode and a
# peak period.
# Assumes the column names of sums_wide look like "<mode>_<period>", e.g.
# "veh_am" — TODO confirm against the parsed CSVs.
# NOTE(review): `melt` and `map_dict` are deprecated or renamed in later polars
# releases (`unpivot`, `replace`) — confirm against the pinned polars version
# before upgrading.
sums_long = pl.from_pandas(sums_wide, include_index=True).melt(
    id_vars="year",
    variable_name="mode_period",
    value_name="counts",
).with_columns(
    # Turn "mode_period" into a list of its "_"-separated parts.
    pl.col("mode_period").str.split("_"),
    # unnest not working right now
    # .to_struct(fields=["mode", "period"]).struct.unnest()
).select(
    pl.col("year"),
    # January 1st of each year, used as the temporal x-axis value in mode_chart.
    pl.date("year", 1, 1).alias("datetime"),
    # First part of the split column name is the mode.
    pl.col("mode_period").list.get(0).alias("mode"),
    # Second part is the peak period, relabelled to the display labels used
    # in color_ampm_domain.
    pl.col("mode_period")
    .list.get(1)
    .map_dict({"am": "a.m.", "pm": "p.m."})
    .alias("peak period"),
    pl.col("counts"),
)
# pandas copy of the "veh" rows only; not referenced again in the visible
# part of this notebook.
veh_long = sums_long.filter(pl.col("mode") == "veh").to_pandas()

In [None]:
def save_mode_csv(mode_long_df, filepath):
    """Write the year, peak period and counts columns of a table to a CSV.

    Parameters
    ----------
    mode_long_df : object providing ``select(...).write_csv(...)``
        The long-format table to export (a polars DataFrame in this notebook).
    filepath : str or path-like
        Destination passed to ``write_csv``.
    """
    export_columns = ("year", "peak period", "counts")
    mode_long_df.select(*export_columns).write_csv(filepath)


def mode_chart(sums_long_df, figs_dir, mode_str):
    """Chart one mode's counts by year and peak period, and export the result.

    Besides returning the chart, this writes two files into ``figs_dir``:
    ``intersection-<mode_str>.csv`` with the plotted data and
    ``intersection-<mode_str>.png`` with the rendered chart.

    Parameters
    ----------
    sums_long_df : polars DataFrame
        Long-format table with "mode", "datetime", "year", "peak period" and
        "counts" columns.
    figs_dir : str or path-like
        Directory that receives the CSV and PNG.
    mode_str : str
        Value of the "mode" column to keep.

    Returns
    -------
    altair Chart
        A line chart with point markers, one line per peak period.
    """
    mode_long_df = sums_long_df.filter(pl.col("mode") == mode_str)

    x_year = alt.X("datetime:T").title("year")
    color_peak_period = alt.Color("peak period:N").scale(
        domain=color_ampm_domain, range=color_ampm_range
    )
    # to_pandas for DateTime
    base_chart = alt.Chart(mode_long_df.to_pandas())
    chart = base_chart.mark_line(point=True).encode(
        x_year,
        y="counts:Q",
        color=color_peak_period,
        tooltip=["year", "peak period", "counts"],
        text="counts",
    )

    # The CSV and the PNG share one filename stem and differ only in extension.
    stem = Path(figs_dir) / f"intersection-{mode_str}"
    save_mode_csv(mode_long_df, f"{stem}.csv")
    chart.save(f"{stem}.png")
    return chart

In [None]:
# "veh" mode: display the chart and export intersection-veh.csv / .png to figs_dir.
mode_chart(sums_long, figs_dir, mode_str="veh")

In [None]:
# "bike" mode: display the chart and export intersection-bike.csv / .png to figs_dir.
mode_chart(sums_long, figs_dir, mode_str="bike")

In [None]:
# "ped" mode: display the chart and export intersection-ped.csv / .png to figs_dir.
mode_chart(sums_long, figs_dir, mode_str="ped")