In [None]:
from pathlib import Path

import altair as alt
import matplotlib.pyplot as plt
import pandas as pd
import polars as pl
import seaborn as sns

In [None]:
# Survey years: every second year from 2015 through 2023.
years = range(2015, 2024, 2)

# Chart colours; the a.m./p.m. domain and range lists are paired by position.
color_ampm_domain = ["a.m.", "p.m."]
color_ampm_range = ["#8cb7c9", "#d3d655"]
color_daily = "#8cb7c9"

# Directory that receives the exported figures and their companion CSVs.
figs_dir = r"Q:\CMP\reports\CMPSF 2023\Draft\figures\multimodal_performance\cmp_counts"

# Path of the midblock weekday ADT counts CSV for each survey year.
filepaths = dict(
    (
        y,
        rf"Q:\Data\Observed\Streets\Counts\CMP\{y}\midblock\cmp_midblock_weekday_adt-{y}.csv",
    )
    for y in years
)

# One DataFrame per survey year, indexed by (location id, location, direction).
dfs = {
    year: pd.read_csv(path).set_index(
        ["2023_location_id", "location", "direction"]
    )
    for year, path in filepaths.items()
}

In [None]:
def calc_diffs(dfs, comparison_year, base_year):
    """Return the percent change from ``base_year`` to ``comparison_year``.

    Parameters
    ----------
    dfs : mapping
        Maps a year to an object supporting ``-``, ``/`` and ``*`` (in this
        notebook, one DataFrame per year).
    comparison_year, base_year
        Keys of ``dfs``.

    Returns
    -------
    ``(dfs[comparison_year] - dfs[base_year]) / dfs[base_year] * 100``.
    """
    base = dfs[base_year]
    change = dfs[comparison_year] - base
    return change / base * 100


def heatmap(df, title=None, ax=None, annot=True):
    """Draw ``df`` as a diverging heatmap on a fixed -100..100 colour scale.

    Parameters
    ----------
    df
        Rectangular data passed straight to ``seaborn.heatmap``.
    title : str, optional
        Title for the heatmap axes; left untouched when ``None``.
    ax : matplotlib Axes, optional
        Axes to draw on. When omitted, a new 10x10 inch figure is created and
        shown before returning. When supplied, the heatmap and title are drawn
        on it and showing the figure is left to the caller, so that several
        heatmaps can share one figure.
    annot : bool
        Whether to write the value in each cell (formatted with ``.0f``).
    """
    # Only create (and later show) a figure when the caller did not supply an
    # axes; otherwise a stray empty figure would be created and titled.
    owns_figure = ax is None
    if owns_figure:
        _, ax = plt.subplots(figsize=(10, 10))
    sns.heatmap(
        df,
        ax=ax,
        annot=annot,
        cmap="BrBG",
        fmt=".0f",
        vmin=-100,
        vmax=100,
        center=0,
    )
    # Title the axes that were actually drawn on, not pyplot's "current" axes.
    if title is not None:
        ax.set_title(title)
    if owns_figure:
        plt.show()


def calc_diffs_and_plot(dfs, comparison_year, base_year):
    """Compute the percent change between two years and show it as a heatmap.

    The differences come from ``calc_diffs`` and are drawn by ``heatmap`` with
    a title naming both years.
    """
    heatmap(
        calc_diffs(dfs, comparison_year, base_year),
        title=f"% change from {base_year} to {comparison_year}",
    )

In [None]:
# Index entries (location id, location, direction) present in each year.
location_directions = {year: set(dfs[year].index) for year in years}

# Location-directions that have data in every year.
repeated_location_directions = set.intersection(*location_directions.values())
# Location-directions that have data in at least one year.
all_location_directions = set().union(*location_directions.values())

# Per year: location-directions seen in some year but absent from this one.
missing_location_directions = {
    year: all_location_directions.difference(present)
    for year, present in location_directions.items()
}
# Years for which at least one location-direction is missing.
incomplete_years = [
    year for year in years if missing_location_directions[year]
]

missing_location_directions

In [None]:
# Heatmap of the percent change in counts from 2019 to 2023.
calc_diffs_and_plot(dfs, comparison_year=2023, base_year=2019)

In [None]:
# Totals over the location-directions present in every year, wide form:
# one row per year, one column per count column of the yearly frames.
totals_repeated_wide = pd.DataFrame(
    {
        y: dfs[y].loc[list(repeated_location_directions)].sum(axis=0)
        for y in years
    }
).T
# Totals over all location-directions, restricted to the years with no missing
# location-directions, wide form. `years` is filtered in order (rather than
# iterating a set difference) so the rows, and anything exported from them,
# stay in chronological order.
totals_complete_wide = pd.DataFrame(
    {y: dfs[y].sum(axis=0) for y in years if y not in incomplete_years}
).T
totals_repeated_wide.index.name = "year"
totals_complete_wide.index.name = "year"

In [None]:
def to_long_form(df, var_name):
    """Melt a wide, year-indexed table into long form.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table whose index is named ``year``.
    var_name : str
        Name of the output column that holds the former column labels.

    Returns
    -------
    pandas.DataFrame
        Columns ``year``, ``var_name``, ``counts`` and ``datetime`` (the year
        parsed with the ``%Y`` format).
    """
    indexed = df.reset_index()
    long_df = indexed.melt(
        id_vars="year", var_name=var_name, value_name="counts"
    )
    long_df["datetime"] = pd.to_datetime(long_df["year"], format="%Y")
    return long_df


def peak_only(df):
    """Keep only the two peak-period columns, relabelled for display.

    Drops ``cmp_non_peak_vol`` and ``daily_vol``, and renames
    ``cmp_am_peak_vol`` to ``a.m.`` and ``cmp_pm_peak_vol`` to ``p.m.``.
    Returns a new DataFrame; ``df`` itself is not modified.
    """
    display_names = {"cmp_am_peak_vol": "a.m.", "cmp_pm_peak_vol": "p.m."}
    peak_columns = df.drop(columns=["cmp_non_peak_vol", "daily_vol"])
    return peak_columns.rename(columns=display_names)


def daily_only(df):
    """Extract the daily totals from a wide, year-indexed table.

    Returns a new DataFrame with columns ``year``, ``counts`` (the former
    ``daily_vol`` column) and ``datetime`` (the year parsed with ``%Y``).
    """
    daily = (
        df.reset_index()
        .rename(columns={"daily_vol": "counts"})
        .loc[:, ["year", "counts"]]
    )
    daily["datetime"] = pd.to_datetime(daily["year"], format="%Y")
    return daily

# Long-form a.m./p.m. peak totals, for the repeated and the complete tables.
totals_repeated_peak_long, totals_complete_peak_long = (
    to_long_form(peak_only(wide), "peak period")
    for wide in (totals_repeated_wide, totals_complete_wide)
)

# Daily totals for the same two tables.
totals_repeated_daily, totals_complete_daily = (
    daily_only(wide) for wide in (totals_repeated_wide, totals_complete_wide)
)

In [None]:
def save_mode_csv(totals_df, filepath, columns):
    """Write the given columns of a pandas DataFrame to a CSV file.

    The frame is converted to polars, narrowed to ``columns`` and written to
    ``filepath``.
    """
    selected = pl.from_pandas(totals_df).select(columns)
    selected.write_csv(filepath)


def totals_chart(
    totals_complete_df, totals_repeated_df, save_filepath_stem, peak: bool
):
    """Build, save and return a layered chart of count totals by year.

    The line with point markers is drawn from ``totals_complete_df`` and the
    square markers from ``totals_repeated_df``. The plotted columns of both
    frames are written to CSV, and the chart itself to PNG.

    Note from the original analysis: data are collected April-May biennially
    at the same locations. Counts in the line layer are summed over locations
    and directions, whereas the square markers exclude counts from Van Ness
    between California and Pine, where no data were collected in 2017. For
    clarity, Joe/Chun Ho decided to just not display the 2017 data point.

    Parameters
    ----------
    totals_complete_df, totals_repeated_df : pandas.DataFrame
        Must provide ``year``, ``counts`` and ``datetime`` columns, plus
        ``peak period`` when ``peak`` is true.
    save_filepath_stem : str or path-like
        Output path without suffix. ``-all_locations.csv``,
        ``-repeated_locations.csv`` and ``.png`` are appended to it.
    peak : bool
        If true, colour the series by ``peak period`` using the module-level
        a.m./p.m. colour scale; otherwise use a single fixed colour.

    Returns
    -------
    The layered Altair chart.
    """
    # Counts are plotted and exported as whole numbers.
    whole_numbers = {"counts": int}
    totals_complete_df = totals_complete_df.round({"counts": 0}).astype(
        whole_numbers
    )
    totals_repeated_df = totals_repeated_df.round({"counts": 0}).astype(
        whole_numbers
    )

    # The colour encoding and the tooltip/CSV columns depend on the mode.
    if peak:
        columns = ["year", "peak period", "counts"]
        color = alt.Color("peak period:N").scale(
            domain=color_ampm_domain, range=color_ampm_range
        )
    else:
        columns = ["year", "counts"]
        # NOTE(review): the module-level color_daily constant is not used
        # here; confirm which colour is intended.
        color = alt.value("#006c69")

    line_marks = alt.Chart(totals_complete_df).mark_line(
        point=True, opacity=1
    )
    all_locations_chart = line_marks.encode(
        alt.X("datetime:T").title("year"),
        y="counts:Q",
        color=color,
        tooltip=columns,
    )

    square_marks = alt.Chart(totals_repeated_df).mark_point(
        shape="square", filled=True, size=20
    )
    repeated_locations_chart = square_marks.encode(
        alt.X("datetime:T").title("year"),
        alt.Y("counts:Q"),
        color=color,
        tooltip=columns,
    )

    chart = all_locations_chart + repeated_locations_chart

    # Export the data behind each layer, then the rendered chart.
    for frame, suffix in (
        (totals_complete_df, "-all_locations.csv"),
        (totals_repeated_df, "-repeated_locations.csv"),
    ):
        save_mode_csv(frame, f"{save_filepath_stem}{suffix}", columns)
    chart.save(f"{save_filepath_stem}.png")
    return chart

In [None]:
# Weekday average daily traffic (ADT), 2015-2023: chart, CSVs and PNG.
save_filepath_stem = Path(figs_dir, "midblock-daily")
totals_chart(
    totals_complete_df=totals_complete_daily,
    totals_repeated_df=totals_repeated_daily,
    save_filepath_stem=save_filepath_stem,
    peak=False,
)

In [None]:
# Weekday average a.m./p.m. peak traffic counts, 2015-2023: chart, CSVs and PNG.
save_filepath_stem = Path(figs_dir, "midblock-peak")
totals_chart(
    totals_complete_df=totals_complete_peak_long,
    totals_repeated_df=totals_repeated_peak_long,
    save_filepath_stem=save_filepath_stem,
    peak=True,
)