In [None]:
%load_ext autoreload

In [None]:
import sys

import altair as alt
import polars as pl

sys.path.append("../")
%autoreload
from bart import (
    filter_montofri,
    geo_dest_filters_for_stacking_dict,
    geo_orig_filters_for_stacking_dict,
    group_hourly_od_df,
    read_hourly_od_csvs,
)

from altair_utils import color_sfcore_restofsf_restofbayarea
from utils import add_normalization_col

In [None]:
# Calendar years 2018 through 2023 (inclusive) of hourly origin-destination data.
years = list(range(2018, 2024))
hourly_od_df = read_hourly_od_csvs(years)

In [None]:
def average_orig_dest(orig_df, dest_df, index_cols):
    """Average the origin-based and destination-based ridership figures.

    The two frames are joined on ``index_cols`` (polars' default join), so
    only key combinations present in both frames are kept.

    Args:
        orig_df: polars frame with an ``avg_daily_ridership`` column,
            aggregated by origin.
        dest_df: polars frame with an ``avg_daily_ridership`` column,
            aggregated by destination.
        index_cols: list of column names to join on and to keep in the result.

    Returns:
        A frame holding ``index_cols`` plus ``avg_daily_ridership``, the
        row-wise mean of the origin and destination values.
    """
    joined = orig_df.join(dest_df, on=index_cols)
    # After the join, dest_df's ridership column carries the "_right" suffix.
    orig_dest_mean = pl.mean_horizontal(
        pl.col("avg_daily_ridership", "avg_daily_ridership_right")
    )
    return joined.select(index_cols, avg_daily_ridership=orig_dest_mean)

In [None]:
# Same "1y" grouping run twice with no day-of-week filter:
#   1st frame - geography col derived from the origin station
#   2nd frame - geography col derived from the destination station
annual_dow_orig_df, annual_dow_dest_df = (
    group_hourly_od_df(hourly_od_df, geo_filters, "1y", dow_filter=None)
    for geo_filters in (
        geo_orig_filters_for_stacking_dict,
        geo_dest_filters_for_stacking_dict,
    )
)


In [None]:
# avg of orig/dest in each geography
# ! use for daily numbers, NOT for time of day
annual_dow_df = (
    add_normalization_col(  # norm col is for tooltip
        (
            average_orig_dest(
                annual_dow_orig_df, annual_dow_dest_df, ["year", "dow", "geography"]
            )
            # Label each row "weekday" or "weekend".
            # NOTE(review): assumes filter_montofri is a boolean polars
            # expression that is True for Monday-Friday rows - confirm in bart.py.
            .with_columns(
                weekday=filter_montofri.replace_strict(
                    [True, False], ["weekday", "weekend"]
                )
            )
            .filter(pl.col("geography") != "rest of BART system")
            # Collapse the individual days of week into one weekday and one
            # weekend average per year and geography.
            .group_by("year", "geography", "weekday")
            .agg(pl.mean("avg_daily_ridership"))
        ),
        "year",
        "avg_daily_ridership",
        r"ridership (% of 2019)",
        2019,
    )
    .with_columns(
        pl.col("avg_daily_ridership").round(0),
        pl.col("ridership (% of 2019)").round(3),
    )
    # group_by does not guarantee any row order, so sort explicitly to keep
    # the exported CSV identical from run to run.
    .sort("year", "geography", "weekday")
)
annual_dow_df.write_csv("../output/data/bart-ridership-trend.csv")

In [None]:
# Line chart of average daily ridership per year, one line per geography,
# split into one column per "weekday" value; exported as PNG and HTML and
# displayed as the cell output.
bart_ridership_trend_chart = (
    # Turn the integer year into a Jan 1 date so the x axis is temporal.
    alt.Chart(annual_dow_df.with_columns(date=pl.date(pl.col("year"), 1, 1)))
    # .transform_calculate(  # for area charts
    #     geog_order="{'SF (Market St)': 1, 'rest of SF': 2, 'rest of BART system': 3}[datum.geography]"
    # )
    .mark_line()
    .encode(
        x=alt.X("date:T", title="year"),
        y=alt.Y("avg_daily_ridership:Q", title=None),
        # Colour/legend order is fixed by the explicit scale domain below.
        # No sort on "geog_order" here: that field only exists when the
        # transform_calculate above is enabled (area-chart variant).
        color=alt.Color("geography").scale(
            domain=["SF (Market St)", "rest of SF"],  # "rest of BART system"],
            range=color_sfcore_restofsf_restofbayarea[:2],
        ),
        # order=alt.Order("geog_order:O"),  # for area charts
        # strokeDash="weekday",
        column=alt.Column("weekday", title=None, spacing=30),
        tooltip=[
            "year",
            "geography",
            alt.Tooltip("avg_daily_ridership", title="ridership"),
            alt.Tooltip("ridership (% of 2019)", format=".1%"),
        ],
    )
    # NOTE(review): Vega-Lite documents "fit" autosizing and
    # width="container" for single/layered views; confirm they have the
    # intended effect together with the `column` facet used here.
    .properties(
        title="BART average daily ridership",
        autosize=alt.AutoSizeParams(type="fit", contains="padding", resize=True),
    )
)
bart_ridership_trend_chart.save("../output/Links/bart-ridership-trend.png", scale=3)
bart_ridership_trend_chart.properties(width="container").save(
    "../output/Links/bart-ridership-trend.html"
)
bart_ridership_trend_chart
