In [2]:
%load_ext autoreload

In [8]:
from pathlib import Path

import altair as alt
import polars as pl

%autoreload

In [4]:
# Folder holding the commute-mode-choice CSV exports (MTC Vital Signs, per the
# folder name). Named DATA_DIR rather than `dir` so the builtin `dir()` is not
# shadowed for the rest of the kernel session.
DATA_DIR = Path(r"Q:\Data\PeerAgencyDashboards\MTC-VitalSigns")

# Region-level table; the file name records the download date and a later manual edit.
region_df = pl.read_csv(
    DATA_DIR
    / "Commute Mode Choice - Commute Mode Choice for the Region and Counties - Region (by place of residence) - downloaded 240522 - manual edits 241017 for 2023.csv"
)
# County-level table; the file name records manual edits for SF 2022-2023.
county_df = pl.read_csv(
    DATA_DIR
    / "Commute Mode Choice - Commute Mode Choice for the Region and Counties - County (by place of residence) - downloaded 240522 - manual edits 240912-241017 for SF 2022-2023.csv"
)

In [65]:
def group_modes(df):
    """Collapse detailed commute modes into broader categories and re-total shares.

    "Drive Alone" and "Carpool" are relabelled "Automobile"; "Walk", "Bike"
    and "Other" are relabelled "Walk, Bike, and Other"; every other mode keeps
    its original label. The "share" column is then summed within each
    (year, geography, data_type, mode) group.

    Args:
        df: polars DataFrame containing at least the columns "year",
            "geography", "data_type", "mode" and "share".

    Returns:
        A polars DataFrame with one row per (year, geography, data_type, mode)
        combination, sorted by those four columns.
    """
    group_keys = ["year", "geography", "data_type", "mode"]

    # Lists (not sets) so the literal values are handed to polars in a fixed order.
    automobile_modes = ["Drive Alone", "Carpool"]
    # "Bike" is listed so the bucket's members match its label; this is a
    # no-op when the data has no such mode.
    # NOTE(review): confirm the source data spells this mode exactly "Bike".
    walk_bike_other_modes = ["Walk", "Bike", "Other"]

    grouped_mode = (
        pl.when(pl.col("mode").is_in(automobile_modes))
        .then(pl.lit("Automobile"))
        .when(pl.col("mode").is_in(walk_bike_other_modes))
        .then(pl.lit("Walk, Bike, and Other"))
        .otherwise(pl.col("mode"))
    )

    return (
        df.with_columns(mode=grouped_mode)
        .group_by(group_keys)
        .agg(pl.sum("share"))
        # group_by does not promise any row order, so sort on every key (not
        # just "year") to make the result, and any file written from it,
        # reproducible from run to run.
        .sort(group_keys)
    )


def plot_geography(df, geography):
    """Build a line chart of commute mode share over time.

    The function does not filter `df`; callers pass in rows for a single
    geography and `geography` is used only as the chart title.

    Args:
        df: polars DataFrame with "year", "geography", "mode" and "share"
            columns.
        geography: Text shown as the chart title.

    Returns:
        An Altair chart with one line per mode, "share" on the y axis
        (percent-formatted) and a date derived from "year" on the x axis.
    """
    # Author's note, kept from the original: an area plot would look better
    # but does not work with the tooltip, and a bar mark is the fallback if a
    # functioning tooltip is required. A line mark is what is drawn here.

    # Represent each year as January 1st of that year (presumably so the x
    # axis is handled as a date rather than a plain number).
    dated_df = df.with_columns(date=pl.date(pl.col("year"), 1, 1))

    x_encoding = alt.X("date", title="year")
    y_encoding = alt.Y("share").axis(format="%")
    tooltip_fields = [
        "year",
        "geography",
        "mode",
        alt.Tooltip("share", format=".1%"),
    ]

    # TODO: replace the default colour encoding with an explicit scale, i.e.
    # alt.Color("mode").scale(domain=[...], range=...) using the domain
    # ["Automobile", "Transit", "Walk, Bike, and Other", "Work From Home"];
    # the matching `range` palette has not been defined yet.
    chart = (
        alt.Chart(dated_df)
        .mark_line()
        .encode(
            x=x_encoding,
            y=y_encoding,
            color="mode",
            tooltip=tooltip_fields,
        )
    )
    return chart.properties(title=geography)


In [63]:
# Both geographies use the same time window: years strictly after 2017.
after_2017 = pl.col("year") > 2017

# San Francisco rows from the county table, relabelled under a common
# "geography" column so they can be stacked with the regional rows.
sf_county_df = (
    county_df.filter(after_2017 & (pl.col("county") == "San Francisco County"))
    .with_columns(geography=pl.lit("San Francisco"))
    .drop("county")
)
# Regional rows, relabelled the same way.
bay_area_df = (
    region_df.filter(after_2017)
    .with_columns(geography=pl.lit("Bay Area"))
    .drop("region")
)

df = group_modes(pl.concat([sf_county_df, bay_area_df]))

# write_csv does not create missing parent folders, so make sure the output
# directory exists; otherwise a fresh checkout fails on this cell.
Path("output/data").mkdir(parents=True, exist_ok=True)
df.write_csv("output/data/commute_mode_choice.csv")


In [None]:
# One chart per geography, using the rows of `df` tagged with that geography.
sf_plot = plot_geography(
    df.filter(pl.col("geography") == "San Francisco"),
    "San Francisco",
)
bayarea_plot = plot_geography(df.filter(pl.col("geography") == "Bay Area"), "Bay Area")

# `|` places the two charts side by side in a single figure.
plot = sf_plot | bayarea_plot

# Saving does not create missing parent folders, so make sure the output
# directory exists; otherwise a fresh checkout fails on this cell.
Path("output/Links").mkdir(parents=True, exist_ok=True)
plot.save("output/Links/commute_mode_choice.html")
# NOTE(review): PNG export may need an extra Altair rendering dependency
# installed in the kernel environment — confirm before relying on this file.
plot.save("output/Links/commute_mode_choice.png")
plot