In [None]:
%load_ext autoreload

In [None]:
import altair as alt
import polars as pl

%autoreload
from altair_utils import color_sf_bayarea

In [None]:
# Annual labor force / unemployment extract (CA-EDD Local Area Unemployment Stats).
laus_annual_csv = r"Q:\Data\Surveys\CA-EDD\LocalAreaUnemploymentStats\laborforceandunemployment_annual_2024627.csv"
# Keep exactly one series per county and year (county rows, annual figures,
# not seasonally adjusted) so duplicate entries cannot slip through.
county_annual_unadjusted = (
    pl.col("Area Type").eq("County")
    & pl.col("Month").eq("Annual")
    & pl.col("Seasonally Adjusted(Y/N)").eq("N")
)
df = (
    pl.read_csv(laus_annual_csv)
    .filter(county_annual_unadjusted)
    # the three filter columns are constant after filtering, so discard them
    .drop(["Area Type", "Month", "Seasonally Adjusted(Y/N)"])
    .rename({"Year": "year", "Unemployment Rate": "unemployment rate (%)"})
    # keep the percentage column and add the same figure divided by 100
    .with_columns((pl.col("unemployment rate (%)") / 100).alias("unemployment rate"))
)
# San Francisco series: take the county's rows as-is and keep only the columns
# shared with the Bay Area series, plus a geography label.
sf = (
    df.filter(pl.col("Area Name") == "San Francisco County")
    .select(
        "year",
        "unemployment rate",
        "unemployment rate (%)",
        geography=pl.lit("San Francisco"),
    )
    # Order chronologically (as the Bay Area series is) so the exported rows do
    # not depend on the row order of the source file.
    .sort("year")
)
# Counties whose labor force and unemployment counts are pooled into the
# "Bay Area" series.
bay_area_counties = {
    "San Francisco County",
    "Marin County",
    "Sonoma County",
    "Napa County",
    "Solano County",
    "Contra Costa County",
    "Alameda County",
    "Santa Clara County",
    "San Mateo County",
}
bayarea = (
    df.filter(pl.col("Area Name").is_in(bay_area_counties))
    .group_by("year")
    # Pool the raw counts first; the regional rate is then total unemployment
    # divided by total labor force rather than an average of county rates.
    .agg(pl.sum("Labor Force", "Unemployment"))
    .with_columns(
        (pl.col("Unemployment") / pl.col("Labor Force"))
        .round(3)
        .alias("unemployment rate")
    )
    .select(
        "year",
        "unemployment rate",
        # Round again after scaling: multiplying the rounded fraction by 100
        # otherwise leaves binary floating-point noise in the exported value
        # (e.g. 0.029 * 100 == 2.9000000000000004).
        (pl.col("unemployment rate") * 100).round(1).alias("unemployment rate (%)"),
        geography=pl.lit("Bay Area"),
    )
    # group_by does not guarantee row order, so sort explicitly
    .sort("year")
)
# Stack both geographies into one long table and keep only years after 2007.
output_df = pl.concat([sf, bayarea]).filter(pl.col("year").gt(2007))

In [None]:
# Export the table that backs the chart.
output_df.write_csv("output/data/unemployment.csv")

# Add a date column (1 January of each year) for the x axis; it is referenced
# below by the name "date".
chart_data = output_df.with_columns(pl.date("year", 1, 1))

# Encoding channels, built separately so the chart definition stays short.
x_year = (
    alt.X("date")
    .title("year")
    .axis(values=["2008-1-1", "2013-1-1", "2018-1-1", "2023-1-1"])
)
y_rate = alt.Y("unemployment rate", axis=alt.Axis(format="%"))
color_geography = alt.Color(
    "geography",
    scale=alt.Scale(
        domain=["San Francisco", "Bay Area"],
        range=color_sf_bayarea,
    ),
)

chart = (
    alt.Chart(chart_data)
    .mark_line()
    .encode(
        x=x_year,
        y=y_rate,
        color=color_geography,
        tooltip=["year", "geography", "unemployment rate (%)"],
    )
    .interactive()
)

# Save a PNG at 3x scale, plus an HTML copy whose width follows its container.
chart.save("output/Links/unemployment.png", scale_factor=3)
chart.properties(width="container").save("output/Links/unemployment.html")
chart