In [None]:
from pathlib import Path

import hvplot.polars
import polars as pl
from bokeh.io import output_notebook

# Select bokeh as the hvplot plotting backend for every plot in this notebook.
hvplot.extension("bokeh")
# Have bokeh render its output inline in the notebook cells.
output_notebook()

In [None]:
# The three Market St adjacent analysis neighborhoods of the northeast core.
northeast_core_market_st_adjacent_analysis_neighborhoods = {
    "Financial District/South Beach",
    "South of Market",
    "Tenderloin",
}


# The whole northeast core: the Market St adjacent neighborhoods above plus
# the remaining core neighborhoods. Building it as a union keeps the Market St
# adjacent set a subset of the core set by construction.
northeast_core_analysis_neighborhoods = (
    northeast_core_market_st_adjacent_analysis_neighborhoods
    | {
        "Mission Bay",
        "Nob Hill",
        "Chinatown",
        "North Beach",
        "Russian Hill",
    }
)


def add_geography_col(df):
    """Add a `geography` column that buckets each row by analysis neighborhood.

    The three Market St adjacent neighborhoods look so different from the rest
    of the northeast core that they get their own bucket instead of a plain
    "northeast core" vs. "rest of SF" split.

    Args:
        df: polars frame with an `analysis_neighborhood` column.

    Returns:
        `df` with a `geography` column holding one of three labels:
        "1. FiDi / South Beach, SoMa, Tenderloin", "2. rest of northeast core"
        or "3. rest of SF".
    """
    neighborhood = pl.col("analysis_neighborhood")
    is_market_st_adjacent = neighborhood.is_in(
        northeast_core_market_st_adjacent_analysis_neighborhoods
    )
    is_northeast_core = neighborhood.is_in(northeast_core_analysis_neighborhoods)

    # Branches are evaluated in order, so the second branch only labels core
    # neighborhoods that were not already claimed by the Market St adjacent one.
    geography = (
        pl.when(is_market_st_adjacent)
        .then(pl.lit("1. FiDi / South Beach, SoMa, Tenderloin"))
        .when(is_northeast_core)
        .then(pl.lit("2. rest of northeast core"))
        .otherwise(pl.lit("3. rest of SF"))
    )
    return df.with_columns(geography=geography)

In [None]:
# Source CSV of quarterly sales tax revenue per neighborhood.
filepath = r"Q:\Data\PeerAgencyDashboards\SFCity-EconRecovery\sales_tax-quarterly-updated240624.csv"
df = (
    pl.read_csv(filepath)
    .rename({"Neighborhood": "analysis_neighborhood"})
    .with_columns(
        # revenue is read as text: drop the "$" and the "," separators, then
        # cast so it can be summed
        pl.col("Sales Tax Revenue")
        .str.replace("$", "", literal=True)
        .str.replace_all(",", "")
        .cast(int),
        # "Date" is split on the space into a struct with fields
        # "year" and "quarter"
        pl.col("Date").str.split(" ").list.to_struct("max_width", ["year", "quarter"]),
    )
    # replace the "Date" struct with its "year" and "quarter" columns
    .unnest("Date")
    # "year" is still text after the split
    .with_columns(pl.col("year").cast(int))
)

In [None]:
# Citywide revenue per year: grouping by year alone collapses every quarterly
# row of that year into a single total.
annual_total_df = (
    df.group_by("year")
    .agg(pl.col("Sales Tax Revenue").sum())
    .sort("year")
)


In [None]:
# Express every year's citywide revenue as a ratio of the 2018 total and of
# the 2019 total (one `sales_tax_revenue_vs_<year>` column per base year).
annual_total_df.with_columns(
    **{
        f"sales_tax_revenue_vs_{base_year}": pl.col("Sales Tax Revenue")
        / annual_total_df.filter(pl.col("year") == base_year)
        .select("Sales Tax Revenue")
        .item()
        for base_year in (2018, 2019)
    }
)


In [None]:
# Line plot of the citywide annual totals. It exists only for interactivity,
# since bokeh area plot tooltips are broken.
#
# Plot through the `.hvplot` accessor that `import hvplot.polars` registers
# rather than `DataFrame.plot`: the latter is only hvplot-backed on
# polars < 1.6.0 (it is Altair-based from 1.6.0 on and rejects this call),
# while `.hvplot` gives the same plot on every version.
(
    df
    # sum sales tax revenue over the 4 quarters of a year
    .group_by("year")
    .agg(pl.sum("Sales Tax Revenue"))
    .sort("year")
).hvplot(x="year", y="Sales Tax Revenue", title="total annual sales tax revenue for SF")


In [None]:
# Annual revenue per geography, plus each geography's share of that year's
# citywide total.
# NOTE(review): this rebinds `annual_total_df`, which an earlier cell defines as
# the one-row-per-year citywide frame. Re-running that earlier ratio cell after
# this one would call `.item()` on a selection with one row per geography.
# Running the notebook top to bottom is unaffected.
annual_total_df = (
    (
        add_geography_col(df)
        # sum sales tax revenue over
        # 1. the 4 quarters of a year
        # 2. the geographies / grouped analysis neighborhoods
        .group_by("year", "geography")
        .agg(pl.sum("Sales Tax Revenue"))
    )
    .with_columns(
        sales_tax_revenue_share=(
            pl.col("Sales Tax Revenue") / pl.col("Sales Tax Revenue").sum().over("year")
        )
    )
    .sort("year", "geography")
)
# No CSV export here: downstream use reads the raw data directly.

# Stacked area plot next to a line plot of the same data (`+` lays them out
# side by side). Both go through the `.hvplot` accessor registered by
# `import hvplot.polars`; `DataFrame.plot` is only hvplot-backed on
# polars < 1.6.0 and is Altair-based afterwards.
sales_tax_plot = annual_total_df.hvplot.area(
    x="year",
    y="Sales Tax Revenue",
    by="geography",
    stacked=True,
    frame_width=250,
    legend=False,
) + annual_total_df.hvplot.line(
    x="year",
    y="Sales Tax Revenue",
    by="geography",
    frame_width=250,
)

# Create the output folder first so saving also works on a fresh checkout.
Path("output/figs").mkdir(parents=True, exist_ok=True)
hvplot.save(sales_tax_plot, "output/figs/sales_tax.html")
hvplot.save(sales_tax_plot, "output/figs/sales_tax.png")
sales_tax_plot