In [1]:
%load_ext autoreload

In [17]:
import altair as alt
import polars as pl

%autoreload
from altair_utils import color_sfubercore_restofsfcore_restofsf


In [22]:
# The three northeast core analysis neighborhoods that are treated as
# "Market St adjacent" (per the variable name) and reported as their own group.
northeast_core_market_st_adjacent_analysis_neighborhoods = {
    "Tenderloin",
    "South of Market",
    "Financial District/South Beach",
}

# The full northeast core: the Market St adjacent neighborhoods plus five
# further analysis neighborhoods.
northeast_core_analysis_neighborhoods = (
    northeast_core_market_st_adjacent_analysis_neighborhoods
    | {
        "Mission Bay",
        "Nob Hill",
        "Chinatown",
        "North Beach",
        "Russian Hill",
    }
)


def add_geography_col(df):
    """Return `df` with a `geography` column bucketing rows by neighborhood.

    The label is derived from the `analysis_neighborhood` column:

    - "FiDi / South Beach, SoMa, Tenderloin" for the Market St adjacent
      neighborhoods,
    - "rest of northeast core" for the remaining northeast core neighborhoods,
    - "rest of SF" for every other row.

    The Market St adjacent neighborhoods get their own bucket (instead of a
    plain two-way "northeast core" / "rest of SF" split) because they look so
    different from the rest of the core that separating them out is valuable.
    """
    neighborhood = pl.col("analysis_neighborhood")
    is_market_st_adjacent = neighborhood.is_in(
        northeast_core_market_st_adjacent_analysis_neighborhoods
    )
    is_northeast_core = neighborhood.is_in(northeast_core_analysis_neighborhoods)

    # Branches are evaluated in order, so the second branch only ever labels
    # core neighborhoods that are NOT among the 3 Market St adjacent ones.
    geography_label = (
        pl.when(is_market_st_adjacent)
        .then(pl.lit("FiDi / South Beach, SoMa, Tenderloin"))
        .when(is_northeast_core)
        .then(pl.lit("rest of northeast core"))
        .otherwise(pl.lit("rest of SF"))
    )
    return df.with_columns(geography=geography_label)

In [23]:
# Quarterly sales tax revenue per neighborhood (columns used below: "Date",
# "Neighborhood", "Sales Tax Revenue").
filepath = r"Q:\Data\PeerAgencyDashboards\SFCity-EconRecovery\sales_tax-quarterly-updated240624.csv"

# "Date" is split on the space into a two-field struct (year, quarter).
# NOTE(review): assumes values of the form "<year> <quarter>" — confirm
# against the CSV.
date_parts_expr = (
    pl.col("Date").str.split(" ").list.to_struct("max_width", ["year", "quarter"])
)

# Revenue arrives as text; drop the "$" (matched literally, since "$" is a
# regex anchor otherwise) and the "," separators so it can be cast to an integer.
revenue_int_expr = (
    pl.col("Sales Tax Revenue")
    .str.replace("$", "", literal=True)
    .str.replace_all(",", "")
    .cast(int)
)

df = (
    pl.read_csv(filepath)
    .with_columns(date_parts_expr, revenue_int_expr)
    .rename({"Neighborhood": "analysis_neighborhood"})
    # expand the struct into separate "year" and "quarter" columns
    .unnest("Date")
    # "year" is still a string after the split
    .with_columns(pl.col("year").cast(int))
)

In [24]:
# Wide table of annual revenue: one row per analysis neighborhood, one column
# per year.
annual_wide_df = (
    # total the quarterly rows into one value per (year, neighborhood)
    df.group_by(["year", "analysis_neighborhood"])
    .agg(pl.col("Sales Tax Revenue").sum())
    # pivot emits columns in order of discovery, so sorting first yields the
    # year columns in ascending order
    .sort(["year", "analysis_neighborhood"])
    .pivot(on="year", index="analysis_neighborhood", values="Sales Tax Revenue")
)

# Exported so that Abe can make the map by combining it with the GIS file for
# Analysis Neighborhoods.
annual_wide_df.write_csv(
    r"Q:\Model Research\downtown_today\output\data\sales_tax-annual-wide.csv"
)

In [25]:
# Citywide revenue per year: every row of a given year (all quarters, all
# neighborhoods) is summed into a single total.
annual_total_df = (
    df.group_by("year").agg(pl.col("Sales Tax Revenue").sum()).sort("year")
)

In [None]:
# Citywide revenue of each year as a ratio to the 2018 and the 2019 totals
# (1.0 == same revenue as the baseline year).
# Expects `annual_total_df` to hold one row per year (the citywide totals);
# a later cell rebinds that name to a per-geography table. `.item()` raises
# unless exactly one row matches the baseline year.
annual_total_df.with_columns(
    (
        pl.col("Sales Tax Revenue")
        / annual_total_df.filter(pl.col("year") == 2018)
        .get_column("Sales Tax Revenue")
        .item()
    ).alias("sales_tax_revenue_vs_2018"),
    (
        pl.col("Sales Tax Revenue")
        / annual_total_df.filter(pl.col("year") == 2019)
        .get_column("Sales Tax Revenue")
        .item()
    ).alias("sales_tax_revenue_vs_2019"),
)

In [None]:
# Line chart of the citywide revenue per year. The yearly totals are
# recomputed from `df` here (all quarters and neighborhoods summed per year).
(
    df.group_by("year")
    .agg(pl.col("Sales Tax Revenue").sum())
    .sort("year")
    .plot.line(x="year", y="Sales Tax Revenue")
    .properties(title="total annual sales tax revenue for SF")
)

In [62]:
# Annual revenue per geography bucket, plus each bucket's share of that
# year's citywide total.
# NOTE: this rebinds `annual_total_df`, the name the earlier cells use for the
# citywide (one row per year) totals, so those cells must run before this one.
annual_total_df = (
    df.pipe(add_geography_col)
    .rename({"Sales Tax Revenue": "sales tax revenue"})
    # collapse the quarters and the grouped analysis neighborhoods into one
    # total per (year, geography)
    .group_by(["year", "geography"])
    .agg(pl.col("sales tax revenue").sum())
    .with_columns(
        # share of the year's total across all geographies, to 3 decimals
        (pl.col("sales tax revenue") / pl.col("sales tax revenue").sum().over("year"))
        .round(3)
        .alias("sales tax revenue share")
    )
    .sort(["year", "geography"])
    # keep 2018 onwards
    .filter(pl.col("year") > 2017)
)

# NOTE(review): an earlier note here said the CSV export is not needed (the
# raw data can be used directly), yet the file is still written — confirm
# whether the export should stay.
annual_total_df.write_csv("output/data/sales_tax.csv")

# `date` (January 1 of each year) is added only after the export, so it is
# available to the charts but is not part of the CSV.
annual_total_df = annual_total_df.with_columns(
    date=pl.date(year=pl.col("year"), month=1, day=1)
)

In [65]:
# Stacked area chart of annual sales tax revenue, one layer per geography.
sales_tax_chart = (
    alt.Chart(annual_total_df)
    # numeric rank per geography, consumed by the `order` channel below to
    # control the order in which the areas are stacked
    .transform_calculate(
        order="{'rest of SF': 0, 'rest of northeast core': 1, 'FiDi / South Beach, SoMa, Tenderloin': 2}[datum.geography]"
    )
    .mark_area()
    .encode(
        alt.X("date").title("year"),
        # "$s": dollar sign with an SI-prefixed value
        alt.Y("sales tax revenue", axis=alt.Axis(format="$s")),
        alt.Color(
            "geography:N",
            scale=alt.Scale(
                # the explicit domain pins each geography to its colour in
                # `color_sfubercore_restofsfcore_restofsf`
                domain=[
                    "FiDi / South Beach, SoMa, Tenderloin",
                    "rest of northeast core",
                    "rest of SF",
                ],
                range=color_sfubercore_restofsfcore_restofsf,
            ),
        ),
        alt.Order("order:O"),
        tooltip=["year", "geography", "sales tax revenue"],
    )
)

# Export an interactive (HTML) and a static (PNG) copy, then display the chart.
sales_tax_chart.save("output/Links/sales_tax.html")
sales_tax_chart.save("output/Links/sales_tax.png")
sales_tax_chart


In [None]:
# Line chart of each geography's share of the annual sales tax revenue within
# San Francisco (one line per geography).
#
# No `order` channel is set here: this chart defines no `order` field (only
# the stacked area chart computes one), and on a line mark the `order` channel
# decides the sequence in which points are connected. Leaving it out lets the
# points be connected along the x axis, i.e. chronologically.
sales_tax_shares_chart = (
    alt.Chart(annual_total_df)
    .mark_line()
    .encode(
        x=alt.X("date", title="year"),
        y=alt.Y(
            "sales tax revenue share",
            title="sales tax revenue shares within San Francisco",
        ).axis(format="%"),
        color=alt.Color("geography:N").scale(
            # the explicit domain pins each geography to its colour in
            # `color_sfubercore_restofsfcore_restofsf`
            domain=[
                "FiDi / South Beach, SoMa, Tenderloin",
                "rest of northeast core",
                "rest of SF",
            ],
            range=color_sfubercore_restofsfcore_restofsf,
        ),
        tooltip=["year", "geography", "sales tax revenue", "sales tax revenue share"],
    )
)

# Export an interactive (HTML) and a static (PNG) copy, then display the chart.
sales_tax_shares_chart.save("output/Links/sales_tax-shares.html")
sales_tax_shares_chart.save("output/Links/sales_tax-shares.png")
sales_tax_shares_chart
