In [None]:
import sys
from pathlib import Path

import geopandas as gpd
import polars as pl

sys.path.append("../2024-downtown_today")
from downtown_today_utils import parse_geog, taz_cols_to_county

In [None]:
# Root folder of the 2018 MTC-SFCTA household survey files used below.
survey_2018_dir = Path(r"Q:\Data\Surveys\HouseholdSurveys\MTC-SFCTA2018")

# Identifier columns from the survey deliverable's trip table; they are the
# bridge between individual trip_ids and the linked-trip keys used further down.
trips_raw = pl.read_csv(
    survey_2018_dir / "Deliverable_20210302/trip.csv",
    columns=["hh_id", "person_id", "person_num", "linked_trip_id", "trip_id"],
)

# otaz, dtaz, dpurp and trexpfac are taken from the trip file produced by the
# SFCTA pipeline. A linked trip can appear on several rows there, so the rows
# are collapsed to one per (hhno, pno, tsvid). Values that should be dropped
# are nulled first, which is why taking the max per group is considered safe.
trips_processed = (
    pl.read_csv(
        survey_2018_dir / r"Processing_20210302\3_merge_skims\survey2018_tripx.dat",
        columns=["hhno", "pno", "tsvid", "otaz", "dtaz", "dpurp", "trexpfac"],
        separator=" ",
    )
    .with_columns(
        pl.col("dpurp").replace({10: None}),  # CH suspects these are transfers
        pl.col("otaz", "dtaz").replace({-1: None}),
    )
    .group_by(["hhno", "pno", "tsvid"])
    .agg(pl.col("trexpfac", "otaz", "dtaz", "dpurp").max())
)

# Attach the collapsed linked-trip attributes to every raw trip row. Several
# raw rows may point at the same linked trip, hence the many-to-one validation.
trips = trips_raw.join(
    trips_processed,
    how="left",
    left_on=["hh_id", "person_num", "linked_trip_id"],
    right_on=["hhno", "pno", "tsvid"],
    validate="m:1",
)

In [None]:
# Locations of the two TAZ layers read in this cell.
taz_analysis_neighborhoods_gis_filepath = Path(
    r"Q:\GIS\Policy\San_Francisco\Analysis_Neighborhoods",
    "taz2454-sf_only-with_analysis_neighborhoods.gpkg",
)
taz_gis_filepath = r"Q:\GIS\Model\TAZ\SFCTA_TAZ\TAZ2454.shp"

# TAZ -> analysis neighborhood lookup (only the two attribute columns requested).
taz_analysis_neighborhoods = gpd.read_file(
    taz_analysis_neighborhoods_gis_filepath,
    columns=["TAZ", "analysis_neighborhood"],
)
# TAZ -> county lookup from the TAZ shapefile.
taz_county = gpd.read_file(
    taz_gis_filepath,
    columns=["TAZ", "COUNTY"],
)

In [None]:
# Build the table of map-matched trips used by every summary below:
#   1. read the matched-trips CSV,
#   2. attach otaz / dtaz / dpurp from `trips` by trip_id,
#   3. keep only records with a positive expansion factor,
#   4. pass the result through parse_geog and taz_cols_to_county,
#   5. relabel the numeric county codes with sortable "<code>-<name>" strings.
# NOTE(review): presumably parse_geog supplies o_geog / d_geog and
# taz_cols_to_county supplies o_county / d_county (all four are used
# downstream) -- confirm in downtown_today_utils.
matched_trips = taz_cols_to_county(
    parse_geog(
        # matched trips csv created with
        # Q:\Data\Surveys\HouseholdSurveys\MTC-SFCTA2018\Processing_20210302
        #   \Map Matching\notebooks\i80_101_between_280_and_baybridge_traces.ipynb
        pl.read_csv(
            survey_2018_dir
            / r"Processing_20210302\Map Matching\output_processed"
            / "trips-US101&I80-between_I280_and_baybridge.csv",
            # / "trips_bay_bridge.csv",
            schema_overrides={"trexpfac_y": pl.Float64},
        )
        .join(
            trips.select("trip_id", "otaz", "dtaz", "dpurp"),
            on="trip_id",
            # Inner join: matched records without a trip_id in `trips` are dropped.
            how="inner",
            # Guard against double counting: a duplicated trip_id in `trips` would
            # otherwise silently duplicate matched records and inflate every
            # trexpfac total / share computed below. Mirrors the validated join
            # used when `trips` itself is built.
            validate="m:1",
        )
        .rename({"trexpfac_y": "trexpfac"})
        .filter(pl.col("trexpfac") > 0),
        taz_analysis_neighborhoods,
    ),
    taz_county,
).with_columns(
    # replace_strict raises if a non-null county code is missing from this
    # mapping, so unexpected codes surface here instead of flowing into the tables.
    pl.col("o_county", "d_county").replace_strict(
        {
            1: "1-sf",
            2: "2-san mateo",
            3: "3-santa clara",
            4: "4-alameda",
            5: "5-contra costa",
            6: "6-solano",
            7: "7-napa",
            8: "8-sonoma",
            9: "9-marin",
        }
    )
)

In [None]:
# Long-format origin/destination summary by geography: record count (`len`),
# summed expansion factor, and each pair's share of the overall weighted total.
matched_trips_od_long = (
    matched_trips.group_by(["o_geog", "d_geog"])
    .agg(
        pl.len(),
        pl.col("trexpfac").sum(),
    )
    .sort(["o_geog", "d_geog"])
    .with_columns(
        # After aggregation, the sum runs over all OD pairs, i.e. the grand total.
        (pl.col("trexpfac") / pl.col("trexpfac").sum()).alias("weighted_share")
    )
)
matched_trips_od_long

In [None]:
# OD matrix of unweighted record counts: origins down the rows,
# destinations across the (sorted) columns.
matched_trips_od_long.pivot(
    on="d_geog", index="o_geog", values="len", sort_columns=True
)

In [None]:
# OD matrix of summed expansion factors: origins down the rows,
# destinations across the (sorted) columns.
matched_trips_od_long.pivot(
    on="d_geog", index="o_geog", values="trexpfac", sort_columns=True
)

In [None]:
# OD matrix of weighted shares (each cell is that pair's fraction of the
# total expansion factor): origins down the rows, destinations across.
matched_trips_od_long.pivot(
    on="d_geog", index="o_geog", values="weighted_share", sort_columns=True
)

In [None]:
# Summary by destination purpose: record count (`len`), summed expansion
# factor, and each purpose's share of the overall weighted total.
matched_trips_dpurp_long = (
    matched_trips.group_by("dpurp")
    .agg(
        pl.len(),
        pl.col("trexpfac").sum(),
    )
    .sort(by="dpurp")
    .with_columns(
        # After aggregation, the sum runs over all purposes, i.e. the grand total.
        (pl.col("trexpfac") / pl.col("trexpfac").sum()).alias("weighted_share")
    )
)
matched_trips_dpurp_long

In [None]:
# Long-format county-to-county summary: record count (`len`), summed expansion
# factor, and each county pair's share of the overall weighted total.
matched_trips_county_long = (
    matched_trips.group_by(["o_county", "d_county"])
    .agg(
        pl.len(),
        pl.col("trexpfac").sum(),
    )
    .sort(["o_county", "d_county"])
    .with_columns(
        # After aggregation, the sum runs over all county pairs, i.e. the grand total.
        (pl.col("trexpfac") / pl.col("trexpfac").sum()).alias("weighted_share")
    )
)

In [None]:
# County-to-county matrix of summed expansion factors: origin county down
# the rows, destination county across the (sorted) columns.
matched_trips_county_long.pivot(
    on="d_county", index="o_county", values="trexpfac", sort_columns=True
)

In [None]:
# County-to-county matrix of weighted shares: origin county down the rows,
# destination county across the (sorted) columns.
matched_trips_county_long.pivot(
    on="d_county", index="o_county", values="weighted_share", sort_columns=True
)