In [None]:
from pathlib import Path
import tomli as tomllib
import pandas as pd

In [3]:
from transit import (
    model_run_dir,
    output_transit_dir,
    read_dbf_and_groupby_sum,
    transit_assignment_filepaths,
    read_transit_assignments,
    time_periods
    
)

In [4]:
# Load the transit assignment tables for the model run, one per time period.
# Per the log printed by this cell, each period's DBF is read and tagged with
# a 'TOD' column; the exact return type is defined in `transit` (not shown).
data = read_transit_assignments(model_run_dir, time_periods)

Successfully read and added 'TOD' to: X:\Projects\CHAMP7\Run25z_Market\SFALLMSAEA.DBF
Successfully read and added 'TOD' to: X:\Projects\CHAMP7\Run25z_Market\SFALLMSAAM.DBF
Successfully read and added 'TOD' to: X:\Projects\CHAMP7\Run25z_Market\SFALLMSAMD.DBF
Successfully read and added 'TOD' to: X:\Projects\CHAMP7\Run25z_Market\SFALLMSAPM.DBF
Successfully read and added 'TOD' to: X:\Projects\CHAMP7\Run25z_Market\SFALLMSAEV.DBF


In [5]:
def read_nodes(model_run_dir):
    """Read the node lookup spreadsheet that lives in a model run directory.

    Parameters
    ----------
    model_run_dir : str or path-like
        Directory that contains ``nodes.xls``.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``"Node"`` and ``"Node Name"``, in that order.
    """
    nodes_path = Path(model_run_dir).joinpath("nodes.xls")
    column_names = ["Node", "Node Name"]
    # header=None: the sheet's first row is read as data, and the column
    # labels come from `names` instead of from the file.
    return pd.read_excel(nodes_path, header=None, names=column_names)

In [47]:
def station_name():
    """Build the station-code to node-number lookup table.

    Returns
    -------
    pandas.DataFrame
        One row per station with columns ``"Station"`` (four-character
        station code) and ``"Node"`` (integer node number), in the order
        listed below.
    """
    # Each code sits next to its node number so the pairing cannot drift the
    # way two parallel lists can.
    code_node_pairs = (
        ("12TH", 16509),
        ("16TH", 16515),
        ("19TH", 16508),
        ("24TH", 16516),
        ("ANTC", 15231),
        ("ASHB", 16525),
        ("BALB", 16518),
        ("BAYF", 16530),
        ("CAST", 16537),
        ("CIVC", 16514),
        ("COLM", 16539),
        ("COLS", 16532),
        ("CONC", 16501),
        ("DALY", 16519),
        ("DBRK", 16523),
        ("DELN", 16521),
        ("DUBL", 16538),
        ("EMBR", 16511),
        ("FRMT", 16526),
        ("FTVL", 16533),
        ("GLEN", 16517),
        ("HAYW", 16529),
        ("LAFY", 16504),
        ("LAKE", 16534),
        ("MCAR", 16507),
        ("MLBR", 16543),
        ("MONT", 16512),
        ("NBRK", 16524),
        ("NCON", 16535),
        ("OAKL", 16000),
        ("ORIN", 16505),
        ("PCTR", 15230),
        ("PHIL", 16502),
        ("PITT", 16536),
        ("PLZA", 16522),
        ("POWL", 16513),
        ("RICH", 16520),
        ("ROCK", 16506),
        ("SANL", 16531),
        ("SBRN", 16541),
        ("SFIA", 16542),
        ("SHAY", 16528),
        ("SSAN", 16540),
        ("UCTY", 16527),
        ("WARM", 16544),
        ("WCRK", 16503),
        ("WDUB", 16545),
        ("WOAK", 16510),
    )
    lookup_columns = {
        "Station": [code for code, _ in code_node_pairs],
        "Node": [node for _, node in code_node_pairs],
    }
    return pd.DataFrame(lookup_columns)

In [46]:
def _sum_by_node_and_tod(file_name, value_column):
    """Total `value_column` per (Node, TOD) across the BART, EBART and OAC tables."""
    per_system = [
        read_dbf_and_groupby_sum(file_name, system, ["A", "TOD"], value_column)
        for system in ("BART", "EBART", "OAC")
    ]
    return (
        pd.concat(per_system)
        .groupby(["A", "TOD"])[value_column]
        .sum()
        .reset_index()
        .rename(columns={"A": "Node"})
    )


def process_BART_data(file_name, model_run_dir):
    """Build a station-by-time-period table of boardings and alightings.

    Parameters
    ----------
    file_name
        Passed straight through to ``read_dbf_and_groupby_sum``. In this
        notebook the caller passes the object returned by
        ``read_transit_assignments`` rather than a file name.
    model_run_dir : str or path-like
        Model run directory; used to locate ``nodes.xls`` via ``read_nodes``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Station``, ``TOD``, ``Key`` (``Station + TOD``),
        ``Boardings`` and ``Alightings``, sorted by ``Key`` with a fresh
        integer index.

    Notes
    -----
    The last merge is a right join onto the alightings table, so alighting
    nodes that are not in ``station_name()`` are retained with a missing
    ``Station`` and ``Key``. Callers that need stations only should drop
    those rows.
    """
    nodes = read_nodes(model_run_dir)
    stations = station_name()

    # NOTE(review): read_dbf_and_groupby_sum is defined in `transit`; it
    # presumably filters to the named system and sums the value column by
    # ["A", "TOD"] -- confirm there. Boardings are fetched before alightings,
    # in BART, EBART, OAC order.
    boardings = _sum_by_node_and_tod(file_name, "AB_BRDA")
    alightings = _sum_by_node_and_tod(file_name, "AB_XITA")

    # Attach node names, then keep one row set per listed station (right join).
    boardings = boardings.merge(nodes, on="Node", how="left")
    boardings = boardings.merge(stations, on="Node", how="right")
    bart = boardings.merge(alightings, on=["Node", "TOD"], how="right")

    # Drop rows with specific node names. The spelling "Coliseium" is kept
    # as-is; presumably it matches nodes.xls -- verify against the file.
    values_to_drop = ["Hillcrest eBART", "Coliseium OAC", "Somersville Road eBART"]
    # .copy() so the Key assignment below writes to an independent frame
    # instead of a filtered slice (avoids SettingWithCopyWarning).
    bart = bart[~bart["Node Name"].isin(values_to_drop)].copy()

    bart["Key"] = bart["Station"] + bart["TOD"]

    # Rename by column name, not position, so a change in merge column order
    # cannot silently mislabel the data.
    bart = bart.rename(columns={"AB_BRDA": "Boardings", "AB_XITA": "Alightings"})

    bart = bart[["Station", "TOD", "Key", "Boardings", "Alightings"]]
    return bart.sort_values(by="Key").reset_index(drop=True)


In [50]:
def map_station_to_county(station):
    """Return the county a station code belongs to.

    Parameters
    ----------
    station
        Station code such as ``"EMBR"``. Matching is exact (case-sensitive).

    Returns
    -------
    str or None
        The county name, or ``None`` when the code is not listed under any
        county.
    """
    county_members = {
        "San Francisco": (
            "EMBR", "CIVC", "24TH", "MONT", "POWL", "GLEN", "16TH", "BALB",
        ),
        "San Mateo": ("DALY", "COLM", "SSAN", "SBRN", "SFIA", "MLBR"),
        "Contra Costa": (
            "RICH", "ORIN", "LAFY", "WCRK", "CONC", "NCON",
            "PITT", "ANTC", "DELN", "PHIL", "PCTR", "PLZA",
        ),
        "Alameda": (
            "WOAK", "12TH", "19TH", "MCAR", "ASHB", "DUBL", "WDUB", "CAST",
            "WARM", "UCTY", "SHAY", "HAYW", "BAYF", "SANL", "OAKL", "COLS",
            "FTVL", "LAKE", "ROCK", "DBRK", "NBRK", "FRMT",
        ),
        "Santa Clara": (),  # Add stations for Santa Clara if available
    }
    # First county (in the order above) whose member list contains the code.
    matches = (
        county for county, members in county_members.items() if station in members
    )
    return next(matches, None)

def process_BART_county(file_name, model_run_dir):
    """Aggregate station-level boardings and alightings to county totals.

    Parameters
    ----------
    file_name
        Forwarded unchanged to ``process_BART_data``.
    model_run_dir : str or path-like
        Forwarded unchanged to ``process_BART_data``.

    Returns
    -------
    pandas.DataFrame
        One row per (``County``, ``TOD``) pair with summed ``Boardings`` and
        ``Alightings``. Rows whose station maps to no county are left out,
        because ``groupby`` skips missing group keys by default.
    """
    station_totals = process_BART_data(file_name, model_run_dir)

    # Tag every station row with its county (None when the code is unmapped).
    station_totals["County"] = station_totals["Station"].apply(map_station_to_county)

    measures = ["Boardings", "Alightings"]
    by_county_and_tod = station_totals.groupby(["County", "TOD"])
    county_totals = by_county_and_tod[measures].sum()
    return county_totals.reset_index()

In [51]:
# Roll the station-level boardings/alightings up to county x time-of-day
# totals, using the assignment data loaded earlier in the notebook.
BART_county = process_BART_county(data, model_run_dir)

In [None]:
# Display the county-level summary table.
BART_county

Unnamed: 0,County,TOD,Boardings,Alightings
0,Alameda,AM,58272.88,30285.53
1,Alameda,EA,7385.66,3283.29
2,Alameda,EV,17202.58,23614.03
3,Alameda,MD,44296.0,41953.66
4,Alameda,PM,31618.39,52259.9
5,Contra Costa,AM,29904.18,3675.9
6,Contra Costa,EA,3935.42,269.25
7,Contra Costa,EV,2168.02,6940.36
8,Contra Costa,MD,13575.28,11251.26
9,Contra Costa,PM,5066.83,20596.98
