In [54]:
from pathlib import Path
import geopandas as gpd
import pandas as pd
import tomli as tomllib
from transit import transit_validation_2019_alfaro_filepath
from transit_function import read_transit_assignments, read_dbf_and_groupby_sum
from bart import process_BART_data, process_BART_county, process_BART_SL, read_nodes, station_name
from muni import process_muni, read_transit_lines
from screen import concat_final_SL
from map_data import process_muni_map, process_bart_map
from obs import process_obs_data
from total_val import process_valTotal_Operator, process_valTotal_Submode, assign_ferry_name, assign_muni_name
from simwrapper_table import process_mkd_muni, process_mkd_bart, process_mkd_screenline, process_data

In [55]:

with open("transit.toml", "rb") as f:
    config = tomllib.load(f)

model_run_dir = Path(config["directories"]["model_run"])
markdown_output_dir = Path(config["directories"]["markdown_output_dir"])
transit_input_dir = Path(config["directories"]["transit_input_dir"])
observed_BART = Path(config["transit"]["observed_BART"])
observed_BART_county = Path(config["transit"]["observed_BART_county"])
observed_BART_SL = Path(config["transit"]["observed_BART_SL"])
observed_MUNI_Line = Path(config["transit"]["observed_MUNI_Line"])
observed_SL = Path(config["transit"]["observed_SL"])
observed_NTD = Path(config["transit"]["observed_NTD"])
model_BART = Path(config["output"]["model_BART"])
model_BART_county = Path(config["output"]["model_BART_county"])
model_BART_SL = Path(config["output"]["model_BART_SL"])
model_MUNI_Line = Path(config["output"]["model_MUNI_Line"]) 
model_SL = Path(config["output"]["model_SL"])
output_dir = model_run_dir / "validation_workbook" / "output"
output_transit_dir = output_dir / "transit"
transit_validation_2019_alfaro_filepath = config["transit"][
    "transit_validation_2019_alfaro_filepath"
]
transit_line_rename_filepath = (
    Path(config["directories"]["resources"]) / config["transit"]["line_rename_filename"]
)
time_periods = ["EA", "AM", "MD", "PM", "EV"]
tod_order = ['EA', 'AM', 'MD', 'PM', 'EV', 'Total']


In [56]:
combined_gdf = read_transit_assignments(model_run_dir, time_periods)

Successfully read and added 'TOD' to: X:\Projects\CHAMP7\Run25z_Market\SFALLMSAEA.DBF
Successfully read and added 'TOD' to: X:\Projects\CHAMP7\Run25z_Market\SFALLMSAAM.DBF
Successfully read and added 'TOD' to: X:\Projects\CHAMP7\Run25z_Market\SFALLMSAMD.DBF
Successfully read and added 'TOD' to: X:\Projects\CHAMP7\Run25z_Market\SFALLMSAPM.DBF
Successfully read and added 'TOD' to: X:\Projects\CHAMP7\Run25z_Market\SFALLMSAEV.DBF


In [57]:
obs_MUNI_line = pd.read_excel(
    transit_validation_2019_alfaro_filepath,
    usecols="B:H",
    sheet_name="obs_MUNI_line",
    skiprows=list(range(9)),
)

In [85]:
nodes = read_nodes(model_run_dir)
nodes

Unnamed: 0,Node,Node Name
0,7607,Airport/101 SB ON Near Oyster Point
1,7712,Delong/John Daly
2,7714,Bayshore/near Sierra Point Pkwy
3,7715,Bayshore/101 SB Off
4,7716,Bayshore/San Bruno
...,...,...
9076,15600,SF Ferry Building
9077,15639,Mission Bay Water Shuttle
9078,15633,Central Waterfront Water Shuttle
9079,15637,Hunters Point Water Shuttle


In [94]:
bart_boarding = read_dbf_and_groupby_sum(combined_gdf, "BART", ["A","TOD"], "AB_BRDA")
ebart_boarding = read_dbf_and_groupby_sum(combined_gdf, "EBART", ["A", "TOD"], "AB_BRDA")
oac_boarding = read_dbf_and_groupby_sum(combined_gdf, "OAC", ["A", "TOD"], "AB_BRDA")

bart_alighting = read_dbf_and_groupby_sum(combined_gdf, "BART", ["A", "TOD"], "AB_XITA")
ebart_alighting = read_dbf_and_groupby_sum(combined_gdf, "EBART", ["A", "TOD"], "AB_XITA")
oac_alighting = read_dbf_and_groupby_sum(combined_gdf, "OAC", ["A", "TOD"], "AB_XITA")

# Concatenate and group data
bart_nodea = pd.concat([bart_boarding, ebart_boarding, oac_boarding])
bart_nodea = bart_nodea.groupby(["A", "TOD"])["AB_BRDA"].sum().reset_index()
bart_nodea.columns = ["Node", "TOD", "AB_BRDA"]

bart_nodeb = pd.concat([bart_alighting, ebart_alighting, oac_alighting])
bart_nodeb = bart_nodeb.groupby(["A", "TOD"])["AB_XITA"].sum().reset_index()
bart_nodeb.columns = ["Node", "TOD", "AB_XITA"]

# Merge with other dataframes
bart_nodea = pd.merge(bart_nodea, nodes, on=["Node"], how="left")
bart_nodea['Station'] = bart_nodea['Node Name'].map(station_dict)
# BART_A = pd.merge(BART_A, station, on=["Node"], how="right")
bart = pd.merge(bart_nodea, bart_nodeb, on=["Node", "TOD"], how="right")

# Drop rows with specific values
values_to_drop = ["Hillcrest eBART", "Coliseium OAC", "Somersville Road eBART"]


In [93]:
bart_nodea

Unnamed: 0,Node,TOD,AB_XITA,Node Name,Station
0,15230,AM,138.91,Somersville Road eBART,PCTR
1,15230,EA,7.47,Somersville Road eBART,PCTR
2,15230,EV,96.10,Somersville Road eBART,PCTR
3,15230,MD,172.13,Somersville Road eBART,PCTR
4,15230,PM,212.04,Somersville Road eBART,PCTR
...,...,...,...,...,...
240,16545,AM,0.00,West Dublin BART,WDUB
241,16545,EA,0.00,West Dublin BART,WDUB
242,16545,EV,0.00,West Dublin BART,WDUB
243,16545,MD,0.00,West Dublin BART,WDUB


In [97]:
bart

Unnamed: 0,Node,TOD,AB_BRDA,Node Name,Station,AB_XITA
10,16000,AM,6.97,Oakland Airport OAC,OAKL,0.0
11,16000,EA,0.06,Oakland Airport OAC,OAKL,0.0
12,16000,EV,52.69,Oakland Airport OAC,OAKL,0.0
13,16000,MD,73.31,Oakland Airport OAC,OAKL,0.0
14,16000,PM,146.16,Oakland Airport OAC,OAKL,0.0
...,...,...,...,...,...,...
240,16545,AM,0.00,West Dublin BART,WDUB,0.0
241,16545,EA,0.00,West Dublin BART,WDUB,0.0
242,16545,EV,0.00,West Dublin BART,WDUB,0.0
243,16545,MD,0.00,West Dublin BART,WDUB,0.0


In [96]:
bart = bart[~bart["Node Name"].isin(values_to_drop)]

In [59]:
node[node["Node"] == 16509]

Unnamed: 0,Node,Node Name
93,16509,Oakland City Center BART


In [60]:
BART_BRDA = read_dbf_and_groupby_sum(combined_gdf, "BART", ["A","TOD"], "AB_BRDA")
EBART_BRDA = read_dbf_and_groupby_sum(combined_gdf, "EBART", ["A", "TOD"], "AB_BRDA")
OAC_BRDA = read_dbf_and_groupby_sum(combined_gdf, "OAC", ["A", "TOD"], "AB_BRDA")
BART_A = pd.concat([BART_BRDA, EBART_BRDA, OAC_BRDA])
BART_A = BART_A.groupby(["A", "TOD"])["AB_BRDA"].sum().reset_index()
BART_A.columns = ["Node", "TOD", "AB_BRDA"]

In [61]:
BART_A

Unnamed: 0,Node,TOD,AB_BRDA
0,15230,AM,222.88
1,15230,EA,93.19
2,15230,EV,38.72
3,15230,MD,189.29
4,15230,PM,87.13
...,...,...,...
235,16545,AM,0.00
236,16545,EA,0.00
237,16545,EV,0.00
238,16545,MD,0.00


In [62]:
BART_A = pd.merge(BART_A, node, on=["Node"], how="left")
BART_A 

Unnamed: 0,Node,TOD,AB_BRDA,Node Name
0,15230,AM,222.88,Somersville Road eBART
1,15230,EA,93.19,Somersville Road eBART
2,15230,EV,38.72,Somersville Road eBART
3,15230,MD,189.29,Somersville Road eBART
4,15230,PM,87.13,Somersville Road eBART
...,...,...,...,...
240,16545,AM,0.00,West Dublin BART
241,16545,EA,0.00,West Dublin BART
242,16545,EV,0.00,West Dublin BART
243,16545,MD,0.00,West Dublin BART


In [63]:
station_dict = {
    "Oakland City Center BART": "12TH",
    "16th/Mission BART": "16TH",
    "19th St Oakland BART": "19TH",
    "24th/Mission BART": "24TH",
    "Hillcrest eBART": "ANTC",
    "Ashby BART": "ASHB",
    "Balboa Park BART": "BALB",
    "Bay Fair BART": "BAYF",
    "Castro Valley BART": "CAST",
    "Civic Center BART": "CIVC",
    "Colma BART": "COLM",
    "Coliseum OAK BART": "COLS",
    "Coliseium OAC": "COLS",
    "Concord BART": "CONC",
    "Daly City BART": "DALY",
    "Downtown Berkeley BART": "DBRK",
    "El Cerrito del Norte BART": "DELN",
    "Dublin/Pleasanton BART": "DUBL",
    "Embarcadero BART": "EMBR",
    "Fremont BART": "FRMT",
    "Fruitvale BART": "FTVL",
    "Glen Park BART": "GLEN",
    "Hayward BART": "HAYW",
    "Lafayette BART": "LAFY",
    "Lake Merritt BART": "LAKE",
    "MacArthur BART": "MCAR",
    "Millbrae BART": "MLBR",
    "Montgomery BART": "MONT",
    "North Berkeley BART": "NBRK",
    "North Concord BART": "NCON",
    "Oakland Airport OAC": "OAKL",
    "Orinda BART": "ORIN",
    "Somersville Road eBART": "PCTR",
    "Pleasant Hill BART": "PHIL",
    "Pittsburg/Bay Point BART": "PITT",
    "El Cerrito Plaza BART": "PLZA",
    "Powell BART": "POWL",
    "Richmond BART": "RICH",
    "Rockridge BART": "ROCK",
    "San Leandro BART": "SANL",
    "San Bruno BART": "SBRN",
    "SFO BART": "SFIA",
    "S Hayward BART": "SHAY",
    "South SF BART": "SSAN",
    "Union City BART": "UCTY",
    "Warm Springs BART": "WARM",
    "Walnut Creek BART": "WCRK",
    "West Dublin BART": "WDUB",
    "W Oakland BART": "WOAK"
}


In [64]:
counties = {
    "San Francisco": ["EMBR", "CIVC", "24TH", "MONT", "POWL", "GLEN", "16TH", "BALB"],
    "San Mateo": ["DALY", "COLM", "SSAN", "SBRN", "SFIA", "MLBR"],
    "Contra Costa": [
        "RICH",
        "ORIN",
        "LAFY",
        "WCRK",
        "CONC",
        "NCON",
        "PITT",
        "ANTC",
        "DELN",
        "PHIL",
        "PCTR",
        "PLZA",
    ],
    "Alameda": [
        "WOAK",
        "12TH",
        "19TH",
        "MCAR",
        "ASHB",
        "DUBL",
        "WDUB",
        "CAST",
        "WARM",
        "UCTY",
        "SHAY",
        "HAYW",
        "BAYF",
        "SANL",
        "OAKL",
        "COLS",
        "FTVL",
        "LAKE",
        "ROCK",
        "DBRK",
        "NBRK",
        "FRMT",
    ],
    "Santa Clara": [],  # Add stations for Santa Clara if available
}

In [65]:
BART_A['Station'] = BART_A['Node Name'].map(station_dict)

In [66]:
BART_A

Unnamed: 0,Node,TOD,AB_BRDA,Node Name,Station
0,15230,AM,222.88,Somersville Road eBART,PCTR
1,15230,EA,93.19,Somersville Road eBART,PCTR
2,15230,EV,38.72,Somersville Road eBART,PCTR
3,15230,MD,189.29,Somersville Road eBART,PCTR
4,15230,PM,87.13,Somersville Road eBART,PCTR
...,...,...,...,...,...
240,16545,AM,0.00,West Dublin BART,WDUB
241,16545,EA,0.00,West Dublin BART,WDUB
242,16545,EV,0.00,West Dublin BART,WDUB
243,16545,MD,0.00,West Dublin BART,WDUB


In [67]:
station = station_name()
BART_A = pd.merge(BART_A, station, on=["Node"], how="right")
BART_A

Unnamed: 0,Node,TOD,AB_BRDA,Node Name,Station_x,Station_y
0,16509,AM,4284.81,Oakland City Center BART,12TH,12TH
1,16509,EA,664.50,Oakland City Center BART,12TH,12TH
2,16509,EV,5321.28,Oakland City Center BART,12TH,12TH
3,16509,MD,4256.35,Oakland City Center BART,12TH,12TH
4,16509,PM,4259.75,Oakland City Center BART,12TH,12TH
...,...,...,...,...,...,...
240,16510,AM,1285.67,W Oakland BART,WOAK,WOAK
241,16510,EA,183.01,W Oakland BART,WOAK,WOAK
242,16510,EV,204.55,W Oakland BART,WOAK,WOAK
243,16510,MD,1118.81,W Oakland BART,WOAK,WOAK


In [69]:
unique_pairs = BART_A[['Node Name', 'Station']].drop_duplicates()

KeyError: "['Station'] not in index"

In [46]:
BART_A[['Node Name']].drop_duplicates()

Unnamed: 0,Node Name
0,Somersville Road eBART
5,Hillcrest eBART
10,Oakland Airport OAC
15,Concord BART
20,Pleasant Hill BART
25,Walnut Creek BART
30,Lafayette BART
35,Orinda BART
40,Rockridge BART
45,MacArthur BART


In [35]:
unique_pairs

Unnamed: 0,Node Name,Station
0,Oakland City Center BART,12TH
5,16th/Mission BART,16TH
10,19th St Oakland BART,19TH
15,24th/Mission BART,24TH
20,Hillcrest eBART,ANTC
25,Ashby BART,ASHB
30,Balboa Park BART,BALB
35,Bay Fair BART,BAYF
40,Castro Valley BART,CAST
45,Civic Center BART,CIVC


In [11]:
line = read_transit_lines(model_run_dir, transit_line_rename_filepath)

In [71]:
node_mapping = node.set_index("Node")["Node Name"].to_dict()

In [76]:
lines = ["BART", "EBART", "OAC"]

# Read, group, and sum data for each line
dfs = [read_dbf_and_groupby_sum(combined_gdf, line, ["A", "B", "TOD"], "AB_VOL") for line in lines]
intra = pd.concat(dfs)
intra["A_name"] = intra["A"].map(node_mapping)
intra["B_name"] = intra["B"].map(node_mapping)

In [52]:
node_mapping = node.set_index("Node")["Node Name"].to_dict()

In [78]:
intra["A_station"] = intra["A_name"].map(station_dict )
intra["B_station"] = intra["B_name"].map(station_dict )

In [79]:
intra

Unnamed: 0,A,B,TOD,AB_VOL,A_name,B_name,A_station,B_station
0,16501,16502,AM,8742.44,Concord BART,Pleasant Hill BART,CONC,PHIL
1,16501,16502,EA,1610.53,Concord BART,Pleasant Hill BART,CONC,PHIL
2,16501,16502,EV,248.50,Concord BART,Pleasant Hill BART,CONC,PHIL
3,16501,16502,MD,3109.44,Concord BART,Pleasant Hill BART,CONC,PHIL
4,16501,16502,PM,748.40,Concord BART,Pleasant Hill BART,CONC,PHIL
...,...,...,...,...,...,...,...,...
5,16532,16000,AM,159.66,Coliseium OAC,Oakland Airport OAC,COLS,OAKL
6,16532,16000,EA,22.44,Coliseium OAC,Oakland Airport OAC,COLS,OAKL
7,16532,16000,EV,4.22,Coliseium OAC,Oakland Airport OAC,COLS,OAKL
8,16532,16000,MD,69.28,Coliseium OAC,Oakland Airport OAC,COLS,OAKL


In [None]:
intra["A_name"] = intra["A"].map(node_mapping)
intra["B_name"] = intra["B"].map(node_mapping)

In [12]:
line

Unnamed: 0,Name,Line
154,MUN108,108
155,MUN108AM,108
156,MUN25I,25
157,MUN25O,25
158,MUN108PM,108
...,...,...
469,MUNTISH2,TISH
470,MUNTISH3,TISH
471,MUNTO,T
472,MUNTSHTI,TSHT


In [15]:
muni = process_muni(combined_gdf, model_run_dir, transit_line_rename_filepath, transit_input_dir, observed_MUNI_Line, output_transit_dir, model_MUNI_Line)

In [16]:
muni

In [80]:
station_locations = {
"downtown": ["CIVC", "POWL", "MONT", "EMBR"],
"not_downtown": ["GLEN", "BALB", "24TH", "16TH"],
}

# Create station to screenline mapping
station_to_label = {}
for loc in station_locations:
    stations = station_locations[loc]
    labels = [loc] * len(station_locations[loc])
    station_to_label = station_to_label | dict(zip(stations, labels))

relevant_stations = station_locations["downtown"] + station_locations["not_downtown"]

In [83]:
station_to_label = {station: label for label, stations in station_locations.items() for station in stations}

In [81]:
relevant_stations

['CIVC', 'POWL', 'MONT', 'EMBR', 'GLEN', 'BALB', '24TH', '16TH']

In [84]:
station_to_label

{'CIVC': 'downtown',
 'POWL': 'downtown',
 'MONT': 'downtown',
 'EMBR': 'downtown',
 'GLEN': 'not_downtown',
 'BALB': 'not_downtown',
 '24TH': 'not_downtown',
 '16TH': 'not_downtown'}