In [4]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path
import geopandas as gpd

In [None]:
folder_path = Path('stops_files')
transit_dfs = {}
for file in folder_path.glob("*.txt"):
    print(file.name + " loaded into transit_dfs")
    df = pd.read_csv(file)
    transit_dfs[file.name] = df

1709.txt loaded into transit_dfs
1906.txt loaded into transit_dfs
2406.txt loaded into transit_dfs
2201.txt loaded into transit_dfs
2001.txt loaded into transit_dfs
1509.txt loaded into transit_dfs
2206.txt loaded into transit_dfs
2004.txt loaded into transit_dfs
2401.txt loaded into transit_dfs
1901.txt loaded into transit_dfs
1309.txt loaded into transit_dfs
2006.txt loaded into transit_dfs
2101.txt loaded into transit_dfs
1409.txt loaded into transit_dfs
1609.txt loaded into transit_dfs
1806.txt loaded into transit_dfs
2301.txt loaded into transit_dfs
2310.txt loaded into transit_dfs
2106.txt loaded into transit_dfs
1209.txt loaded into transit_dfs
2306.txt loaded into transit_dfs
1801.txt loaded into transit_dfs
2111.txt loaded into transit_dfs
1401.txt loaded into transit_dfs
2109.txt loaded into transit_dfs
1206.txt loaded into transit_dfs
2309.txt loaded into transit_dfs
1601.txt loaded into transit_dfs
1406.txt loaded into transit_dfs
1410.txt loaded into transit_dfs
1606.txt l

In [None]:
common_cols = set(transit_dfs['1206.txt'].columns)
for df in transit_dfs.values():
    common_cols = common_cols.intersection(df.columns)
print(common_cols)

{'stop_place', 'stop_code', 'reference_place', 'parent_station', 'intersection_code', 'stop_name', 'wheelchair_boarding', 'stop_name_short', 'stop_lon', 'stop_lat', 'location_type', 'stop_id'}


Now, we need to standardize the columns across our dataframes.

In [None]:
for yymm in transit_dfs:
    df = transit_dfs[yymm]
    df = df[list(common_cols)]
    df = df.drop(['stop_code', 'stop_id', 'parent_station', 'wheelchair_boarding', 'intersection_code', 'stop_name'], axis=1)
    transit_dfs[yymm] = df

{'1709.txt':      stop_place reference_place         stop_name_short    stop_lon  \
 0        cabmon             NaN  Cabrillo Natl Monument -117.240426   
 1           NaN             NaN            Pearl&Draper -117.276187   
 2           NaN             NaN               Pearl&Fay -117.273810   
 3           NaN             NaN      Try Pines&Exchange -117.268673   
 4           NaN             NaN      Try Pines&Hillside -117.261752   
 ...         ...             ...                     ...         ...   
 4617        NaN             NaN                  swcoll -116.995951   
 4618        NaN             NaN                   sytro -117.029851   
 4619        NaN             NaN                     utc -117.213628   
 4620        NaN            wasS                  wasS_S -117.184250   
 4621        NaN             NaN                    welS -116.982109   
 
        stop_lat  location_type  
 0     32.674458              0  
 1     32.839578              0  
 2     32.840128    

In [None]:
transit_gdfs = {}
for yymm in transit_dfs:
    transit_gdfs[yymm] = gpd.GeoDataFrame(transit_dfs[yymm], geometry=gpd.points_from_xy(y=transit_dfs[yymm].stop_lat, x=transit_dfs[yymm].stop_lon), crs="EPSG:4326")

{'1709.txt':      stop_place reference_place         stop_name_short    stop_lon  \
 0        cabmon             NaN  Cabrillo Natl Monument -117.240426   
 1           NaN             NaN            Pearl&Draper -117.276187   
 2           NaN             NaN               Pearl&Fay -117.273810   
 3           NaN             NaN      Try Pines&Exchange -117.268673   
 4           NaN             NaN      Try Pines&Hillside -117.261752   
 ...         ...             ...                     ...         ...   
 4617        NaN             NaN                  swcoll -116.995951   
 4618        NaN             NaN                   sytro -117.029851   
 4619        NaN             NaN                     utc -117.213628   
 4620        NaN            wasS                  wasS_S -117.184250   
 4621        NaN             NaN                    welS -116.982109   
 
        stop_lat  location_type                     geometry  
 0     32.674458              0  POINT (-117.24043 32.67446

In [None]:
# Load zoning and classify
zones = gpd.read_file("Zoning_Base_SD_shapefile/Zoning_Base_SD.shp")

def classify_zone(z):
    if z.startswith("RS") or z.startswith("RM") or z.startswith("RX"):
         return "Residential"
    elif z.startswith("CO") or z.startswith("CN") or z.startswith("CC"):
         return "Commercial"
    elif z.startswith ("IP") or z.startswith("IL") or z.startswith("IH"):
         return "Industrial"
    elif z.startswith("AG") or z.startswith("AR"):
         return "Agricultural"
    else:
        return "Other"

zones["ZONE_NAME"] = zones["ZONE_NAME"].apply(classify_zone)

neighborhoods = gpd.read_file("SDPD_Beats_shapefile/SDPD_Beats.shp")
if neighborhoods.crs != zones.crs:
    zones = zones.to_crs(neighborhoods.crs)

zone_in_neighborhoods = gpd.sjoin(zones,neighborhoods, how="inner", predicate="intersects")
zone_counts = zone_in_neighborhoods.groupby(["NAME","ZONE_NAME"]).size().unstack(fill_value=0).reset_index()

for yymm in transit_gdfs:
    transit_gdfs[yymm] = transit_gdfs[yymm].to_crs(neighborhoods.crs)

    stops_in_neighborhoods = gpd.sjoin(transit_gdfs[yymm], neighborhoods, how="inner", predicate="within")
    stop_counts = stops_in_neighborhoods.groupby("NAME").size().reset_index(name="stop_count")

    neighborhood_stats = zone_counts.merge(stop_counts, on="NAME", how="left")
    neighborhood_stats["stop_count"].fillna(0, inplace=True)

    melted = neighborhood_stats.melt(
        id_vars=["NAME","stop_count"],
        value_vars=["Residential","Commercial","Industrial","Agricultural", "Other"],
        var_name="ZONE_NAME",
        value_name="zone_count"
    )

    melted["stops_per_zone_unit"] = melted["stop_count"] / (melted["zone_count"] + 1e-6)

    plt.figure(figsize=(10,6))
    sns.boxplot(data=melted, x="ZONE_NAME", y="stops_per_zone_unit")
    plt.title("Transit Stop Density by Zone Type in Neighborhoods")
    plt.ylabel("Transit Stops per Zone Unit")
    plt.xlabel("Zone Type")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()
