In [1]:
# Let's refactor the user's transit score calculation to use Kernel Density Estimation (KDE)
# We'll generate KDE rasters per time period and aggregate densities by neighborhood

import geopandas as gpd
import pandas as pd
import numpy as np
from shapely.geometry import Point
from pathlib import Path
from scipy.stats import gaussian_kde
from rasterio.transform import from_origin
import rasterio
from rasterstats import zonal_stats

# Step 1: Load neighborhoods and filter by area
neighborhoods = gpd.read_file("SDPD_Beats_shapefile/SDPD_Beats.shp")
neighborhoods = neighborhoods.to_crs(epsg=2230)
neighborhoods['area'] = neighborhoods.geometry.area
neighborhoods = neighborhoods[neighborhoods['area'] > 150000]

# Step 2: Load and clean transit stop data
folder_path = Path('stops_files')
transit_dfs = {}
for file in folder_path.glob("*.txt"):
    df = pd.read_csv(file)
    file_name = file.stem
    transit_dfs[file_name] = df

# Step 3: Standardize transit stop columns and filter location_type
common_cols = set.intersection(*(set(df.columns) for df in transit_dfs.values()))
transit_dfs = {
    yymm: df[list(common_cols)].loc[df['location_type'] == 1]
    for yymm, df in transit_dfs.items()
}

# Step 4: Create GeoDataFrames and project to local CRS
transit_gdfs = {
    yymm: gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(df.stop_lon, df.stop_lat),
        crs="EPSG:4326"
    ).to_crs(epsg=2230)
    for yymm, df in transit_dfs.items()
}

# Step 5: KDE function per year-month
def compute_kde(gdf, bandwidth=500, pixel_size=100):
    if len(gdf) == 0:
        return None, None

    xys = np.vstack([gdf.geometry.x, gdf.geometry.y])
    kde = gaussian_kde(xys, bw_method=bandwidth / xys.std(ddof=1))
    
    minx, miny, maxx, maxy = neighborhoods.total_bounds
    x_grid = np.arange(minx, maxx, pixel_size)
    y_grid = np.arange(miny, maxy, pixel_size)
    xx, yy = np.meshgrid(x_grid, y_grid)
    coords = np.vstack([xx.ravel(), yy.ravel()])
    
    z = kde(coords).reshape(xx.shape)
    transform = from_origin(minx, maxy, pixel_size, pixel_size)
    
    return z, transform

# Step 6: Calculate KDE scores and aggregate by neighborhood
kde_scores = {}

for yymm, gdf in transit_gdfs.items():
    z, transform = compute_kde(gdf)
    if z is None:
        continue

    # Save raster temporarily in memory
    out_meta = {
        "driver": "GTiff",
        "dtype": 'float32',
        "nodata": None,
        "width": z.shape[1],
        "height": z.shape[0],
        "count": 1,
        "crs": neighborhoods.crs,
        "transform": transform
    }

    with rasterio.io.MemoryFile() as memfile:
        with memfile.open(**out_meta) as dataset:
            dataset.write(z, 1)
            zs = zonal_stats(neighborhoods, dataset.read(1), affine=transform, stats=["mean"], nodata=None)
            kde_scores[yymm] = [zone['mean'] if zone['mean'] is not None else 0 for zone in zs]

# Step 7: Combine KDE scores into final DataFrame
kde_df = neighborhoods[['NAME', 'geometry']].copy()
for yymm in kde_scores:
    kde_df[yymm] = kde_scores[yymm]

# Rename for consistency
kde_df = kde_df.rename(columns={'NAME': 'neighborhood'})
kde_df.head()




Unnamed: 0,neighborhood,geometry,1709,1906,2406,2201,2001,1509,2206,2004,...,2209,1701,1501,1306,2009,1706,1909,2409,1301,1506
0,NORTH CITY,"POLYGON ((6258473.516 1939877.994, 6258489.997...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,SAN DIEGO,"MULTIPOLYGON (((6293859.935 1801280.003, 62938...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,,"MULTIPOLYGON (((6261640.429 1836823.561, 62616...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,NESTOR,"POLYGON ((6302781 1793246.001, 6302905 1793244...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,BIRDLAND,"POLYGON ((6284667.652 1874418.895, 6284694.392...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
def num_cols(row):
    yymm_cols = []
    for col in list(row.index):
        if str(col).isdigit() == True:
            yymm_cols.append(col)
    yymm_cols = sorted(yymm_cols, key=int)
    return row[yymm_cols]

def grp_sorter(row):
    yymm_row = num_cols(row)
    constant = yymm_row.iloc[0]
    for yymm in yymm_row:
        if constant != yymm:
            return 0
    return 1

def trans_sorter(row):
    if row['no_change'] == 0:
        return 0
    else:
        row = num_cols(row)
        if row.iloc[0] == 0:
            return 1
        else:
            return 0

In [5]:
kde_df['no_change'] = kde_df.apply(grp_sorter, axis=1)
kde_df['no_transit'] = kde_df.apply(trans_sorter, axis=1)
kde_df

Unnamed: 0,neighborhood,geometry,1709,1906,2406,2201,2001,1509,2206,2004,...,1501,1306,2009,1706,1909,2409,1301,1506,no_change,no_transit
0,NORTH CITY,"POLYGON ((6258473.516 1939877.994, 6258489.997...",0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,1,1
1,SAN DIEGO,"MULTIPOLYGON (((6293859.935 1801280.003, 62938...",0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,1,1
2,,"MULTIPOLYGON (((6261640.429 1836823.561, 62616...",0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,1,1
3,NESTOR,"POLYGON ((6302781 1793246.001, 6302905 1793244...",0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,1,1
4,BIRDLAND,"POLYGON ((6284667.652 1874418.895, 6284694.392...",0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130,TORREY HIGHLANDS,"POLYGON ((6287497.113 1936631.116, 6287462.453...",0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,1,1
131,RANCHO PENASQUITOS,"POLYGON ((6306176.245 1943421.146, 6306158.416...",9.572178e-10,2.867621e-13,2.257336e-10,2.340856e-10,2.867621e-13,2.963889e-13,2.341558e-10,2.867621e-13,...,5.650691e-13,5.649504e-13,2.867621e-13,9.571540e-10,2.867621e-13,2.258021e-10,5.649504e-13,5.650691e-13,0,0
132,SAN PASQUAL,"POLYGON ((6333849.005 1984149.149, 6333852.911...",0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,1,1
133,TIJUANA RIVER VALLEY,"POLYGON ((6305396 1784976, 6305598 1784969, 63...",0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,1,1


In [6]:
kde_df[kde_df['no_change'] == 0]

Unnamed: 0,neighborhood,geometry,1709,1906,2406,2201,2001,1509,2206,2004,...,1501,1306,2009,1706,1909,2409,1301,1506,no_change,no_transit
6,LOMA PORTAL,"POLYGON ((6263446.993 1854500.089, 6263451.987...",7.083079999999999e-20,7.157572e-20,1.246803e-20,8.097893e-21,7.157572e-20,7.183575e-20,8.198847e-21,7.157572e-20,...,7.238028e-20,7.166471e-20,7.157572e-20,7.073018999999999e-20,7.157572e-20,1.247306e-20,7.166471e-20,7.238028e-20,0,0
13,MISSION BAY,"POLYGON ((6263856.663 1869718.836, 6263859.877...",9.768209e-21,0.0,3.307735e-14,3.602702e-14,0.0,6.998326000000001e-17,3.571025e-14,0.0,...,2.115477e-24,2.102045e-24,0.0,9.768837e-21,0.0,3.308357e-14,2.102045e-24,2.115477e-24,0,0
16,NORTH CLAIREMONT,"POLYGON ((6276577.091 1889460.925, 6276593.485...",0.0,0.0,2.551389e-13,1.970463e-13,0.0,0.0,1.97144e-13,0.0,...,0.0,0.0,0.0,0.0,0.0,2.550272e-13,0.0,0.0,0,0
23,MIRAMAR RANCH NORTH,"POLYGON ((6300435.031 1923977.922, 6300439.976...",1.492827e-12,1.498296e-12,5.086951e-13,4.489496e-13,1.498296e-12,1.498642e-12,4.489661e-13,1.498296e-12,...,1.503369e-12,1.498669e-12,1.498296e-12,1.492125e-12,1.498296e-12,5.087037e-13,1.498669e-12,1.503369e-12,0,0
29,SORRENTO VALLEY,"MULTIPOLYGON (((6279268.686 1918971.966, 62793...",1.758686e-20,1.7257699999999998e-20,5.855207e-10,5.718688e-10,1.7257699999999998e-20,1.7794289999999998e-20,5.714918e-10,1.7257699999999998e-20,...,1.697217e-20,1.695444e-20,1.7257699999999998e-20,1.761591e-20,1.7257699999999998e-20,5.855185e-10,1.695444e-20,1.697217e-20,0,0
34,UNIVERSITY CITY,"POLYGON ((6262511.683 1907539.403, 6262511.029...",0.0,0.0,3.304679e-18,1.87829e-18,0.0,0.0,1.887815e-18,0.0,...,0.0,0.0,0.0,0.0,0.0,3.30668e-18,0.0,0.0,0,0
39,SABRE SPRINGS,"POLYGON ((6310896.018 1920990.084, 6310405.966...",2.459984e-31,2.4912960000000003e-31,3.145966e-14,2.477047e-14,2.4912960000000003e-31,2.435959e-31,2.494499e-14,2.4912960000000003e-31,...,2.4303200000000002e-31,2.3927880000000003e-31,2.4912960000000003e-31,2.4575310000000002e-31,2.4912960000000003e-31,3.142362e-14,2.3927880000000003e-31,2.4303200000000002e-31,0,0
40,RANCHO ENCANTADA,"POLYGON ((6332253.976 1923739.409, 6332822.999...",2.643798e-16,2.66214e-16,5.4128380000000005e-27,6.85367e-27,2.66214e-16,2.660817e-16,6.788237e-27,2.66214e-16,...,2.674855e-16,2.658358e-16,2.66214e-16,2.641514e-16,2.66214e-16,5.391024e-27,2.658358e-16,2.674855e-16,0,0
48,MISSION BAY,"POLYGON ((6266109.702 1871823.456, 6266227.199...",0.0,1.439791e-09,1.408424e-09,1.367346e-09,1.439791e-09,0.0,1.368308e-09,1.439791e-09,...,0.0,0.0,1.439791e-09,0.0,1.439791e-09,1.40853e-09,0.0,0.0,0,0
116,MIRA MESA,"POLYGON ((6297344.403 1924601.862, 6297713.75 ...",2.139661e-10,2.142654e-10,1.310526e-11,1.188915e-11,2.142654e-10,2.138661e-10,1.192333e-11,2.142654e-10,...,2.141932e-10,2.140087e-10,2.142654e-10,2.139384e-10,2.142654e-10,1.311849e-11,2.140087e-10,2.141932e-10,0,0
