In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import geopandas as gpd

neighborhoods = gpd.read_file("SDPD_Beats_shapefile/SDPD_Beats.shp")
neighborhoods['area'] = neighborhoods.geometry.area
neighborhoods = neighborhoods[neighborhoods['area'] > 150000]
zones = gpd.read_file("Zoning_Base_SD_shapefile/Zoning_Base_SD.shp")
folder_path = Path('stops_files')
transit_dfs = {}
for file in folder_path.glob("*.txt"):
    df = pd.read_csv(file)
    file_name = str(file.name)
    file_name = file_name.replace('.txt', '')
    transit_dfs[file_name] = df


common_cols = set(transit_dfs['1206'].columns)
for df in transit_dfs.values():
    common_cols = common_cols.intersection(df.columns)
    
for yymm in transit_dfs:
    df = transit_dfs[yymm]
    df = df[list(common_cols)]
    df = df.drop(['stop_code', 'stop_place', 'reference_place', 'parent_station', 'wheelchair_boarding', 'intersection_code', 'stop_name'], axis=1)
    #df = df[df['location_type'] == 1]
    transit_dfs[yymm] = df
transit_gdfs = {}
for yymm in transit_dfs:
    transit_gdfs[yymm] = gpd.GeoDataFrame(transit_dfs[yymm], geometry=gpd.points_from_xy(y=transit_dfs[yymm].stop_lat, x=transit_dfs[yymm].stop_lon), crs="EPSG:4326")

In [2]:
for yymm in transit_gdfs:
    transit_gdfs[yymm] = transit_gdfs[yymm].to_crs(epsg=2230)
uncounted_zones = zones[zones["ZONE_NAME"].isin(["AR-1-1", "AG-1-1", "AR-1-2"])]
zones_cleaned = zones[~zones["ZONE_NAME"].isin(["AR-1-1", "AG-1-1", "AR-1-2"])]
neighborhoods = neighborhoods.to_crs(zones_cleaned.crs)
neighborhoods_cleaned = gpd.overlay(neighborhoods, uncounted_zones, how='difference')
neighborhoods = neighborhoods.to_crs(epsg=2230)  # Example: California State Plane
neighborhoods_cleaned = neighborhoods_cleaned.rename(columns={'NAME': 'neighborhood'})
neighborhoods_cleaned = neighborhoods_cleaned.to_crs(epsg=2230)

def compute_gravity_scores(transit_gdfs, neighborhoods_gdf, radius=1500, weight_map=None):
    weight_map = weight_map or {0: 1, 1: 3}
    gravity_scores = pd.DataFrame()
    
    # Ensure projection is consistent (use local projection like EPSG:2230)
    neighborhoods = neighborhoods_gdf.copy()
    neighborhoods = neighborhoods.to_crs(epsg=2230)
    neighborhoods['centroid'] = neighborhoods.geometry.centroid
    neighborhoods = neighborhoods.set_index('neighborhood')
    
    for yymm, transit_gdf in transit_gdfs.items():
        print(f"Processing {yymm}...")
        transit_gdf = transit_gdf.to_crs(neighborhoods.crs)
        
        # Assign weights to stops
        transit_gdf['weight'] = transit_gdf['location_type'].map(weight_map).fillna(1)

        scores = []
        for idx, hood in neighborhoods.iterrows():
            center = hood['centroid']
            score = 0
            for _, stop in transit_gdf.iterrows():
                dist = center.distance(stop.geometry)
                if 0 < dist <= radius:
                    score += stop['weight'] / (dist ** 2)
            scores.append(score)
        
        gravity_scores[yymm] = scores

    gravity_scores.index = neighborhoods.index
    return gravity_scores

gravity_df = compute_gravity_scores(transit_gdfs, neighborhoods_cleaned)
gravity_df


  neighborhoods_cleaned = gpd.overlay(neighborhoods, uncounted_zones, how='difference')


Processing 1709...
Processing 1906...


KeyboardInterrupt: 

In [54]:
def num_cols(row):
    yymm_cols = []
    for col in list(row.index):
        if str(col).isdigit() == True:
            yymm_cols.append(col)
    yymm_cols = sorted(yymm_cols, key=int)
    return row[yymm_cols]

def grp_sorter(row):
    yymm_row = num_cols(row)
    constant = yymm_row.iloc[7]
    for yymm in yymm_row:
        if constant != yymm:
            return 0
    return 1

def trans_sorter(row):
    if row['no_change'] == 0:
        return 0
    else:
        row = num_cols(row)
        if row.iloc[0] == 0:
            return 1
        else:
            return 0

In [55]:
gravity_df['no_change'] = gravity_df.apply(grp_sorter, axis=1)
gravity_df['no_transit'] = gravity_df.apply(trans_sorter, axis=1)

In [59]:
gravity_df = gravity_df.merge(neighborhoods_cleaned[['neighborhood', 'geometry']], on='neighborhood', how='left')

In [None]:
#gravity_df.to_csv('gravity_df.csv', index = True)

In [58]:
gravity_df = gpd.GeoDataFrame(gravity_df, geometry='geometry', crs='EPSG:2230')
gravity_df

Unnamed: 0,neighborhood,1709,1906,2406,2201,2001,1509,2206,2004,2401,...,1306,2009,1706,1909,2409,1301,1506,no_change,no_transit,geometry
0,NORTH CITY,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000e+00,0.0,0.0,0.0,0.0,0.000000e+00,0.0,1,1,"MULTIPOLYGON (((6258808.014 1938466.991, 62591..."
1,SAN DIEGO,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000e+00,0.0,0.0,0.0,0.0,0.000000e+00,0.0,1,1,"MULTIPOLYGON (((6293794.477 1801763.592, 62938..."
2,SAN DIEGO,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000e+00,0.0,0.0,0.0,0.0,0.000000e+00,0.0,1,1,"POLYGON ((6353602.999 1802986.928, 6353600.279..."
3,SAN DIEGO,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000e+00,0.0,0.0,0.0,0.0,0.000000e+00,0.0,1,1,"POLYGON ((6470704.388 1829219.527, 6470708.439..."
4,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000e+00,0.0,0.0,0.0,0.0,0.000000e+00,0.0,1,1,"MULTIPOLYGON (((6261640.429 1836823.561, 62616..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
149,TORREY HIGHLANDS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000e+00,0.0,0.0,0.0,0.0,0.000000e+00,0.0,1,1,"MULTIPOLYGON (((6287136.999 1934267.625, 62871..."
150,RANCHO PENASQUITOS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,9.507730e-07,0.0,0.0,0.0,0.0,9.507730e-07,0.0,0,0,"MULTIPOLYGON (((6306168.141 1942019.07, 630623..."
151,SAN PASQUAL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000e+00,0.0,0.0,0.0,0.0,0.000000e+00,0.0,1,1,"MULTIPOLYGON (((6341872.954 1982808.001, 63409..."
152,TIJUANA RIVER VALLEY,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000e+00,0.0,0.0,0.0,0.0,0.000000e+00,0.0,1,1,"MULTIPOLYGON (((6307704 1784860, 6307757.384 1..."
