# Create mapping from Taxi Zone to Police precincts

The following chunk of code was taken and adapted from ChatGPT and also debugged using ChatGPT

In [1]:
# Import required packages
import pandas as pd 
import geopandas as gpd 

In [2]:
# Load the shapefiles
taxi_zones = gpd.read_file('../data/raw/taxi_zones/taxi_zones.shp')  
police_precincts = gpd.read_file('../data/raw/precincts/nypp.shp')  

# Ensure both datasets use the same coordinate reference system (CRS)
taxi_zones = taxi_zones.to_crs(police_precincts.crs)

# Calculate the centroid of each taxi zone
taxi_zones['centroid'] = taxi_zones.geometry.centroid

# Convert centroids to a GeoDataFrame
taxi_centroids = gpd.GeoDataFrame(taxi_zones[['LocationID']], geometry=taxi_zones['centroid'])

# Perform a spatial join with precincts using the centroids
taxi_zones_with_precincts = gpd.sjoin(taxi_centroids, police_precincts, how='left', predicate='within')

# Now taxi_zones_with_precincts should have each taxi zone associated with a single precinct
mapping = taxi_zones_with_precincts[['LocationID', 'Precinct']].drop_duplicates()

# Identify taxi zones with blank precincts
missing_precincts = mapping[mapping['Precinct'].isnull()]

# Calculate centroids again if needed
missing_precincts = taxi_zones[taxi_zones['LocationID'].isin(missing_precincts['LocationID'])]

# Use a small buffer around the centroids to find the nearest precinct
missing_precincts['geometry'] = missing_precincts.geometry.buffer(100)  # 100 meters buffer

# Perform spatial join again using the buffered geometry
nearest_precincts = gpd.sjoin(missing_precincts, police_precincts, how='left', predicate='intersects')

# Update the original mapping
for index, row in nearest_precincts.iterrows():
    mapping.loc[mapping['LocationID'] == row['LocationID'], 'Precinct'] = row['Precinct']

# Drop rows where LocationID is 1 (removed from the taxi data as well)
mapping = mapping[mapping['LocationID'] != 1]

# Save the updated mapping
mapping.to_csv('../data/curated/taxi_zone_to_precinct_mapping.csv', index=False)

# Check the result
print(mapping.head())

   LocationID  Precinct
1           2     100.0
2           3      49.0
3           4       9.0
4           5     123.0
5           6     122.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
