In [53]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from shapely import wkt

In [100]:
df = pd.read_csv('Data/Raw/Parks_Data.csv')

# drop rows with no date
df.dropna(subset=['ACQUISITIONDATE'], inplace=True)
df.sample(5)

Unnamed: 0,ACQUISITIONDATE,ACRES,ADDRESS,BOROUGH,CLASS,COMMUNITYBOARD,COUNCILDISTRICT,DEPARTMENT,EAPPLY,GISOBJID,...,PIP_RATABLE,PRECINCT,RETIRED,SIGNNAME,SUBCATEGORY,TYPECATEGORY,US_CONGRESS,WATERFRONT,ZIPCODE,multipolygon
504,09/12/1941 12:00:00 AM,0.943,1480 PARK AVENUE,M,PARK,111,8,M-11,Peter Minuit Playground,100004991.0,...,True,23.0,False,Peter Minuit Playground,JOP,Jointly Operated Playground,13.0,False,10029,MULTIPOLYGON (((-73.94713332007458 40.79440606...
1332,11/24/1997 12:00:00 AM,1.868,786 EAST 181 STREET,X,PARK,206,15,X-06,Mapes Park,100005181.0,...,True,48.0,False,Mapes Park,Neighborhood Park,Neighborhood Park,15.0,False,10460,MULTIPOLYGON (((-73.88506332614014 40.84731526...
492,12/31/1949 12:00:00 AM,0.192,328 EAST 42 STREET,M,PARK,106,4,M-06,Tudor Grove Playground,100004409.0,...,True,17.0,False,Tudor Grove Playground,Neighborhood Plgd,Playground,12.0,False,10017,MULTIPOLYGON (((-73.97138027879109 40.74900978...
1769,03/04/2003 12:00:00 AM,0.68,,X,PARK,206,15,X-06,West Farms Soldiers Cemetery,100004210.0,...,False,48.0,False,West Farms Soldiers Cemetery,Cemetery,Cemetery,15.0,False,10460,MULTIPOLYGON (((-73.87880008486053 40.84376663...
1304,07/26/2018 12:00:00 AM,0.321,480 BEACH 43 STREET,Q,PARK,414,31,Q-14,,100042706.0,...,False,101.0,False,Edgemere Coalition,,Garden,5.0,False,11691,MULTIPOLYGON (((-73.77476815216733 40.59802335...


In [101]:
# change to datetime
df['year'] = pd.to_datetime(df['ACQUISITIONDATE']).dt.year

# trim columns
df = df[['year', 'ACRES','ZIPCODE', 'multipolygon', 'SIGNNAME']]
df

  df['year'] = pd.to_datetime(df['ACQUISITIONDATE']).dt.year


Unnamed: 0,year,ACRES,ZIPCODE,multipolygon,SIGNNAME
0,1916,0.545,10034,MULTIPOLYGON (((-73.92305320407046 40.86718368...,Dyckman House Museum
1,1936,2.213,10457,MULTIPOLYGON (((-73.88696663993288 40.85003709...,Belmont Playground
2,2002,0.114,10460,MULTIPOLYGON (((-73.88234391814649 40.84707387...,Volky Garden & Flowers
3,2006,6.219,11373,MULTIPOLYGON (((-73.88436854859785 40.72890922...,Elmhurst Park
4,1902,3.312,10021,MULTIPOLYGON (((-73.94903734207423 40.76900910...,John Jay Park
...,...,...,...,...,...
2047,1934,897.690,"11354, 11355, 11367, 11368",MULTIPOLYGON (((-73.84509867168501 40.75388350...,Flushing Meadows Corona Park
2048,1914,109.609,11434,MULTIPOLYGON (((-73.78439768325394 40.67917896...,Baisley Pond Park
2049,1926,1.395,10467,MULTIPOLYGON (((-73.8663734574437 40.877463712...,Marcus Garvey Square
2050,1981,1.774,11233,MULTIPOLYGON (((-73.92831562852702 40.67987313...,Jackie Robinson Park Playground


## Spatial Join Polygons to Census Tracts

In [102]:
# Load the census tract shapefile
tracts = gpd.read_file('Data/Raw/tracts2020_shapefile/nyct2020.shp')
tracts = tracts.to_crs(epsg = 4326)


In [103]:
geometry = wkt.loads(df['multipolygon'])
geo_df = gpd.GeoDataFrame(df, geometry=geometry, crs="EPSG:4326")

# Perform a spatial join to match points to census tracts
geo_df = gpd.sjoin(geo_df, tracts, how='inner', predicate="intersects")
geo_df = geo_df[['year','ACRES', 'GEOID']]
geo_df

Unnamed: 0,year,ACRES,GEOID
0,1916,0.545,36061029500
1,1936,2.213,36005037300
2,2002,0.114,36005036300
3,2006,6.219,36081049900
4,1902,3.312,36061013202
...,...,...,...
2049,1926,1.395,36005037800
2050,1981,1.774,36047038100
2051,1888,80.936,36005042500
2051,1888,80.936,36005041900



### Adjust Dates


In [135]:
geo_df['num_parks'] = 1

In [136]:
b4 = geo_df[geo_df.year<2009]
df_09 = b4.groupby(['GEOID'])[['ACRES', 'num_parks']].sum().reset_index()
df_09['year'] = '2009'
df_09.sort_values('GEOID')

Unnamed: 0,GEOID,ACRES,num_parks,year
0,36005000200,270.675000,3,2009
1,36005000400,362.538000,6,2009
2,36005001600,68.147000,2,2009
3,36005001902,3.400000,2,2009
4,36005001904,51.504366,2,2009
...,...,...,...,...
1268,36085030301,200.081000,2,2009
1269,36085030302,4.465000,1,2009
1270,36085031901,3.014000,1,2009
1271,36085031902,0.271000,1,2009


In [142]:
df_list = [df_09]
for yr in [2010,2011, 2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023]:
    df_yr = geo_df[geo_df.year==yr]
    df = pd.concat([df_list[-1], df_yr])
    grouped = df.groupby(['GEOID'])[['ACRES', 'num_parks']].sum().reset_index()
    grouped['year'] = str(yr)
    df_list.append(grouped)


In [145]:
df_out = pd.concat(df_list).sort_values(['GEOID', 'year'])
df_out.rename(columns={'ACRES':'park_acres'}, inplace=True)

In [146]:
df_out.sample(5)

Unnamed: 0,GEOID,park_acres,num_parks,year
0,36005000200,270.675,3,2009
0,36005000200,272.835,4,2010
0,36005000200,272.835,4,2011
0,36005000200,272.835,4,2012
0,36005000200,272.835,4,2013
...,...,...,...,...
1291,36085032300,233.336,6,2019
1291,36085032300,233.336,6,2020
1291,36085032300,233.336,6,2021
1291,36085032300,233.336,6,2022


In [147]:
# df_out.to_parquet('Data/Cleaned/parks.parquet')