In [2]:
import pandas as pd 
import geopandas as gpd 

import matplotlib.pyplot as plt 

from tqdm import tqdm 
from glob import glob 

import logging 
# Root logging configuration: every record is rendered as
# "<timestamp> - <logger name> - <level> - <message>" at INFO level and above.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)

# Named logger used by the cells of this notebook.
log = logging.getLogger("aggregation")

log.info("Initialization complete.")

2024-05-28 08:55:36,134 - aggregation - INFO - Initialization complete.


In [5]:
# CRS the sidewalk geometries are reprojected to when they are loaded.
# NOTE(review): EPSG:2263 is presumably NY State Plane Long Island (US feet) — confirm.
PROJ_CRS = "EPSG:2263"

In [6]:
# Read the NYC sidewalk geometries and reproject them to PROJ_CRS in one chain.
nyc_sidewalks = (
    gpd.read_file("../data/Sidewalk.geojson")
    .to_crs(PROJ_CRS)
)

In [3]:
# Read the sidewalk clutter layer from disk and log once it is in memory.
clutter_path = "../data/nyc_sidewalks_clutter.geojson"
clutter = gpd.read_file(clutter_path)
log.info("Loaded clutter data.")

2024-05-28 08:56:49,772 - aggregation - INFO - Loaded clutter data.


In [4]:
# Read the average-traffic-by-sidewalk layer from disk and log once it is in memory.
traffic_path = "../data/avg_traffic_by_sidewalk.geojson"
traffic = gpd.read_file(traffic_path)
log.info("Loaded traffic data.")

2024-05-28 08:57:29,504 - aggregation - INFO - Loaded traffic data.


In [7]:
# Left-join the clutter table onto the sidewalks by source_id. Columns that exist
# on both sides keep their name on the sidewalk side and get a '_clutter' suffix
# on the clutter side.
nyc_sidewalks = nyc_sidewalks.merge(
    clutter,
    how='left',
    on='source_id',
    suffixes=('', '_clutter'),
)
log.info("Merged clutter data.")

2024-05-28 09:00:33,746 - aggregation - INFO - Merged clutter data.


In [8]:
# Left-join the traffic table onto the sidewalks by source_id. Columns that exist
# on both sides keep their name on the sidewalk side and get a '_traffic' suffix
# on the traffic side.
nyc_sidewalks = nyc_sidewalks.merge(
    traffic,
    how='left',
    on='source_id',
    suffixes=('', '_traffic'),
)
log.info("Merged traffic data.")

2024-05-28 09:00:34,854 - aggregation - INFO - Merged traffic data.


In [9]:
# Drop the duplicate columns created by the clutter/traffic merges.
#
# A merge with suffixes=('', sfx) only renames a right-hand column when its name
# collides with a left-hand column, so every generated duplicate has the form
# "<existing column>" + sfx. Matching on that exact shape (instead of a plain
# substring test) keeps unrelated columns whose name merely contains the text,
# e.g. a derived 'rta_clutter', and makes this cell safe to re-run.
_MERGE_SUFFIXES = ('_clutter', '_traffic')
_existing_cols = set(nyc_sidewalks.columns)
_duplicate_cols = [
    col
    for col in nyc_sidewalks.columns
    if isinstance(col, str)  # non-string labels cannot carry a merge suffix
    and any(
        col.endswith(sfx) and col[:-len(sfx)] in _existing_cols
        for sfx in _MERGE_SUFFIXES
    )
]
nyc_sidewalks = nyc_sidewalks.drop(columns=_duplicate_cols)

In [10]:
# Preview the merged table; the rendered output is truncated to the first and last rows.
nyc_sidewalks 

Unnamed: 0,shape_area,shape_leng,feat_code,status,sub_code,source_id,geometry,bus_stop_count,trash_can_count,linknyc_count,citybench_count,bicycle_parking_shelter_count,bicycle_rack_count,clutter,shape_width,0,1,2,rta_width,crowdedness
0,12252.9105542,1763.88709176,3800,Unchanged,380000,21380000001,"MULTIPOLYGON (((984808.186 190837.354, 984802....",0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.946539,0.846154,0.000000,5.846154,1.301905,1.101612
1,5784.68378442,1004.77769266,3800,Unchanged,380000,21380000002,"MULTIPOLYGON (((985043.889 190997.298, 985062....",0.0,1.0,0.0,0.0,0.0,1.0,2.0,5.757178,0.535714,0.071429,6.178571,1.078998,0.578035
2,2662.09145608,828.312363728,3800,Unchanged,380000,21380000003,"MULTIPOLYGON (((989755.035 195457.503, 989753....",0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,
3,77.3438094756,38.36184204,3800,Unchanged,380000,21380000103,"MULTIPOLYGON (((986680.735 193019.818, 986680....",0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.016165,1.818182,0.727273,6.545455,0.377865,0.687028
4,31976.4071204,5063.77657774,3800,Updated,380000,21380000004,"MULTIPOLYGON (((989716.746 194185.683, 989795....",0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.314735,0.720339,0.110169,5.025424,1.183494,0.852517
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49474,135440.521284,15863.5182305,3800,Unchanged,380000,21380000348,"MULTIPOLYGON (((982997.678 198881.455, 982996....",0.0,1.0,0.0,0.0,0.0,0.0,1.0,8.537861,0.553273,0.023748,4.198331,1.600148,0.885319
49475,12990.088619,2561.65806742,3800,Unchanged,380000,10380050000,"MULTIPOLYGON (((1019963.216 216972.123, 101998...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.070969,0.428571,0.047619,8.761905,0.950390,0.407310
49476,20488.0641896,4073.85420592,3800,Unchanged,380000,10380001355,"MULTIPOLYGON (((1020012.570 217881.365, 102001...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.029160,0.333333,0.000000,9.555556,0.942554,0.314185
49477,13578.6839168,1234.50743421,3800,Updated,380000,21380051294,"MULTIPOLYGON (((980150.444 198874.262, 980203....",0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.999273,3.615385,0.884615,6.346154,2.061460,7.452971


In [11]:
# Compute clutter relative to the average: each sidewalk's clutter divided by
# the mean clutter over all sidewalks in the frame.
mean_clutter = nyc_sidewalks['clutter'].mean()
nyc_sidewalks['rta_clutter'] = nyc_sidewalks['clutter'].div(mean_clutter)

In [12]:
# 'claustrophobia' metric: relative-to-average clutter multiplied by crowdedness.
# Rows with a missing crowdedness value come out as NaN.
nyc_sidewalks['claustrophobia'] = nyc_sidewalks['rta_clutter'].mul(
    nyc_sidewalks['crowdedness']
)

In [13]:
# Summary statistics of the claustrophobia metric (count excludes NaN rows).
claustrophobia_stats = nyc_sidewalks['claustrophobia'].describe()
claustrophobia_stats

count    33267.000000
mean         2.999681
std         10.301715
min          0.000000
25%          0.000000
50%          0.000000
75%          0.998003
max        267.230689
Name: claustrophobia, dtype: float64

In [14]:
# Persist the enriched sidewalks as GeoJSON.
# NOTE(review): the geometries were reprojected to PROJ_CRS when loaded, while
# RFC 7946 GeoJSON expects WGS84 coordinates — confirm downstream readers cope.
output_path = "../data/nyc_sidewalks_claustrophobia.geojson"
nyc_sidewalks.to_file(output_path, driver='GeoJSON')