## Claustrophobic Streets
Work by Matt Franchi


### Notebook Description: 
In this notebook, we combine the previously computed traffic (crowdedness) and clutter measures for each sidewalk point into a single 'claustrophobia' metric, defined as crowdedness × (clutter + 1). Note that this metric is not rigorously validated, because ground truth is extremely difficult to ascertain (and is, in fact, somewhat a matter of opinion).

### Performance Notes: 
We run this notebook on a compute node with 64GB RAM and 8 CPUs. 




In [16]:
import pandas as pd 
import geopandas as gpd 
from shapely import wkt

import matplotlib.pyplot as plt 

from tqdm import tqdm 
from glob import glob 

import logging 
# Root logging config: each record shows timestamp, logger name, level and message.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
log = logging.getLogger("aggregation")

# Emit a first record so the cell output confirms logging is wired up.
log.info("Initialization complete.")

2024-07-21 15:38:22,474 - aggregation - INFO - Initialization complete.


In [17]:
# Projected coordinate reference system for this notebook. The same EPSG code is
# assigned to the sidewalk, clutter and traffic GeoDataFrames when they are loaded.
PROJ_CRS = 'EPSG:2263'

In [18]:
# Load the segmentized NYC sidewalk table; geometry is stored as WKT text in the CSV.
nyc_sidewalks = pd.read_csv("../data/segmentized_nyc_sidewalks.csv", engine='pyarrow')
# Parse the WKT strings into shapely geometries and tag them with the notebook-wide
# PROJ_CRS constant (defined in the configuration cell) instead of repeating the literal.
nyc_sidewalks = gpd.GeoDataFrame(nyc_sidewalks, geometry=nyc_sidewalks['geometry'].apply(wkt.loads), crs=PROJ_CRS)

# Name the first column 'point_index'; it is the key used to merge clutter and traffic.
# The assignment is positional, so only the first column is renamed.
nyc_sidewalks.columns = ['point_index'] + list(nyc_sidewalks.columns[1:])

In [19]:
# Summary statistics of the numeric sidewalk columns, as a quick sanity check on the load.
nyc_sidewalks.describe() 

Unnamed: 0,point_index,shape_area,shape_leng,feat_code,sub_code,source_id
count,2532597.0,2532597.0,2532597.0,2532597.0,2532597.0,2532597.0
mean,1266298.0,29555.03,5155.917,3800.0,380001.2,13397480000.0
std,731097.9,77903.6,11858.98,0.0,3.249733,5278941000.0
min,0.0,10.18359,13.88815,3800.0,380000.0,1380000000.0
25%,633149.0,11982.01,2447.419,3800.0,380000.0,9380001000.0
50%,1266298.0,16908.38,3301.795,3800.0,380000.0,14380000000.0
75%,1899447.0,23317.85,4109.916,3800.0,380000.0,18380000000.0
max,2532596.0,1247661.0,171447.1,3800.0,380010.0,21380050000.0


In [20]:
# Load the clutter table (later merged onto the sidewalks by 'point_index').
clutter = pd.read_csv("../data/nyc_sidewalks_clutter.csv", engine='pyarrow')
# Parse the WKT geometry and use the notebook-wide PROJ_CRS constant rather than
# repeating the EPSG literal.
clutter = gpd.GeoDataFrame(clutter, geometry=clutter['geometry'].apply(wkt.loads), crs=PROJ_CRS)
# Treat missing values as zero. Reassign instead of mutating in place so the cell
# produces the same result when re-run.
clutter = clutter.fillna(0)
log.info("Loaded clutter data.")

2024-07-21 15:40:16,995 - aggregation - INFO - Loaded clutter data.


In [21]:
# Load the average-traffic table (later merged onto the sidewalks by 'point_index').
traffic = pd.read_csv("../data/avg_traffic_by_sidewalk_august.csv", engine='pyarrow')
# Parse the WKT geometry and use the notebook-wide PROJ_CRS constant rather than
# repeating the EPSG literal.
traffic = gpd.GeoDataFrame(traffic, geometry=traffic['geometry'].apply(wkt.loads), crs=PROJ_CRS)
# Treat missing values as zero. Reassign instead of mutating in place so the cell
# produces the same result when re-run.
traffic = traffic.fillna(0)
log.info("Loaded traffic data.")

2024-07-21 15:40:29,359 - aggregation - INFO - Loaded traffic data.


In [22]:
# Attach clutter columns to every sidewalk point. Columns that exist in both frames
# keep the sidewalk name; the clutter copy gets a '_clutter' suffix.
_n_points = len(nyc_sidewalks)
nyc_sidewalks = nyc_sidewalks.merge(clutter, on='point_index', how='left', suffixes=('', '_clutter'))
# A left merge only adds rows when the right side repeats a key. The metric is defined
# per sidewalk point, so fail loudly instead of silently duplicating points.
assert len(nyc_sidewalks) == _n_points, "clutter merge changed the row count: duplicate point_index values in clutter"
log.info("Merged clutter data.")

2024-07-21 15:40:29,615 - aggregation - INFO - Merged clutter data.


In [23]:
# Attach traffic columns to every sidewalk point. Columns that exist in both frames
# keep the existing name; the traffic copy gets a '_traffic' suffix.
_n_points = len(nyc_sidewalks)
nyc_sidewalks = nyc_sidewalks.merge(traffic, on='point_index', how='left', suffixes=('', '_traffic'))
# A left merge only adds rows when the right side repeats a key. The metric is defined
# per sidewalk point, so fail loudly instead of silently duplicating points.
assert len(nyc_sidewalks) == _n_points, "traffic merge changed the row count: duplicate point_index values in traffic"
log.info("Merged traffic data.")

2024-07-21 15:40:30,054 - aggregation - INFO - Merged traffic data.


In [24]:
# Drop the redundant metadata columns duplicated by the two merges. Those duplicates
# carry the merge suffix, so match on the suffix itself: a substring test would also
# drop any column that merely contains '_clutter' or '_traffic' in the middle of its name.
nyc_sidewalks = nyc_sidewalks.drop(
    columns=[col for col in nyc_sidewalks.columns if col.endswith(('_clutter', '_traffic'))]
)

In [25]:
# Display the merged frame to inspect the combined sidewalk, clutter and traffic columns.
nyc_sidewalks 

Unnamed: 0,point_index,shape_area,shape_leng,feat_code,status,sub_code,source_id,geometry,Unnamed: 9,shape_width,...,crowdedness_14,crowdedness_15,crowdedness_16,crowdedness_17,crowdedness_18,crowdedness_19,crowdedness_20,crowdedness_21,crowdedness_22,crowdedness_23
0,0,12252.910554,1763.887092,3800,Unchanged,380000,21380000001,POINT (984808.190 190837.352),0,6.946539,...,0.058891,0.059059,0.059756,0.018377,0.047986,0.000000,0.000000,0.0,0.0,0.0
1,1,12252.910554,1763.887092,3800,Unchanged,380000,21380000001,POINT (984791.772 190796.639),1,6.946539,...,0.052667,0.069656,0.058268,0.018942,0.058758,0.000000,0.000000,0.0,0.0,0.0
2,2,12252.910554,1763.887092,3800,Unchanged,380000,21380000001,POINT (984775.354 190755.926),2,6.946539,...,0.041130,0.080976,0.049357,0.032506,0.045460,0.000000,0.000000,0.0,0.0,0.0
3,3,12252.910554,1763.887092,3800,Unchanged,380000,21380000001,POINT (984758.935 190715.213),3,6.946539,...,0.019194,0.035989,0.021729,0.028791,0.021593,0.000000,0.000000,0.0,0.0,0.0
4,4,12252.910554,1763.887092,3800,Unchanged,380000,21380000001,POINT (984742.517 190674.500),4,6.946539,...,0.010865,0.040683,0.019932,0.024820,0.023344,0.000000,0.000000,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2532592,2532592,179962.054211,12729.013141,3800,Updated,380000,21380000485,POINT (979569.116 199467.889),2532592,14.137942,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
2532593,2532593,179962.054211,12729.013141,3800,Updated,380000,21380000485,POINT (979593.477 199509.115),2532593,14.137942,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
2532594,2532594,179962.054211,12729.013141,3800,Updated,380000,21380000485,POINT (979617.839 199550.340),2532594,14.137942,...,0.000000,0.000000,0.141463,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
2532595,2532595,179962.054211,12729.013141,3800,Updated,380000,21380000485,POINT (979642.201 199591.565),2532595,14.137942,...,0.495122,0.000000,0.117886,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0


In [26]:
# Compute clutter relative to the average: each point's clutter divided by the mean
# clutter over all rows of the frame (1.0 means exactly average).
nyc_sidewalks['rta_clutter'] = nyc_sidewalks['clutter'].div(nyc_sidewalks['clutter'].mean())

In [27]:
# Distribution of the crowdedness input before it is combined with clutter.
nyc_sidewalks['crowdedness'].describe()

count    2.532597e+06
mean     7.887345e-02
std      1.302566e-01
min      0.000000e+00
25%      0.000000e+00
50%      3.257841e-02
75%      9.459214e-02
max      7.424472e-01
Name: crowdedness, dtype: float64

In [28]:
# Claustrophobia = crowdedness scaled by (clutter + 1). With zero clutter the score
# equals crowdedness; the +1 keeps crowded but uncluttered points from scoring zero.
nyc_sidewalks['claustrophobia'] = nyc_sidewalks['crowdedness'].mul(nyc_sidewalks['clutter'].add(1))

In [29]:
# Distribution of the resulting claustrophobia metric.
nyc_sidewalks['claustrophobia'].describe()

count    2.532597e+06
mean     8.643264e-02
std      1.507943e-01
min      0.000000e+00
25%      0.000000e+00
50%      3.375325e-02
75%      9.920833e-02
max      1.213762e+00
Name: claustrophobia, dtype: float64

In [30]:
# Persist the full table, including the new metric columns, without the pandas row index.
nyc_sidewalks.to_csv(path_or_buf="../data/nyc_sidewalks_claustrophobia.csv", index=False)