In [12]:
import pandas as pd 
import geopandas as gpd 
import numpy as np 
import json 
from glob import glob 

import sys 
# Extend the import path with the parent directory before importing `logger`;
# presumably the project-local logger module lives there — TODO confirm.
sys.path.append("../")
from logger import setup_logger
# Logger used throughout this notebook, named "analysis-df-assembly" and set to INFO.
logger = setup_logger("analysis-df-assembly")
logger.setLevel("INFO")

import os 

# First log line: confirms the import cell ran to completion.
logger.info("Modules loaded.")



[34m2024-10-22 17:32:51 - analysis-df-assembly - INFO - Modules loaded.[0m


In [13]:
# Run directories (relative paths) whose estimate*.csv files are globbed below.
ICAR_NONE_RUN = "../runs/icar_none/simulated_False/ahl_True/20241021-1038"
ICAR_CHEATING_RUN = "../runs/icar_cheating/simulated_False/ahl_True/20241022-1130"

In [14]:
# Collect the estimate CSV paths found in each run directory.
# glob() gives no ordering guarantee (the order depends on the file system),
# so the results are sorted to keep these lists, and anything built by
# iterating over them, reproducible across machines and re-runs.
ICAR_NONE_ESTIMATES = sorted(glob(f"{ICAR_NONE_RUN}/estimate*.csv"))
ICAR_CHEATING_ESTIMATES = sorted(glob(f"{ICAR_CHEATING_RUN}/estimate*.csv"))
logger.info(f"Found {len(ICAR_NONE_ESTIMATES)} ICAR_NONE estimates and {len(ICAR_CHEATING_ESTIMATES)} ICAR_CHEATING estimates.")

[34m2024-10-22 17:32:52 - analysis-df-assembly - INFO - Found 2 ICAR_NONE estimates and 3 ICAR_CHEATING estimates.[0m


In [16]:
# Read each ICAR_CHEATING estimate CSV into a DataFrame and store it under the
# file's stem (basename with the extension removed), e.g. 'estimate_p_y'.
icar_cheating_estimates = {}
for f in ICAR_CHEATING_ESTIMATES:
    df = pd.read_csv(f)
    stem = os.path.splitext(os.path.basename(f))[0]
    icar_cheating_estimates[stem] = df


In [17]:
# Show every loaded ICAR_CHEATING estimate table; the dict keys are the CSV file stems.
icar_cheating_estimates

{'estimate_p_y':           tract_id  empirical_estimate       p_y  p_y_CI_lower  p_y_CI_upper  \
 0     3.606100e+10                 NaN  0.004357      0.000315      0.019329   
 1     3.606100e+10            0.000000  0.002747      0.000933      0.006329   
 2     3.606100e+10            0.002367  0.003491      0.001280      0.007621   
 3     3.606100e+10            0.000000  0.002494      0.000954      0.005331   
 4     3.606100e+10            0.000000  0.002184      0.000753      0.005009   
 ...            ...                 ...       ...           ...           ...   
 2322  3.608502e+10            0.000000  0.002724      0.001018      0.005954   
 2323  3.608502e+10            0.004902  0.002706      0.001097      0.005603   
 2324  3.608503e+10            0.000000  0.002288      0.000870      0.004945   
 2325  3.600502e+10            0.000000  0.002583      0.000987      0.005556   
 2326  3.600502e+10            0.008000  0.002626      0.001082      0.005372   
 
       n_i

In [6]:
# Read each ICAR_NONE estimate CSV into a DataFrame, keyed by the file's stem
# (basename with the extension removed). The key was previously the full glob
# path, which did not match how icar_cheating_estimates is keyed and made
# lookups depend on the run directory; both dicts now share the same key scheme.
icar_none_estimates = {}
for f in ICAR_NONE_ESTIMATES:
    df = pd.read_csv(f)
    icar_none_estimates[os.path.splitext(os.path.basename(f))[0]] = df
    

In [18]:
# Inspect the ICAR_CHEATING table loaded from the CSV whose file stem is 'estimate_p_y'.
icar_cheating_estimates['estimate_p_y']

Unnamed: 0,tract_id,empirical_estimate,p_y,p_y_CI_lower,p_y_CI_upper,n_images_by_area
0,3.606100e+10,,0.004357,0.000315,0.019329,0
1,3.606100e+10,0.000000,0.002747,0.000933,0.006329,320
2,3.606100e+10,0.002367,0.003491,0.001280,0.007621,845
3,3.606100e+10,0.000000,0.002494,0.000954,0.005331,182
4,3.606100e+10,0.000000,0.002184,0.000753,0.005009,771
...,...,...,...,...,...,...
2322,3.608502e+10,0.000000,0.002724,0.001018,0.005954,76
2323,3.608502e+10,0.004902,0.002706,0.001097,0.005603,204
2324,3.608503e+10,0.000000,0.002288,0.000870,0.004945,1264
2325,3.600502e+10,0.000000,0.002583,0.000987,0.005556,264


In [7]:
# Read the NYC CT GeoJSON (the "-wi-" 2020 file) and log how many rows it holds.
ct_nyc_path = 'geo/data/ct-nyc-wi-2020.geojson'
ct_nyc = gpd.read_file(ct_nyc_path)
logger.info(f"Loaded NYC CT shapefile with {len(ct_nyc.index)} CTs.")

[34m2024-10-22 16:04:58 - analysis-df-assembly - INFO - Loaded NYC CT shapefile with 2325 CTs.[0m


In [8]:
# Read the water-clipped NYC CT GeoJSON (2020) and log how many rows it holds.
ct_nyc_clip_path = 'geo/data/ct-nyc-2020.geojson'
ct_nyc_clip = gpd.read_file(ct_nyc_clip_path)
logger.info(f"Loaded NYC CT (water clipped) shapefile with {len(ct_nyc_clip.index)} CTs.")

[34m2024-10-22 16:04:59 - analysis-df-assembly - INFO - Loaded NYC CT (water clipped) shapefile with 2327 CTs.[0m


In [9]:
# Display the frame read from 'geo/data/ct-nyc-wi-2020.geojson'.
ct_nyc

Unnamed: 0,OBJECTID,CTLabel,BoroCode,BoroName,CT2020,BoroCT2020,CDEligibil,NTAName,NTA2020,CDTA2020,CDTANAME,GEOID,PUMA,Shape__Area,Shape__Length,geometry
0,1,1,1,Manhattan,000100,1000100,,The Battery-Governors Island-Ellis Island-Libe...,MN0191,MN01,MN01 Financial District-Tribeca (CD 1 Equivalent),36061000100,4121,1.842846e+06,10832.203947,"MULTIPOLYGON (((-74.04388 40.69020, -74.04351 ..."
1,2,2.01,1,Manhattan,000201,1000201,,Chinatown-Two Bridges,MN0301,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),36061000201,4103,9.723121e+05,4754.495247,"POLYGON ((-73.98450 40.70952, -73.98655 40.709..."
2,3,6,1,Manhattan,000600,1000600,,Chinatown-Two Bridges,MN0301,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),36061000600,4103,2.582705e+06,6976.286215,"POLYGON ((-73.99022 40.71441, -73.98934 40.714..."
3,4,14.01,1,Manhattan,001401,1001401,,Lower East Side,MN0302,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),36061001401,4103,1.006117e+06,5075.332000,"POLYGON ((-73.98837 40.71645, -73.98754 40.716..."
4,5,14.02,1,Manhattan,001402,1001402,,Lower East Side,MN0302,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),36061001402,4103,1.226206e+06,4459.156019,"POLYGON ((-73.98507 40.71909, -73.98423 40.718..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2320,2321,176,5,Staten Island,017600,5017600,,Annadale-Huguenot-Prince's Bay-Woodrow,SI0304,SI03,SI03 South Shore (CD 3 Approximation),36085017600,4503,3.569832e+07,26056.597520,"POLYGON ((-74.16135 40.52938, -74.16086 40.528..."
2321,2322,228.02,5,Staten Island,022802,5022802,,Freshkills Park (North),SI0291,SI02,SI02 Mid-Island (CD 2 Approximation),36085022802,4502,6.815375e+07,44266.026025,"POLYGON ((-74.16720 40.60208, -74.16763 40.599..."
2322,2323,291.02,5,Staten Island,029102,5029102,,New Springville-Willowbrook-Bulls Head-Travis,SI0204,SI02,SI02 Mid-Island (CD 2 Approximation),36085029102,4502,1.062361e+08,58694.561770,"POLYGON ((-74.16873 40.62121, -74.16879 40.621..."
2323,2324,161,2,Bronx,016100,2016100,,Crotona Park East,BX0303,BX03,BX03 Morrisania-Crotona Park East (CD 3 Approx...,36005016100,4263,2.574284e+06,6476.484228,"POLYGON ((-73.88201 40.83745, -73.88204 40.837..."
