# **Introduction**

## **Chi-test (boroughs + svi)**
## **Bar-chart with svi as regression/scatterplot (boroughs first)**

source: https://www.atsdr.cdc.gov/place-health/media/pdfs/2024/10/SVI2022Documentation.pdf

source: https://www.atsdr.cdc.gov/place-health/php/svi/svi-interactive-map.html

In [None]:
# !pip install geopandas folium matplotlib seaborn scipy
# !pip install esda
# !pip install splot
# !pip install geopandas contextily
# # for google colab, had to reinstall some pacakges.

In [None]:
# !pip install geopandas folium matplotlib seaborn scipy esda splot

In [135]:
import pandas as pd
import geopandas as gpd
import numpy as np
import datetime as dt
import scipy

# visualization
import matplotlib.pyplot as plt
from matplotlib import colors as mcolors
import seaborn as sns

from scipy.stats import chi2_contingency
import statsmodels.api as sm

# system and utility
import warnings
import os
import io
from IPython.display import IFrame
from google.colab import files

# suppress warnings
warnings.filterwarnings('ignore')

# inline
%matplotlib inline

In [136]:
pd.set_option('display.float_format', lambda x: '%.4f' % x)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# **Step 1 Get the Eviction data**

In [137]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [138]:
# data source:
file_path1 = '/content/drive/My Drive/X999/bbl_evictions_311_svi_normal_times_correct_coordinates.csv'
file_path2 = '/content/drive/My Drive/X999/bbl_evictions_311_svi_covid_correct_coordinates.csv'

In [139]:
evictions_pre_post_raw = pd.read_csv(file_path1)
evictions_covid_raw = pd.read_csv(file_path2)
evictions_covid_raw.shape, evictions_pre_post_raw.shape
# correct dimensions

((5386, 95), (66397, 96))

In [140]:
evictions_pre_post = evictions_pre_post_raw.copy()
evictions_covid = evictions_covid_raw.copy()

In [141]:
evictions_pre_post[['latitude', 'longitude']].isna().sum()
# good, the bad bins have been cleaned

Unnamed: 0,0
latitude,0
longitude,0


In [147]:
bin_100000 = evictions_pre_post[evictions_pre_post['bin'] == 1000000]
bin_200000 = evictions_pre_post[evictions_pre_post['bin'] == 2000000]
bin_300000 = evictions_pre_post[evictions_pre_post['bin'] == 3000000]
bin_400000 = evictions_pre_post[evictions_pre_post['bin'] == 4000000]
bin_500000 = evictions_pre_post[evictions_pre_post['bin'] == 5000000]
bin_600000 = evictions_pre_post[evictions_pre_post['bin'] == 6000000]
bin_100000[['bin', 'primary_key', 'eviction_address', 'latitude', 'longitude']].head()

Unnamed: 0,bin,primary_key,eviction_address,latitude,longitude
1416,1000000,067758/17_23064,447-448 CENTRAL PARK WEST,40.7973,-73.9606
8259,1000000,300114/23N_116111,517 WEST 134TH STR EET,40.8188,-73.9541
10194,1000000,302057/23_19635,100 WEST 131ST STREET APARTMENT 3C,40.8118,-73.9432
11018,1000000,303174/23_210961,7 DEY STREET A/K/A 185 BROADWAY,40.7106,-74.01
11081,1000000,303279/22_10840,222 EAST 44TH STREET,40.7518,-73.9728


In [148]:
evictions_pre_post.head(2)

Unnamed: 0,primary_key,bbl,court_index_number,docket_number,eviction_address,eviction_apartment_number,executed_date,borough,zipcode,ejectment,eviction/legal_possession,latitude,longitude,community_board,council_district,census_tract,bin,nta,year,month_year,geometry,average_year_eviction_count,yearbuilt,bldgclass,numfloors,unitsres,ownername,bldgarea,building_type,building_category,is_condo,floor_category,rent_era,architectural_style,economic_period,residential_units_category,is_llc,building_size_category,size_quartile,decade,fips,e_totpop,rpl_theme1,rpl_theme2,rpl_theme3,rpl_theme4,rpl_themes,ep_pov150,ep_unemp,ep_nohsdp,ep_uninsur,ep_age65,ep_age17,ep_disabl,ep_limeng,ep_noveh,ep_crowd,ep_hburd,ep_afam,ep_hisp,ep_asian,ep_aian,ep_nhpi,ep_twomore,ep_otherrace,ep_minrty,ep_white,invalid_zip,svi_quartile,svi_group,air_quality,animal_issues,appliances,building_exterior,doors_windows,electrical_issues,elevator_issues,floors_stairs,general_complaints,graffiti_posting,heat_hot_water,homeless_issues,noise_complaints,other_issues,pest_issues,plumbing_issues,police_matters,public_nuisance,safety_concerns,sanitation_issues,walls_ceilings,total_complaints,average_year_eviction_unit_count,average_year_eviction_nta_count,evictions_nta_per_1k,evictions_nta_per_unit_per_1k
0,*308072/22_5865,3037420029,*308072/22,5865,356 MILLER AVE,1 AND BASEMENT,2024-12-04,BROOKLYN,11207,Not an Ejectment,Possession,40.6721,-73.8911,5.0,37.0,1152.0,3083989,East New York,2024,2024-12,POINT (-73.891105 40.672121),0.8,1930.0,C0,3.0,3.0,356 MILLER LLC,2700.0,pre-war,walk-up,False,low-rise,"Pre-1947, pre-rent-control","1921–1930, Art Deco Skyscrapers","1930-1945, great depression and WWII",3-5 units,True,small,Q3 (50-75%),1930-1939,11207,96801.0,0.9788,0.914,0.9808,0.9812,0.9839,33.9,11.1,19.1,6.0,13.8,22.5,13.8,5.3,57.8,9.1,44.7,55.9,32.8,1.5,0.0,0.0,2.9,1.6,94.7,5.3,False,Q3,medium-high,0.0,0.0,1.0,0.0,1.0,2.0,0.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,3.0,5.0,19.0,0.2667,0.0027,13.7395,0.0028
1,*313639/23_5202,3057940012,*313639/23,5202,710 61ST STREET,2ND FLOOR,2024-03-04,BROOKLYN,11220,Not an Ejectment,Possession,40.6359,-74.0119,7.0,38.0,118.0,3143881,Sunset Park East,2024,2024-03,POINT (-74.011883 40.635941),0.6,1920.0,B2,2.0,2.0,"A.R.M. PARKING, LLC",1204.0,pre-war,two-family,False,low-rise,"Pre-1947, pre-rent-control","1900–1920, Beaux-Arts","Pre-1929, pre-great depression",2-unit,True,very small,Q1 (smallest 25%),1920-1929,11220,93008.0,0.9885,0.7635,0.9594,0.9179,0.9662,37.5,7.5,37.9,11.6,13.1,25.4,8.4,40.2,61.7,23.7,43.6,1.7,40.9,40.7,0.4,0.0,1.2,0.2,85.0,15.0,False,Q3,medium-high,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,4.0,0.3,0.0004,2.0428,0.0032


In [149]:
evictions_covid.head(2)

Unnamed: 0,primary_key,bbl,court_index_number,docket_number,eviction_address,eviction_apartment_number,executed_date,borough,zipcode,ejectment,eviction/legal_possession,latitude,longitude,community_board,council_district,census_tract,bin,nta,year,month_year,geometry,average_year_eviction_count,yearbuilt,bldgclass,numfloors,unitsres,ownername,bldgarea,building_type,building_category,is_condo,floor_category,rent_era,architectural_style,economic_period,residential_units_category,is_llc,building_size_category,size_quartile,decade,fips,e_totpop,rpl_theme1,rpl_theme2,rpl_theme3,rpl_theme4,rpl_themes,ep_pov150,ep_unemp,ep_nohsdp,ep_uninsur,ep_age65,ep_age17,ep_disabl,ep_limeng,ep_noveh,ep_crowd,ep_hburd,ep_afam,ep_hisp,ep_asian,ep_aian,ep_nhpi,ep_twomore,ep_otherrace,ep_minrty,ep_white,invalid_zip,svi_quartile,air_quality,animal_issues,appliances,building_exterior,doors_windows,electrical_issues,elevator_issues,floors_stairs,general_complaints,graffiti_posting,heat_hot_water,homeless_issues,noise_complaints,other_issues,pest_issues,plumbing_issues,police_matters,public_nuisance,safety_concerns,sanitation_issues,walls_ceilings,total_complaints,average_year_eviction_unit_count,average_year_eviction_nta_count,evictions_nta_per_1k,evictions_nta_per_unit_per_1k
0,004123/20_209969,2032140141,004123/20,209969,2541 A GRAND AVE,ROOM 3B,2022-08-22,BRONX,10468,Not an Ejectment,Possession,40.8654,-73.9013,7.0,14.0,265.0,2113173,Kingsbridge Heights,2022,2022-08,POINT (-73.901317 40.865396),0.2,2004.0,C0,3.0,3.0,MONJU SARKER,3420.0,post-war,walk-up,False,low-rise,"1994–Present, vacancy decontrol","2001-present, New Architecture","1991–2008, modern economic growth",3-5 units,False,medium-small,Q4 (largest 25%),2000-2009,10468,81397.0,0.9954,0.9407,0.987,0.947,0.9874,39.5,11.6,28.3,9.2,11.2,26.4,12.2,26.9,71.8,19.2,56.7,15.6,78.0,2.3,0.0,0.0,0.5,0.5,96.9,3.1,False,Q3,0.0,0.0,0.0,0.0,3.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,2.0,0.0,0.0,2.0,0.0,0.0,0.0,3.0,1.0,14.0,0.0667,0.0001,0.688,0.0008
1,0050153/20_106030,4031560133,0050153/20,106030,98-05 67TH AVENUE,12F,2022-04-14,QUEENS,11375,Not an Ejectment,Possession,40.7242,-73.8556,6.0,29.0,71306.0,4074666,Forest Hills,2022,2022-04,POINT (-73.855552 40.724241),0.2,1960.0,D3,13.0,181.0,MARSEILLES LEASING LIMITED PARTNERSHIP,177710.0,post-war,elevator,False,high-rise,"1947–1969, rent-control","1951–1980, the International Style, Alternativ...","1946–1975, pst war economic boom",100+ units,False,mega,Q4 (largest 25%),1960-1969,11375,75212.0,0.4759,0.5698,0.8789,0.8057,0.7322,12.0,4.8,6.1,3.7,20.4,18.0,10.5,7.9,41.9,5.8,25.4,2.7,16.4,28.5,0.1,0.0,4.6,0.7,53.0,47.0,False,Q1 (Low),0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,62.0,0.0,34.0,0.0,0.0,4.0,1.0,0.0,0.0,2.0,5.0,112.0,0.0011,0.0,0.2393,0.0


In [150]:
evictions_pre_post.columns, \
evictions_covid.columns, \
evictions_pre_post.shape, \
evictions_covid.shape

(Index(['primary_key', 'bbl', 'court_index_number', 'docket_number',
        'eviction_address', 'eviction_apartment_number', 'executed_date',
        'borough', 'zipcode', 'ejectment', 'eviction/legal_possession',
        'latitude', 'longitude', 'community_board', 'council_district',
        'census_tract', 'bin', 'nta', 'year', 'month_year', 'geometry',
        'average_year_eviction_count', 'yearbuilt', 'bldgclass', 'numfloors',
        'unitsres', 'ownername', 'bldgarea', 'building_type',
        'building_category', 'is_condo', 'floor_category', 'rent_era',
        'architectural_style', 'economic_period', 'residential_units_category',
        'is_llc', 'building_size_category', 'size_quartile', 'decade', 'fips',
        'e_totpop', 'rpl_theme1', 'rpl_theme2', 'rpl_theme3', 'rpl_theme4',
        'rpl_themes', 'ep_pov150', 'ep_unemp', 'ep_nohsdp', 'ep_uninsur',
        'ep_age65', 'ep_age17', 'ep_disabl', 'ep_limeng', 'ep_noveh',
        'ep_crowd', 'ep_hburd', 'ep_afam', 'ep_hisp

In [151]:
link = '/content/drive/My Drive/X999/svi_cleaned.csv'

In [152]:
svi_df = pd.read_csv(link)
svi_df.head(2)

Unnamed: 0,fips,location,area_sqmi,e_totpop,m_totpop,e_hu,m_hu,e_hh,m_hh,e_pov150,m_pov150,e_unemp,m_unemp,e_hburd,m_hburd,e_nohsdp,m_nohsdp,e_uninsur,m_uninsur,e_age65,m_age65,e_age17,m_age17,e_disabl,m_disabl,e_sngpnt,m_sngpnt,e_limeng,m_limeng,e_minrty,m_minrty,e_munit,m_munit,e_mobile,m_mobile,e_crowd,m_crowd,e_noveh,m_noveh,e_groupq,m_groupq,ep_pov150,mp_pov150,ep_unemp,mp_unemp,ep_hburd,mp_hburd,ep_nohsdp,mp_nohsdp,ep_uninsur,mp_uninsur,ep_age65,mp_age65,ep_age17,mp_age17,ep_disabl,mp_disabl,ep_sngpnt,mp_sngpnt,ep_limeng,mp_limeng,ep_minrty,mp_minrty,ep_munit,mp_munit,ep_mobile,mp_mobile,ep_crowd,mp_crowd,ep_noveh,mp_noveh,ep_groupq,mp_groupq,epl_pov150,epl_unemp,epl_hburd,epl_nohsdp,epl_uninsur,spl_theme1,rpl_theme1,epl_age65,epl_age17,epl_disabl,epl_sngpnt,epl_limeng,spl_theme2,rpl_theme2,epl_minrty,spl_theme3,rpl_theme3,epl_munit,epl_mobile,epl_crowd,epl_noveh,epl_groupq,spl_theme4,rpl_theme4,spl_themes,rpl_themes,f_pov150,f_unemp,f_hburd,f_nohsdp,f_uninsur,f_theme1,f_age65,f_age17,f_disabl,f_sngpnt,f_limeng,f_theme2,f_minrty,f_theme3,f_munit,f_mobile,f_crowd,f_noveh,f_groupq,f_theme4,f_total,e_daypop,e_noint,m_noint,e_afam,m_afam,e_hisp,m_hisp,e_asian,m_asian,e_aian,m_aian,e_nhpi,m_nhpi,e_twomore,m_twomore,e_otherrace,m_otherrace,ep_noint,mp_noint,ep_afam,mp_afam,ep_hisp,mp_hisp,ep_asian,mp_asian,ep_aian,mp_aian,ep_nhpi,mp_nhpi,ep_twomore,mp_twomore,ep_otherrace,mp_otherrace
0,10001,ZCTA5 10001,0.6238,27004,1827,16975,831,14375,782,5248,797,761,266,3314,531,1930,534,831,289,3428,432,2694,643,2310,499,501,215,1381,405,13460,2305,15840,898,15,23,389,135,12285,840,2213,218,20.3,2.7,4.3,1.5,23.1,3.5,9.1,2.4,3.1,1.0,12.7,1.6,10.0,2.1,8.6,1.9,3.5,1.5,5.3,1.5,49.8,7.8,93.3,2.7,0.1,0.1,2.7,0.9,85.5,2.8,8.2,0.6,0.6108,0.4574,0.5573,0.5902,0.4436,2.6593,0.5688,0.142,0.1161,0.1891,0.4707,0.8777,1.7956,0.1692,0.867,0.867,0.867,0.9853,0.271,0.7402,0.9949,0.9104,3.9018,0.9806,9.2237,0.7414,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,3,3,239407,1047,389,2220,576,5206,943,5031,774,0,25,0,25,780,326,223,169,7.3,2.6,8.2,2.2,19.3,3.0,18.6,2.9,0.0,0.1,0.0,0.1,2.9,1.2,0.8,0.6
1,10002,ZCTA5 10002,0.8223,76518,2894,39094,1241,36028,1326,27908,2853,2833,574,14688,1367,18301,1376,4074,766,17681,1287,10028,1549,9896,1062,2211,499,18393,1640,56964,3226,35725,1677,16,28,2461,449,29828,1403,2090,39,36.8,3.5,7.6,1.4,40.8,3.5,30.0,2.0,5.4,1.0,23.1,1.7,13.1,1.8,13.0,1.4,6.1,1.4,24.7,2.0,74.4,3.1,91.4,3.2,0.0,0.1,6.8,1.2,82.8,1.8,2.7,0.1,0.9148,0.7946,0.9219,0.9741,0.7207,4.3261,0.9639,0.7296,0.1831,0.5186,0.739,0.9944,3.1647,0.8781,0.9369,0.9369,0.9369,0.979,0.0,0.9105,0.9915,0.773,3.654,0.9254,12.0817,0.9656,1,0,1,1,0,3,0,0,0,0,1,1,1,1,1,0,1,1,0,3,8,64307,8590,1110,6141,1194,19864,2190,28477,1989,74,83,24,45,1810,486,574,394,23.8,2.9,8.0,1.5,26.0,2.5,37.2,2.2,0.1,0.1,0.0,0.1,2.4,0.6,0.8,0.5


In [153]:
svi_df.shape

(204, 153)

In [154]:
# list(svi_df.columns)

In [155]:
svi_df.ep_nhpi.unique()

array([ 0.00e+00,  1.00e-01,  3.00e-01,  2.00e-01, -9.99e+02,  8.00e-01,
        1.20e+00,  5.00e-01,  4.00e-01])

# **Step 2: SVI items**

q quick double check

In [156]:
link = "/content/drive/My Drive/X999/NewYork_ZCTA.csv"

In [157]:
svi_raw = pd.read_csv(link)
svi_raw.head(2)

Unnamed: 0,ST,STATE,ST_ABBR,FIPS,LOCATION,AREA_SQMI,E_TOTPOP,M_TOTPOP,E_HU,M_HU,E_HH,M_HH,E_POV150,M_POV150,E_UNEMP,M_UNEMP,E_HBURD,M_HBURD,E_NOHSDP,M_NOHSDP,E_UNINSUR,M_UNINSUR,E_AGE65,M_AGE65,E_AGE17,M_AGE17,E_DISABL,M_DISABL,E_SNGPNT,M_SNGPNT,E_LIMENG,M_LIMENG,E_MINRTY,M_MINRTY,E_MUNIT,M_MUNIT,E_MOBILE,M_MOBILE,E_CROWD,M_CROWD,E_NOVEH,M_NOVEH,E_GROUPQ,M_GROUPQ,EP_POV150,MP_POV150,EP_UNEMP,MP_UNEMP,EP_HBURD,MP_HBURD,EP_NOHSDP,MP_NOHSDP,EP_UNINSUR,MP_UNINSUR,EP_AGE65,MP_AGE65,EP_AGE17,MP_AGE17,EP_DISABL,MP_DISABL,EP_SNGPNT,MP_SNGPNT,EP_LIMENG,MP_LIMENG,EP_MINRTY,MP_MINRTY,EP_MUNIT,MP_MUNIT,EP_MOBILE,MP_MOBILE,EP_CROWD,MP_CROWD,EP_NOVEH,MP_NOVEH,EP_GROUPQ,MP_GROUPQ,EPL_POV150,EPL_UNEMP,EPL_HBURD,EPL_NOHSDP,EPL_UNINSUR,SPL_THEME1,RPL_THEME1,EPL_AGE65,EPL_AGE17,EPL_DISABL,EPL_SNGPNT,EPL_LIMENG,SPL_THEME2,RPL_THEME2,EPL_MINRTY,SPL_THEME3,RPL_THEME3,EPL_MUNIT,EPL_MOBILE,EPL_CROWD,EPL_NOVEH,EPL_GROUPQ,SPL_THEME4,RPL_THEME4,SPL_THEMES,RPL_THEMES,F_POV150,F_UNEMP,F_HBURD,F_NOHSDP,F_UNINSUR,F_THEME1,F_AGE65,F_AGE17,F_DISABL,F_SNGPNT,F_LIMENG,F_THEME2,F_MINRTY,F_THEME3,F_MUNIT,F_MOBILE,F_CROWD,F_NOVEH,F_GROUPQ,F_THEME4,F_TOTAL,E_DAYPOP,E_NOINT,M_NOINT,E_AFAM,M_AFAM,E_HISP,M_HISP,E_ASIAN,M_ASIAN,E_AIAN,M_AIAN,E_NHPI,M_NHPI,E_TWOMORE,M_TWOMORE,E_OTHERRACE,M_OTHERRACE,EP_NOINT,MP_NOINT,EP_AFAM,MP_AFAM,EP_HISP,MP_HISP,EP_ASIAN,MP_ASIAN,EP_AIAN,MP_AIAN,EP_NHPI,MP_NHPI,EP_TWOMORE,MP_TWOMORE,EP_OTHERRACE,MP_OTHERRACE
0,36,New York,NY,6390,ZCTA5 06390,4.0467,53,39,253,49,19,19,17,16,0,13,9,26,0,13,27,34,0,13,6,11,31,33,0,18,9,53,20,51,0,18,4,5,0,18,0,13,17,16,32.1,18.8,0.0,52.7,47.4,100.0,0.0,51.4,50.9,45.7,0.0,45.2,11.3,19.0,58.5,41.4,0.0,94.7,17.0,99.2,37.7,92.1,0.0,7.1,1.6,2.0,0.0,94.7,0.0,75.5,32.1,18.8,0.879,0.0,0.9635,0.0,0.996,2.8385,0.6342,0.0,0.1408,0.9944,0.0,0.9775,2.1127,0.3009,0.8062,0.8062,0.8062,0.0,0.4654,0.0,0.0,0.9735,1.4389,0.2205,7.1963,0.4192,0,0,1,0,1,2,0,0,1,0,1,2,0,0,0,0,0,0,1,1,5,601,9,14,0,13,9,19,0,13,0,13,8,16,3,7,0,13,47.4,51.8,0.0,45.2,17.0,35.0,0.0,45.2,0.0,45.2,15.1,32.1,5.7,12.1,0.0,45.2
1,36,New York,NY,10001,ZCTA5 10001,0.6238,27004,1827,16975,831,14375,782,5248,797,761,266,3314,531,1930,534,831,289,3428,432,2694,643,2310,499,501,215,1381,405,13460,2305,15840,898,15,23,389,135,12285,840,2213,218,20.3,2.7,4.3,1.5,23.1,3.5,9.1,2.4,3.1,1.0,12.7,1.6,10.0,2.1,8.6,1.9,3.5,1.5,5.3,1.5,49.8,7.8,93.3,2.7,0.1,0.1,2.7,0.9,85.5,2.8,8.2,0.6,0.6108,0.4574,0.5573,0.5902,0.4436,2.6593,0.5688,0.142,0.1161,0.1891,0.4707,0.8777,1.7956,0.1692,0.867,0.867,0.867,0.9853,0.271,0.7402,0.9949,0.9104,3.9018,0.9806,9.2237,0.7414,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,3,3,239407,1047,389,2220,576,5206,943,5031,774,0,25,0,25,780,326,223,169,7.3,2.6,8.2,2.2,19.3,3.0,18.6,2.9,0.0,0.1,0.0,0.1,2.9,1.2,0.8,0.6


In [158]:
def is_nyc_zipcode(zipcode):
    zip_int = int(zipcode) if isinstance(zipcode, str) else zipcode

    # Manhattan: 10001-10282
    if 10001 <= zip_int <= 10282:
        return True
    # addition Manhattan: 10300-10499
    if 10300 <= zip_int <= 10499:
        return True
    # Bronx: 10451-10475
    if 10451 <= zip_int <= 10475:
        return True
    # Brooklyn: 11201-11256
    if 11201 <= zip_int <= 11256:
        return True
    # Queens: 11351-11436, 11101-11109
    if (11351 <= zip_int <= 11436) or (11101 <= zip_int <= 11109):
        return True
    # Staten Island: 10301-10314
    if 10301 <= zip_int <= 10314:
        return True
    # additional Queens ZIPs
    if zip_int in [11004, 11005, 11411, 11412, 11413, 11418, 11419, 11420, 11421, 11422, 11423, 11426, 11427, 11428, 11429]:
        return True
    return False

In [159]:
nyc_df = svi_raw[svi_raw['FIPS'].apply(is_nyc_zipcode)]

In [160]:
nyc_df.shape

(204, 156)

In [161]:
nyc_df.EP_NHPI.unique()
# interesting, so there was an error in the source data

array([ 0.00e+00,  1.00e-01,  3.00e-01,  2.00e-01, -9.99e+02,  8.00e-01,
        1.20e+00,  5.00e-01,  4.00e-01])

In [162]:
svi_raw.EP_NHPI.unique()

array([ 1.51e+01,  0.00e+00,  1.00e-01,  3.00e-01,  2.00e-01, -9.99e+02,
        8.00e-01,  1.20e+00,  5.00e-01,  4.00e-01,  1.10e+00,  7.00e-01,
        1.50e+00,  1.80e+00,  9.00e-01,  6.00e-01,  1.40e+00,  2.10e+00,
        2.30e+00,  1.00e+00])

In [163]:
-9.99e+02, 0.00e+00, 9.00e-01, 2.00e-01

(-999.0, 0.0, 0.9, 0.2)

# **Step 3:  Boroughs and their eviction rates**

In [165]:
evictions_pre_post_mean = evictions_pre_post[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
evictions_pre_post_mean
# may need to merge ep_twomore and ep_otherrance toegther

Unnamed: 0,0
ep_afam,29.2346
ep_asian,8.904
ep_hisp,38.7307
ep_nhpi,0.0115
ep_white,19.0236
ep_twomore,2.8025
ep_otherrace,1.0239


In [166]:
evictions_pre_post_mean = evictions_pre_post_mean.reset_index()

In [167]:
evictions_pre_post_mean.rename(columns = {'index':'race_svi', 0: "racial percentage"}, inplace=True)

In [168]:
evictions_pre_post_mean

Unnamed: 0,race_svi,racial percentage
0,ep_afam,29.2346
1,ep_asian,8.904
2,ep_hisp,38.7307
3,ep_nhpi,0.0115
4,ep_white,19.0236
5,ep_twomore,2.8025
6,ep_otherrace,1.0239


In [169]:
# type(evictions_pre_post_mean)
# so this is correct

## **Step 3.1 Selected neighborhoods**

four types of aggregated and groupedby analysis:

1) average eviction count per building, per year;

2) average eviction count per nta population;

3) average eviction count per unit per building per year;

4) average eviction count per unit per nta population.

In [177]:
analysis_columns = evictions_pre_post[['primary_key', 'bin', 'bbl', 'latitude', 'longitude', 'eviction_address', 'zipcode', 'borough', 'nta','average_year_eviction_count', 	'average_year_eviction_unit_count',
                              'average_year_eviction_nta_count', 'evictions_nta_per_1k', 'evictions_nta_per_unit_per_1k',
                              'unitsres', 'e_totpop']]
analysis_columns.head()
# average_year_eviction_nta_count is (total evictions per nta) / (nta's population)

Unnamed: 0,primary_key,bin,bbl,latitude,longitude,eviction_address,zipcode,borough,nta,average_year_eviction_count,average_year_eviction_unit_count,average_year_eviction_nta_count,evictions_nta_per_1k,evictions_nta_per_unit_per_1k,unitsres,e_totpop
0,*308072/22_5865,3083989,3037420029,40.6721,-73.8911,356 MILLER AVE,11207,BROOKLYN,East New York,0.8,0.2667,0.0027,13.7395,0.0028,3.0,96801.0
1,*313639/23_5202,3143881,3057940012,40.6359,-74.0119,710 61ST STREET,11220,BROOKLYN,Sunset Park East,0.6,0.3,0.0004,2.0428,0.0032,2.0,93008.0
2,*324973/22_5308,3143435,3057820030,40.64,-74.0171,462 60TH STREET,11220,BROOKLYN,Sunset Park West,0.6,0.15,0.0004,2.1288,0.0016,4.0,93008.0
3,*53336/16_170279,2015444,2032510420,40.8772,-73.8896,3400 PAUL AVENUE,10468,BRONX,Van Cortlandt Village,0.8,0.0023,0.0021,10.6146,0.0,352.0,81397.0
4,*5990/17_2703,2003900,2025770038,40.8112,-73.9088,480 CONCORD AVENUE,10455,BRONX,Mott Haven-Port Morris,1.6,0.0246,0.0036,17.8909,0.0006,65.0,44380.0


In [178]:
evictions_pre_post.columns

Index(['primary_key', 'bbl', 'court_index_number', 'docket_number',
       'eviction_address', 'eviction_apartment_number', 'executed_date',
       'borough', 'zipcode', 'ejectment', 'eviction/legal_possession',
       'latitude', 'longitude', 'community_board', 'council_district',
       'census_tract', 'bin', 'nta', 'year', 'month_year', 'geometry',
       'average_year_eviction_count', 'yearbuilt', 'bldgclass', 'numfloors',
       'unitsres', 'ownername', 'bldgarea', 'building_type',
       'building_category', 'is_condo', 'floor_category', 'rent_era',
       'architectural_style', 'economic_period', 'residential_units_category',
       'is_llc', 'building_size_category', 'size_quartile', 'decade', 'fips',
       'e_totpop', 'rpl_theme1', 'rpl_theme2', 'rpl_theme3', 'rpl_theme4',
       'rpl_themes', 'ep_pov150', 'ep_unemp', 'ep_nohsdp', 'ep_uninsur',
       'ep_age65', 'ep_age17', 'ep_disabl', 'ep_limeng', 'ep_noveh',
       'ep_crowd', 'ep_hburd', 'ep_afam', 'ep_hisp', 'ep_asian',

In [193]:
neighbor_evictions = evictions_pre_post.groupby('nta').agg({'average_year_eviction_nta_count': 'mean',
                                                            'ep_afam': 'first',
                                                            'ep_asian': 'first',
                                                            'ep_hisp': 'first',
                                                            'ep_nhpi': 'first',
                                                            'ep_white': 'first',
                                                            'ep_twomore': 'first',
                                                            'ep_otherrace': 'first',
                                                            'rpl_themes': 'first',
                                                            'borough': 'first',
                                                            }).reset_index()
neighbor_evictions.sort_values('average_year_eviction_nta_count', ascending=False, inplace=True)
neighbor_evictions

Unnamed: 0,nta,average_year_eviction_nta_count,ep_afam,ep_asian,ep_hisp,ep_nhpi,ep_white,ep_twomore,ep_otherrace,rpl_themes,borough
27,Central Harlem North-Polo Grounds,0.0109,54.7,1.9,32.7,0.0,6.7,2.8,1.1,0.9748,MANHATTAN
182,Woodlawn-Wakefield,0.0074,31.3,2.0,22.8,0.0,38.8,3.2,1.7,0.9398,BRONX
59,Flatbush,0.0039,51.6,5.8,7.5,0.0,29.7,4.3,1.2,0.9174,BROOKLYN
38,Crown Heights North,0.0038,66.7,1.3,15.7,0.0,11.9,3.6,0.2,0.9438,BROOKLYN
11,Bedford Park-Fordham North,0.0038,15.6,2.3,78.0,0.0,3.1,0.5,0.5,0.9874,BRONX
178,Williamsbridge-Olinville,0.0037,52.2,5.5,29.5,0.0,8.8,2.3,1.0,0.9507,BRONX
52,East Tremont,0.0037,26.5,1.4,65.0,0.0,3.8,1.9,1.0,0.9954,BRONX
107,Mott Haven-Port Morris,0.0036,21.1,1.1,74.1,0.0,1.4,1.4,1.0,0.9971,BRONX
169,Washington Heights South,0.0035,12.8,4.5,65.7,0.0,14.6,2.0,0.4,0.9771,MANHATTAN
98,Marble Hill-Inwood,0.0034,5.7,3.3,69.0,0.0,19.1,2.4,0.4,0.9541,MANHATTAN


In [194]:
top_15 = neighbor_evictions.head(15)
bottom_15 = neighbor_evictions.tail(15)
selected_nta = pd.concat([top_15, bottom_15])
selected_nta

Unnamed: 0,nta,average_year_eviction_nta_count,ep_afam,ep_asian,ep_hisp,ep_nhpi,ep_white,ep_twomore,ep_otherrace,rpl_themes,borough
27,Central Harlem North-Polo Grounds,0.0109,54.7,1.9,32.7,0.0,6.7,2.8,1.1,0.9748,MANHATTAN
182,Woodlawn-Wakefield,0.0074,31.3,2.0,22.8,0.0,38.8,3.2,1.7,0.9398,BRONX
59,Flatbush,0.0039,51.6,5.8,7.5,0.0,29.7,4.3,1.2,0.9174,BROOKLYN
38,Crown Heights North,0.0038,66.7,1.3,15.7,0.0,11.9,3.6,0.2,0.9438,BROOKLYN
11,Bedford Park-Fordham North,0.0038,15.6,2.3,78.0,0.0,3.1,0.5,0.5,0.9874,BRONX
178,Williamsbridge-Olinville,0.0037,52.2,5.5,29.5,0.0,8.8,2.3,1.0,0.9507,BRONX
52,East Tremont,0.0037,26.5,1.4,65.0,0.0,3.8,1.9,1.0,0.9954,BRONX
107,Mott Haven-Port Morris,0.0036,21.1,1.1,74.1,0.0,1.4,1.4,1.0,0.9971,BRONX
169,Washington Heights South,0.0035,12.8,4.5,65.7,0.0,14.6,2.0,0.4,0.9771,MANHATTAN
98,Marble Hill-Inwood,0.0034,5.7,3.3,69.0,0.0,19.1,2.4,0.4,0.9541,MANHATTAN


In [195]:
# get rid of the two cemetery:
selected_nta = selected_nta[~selected_nta['nta'].isin(['park-cemetery-etc-Brooklyn', 'park-cemetery-etc-Bronx'])]
selected_nta.shape

(28, 11)

In [197]:
selected_nta['black+hispanic'] = selected_nta['ep_hisp'] + selected_nta['ep_nhpi']

In [198]:
selected_nta.head()

Unnamed: 0,nta,average_year_eviction_nta_count,ep_afam,ep_asian,ep_hisp,ep_nhpi,ep_white,ep_twomore,ep_otherrace,rpl_themes,borough,black+hispanic
27,Central Harlem North-Polo Grounds,0.0109,54.7,1.9,32.7,0.0,6.7,2.8,1.1,0.9748,MANHATTAN,32.7
182,Woodlawn-Wakefield,0.0074,31.3,2.0,22.8,0.0,38.8,3.2,1.7,0.9398,BRONX,22.8
59,Flatbush,0.0039,51.6,5.8,7.5,0.0,29.7,4.3,1.2,0.9174,BROOKLYN,7.5
38,Crown Heights North,0.0038,66.7,1.3,15.7,0.0,11.9,3.6,0.2,0.9438,BROOKLYN,15.7
11,Bedford Park-Fordham North,0.0038,15.6,2.3,78.0,0.0,3.1,0.5,0.5,0.9874,BRONX,78.0


In [200]:
selected_nta_excel = selected_nta[['nta', 'average_year_eviction_nta_count', 'black+hispanic', 'rpl_themes', 'borough']]
selected_nta_excel

Unnamed: 0,nta,average_year_eviction_nta_count,black+hispanic,rpl_themes,borough
27,Central Harlem North-Polo Grounds,0.0109,32.7,0.9748,MANHATTAN
182,Woodlawn-Wakefield,0.0074,22.8,0.9398,BRONX
59,Flatbush,0.0039,7.5,0.9174,BROOKLYN
38,Crown Heights North,0.0038,15.7,0.9438,BROOKLYN
11,Bedford Park-Fordham North,0.0038,78.0,0.9874,BRONX
178,Williamsbridge-Olinville,0.0037,29.5,0.9507,BRONX
52,East Tremont,0.0037,65.0,0.9954,BRONX
107,Mott Haven-Port Morris,0.0036,74.1,0.9971,BRONX
169,Washington Heights South,0.0035,65.7,0.9771,MANHATTAN
98,Marble Hill-Inwood,0.0034,69.0,0.9541,MANHATTAN


In [201]:
selected_nta_excel.T

Unnamed: 0,27,182,59,38,11,178,52,107,169,98,163,88,108,44,81,4,113,112,134,121,47,140,161,180,176,20,2,1
nta,Central Harlem North-Polo Grounds,Woodlawn-Wakefield,Flatbush,Crown Heights North,Bedford Park-Fordham North,Williamsbridge-Olinville,East Tremont,Mott Haven-Port Morris,Washington Heights South,Marble Hill-Inwood,University Heights-Morris Heights,Kew Gardens Hills,Mount Hope,East Concourse-Concourse Village,Hunters Point-Sunnyside-West Maspeth,Auburndale,New Springville-Bloomfield-Travis,New Dorp-Midland Beach,Queensboro Hill,Ocean Parkway South,East Flushing,Rossville-Woodrow,Todt Hill-Emerson Hill-Heartland Village-Light...,Windsor Terrace,Westerleigh,Brooklyn Heights-Cobble Hill,Arden Heights,Annadale-Huguenot-Prince's Bay-Eltingville
average_year_eviction_nta_count,0.0109,0.0074,0.0039,0.0038,0.0038,0.0037,0.0037,0.0036,0.0035,0.0034,0.0033,0.0033,0.0033,0.0032,0.0030,0.0002,0.0002,0.0001,0.0001,0.0001,0.0001,0.0001,0.0001,0.0001,0.0001,0.0001,0.0001,0.0001
black+hispanic,32.7000,22.8000,7.5000,15.7000,78.0000,29.5000,65.0000,74.1000,65.7000,69.0000,69.2000,18.5000,65.3000,56.3000,27.3000,21.2000,15.2000,16.2000,21.2000,11.9000,16.2000,9.6000,15.2000,16.4000,15.2000,11.0000,12.8000,9.6000
rpl_themes,0.9748,0.9398,0.9174,0.9438,0.9874,0.9507,0.9954,0.9971,0.9771,0.9541,0.9983,0.9547,0.9989,0.9994,0.7798,0.9077,0.7861,0.8739,0.9077,0.9702,0.9495,0.5642,0.7861,0.9197,0.7861,0.6233,0.5333,0.5642
borough,MANHATTAN,BRONX,BROOKLYN,BROOKLYN,BRONX,BRONX,BRONX,BRONX,MANHATTAN,MANHATTAN,BRONX,QUEENS,BRONX,BRONX,QUEENS,QUEENS,STATEN ISLAND,STATEN ISLAND,QUEENS,BROOKLYN,QUEENS,STATEN ISLAND,STATEN ISLAND,BROOKLYN,STATEN ISLAND,BROOKLYN,STATEN ISLAND,STATEN ISLAND
