# **Introduction**

## **Chi-square test (boroughs + SVI)**
## **Bar-chart with svi as regression/scatterplot (boroughs first)**

source: https://www.atsdr.cdc.gov/place-health/media/pdfs/2024/10/SVI2022Documentation.pdf

source: https://www.atsdr.cdc.gov/place-health/php/svi/svi-interactive-map.html

In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import datetime as dt
import scipy

# visualization
import matplotlib.pyplot as plt
from matplotlib import colors as mcolors
import seaborn as sns

from scipy.stats import chi2_contingency
import statsmodels.api as sm

# system and utility
import warnings
import os
import io
from IPython.display import IFrame
from google.colab import files

# suppress warnings
warnings.filterwarnings('ignore')

# inline
%matplotlib inline

In [None]:
# Notebook-wide pandas display settings: floats are rendered with four
# decimals, and neither columns nor rows are truncated when a frame is shown.
def _four_decimals(value):
    """Format a number with exactly four digits after the decimal point."""
    return '%.4f' % value


pd.options.display.float_format = _four_decimals
pd.options.display.max_columns = None
pd.options.display.max_rows = None

# **Step 1 Get the Eviction data**

In [None]:
# Mount Google Drive at /content/drive so the CSV paths used below resolve.
# This cell only works inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Data sources on the mounted Drive (most recent exports):
#   link1 -> "normal times" eviction file, loaded as evictions_pre_post_raw
#   link2 -> COVID-period eviction file, loaded as evictions_covid_raw
link1 = '/content/drive/My Drive/X999/bbl_evictions_311_svi_normal_times_svi.csv'
link2 = '/content/drive/My Drive/X999/bbl_evictions_311_svi_covid_svi.csv'
# most updated

In [None]:
# Read both exports in one pass, then show their shapes (COVID frame first).
evictions_pre_post_raw, evictions_covid_raw = (
    pd.read_csv(csv_path) for csv_path in (link1, link2)
)
evictions_covid_raw.shape, evictions_pre_post_raw.shape
# correct dimensions

((5386, 103), (66397, 104))

In [None]:
# Work on independent deep copies so the *_raw frames stay untouched and any
# later cell can be re-run from the originals.
evictions_pre_post = evictions_pre_post_raw.copy(deep=True)
evictions_covid = evictions_covid_raw.copy(deep=True)

In [None]:
# Missing-value audit: NaN counts for the two coordinate columns, plus the
# total NaN count over the whole frame. Both are returned as one tuple because
# a notebook cell only displays its last expression; as two separate
# statements, the coordinate counts were computed and then discarded.
(
    evictions_pre_post[['latitude', 'longitude']].isna().sum(),
    evictions_pre_post.isna().sum().sum(),
)
# good, the bad bins have been cleaned

np.int64(0)

In [None]:
# Look for rows whose BIN is still one of the round x,000,000 values
# (presumably placeholder BINs -- TODO confirm).
# NOTE: the variable names read "x00000" while the values compared are
# x,000,000; the names are kept unchanged in case later cells refer to them.
bin_100000 = evictions_pre_post.loc[evictions_pre_post['bin'].eq(1_000_000)]
bin_200000 = evictions_pre_post.loc[evictions_pre_post['bin'].eq(2_000_000)]
bin_300000 = evictions_pre_post.loc[evictions_pre_post['bin'].eq(3_000_000)]
bin_400000 = evictions_pre_post.loc[evictions_pre_post['bin'].eq(4_000_000)]
bin_500000 = evictions_pre_post.loc[evictions_pre_post['bin'].eq(5_000_000)]
bin_600000 = evictions_pre_post.loc[evictions_pre_post['bin'].eq(6_000_000)]
# Only the 1,000,000 subset is displayed here.
bin_100000.loc[
    :, ['bin', 'primary_key', 'eviction_address', 'latitude', 'longitude']
].head()
# all gone, and reassigned to unique bins

Unnamed: 0,bin,primary_key,eviction_address,latitude,longitude


In [None]:
evictions_pre_post.head(2)

Unnamed: 0,primary_key,bbl,court_index_number,docket_number,eviction_address,eviction_apartment_number,executed_date,borough,zipcode,ejectment,eviction/legal_possession,latitude,longitude,community_board,council_district,census_tract,bin,nta,year,month_year,geometry,yearbuilt,bldgclass,numfloors,unitsres,ownername,bldgarea,building_type,building_category,is_condo,floor_category,rent_era,architectural_style,economic_period,residential_units_category,is_llc,building_size_category,size_quartile,decade,fips,e_totpop,rpl_theme1,rpl_theme2,rpl_theme3,rpl_theme4,rpl_themes,ep_pov150,ep_unemp,ep_nohsdp,ep_uninsur,ep_age65,ep_age17,ep_disabl,ep_limeng,ep_noveh,ep_crowd,ep_hburd,ep_afam,ep_hisp,ep_asian,ep_aian,ep_nhpi,ep_twomore,ep_otherrace,ep_minrty,ep_white,invalid_zip,svi_quartile,svi_group,air_quality,animal_issues,appliances,building_exterior,doors_windows,electrical_issues,elevator_issues,floors_stairs,general_complaints,graffiti_posting,heat_hot_water,homeless_issues,noise_complaints,other_issues,pest_issues,plumbing_issues,police_matters,public_nuisance,safety_concerns,sanitation_issues,walls_ceilings,total_complaints,average_year_eviction_count_per_unit(building),average_year_eviction_count(building),average_year_eviction_count_per_nta(nta),evictions_per_nta_1kpop(nta),buildings_affected_per_nta,unit_per_nta,evictions_per_1kunit_nta_1kpop(nta),children_impacted_nta_per_1kpop,unemployed_impacted_nta_per_1kpop,elderly_impacted_nta_per_1kpop,bh_impacted_nta_per_1kpop,hburd_impacted_nta_per_1kpop,average_year_eviction_per_building_nta(nta)
0,*308072/22_5865,3037420029,*308072/22,5865,356 MILLER AVE,1 AND BASEMENT,2024-12-04,BROOKLYN,11207,Not an Ejectment,Possession,40.6721,-73.8911,5.0,37.0,1152.0,3083989,East New York,2024,2024-12,POINT (-73.891105 40.672121),1930.0,C0,3.0,3.0,356 MILLER LLC,2700.0,pre-war,walk-up,False,low-rise,"Pre-1947, pre-rent-control","1921–1930, Art Deco Skyscrapers","1930-1945, great depression and WWII",3-5 units,True,small,Q3 (50-75%),1930-1939,11207,96801.0,0.9788,0.914,0.9808,0.9812,0.9839,33.9,11.1,19.1,6.0,13.8,22.5,13.8,5.3,57.8,9.1,44.7,55.9,32.8,1.5,0.0,0.0,2.9,1.6,94.7,5.3,False,Q3,medium-high,0.0,0.0,1.0,0.0,1.0,2.0,0.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,3.0,5.0,19.0,0.2667,0.8,266.0,2.7479,725,2175.0,1.2634,0.6183,0.305,0.3792,2.4374,1.2283,3.669
1,*313639/23_5202,3057940012,*313639/23,5202,710 61ST STREET,2ND FLOOR,2024-03-04,BROOKLYN,11220,Not an Ejectment,Possession,40.6359,-74.0119,7.0,38.0,118.0,3143881,Sunset Park East,2024,2024-03,POINT (-74.011883 40.635941),1920.0,B2,2.0,2.0,"A.R.M. PARKING, LLC",1204.0,pre-war,two-family,False,low-rise,"Pre-1947, pre-rent-control","1900–1920, Beaux-Arts","Pre-1929, pre-great depression",2-unit,True,very small,Q1 (smallest 25%),1920-1929,11220,93008.0,0.9885,0.7635,0.9594,0.9179,0.9662,37.5,7.5,37.9,11.6,13.1,25.4,8.4,40.2,61.7,23.7,43.6,1.7,40.9,40.7,0.4,0.0,1.2,0.2,85.0,15.0,False,Q3,medium-high,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,4.0,0.3,0.6,38.0,0.4086,136,272.0,1.5021,0.1038,0.0306,0.0535,0.174,0.1781,2.7941


In [None]:
evictions_covid.head(2)

Unnamed: 0,primary_key,bbl,court_index_number,docket_number,eviction_address,eviction_apartment_number,executed_date,borough,zipcode,ejectment,eviction/legal_possession,latitude,longitude,community_board,council_district,census_tract,bin,nta,year,month_year,geometry,yearbuilt,bldgclass,numfloors,unitsres,ownername,bldgarea,building_type,building_category,is_condo,floor_category,rent_era,architectural_style,economic_period,residential_units_category,is_llc,building_size_category,size_quartile,decade,fips,e_totpop,rpl_theme1,rpl_theme2,rpl_theme3,rpl_theme4,rpl_themes,ep_pov150,ep_unemp,ep_nohsdp,ep_uninsur,ep_age65,ep_age17,ep_disabl,ep_limeng,ep_noveh,ep_crowd,ep_hburd,ep_afam,ep_hisp,ep_asian,ep_aian,ep_nhpi,ep_twomore,ep_otherrace,ep_minrty,ep_white,invalid_zip,svi_quartile,air_quality,animal_issues,appliances,building_exterior,doors_windows,electrical_issues,elevator_issues,floors_stairs,general_complaints,graffiti_posting,heat_hot_water,homeless_issues,noise_complaints,other_issues,pest_issues,plumbing_issues,police_matters,public_nuisance,safety_concerns,sanitation_issues,walls_ceilings,total_complaints,average_year_eviction_count_per_unit(building),average_year_eviction_count(building),average_year_eviction_count_per_nta(nta),evictions_per_nta_1kpop(nta),buildings_affected_per_nta,unit_per_nta,evictions_per_1kunit_nta_1kpop(nta),children_impacted_nta_per_1kpop,unemployed_impacted_nta_per_1kpop,elderly_impacted_nta_per_1kpop,bh_impacted_nta_per_1kpop,hburd_impacted_nta_per_1kpop,average_year_eviction_per_building_nta(nta)
0,004123/20_209969,2032140141,004123/20,209969,2541 A GRAND AVE,ROOM 3B,2022-08-22,BRONX,10468,Not an Ejectment,Possession,40.8654,-73.9013,7.0,14.0,265.0,2113173,Kingsbridge Heights,2022,2022-08,POINT (-73.901317 40.865396),2004.0,C0,3.0,3.0,MONJU SARKER,3420.0,post-war,walk-up,False,low-rise,"1994–Present, vacancy decontrol","2001-present, New Architecture","1991–2008, modern economic growth",3-5 units,False,medium-small,Q4 (largest 25%),2000-2009,10468,81397.0,0.9954,0.9407,0.987,0.947,0.9874,39.5,11.6,28.3,9.2,11.2,26.4,12.2,26.9,71.8,19.2,56.7,15.6,78.0,2.3,0.0,0.0,0.5,0.5,96.9,3.1,False,Q3,0.0,0.0,0.0,0.0,3.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,2.0,0.0,0.0,2.0,0.0,0.0,0.0,3.0,1.0,14.0,0.0667,0.2,18.6667,0.2293,38,114.0,2.0117,0.0605,0.0266,0.0257,0.2147,0.13,4.9123
1,0050153/20_106030,4031560133,0050153/20,106030,98-05 67TH AVENUE,12F,2022-04-14,QUEENS,11375,Not an Ejectment,Possession,40.7242,-73.8556,6.0,29.0,71306.0,4074666,Forest Hills,2022,2022-04,POINT (-73.855552 40.724241),1960.0,D3,13.0,181.0,MARSEILLES LEASING LIMITED PARTNERSHIP,177710.0,post-war,elevator,False,high-rise,"1947–1969, rent-control","1951–1980, the International Style, Alternativ...","1946–1975, pst war economic boom",100+ units,False,mega,Q4 (largest 25%),1960-1969,11375,75212.0,0.4759,0.5698,0.8789,0.8057,0.7322,12.0,4.8,6.1,3.7,20.4,18.0,10.5,7.9,41.9,5.8,25.4,2.7,16.4,28.5,0.1,0.0,4.6,0.7,53.0,47.0,False,Q1 (Low),0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,62.0,0.0,34.0,0.0,0.0,4.0,1.0,0.0,0.0,2.0,5.0,112.0,0.0011,0.2,6.0,0.0798,17,3077.0,0.0259,0.0144,0.0038,0.0163,0.0152,0.0203,3.5294


In [None]:
# Sanity check: show the last 13 column labels of each frame next to both
# shapes, so the derived analysis columns can be compared by eye.
(
    evictions_pre_post.columns[-13:],
    evictions_covid.columns[-13:],
    evictions_pre_post.shape,
    evictions_covid.shape,
)
# shape correct
# analysis columns correct

(Index(['average_year_eviction_count_per_unit(building)',
        'average_year_eviction_count(building)',
        'average_year_eviction_count_per_nta(nta)',
        'evictions_per_nta_1kpop(nta)', 'buildings_affected_per_nta',
        'unit_per_nta', 'evictions_per_1kunit_nta_1kpop(nta)',
        'children_impacted_nta_per_1kpop', 'unemployed_impacted_nta_per_1kpop',
        'elderly_impacted_nta_per_1kpop', 'bh_impacted_nta_per_1kpop',
        'hburd_impacted_nta_per_1kpop',
        'average_year_eviction_per_building_nta(nta)'],
       dtype='object'),
 Index(['average_year_eviction_count_per_unit(building)',
        'average_year_eviction_count(building)',
        'average_year_eviction_count_per_nta(nta)',
        'evictions_per_nta_1kpop(nta)', 'buildings_affected_per_nta',
        'unit_per_nta', 'evictions_per_1kunit_nta_1kpop(nta)',
        'children_impacted_nta_per_1kpop', 'unemployed_impacted_nta_per_1kpop',
        'elderly_impacted_nta_per_1kpop', 'bh_impacted_nta_per_1k

In [None]:
link = '/content/drive/My Drive/X999/svi_cleaned.csv'

In [None]:
svi_df = pd.read_csv(link)
svi_df.head(2)

Unnamed: 0,fips,location,area_sqmi,e_totpop,m_totpop,e_hu,m_hu,e_hh,m_hh,e_pov150,m_pov150,e_unemp,m_unemp,e_hburd,m_hburd,e_nohsdp,m_nohsdp,e_uninsur,m_uninsur,e_age65,m_age65,e_age17,m_age17,e_disabl,m_disabl,e_sngpnt,m_sngpnt,e_limeng,m_limeng,e_minrty,m_minrty,e_munit,m_munit,e_mobile,m_mobile,e_crowd,m_crowd,e_noveh,m_noveh,e_groupq,m_groupq,ep_pov150,mp_pov150,ep_unemp,mp_unemp,ep_hburd,mp_hburd,ep_nohsdp,mp_nohsdp,ep_uninsur,mp_uninsur,ep_age65,mp_age65,ep_age17,mp_age17,ep_disabl,mp_disabl,ep_sngpnt,mp_sngpnt,ep_limeng,mp_limeng,ep_minrty,mp_minrty,ep_munit,mp_munit,ep_mobile,mp_mobile,ep_crowd,mp_crowd,ep_noveh,mp_noveh,ep_groupq,mp_groupq,epl_pov150,epl_unemp,epl_hburd,epl_nohsdp,epl_uninsur,spl_theme1,rpl_theme1,epl_age65,epl_age17,epl_disabl,epl_sngpnt,epl_limeng,spl_theme2,rpl_theme2,epl_minrty,spl_theme3,rpl_theme3,epl_munit,epl_mobile,epl_crowd,epl_noveh,epl_groupq,spl_theme4,rpl_theme4,spl_themes,rpl_themes,f_pov150,f_unemp,f_hburd,f_nohsdp,f_uninsur,f_theme1,f_age65,f_age17,f_disabl,f_sngpnt,f_limeng,f_theme2,f_minrty,f_theme3,f_munit,f_mobile,f_crowd,f_noveh,f_groupq,f_theme4,f_total,e_daypop,e_noint,m_noint,e_afam,m_afam,e_hisp,m_hisp,e_asian,m_asian,e_aian,m_aian,e_nhpi,m_nhpi,e_twomore,m_twomore,e_otherrace,m_otherrace,ep_noint,mp_noint,ep_afam,mp_afam,ep_hisp,mp_hisp,ep_asian,mp_asian,ep_aian,mp_aian,ep_nhpi,mp_nhpi,ep_twomore,mp_twomore,ep_otherrace,mp_otherrace
0,10001,ZCTA5 10001,0.6238,27004,1827,16975,831,14375,782,5248,797,761,266,3314,531,1930,534,831,289,3428,432,2694,643,2310,499,501,215,1381,405,13460,2305,15840,898,15,23,389,135,12285,840,2213,218,20.3,2.7,4.3,1.5,23.1,3.5,9.1,2.4,3.1,1.0,12.7,1.6,10.0,2.1,8.6,1.9,3.5,1.5,5.3,1.5,49.8,7.8,93.3,2.7,0.1,0.1,2.7,0.9,85.5,2.8,8.2,0.6,0.6108,0.4574,0.5573,0.5902,0.4436,2.6593,0.5688,0.142,0.1161,0.1891,0.4707,0.8777,1.7956,0.1692,0.867,0.867,0.867,0.9853,0.271,0.7402,0.9949,0.9104,3.9018,0.9806,9.2237,0.7414,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,3,3,239407,1047,389,2220,576,5206,943,5031,774,0,25,0,25,780,326,223,169,7.3,2.6,8.2,2.2,19.3,3.0,18.6,2.9,0.0,0.1,0.0,0.1,2.9,1.2,0.8,0.6
1,10002,ZCTA5 10002,0.8223,76518,2894,39094,1241,36028,1326,27908,2853,2833,574,14688,1367,18301,1376,4074,766,17681,1287,10028,1549,9896,1062,2211,499,18393,1640,56964,3226,35725,1677,16,28,2461,449,29828,1403,2090,39,36.8,3.5,7.6,1.4,40.8,3.5,30.0,2.0,5.4,1.0,23.1,1.7,13.1,1.8,13.0,1.4,6.1,1.4,24.7,2.0,74.4,3.1,91.4,3.2,0.0,0.1,6.8,1.2,82.8,1.8,2.7,0.1,0.9148,0.7946,0.9219,0.9741,0.7207,4.3261,0.9639,0.7296,0.1831,0.5186,0.739,0.9944,3.1647,0.8781,0.9369,0.9369,0.9369,0.979,0.0,0.9105,0.9915,0.773,3.654,0.9254,12.0817,0.9656,1,0,1,1,0,3,0,0,0,0,1,1,1,1,1,0,1,1,0,3,8,64307,8590,1110,6141,1194,19864,2190,28477,1989,74,83,24,45,1810,486,574,394,23.8,2.9,8.0,1.5,26.0,2.5,37.2,2.2,0.1,0.1,0.0,0.1,2.4,0.6,0.8,0.5


In [None]:
svi_df.shape

(204, 153)

In [None]:
# list(svi_df.columns)

In [None]:
svi_df.ep_nhpi.unique()

array([ 0.00e+00,  1.00e-01,  3.00e-01,  2.00e-01, -9.99e+02,  8.00e-01,
        1.20e+00,  5.00e-01,  4.00e-01])

# **Step 2: SVI items**

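A quick double check: reload the raw New York ZCTA-level SVI file and compare it with the cleaned SVI table loaded above.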

In [None]:
# Path to the raw, state-wide ZCTA-level SVI file.
# NOTE(review): this rebinds `link`, which previously pointed at
# svi_cleaned.csv -- re-running the earlier read_csv cell after this one would
# load a different file. Consider distinct names for the two paths.
link = "/content/drive/My Drive/X999/NewYork_ZCTA.csv"

In [None]:
svi_raw = pd.read_csv(link)
svi_raw.head(2)

Unnamed: 0,ST,STATE,ST_ABBR,FIPS,LOCATION,AREA_SQMI,E_TOTPOP,M_TOTPOP,E_HU,M_HU,E_HH,M_HH,E_POV150,M_POV150,E_UNEMP,M_UNEMP,E_HBURD,M_HBURD,E_NOHSDP,M_NOHSDP,E_UNINSUR,M_UNINSUR,E_AGE65,M_AGE65,E_AGE17,M_AGE17,E_DISABL,M_DISABL,E_SNGPNT,M_SNGPNT,E_LIMENG,M_LIMENG,E_MINRTY,M_MINRTY,E_MUNIT,M_MUNIT,E_MOBILE,M_MOBILE,E_CROWD,M_CROWD,E_NOVEH,M_NOVEH,E_GROUPQ,M_GROUPQ,EP_POV150,MP_POV150,EP_UNEMP,MP_UNEMP,EP_HBURD,MP_HBURD,EP_NOHSDP,MP_NOHSDP,EP_UNINSUR,MP_UNINSUR,EP_AGE65,MP_AGE65,EP_AGE17,MP_AGE17,EP_DISABL,MP_DISABL,EP_SNGPNT,MP_SNGPNT,EP_LIMENG,MP_LIMENG,EP_MINRTY,MP_MINRTY,EP_MUNIT,MP_MUNIT,EP_MOBILE,MP_MOBILE,EP_CROWD,MP_CROWD,EP_NOVEH,MP_NOVEH,EP_GROUPQ,MP_GROUPQ,EPL_POV150,EPL_UNEMP,EPL_HBURD,EPL_NOHSDP,EPL_UNINSUR,SPL_THEME1,RPL_THEME1,EPL_AGE65,EPL_AGE17,EPL_DISABL,EPL_SNGPNT,EPL_LIMENG,SPL_THEME2,RPL_THEME2,EPL_MINRTY,SPL_THEME3,RPL_THEME3,EPL_MUNIT,EPL_MOBILE,EPL_CROWD,EPL_NOVEH,EPL_GROUPQ,SPL_THEME4,RPL_THEME4,SPL_THEMES,RPL_THEMES,F_POV150,F_UNEMP,F_HBURD,F_NOHSDP,F_UNINSUR,F_THEME1,F_AGE65,F_AGE17,F_DISABL,F_SNGPNT,F_LIMENG,F_THEME2,F_MINRTY,F_THEME3,F_MUNIT,F_MOBILE,F_CROWD,F_NOVEH,F_GROUPQ,F_THEME4,F_TOTAL,E_DAYPOP,E_NOINT,M_NOINT,E_AFAM,M_AFAM,E_HISP,M_HISP,E_ASIAN,M_ASIAN,E_AIAN,M_AIAN,E_NHPI,M_NHPI,E_TWOMORE,M_TWOMORE,E_OTHERRACE,M_OTHERRACE,EP_NOINT,MP_NOINT,EP_AFAM,MP_AFAM,EP_HISP,MP_HISP,EP_ASIAN,MP_ASIAN,EP_AIAN,MP_AIAN,EP_NHPI,MP_NHPI,EP_TWOMORE,MP_TWOMORE,EP_OTHERRACE,MP_OTHERRACE
0,36,New York,NY,6390,ZCTA5 06390,4.0467,53,39,253,49,19,19,17,16,0,13,9,26,0,13,27,34,0,13,6,11,31,33,0,18,9,53,20,51,0,18,4,5,0,18,0,13,17,16,32.1,18.8,0.0,52.7,47.4,100.0,0.0,51.4,50.9,45.7,0.0,45.2,11.3,19.0,58.5,41.4,0.0,94.7,17.0,99.2,37.7,92.1,0.0,7.1,1.6,2.0,0.0,94.7,0.0,75.5,32.1,18.8,0.879,0.0,0.9635,0.0,0.996,2.8385,0.6342,0.0,0.1408,0.9944,0.0,0.9775,2.1127,0.3009,0.8062,0.8062,0.8062,0.0,0.4654,0.0,0.0,0.9735,1.4389,0.2205,7.1963,0.4192,0,0,1,0,1,2,0,0,1,0,1,2,0,0,0,0,0,0,1,1,5,601,9,14,0,13,9,19,0,13,0,13,8,16,3,7,0,13,47.4,51.8,0.0,45.2,17.0,35.0,0.0,45.2,0.0,45.2,15.1,32.1,5.7,12.1,0.0,45.2
1,36,New York,NY,10001,ZCTA5 10001,0.6238,27004,1827,16975,831,14375,782,5248,797,761,266,3314,531,1930,534,831,289,3428,432,2694,643,2310,499,501,215,1381,405,13460,2305,15840,898,15,23,389,135,12285,840,2213,218,20.3,2.7,4.3,1.5,23.1,3.5,9.1,2.4,3.1,1.0,12.7,1.6,10.0,2.1,8.6,1.9,3.5,1.5,5.3,1.5,49.8,7.8,93.3,2.7,0.1,0.1,2.7,0.9,85.5,2.8,8.2,0.6,0.6108,0.4574,0.5573,0.5902,0.4436,2.6593,0.5688,0.142,0.1161,0.1891,0.4707,0.8777,1.7956,0.1692,0.867,0.867,0.867,0.9853,0.271,0.7402,0.9949,0.9104,3.9018,0.9806,9.2237,0.7414,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,3,3,239407,1047,389,2220,576,5206,943,5031,774,0,25,0,25,780,326,223,169,7.3,2.6,8.2,2.2,19.3,3.0,18.6,2.9,0.0,0.1,0.0,0.1,2.9,1.2,0.8,0.6


In [None]:
def is_nyc_zipcode(zipcode):
    """Return True when ``zipcode`` lies in one of the accepted NYC ZIP ranges.

    Parameters
    ----------
    zipcode : int, float, str or None
        ZIP/ZCTA code to test. Strings are stripped and parsed with ``int``;
        numeric values are compared as they are.

    Returns
    -------
    bool
        True if the code is inside any accepted range or is one of the extra
        individual ZIPs. False otherwise, including for missing values
        (None, NaN) and strings that are not plain integers, which previously
        raised instead of being rejected.

    Notes
    -----
    The accepted set is exactly the one the former if-chain accepted. Ranges
    that were fully contained in another range were folded together:
    10301-10314 (Staten Island) and 10451-10475 (Bronx) both sit inside
    10300-10499, and the individual 114xx ZIPs sit inside 11351-11436.

    NOTE(review): codes outside these spans (for example 11691-11697) are
    rejected -- confirm that this is intentional.
    """
    if zipcode is None:
        return False

    if isinstance(zipcode, str):
        try:
            zip_int = int(zipcode.strip())
        except ValueError:
            # Not a plain integer string (empty, ZIP+4, letters, ...).
            return False
    else:
        zip_int = zipcode

    # Inclusive (low, high) bounds.
    nyc_ranges = (
        (10001, 10282),  # Manhattan
        (10300, 10499),  # formerly labelled "additional Manhattan"; contains
                         # the Staten Island and Bronx ranges listed above
        (11101, 11109),  # Queens
        (11201, 11256),  # Brooklyn
        (11351, 11436),  # Queens
    )
    # Individual Queens ZIPs that fall outside every range above.
    extra_zips = (11004, 11005)

    try:
        in_range = any(low <= zip_int <= high for low, high in nyc_ranges)
    except TypeError:
        # Value cannot be ordered against integers (e.g. a list or dict).
        return False

    return bool(in_range or zip_int in extra_zips)

In [None]:
# Keep only the rows of the raw SVI table whose FIPS (ZCTA code) passes the
# NYC ZIP test.
nyc_df = svi_raw.loc[svi_raw['FIPS'].map(is_nyc_zipcode)]

In [None]:
nyc_df.shape

(204, 156)

In [None]:
nyc_df.EP_NHPI.unique()
# The -999 value is already present in the raw source file, so it was not
# introduced by our own cleaning.
# NOTE(review): -999 looks like a "no data" sentinel rather than a genuine
# error -- confirm against the SVI 2022 documentation, and make sure it is
# treated as missing before any mean or sum is taken.

array([ 0.00e+00,  1.00e-01,  3.00e-01,  2.00e-01, -9.99e+02,  8.00e-01,
        1.20e+00,  5.00e-01,  4.00e-01])

In [None]:
svi_raw.EP_NHPI.unique()

array([ 1.51e+01,  0.00e+00,  1.00e-01,  3.00e-01,  2.00e-01, -9.99e+02,
        8.00e-01,  1.20e+00,  5.00e-01,  4.00e-01,  1.10e+00,  7.00e-01,
        1.50e+00,  1.80e+00,  9.00e-01,  6.00e-01,  1.40e+00,  2.10e+00,
        2.30e+00,  1.00e+00])

In [None]:
# Scratch check: spell out the scientific-notation values printed by
# .unique() above (-9.99e+02 is -999.0).
-9.99e+02, 0.00e+00, 9.00e-01, 2.00e-01

(-999.0, 0.0, 0.9, 0.2)

# **Step 3:  Boroughs and their eviction rates**

In [None]:
# Mean of each racial/ethnic share column over all rows of evictions_pre_post.
# This is a plain row mean, so every row of the frame counts equally.
race_share_cols = [
    'ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi',
    'ep_white', 'ep_twomore', 'ep_otherrace',
]
evictions_pre_post_mean = evictions_pre_post.loc[:, race_share_cols].mean()
evictions_pre_post_mean
# may need to merge ep_twomore and ep_otherrace together

Unnamed: 0,0
ep_afam,29.2346
ep_asian,8.904
ep_hisp,38.7307
ep_nhpi,0.0115
ep_white,19.0236
ep_twomore,2.8025
ep_otherrace,1.0239


In [None]:
# Turn the Series of means into a two-column DataFrame (label, value).
# Guarded on type so that re-running this cell is a no-op instead of pushing
# yet another index column into an already-converted frame.
if isinstance(evictions_pre_post_mean, pd.Series):
    evictions_pre_post_mean = evictions_pre_post_mean.reset_index()

In [None]:
# Give the two columns produced by reset_index() readable names.
evictions_pre_post_mean = evictions_pre_post_mean.rename(
    columns={'index': 'race_svi', 0: "racial percentage"}
)

In [None]:
evictions_pre_post_mean

Unnamed: 0,race_svi,racial percentage
0,ep_afam,29.2346
1,ep_asian,8.904
2,ep_hisp,38.7307
3,ep_nhpi,0.0115
4,ep_white,19.0236
5,ep_twomore,2.8025
6,ep_otherrace,1.0239


In [None]:
# type(evictions_pre_post_mean)
# so this is correct

## **Step 3.1 Selected neighborhoods**

Four types of aggregated (group-by) analysis:

1) average eviction count per building, per year;

2) average eviction count per nta population;

3) average eviction count per unit per building per year;

4) average eviction count per unit per nta population.

In [None]:
# normal[['average_year_eviction_count(building)', 'average_year_eviction_count_per_unit(building)', 'average_year_eviction_count_per_nta(nta)',
#         'average_year_eviction_per_building_nta(nta)', 'evictions_per_nta_1k(nta)', 'evictions_per_unit_nta_1k(nta)']]

In [None]:
# Identifier and location fields plus the building- and NTA-level eviction
# metrics, pulled into a narrower frame for inspection.
analysis_fields = [
    'primary_key', 'bin', 'bbl', 'latitude', 'longitude', 'eviction_address',
    'zipcode', 'borough', 'nta',
    'average_year_eviction_count(building)',
    'average_year_eviction_count_per_unit(building)',
    'average_year_eviction_count_per_nta(nta)',
    # 'average_year_eviction_per_building_nta(nta)',  (left commented out)
    'evictions_per_nta_1kpop(nta)',
    'evictions_per_1kunit_nta_1kpop(nta)',
    'unitsres', 'e_totpop',
]
analysis_columns = evictions_pre_post.loc[:, analysis_fields]
analysis_columns.head()
# average_year_eviction_nta_count is (total evictions per nta) / (nta's population)
# NOTE(review): no selected column has exactly that name; presumably this
# describes evictions_per_nta_1kpop(nta) -- confirm.

Unnamed: 0,primary_key,bin,bbl,latitude,longitude,eviction_address,zipcode,borough,nta,average_year_eviction_count(building),average_year_eviction_count_per_unit(building),average_year_eviction_count_per_nta(nta),evictions_per_nta_1kpop(nta),evictions_per_1kunit_nta_1kpop(nta),unitsres,e_totpop
0,*308072/22_5865,3083989,3037420029,40.6721,-73.8911,356 MILLER AVE,11207,BROOKLYN,East New York,0.8,0.2667,266.0,2.7479,1.2634,3.0,96801.0
1,*313639/23_5202,3143881,3057940012,40.6359,-74.0119,710 61ST STREET,11220,BROOKLYN,Sunset Park East,0.6,0.3,38.0,0.4086,1.5021,2.0,93008.0
2,*324973/22_5308,3143435,3057820030,40.64,-74.0171,462 60TH STREET,11220,BROOKLYN,Sunset Park West,0.6,0.15,39.6,0.4258,0.7241,4.0,93008.0
3,*53336/16_170279,2015444,2032510420,40.8772,-73.8896,3400 PAUL AVENUE,10468,BRONX,Van Cortlandt Village,0.8,0.0023,172.8,2.1229,0.0247,352.0,81397.0
4,*5990/17_2703,2003900,2025770038,40.8112,-73.9088,480 CONCORD AVENUE,10455,BRONX,Mott Haven-Port Morris,1.6,0.0246,158.8,3.5782,0.1704,65.0,44380.0


In [None]:
evictions_pre_post.columns

Index(['primary_key', 'bbl', 'court_index_number', 'docket_number',
       'eviction_address', 'eviction_apartment_number', 'executed_date',
       'borough', 'zipcode', 'ejectment',
       ...
       'evictions_per_nta_1kpop(nta)', 'buildings_affected_per_nta',
       'unit_per_nta', 'evictions_per_1kunit_nta_1kpop(nta)',
       'children_impacted_nta_per_1kpop', 'unemployed_impacted_nta_per_1kpop',
       'elderly_impacted_nta_per_1kpop', 'bh_impacted_nta_per_1kpop',
       'hburd_impacted_nta_per_1kpop',
       'average_year_eviction_per_building_nta(nta)'],
      dtype='object', length=104)

In [None]:
# evictions_pre_post.columns.tolist()

In [None]:
# One row per NTA: the mean of the NTA-level yearly eviction count, plus the
# first value found in each group for the demographic shares, rpl_themes and
# borough. Rows are ordered from the highest eviction count to the lowest.
# NOTE(review): 'first' assumes these columns are constant within an NTA --
# TODO confirm.
nta_agg_spec = {'average_year_eviction_count_per_nta(nta)': 'mean'}
nta_agg_spec.update(dict.fromkeys(
    ['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white',
     'ep_twomore', 'ep_otherrace', 'rpl_themes', 'borough'],
    'first',
))
neighbor_evictions = (
    evictions_pre_post
    .groupby('nta')
    .agg(nta_agg_spec)
    .reset_index()
    .sort_values('average_year_eviction_count_per_nta(nta)', ascending=False)
)
neighbor_evictions

Unnamed: 0,nta,average_year_eviction_count_per_nta(nta),ep_afam,ep_asian,ep_hisp,ep_nhpi,ep_white,ep_twomore,ep_otherrace,rpl_themes,borough
27,Central Harlem North-Polo Grounds,326.4,54.7,1.9,32.7,0.0,6.7,2.8,1.1,0.9748,MANHATTAN
38,Crown Heights North,319.8,66.7,1.3,15.7,0.0,11.9,3.6,0.2,0.9438,BROOKLYN
11,Bedford Park-Fordham North,306.0,15.6,2.3,78.0,0.0,3.1,0.5,0.5,0.9874,BRONX
44,East Concourse-Concourse Village,287.6,38.2,0.7,56.3,0.0,2.4,1.8,0.4,0.9994,BRONX
178,Williamsbridge-Olinville,267.4,52.2,5.5,29.5,0.0,8.8,2.3,1.0,0.9507,BRONX
50,East New York,266.0,55.9,1.5,32.8,0.0,5.3,2.9,1.6,0.9839,BROOKLYN
163,University Heights-Morris Heights,265.6,24.6,1.6,69.2,0.0,2.0,1.9,0.7,0.9983,BRONX
108,Mount Hope,260.2,30.9,0.4,65.3,0.0,1.7,1.1,0.4,0.9989,BRONX
59,Flatbush,246.2,51.6,5.8,7.5,0.0,29.7,4.3,1.2,0.9174,BROOKLYN
52,East Tremont,219.8,26.5,1.4,65.0,0.0,3.8,1.9,1.0,0.9954,BRONX


In [None]:
# neighbor_evictions is sorted by eviction count, descending: take the 15
# highest and the 18 lowest NTAs and stack them into one frame.
# NOTE(review): the saved output of this cell shows `quartile` and `bh_pct`
# columns that no earlier visible cell creates -- check that the notebook
# still reproduces under Restart & Run All.
top_15 = neighbor_evictions.iloc[:15]
bottom_18 = neighbor_evictions.iloc[-18:]
selected_nta = pd.concat([top_15, bottom_18])
selected_nta

Unnamed: 0,nta,average_year_eviction_count_per_nta(nta),ep_afam,ep_asian,ep_hisp,ep_nhpi,ep_white,ep_twomore,ep_otherrace,rpl_themes,borough,quartile,bh_pct
27,Central Harlem North-Polo Grounds,326.4,54.7,1.9,32.7,0.0,6.7,2.8,1.1,0.9748,MANHATTAN,Q4,87.4
38,Crown Heights North,319.8,66.7,1.3,15.7,0.0,11.9,3.6,0.2,0.9438,BROOKLYN,Q4,82.4
11,Bedford Park-Fordham North,306.0,15.6,2.3,78.0,0.0,3.1,0.5,0.5,0.9874,BRONX,Q4,93.6
44,East Concourse-Concourse Village,287.6,38.2,0.7,56.3,0.0,2.4,1.8,0.4,0.9994,BRONX,Q4,94.5
178,Williamsbridge-Olinville,267.4,52.2,5.5,29.5,0.0,8.8,2.3,1.0,0.9507,BRONX,Q4,81.7
50,East New York,266.0,55.9,1.5,32.8,0.0,5.3,2.9,1.6,0.9839,BROOKLYN,Q4,88.7
163,University Heights-Morris Heights,265.6,24.6,1.6,69.2,0.0,2.0,1.9,0.7,0.9983,BRONX,Q4,93.8
108,Mount Hope,260.2,30.9,0.4,65.3,0.0,1.7,1.1,0.4,0.9989,BRONX,Q4,96.2
59,Flatbush,246.2,51.6,5.8,7.5,0.0,29.7,4.3,1.2,0.9174,BROOKLYN,Q4,59.1
52,East Tremont,219.8,26.5,1.4,65.0,0.0,3.8,1.9,1.0,0.9954,BRONX,Q4,91.5


In [None]:
# Remove three NTAs from the selection: the two park/cemetery areas and
# Glen Oaks-Floral Park-New Hyde Park.
excluded_ntas = [
    'park-cemetery-etc-Brooklyn',
    'park-cemetery-etc-Bronx',
    'Glen Oaks-Floral Park-New Hyde Park',
]
# .copy() gives an independent frame, so the column assignments in later cells
# do not write into a slice of the concatenated frame (SettingWithCopy).
selected_nta = selected_nta[~selected_nta['nta'].isin(excluded_ntas)].copy()
selected_nta.shape

(30, 14)

In [None]:
# Combined Black + Hispanic share = ep_afam + ep_hisp.
# The earlier expression added ep_nhpi to ep_hisp, so the column simply
# repeated the Hispanic share (e.g. 32.7 instead of 87.4 for Central Harlem
# North-Polo Grounds).
selected_nta['black+hispanic'] = selected_nta['ep_afam'] + selected_nta['ep_hisp']

In [None]:
selected_nta

Unnamed: 0,nta,average_year_eviction_count_per_nta(nta),ep_afam,ep_asian,ep_hisp,ep_nhpi,ep_white,ep_twomore,ep_otherrace,rpl_themes,borough,quartile,bh_pct,black+hispanic
27,Central Harlem North-Polo Grounds,326.4,54.7,1.9,32.7,0.0,6.7,2.8,1.1,0.9748,MANHATTAN,Q4,87.4,32.7
38,Crown Heights North,319.8,66.7,1.3,15.7,0.0,11.9,3.6,0.2,0.9438,BROOKLYN,Q4,82.4,15.7
11,Bedford Park-Fordham North,306.0,15.6,2.3,78.0,0.0,3.1,0.5,0.5,0.9874,BRONX,Q4,93.6,78.0
44,East Concourse-Concourse Village,287.6,38.2,0.7,56.3,0.0,2.4,1.8,0.4,0.9994,BRONX,Q4,94.5,56.3
178,Williamsbridge-Olinville,267.4,52.2,5.5,29.5,0.0,8.8,2.3,1.0,0.9507,BRONX,Q4,81.7,29.5
50,East New York,266.0,55.9,1.5,32.8,0.0,5.3,2.9,1.6,0.9839,BROOKLYN,Q4,88.7,32.8
163,University Heights-Morris Heights,265.6,24.6,1.6,69.2,0.0,2.0,1.9,0.7,0.9983,BRONX,Q4,93.8,69.2
108,Mount Hope,260.2,30.9,0.4,65.3,0.0,1.7,1.1,0.4,0.9989,BRONX,Q4,96.2,65.3
59,Flatbush,246.2,51.6,5.8,7.5,0.0,29.7,4.3,1.2,0.9174,BROOKLYN,Q4,59.1,7.5
52,East Tremont,219.8,26.5,1.4,65.0,0.0,3.8,1.9,1.0,0.9954,BRONX,Q4,91.5,65.0


In [None]:
selected_nta.T

Unnamed: 0,27,38,11,44,178,50,163,108,59,52,169,132,106,21,171,161,112,73,118,177,4,176,29,180,134,42,20,2,1,140
nta,Central Harlem North-Polo Grounds,Crown Heights North,Bedford Park-Fordham North,East Concourse-Concourse Village,Williamsbridge-Olinville,East New York,University Heights-Morris Heights,Mount Hope,Flatbush,East Tremont,Washington Heights South,Prospect Lefferts Gardens-Wingate,Morrisania-Melrose,Brownsville,West Concourse,Todt Hill-Emerson Hill-Heartland Village-Light...,New Dorp-Midland Beach,Great Kills,Oakland Gardens,Whitestone,Auburndale,Westerleigh,Charleston-Richmond Valley-Tottenville,Windsor Terrace,Queensboro Hill,Douglas Manor-Douglaston-Little Neck,Brooklyn Heights-Cobble Hill,Arden Heights,Annadale-Huguenot-Prince's Bay-Eltingville,Rossville-Woodrow
average_year_eviction_count_per_nta(nta),326.4000,319.8000,306.0000,287.6000,267.4000,266.0000,265.6000,260.2000,246.2000,219.8000,209.2000,204.2000,202.0000,193.6000,182.4000,8.4000,8.2000,8.2000,7.6000,7.2000,7.2000,6.8000,6.2000,6.2000,6.2000,5.0000,4.8000,3.4000,3.4000,3.4000
ep_afam,54.7000,66.7000,15.6000,38.2000,52.2000,55.9000,24.6000,30.9000,51.6000,26.5000,12.8000,63.2000,38.2000,70.1000,30.9000,3.3000,3.0000,0.8000,1.6000,0.3000,11.2000,3.3000,0.1000,8.9000,11.2000,0.3000,11.4000,0.8000,0.5000,0.5000
ep_asian,1.9000,1.3000,2.3000,0.7000,5.5000,1.5000,1.6000,0.4000,5.8000,1.4000,4.5000,3.2000,0.7000,0.8000,0.4000,18.0000,13.8000,8.7000,48.6000,26.0000,43.0000,18.0000,2.1000,17.8000,43.0000,30.9000,14.2000,8.7000,4.1000,4.1000
ep_hisp,32.7000,15.7000,78.0000,56.3000,29.5000,32.8000,69.2000,65.3000,7.5000,65.0000,65.7000,14.9000,56.3000,19.5000,65.3000,15.2000,16.2000,12.8000,16.4000,14.8000,21.2000,15.2000,10.6000,16.4000,21.2000,12.4000,11.0000,12.8000,9.6000,9.6000
ep_nhpi,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.2000,0.0000,0.0000,0.1000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000
ep_white,6.7000,11.9000,3.1000,2.4000,8.8000,5.3000,2.0000,1.7000,29.7000,3.8000,14.6000,13.7000,2.4000,3.5000,1.7000,60.3000,63.2000,76.1000,30.2000,56.1000,22.2000,60.3000,86.0000,51.5000,22.2000,51.3000,57.5000,76.1000,83.5000,83.5000
ep_twomore,2.8000,3.6000,0.5000,1.8000,2.3000,2.9000,1.9000,1.1000,4.3000,1.9000,2.0000,4.1000,1.8000,5.4000,1.1000,2.4000,3.5000,1.2000,2.5000,1.9000,1.9000,2.4000,1.2000,4.6000,1.9000,3.8000,5.3000,1.2000,1.5000,1.5000
ep_otherrace,1.1000,0.2000,0.5000,0.4000,1.0000,1.6000,0.7000,0.4000,1.2000,1.0000,0.4000,0.7000,0.4000,0.6000,0.4000,0.5000,0.4000,0.4000,0.3000,0.5000,0.4000,0.5000,0.0000,0.9000,0.4000,0.3000,0.5000,0.4000,0.7000,0.7000
rpl_themes,0.9748,0.9438,0.9874,0.9994,0.9507,0.9839,0.9983,0.9989,0.9174,0.9954,0.9771,0.9220,0.9994,0.9948,0.9989,0.7861,0.8739,0.5333,0.8240,0.8561,0.9077,0.7861,0.3624,0.9197,0.9077,0.7987,0.6233,0.5333,0.5642,0.5642


In [None]:
# Display label "(<avg yearly evictions>) <NTA name>".
# The count is formatted to one decimal instead of using astype(str), which
# leaked float representation noise into labels such as
# "(246.20000000000002) Flatbush".
selected_nta['new_nta'] = (
    "("
    + selected_nta['average_year_eviction_count_per_nta(nta)'].map('{:.1f}'.format)
    + ") "
    + selected_nta['nta']
)
selected_nta

Unnamed: 0,nta,average_year_eviction_count_per_nta(nta),ep_afam,ep_asian,ep_hisp,ep_nhpi,ep_white,ep_twomore,ep_otherrace,rpl_themes,borough,quartile,bh_pct,black+hispanic,new_nta
27,Central Harlem North-Polo Grounds,326.4,54.7,1.9,32.7,0.0,6.7,2.8,1.1,0.9748,MANHATTAN,Q4,87.4,32.7,(326.4) Central Harlem North-Polo Grounds
38,Crown Heights North,319.8,66.7,1.3,15.7,0.0,11.9,3.6,0.2,0.9438,BROOKLYN,Q4,82.4,15.7,(319.8) Crown Heights North
11,Bedford Park-Fordham North,306.0,15.6,2.3,78.0,0.0,3.1,0.5,0.5,0.9874,BRONX,Q4,93.6,78.0,(306.0) Bedford Park-Fordham North
44,East Concourse-Concourse Village,287.6,38.2,0.7,56.3,0.0,2.4,1.8,0.4,0.9994,BRONX,Q4,94.5,56.3,(287.6) East Concourse-Concourse Village
178,Williamsbridge-Olinville,267.4,52.2,5.5,29.5,0.0,8.8,2.3,1.0,0.9507,BRONX,Q4,81.7,29.5,(267.4) Williamsbridge-Olinville
50,East New York,266.0,55.9,1.5,32.8,0.0,5.3,2.9,1.6,0.9839,BROOKLYN,Q4,88.7,32.8,(266.0) East New York
163,University Heights-Morris Heights,265.6,24.6,1.6,69.2,0.0,2.0,1.9,0.7,0.9983,BRONX,Q4,93.8,69.2,(265.6) University Heights-Morris Heights
108,Mount Hope,260.2,30.9,0.4,65.3,0.0,1.7,1.1,0.4,0.9989,BRONX,Q4,96.2,65.3,(260.2) Mount Hope
59,Flatbush,246.2,51.6,5.8,7.5,0.0,29.7,4.3,1.2,0.9174,BROOKLYN,Q4,59.1,7.5,(246.20000000000002) Flatbush
52,East Tremont,219.8,26.5,1.4,65.0,0.0,3.8,1.9,1.0,0.9954,BRONX,Q4,91.5,65.0,(219.8) East Tremont


In [None]:
# Upper-case borough name -> short label.
boro_map = dict([
    ('MANHATTAN', 'M'),
    ('BRONX', 'Br'),
    ('BROOKLYN', 'Bk'),
    ('QUEENS', 'Q'),
    ('STATEN ISLAND', 'S'),
])

In [None]:
# Attach the short borough label; boroughs missing from boro_map become NaN.
selected_nta = selected_nta.assign(
    new_boro=selected_nta['borough'].map(boro_map)
)
selected_nta

Unnamed: 0,nta,average_year_eviction_count_per_nta(nta),ep_afam,ep_asian,ep_hisp,ep_nhpi,ep_white,ep_twomore,ep_otherrace,rpl_themes,borough,quartile,bh_pct,black+hispanic,new_nta,new_boro
27,Central Harlem North-Polo Grounds,326.4,54.7,1.9,32.7,0.0,6.7,2.8,1.1,0.9748,MANHATTAN,Q4,87.4,32.7,(326.4) Central Harlem North-Polo Grounds,M
38,Crown Heights North,319.8,66.7,1.3,15.7,0.0,11.9,3.6,0.2,0.9438,BROOKLYN,Q4,82.4,15.7,(319.8) Crown Heights North,Bk
11,Bedford Park-Fordham North,306.0,15.6,2.3,78.0,0.0,3.1,0.5,0.5,0.9874,BRONX,Q4,93.6,78.0,(306.0) Bedford Park-Fordham North,Br
44,East Concourse-Concourse Village,287.6,38.2,0.7,56.3,0.0,2.4,1.8,0.4,0.9994,BRONX,Q4,94.5,56.3,(287.6) East Concourse-Concourse Village,Br
178,Williamsbridge-Olinville,267.4,52.2,5.5,29.5,0.0,8.8,2.3,1.0,0.9507,BRONX,Q4,81.7,29.5,(267.4) Williamsbridge-Olinville,Br
50,East New York,266.0,55.9,1.5,32.8,0.0,5.3,2.9,1.6,0.9839,BROOKLYN,Q4,88.7,32.8,(266.0) East New York,Bk
163,University Heights-Morris Heights,265.6,24.6,1.6,69.2,0.0,2.0,1.9,0.7,0.9983,BRONX,Q4,93.8,69.2,(265.6) University Heights-Morris Heights,Br
108,Mount Hope,260.2,30.9,0.4,65.3,0.0,1.7,1.1,0.4,0.9989,BRONX,Q4,96.2,65.3,(260.2) Mount Hope,Br
59,Flatbush,246.2,51.6,5.8,7.5,0.0,29.7,4.3,1.2,0.9174,BROOKLYN,Q4,59.1,7.5,(246.20000000000002) Flatbush,Bk
52,East Tremont,219.8,26.5,1.4,65.0,0.0,3.8,1.9,1.0,0.9954,BRONX,Q4,91.5,65.0,(219.8) East Tremont,Br


In [None]:
selected_nta.T

Unnamed: 0,27,38,11,44,178,50,163,108,59,52,169,132,106,21,171,161,112,73,118,177,4,176,29,180,134,42,20,2,1,140
nta,Central Harlem North-Polo Grounds,Crown Heights North,Bedford Park-Fordham North,East Concourse-Concourse Village,Williamsbridge-Olinville,East New York,University Heights-Morris Heights,Mount Hope,Flatbush,East Tremont,Washington Heights South,Prospect Lefferts Gardens-Wingate,Morrisania-Melrose,Brownsville,West Concourse,Todt Hill-Emerson Hill-Heartland Village-Light...,New Dorp-Midland Beach,Great Kills,Oakland Gardens,Whitestone,Auburndale,Westerleigh,Charleston-Richmond Valley-Tottenville,Windsor Terrace,Queensboro Hill,Douglas Manor-Douglaston-Little Neck,Brooklyn Heights-Cobble Hill,Arden Heights,Annadale-Huguenot-Prince's Bay-Eltingville,Rossville-Woodrow
average_year_eviction_count_per_nta(nta),326.4000,319.8000,306.0000,287.6000,267.4000,266.0000,265.6000,260.2000,246.2000,219.8000,209.2000,204.2000,202.0000,193.6000,182.4000,8.4000,8.2000,8.2000,7.6000,7.2000,7.2000,6.8000,6.2000,6.2000,6.2000,5.0000,4.8000,3.4000,3.4000,3.4000
ep_afam,54.7000,66.7000,15.6000,38.2000,52.2000,55.9000,24.6000,30.9000,51.6000,26.5000,12.8000,63.2000,38.2000,70.1000,30.9000,3.3000,3.0000,0.8000,1.6000,0.3000,11.2000,3.3000,0.1000,8.9000,11.2000,0.3000,11.4000,0.8000,0.5000,0.5000
ep_asian,1.9000,1.3000,2.3000,0.7000,5.5000,1.5000,1.6000,0.4000,5.8000,1.4000,4.5000,3.2000,0.7000,0.8000,0.4000,18.0000,13.8000,8.7000,48.6000,26.0000,43.0000,18.0000,2.1000,17.8000,43.0000,30.9000,14.2000,8.7000,4.1000,4.1000
ep_hisp,32.7000,15.7000,78.0000,56.3000,29.5000,32.8000,69.2000,65.3000,7.5000,65.0000,65.7000,14.9000,56.3000,19.5000,65.3000,15.2000,16.2000,12.8000,16.4000,14.8000,21.2000,15.2000,10.6000,16.4000,21.2000,12.4000,11.0000,12.8000,9.6000,9.6000
ep_nhpi,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.2000,0.0000,0.0000,0.1000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000
ep_white,6.7000,11.9000,3.1000,2.4000,8.8000,5.3000,2.0000,1.7000,29.7000,3.8000,14.6000,13.7000,2.4000,3.5000,1.7000,60.3000,63.2000,76.1000,30.2000,56.1000,22.2000,60.3000,86.0000,51.5000,22.2000,51.3000,57.5000,76.1000,83.5000,83.5000
ep_twomore,2.8000,3.6000,0.5000,1.8000,2.3000,2.9000,1.9000,1.1000,4.3000,1.9000,2.0000,4.1000,1.8000,5.4000,1.1000,2.4000,3.5000,1.2000,2.5000,1.9000,1.9000,2.4000,1.2000,4.6000,1.9000,3.8000,5.3000,1.2000,1.5000,1.5000
ep_otherrace,1.1000,0.2000,0.5000,0.4000,1.0000,1.6000,0.7000,0.4000,1.2000,1.0000,0.4000,0.7000,0.4000,0.6000,0.4000,0.5000,0.4000,0.4000,0.3000,0.5000,0.4000,0.5000,0.0000,0.9000,0.4000,0.3000,0.5000,0.4000,0.7000,0.7000
rpl_themes,0.9748,0.9438,0.9874,0.9994,0.9507,0.9839,0.9983,0.9989,0.9174,0.9954,0.9771,0.9220,0.9994,0.9948,0.9989,0.7861,0.8739,0.5333,0.8240,0.8561,0.9077,0.7861,0.3624,0.9197,0.9077,0.7987,0.6233,0.5333,0.5642,0.5642


In [None]:
# Names of the selected NTAs.
selected_nta['nta']

Unnamed: 0,nta
27,Central Harlem North-Polo Grounds
38,Crown Heights North
11,Bedford Park-Fordham North
44,East Concourse-Concourse Village
178,Williamsbridge-Olinville
50,East New York
163,University Heights-Morris Heights
108,Mount Hope
59,Flatbush
52,East Tremont


In [None]:
# Five-column subset of selected_nta: NTA name, average yearly eviction count,
# Black+Hispanic share, overall SVI percentile (rpl_themes) and borough.
#
# NOTE: the 'black+hispanic' column already on selected_nta does not hold
# ep_afam + ep_hisp (e.g. Central Harlem North-Polo Grounds shows 32.7, which is
# its ep_hisp value, although ep_afam is 54.7). It is therefore recomputed here
# from the two source columns so the values match the column name. The column
# keeps its name and position, so downstream consumers are unaffected.
selected_nta_excel = selected_nta[
    ['nta', 'average_year_eviction_count_per_nta(nta)', 'black+hispanic', 'rpl_themes', 'borough']
].assign(**{'black+hispanic': selected_nta['ep_afam'] + selected_nta['ep_hisp']})
selected_nta_excel

Unnamed: 0,nta,average_year_eviction_count_per_nta(nta),black+hispanic,rpl_themes,borough
27,Central Harlem North-Polo Grounds,326.4,32.7,0.9748,MANHATTAN
38,Crown Heights North,319.8,15.7,0.9438,BROOKLYN
11,Bedford Park-Fordham North,306.0,78.0,0.9874,BRONX
44,East Concourse-Concourse Village,287.6,56.3,0.9994,BRONX
178,Williamsbridge-Olinville,267.4,29.5,0.9507,BRONX
50,East New York,266.0,32.8,0.9839,BROOKLYN
163,University Heights-Morris Heights,265.6,69.2,0.9983,BRONX
108,Mount Hope,260.2,65.3,0.9989,BRONX
59,Flatbush,246.2,7.5,0.9174,BROOKLYN
52,East Tremont,219.8,65.0,0.9954,BRONX


In [None]:
# Check the (rows, columns) dimensions of selected_nta_excel.
selected_nta_excel.shape

(30, 5)

Plan: group the NTAs by average yearly eviction count (for example, one group for 100-200 evictions), and then
compute the mean SVI of the NTAs in each group (and therefore the group's quartile). For each group of NTAs, show the SVI as a box-and-whisker plot,
including the outliers of each group, and see whether the boxes trend upward as the average evictions increase (as the x-axis grows).
Build this in Excel.

Set up the columns first, and then use just one chart type (box and whiskers).

In [None]:
# Transposed view of the subset: one column per NTA, one row per field.
selected_nta_excel.transpose()

Unnamed: 0,27,38,11,44,178,50,163,108,59,52,169,132,106,21,171,112,73,118,177,4,176,29,180,134,42,20,2,1,68,140
nta,Central Harlem North-Polo Grounds,Crown Heights North,Bedford Park-Fordham North,East Concourse-Concourse Village,Williamsbridge-Olinville,East New York,University Heights-Morris Heights,Mount Hope,Flatbush,East Tremont,Washington Heights South,Prospect Lefferts Gardens-Wingate,Morrisania-Melrose,Brownsville,West Concourse,New Dorp-Midland Beach,Great Kills,Oakland Gardens,Whitestone,Auburndale,Westerleigh,Charleston-Richmond Valley-Tottenville,Windsor Terrace,Queensboro Hill,Douglas Manor-Douglaston-Little Neck,Brooklyn Heights-Cobble Hill,Arden Heights,Annadale-Huguenot-Prince's Bay-Eltingville,Glen Oaks-Floral Park-New Hyde Park,Rossville-Woodrow
average_year_eviction_count_per_nta(nta),326.4000,319.8000,306.0000,287.6000,267.4000,266.0000,265.6000,260.2000,246.2000,219.8000,209.2000,204.2000,202.0000,193.6000,182.4000,8.2000,8.2000,7.6000,7.2000,7.2000,6.8000,6.2000,6.2000,6.2000,5.0000,4.8000,3.4000,3.4000,3.4000,3.4000
black+hispanic,32.7000,15.7000,78.0000,56.3000,29.5000,32.8000,69.2000,65.3000,7.5000,65.0000,65.7000,14.9000,56.3000,19.5000,65.3000,16.2000,12.8000,16.4000,15.0000,21.2000,15.2000,10.7000,16.4000,21.2000,12.4000,11.0000,12.8000,9.6000,18.0000,9.6000
rpl_themes,0.9748,0.9438,0.9874,0.9994,0.9507,0.9839,0.9983,0.9989,0.9174,0.9954,0.9771,0.9220,0.9994,0.9948,0.9989,0.8739,0.5333,0.8240,0.8561,0.9077,0.7861,0.3624,0.9197,0.9077,0.7987,0.6233,0.5333,0.5642,0.7093,0.5642
borough,MANHATTAN,BROOKLYN,BRONX,BRONX,BRONX,BROOKLYN,BRONX,BRONX,BROOKLYN,BRONX,MANHATTAN,BROOKLYN,BRONX,BROOKLYN,BRONX,STATEN ISLAND,STATEN ISLAND,QUEENS,QUEENS,QUEENS,STATEN ISLAND,STATEN ISLAND,BROOKLYN,QUEENS,QUEENS,BROOKLYN,STATEN ISLAND,STATEN ISLAND,QUEENS,STATEN ISLAND


In [None]:
# NTA names contained in top_15.
top_15['nta']

Unnamed: 0,nta
27,Central Harlem North-Polo Grounds
38,Crown Heights North
11,Bedford Park-Fordham North
44,East Concourse-Concourse Village
178,Williamsbridge-Olinville
50,East New York
163,University Heights-Morris Heights
108,Mount Hope
59,Flatbush
52,East Tremont


In [None]:
# NTA names contained in bottom_17.
bottom_17['nta']

Unnamed: 0,nta
112,New Dorp-Midland Beach
73,Great Kills
185,park-cemetery-etc-Bronx
118,Oakland Gardens
177,Whitestone
4,Auburndale
176,Westerleigh
29,Charleston-Richmond Valley-Tottenville
180,Windsor Terrace
134,Queensboro Hill


In [None]:
# Preview selected_nta with a fresh 0..n-1 index; the previous index is kept as
# an `index` column. The result is only displayed, not assigned.
selected_nta.reset_index(drop=False)

Unnamed: 0,index,nta,average_year_eviction_count_per_nta(nta),ep_afam,ep_asian,ep_hisp,ep_nhpi,ep_white,ep_twomore,ep_otherrace,rpl_themes,borough,black+hispanic
0,27,Central Harlem North-Polo Grounds,326.4,54.7,1.9,32.7,0.0,6.7,2.8,1.1,0.9748,MANHATTAN,32.7
1,38,Crown Heights North,319.8,66.7,1.3,15.7,0.0,11.9,3.6,0.2,0.9438,BROOKLYN,15.7
2,11,Bedford Park-Fordham North,306.0,15.6,2.3,78.0,0.0,3.1,0.5,0.5,0.9874,BRONX,78.0
3,44,East Concourse-Concourse Village,287.6,38.2,0.7,56.3,0.0,2.4,1.8,0.4,0.9994,BRONX,56.3
4,178,Williamsbridge-Olinville,267.4,52.2,5.5,29.5,0.0,8.8,2.3,1.0,0.9507,BRONX,29.5
5,50,East New York,266.0,55.9,1.5,32.8,0.0,5.3,2.9,1.6,0.9839,BROOKLYN,32.8
6,163,University Heights-Morris Heights,265.6,24.6,1.6,69.2,0.0,2.0,1.9,0.7,0.9983,BRONX,69.2
7,108,Mount Hope,260.2,30.9,0.4,65.3,0.0,1.7,1.1,0.4,0.9989,BRONX,65.3
8,59,Flatbush,246.2,51.6,5.8,7.5,0.0,29.7,4.3,1.2,0.9174,BROOKLYN,7.5
9,52,East Tremont,219.8,26.5,1.4,65.0,0.0,3.8,1.9,1.0,0.9954,BRONX,65.0


In [None]:
# Re-display selected_nta_excel for inspection.
selected_nta_excel

Unnamed: 0,nta,average_year_eviction_count_per_nta(nta),black+hispanic,rpl_themes,borough
27,Central Harlem North-Polo Grounds,326.4,32.7,0.9748,MANHATTAN
38,Crown Heights North,319.8,15.7,0.9438,BROOKLYN
11,Bedford Park-Fordham North,306.0,78.0,0.9874,BRONX
44,East Concourse-Concourse Village,287.6,56.3,0.9994,BRONX
178,Williamsbridge-Olinville,267.4,29.5,0.9507,BRONX
50,East New York,266.0,32.8,0.9839,BROOKLYN
163,University Heights-Morris Heights,265.6,69.2,0.9983,BRONX
108,Mount Hope,260.2,65.3,0.9989,BRONX
59,Flatbush,246.2,7.5,0.9174,BROOKLYN
52,East Tremont,219.8,65.0,0.9954,BRONX


# **Group NTAs by eviction count and get each group's average SVI and SVI distribution**

In [None]:
# Summary statistics of the average yearly eviction count for the selected NTAs.
selected_nta.loc[:, 'average_year_eviction_count_per_nta(nta)'].describe()

Unnamed: 0,average_year_eviction_count_per_nta(nta)
count,30.0
mean,128.12
std,128.6157
min,3.4
25%,6.2
50%,95.3
75%,256.7
max,326.4


In [None]:
# Summary statistics of the same eviction-count column across all of neighbor_evictions.
neighbor_evictions.loc[:, 'average_year_eviction_count_per_nta(nta)'].describe()

Unnamed: 0,average_year_eviction_count_per_nta(nta)
count,187.0
mean,71.0128
std,70.0048
min,0.2
25%,22.6
50%,44.8
75%,100.9
max,326.4


In [None]:
# Cut points of the average yearly eviction count across neighbor_evictions:
# the 0.25, 0.50 and 0.75 quantiles, plus quantile 1 (the maximum).
(
    first_quartile,
    second_quartile,
    third_quartile,
    fourth_quartile,
) = (
    neighbor_evictions['average_year_eviction_count_per_nta(nta)']
    .quantile([0.25, 0.50, 0.75, 1])
    .to_numpy()
)

In [None]:
# Add a new `quartile` column to the raw dataframe so that each row (one NTA)
# can be classified; None is the placeholder until the labels are assigned.

neighbor_evictions['quartile'] = None

In [None]:
# Label each NTA with the eviction-count quartile it falls into. The bands are
# right-inclusive: Q1 <= first_quartile < Q2 <= second_quartile < Q3 <= third_quartile < Q4.
evictions_per_nta = neighbor_evictions['average_year_eviction_count_per_nta(nta)']
quartile_masks = {
    'Q1': evictions_per_nta <= first_quartile,
    'Q2': (evictions_per_nta > first_quartile) & (evictions_per_nta <= second_quartile),
    'Q3': (evictions_per_nta > second_quartile) & (evictions_per_nta <= third_quartile),
    'Q4': evictions_per_nta > third_quartile,
}
for quartile_label, quartile_mask in quartile_masks.items():
    neighbor_evictions.loc[quartile_mask, 'quartile'] = quartile_label

In [None]:
# First five rows, to check the new `quartile` labels.
neighbor_evictions.head(5)

Unnamed: 0,nta,average_year_eviction_count_per_nta(nta),ep_afam,ep_asian,ep_hisp,ep_nhpi,ep_white,ep_twomore,ep_otherrace,rpl_themes,borough,quartile
27,Central Harlem North-Polo Grounds,326.4,54.7,1.9,32.7,0.0,6.7,2.8,1.1,0.9748,MANHATTAN,Q4
38,Crown Heights North,319.8,66.7,1.3,15.7,0.0,11.9,3.6,0.2,0.9438,BROOKLYN,Q4
11,Bedford Park-Fordham North,306.0,15.6,2.3,78.0,0.0,3.1,0.5,0.5,0.9874,BRONX,Q4
44,East Concourse-Concourse Village,287.6,38.2,0.7,56.3,0.0,2.4,1.8,0.4,0.9994,BRONX,Q4
178,Williamsbridge-Olinville,267.4,52.2,5.5,29.5,0.0,8.8,2.3,1.0,0.9507,BRONX,Q4


In [None]:
# Last five rows, to check the labels at the other end of the frame.
neighbor_evictions.tail(5)

Unnamed: 0,nta,average_year_eviction_count_per_nta(nta),ep_afam,ep_asian,ep_hisp,ep_nhpi,ep_white,ep_twomore,ep_otherrace,rpl_themes,borough,quartile,bh_pct
2,Arden Heights,3.4,0.8,8.7,12.8,0.0,76.1,1.2,0.4,0.5333,STATEN ISLAND,Q1,13.6
1,Annadale-Huguenot-Prince's Bay-Eltingville,3.4,0.5,4.1,9.6,0.0,83.5,1.5,0.7,0.5642,STATEN ISLAND,Q1,10.1
68,Glen Oaks-Floral Park-New Hyde Park,3.4,5.7,46.8,18.0,0.0,23.0,4.1,2.4,0.7093,QUEENS,Q1,23.7
140,Rossville-Woodrow,3.4,0.5,4.1,9.6,0.0,83.5,1.5,0.7,0.5642,STATEN ISLAND,Q1,10.1
186,park-cemetery-etc-Brooklyn,0.2,21.1,7.5,14.1,0.0,52.1,4.1,0.9,0.9427,BROOKLYN,Q1,35.2


In [None]:
# bh_pct = ep_afam + ep_hisp, computed row by row for every NTA.
neighbor_evictions['bh_pct'] = neighbor_evictions['ep_afam'].add(neighbor_evictions['ep_hisp'])

In [None]:
# Independent copy of the columns needed for the box-and-whisker data, so later
# edits to box_whisker_df do not touch neighbor_evictions.
box_whisker_columns = [
    'nta',
    'quartile',
    'average_year_eviction_count_per_nta(nta)',
    'bh_pct',
    'rpl_themes',
]
box_whisker_df = neighbor_evictions.loc[:, box_whisker_columns].copy()
box_whisker_df

Unnamed: 0,nta,quartile,average_year_eviction_count_per_nta(nta),bh_pct,rpl_themes
27,Central Harlem North-Polo Grounds,Q4,326.4,87.4,0.9748
38,Crown Heights North,Q4,319.8,82.4,0.9438
11,Bedford Park-Fordham North,Q4,306.0,93.6,0.9874
44,East Concourse-Concourse Village,Q4,287.6,94.5,0.9994
178,Williamsbridge-Olinville,Q4,267.4,81.7,0.9507
50,East New York,Q4,266.0,88.7,0.9839
163,University Heights-Morris Heights,Q4,265.6,93.8,0.9983
108,Mount Hope,Q4,260.2,96.2,0.9989
59,Flatbush,Q4,246.2,59.1,0.9174
52,East Tremont,Q4,219.8,91.5,0.9954


In [None]:
# Reverse the row order of box_whisker_df.
# Note: this reassigns the same name, so re-running the cell flips the order back.
box_whisker_df = box_whisker_df.iloc[::-1]

In [None]:
# Narrow box_whisker_df to the four columns listed, dropping the eviction-count column.
kept_columns = ['nta', 'quartile', 'bh_pct', 'rpl_themes']
box_whisker_df = box_whisker_df.loc[:, kept_columns]
box_whisker_df

Unnamed: 0,nta,quartile,bh_pct,rpl_themes
186,park-cemetery-etc-Brooklyn,Q1,35.2,0.9427
140,Rossville-Woodrow,Q1,10.1,0.5642
68,Glen Oaks-Floral Park-New Hyde Park,Q1,23.7,0.7093
1,Annadale-Huguenot-Prince's Bay-Eltingville,Q1,10.1,0.5642
2,Arden Heights,Q1,13.6,0.5333
20,Brooklyn Heights-Cobble Hill,Q1,22.4,0.6233
42,Douglas Manor-Douglaston-Little Neck,Q1,12.7,0.7987
134,Queensboro Hill,Q1,32.4,0.9077
180,Windsor Terrace,Q1,25.3,0.9197
29,Charleston-Richmond Valley-Tottenville,Q1,10.7,0.3624
