In [None]:
import pandas as pd
import numpy as np
from scipy import stats
import datetime as dt
import matplotlib
import matplotlib.pyplot as plt
import os
import io
import geopandas as gpd
import seaborn as sns
# suppress warning
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.width', None)
# display all columns

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# **Step 1: get the svi nyc data**

In [None]:
file_path1 = '/content/drive/My Drive/X999/NewYork_cencus.csv'

In [None]:
svi_census = pd.read_csv(file_path1)

In [None]:
svi_census.shape

(5394, 158)

In [None]:
fips_list = list(svi_census.FIPS.unique())
# fips_list

In [None]:
nyc_county_codes = {'36005', '36047', '36061', '36081', '36085'}

In [None]:
nyc_tracts = [fips for fips in fips_list if str(fips)[:5] in nyc_county_codes]

In [None]:
len(nyc_tracts)

2324

In [None]:
nyc_tracts[:5]

[np.int64(36005000100),
 np.int64(36005000200),
 np.int64(36005000400),
 np.int64(36005001600),
 np.int64(36005001901)]

In [None]:
svi_nyc = svi_census[svi_census['FIPS'].isin(nyc_tracts)].copy()

In [None]:
svi_nyc.shape

(2324, 158)

In [None]:
svi_nyc.columns = svi_nyc.columns.str.lower()
svi_nyc.shape

(2324, 158)

In [None]:
svi_nyc.columns

Index(['st', 'state', 'st_abbr', 'stcnty', 'county', 'fips', 'location',
       'area_sqmi', 'e_totpop', 'm_totpop',
       ...
       'ep_asian', 'mp_asian', 'ep_aian', 'mp_aian', 'ep_nhpi', 'mp_nhpi',
       'ep_twomore', 'mp_twomore', 'ep_otherrace', 'mp_otherrace'],
      dtype='object', length=158)

# **Step 2: clean svi_census as svi_nta**

In [None]:
link2 = '/content/drive/My Drive/X999/svi_cleaned.csv'

In [None]:
svi_nta = pd.read_csv(link2)
svi_nta.shape

(204, 153)

In [None]:
svi_nta.head()

Unnamed: 0,fips,location,area_sqmi,e_totpop,m_totpop,e_hu,m_hu,e_hh,m_hh,e_pov150,m_pov150,e_unemp,m_unemp,e_hburd,m_hburd,e_nohsdp,m_nohsdp,e_uninsur,m_uninsur,e_age65,m_age65,e_age17,m_age17,e_disabl,m_disabl,e_sngpnt,m_sngpnt,e_limeng,m_limeng,e_minrty,m_minrty,e_munit,m_munit,e_mobile,m_mobile,e_crowd,m_crowd,e_noveh,m_noveh,e_groupq,m_groupq,ep_pov150,mp_pov150,ep_unemp,mp_unemp,ep_hburd,mp_hburd,ep_nohsdp,mp_nohsdp,ep_uninsur,mp_uninsur,ep_age65,mp_age65,ep_age17,mp_age17,ep_disabl,mp_disabl,ep_sngpnt,mp_sngpnt,ep_limeng,mp_limeng,ep_minrty,mp_minrty,ep_munit,mp_munit,ep_mobile,mp_mobile,ep_crowd,mp_crowd,ep_noveh,mp_noveh,ep_groupq,mp_groupq,epl_pov150,epl_unemp,epl_hburd,epl_nohsdp,epl_uninsur,spl_theme1,rpl_theme1,epl_age65,epl_age17,epl_disabl,epl_sngpnt,epl_limeng,spl_theme2,rpl_theme2,epl_minrty,spl_theme3,rpl_theme3,epl_munit,epl_mobile,epl_crowd,epl_noveh,epl_groupq,spl_theme4,rpl_theme4,spl_themes,rpl_themes,f_pov150,f_unemp,f_hburd,f_nohsdp,f_uninsur,f_theme1,f_age65,f_age17,f_disabl,f_sngpnt,f_limeng,f_theme2,f_minrty,f_theme3,f_munit,f_mobile,f_crowd,f_noveh,f_groupq,f_theme4,f_total,e_daypop,e_noint,m_noint,e_afam,m_afam,e_hisp,m_hisp,e_asian,m_asian,e_aian,m_aian,e_nhpi,m_nhpi,e_twomore,m_twomore,e_otherrace,m_otherrace,ep_noint,mp_noint,ep_afam,mp_afam,ep_hisp,mp_hisp,ep_asian,mp_asian,ep_aian,mp_aian,ep_nhpi,mp_nhpi,ep_twomore,mp_twomore,ep_otherrace,mp_otherrace
0,10001,ZCTA5 10001,0.623822,27004,1827,16975,831,14375,782,5248,797,761,266,3314,531,1930,534,831,289,3428,432,2694,643,2310,499,501,215,1381,405,13460,2305,15840,898,15,23,389,135,12285,840,2213,218,20.3,2.7,4.3,1.5,23.1,3.5,9.1,2.4,3.1,1.0,12.7,1.6,10.0,2.1,8.6,1.9,3.5,1.5,5.3,1.5,49.8,7.8,93.3,2.7,0.1,0.1,2.7,0.9,85.5,2.8,8.2,0.6,0.6108,0.4574,0.5573,0.5902,0.4436,2.6593,0.5688,0.142,0.1161,0.1891,0.4707,0.8777,1.7956,0.1692,0.867,0.867,0.867,0.9853,0.271,0.7402,0.9949,0.9104,3.9018,0.9806,9.2237,0.7414,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,3,3,239407,1047,389,2220,576,5206,943,5031,774,0,25,0,25,780,326,223,169,7.3,2.6,8.2,2.2,19.3,3.0,18.6,2.9,0.0,0.1,0.0,0.1,2.9,1.2,0.8,0.6
1,10002,ZCTA5 10002,0.822292,76518,2894,39094,1241,36028,1326,27908,2853,2833,574,14688,1367,18301,1376,4074,766,17681,1287,10028,1549,9896,1062,2211,499,18393,1640,56964,3226,35725,1677,16,28,2461,449,29828,1403,2090,39,36.8,3.5,7.6,1.4,40.8,3.5,30.0,2.0,5.4,1.0,23.1,1.7,13.1,1.8,13.0,1.4,6.1,1.4,24.7,2.0,74.4,3.1,91.4,3.2,0.0,0.1,6.8,1.2,82.8,1.8,2.7,0.1,0.9148,0.7946,0.9219,0.9741,0.7207,4.3261,0.9639,0.7296,0.1831,0.5186,0.739,0.9944,3.1647,0.8781,0.9369,0.9369,0.9369,0.979,0.0,0.9105,0.9915,0.773,3.654,0.9254,12.0817,0.9656,1,0,1,1,0,3,0,0,0,0,1,1,1,1,1,0,1,1,0,3,8,64307,8590,1110,6141,1194,19864,2190,28477,1989,74,83,24,45,1810,486,574,394,23.8,2.9,8.0,1.5,26.0,2.5,37.2,2.2,0.1,0.1,0.0,0.1,2.4,0.6,0.8,0.5
2,10003,ZCTA5 10003,0.571603,53877,2579,30766,956,24987,936,6397,1171,1613,315,5445,853,1574,422,1282,404,8128,792,3866,718,3604,634,278,171,1217,828,19778,3548,27261,1299,0,31,798,282,20035,905,10199,194,14.3,2.5,4.7,0.9,21.8,3.3,4.2,1.2,2.4,0.7,15.1,1.4,7.2,1.2,6.7,1.2,1.1,0.7,2.3,1.6,36.7,6.3,88.6,3.2,0.0,0.1,3.2,1.1,80.2,2.8,18.9,1.0,0.4017,0.5142,0.4934,0.2486,0.3386,1.9965,0.3389,0.2513,0.0851,0.0965,0.2217,0.7589,1.4135,0.0969,0.7977,0.7977,0.7977,0.9745,0.0,0.7852,0.9869,0.9577,3.7043,0.9368,7.912,0.5373,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,3,3,138011,1458,478,2899,748,5541,943,9014,1065,51,52,27,32,2067,547,179,132,5.8,1.8,5.4,1.4,10.3,1.7,16.7,1.9,0.1,0.1,0.1,0.1,3.8,1.0,0.3,0.2
3,10004,ZCTA5 10004,0.455576,4579,926,2706,484,2123,394,169,101,11,16,130,84,32,28,13,19,190,131,840,341,104,82,8,16,39,73,2009,1102,2592,476,0,13,161,99,1548,318,35,13,3.7,2.1,0.4,0.5,6.1,3.8,0.9,0.8,0.3,0.4,4.1,2.9,18.3,5.1,2.3,1.9,0.4,0.7,0.9,1.8,43.9,22.4,95.8,4.0,0.0,1.5,7.6,4.4,72.9,9.4,0.8,0.2,0.0699,0.1413,0.057,0.0834,0.1213,0.4729,0.0252,0.04,0.3921,0.0412,0.1846,0.5977,1.2556,0.0769,0.8439,0.8439,0.8439,0.9898,0.0,0.9254,0.9749,0.551,3.4411,0.8558,6.0135,0.2299,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,3,3,77721,12,21,252,140,229,145,1370,703,0,13,0,13,158,113,0,13,0.6,1.0,5.5,3.0,5.0,3.0,29.9,12.2,0.0,0.9,0.0,0.9,3.5,2.4,0.0,0.9
4,10005,ZCTA5 10005,0.072868,8801,1132,6272,438,4881,550,647,363,257,158,532,266,168,135,106,96,158,129,924,395,99,80,140,147,22,78,2730,1506,6083,451,0,19,411,200,4503,549,50,30,7.4,4.0,3.4,2.0,10.9,5.3,2.5,2.0,1.2,1.0,1.8,1.5,10.5,3.8,1.1,0.9,2.9,3.0,0.3,0.9,31.0,16.6,97.0,2.4,0.0,0.6,8.4,4.0,92.3,3.6,0.6,0.3,0.158,0.3371,0.1031,0.1426,0.1766,0.9174,0.0631,0.0327,0.1245,0.0316,0.3966,0.4406,1.026,0.0427,0.7504,0.7504,0.7504,0.9921,0.0,0.9356,0.9972,0.4963,3.4212,0.8496,6.115,0.2437,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,3,3,56918,160,153,421,334,755,432,985,354,0,19,0,19,569,268,0,19,3.3,3.1,4.8,3.7,8.6,4.6,11.2,4.2,0.0,0.5,0.0,0.5,6.5,3.0,0.0,0.5


In [None]:
columns_to_keep = svi_nta.columns.to_list()

In [None]:
svi_nyc.drop(columns = [col for col in svi_nyc.columns if col not in columns_to_keep], inplace = True)
svi_nyc.shape

(2324, 153)

In [None]:
svi_nyc.head()

Unnamed: 0,fips,location,area_sqmi,e_totpop,m_totpop,e_hu,m_hu,e_hh,m_hh,e_pov150,m_pov150,e_unemp,m_unemp,e_hburd,m_hburd,e_nohsdp,m_nohsdp,e_uninsur,m_uninsur,e_age65,m_age65,e_age17,m_age17,e_disabl,m_disabl,e_sngpnt,m_sngpnt,e_limeng,m_limeng,e_minrty,m_minrty,e_munit,m_munit,e_mobile,m_mobile,e_crowd,m_crowd,e_noveh,m_noveh,e_groupq,m_groupq,ep_pov150,mp_pov150,ep_unemp,mp_unemp,ep_hburd,mp_hburd,ep_nohsdp,mp_nohsdp,ep_uninsur,mp_uninsur,ep_age65,mp_age65,ep_age17,mp_age17,ep_disabl,mp_disabl,ep_sngpnt,mp_sngpnt,ep_limeng,mp_limeng,ep_minrty,mp_minrty,ep_munit,mp_munit,ep_mobile,mp_mobile,ep_crowd,mp_crowd,ep_noveh,mp_noveh,ep_groupq,mp_groupq,epl_pov150,epl_unemp,epl_hburd,epl_nohsdp,epl_uninsur,spl_theme1,rpl_theme1,epl_age65,epl_age17,epl_disabl,epl_sngpnt,epl_limeng,spl_theme2,rpl_theme2,epl_minrty,spl_theme3,rpl_theme3,epl_munit,epl_mobile,epl_crowd,epl_noveh,epl_groupq,spl_theme4,rpl_theme4,spl_themes,rpl_themes,f_pov150,f_unemp,f_hburd,f_nohsdp,f_uninsur,f_theme1,f_age65,f_age17,f_disabl,f_sngpnt,f_limeng,f_theme2,f_minrty,f_theme3,f_munit,f_mobile,f_crowd,f_noveh,f_groupq,f_theme4,f_total,e_daypop,e_noint,m_noint,e_afam,m_afam,e_hisp,m_hisp,e_asian,m_asian,e_aian,m_aian,e_nhpi,m_nhpi,e_twomore,m_twomore,e_otherrace,m_otherrace,ep_noint,mp_noint,ep_afam,mp_afam,ep_hisp,mp_hisp,ep_asian,mp_asian,ep_aian,mp_aian,ep_nhpi,mp_nhpi,ep_twomore,mp_twomore,ep_otherrace,mp_otherrace
101,36005000100,Census Tract 1; Bronx County; New York,0.647574,4446,616,0,13,0,13,0,13,0,13,0,26,1495,291,0,13,31,24,58,57,0,13,0,18,363,120,3348,739,0,18,0,13,0,18,0,13,4446,616,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,38.7,5.3,-999.0,-999.0,0.7,0.5,1.3,1.3,-999.0,-999.0,-999.0,-999.0,8.2,2.4,75.3,12.9,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,100.0,0.0,-999.0,-999.0,-999.0,0.9747,-999.0,-999.0,-999.0,0.01,0.0106,-999.0,-999.0,0.7373,-999.0,-999.0,0.733,0.733,0.733,-999.0,-999.0,-999.0,-999.0,0.9925,-999.0,-999.0,-999.0,-999.0,-999,-999,-999,1,-999,-999,0,0,-999,-999,0,-999,0,0,-999,-999,-999,-999,1,-999,-999,16153,0,13,2000,368,1172,225,123,60,9,19,0,13,30,35,14,17,-999.0,-999.0,45.0,6.4,26.4,4.2,2.8,1.5,0.2,0.4,0.0,0.9,0.7,0.7,0.3,0.4
102,36005000200,Census Tract 2; Bronx County; New York,0.173014,4870,621,1487,209,1425,212,910,382,108,85,345,141,531,204,337,273,812,186,822,222,790,241,81,69,614,311,4787,627,0,18,0,13,89,68,301,124,7,1,18.7,7.5,4.5,3.6,24.2,9.2,14.8,5.2,6.9,5.3,16.7,4.5,16.9,4.1,16.2,5.1,5.7,4.8,13.2,6.5,98.3,2.9,0.0,1.2,0.0,2.7,6.3,4.7,21.1,8.9,0.1,0.0,0.5285,0.4034,0.4139,0.6803,0.7478,2.7739,0.5891,0.4893,0.28,0.7856,0.6021,0.8331,2.9901,0.7796,0.9391,0.9391,0.9391,0.0,0.0,0.7032,0.5796,0.2617,1.5445,0.2277,8.2476,0.6206,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1686,174,85,1281,416,3109,457,299,352,0,13,0,13,0,13,98,115,12.2,6.2,26.3,7.1,63.8,8.5,6.1,7.1,0.0,0.8,0.0,0.8,0.0,0.8,2.0,2.4
103,36005000400,Census Tract 4; Bronx County; New York,0.29939,6257,982,2344,278,2309,281,648,246,285,174,855,293,892,468,175,117,828,431,1383,371,883,305,259,119,508,449,5974,1002,28,57,0,19,72,73,600,312,19,2,10.4,3.6,8.3,4.9,37.0,11.9,19.5,8.2,2.8,1.8,13.2,6.2,22.1,4.7,14.1,5.7,11.3,5.0,8.5,7.4,95.5,5.6,1.2,2.4,0.0,1.7,3.1,3.1,26.0,11.8,0.3,0.1,0.26,0.7454,0.7265,0.7829,0.3388,2.8536,0.6128,0.2876,0.6304,0.6831,0.8508,0.7439,3.1958,0.8518,0.8803,0.8803,0.8803,0.2584,0.0,0.5296,0.6235,0.3787,1.7902,0.3235,8.7199,0.6871,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3697,45,48,1559,568,4212,925,103,78,0,19,0,19,36,34,64,89,1.9,2.1,24.9,8.5,67.3,8.1,1.6,1.2,0.0,0.6,0.0,0.6,0.6,0.6,1.0,1.5
104,36005001600,Census Tract 16; Bronx County; New York,0.18729,6177,618,2205,236,2205,236,1883,534,343,155,1186,330,1142,291,208,125,1269,332,1201,249,1268,350,146,84,459,181,6071,620,1601,272,0,19,403,148,1193,276,225,9,31.6,8.3,11.9,4.7,53.8,13.8,27.6,6.6,3.5,2.1,20.5,4.8,19.4,3.4,21.2,5.4,6.6,3.7,7.8,3.0,98.3,2.0,72.6,9.6,0.0,1.8,18.3,6.4,54.1,9.0,3.6,0.4,0.7842,0.8854,0.9396,0.899,0.4366,3.9448,0.8837,0.7047,0.4518,0.9255,0.6596,0.7292,3.4708,0.9234,0.9391,0.9391,0.9391,0.8666,0.0,0.932,0.8033,0.8276,3.4295,0.9456,11.7842,0.9561,0,0,1,0,0,1,0,0,1,0,0,1,1,1,0,0,1,0,0,1,4,8012,362,142,2132,640,3507,552,148,182,213,207,0,19,66,57,5,9,16.4,6.6,34.5,9.1,56.8,8.7,2.4,3.0,3.4,3.3,0.0,0.6,1.1,0.9,0.1,0.1
105,36005001901,Census Tract 19.01; Bronx County; New York,0.079154,2181,258,978,100,940,108,870,234,77,61,532,140,246,89,259,203,19,21,475,140,429,120,164,56,24,59,1875,294,703,121,0,13,41,43,699,124,17,4,39.9,9.6,6.2,4.9,56.6,13.4,16.8,6.1,11.9,8.8,0.9,1.0,21.8,6.0,19.7,5.3,17.5,5.6,1.2,3.0,86.0,8.8,71.9,10.0,0.0,4.0,4.3,4.5,74.4,7.9,0.8,0.2,0.8703,0.5865,0.9582,0.7299,0.9196,4.0645,0.9059,0.0107,0.6132,0.9013,0.943,0.3575,2.8257,0.7071,0.7973,0.7973,0.7973,0.8653,0.0,0.61,0.9297,0.588,2.993,0.8335,10.6805,0.8896,0,0,1,0,1,2,0,0,1,1,0,2,0,0,0,0,0,1,0,1,5,1970,115,73,942,277,842,256,0,13,34,50,0,13,57,44,0,13,12.2,7.7,43.2,11.1,38.6,11.3,0.0,1.8,1.6,2.3,0.0,1.8,2.6,2.0,0.0,1.8


In [None]:
svi_census_pop = svi_nyc[['fips', 'location', 'area_sqmi', 'e_totpop']]
svi_census_pop

Unnamed: 0,fips,location,area_sqmi,e_totpop
101,36005000100,Census Tract 1; Bronx County; New York,0.647574,4446
102,36005000200,Census Tract 2; Bronx County; New York,0.173014,4870
103,36005000400,Census Tract 4; Bronx County; New York,0.299390,6257
104,36005001600,Census Tract 16; Bronx County; New York,0.187290,6177
105,36005001901,Census Tract 19.01; Bronx County; New York,0.079154,2181
...,...,...,...,...
4282,36085030301,Census Tract 303.01; Richmond County; New York,0.277731,5915
4283,36085030302,Census Tract 303.02; Richmond County; New York,0.350875,6374
4284,36085031901,Census Tract 319.01; Richmond County; New York,0.149827,3674
4285,36085031902,Census Tract 319.02; Richmond County; New York,0.266618,5053


# **Step 3: Get the bbl_evictions_311_svi merged df**

In [None]:
link3 = '/content/drive/My Drive/X999/bbl_evictions_311_svi_normal_times.csv'
merged_df = pd.read_csv(link3)
merged_df.shape

(66397, 94)

In [None]:
merged_df.columns

Index(['primary_key', 'bbl', 'court_index_number', 'docket_number',
       'eviction_address', 'eviction_apartment_number', 'executed_date',
       'borough', 'zipcode', 'ejectment', 'eviction/legal_possession',
       'latitude', 'longitude', 'community_board', 'council_district',
       'census_tract', 'bin', 'nta', 'year', 'month_year', 'geometry',
       'yearbuilt', 'bldgclass', 'numfloors', 'unitsres', 'ownername',
       'bldgarea', 'building_type', 'building_category', 'is_condo',
       'floor_category', 'rent_era', 'architectural_style', 'economic_period',
       'residential_units_category', 'is_llc', 'building_size_category',
       'size_quartile', 'decade', 'fips', 'e_totpop', 'rpl_theme1',
       'rpl_theme2', 'rpl_theme3', 'rpl_theme4', 'rpl_themes', 'ep_pov150',
       'ep_unemp', 'ep_nohsdp', 'ep_uninsur', 'ep_age65', 'ep_age17',
       'ep_disabl', 'ep_limeng', 'ep_noveh', 'ep_crowd', 'ep_hburd', 'ep_afam',
       'ep_hisp', 'ep_asian', 'ep_aian', 'ep_nhpi', 'ep_twom

In [None]:
merged_df.census_tract

Unnamed: 0,census_tract
0,1152.0
1,118.0
2,122.0
3,409.0
4,35.0
...,...
66392,7.0
66393,151.0
66394,151.0
66395,97.0


In [None]:
merged_df['tract_str'] = merged_df['census_tract'].astype(str).str.replace('.0', '').str.zfill(6)
merged_df.tract_str

Unnamed: 0,tract_str
0,001152
1,000118
2,000122
3,000409
4,000035
...,...
66392,000007
66393,000151
66394,000151
66395,000097


In [None]:
svi_census['tract_str'] = svi_census['FIPS'].astype(str).str[-6:]
svi_census.tract_str

Unnamed: 0,tract_str
0,000100
1,000201
2,000202
3,000301
4,000302
...,...
5389,150301
5390,150302
5391,150400
5392,150501


In [None]:
overlapping_tracts = set(merged_df['tract_str']).intersection(set(svi_census['tract_str']))
num_overlapping = len(overlapping_tracts)
num_overlapping

355

In [None]:
svi_census_pop.shape[0]

2324

In [None]:
len(merged_df['tract_str'].unique())

1258

In [None]:
nyc_county_codes = ['36005', '36047', '36061', '36081', '36085']
svi_nyc = svi_census[svi_census['FIPS'].astype(str).str[:5].isin(nyc_county_codes)].copy()

In [None]:
merged_df['tract_str'] = merged_df['census_tract'].astype(str).str.replace('.0', '').str.zfill(6)

In [None]:
svi_nyc['tract_str'] = svi_nyc['FIPS'].astype(str).str[-6:]

In [None]:
overlap = set(merged_df['tract_str']).intersection(set(svi_nyc['tract_str']))
len(overlap) / len(merged_df)

0.005150835128093137

In [None]:
missing = set(merged_df['tract_str']) - set(svi_nyc['tract_str'])
len(missing)

916

In [None]:
merged_df.fips

Unnamed: 0,fips
0,11207
1,11220
2,11220
3,10468
4,10455
...,...
66392,10301
66393,10302
66394,10302
66395,10310


In [None]:
borough_to_county = {
    'BRONX': '005',
    'BROOKLYN': '047',
    'MANHATTAN': '061',
    'QUEENS': '081',
    'STATEN ISLAND': '085'
}

In [None]:
merged_df['tract_str'] = (
    merged_df['census_tract']
    .astype(str)
    .str.replace('.0', '')
    .str.zfill(6)
)

In [None]:
merged_df['tract_str'].head()

Unnamed: 0,tract_str
0,1152
1,118
2,122
3,409
4,35


In [None]:
merged_df['rebuilt_fips'] = (
    '36' +
    merged_df['borough'].map(borough_to_county) +
    merged_df['tract_str']
)

In [None]:
len(merged_df['rebuilt_fips'][0])

11

In [None]:
svi_census_pop.fips

Unnamed: 0,fips
101,36005000100
102,36005000200
103,36005000400
104,36005001600
105,36005001901
...,...
4282,36085030301
4283,36085030302
4284,36085031901
4285,36085031902


In [None]:
# type(svi_census_pop['fips'])

In [None]:
svi_census_pop['fips'].dtype

dtype('int64')

In [None]:
type(svi_census_pop['fips'].iloc[0])

numpy.int64

In [None]:
overlapping_tracts = set(merged_df['rebuilt_fips']).intersection(set(svi_census_pop['fips']))
num_overlapping = len(overlapping_tracts)
num_overlapping

0

In [None]:
len(merged_df['rebuilt_fips'].unique())

2031

In [None]:
len(svi_census_pop['fips'].unique())

2324

In [None]:
# merged_df.rebuilt_fips[0].dtype()
type(merged_df['rebuilt_fips'].iloc[0])
merged_df['rebuilt_fips'] = merged_df['rebuilt_fips'].astype(int)

In [None]:
overlapping_tracts = set(merged_df['rebuilt_fips']).intersection(set(svi_census_pop['fips']))
num_overlapping = len(overlapping_tracts)
num_overlapping

357

In [None]:
len(set(merged_df['rebuilt_fips'])), num_overlapping/len(set(merged_df['rebuilt_fips']))
# only 17.58 overlapped fips

(2031, 0.1757754800590842)

## **conclusion: Rebuilding fips from eviction data is hard, because the reduilt_fips from borough, zipcodes, and census tract do not match well with all the census tracts and the real census tract from SVI**

# **Step 4: Normalize the eviction rates by nta population**

In [None]:
merged_df.columns

Index(['primary_key', 'bbl', 'court_index_number', 'docket_number',
       'eviction_address', 'eviction_apartment_number', 'executed_date',
       'borough', 'zipcode', 'ejectment', 'eviction/legal_possession',
       'latitude', 'longitude', 'community_board', 'council_district',
       'census_tract', 'bin', 'nta', 'year', 'month_year', 'geometry',
       'yearbuilt', 'bldgclass', 'numfloors', 'unitsres', 'ownername',
       'bldgarea', 'building_type', 'building_category', 'is_condo',
       'floor_category', 'rent_era', 'architectural_style', 'economic_period',
       'residential_units_category', 'is_llc', 'building_size_category',
       'size_quartile', 'decade', 'fips', 'e_totpop', 'rpl_theme1',
       'rpl_theme2', 'rpl_theme3', 'rpl_theme4', 'rpl_themes', 'ep_pov150',
       'ep_unemp', 'ep_nohsdp', 'ep_uninsur', 'ep_age65', 'ep_age17',
       'ep_disabl', 'ep_limeng', 'ep_noveh', 'ep_crowd', 'ep_hburd', 'ep_afam',
       'ep_hisp', 'ep_asian', 'ep_aian', 'ep_nhpi', 'ep_twom

In [None]:
merged_df.drop(columns = ['tract_str', 'rebuilt_fips'], inplace = True)

## **evictions_nta_per_1kpop**

In [None]:
# nta_population = merged_df.groupby('nta')['e_totpop'].first().reset_index()
# nta_population

In [None]:
# evictions_per_nta = merged_df.groupby('nta').size().reset_index(name='eviction_count')
# evictions_per_nta

In [None]:
population_per_nta = merged_df.drop_duplicates('nta')[['nta', 'e_totpop']]
population_per_nta
# all rows here

Unnamed: 0,nta,e_totpop
0,East New York,96801.0
1,Sunset Park East,93008.0
2,Sunset Park West,93008.0
3,Van Cortlandt Village,81397.0
4,Mott Haven-Port Morris,44380.0
...,...,...
8239,Rossville-Woodrow,34740.0
8344,Midtown-Midtown South,30930.0
9697,Maspeth,39250.0
12075,Brooklyn Heights-Cobble Hill,69755.0


In [None]:
merged_df['evictions_nta_per_1kpop'] = merged_df['average_year_eviction_nta_count'] / merged_df['e_totpop'] * 1000
merged_df[['bin', 'evictions_nta_per_1kpop']].sort_values(by = 'evictions_nta_per_1kpop', ascending = False)

Unnamed: 0,bin,evictions_nta_per_1kpop
29883,4032652,inf
65395,4236378,181.927711
39043,1046407,36.375575
14692,1046407,36.375575
1756,1046407,36.375575
...,...,...
30447,5079157,0.053566
9663,5133499,0.053566
66189,5105247,0.053566
30665,5076332,0.053566


In [None]:
merged_df.e_totpop.median()

71812.0

In [None]:
bin_4032652	= merged_df[merged_df['bin'] == 4032652]
bin_4032652.nta
# pop: 69,529
# https://www.weichert.com/search/community/city.aspx?city=14635#:~:text=Total:%2069%2C529,Median%20Age:%200.00

Unnamed: 0,nta
29883,East Elmhurst


In [None]:
merged_df.loc[merged_df['bin'] == 4032652, 'e_totpop'] = 69529

In [None]:
merged_df['evictions_nta_per_1kpop'] = merged_df['average_year_eviction_nta_count'] / merged_df['e_totpop'] * 1000
merged_df[['bin','nta', 'evictions_nta_per_1kpop']].sort_values(by = 'evictions_nta_per_1kpop', ascending = False).head(30)

Unnamed: 0,bin,nta,evictions_nta_per_1kpop
65395,4236378,St. Albans,181.927711
14692,1046407,Lenox Hill-Roosevelt Island,36.375575
1756,1046407,Lenox Hill-Roosevelt Island,36.375575
39043,1046407,Lenox Hill-Roosevelt Island,36.375575
8027,2028351,Crotona Park East,26.281454
23092,2002619,East Concourse-Concourse Village,25.496454
17201,2002619,East Concourse-Concourse Village,25.496454
63581,1083989,Central Harlem North-Polo Grounds,17.055074
63579,1053940,Central Harlem North-Polo Grounds,17.055074
63577,1053938,Central Harlem North-Polo Grounds,17.055074


In [None]:
bin_4032652	= merged_df[merged_df['bin'] == 4032652]
bin_4032652.evictions_nta_per_1kpop
# good

Unnamed: 0,evictions_nta_per_1kpop
29883,0.227243


In [None]:
Albans = merged_df[merged_df['nta'] == 'St. Albans']
Albans.head(95)
# because this is done by zipcodes, so a mismatch of a very small zipcode and the very small population resulted in an outlier of
# normalized veictions per nta per 1k

Unnamed: 0,primary_key,bbl,court_index_number,docket_number,eviction_address,eviction_apartment_number,executed_date,borough,zipcode,ejectment,eviction/legal_possession,latitude,longitude,community_board,council_district,census_tract,bin,nta,year,month_year,geometry,yearbuilt,bldgclass,numfloors,unitsres,ownername,bldgarea,building_type,building_category,is_condo,floor_category,rent_era,architectural_style,economic_period,residential_units_category,is_llc,building_size_category,size_quartile,decade,fips,e_totpop,rpl_theme1,rpl_theme2,rpl_theme3,rpl_theme4,rpl_themes,ep_pov150,ep_unemp,ep_nohsdp,ep_uninsur,ep_age65,ep_age17,ep_disabl,ep_limeng,ep_noveh,ep_crowd,ep_hburd,ep_afam,ep_hisp,ep_asian,ep_aian,ep_nhpi,ep_twomore,ep_otherrace,ep_minrty,ep_white,invalid_zip,svi_quartile,svi_group,air_quality,animal_issues,appliances,building_exterior,doors_windows,electrical_issues,elevator_issues,floors_stairs,general_complaints,graffiti_posting,heat_hot_water,homeless_issues,noise_complaints,other_issues,pest_issues,plumbing_issues,police_matters,public_nuisance,safety_concerns,sanitation_issues,walls_ceilings,total_complaints,average_year_eviction_unit_count,average_year_eviction_count,average_year_eviction_nta_count,evictions_nta_per_1kpop
371,0304282/23_11772,4125290220,0304282/23,11772,126-30 LOCUST MANOR LANE,6C,2024-05-22,QUEENS,11434,Not an Ejectment,Possession,40.680366,-73.772190,12.0,27.0,33401.0,4596156,St. Albans,2024,2024-05,POINT (-73.77219 40.680366),2018.0,D1,14.0,82.0,"HP ECLIPSE HOUSING DEVELOPMENT FUND COMP ANY, INC.",87225.0,post-war,elevator,False,high-rise,"1994–Present, vacancy decontrol","2001-present, New Architecture","2009–present, post-financial crisis",21-100 units,False,very large,Q4 (largest 25%),2010-2020,11434,68182.0,0.9249,0.8632,0.9932,0.9613,0.9564,19.5,9.6,13.0,7.0,15.5,22.7,11.4,2.7,27.0,5.5,36.5,78.0,11.7,3.0,0.2,0.0,3.3,1.7,98.0,2.0,False,Q2,medium-low,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,5.0,0.0,0.0,4.0,0.0,0.0,1.0,1.0,0.0,15.0,0.002439,0.2,60.4,0.885864
476,0308184/23_16419,4125290230,0308184/23,16419,169-30 BAISLEY BLVD.,7A,2024-05-16,QUEENS,11434,Not an Ejectment,Possession,40.680784,-73.773998,12.0,27.0,33401.0,4542666,St. Albans,2024,2024-05,POINT (-73.773998 40.680784),2013.0,D1,8.0,54.0,"LOCUST MANOR FAMILY RESIDENCE HDFC, INC.",56926.0,post-war,elevator,False,high-rise,"1994–Present, vacancy decontrol","2001-present, New Architecture","2009–present, post-financial crisis",21-100 units,False,very large,Q4 (largest 25%),2010-2020,11434,68182.0,0.9249,0.8632,0.9932,0.9613,0.9564,19.5,9.6,13.0,7.0,15.5,22.7,11.4,2.7,27.0,5.5,36.5,78.0,11.7,3.0,0.2,0.0,3.3,1.7,98.0,2.0,False,Q2,medium-low,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,19.0,0.0,3.0,0.0,0.0,3.0,0.0,0.0,2.0,3.0,2.0,34.0,0.014815,0.8,60.4,0.885864
632,0315643/22_14225,4123900255,0315643/22,14225,172-25 BAISLEY BLVD,1 FLR-RM1,2024-05-22,QUEENS,11434,Not an Ejectment,Possession,40.682099,-73.771549,12.0,27.0,282.0,4268501,St. Albans,2024,2024-05,POINT (-73.771549 40.682099),1910.0,B2,2.0,2.0,"168TH PLACE BT, LLC",2468.0,pre-war,two-family,False,low-rise,"Pre-1947, pre-rent-control","1900–1920, Beaux-Arts","Pre-1929, pre-great depression",2-unit,True,small,Q3 (50-75%),1910-1919,11434,68182.0,0.9249,0.8632,0.9932,0.9613,0.9564,19.5,9.6,13.0,7.0,15.5,22.7,11.4,2.7,27.0,5.5,36.5,78.0,11.7,3.0,0.2,0.0,3.3,1.7,98.0,2.0,False,Q2,medium-low,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,1.0,0.0,3.0,0.0,0.0,3.0,0.0,0.0,1.0,6.0,1.0,18.0,0.300000,0.6,60.4,0.885864
913,050639/20_2067,4110720034,050639/20,2067,200-16 116TH AVENUE,#2,2023-03-15,QUEENS,11412,Not an Ejectment,Possession,40.696003,-73.752630,12.0,27.0,530.0,4238511,St. Albans,2023,2023-03,POINT (-73.75263 40.696003),1925.0,B3,2.0,2.0,"WILLIAMS, ANTHONY",1800.0,pre-war,two-family,False,low-rise,"Pre-1947, pre-rent-control","1921–1930, Art Deco Skyscrapers","Pre-1929, pre-great depression",2-unit,False,very small,Q2 (25-50%),1920-1929,11412,40593.0,0.7466,0.7254,0.9977,0.7356,0.8383,14.6,4.9,10.6,6.2,15.2,19.6,10.9,3.4,20.3,7.2,28.7,79.9,7.6,3.1,0.3,0.4,4.0,3.4,98.7,1.3,False,Q1 (Low),low,0.0,0.0,2.0,0.0,1.0,4.0,0.0,0.0,1.0,0.0,4.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,4.0,0.0,17.0,0.300000,0.6,60.4,1.487941
1069,054876/19_97546,4109870045,054876/19,97546,191-19 113TH ROAD,1,2019-10-16,QUEENS,11412,Not an Ejectment,Possession,40.698780,-73.762335,12.0,27.0,524.0,4235647,St. Albans,2019,2019-10,POINT (-73.762335 40.69878),1920.0,B3,2.0,2.0,"POWELL, DELWINSKI",1628.0,pre-war,two-family,False,low-rise,"Pre-1947, pre-rent-control","1900–1920, Beaux-Arts","Pre-1929, pre-great depression",2-unit,False,very small,Q2 (25-50%),1920-1929,11412,40593.0,0.7466,0.7254,0.9977,0.7356,0.8383,14.6,4.9,10.6,6.2,15.2,19.6,10.9,3.4,20.3,7.2,28.7,79.9,7.6,3.1,0.3,0.4,4.0,3.4,98.7,1.3,False,Q1 (Low),low,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,5.0,0.200000,0.4,60.4,1.487941
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28462,50138/18-2_25458,4109680136,50138/18-2,25458,190-62 112TH AVE,2,2019-02-27,QUEENS,11412,Not an Ejectment,Possession,40.700663,-73.763703,12.0,27.0,524.0,4235067,St. Albans,2019,2019-02,POINT (-73.763703 40.700663),1930.0,B3,2.0,2.0,SWEET HOME RESIDENCE LLC,1632.0,pre-war,two-family,False,low-rise,"Pre-1947, pre-rent-control","1921–1930, Art Deco Skyscrapers","1930-1945, great depression and WWII",2-unit,True,very small,Q2 (25-50%),1930-1939,11412,40593.0,0.7466,0.7254,0.9977,0.7356,0.8383,14.6,4.9,10.6,6.2,15.2,19.6,10.9,3.4,20.3,7.2,28.7,79.9,7.6,3.1,0.3,0.4,4.0,3.4,98.7,1.3,False,Q1 (Low),low,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.200000,0.4,60.4,1.487941
28751,50402/22_30618,4125380013,50402/22,30618,178-17 130TH AVE.,unknown,2023-09-05,QUEENS,11434,Not an Ejectment,Possession,40.680077,-73.763725,12.0,27.0,33401.0,4270385,St. Albans,2023,2023-09,POINT (-73.763725 40.680077),1925.0,A1,2.0,1.0,PETTIS LINDA,1040.0,pre-war,single-family,False,low-rise,"Pre-1947, pre-rent-control","1921–1930, Art Deco Skyscrapers","Pre-1929, pre-great depression",single-unit,False,very small,Q1 (smallest 25%),1920-1929,11434,68182.0,0.9249,0.8632,0.9932,0.9613,0.9564,19.5,9.6,13.0,7.0,15.5,22.7,11.4,2.7,27.0,5.5,36.5,78.0,11.7,3.0,0.2,0.0,3.3,1.7,98.0,2.0,False,Q2,medium-low,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.200000,0.2,60.4,0.885864
28963,50639/20_2066,4110720034,50639/20,2066,200-16 116TH AVENUE,#2,2023-03-15,QUEENS,11412,Not an Ejectment,Possession,40.696003,-73.752630,12.0,27.0,530.0,4238511,St. Albans,2023,2023-03,POINT (-73.75263 40.696003),1925.0,B3,2.0,2.0,"WILLIAMS, ANTHONY",1800.0,pre-war,two-family,False,low-rise,"Pre-1947, pre-rent-control","1921–1930, Art Deco Skyscrapers","Pre-1929, pre-great depression",2-unit,False,very small,Q2 (25-50%),1920-1929,11412,40593.0,0.7466,0.7254,0.9977,0.7356,0.8383,14.6,4.9,10.6,6.2,15.2,19.6,10.9,3.4,20.3,7.2,28.7,79.9,7.6,3.1,0.3,0.4,4.0,3.4,98.7,1.3,False,Q1 (Low),low,0.0,0.0,2.0,0.0,1.0,4.0,0.0,0.0,1.0,0.0,4.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,4.0,0.0,17.0,0.300000,0.6,60.4,1.487941
29045,50718/19_22722,4103530029,50718/19,22722,183-03 HENDERSON AVENUE,unknown,2019-05-14,QUEENS,11423,Not an Ejectment,Possession,40.706804,-73.774151,12.0,27.0,404.0,4220575,St. Albans,2019,2019-05,POINT (-73.774151 40.706804),1950.0,A5,2.0,1.0,"SAMUEL JOHN, FRANCINE",1836.0,post-war,single-family,False,low-rise,"1947–1969, rent-control","1931–1950, Manhattan Modern","1946–1975, pst war economic boom",single-unit,False,very small,Q2 (25-50%),1950-1959,11423,29921.0,0.9180,0.8222,0.9701,0.9647,0.9484,17.3,9.5,15.1,7.2,19.4,17.8,13.6,6.8,24.3,8.8,35.3,33.4,14.7,30.6,0.8,0.0,4.4,5.5,89.5,10.5,False,Q2,medium-low,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,9.0,0.200000,0.2,60.4,2.018649


In [None]:
median_of_Albans = Albans['evictions_nta_per_1kpop'].median()
median_of_Albans

1.4879412706624295

In [None]:
merged_df.loc[merged_df['bin'] == 4236378, 'evictions_nta_per_1kpop'] = median_of_Albans

In [None]:
merged_df[['bin','nta', 'evictions_nta_per_1kpop']].sort_values(by = 'evictions_nta_per_1kpop', ascending = False).head(30)

Unnamed: 0,bin,nta,evictions_nta_per_1kpop
14692,1046407,Lenox Hill-Roosevelt Island,36.375575
1756,1046407,Lenox Hill-Roosevelt Island,36.375575
39043,1046407,Lenox Hill-Roosevelt Island,36.375575
8027,2028351,Crotona Park East,26.281454
17201,2002619,East Concourse-Concourse Village,25.496454
23092,2002619,East Concourse-Concourse Village,25.496454
8317,1053939,Central Harlem North-Polo Grounds,17.055074
8449,1083988,Central Harlem North-Polo Grounds,17.055074
6753,1083988,Central Harlem North-Polo Grounds,17.055074
6722,1083990,Central Harlem North-Polo Grounds,17.055074


In [None]:
merged_df.evictions_nta_per_1kpop.describe(), merged_df.evictions_nta_per_1kpop.median()

(count    66397.000000
 mean         2.299831
 std          2.076064
 min          0.003975
 25%          1.150948
 50%          1.898226
 75%          2.960793
 max         36.375575
 Name: evictions_nta_per_1kpop, dtype: float64,
 1.8982260779489835)

$\text{evictions_nta_per_1kpop} = \left( \frac{\text{average_year_eviction_nta_count}}{\text{e_totpop}} \right) \times 1000$

## **Interpretations**

- represents the average annual number of evictions in an nta normalized by the nta's population.
- for example: if evicions_nta_per_1kpop = 120, the NTA had 120 evictions per year on average per 1000 people.

# **evictions_nta_per_1kunit_per_1kpop**

## **Add buildings_affected column**

In [None]:
evictions_per_nta = merged_df.groupby('nta')['bin'].nunique().reset_index(name='buildings_affected_per_nta')
evictions_per_nta.head()

Unnamed: 0,nta,buildings_affected_per_nta
0,Allerton-Pelham Gardens,97
1,Annadale-Huguenot-Prince's Bay-Eltingville,15
2,Arden Heights,16
3,Astoria,267
4,Auburndale,24


In [None]:
# unique_buildings = normal.drop_duplicates(subset=['nta', 'bin'])
# unique_buildings.head()

In [None]:
# total_units_per_nta = unique_buildings.groupby('nta')['unitsres'].sum().reset_index(name='total_units_per_nta')
# total_units_per_nta.head()

In [None]:
# building_units_per_nta = pd.merge(evictions_per_nta, total_units_per_nta, on='nta', how='left')
# building_units_per_nta.head()

In [None]:
merged_df = merged_df.merge(evictions_per_nta, on='nta', how='left')
merged_df.shape
# correct, now, buildings_affected column has been added.

(66397, 96)

In [None]:
merged_df.columns

Index(['primary_key', 'bbl', 'court_index_number', 'docket_number',
       'eviction_address', 'eviction_apartment_number', 'executed_date',
       'borough', 'zipcode', 'ejectment', 'eviction/legal_possession',
       'latitude', 'longitude', 'community_board', 'council_district',
       'census_tract', 'bin', 'nta', 'year', 'month_year', 'geometry',
       'yearbuilt', 'bldgclass', 'numfloors', 'unitsres', 'ownername',
       'bldgarea', 'building_type', 'building_category', 'is_condo',
       'floor_category', 'rent_era', 'architectural_style', 'economic_period',
       'residential_units_category', 'is_llc', 'building_size_category',
       'size_quartile', 'decade', 'fips', 'e_totpop', 'rpl_theme1',
       'rpl_theme2', 'rpl_theme3', 'rpl_theme4', 'rpl_themes', 'ep_pov150',
       'ep_unemp', 'ep_nohsdp', 'ep_uninsur', 'ep_age65', 'ep_age17',
       'ep_disabl', 'ep_limeng', 'ep_noveh', 'ep_crowd', 'ep_hburd', 'ep_afam',
       'ep_hisp', 'ep_asian', 'ep_aian', 'ep_nhpi', 'ep_twom

In [None]:
# merged_df.drop(columns = ['buildings_affected_per_nta_y'], inplace = True)
# merged_df.rename(columns = {'buildings_affected_per_nta_x': 'buildings_affected_per_nta'}, inplace = True)

# **Add units_per_nta**

In [None]:
merged_df['unit_per_nta'] = merged_df['unitsres'] * merged_df['buildings_affected_per_nta']
merged_df[['bin', 'unit_per_nta']].head()

Unnamed: 0,bin,unit_per_nta
0,3083989,2175.0
1,3143881,272.0
2,3143435,588.0
3,2015444,85888.0
4,2003900,20995.0


In [None]:
merged_df[['bin', 'nta','average_year_eviction_count', 'average_year_eviction_unit_count']].head(10)
# for the same nta, they likely have different values for these two columns, because they were groupedby bin
# but for the same bin, they have the same values for these two columns, because they were groupedby bin

Unnamed: 0,bin,nta,average_year_eviction_count,average_year_eviction_unit_count
0,3083989,East New York,0.8,0.266667
1,3143881,Sunset Park East,0.6,0.3
2,3143435,Sunset Park West,0.6,0.15
3,2015444,Van Cortlandt Village,0.8,0.002273
4,2003900,Mott Haven-Port Morris,1.6,0.024615
5,2013945,Bedford Park-Fordham North,1.6,0.038095
6,2017108,Bedford Park-Fordham North,1.0,0.5
7,2011545,Claremont-Bathgate,1.6,0.048485
8,3089943,East New York,0.4,0.08
9,3245453,Brighton Beach,1.2,0.007692


sum all evictions per NTA -> divide by years -> average_year_eviction_nta_count. \
sum all units per NTA (deduplicate buildings to avoid overcounting). \
divide average_year_eviction_nta_count by total units -> evictions per unit per year. \
divide by NTA population and multiply by 1,000 -> evictions per unit per 1k residents. \

In [None]:
# merged_df['e_totpop'].head()

In [None]:
merged_df.columns, merged_df.shape

(Index(['primary_key', 'bbl', 'court_index_number', 'docket_number',
        'eviction_address', 'eviction_apartment_number', 'executed_date',
        'borough', 'zipcode', 'ejectment', 'eviction/legal_possession',
        'latitude', 'longitude', 'community_board', 'council_district',
        'census_tract', 'bin', 'nta', 'year', 'month_year', 'geometry',
        'yearbuilt', 'bldgclass', 'numfloors', 'unitsres', 'ownername',
        'bldgarea', 'building_type', 'building_category', 'is_condo',
        'floor_category', 'rent_era', 'architectural_style', 'economic_period',
        'residential_units_category', 'is_llc', 'building_size_category',
        'size_quartile', 'decade', 'fips', 'e_totpop', 'rpl_theme1',
        'rpl_theme2', 'rpl_theme3', 'rpl_theme4', 'rpl_themes', 'ep_pov150',
        'ep_unemp', 'ep_nohsdp', 'ep_uninsur', 'ep_age65', 'ep_age17',
        'ep_disabl', 'ep_limeng', 'ep_noveh', 'ep_crowd', 'ep_hburd', 'ep_afam',
        'ep_hisp', 'ep_asian', 'ep_aian', 'ep_n

In [None]:
merged_df['evictions_nta_per_1kunit_per_1kpop'] = \
      merged_df['average_year_eviction_nta_count'] / (merged_df['unit_per_nta']*merged_df['e_totpop']) * 1000 * 1000

In [None]:
merged_df[['bin', 'nta', 'evictions_nta_per_1kunit_per_1kpop']].sort_values('evictions_nta_per_1kunit_per_1kpop', ascending = False)

Unnamed: 0,bin,nta,evictions_nta_per_1kunit_per_1kpop
65395,4236378,St. Albans,427.060354
54507,2082091,Pelham Bay-Country Club-City Island,83.731361
25274,2090621,Pelham Bay-Country Club-City Island,83.731361
23286,2006658,Hunts Point,44.748396
12979,2082172,Pelham Bay-Country Club-City Island,41.865680
...,...,...,...
9272,1082726,Stuyvesant Town-Cooper Village,0.000665
41666,1082885,Stuyvesant Town-Cooper Village,0.000665
47187,1082883,Stuyvesant Town-Cooper Village,0.000665
45357,1082869,Stuyvesant Town-Cooper Village,0.000665


## **Interpretations:**
- (evictions per nta/ population per nta) * 1,000 * 1,000 = Evictions per 1k units per 1k people. [note, intentially standardized twice over population and units]
- this formula ensures eviction rates account for housing stock (units) and population.
- for example: 427.060354 means 427.060354 evictions per 1,000 units per 1,000 people in St.Albans.
- Advantage: compare neighborhoods fairly (e.g., St.Albans has higher pressure, 7.39, than Stuyvesant Town-Cooper Village).
- The formula:

$\text{Rate} = \left( \frac{\text{Total Evictions}(\text{neighborhood})}{\text{Total Units}(\text{neighborhood}) \times \text{Total Population}(\text{neighborhood})} \right) \times 1,\!000 \times 1,\!000$

In [None]:
merged_df[['nta', 'average_year_eviction_count', 'average_year_eviction_unit_count',
           'average_year_eviction_nta_count','evictions_nta_per_1kpop',
           'evictions_nta_per_1kunit_per_1kpop']].sort_values(by = 'evictions_nta_per_1kunit_per_1kpop', ascending = False)

Unnamed: 0,nta,average_year_eviction_count,average_year_eviction_unit_count,average_year_eviction_nta_count,evictions_nta_per_1kpop,evictions_nta_per_1kunit_per_1kpop
65395,St. Albans,0.4,0.200000,60.4,1.487941,427.060354
54507,Pelham Bay-Country Club-City Island,0.2,0.200000,46.0,10.717614,83.731361
25274,Pelham Bay-Country Club-City Island,0.2,0.200000,46.0,10.717614,83.731361
23286,Hunts Point,0.2,0.200000,106.0,9.397163,44.748396
12979,Pelham Bay-Country Club-City Island,0.2,0.100000,46.0,10.717614,41.865680
...,...,...,...,...,...,...
9272,Stuyvesant Town-Cooper Village,0.2,0.000023,12.6,0.215687,0.000665
41666,Stuyvesant Town-Cooper Village,0.4,0.000046,12.6,0.215687,0.000665
47187,Stuyvesant Town-Cooper Village,0.2,0.000023,12.6,0.215687,0.000665
45357,Stuyvesant Town-Cooper Village,0.4,0.000046,12.6,0.215687,0.000665


In [None]:
merged_df.shape
# 98 is good, 96 + uniter per nta, building per nta

(66397, 98)

In [None]:
# merged_df.drop(columns = ['evictions_nta_per_unit_per_1k'], inplace = True)

In [None]:
merged_df.columns

Index(['primary_key', 'bbl', 'court_index_number', 'docket_number',
       'eviction_address', 'eviction_apartment_number', 'executed_date',
       'borough', 'zipcode', 'ejectment', 'eviction/legal_possession',
       'latitude', 'longitude', 'community_board', 'council_district',
       'census_tract', 'bin', 'nta', 'year', 'month_year', 'geometry',
       'yearbuilt', 'bldgclass', 'numfloors', 'unitsres', 'ownername',
       'bldgarea', 'building_type', 'building_category', 'is_condo',
       'floor_category', 'rent_era', 'architectural_style', 'economic_period',
       'residential_units_category', 'is_llc', 'building_size_category',
       'size_quartile', 'decade', 'fips', 'e_totpop', 'rpl_theme1',
       'rpl_theme2', 'rpl_theme3', 'rpl_theme4', 'rpl_themes', 'ep_pov150',
       'ep_unemp', 'ep_nohsdp', 'ep_uninsur', 'ep_age65', 'ep_age17',
       'ep_disabl', 'ep_limeng', 'ep_noveh', 'ep_crowd', 'ep_hburd', 'ep_afam',
       'ep_hisp', 'ep_asian', 'ep_aian', 'ep_nhpi', 'ep_twom

In [None]:
merged_df.isna().sum().sum()
# good

np.int64(0)

In [None]:
# merged_df.drop(columns = ['evictions_nta_per_1k_y'], inplace = True)
# merged_df.rename(columns = {'evictions_nta_per_1k_x': 'evictions_nta_per_1k'}, inplace = True)

# **Step 5 Normalize eviction rates per nta population for Covid**

In [None]:
link4 = '/content/drive/My Drive/X999/bbl_evictions_311_svi_covid.csv'
merged_df_covid = pd.read_csv(link4)
merged_df_covid.shape

(5386, 93)

In [None]:
merged_df_covid.isna().sum().sum()

np.int64(0)

In [None]:
merged_df_covid.columns

Index(['primary_key', 'bbl', 'court_index_number', 'docket_number',
       'eviction_address', 'eviction_apartment_number', 'executed_date',
       'borough', 'zipcode', 'ejectment', 'eviction/legal_possession',
       'latitude', 'longitude', 'community_board', 'council_district',
       'census_tract', 'bin', 'nta', 'year', 'month_year', 'geometry',
       'yearbuilt', 'bldgclass', 'numfloors', 'unitsres', 'ownername',
       'bldgarea', 'building_type', 'building_category', 'is_condo',
       'floor_category', 'rent_era', 'architectural_style', 'economic_period',
       'residential_units_category', 'is_llc', 'building_size_category',
       'size_quartile', 'decade', 'fips', 'e_totpop', 'rpl_theme1',
       'rpl_theme2', 'rpl_theme3', 'rpl_theme4', 'rpl_themes', 'ep_pov150',
       'ep_unemp', 'ep_nohsdp', 'ep_uninsur', 'ep_age65', 'ep_age17',
       'ep_disabl', 'ep_limeng', 'ep_noveh', 'ep_crowd', 'ep_hburd', 'ep_afam',
       'ep_hisp', 'ep_asian', 'ep_aian', 'ep_nhpi', 'ep_twom

## **evictions_nta_per_1kpop**

In [None]:
population_per_nta = merged_df_covid.drop_duplicates('nta')[['nta', 'e_totpop']]
population_per_nta
# all rows here

Unnamed: 0,nta,e_totpop
0,Kingsbridge Heights,81397.0
1,Forest Hills,75212.0
2,Erasmus,101053.0
3,Crown Heights South,58476.0
4,Kew Gardens,20315.0
...,...,...
3141,Jamaica Estates-Holliswood,64268.0
3370,Auburndale,37647.0
3430,Stuyvesant Town-Cooper Village,32410.0
4746,Starrett City,16808.0


In [None]:
merged_df_covid['evictions_nta_per_1kpop'] = merged_df_covid['average_year_eviction_nta_count'] / merged_df_covid['e_totpop'] * 1000
merged_df_covid[['bin', 'evictions_nta_per_1kpop']].sort_values(by = 'evictions_nta_per_1kpop', ascending = False)

Unnamed: 0,bin,evictions_nta_per_1kpop
3110,1046407,7.441453
915,1013698,2.386727
982,1013698,2.386727
1549,1013633,2.386727
3760,1000793,1.962884
...,...,...
3074,4296569,0.010974
5312,5082694,0.009595
3370,4124595,0.008854
2516,2121023,0.006754


In [None]:
merged_df_covid['evictions_nta_per_1kpop'] = merged_df_covid['average_year_eviction_nta_count'] / merged_df_covid['e_totpop'] * 1000
merged_df_covid[['bin','nta', 'evictions_nta_per_1kpop']].sort_values(by = 'evictions_nta_per_1kpop', ascending = False).head(30)

Unnamed: 0,bin,nta,evictions_nta_per_1kpop
3110,1046407,Lenox Hill-Roosevelt Island,7.441453
915,1013698,Clinton,2.386727
982,1013698,Clinton,2.386727
1549,1013633,Clinton,2.386727
3760,1000793,Battery Park City-Lower Manhattan,1.962884
803,1000793,Battery Park City-Lower Manhattan,1.962884
763,1000810,Battery Park City-Lower Manhattan,1.962884
1067,4541713,Hunters Point-Sunnyside-West Maspeth,1.923077
3407,4541713,Hunters Point-Sunnyside-West Maspeth,1.923077
3522,4000019,Hunters Point-Sunnyside-West Maspeth,1.923077


In [None]:
merged_df_covid[['bin','nta', 'evictions_nta_per_1kpop']].sort_values(by = 'evictions_nta_per_1kpop', ascending = False).head(30)

Unnamed: 0,bin,nta,evictions_nta_per_1kpop
3110,1046407,Lenox Hill-Roosevelt Island,7.441453
915,1013698,Clinton,2.386727
982,1013698,Clinton,2.386727
1549,1013633,Clinton,2.386727
3760,1000793,Battery Park City-Lower Manhattan,1.962884
803,1000793,Battery Park City-Lower Manhattan,1.962884
763,1000810,Battery Park City-Lower Manhattan,1.962884
1067,4541713,Hunters Point-Sunnyside-West Maspeth,1.923077
3407,4541713,Hunters Point-Sunnyside-West Maspeth,1.923077
3522,4000019,Hunters Point-Sunnyside-West Maspeth,1.923077


In [None]:
merged_df_covid.evictions_nta_per_1kpop.describe(), merged_df_covid.evictions_nta_per_1kpop.median()

(count    5386.000000
 mean        0.285140
 std         0.226549
 min         0.006754
 25%         0.148356
 50%         0.240340
 75%         0.376040
 max         7.441453
 Name: evictions_nta_per_1kpop, dtype: float64,
 0.24034017378443334)

In [None]:
merged_df_covid.isna().sum().sum()

np.int64(0)

$\text{evictions_nta_per_1kpop} = \left( \frac{\text{average_year_eviction_nta_count}}{\text{e_totpop}} \right) \times 1000$

## **Interpretations**

- represents the average annual number of evictions in an nta normalized by the nta's population.
- for example: if evicions_nta_per_1k = 120, the NTA had 120 evictions per year on average per 1000 people.

# **evictions_nta_per_1kunit_per_1kpop**

## **Add buildings_affected column**

In [None]:
evictions_per_nta = merged_df_covid.groupby('nta')['bin'].nunique().reset_index(name='buildings_affected_per_nta')
evictions_per_nta.head()

Unnamed: 0,nta,buildings_affected_per_nta
0,Allerton-Pelham Gardens,12
1,Annadale-Huguenot-Prince's Bay-Eltingville,1
2,Arden Heights,5
3,Astoria,18
4,Auburndale,1


In [None]:
merged_df_covid = merged_df_covid.merge(evictions_per_nta, on='nta', how='left')
merged_df_covid.shape
# correct, now, buildings_affected column has been added.

(5386, 95)

In [None]:
merged_df_covid.columns

Index(['primary_key', 'bbl', 'court_index_number', 'docket_number',
       'eviction_address', 'eviction_apartment_number', 'executed_date',
       'borough', 'zipcode', 'ejectment', 'eviction/legal_possession',
       'latitude', 'longitude', 'community_board', 'council_district',
       'census_tract', 'bin', 'nta', 'year', 'month_year', 'geometry',
       'yearbuilt', 'bldgclass', 'numfloors', 'unitsres', 'ownername',
       'bldgarea', 'building_type', 'building_category', 'is_condo',
       'floor_category', 'rent_era', 'architectural_style', 'economic_period',
       'residential_units_category', 'is_llc', 'building_size_category',
       'size_quartile', 'decade', 'fips', 'e_totpop', 'rpl_theme1',
       'rpl_theme2', 'rpl_theme3', 'rpl_theme4', 'rpl_themes', 'ep_pov150',
       'ep_unemp', 'ep_nohsdp', 'ep_uninsur', 'ep_age65', 'ep_age17',
       'ep_disabl', 'ep_limeng', 'ep_noveh', 'ep_crowd', 'ep_hburd', 'ep_afam',
       'ep_hisp', 'ep_asian', 'ep_aian', 'ep_nhpi', 'ep_twom

# **Add units_per_nta**

In [None]:
merged_df_covid['unit_per_nta'] = merged_df_covid['unitsres'] * merged_df_covid['buildings_affected_per_nta']
merged_df_covid[['bin', 'unit_per_nta']].head()

Unnamed: 0,bin,unit_per_nta
0,2113173,114.0
1,4074666,3077.0
2,3117969,92.0
3,3029673,2064.0
4,4079390,948.0


In [None]:
merged_df_covid[['bin', 'nta','average_year_eviction_count', 'average_year_eviction_unit_count']].head(10)
# for the same nta, they likely have different values for these two columns, because they were groupedby bin
# but for the same bin, they have the same values for these two columns, because they were groupedby bin

Unnamed: 0,bin,nta,average_year_eviction_count,average_year_eviction_unit_count
0,2113173,Kingsbridge Heights,0.2,0.066667
1,4074666,Forest Hills,0.2,0.001105
2,3117969,Erasmus,0.6,0.3
3,3029673,Crown Heights South,0.8,0.016667
4,4079390,Kew Gardens,0.4,0.005063
5,1063219,Washington Heights South,0.4,0.013333
6,2003313,Highbridge,0.8,0.002827
7,3337046,East New York,0.6,0.000416
8,2126620,East Concourse-Concourse Village,0.6,0.002765
9,3324603,Crown Heights North,0.2,0.000171


sum all evictions per NTA -> divide by years -> average_year_eviction_nta_count. \
sum all units per NTA (deduplicate buildings to avoid overcounting). \
divide average_year_eviction_nta_count by total units -> evictions per unit per year. \
divide by NTA population and multiply by 1,000 -> evictions per unit per 1k residents. \

In [None]:
# merged_df['e_totpop'].head()

In [None]:
merged_df_covid.columns, merged_df_covid.shape

(Index(['primary_key', 'bbl', 'court_index_number', 'docket_number',
        'eviction_address', 'eviction_apartment_number', 'executed_date',
        'borough', 'zipcode', 'ejectment', 'eviction/legal_possession',
        'latitude', 'longitude', 'community_board', 'council_district',
        'census_tract', 'bin', 'nta', 'year', 'month_year', 'geometry',
        'yearbuilt', 'bldgclass', 'numfloors', 'unitsres', 'ownername',
        'bldgarea', 'building_type', 'building_category', 'is_condo',
        'floor_category', 'rent_era', 'architectural_style', 'economic_period',
        'residential_units_category', 'is_llc', 'building_size_category',
        'size_quartile', 'decade', 'fips', 'e_totpop', 'rpl_theme1',
        'rpl_theme2', 'rpl_theme3', 'rpl_theme4', 'rpl_themes', 'ep_pov150',
        'ep_unemp', 'ep_nohsdp', 'ep_uninsur', 'ep_age65', 'ep_age17',
        'ep_disabl', 'ep_limeng', 'ep_noveh', 'ep_crowd', 'ep_hburd', 'ep_afam',
        'ep_hisp', 'ep_asian', 'ep_aian', 'ep_n

In [None]:
merged_df_covid['evictions_nta_per_1kunit_per_1kpop'] = \
      merged_df_covid['average_year_eviction_nta_count'] / (merged_df_covid['unit_per_nta']*merged_df_covid['e_totpop']) * 1000 * 1000

In [None]:
merged_df_covid[['bin', 'nta', 'evictions_nta_per_1kunit_per_1kpop']].sort_values('evictions_nta_per_1kunit_per_1kpop', ascending = False)

Unnamed: 0,bin,nta,evictions_nta_per_1kunit_per_1kpop
2446,2082724,Pelham Bay-Country Club-City Island,88.758709
2444,2082048,Pelham Bay-Country Club-City Island,44.379355
2633,5116366,Charleston-Richmond Valley-Tottenville,22.591212
807,5088263,Charleston-Richmond Valley-Tottenville,22.591212
2586,5023744,Port Richmond,20.945166
...,...,...,...
504,2128845,Co-op City,0.000786
2279,2124552,Co-op City,0.000786
2278,2095393,Co-op City,0.000786
2277,2095397,Co-op City,0.000786


## **Interpretations:**
- (evictions per nta/ population per nta) * 1,000 * 1,000 = Evictions per 1k units per 1k people. [note, intentially standardized twice over population and units]
- this formula ensures eviction rates account for housing stock (units) and population.
- for example: 427.060354 means 427.060354 evictions per 1,000 units per 1,000 people in St.Albans.
- Advantage: compare neighborhoods fairly (e.g., St.Albans has higher pressure, 7.39, than Stuyvesant Town-Cooper Village).
- The formula:

$\text{Rate} = \left( \frac{\text{Total Evictions}(\text{neighborhood})}{\text{Total Units}(\text{neighborhood}) \times \text{Total Population}(\text{neighborhood})} \right) \times 1,\!000 \times 1,\!000$

In [None]:
merged_df_covid[['nta', 'average_year_eviction_count', 'average_year_eviction_unit_count',
           'average_year_eviction_nta_count','evictions_nta_per_1kpop',
           'evictions_nta_per_1kunit_per_1kpop']].sort_values(by = 'evictions_nta_per_1kunit_per_1kpop', ascending = False)

Unnamed: 0,nta,average_year_eviction_count,average_year_eviction_unit_count,average_year_eviction_nta_count,evictions_nta_per_1kpop,evictions_nta_per_1kunit_per_1kpop
2446,Pelham Bay-Country Club-City Island,0.2,0.200000,5.333333,1.242622,88.758709
2444,Pelham Bay-Country Club-City Island,0.2,0.100000,5.333333,1.242622,44.379355
2633,Charleston-Richmond Valley-Tottenville,0.2,0.200000,1.666667,0.112956,22.591212
807,Charleston-Richmond Valley-Tottenville,0.2,0.200000,1.666667,0.112956,22.591212
2586,Port Richmond,0.2,0.200000,4.666667,0.251342,20.945166
...,...,...,...,...,...,...
504,Co-op City,0.2,0.000018,9.333333,0.214476,0.000786
2279,Co-op City,0.2,0.000018,9.333333,0.214476,0.000786
2278,Co-op City,0.4,0.000037,9.333333,0.214476,0.000786
2277,Co-op City,0.2,0.000018,9.333333,0.214476,0.000786


In [None]:
merged_df_covid.shape, merged_df.shape
# 97 and 98 is good, 95/96 + uniter per nta, building per nta

((5386, 97), (66397, 98))

In [None]:
# merged_df.drop(columns = ['evictions_nta_per_1k'], inplace = True)

In [None]:
merged_df_covid.isna().sum().sum(), merged_df.columns

(np.int64(0),
 Index(['primary_key', 'bbl', 'court_index_number', 'docket_number',
        'eviction_address', 'eviction_apartment_number', 'executed_date',
        'borough', 'zipcode', 'ejectment', 'eviction/legal_possession',
        'latitude', 'longitude', 'community_board', 'council_district',
        'census_tract', 'bin', 'nta', 'year', 'month_year', 'geometry',
        'yearbuilt', 'bldgclass', 'numfloors', 'unitsres', 'ownername',
        'bldgarea', 'building_type', 'building_category', 'is_condo',
        'floor_category', 'rent_era', 'architectural_style', 'economic_period',
        'residential_units_category', 'is_llc', 'building_size_category',
        'size_quartile', 'decade', 'fips', 'e_totpop', 'rpl_theme1',
        'rpl_theme2', 'rpl_theme3', 'rpl_theme4', 'rpl_themes', 'ep_pov150',
        'ep_unemp', 'ep_nohsdp', 'ep_uninsur', 'ep_age65', 'ep_age17',
        'ep_disabl', 'ep_limeng', 'ep_noveh', 'ep_crowd', 'ep_hburd', 'ep_afam',
        'ep_hisp', 'ep_asian', 'e

In [None]:
# save the correct coordinates' df to the drive:
merged_df.to_csv('/content/drive/My Drive/X999/bbl_evictions_311_svi_normal_times_2.7.csv', index=False)
merged_df_covid.to_csv('/content/drive/My Drive/X999/bbl_evictions_311_svi_covid_2.7.csv', index=False)