In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import datetime as dt
import matplotlib
import matplotlib.pyplot as plt
import os
import io
import geopandas as gpd
import seaborn as sns
# suppress warning
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

In [2]:
# Notebook display settings: never elide columns, never truncate cell text,
# and let pandas pick the output width on its own.
pd.options.display.max_columns = None
pd.options.display.max_colwidth = None
pd.options.display.width = None
# Optional: turn off scientific notation for floats.
# pd.options.display.float_format = '{:.2f}'.format

In [3]:
# Colab only: mount Google Drive so the CSV files under /content/drive can be read.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [13]:
# Load the combined evictions / 311 / SVI table from the mounted Drive folder.
link1 = '/content/drive/My Drive/X999/bbl_evictions_311_svi_covid_correct_coordinates.csv'
covid = pd.read_csv(filepath_or_buffer=link1)

In [16]:
# Show the column names together with the (rows, columns) shape of the loaded frame.
covid.columns, covid.shape

(Index(['primary_key', 'bbl', 'court_index_number', 'docket_number',
        'eviction_address', 'eviction_apartment_number', 'executed_date',
        'borough', 'zipcode', 'ejectment', 'eviction/legal_possession',
        'latitude', 'longitude', 'community_board', 'council_district',
        'census_tract', 'bin', 'nta', 'year', 'month_year', 'geometry',
        'average_year_eviction_count', 'yearbuilt', 'bldgclass', 'numfloors',
        'unitsres', 'ownername', 'bldgarea', 'building_type',
        'building_category', 'is_condo', 'floor_category', 'rent_era',
        'architectural_style', 'economic_period', 'residential_units_category',
        'is_llc', 'building_size_category', 'size_quartile', 'decade', 'fips',
        'e_totpop', 'rpl_theme1', 'rpl_theme2', 'rpl_theme3', 'rpl_theme4',
        'rpl_themes', 'ep_pov150', 'ep_unemp', 'ep_nohsdp', 'ep_uninsur',
        'ep_age65', 'ep_age17', 'ep_disabl', 'ep_limeng', 'ep_noveh',
        'ep_crowd', 'ep_hburd', 'ep_afam', 'ep_hisp

# **Correct evictions_nta_per_unit_per_1k**

Previously, we did evictions_nta_per_unit_per_1k = average_year_eviction_unit_count/nta_population.

Now, we will correct it to evictions_nta_per_unit_per_1k = average_year_eviction_nta_count / ("total units in an nta" * nta_population) * 1000.

To get total units in an nta, we will create two new columns: "total building counts in nta" and "total units in an nta".

In [17]:
# Number of distinct buildings (unique `bin` values) that appear in the eviction
# records of each NTA.
evictions_per_nta = (
    covid.groupby('nta')['bin']
    .agg('nunique')
    .reset_index(name='buildings_affected')
)
evictions_per_nta.head()

Unnamed: 0,nta,buildings_affected
0,Allerton-Pelham Gardens,12
1,Annadale-Huguenot-Prince's Bay-Eltingville,1
2,Arden Heights,5
3,Astoria,18
4,Auburndale,1


In [18]:
# Keep only the first row of every (nta, bin) pair so each building is counted once
# when its units are summed below.
repeated_building_rows = covid.duplicated(subset=['nta', 'bin'])
unique_buildings = covid[~repeated_building_rows]
unique_buildings.head()

Unnamed: 0,primary_key,bbl,court_index_number,docket_number,eviction_address,eviction_apartment_number,executed_date,borough,zipcode,ejectment,eviction/legal_possession,latitude,longitude,community_board,council_district,census_tract,bin,nta,year,month_year,geometry,average_year_eviction_count,yearbuilt,bldgclass,numfloors,unitsres,ownername,bldgarea,building_type,building_category,is_condo,floor_category,rent_era,architectural_style,economic_period,residential_units_category,is_llc,building_size_category,size_quartile,decade,fips,e_totpop,rpl_theme1,rpl_theme2,rpl_theme3,rpl_theme4,rpl_themes,ep_pov150,ep_unemp,ep_nohsdp,ep_uninsur,ep_age65,ep_age17,ep_disabl,ep_limeng,ep_noveh,ep_crowd,ep_hburd,ep_afam,ep_hisp,ep_asian,ep_aian,ep_nhpi,ep_twomore,ep_otherrace,ep_minrty,ep_white,invalid_zip,svi_quartile,air_quality,animal_issues,appliances,building_exterior,doors_windows,electrical_issues,elevator_issues,floors_stairs,general_complaints,graffiti_posting,heat_hot_water,homeless_issues,noise_complaints,other_issues,pest_issues,plumbing_issues,police_matters,public_nuisance,safety_concerns,sanitation_issues,walls_ceilings,total_complaints,average_year_eviction_unit_count,average_year_eviction_nta_count,evictions_nta_per_1k,evictions_nta_per_unit_per_1k
0,004123/20_209969,2032140141,004123/20,209969,2541 A GRAND AVE,ROOM 3B,2022-08-22,BRONX,10468,Not an Ejectment,Possession,40.865396,-73.901317,7.0,14.0,265.0,2113173,Kingsbridge Heights,2022,2022-08,POINT (-73.901317 40.865396),0.2,2004.0,C0,3.0,3.0,MONJU SARKER,3420.0,post-war,walk-up,False,low-rise,"1994–Present, vacancy decontrol","2001-present, New Architecture","1991–2008, modern economic growth",3-5 units,False,medium-small,Q4 (largest 25%),2000-2009,10468,81397.0,0.9954,0.9407,0.987,0.947,0.9874,39.5,11.6,28.3,9.2,11.2,26.4,12.2,26.9,71.8,19.2,56.7,15.6,78.0,2.3,0.0,0.0,0.5,0.5,96.9,3.1,False,Q3,0.0,0.0,0.0,0.0,3.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,2.0,0.0,0.0,2.0,0.0,0.0,0.0,3.0,1.0,14.0,0.066667,18.666667,0.687986,0.000819
1,0050153/20_106030,4031560133,0050153/20,106030,98-05 67TH AVENUE,12F,2022-04-14,QUEENS,11375,Not an Ejectment,Possession,40.724241,-73.855552,6.0,29.0,71306.0,4074666,Forest Hills,2022,2022-04,POINT (-73.855552 40.724241),0.2,1960.0,D3,13.0,181.0,MARSEILLES LEASING LIMITED PARTNERSHIP,177710.0,post-war,elevator,False,high-rise,"1947–1969, rent-control","1951–1980, the International Style, Alternative Modernism","1946–1975, pst war economic boom",100+ units,False,mega,Q4 (largest 25%),1960-1969,11375,75212.0,0.4759,0.5698,0.8789,0.8057,0.7322,12.0,4.8,6.1,3.7,20.4,18.0,10.5,7.9,41.9,5.8,25.4,2.7,16.4,28.5,0.1,0.0,4.6,0.7,53.0,47.0,False,Q1 (Low),0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,62.0,0.0,34.0,0.0,0.0,4.0,1.0,0.0,0.0,2.0,5.0,112.0,0.001105,6.0,0.239324,1.5e-05
2,0052002/19_101926,3051370021,0052002/19,101926,199 VERONICA PLACE,1ST FLOOR,2020-03-02,BROOKLYN,11226,Not an Ejectment,Possession,40.645404,-73.952578,17.0,40.0,792.0,3117969,Erasmus,2020,2020-03,POINT (-73.952578 40.645404),0.6,1920.0,B3,2.0,2.0,"AANS, LLC.",1496.0,pre-war,two-family,False,low-rise,"Pre-1947, pre-rent-control","1900–1920, Beaux-Arts","Pre-1929, pre-great depression",2-unit,True,very small,Q2 (25-50%),1920-1929,11226,101053.0,0.93,0.4536,0.9639,0.9692,0.922,23.7,5.9,13.9,9.1,13.1,18.7,6.7,5.6,66.1,10.0,39.2,63.2,14.9,3.2,0.3,0.0,4.1,0.7,86.3,13.7,False,Q2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.3,23.0,0.68281,0.002969
3,0057757/18_100889,3011850034,0057757/18,100889,302 EASTERN PARKWAY,4B,2020-02-03,BROOKLYN,11225,Not an Ejectment,Possession,40.670832,-73.958843,9.0,35.0,213.0,3029673,Crown Heights South,2020,2020-02,POINT (-73.958843 40.670832),0.8,1923.0,D1,6.0,48.0,302 EASTERN CORP,42984.0,pre-war,elevator,False,mid-rise,"Pre-1947, pre-rent-control","1921–1930, Art Deco Skyscrapers","Pre-1929, pre-great depression",21-100 units,False,very large,Q4 (largest 25%),1920-1929,11225,58476.0,0.8905,0.3157,0.933,0.8342,0.8538,23.1,6.6,11.5,5.9,15.3,16.7,9.6,2.2,66.2,6.9,37.3,53.7,10.8,3.3,0.0,0.0,3.9,0.9,72.6,27.4,False,Q1 (Low),0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,12.0,0.0,17.0,0.0,3.0,2.0,1.0,0.0,2.0,2.0,1.0,45.0,0.016667,23.0,1.179971,0.000285
4,0061902/19_117253,4033220043,0061902/19,117253,83-33 118TH STREET,5N,2020-02-14,QUEENS,11415,Not an Ejectment,Possession,40.706235,-73.834603,9.0,29.0,134.0,4079390,Kew Gardens,2020,2020-02,POINT (-73.834603 40.706235),0.4,1979.0,D1,6.0,79.0,CIAMPA METROPOLITAN CO,72147.0,post-war,elevator,False,mid-rise,"1970–1993, deregularization","1951–1980, the International Style, Alternative Modernism","1976–1990, fiscal crisis and recovery",21-100 units,False,very large,Q4 (largest 25%),1970-1979,11415,20315.0,0.7661,0.5573,0.898,0.9396,0.8761,14.6,5.6,11.8,4.7,17.0,18.0,10.9,7.5,44.3,8.5,32.3,6.7,22.9,22.3,0.2,0.0,3.4,2.1,57.7,42.3,False,Q1 (Low),0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,9.0,0.0,19.0,0.0,1.0,0.0,0.0,0.0,0.0,6.0,1.0,38.0,0.005063,6.0,0.886045,0.000249


In [19]:
# Sum the residential units (`unitsres`) of the deduplicated buildings per NTA.
total_units_per_nta = (
    unique_buildings.groupby('nta')['unitsres']
    .sum()
    .rename('total_units_per_nta')
    .reset_index()
)
total_units_per_nta.head()

Unnamed: 0,nta,total_units_per_nta
0,Allerton-Pelham Gardens,2683.0
1,Annadale-Huguenot-Prince's Bay-Eltingville,2.0
2,Arden Heights,7.0
3,Astoria,2230.0
4,Auburndale,2.0


In [20]:
# One row per NTA holding both totals: buildings affected and residential units.
building_units_per_nta = evictions_per_nta.merge(
    total_units_per_nta,
    on='nta',
    how='left',
)
building_units_per_nta.head()

Unnamed: 0,nta,buildings_affected,total_units_per_nta
0,Allerton-Pelham Gardens,12,2683.0
1,Annadale-Huguenot-Prince's Bay-Eltingville,1,2.0
2,Arden Heights,5,7.0
3,Astoria,18,2230.0
4,Auburndale,1,2.0


In [21]:
# Attach the per-NTA totals (buildings_affected, total_units_per_nta) to every row.
# Any copies of these columns left by an earlier run of this cell are dropped first;
# otherwise a second run would create suffixed duplicates (e.g. total_units_per_nta_x /
# total_units_per_nta_y) and later lookups of 'total_units_per_nta' would fail.
nta_total_columns = ['buildings_affected', 'total_units_per_nta']
covid = (
    covid
    .drop(columns=nta_total_columns, errors='ignore')
    # many_to_one: raise instead of silently duplicating eviction rows if the
    # per-NTA table ever contains the same NTA twice.
    .merge(building_units_per_nta, on='nta', how='left', validate='many_to_one')
)
covid.shape

(5366, 97)

In [22]:
# List the columns again now that the per-NTA totals have been merged in.
covid.columns

Index(['primary_key', 'bbl', 'court_index_number', 'docket_number',
       'eviction_address', 'eviction_apartment_number', 'executed_date',
       'borough', 'zipcode', 'ejectment', 'eviction/legal_possession',
       'latitude', 'longitude', 'community_board', 'council_district',
       'census_tract', 'bin', 'nta', 'year', 'month_year', 'geometry',
       'average_year_eviction_count', 'yearbuilt', 'bldgclass', 'numfloors',
       'unitsres', 'ownername', 'bldgarea', 'building_type',
       'building_category', 'is_condo', 'floor_category', 'rent_era',
       'architectural_style', 'economic_period', 'residential_units_category',
       'is_llc', 'building_size_category', 'size_quartile', 'decade', 'fips',
       'e_totpop', 'rpl_theme1', 'rpl_theme2', 'rpl_theme3', 'rpl_theme4',
       'rpl_themes', 'ep_pov150', 'ep_unemp', 'ep_nohsdp', 'ep_uninsur',
       'ep_age65', 'ep_age17', 'ep_disabl', 'ep_limeng', 'ep_noveh',
       'ep_crowd', 'ep_hburd', 'ep_afam', 'ep_hisp', 'ep_asian',

In [23]:
# Scale the NTA-level eviction rate per 1,000 residents down by a factor of 5
# (presumably the number of years covered by the data — TODO confirm).
# CAUTION: the column is overwritten in place, so running this cell a second time
# divides by 5 again; reload `covid` before re-running.
# NOTE(review): in the rows displayed in this notebook the pre-division value equals
# average_year_eviction_nta_count * 3 / e_totpop * 1000
# (e.g. 18.666667 * 3 / 81397 * 1000 ≈ 0.688), which suggests the NTA count was
# averaged over 3 years while this rate is averaged over 5 — confirm that both
# metrics are meant to use the same window.
covid['evictions_nta_per_1k'] = covid['evictions_nta_per_1k'].div(5)

In [24]:
# These two metrics were grouped by building (bin), so rows belonging to the same
# NTA will usually carry different values.
building_level_metrics = ['nta', 'average_year_eviction_count', 'average_year_eviction_unit_count']
covid.loc[:, building_level_metrics].head(10)

Unnamed: 0,nta,average_year_eviction_count,average_year_eviction_unit_count
0,Kingsbridge Heights,0.2,0.066667
1,Forest Hills,0.2,0.001105
2,Erasmus,0.6,0.3
3,Crown Heights South,0.8,0.016667
4,Kew Gardens,0.4,0.005063
5,Washington Heights South,0.4,0.013333
6,Highbridge,0.8,0.002827
7,East New York,0.6,0.000416
8,East Concourse-Concourse Village,0.6,0.002765
9,Crown Heights North,0.2,0.000171


In [25]:
# Within one NTA:
# - 'average_year_eviction_nta_count' must be identical on every row.
# - 'evictions_nta_per_1k' is usually, but not necessarily, identical: the population
#   comes from zipcode-level SVI data, and zipcodes and NTAs can overlap (a limitation
#   of the SVI data; the zipcode tabulation was the closest unit to an NTA).
# - 'evictions_nta_per_unit_per_1k' behaves the same way, for the same reason.
# 'evictions_nta_per_unit_per_1k' will also be corrected below. It was previously the
# per-building 'average_year_eviction_unit_count' aggregated by NTA and then divided
# by the NTA population, which averages twice (per building, then per NTA) and loses
# the true denominator (total units in the NTA). The corrected version is
# 'average_year_eviction_nta_count' / total units in the NTA / NTA population.
nta_level_metrics = [
    'nta',
    'average_year_eviction_nta_count',
    'evictions_nta_per_1k',
    'evictions_nta_per_unit_per_1k',
]
covid.loc[:, nta_level_metrics].head(10)

Unnamed: 0,nta,average_year_eviction_nta_count,evictions_nta_per_1k,evictions_nta_per_unit_per_1k
0,Kingsbridge Heights,18.666667,0.137597,0.000819
1,Forest Hills,6.0,0.047865,1.5e-05
2,Erasmus,23.0,0.136562,0.002969
3,Crown Heights South,23.0,0.235994,0.000285
4,Kew Gardens,6.0,0.177209,0.000249
5,Washington Heights South,24.333333,0.248676,0.000227
6,Highbridge,17.333333,0.132275,3.6e-05
7,East New York,26.0,0.144204,4e-06
8,East Concourse-Concourse Village,34.333333,0.401473,5.4e-05
9,Crown Heights North,46.0,0.33203,2e-06


sum all evictions per NTA -> divide by years -> average_year_eviction_nta_count. \
sum all units per NTA (deduplicate buildings to avoid overcounting). \
divide average_year_eviction_nta_count by total units -> evictions per unit per year. \
divide by NTA population and multiply by 1,000 -> evictions per unit per 1k residents. \

In [26]:
# Load the cleaned BBL (lot-level) table from the same Drive folder.
file_path3 = '/content/drive/My Drive/X999/bbl_cleaned.csv'
bbl_cleaned = pd.read_csv(filepath_or_buffer=file_path3)

In [27]:
bbl_cleaned.columns
# This dataset has no `nta` column either, so NTA membership would have to be
# approximated from other fields, which would be error-prone.

Index(['borough', 'block', 'lot', 'community board', 'census tract 2010',
       'cb2010', 'schooldist', 'council district', 'postcode', 'firecomp',
       'policeprct', 'healtharea', 'sanitboro', 'sanitsub', 'address',
       'zonedist1', 'zonedist2', 'zonedist3', 'overlay1', 'overlay2',
       'spdist1', 'ltdheight', 'splitzone', 'bldgclass', 'landuse',
       'easements', 'ownertype', 'ownername', 'lotarea', 'bldgarea', 'comarea',
       'resarea', 'officearea', 'retailarea', 'garagearea', 'strgearea',
       'factryarea', 'otherarea', 'areasource', 'numbldgs', 'numfloors',
       'unitsres', 'unitstotal', 'lotfront', 'lotdepth', 'bldgfront',
       'bldgdepth', 'ext', 'proxcode', 'irrlotcode', 'lottype', 'bsmtcode',
       'assessland', 'assesstot', 'exempttot', 'yearbuilt', 'yearalter1',
       'yearalter2', 'histdist', 'landmark', 'builtfar', 'residfar', 'commfar',
       'facilfar', 'borocode', 'bbl', 'condono', 'tract2010', 'xcoord',
       'ycoord', 'latitude', 'longitude', 'z

In [28]:
# the new way:
# average yearly evictions in the NTA, divided by the NTA's total residential units
# (units of the deduplicated buildings present in this dataset) and by the
# population (e_totpop), scaled to a rate per 1,000 residents.
rate_denominator = covid['total_units_per_nta'] * covid['e_totpop']
# A zero unit total or a zero population would silently produce inf (or NaN for 0/0)
# and distort every later group mean; mask those denominators so the rate is
# reported as missing instead.
rate_denominator = rate_denominator.where(rate_denominator != 0)
covid['evictions_nta_per_unit_per_1k'] = (covid['average_year_eviction_nta_count'] / rate_denominator) * 1000

In [29]:
# Limitation: this rate is still inflated. Buildings that never appear in the
# eviction dataset contribute neither buildings nor units to the NTA totals.
# We accept this because the bbl dataset has no NTA column: including every building
# of an NTA would require deriving the NTA from community_board and census_tract,
# which is more likely to introduce errors and misjudgements than restricting the
# totals to the buildings that actually appear in the eviction dataset.
corrected_rate_columns = [
    'nta',
    'evictions_nta_per_unit_per_1k',
    'average_year_eviction_nta_count',
    'total_units_per_nta',
]
covid.loc[:, corrected_rate_columns].head(10)

Unnamed: 0,nta,evictions_nta_per_unit_per_1k,average_year_eviction_nta_count,total_units_per_nta
0,Kingsbridge Heights,0.000134,18.666667,1711.0
1,Forest Hills,3.3e-05,6.0,2403.0
2,Erasmus,0.00022,23.0,1033.0
3,Crown Heights South,0.000174,23.0,2260.0
4,Kew Gardens,0.00024,6.0,1229.0
5,Washington Heights South,0.000157,24.333333,2639.0
6,Highbridge,8.2e-05,17.333333,2703.0
7,East New York,3.3e-05,26.0,7313.0
8,East Concourse-Concourse Village,0.000114,34.333333,5878.0
9,Crown Heights North,7.6e-05,46.0,7239.0


# **SVI measure analysis**

four types of aggregated and groupedby analysis:

average eviction count per building, per year;

average eviction count per nta population;

average eviction count per unit per building per year;

average eviction count per unit per nta population.

SVI measures:

ep_age17: age 17 or younger.
possible social causes impacted: homelessness, change of schools, education impact; \

ep_age65: age 65 and above.
possible social causes impacted: homelessness;\

ep_unemp: unemployed pct.
possible social causes impacted: homelessness;\





In [30]:
# Narrow view of `covid`: identifiers, location, the five eviction metrics, and the
# two denominators (units per building, population).
analysis_columns = covid.loc[:, [
    'primary_key',
    'bin',
    'bbl',
    'latitude',
    'longitude',
    'eviction_address',
    'zipcode',
    'borough',
    'nta',
    'average_year_eviction_count',
    'average_year_eviction_unit_count',
    'average_year_eviction_nta_count',
    'evictions_nta_per_1k',
    'evictions_nta_per_unit_per_1k',
    'unitsres',
    'e_totpop',
]]
analysis_columns.head(1)

Unnamed: 0,primary_key,bin,bbl,latitude,longitude,eviction_address,zipcode,borough,nta,average_year_eviction_count,average_year_eviction_unit_count,average_year_eviction_nta_count,evictions_nta_per_1k,evictions_nta_per_unit_per_1k,unitsres,e_totpop
0,004123/20_209969,2113173,2032140141,40.865396,-73.901317,2541 A GRAND AVE,10468,BRONX,Kingsbridge Heights,0.2,0.066667,18.666667,0.137597,0.000134,3.0,81397.0


In [31]:
# Peek at one value to confirm the scale of ep_age17.
covid['ep_age17'].head(1)
# The value is a percentage (0-100), not a fraction.

Unnamed: 0,ep_age17
0,26.4


# **Groupby and aggregate first**

In [32]:
# SVI percentages and eviction metrics to average per NTA in the cells below.
svi_analysis_columns = [
    'ep_age17',
    'ep_age65',
    'ep_unemp',
    'ep_afam',
    'ep_hisp',
    'evictions_nta_per_1k',
    'evictions_nta_per_unit_per_1k',
    'average_year_eviction_count',
    'average_year_eviction_unit_count',
    'average_year_eviction_nta_count',
]

## **Children**

In [33]:
# Sort by NTA name to eyeball that rows of one NTA share the same NTA-level count.
nta_check_columns = ['nta', 'primary_key', 'eviction_address', 'average_year_eviction_nta_count']
covid.loc[:, nta_check_columns].sort_values(by='nta').head(10)

Unnamed: 0,nta,primary_key,eviction_address,average_year_eviction_nta_count
4692,Allerton-Pelham Gardens,B804190/19_407681,1210 BURKE AVENUE,4.333333
1087,Allerton-Pelham Gardens,303142/22_360993,3022 PAULDING AVENUE,4.333333
4685,Allerton-Pelham Gardens,B802323/19_406008,3055 BOUCK AVENUE,4.333333
395,Allerton-Pelham Gardens,23411/19_355898,3022 PAULDING AVENUE,4.333333
4638,Allerton-Pelham Gardens,B49003/19_120545,3234 YATES AVENUE,4.333333
281,Allerton-Pelham Gardens,17634/20_360971,2920 BOUCK AVENUE,4.333333
4467,Allerton-Pelham Gardens,B307662/21_121219,2910 TENBROECK AVE,4.333333
4615,Allerton-Pelham Gardens,B45506/19_119183,2915 HERING AVENUE,4.333333
4695,Allerton-Pelham Gardens,B808735/18_405222,1255 ADEE AVENUE,4.333333
1639,Allerton-Pelham Gardens,308172/22_361328,2325 DELANOY AVENUE,4.333333


In [34]:
# Average every analysis column per NTA. The mean is used because rows of one NTA can
# carry slightly different values (the SVI measures are zipcode-based); averaging
# smooths out those small deviations.
covid.groupby('nta')[svi_analysis_columns].agg('mean').head(10)

Unnamed: 0_level_0,ep_age17,ep_age65,ep_unemp,ep_afam,ep_hisp,evictions_nta_per_1k,evictions_nta_per_unit_per_1k,average_year_eviction_count,average_year_eviction_unit_count,average_year_eviction_nta_count
nta,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Allerton-Pelham Gardens,22.161538,16.876923,8.384615,48.776923,31.053846,0.03618,2.3e-05,0.230769,0.074924,4.333333
Annadale-Huguenot-Prince's Bay-Eltingville,22.9,16.5,4.1,0.5,9.6,0.005757,0.004798,0.2,0.1,0.333333
Arden Heights,22.1,18.0,4.0,0.8,12.8,0.015755,0.003751,0.2,0.16,1.666667
Astoria,13.927778,12.422222,7.716667,3.95,27.711111,0.04139,6.9e-05,0.2,0.038562,6.0
Auburndale,18.6,20.0,7.0,0.7,17.0,0.005313,0.004427,0.2,0.1,0.333333
Baisley Park,21.442105,14.194737,8.505263,67.263158,13.194737,0.177994,0.007603,0.326316,0.2,6.333333
Bath Beach,21.257143,18.104762,7.133333,1.566667,17.366667,0.046258,1.7e-05,0.27619,0.007138,7.0
Battery Park City-Lower Manhattan,11.322727,8.786364,3.490909,5.595455,9.686364,0.497569,0.000141,0.472727,0.000982,7.333333
Bay Ridge,20.482353,17.758824,5.947059,2.907843,22.609804,0.142038,0.000107,0.262745,0.031692,17.0
Bayside-Bayside Hills,19.033333,20.2,5.05,2.85,15.433333,0.040617,0.000166,0.2,0.052508,2.0


In [35]:
# NTA eviction rate per 1,000 residents, weighted by the share of residents aged 17
# or younger (ep_age17 is a percentage, hence the division by 100).
child_share = covid['ep_age17'] / 100
covid['children_impacted_nta_per_1k'] = covid['evictions_nta_per_1k'] * child_share
# Alternative formulation kept for reference:
# covid['children_affected_per_1k_2'] = ((covid['average_year_eviction_nta_count'] /covid['e_totpop'])*1000 * (covid['ep_age17'] / 100))
covid.loc[:, ['nta', 'children_impacted_nta_per_1k']].head(10)

Unnamed: 0,nta,children_impacted_nta_per_1k
0,Kingsbridge Heights,0.036326
1,Forest Hills,0.008616
2,Erasmus,0.025537
3,Crown Heights South,0.039411
4,Kew Gardens,0.031898
5,Washington Heights South,0.042524
6,Highbridge,0.036111
7,East New York,0.038214
8,East Concourse-Concourse Village,0.098762
9,Crown Heights North,0.069394


In [36]:
# covid[['nta', 'children_affected_per_1k_2']].head(10)
# the same, they should be

In [37]:
# covid.drop(columns=['children_affected_per_1k_nta'], inplace=True)

In [38]:
# covid['children_affected_per_1k'] = (covid['evictions_nta_per_1k'] *   (covid['ep_age17'] / 100))
# covid[['nta','children_affected_per_1k']].head()

## **Unemployment**

In [39]:
# NTA eviction rate per 1,000 residents, weighted by the unemployed percentage
# (ep_unemp is a percentage, hence the division by 100).
covid['unemployed_impacted_nta_per_1k'] = (
    covid['evictions_nta_per_1k'].mul(covid['ep_unemp']).div(100)
)
covid['unemployed_impacted_nta_per_1k'].head()

Unnamed: 0,unemployed_impacted_nta_per_1k
0,0.015961
1,0.002298
2,0.008057
3,0.015576
4,0.009924


In [40]:
# covid['unemployed_impacted_nta_count'] = covid['evictions_nta_per_1k'] * covid['ep_unemp']/100
# covid['unemployed_impacted_nta_count'].head()

## **Elderly**

In [41]:
# NTA eviction rate per 1,000 residents, weighted by the percentage of residents
# aged 65 and above (ep_age65 is a percentage, hence the division by 100).
covid['elderly_impacted_nta_per_1k'] = (
    covid['evictions_nta_per_1k'].mul(covid['ep_age65']).div(100)
)
covid['elderly_impacted_nta_per_1k'].head()

Unnamed: 0,elderly_impacted_nta_per_1k
0,0.015411
1,0.009764
2,0.01789
3,0.036107
4,0.030126


In [42]:
# covid['elderly_impacted_nta_count'] = covid['evictions_nta_per_1k'] * covid['ep_age65']/100
# covid['elderly_impacted_nta_count'].head()

## **black and hispanics impacted**

In [43]:
# NTA eviction rate per 1,000 residents, weighted by the combined Black and Hispanic
# percentage. NOTE(review): adding ep_afam and ep_hisp assumes the two SVI
# percentages do not overlap — confirm against the SVI data dictionary.
black_hispanic_pct = covid['ep_afam'] + covid['ep_hisp']
covid['bh_impacted_nta_per_1k'] = covid['evictions_nta_per_1k'] * black_hispanic_pct / 100
covid['bh_impacted_nta_per_1k'].head()

Unnamed: 0,bh_impacted_nta_per_1k
0,0.128791
1,0.009142
2,0.106655
3,0.152216
4,0.052454


# **Analysis**

## **Children Analysis: The most vulnerable nta for kids (high likelihood for change of schools or dropping out)**

In [44]:
# From here on `svi_analysis_columns` holds the four derived "impacted" rates
# (this replaces the longer list defined earlier in the notebook).
svi_analysis_columns = [
    'children_impacted_nta_per_1k',
    'elderly_impacted_nta_per_1k',
    'unemployed_impacted_nta_per_1k',
    'bh_impacted_nta_per_1k',
]

In [45]:
# One row per NTA: the mean of each impacted rate across that NTA's eviction rows.
svi_analysis_df = covid.groupby('nta')[svi_analysis_columns].agg('mean')
svi_analysis_df.head()

Unnamed: 0_level_0,children_impacted_nta_per_1k,elderly_impacted_nta_per_1k,unemployed_impacted_nta_per_1k,bh_impacted_nta_per_1k
nta,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Allerton-Pelham Gardens,0.008018,0.006106,0.003034,0.028883
Annadale-Huguenot-Prince's Bay-Eltingville,0.001318,0.00095,0.000236,0.000581
Arden Heights,0.003482,0.002836,0.00063,0.002143
Astoria,0.005765,0.005142,0.003194,0.013105
Auburndale,0.000988,0.001063,0.000372,0.00094


In [46]:
# Rank the NTAs by the child-weighted eviction rate and keep the 15 highest and the
# 15 lowest (both as Series indexed by NTA).
children_top_15 = (
    svi_analysis_df
    .sort_values(by='children_impacted_nta_per_1k', ascending=False)
    .loc[:, 'children_impacted_nta_per_1k']
    .head(15)
)
children_bottom_15 = (
    svi_analysis_df
    .sort_values(by='children_impacted_nta_per_1k', ascending=True)
    .loc[:, 'children_impacted_nta_per_1k']
    .head(15)
)
children_top_15

Unnamed: 0_level_0,children_impacted_nta_per_1k
nta,Unnamed: 1_level_1
Central Harlem North-Polo Grounds,0.1079
East Concourse-Concourse Village,0.106348
West New Brighton-New Brighton-St. George,0.104631
Crown Heights North,0.071078
Prospect Lefferts Gardens-Wingate,0.070381
Bushwick South,0.06968
Mott Haven-Port Morris,0.066314
East Tremont,0.065405
Bedford Park-Fordham North,0.06491
Morrisania-Melrose,0.061281


In [47]:
# The 15 NTAs with the lowest child-weighted eviction rate.
children_bottom_15

Unnamed: 0_level_0,children_impacted_nta_per_1k
nta,Unnamed: 1_level_1
park-cemetery-etc-Bronx,0.000956
Auburndale,0.000988
Queensboro Hill,0.001185
Annadale-Huguenot-Prince's Bay-Eltingville,0.001318
Lindenwood-Howard Beach,0.001429
Stuyvesant Town-Cooper Village,0.001678
Glen Oaks-Floral Park-New Hyde Park,0.001784
Glendale,0.001876
North Corona,0.0022
Maspeth,0.002242


In [48]:
# Transposing a Series is a no-op, so wrap each ranking in a one-column DataFrame
# before using .T below. The DataFrame constructor is used instead of
# Series.to_frame() so that re-running this cell (when the names already hold
# DataFrames) does not raise AttributeError.
children_top_15 = pd.DataFrame(children_top_15)
children_bottom_15 = pd.DataFrame(children_bottom_15)

In [49]:
# Transpose so the 15 NTAs run across the columns.
children_top_15.T

nta,Central Harlem North-Polo Grounds,East Concourse-Concourse Village,West New Brighton-New Brighton-St. George,Crown Heights North,Prospect Lefferts Gardens-Wingate,Bushwick South,Mott Haven-Port Morris,East Tremont,Bedford Park-Fordham North,Morrisania-Melrose,University Heights-Morris Heights,Battery Park City-Lower Manhattan,Mount Hope,Hunters Point-Sunnyside-West Maspeth,Brownsville
children_impacted_nta_per_1k,0.1079,0.106348,0.104631,0.071078,0.070381,0.06968,0.066314,0.065405,0.06491,0.061281,0.057735,0.056338,0.055938,0.055473,0.054415


In [50]:
# Transpose so the 15 NTAs run across the columns.
children_bottom_15.T

nta,park-cemetery-etc-Bronx,Auburndale,Queensboro Hill,Annadale-Huguenot-Prince's Bay-Eltingville,Lindenwood-Howard Beach,Stuyvesant Town-Cooper Village,Glen Oaks-Floral Park-New Hyde Park,Glendale,North Corona,Maspeth,New Springville-Bloomfield-Travis,Williamsburg,Woodside,Midtown-Midtown South,Queensbridge-Ravenswood-Long Island City
children_impacted_nta_per_1k,0.000956,0.000988,0.001185,0.001318,0.001429,0.001678,0.001784,0.001876,0.0022,0.002242,0.002257,0.0024,0.002621,0.002814,0.00294


## **Elderly analysis: The most vulnerable nta for elderly (more demand for social security/welfare)**

In [51]:
# The 15 NTAs with the highest and the 15 with the lowest elderly-weighted eviction
# rate, each as a one-column DataFrame indexed by NTA.
elderly_top_15 = (
    svi_analysis_df
    .sort_values(by='elderly_impacted_nta_per_1k', ascending=False)
    .loc[:, ['elderly_impacted_nta_per_1k']]
    .head(15)
)
elderly_bottom_15 = (
    svi_analysis_df
    .sort_values(by='elderly_impacted_nta_per_1k', ascending=True)
    .loc[:, ['elderly_impacted_nta_per_1k']]
    .head(15)
)

In [52]:
# Transpose so the 15 NTAs run across the columns.
elderly_top_15.T

nta,Turtle Bay-East Midtown,West New Brighton-New Brighton-St. George,Central Harlem North-Polo Grounds,Prospect Lefferts Gardens-Wingate,Clinton,Seagate-Coney Island,East Concourse-Concourse Village,Battery Park City-Lower Manhattan,Crown Heights North,Lenox Hill-Roosevelt Island,Washington Heights South,Bushwick South,Marble Hill-Inwood,Hunters Point-Sunnyside-West Maspeth,Flatbush
elderly_impacted_nta_per_1k,0.099206,0.067503,0.067153,0.062665,0.051087,0.046416,0.046185,0.043718,0.041785,0.040446,0.039539,0.03756,0.036988,0.036943,0.036394


In [53]:
# Transpose so the 15 NTAs run across the columns.
elderly_bottom_15.T

nta,park-cemetery-etc-Bronx,Williamsburg,North Corona,Annadale-Huguenot-Prince's Bay-Eltingville,Auburndale,Glendale,Maspeth,Lindenwood-Howard Beach,Queensboro Hill,Ozone Park,New Springville-Bloomfield-Travis,Stuyvesant Town-Cooper Village,Windsor Terrace,Woodside,Elmhurst-Maspeth
elderly_impacted_nta_per_1k,0.000498,0.000687,0.000931,0.00095,0.001063,0.001137,0.001294,0.00164,0.001668,0.001731,0.001992,0.002123,0.002127,0.002138,0.002329


## **Unemployed analysis: The most vulnerable nta for the unemployed (more demand for social security/welfare, homelessness)**

In [54]:
# The 15 NTAs with the highest and the 15 with the lowest unemployment-weighted
# eviction rate, each as a one-column DataFrame indexed by NTA.
unemployed_top_15 = (
    svi_analysis_df
    .sort_values(by='unemployed_impacted_nta_per_1k', ascending=False)
    .loc[:, ['unemployed_impacted_nta_per_1k']]
    .head(15)
)
unemployed_bottom_15 = (
    svi_analysis_df
    .sort_values(by='unemployed_impacted_nta_per_1k', ascending=True)
    .loc[:, ['unemployed_impacted_nta_per_1k']]
    .head(15)
)

In [55]:
# Transpose so the 15 NTAs run across the columns.
unemployed_top_15.T

nta,Central Harlem North-Polo Grounds,East Concourse-Concourse Village,Bedford Park-Fordham North,Brownsville,East Tremont,Morrisania-Melrose,Mott Haven-Port Morris,Washington Heights South,Bushwick South,Crown Heights North,University Heights-Morris Heights,West New Brighton-New Brighton-St. George,Mount Hope,Prospect Lefferts Gardens-Wingate,Washington Heights North
unemployed_impacted_nta_per_1k,0.069522,0.052905,0.034996,0.033288,0.033102,0.031662,0.031487,0.031258,0.030788,0.030446,0.028714,0.028149,0.026162,0.025224,0.023917


In [56]:
# Transpose so the 15 NTAs run across the columns.
unemployed_bottom_15.T

nta,Annadale-Huguenot-Prince's Bay-Eltingville,Stuyvesant Town-Cooper Village,Auburndale,park-cemetery-etc-Bronx,Glen Oaks-Floral Park-New Hyde Park,North Corona,New Springville-Bloomfield-Travis,Queensboro Hill,Lindenwood-Howard Beach,Glendale,Williamsburg,Arden Heights,Maspeth,Rossville-Woodrow,Middle Village
unemployed_impacted_nta_per_1k,0.000236,0.000284,0.000372,0.000446,0.000466,0.00047,0.000519,0.000536,0.00054,0.000597,0.000608,0.00063,0.000703,0.000708,0.000711


## **Black + Hispanic analysis: The most vulnerable nta for black and hispanic population**

In [57]:
# The 15 NTAs with the highest and the 15 with the lowest Black + Hispanic weighted
# eviction rate, each as a one-column DataFrame indexed by NTA.
bh_top_15 = (
    svi_analysis_df
    .sort_values(by='bh_impacted_nta_per_1k', ascending=False)
    .loc[:, ['bh_impacted_nta_per_1k']]
    .head(15)
)
bh_bottom_15 = (
    svi_analysis_df
    .sort_values(by='bh_impacted_nta_per_1k', ascending=True)
    .loc[:, ['bh_impacted_nta_per_1k']]
    .head(15)
)

In [58]:
# Transpose so the 15 NTAs run across the columns.
bh_top_15.T

nta,Central Harlem North-Polo Grounds,East Concourse-Concourse Village,Prospect Lefferts Gardens-Wingate,Bushwick South,Mott Haven-Port Morris,Crown Heights North,Bedford Park-Fordham North,East Tremont,West New Brighton-New Brighton-St. George,Morrisania-Melrose,Brownsville,University Heights-Morris Heights,Melrose South-Mott Haven North,Washington Heights South,Marble Hill-Inwood
bh_impacted_nta_per_1k,0.435634,0.377058,0.299443,0.241025,0.234443,0.228097,0.222129,0.220477,0.219505,0.218216,0.211163,0.200682,0.193745,0.190397,0.187496


In [59]:
# Transpose so the 15 NTAs run across the columns.
bh_bottom_15.T

nta,Annadale-Huguenot-Prince's Bay-Eltingville,Auburndale,Queensboro Hill,Rossville-Woodrow,Lindenwood-Howard Beach,New Springville-Bloomfield-Travis,Arden Heights,Stuyvesant Town-Cooper Village,Glen Oaks-Floral Park-New Hyde Park,Williamsburg,park-cemetery-etc-Bronx,Middle Village,East Flushing,Maspeth,Todt Hill-Emerson Hill-Heartland Village-Lighthouse Hill
bh_impacted_nta_per_1k,0.000581,0.00094,0.001396,0.001744,0.001863,0.001961,0.002143,0.002259,0.002402,0.002622,0.003303,0.003701,0.003804,0.003842,0.003844


In [60]:
# Give the five eviction metrics more self-explanatory names. The suffix in
# parentheses states the level each metric is measured at: (building) or (nta).
# NOTE: cells above refer to the old names, so they will raise KeyError if they are
# re-run after this cell without reloading `covid`.
clearer_metric_names = {
    'average_year_eviction_count': 'average_year_eviction_count(building)',
    'average_year_eviction_unit_count': 'average_year_eviction_count_per_unit(building)',
    'average_year_eviction_nta_count': 'average_year_eviction_count_per_nta(nta)',
    'evictions_nta_per_1k': 'evictions_per_nta_1k(nta)',
    'evictions_nta_per_unit_per_1k': 'evictions_per_unit_nta_1k(nta)',
}
covid.rename(columns=clearer_metric_names, inplace=True)

In [61]:
# Preview the frame with the renamed metric columns and the derived *_impacted_* columns.
covid.head()

Unnamed: 0,primary_key,bbl,court_index_number,docket_number,eviction_address,eviction_apartment_number,executed_date,borough,zipcode,ejectment,eviction/legal_possession,latitude,longitude,community_board,council_district,census_tract,bin,nta,year,month_year,geometry,average_year_eviction_count(building),yearbuilt,bldgclass,numfloors,unitsres,ownername,bldgarea,building_type,building_category,is_condo,floor_category,rent_era,architectural_style,economic_period,residential_units_category,is_llc,building_size_category,size_quartile,decade,fips,e_totpop,rpl_theme1,rpl_theme2,rpl_theme3,rpl_theme4,rpl_themes,ep_pov150,ep_unemp,ep_nohsdp,ep_uninsur,ep_age65,ep_age17,ep_disabl,ep_limeng,ep_noveh,ep_crowd,ep_hburd,ep_afam,ep_hisp,ep_asian,ep_aian,ep_nhpi,ep_twomore,ep_otherrace,ep_minrty,ep_white,invalid_zip,svi_quartile,air_quality,animal_issues,appliances,building_exterior,doors_windows,electrical_issues,elevator_issues,floors_stairs,general_complaints,graffiti_posting,heat_hot_water,homeless_issues,noise_complaints,other_issues,pest_issues,plumbing_issues,police_matters,public_nuisance,safety_concerns,sanitation_issues,walls_ceilings,total_complaints,average_year_eviction_count_per_unit(building),average_year_eviction_count_per_nta(nta),evictions_per_nta_1k(nta),evictions_per_unit_nta_1k(nta),buildings_affected,total_units_per_nta,children_impacted_nta_per_1k,unemployed_impacted_nta_per_1k,elderly_impacted_nta_per_1k,bh_impacted_nta_per_1k
0,004123/20_209969,2032140141,004123/20,209969,2541 A GRAND AVE,ROOM 3B,2022-08-22,BRONX,10468,Not an Ejectment,Possession,40.865396,-73.901317,7.0,14.0,265.0,2113173,Kingsbridge Heights,2022,2022-08,POINT (-73.901317 40.865396),0.2,2004.0,C0,3.0,3.0,MONJU SARKER,3420.0,post-war,walk-up,False,low-rise,"1994–Present, vacancy decontrol","2001-present, New Architecture","1991–2008, modern economic growth",3-5 units,False,medium-small,Q4 (largest 25%),2000-2009,10468,81397.0,0.9954,0.9407,0.987,0.947,0.9874,39.5,11.6,28.3,9.2,11.2,26.4,12.2,26.9,71.8,19.2,56.7,15.6,78.0,2.3,0.0,0.0,0.5,0.5,96.9,3.1,False,Q3,0.0,0.0,0.0,0.0,3.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,2.0,0.0,0.0,2.0,0.0,0.0,0.0,3.0,1.0,14.0,0.066667,18.666667,0.137597,0.000134,38,1711.0,0.036326,0.015961,0.015411,0.128791
1,0050153/20_106030,4031560133,0050153/20,106030,98-05 67TH AVENUE,12F,2022-04-14,QUEENS,11375,Not an Ejectment,Possession,40.724241,-73.855552,6.0,29.0,71306.0,4074666,Forest Hills,2022,2022-04,POINT (-73.855552 40.724241),0.2,1960.0,D3,13.0,181.0,MARSEILLES LEASING LIMITED PARTNERSHIP,177710.0,post-war,elevator,False,high-rise,"1947–1969, rent-control","1951–1980, the International Style, Alternative Modernism","1946–1975, pst war economic boom",100+ units,False,mega,Q4 (largest 25%),1960-1969,11375,75212.0,0.4759,0.5698,0.8789,0.8057,0.7322,12.0,4.8,6.1,3.7,20.4,18.0,10.5,7.9,41.9,5.8,25.4,2.7,16.4,28.5,0.1,0.0,4.6,0.7,53.0,47.0,False,Q1 (Low),0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,62.0,0.0,34.0,0.0,0.0,4.0,1.0,0.0,0.0,2.0,5.0,112.0,0.001105,6.0,0.047865,3.3e-05,17,2403.0,0.008616,0.002298,0.009764,0.009142
2,0052002/19_101926,3051370021,0052002/19,101926,199 VERONICA PLACE,1ST FLOOR,2020-03-02,BROOKLYN,11226,Not an Ejectment,Possession,40.645404,-73.952578,17.0,40.0,792.0,3117969,Erasmus,2020,2020-03,POINT (-73.952578 40.645404),0.6,1920.0,B3,2.0,2.0,"AANS, LLC.",1496.0,pre-war,two-family,False,low-rise,"Pre-1947, pre-rent-control","1900–1920, Beaux-Arts","Pre-1929, pre-great depression",2-unit,True,very small,Q2 (25-50%),1920-1929,11226,101053.0,0.93,0.4536,0.9639,0.9692,0.922,23.7,5.9,13.9,9.1,13.1,18.7,6.7,5.6,66.1,10.0,39.2,63.2,14.9,3.2,0.3,0.0,4.1,0.7,86.3,13.7,False,Q2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.3,23.0,0.136562,0.00022,46,1033.0,0.025537,0.008057,0.01789,0.106655
3,0057757/18_100889,3011850034,0057757/18,100889,302 EASTERN PARKWAY,4B,2020-02-03,BROOKLYN,11225,Not an Ejectment,Possession,40.670832,-73.958843,9.0,35.0,213.0,3029673,Crown Heights South,2020,2020-02,POINT (-73.958843 40.670832),0.8,1923.0,D1,6.0,48.0,302 EASTERN CORP,42984.0,pre-war,elevator,False,mid-rise,"Pre-1947, pre-rent-control","1921–1930, Art Deco Skyscrapers","Pre-1929, pre-great depression",21-100 units,False,very large,Q4 (largest 25%),1920-1929,11225,58476.0,0.8905,0.3157,0.933,0.8342,0.8538,23.1,6.6,11.5,5.9,15.3,16.7,9.6,2.2,66.2,6.9,37.3,53.7,10.8,3.3,0.0,0.0,3.9,0.9,72.6,27.4,False,Q1 (Low),0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,12.0,0.0,17.0,0.0,3.0,2.0,1.0,0.0,2.0,2.0,1.0,45.0,0.016667,23.0,0.235994,0.000174,43,2260.0,0.039411,0.015576,0.036107,0.152216
4,0061902/19_117253,4033220043,0061902/19,117253,83-33 118TH STREET,5N,2020-02-14,QUEENS,11415,Not an Ejectment,Possession,40.706235,-73.834603,9.0,29.0,134.0,4079390,Kew Gardens,2020,2020-02,POINT (-73.834603 40.706235),0.4,1979.0,D1,6.0,79.0,CIAMPA METROPOLITAN CO,72147.0,post-war,elevator,False,mid-rise,"1970–1993, deregularization","1951–1980, the International Style, Alternative Modernism","1976–1990, fiscal crisis and recovery",21-100 units,False,very large,Q4 (largest 25%),1970-1979,11415,20315.0,0.7661,0.5573,0.898,0.9396,0.8761,14.6,5.6,11.8,4.7,17.0,18.0,10.9,7.5,44.3,8.5,32.3,6.7,22.9,22.3,0.2,0.0,3.4,2.1,57.7,42.3,False,Q1 (Low),0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,9.0,0.0,19.0,0.0,1.0,0.0,0.0,0.0,0.0,6.0,1.0,38.0,0.005063,6.0,0.177209,0.00024,12,1229.0,0.031898,0.009924,0.030126,0.052454


In [62]:
# Per-building eviction rate at NTA level: the NTA's average yearly eviction
# count divided by its `buildings_affected` value, stored as a new column.
covid['average_year_eviction_per_building_nta(nta)'] = (
    covid['average_year_eviction_count_per_nta(nta)']
    .div(covid['buildings_affected'])
)
# Preview the first rows of the derived column.
covid['average_year_eviction_per_building_nta(nta)'].head()

Unnamed: 0,average_year_eviction_per_building_nta(nta)
0,0.491228
1,0.352941
2,0.5
3,0.534884
4,0.5


In [63]:
# Show the building-level and NTA-level eviction metrics side by side
# so the newly derived per-building column can be compared with the others.
covid.loc[
    :,
    [
        'average_year_eviction_count(building)',
        'average_year_eviction_count_per_unit(building)',
        'average_year_eviction_count_per_nta(nta)',
        'average_year_eviction_per_building_nta(nta)',
        'evictions_per_nta_1k(nta)',
        'evictions_per_unit_nta_1k(nta)',
    ],
]

Unnamed: 0,average_year_eviction_count(building),average_year_eviction_count_per_unit(building),average_year_eviction_count_per_nta(nta),average_year_eviction_per_building_nta(nta),evictions_per_nta_1k(nta),evictions_per_unit_nta_1k(nta)
0,0.2,0.066667,18.666667,0.491228,0.137597,0.000134
1,0.2,0.001105,6.000000,0.352941,0.047865,0.000033
2,0.6,0.300000,23.000000,0.500000,0.136562,0.000220
3,0.8,0.016667,23.000000,0.534884,0.235994,0.000174
4,0.4,0.005063,6.000000,0.500000,0.177209,0.000240
...,...,...,...,...,...,...
5361,0.6,0.001322,20.333333,0.495935,0.464957,0.000137
5362,0.6,0.001322,20.333333,0.495935,0.464957,0.000137
5363,0.2,0.012500,5.000000,0.555556,0.053350,0.000056
5364,0.2,0.000833,20.333333,0.495935,0.464957,0.000211


In [65]:
# Persist the updated COVID-period eviction data (including the column added
# above) back to Drive; the row index is not written to the file.
covid.to_csv(
    path_or_buf='/content/drive/My Drive/X999/bbl_evictions_311_svi_covid_correct_coordinates.csv',
    index=False,
)