In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import datetime as dt
import matplotlib
import matplotlib.pyplot as plt
import os
import io
import geopandas as gpd
import seaborn as sns
# suppress warning
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

In [2]:
# Lift pandas' display limits so wide frames are shown without truncation:
# no cap on the number of columns, on the width of a cell, or on the overall width.
for _display_option in ('display.max_columns', 'display.max_colwidth', 'display.width'):
    pd.set_option(_display_option, None)
# Optional: uncomment to show floats with two decimals instead of scientific notation.
# pd.options.display.float_format = '{:.2f}'.format

In [4]:
# Colab-only step: mount Google Drive at /content/drive, where the CSV inputs
# read by the following cells are stored.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Input CSVs for the two study periods (normal times vs. covid). Judging by the
# file names, each already combines evictions with 311 and SVI data.
link1 = '/content/drive/My Drive/X999/bbl_evictions_311_svi_normal_times_2.7.csv'
link2 = '/content/drive/My Drive/X999/bbl_evictions_311_svi_covid_2.7.csv'
# Read both files with default parsing options.
normal, covid = (pd.read_csv(csv_path) for csv_path in (link1, link2))

In [87]:
# Peek at the first five total 311 complaint counts in the normal-times frame.
normal['total_complaints'].head()

Unnamed: 0,total_complaints
0,19.0
1,4.0
2,10.0
3,248.0
4,356.0


In [93]:
# normal.rpl_theme4.head()

In [94]:
# Sanity check: show the column labels and the (rows, columns) shape of both frames.
normal.columns, covid.columns, normal.shape, covid.shape

(Index(['primary_key', 'bbl', 'court_index_number', 'docket_number',
        'eviction_address', 'eviction_apartment_number', 'executed_date',
        'borough', 'zipcode', 'ejectment',
        ...
        'buildings_affected_per_nta', 'unit_per_nta',
        'evictions_per_1kunit_nta_1kpop(nta)',
        'children_impacted_nta_per_1kpop', 'unemployed_impacted_nta_per_1kpop',
        'elderly_impacted_nta_per_1kpop', 'bh_impacted_nta_per_1kpop',
        'hburd_impacted_nta_per_1kpop',
        'average_year_eviction_per_building_nta(nta)',
        'average_5year_eviction_count_per_unit(building)'],
       dtype='object', length=105),
 Index(['primary_key', 'bbl', 'court_index_number', 'docket_number',
        'eviction_address', 'eviction_apartment_number', 'executed_date',
        'borough', 'zipcode', 'ejectment',
        ...
        'buildings_affected_per_nta', 'unit_per_nta',
        'evictions_per_1kunit_nta_1kpop(nta)',
        'children_impacted_nta_per_1kpop', 'unemployed_impac

In [7]:
# Show the last 15 column labels of the normal-times frame.
normal.columns[-15:]
# 97 is correct, before special groups
# NOTE(review): presumably 97 is the expected column count before the
# group-impact columns are added — confirm.

Index(['pest_issues', 'plumbing_issues', 'police_matters', 'public_nuisance',
       'safety_concerns', 'sanitation_issues', 'walls_ceilings',
       'total_complaints', 'average_year_eviction_unit_count',
       'average_year_eviction_count', 'average_year_eviction_nta_count',
       'evictions_nta_per_1kpop', 'buildings_affected_per_nta', 'unit_per_nta',
       'evictions_nta_per_1kunit_per_1kpop'],
      dtype='object')

# **Add buildings_affected column**

In [8]:
# Count the distinct buildings (BINs) that appear in the eviction records of each NTA.
# Note: despite its name, evictions_per_nta holds building counts, not eviction counts.
evictions_per_nta = (
    covid.groupby('nta')['bin']
    .nunique()
    .rename('buildings_affected_per_nta')
    .reset_index()
)
# Display NTAs with the most affected buildings first.
evictions_per_nta.sort_values(by='buildings_affected_per_nta', ascending=False)

Unnamed: 0,nta,buildings_affected_per_nta
38,Crown Heights North,115
59,Flatbush,96
177,Williamsbridge-Olinville,86
131,Prospect Lefferts Gardens-Wingate,85
11,Bedford Park-Fordham North,82
...,...,...
4,Auburndale,1
93,Lindenwood-Howard Beach,1
100,Maspeth,1
68,Glen Oaks-Floral Park-New Hyde Park,1


In [9]:
# Building-level averages next to each row's NTA and BIN.
# Both averages were computed per building (grouped by bin), so:
#   - rows sharing a bin must carry the same values in these two columns;
#   - rows sharing only an nta will usually carry different values.
covid.loc[:, ['nta', 'bin', 'average_year_eviction_count', 'average_year_eviction_unit_count']].head(10)

Unnamed: 0,nta,bin,average_year_eviction_count,average_year_eviction_unit_count
0,Kingsbridge Heights,2113173,0.2,0.066667
1,Forest Hills,4074666,0.2,0.001105
2,Erasmus,3117969,0.6,0.3
3,Crown Heights South,3029673,0.8,0.016667
4,Kew Gardens,4079390,0.4,0.005063
5,Washington Heights South,1063219,0.4,0.013333
6,Highbridge,2003313,0.8,0.002827
7,East New York,3337046,0.6,0.000416
8,East Concourse-Concourse Village,2126620,0.6,0.002765
9,Crown Heights North,3324603,0.2,0.000171


In [10]:
# NTA-level eviction measures for the first ten rows (the original author's
# check of these values: "this is correct").
covid.loc[:, ['nta', 'average_year_eviction_nta_count', 'evictions_nta_per_1kpop',
              'evictions_nta_per_1kunit_per_1kpop']].head(10)

Unnamed: 0,nta,average_year_eviction_nta_count,evictions_nta_per_1kpop,evictions_nta_per_1kunit_per_1kpop
0,Kingsbridge Heights,18.666667,0.229329,2.011655
1,Forest Hills,6.0,0.079775,0.025926
2,Erasmus,23.0,0.227603,2.473949
3,Crown Heights South,23.0,0.393324,0.190564
4,Kew Gardens,6.0,0.295348,0.311549
5,Washington Heights South,24.333333,0.41446,0.230255
6,Highbridge,17.333333,0.220459,0.019475
7,East New York,26.0,0.24034,0.002601
8,East Concourse-Concourse Village,34.333333,0.669122,0.039532
9,Crown Heights North,46.0,0.553383,0.00412


sum all evictions per NTA -> divide by the number of years -> average_year_eviction_nta_count. \
sum all units per NTA (deduplicate buildings to avoid overcounting) -> unit_per_nta. \
divide average_year_eviction_nta_count by unit_per_nta and multiply by 1,000 -> evictions per 1k units per year. \
divide by NTA population and multiply by 1,000 -> evictions per 1k units per 1k residents (evictions_nta_per_1kunit_per_1kpop).

In [11]:
# Load the cleaned BBL (lot-level) dataset from the same Drive folder.
file_path3 = '/content/drive/My Drive/X999/bbl_cleaned.csv'
bbl_cleaned = pd.read_csv(filepath_or_buffer=file_path3)

In [12]:
# List the columns available in bbl_cleaned.
bbl_cleaned.columns
# There is no nta column in this dataset either, so NTA values would have to be
# approximated from other fields, which would be error-prone.

Index(['borough', 'block', 'lot', 'community board', 'census tract 2010',
       'cb2010', 'schooldist', 'council district', 'postcode', 'firecomp',
       'policeprct', 'healtharea', 'sanitboro', 'sanitsub', 'address',
       'zonedist1', 'zonedist2', 'zonedist3', 'overlay1', 'overlay2',
       'spdist1', 'ltdheight', 'splitzone', 'bldgclass', 'landuse',
       'easements', 'ownertype', 'ownername', 'lotarea', 'bldgarea', 'comarea',
       'resarea', 'officearea', 'retailarea', 'garagearea', 'strgearea',
       'factryarea', 'otherarea', 'areasource', 'numbldgs', 'numfloors',
       'unitsres', 'unitstotal', 'lotfront', 'lotdepth', 'bldgfront',
       'bldgdepth', 'ext', 'proxcode', 'irrlotcode', 'lottype', 'bsmtcode',
       'assessland', 'assesstot', 'exempttot', 'yearbuilt', 'yearalter1',
       'yearalter2', 'histdist', 'landmark', 'builtfar', 'residfar', 'commfar',
       'facilfar', 'borocode', 'bbl', 'condono', 'tract2010', 'xcoord',
       'ycoord', 'latitude', 'longitude', 'z

In [13]:
# Show the NTA-level "per 1k units per 1k residents" rate next to its inputs.
#
# Known limitation: the rate is still inflated. Buildings that never appear in
# the eviction dataset contribute neither buildings nor units to the denominator.
# The BBL dataset has no nta column, so including every building would mean
# deriving the NTA from community_board and census_tract. That is more likely to
# introduce errors and misjudgements than restricting the denominator to the
# buildings that actually appear in the eviction dataset.
#
# NOTE(review): in the rows displayed in this notebook, unit_per_nta equals
# buildings_affected_per_nta * the row's own unitsres (e.g. Kingsbridge Heights:
# 38 * 3 = 114; Forest Hills: 17 * 181 = 3077), so it varies between rows of the
# same NTA — verify the upstream computation of unit_per_nta.
covid.loc[:, ['nta', 'bin', 'e_totpop', 'evictions_nta_per_1kunit_per_1kpop',
              'average_year_eviction_nta_count', 'unit_per_nta']].head(10)

Unnamed: 0,nta,bin,e_totpop,evictions_nta_per_1kunit_per_1kpop,average_year_eviction_nta_count,unit_per_nta
0,Kingsbridge Heights,2113173,81397.0,2.011655,18.666667,114.0
1,Forest Hills,4074666,75212.0,0.025926,6.0,3077.0
2,Erasmus,3117969,101053.0,2.473949,23.0,92.0
3,Crown Heights South,3029673,58476.0,0.190564,23.0,2064.0
4,Kew Gardens,4079390,20315.0,0.311549,6.0,948.0
5,Washington Heights South,1063219,58711.0,0.230255,24.333333,1800.0
6,Highbridge,2003313,78624.0,0.019475,17.333333,11320.0
7,East New York,3337046,108180.0,0.002601,26.0,92416.0
8,East Concourse-Concourse Village,2126620,51311.0,0.039532,34.333333,16926.0
9,Crown Heights North,3324603,83125.0,0.00412,46.0,134320.0


# **SVI measure analysis**

four types of aggregated and groupedby analysis:

average eviction count per building, per year;

average eviction count per nta population;

average eviction count per unit per building per year;

average eviction count per unit per nta population.

SVI measures:

ep_age17: age 17 or younger.
possible social causes impacted: homelessness, change of schools, education impact; \

ep_age65: age 65 and above.
possible social causes impacted: homelessness;\

ep_unemp: unemployed pct.
possible social causes impacted: homelessness;\





In [14]:
# Identifier and location fields plus the eviction measures used in the SVI analysis.
# Note: despite its name, analysis_columns is a DataFrame (a column subset of
# covid), not a list of column labels.
analysis_columns = covid.loc[:, [
    'primary_key', 'bin', 'bbl', 'latitude', 'longitude', 'eviction_address',
    'zipcode', 'borough', 'nta',
    'average_year_eviction_count', 'average_year_eviction_unit_count',
    'average_year_eviction_nta_count', 'evictions_nta_per_1kpop',
    'evictions_nta_per_1kunit_per_1kpop',
    'unitsres', 'e_totpop',
]]
analysis_columns.head()

Unnamed: 0,primary_key,bin,bbl,latitude,longitude,eviction_address,zipcode,borough,nta,average_year_eviction_count,average_year_eviction_unit_count,average_year_eviction_nta_count,evictions_nta_per_1kpop,evictions_nta_per_1kunit_per_1kpop,unitsres,e_totpop
0,004123/20_209969,2113173,2032140141,40.865396,-73.901317,2541 A GRAND AVE,10468,BRONX,Kingsbridge Heights,0.2,0.066667,18.666667,0.229329,2.011655,3.0,81397.0
1,0050153/20_106030,4074666,4031560133,40.724241,-73.855552,98-05 67TH AVENUE,11375,QUEENS,Forest Hills,0.2,0.001105,6.0,0.079775,0.025926,181.0,75212.0
2,0052002/19_101926,3117969,3051370021,40.645404,-73.952578,199 VERONICA PLACE,11226,BROOKLYN,Erasmus,0.6,0.3,23.0,0.227603,2.473949,2.0,101053.0
3,0057757/18_100889,3029673,3011850034,40.670832,-73.958843,302 EASTERN PARKWAY,11225,BROOKLYN,Crown Heights South,0.8,0.016667,23.0,0.393324,0.190564,48.0,58476.0
4,0061902/19_117253,4079390,4033220043,40.706235,-73.834603,83-33 118TH STREET,11415,QUEENS,Kew Gardens,0.4,0.005063,6.0,0.295348,0.311549,79.0,20315.0


In [15]:
# ep_age17 is stored as a percentage (e.g. 26.4 means 26.4%), not as a fraction.
covid['ep_age17'].iloc[:1]

Unnamed: 0,ep_age17
0,26.4


# **Groupby and aggregate first**

In [16]:
# Columns averaged per NTA in the first aggregation:
#   1. raw SVI percentages (children, elderly, unemployed, Black, Hispanic),
#   2. NTA-level eviction rates,
#   3. building-level and NTA-level yearly eviction averages.
# Note: the same name is rebound later in the notebook to the "*_impacted_*" columns.
svi_analysis_columns = (
    ['ep_age17', 'ep_age65', 'ep_unemp', 'ep_afam', 'ep_hisp']
    + ['evictions_nta_per_1kpop', 'evictions_nta_per_1kunit_per_1kpop']
    + ['average_year_eviction_count', 'average_year_eviction_unit_count',
       'average_year_eviction_nta_count']
)

## **Children**

In [17]:
# List eviction records alphabetically by NTA to check that rows of the same NTA
# share one average_year_eviction_nta_count value.
covid.loc[:, ['nta', 'primary_key', 'eviction_address', 'average_year_eviction_nta_count']].sort_values(by='nta').head(10)

Unnamed: 0,nta,primary_key,eviction_address,average_year_eviction_nta_count
1646,Allerton-Pelham Gardens,308172/22_361328,2325 DELANOY AVENUE,4.333333
396,Allerton-Pelham Gardens,23411/19_355898,3022 PAULDING AVENUE,4.333333
4483,Allerton-Pelham Gardens,B307662/21_121219,2910 TENBROECK AVE,4.333333
282,Allerton-Pelham Gardens,17634/20_360971,2920 BOUCK AVENUE,4.333333
1091,Allerton-Pelham Gardens,303142/22_360993,3022 PAULDING AVENUE,4.333333
4631,Allerton-Pelham Gardens,B45506/19_119183,2915 HERING AVENUE,4.333333
4654,Allerton-Pelham Gardens,B49003/19_120545,3234 YATES AVENUE,4.333333
276,Allerton-Pelham Gardens,17/22_173005,2230 EASTCHESTER RD,4.333333
4701,Allerton-Pelham Gardens,B802323/19_406008,3055 BOUCK AVENUE,4.333333
274,Allerton-Pelham Gardens,16787/20_360927,3231 TENBROECK AVE,4.333333


In [18]:
# Average every analysis column within each NTA and preview the first ten NTAs.
# mean() is used because rows of the same NTA can carry slightly different
# values (the original note attributes this to zipcode-level SVI measures);
# averaging smooths out those small deviations.
nta_means = covid.groupby('nta')[svi_analysis_columns].mean()
nta_means.head(10)

Unnamed: 0_level_0,ep_age17,ep_age65,ep_unemp,ep_afam,ep_hisp,evictions_nta_per_1kpop,evictions_nta_per_1kunit_per_1kpop,average_year_eviction_count,average_year_eviction_unit_count,average_year_eviction_nta_count
nta,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Allerton-Pelham Gardens,22.161538,16.876923,8.384615,48.776923,31.053846,0.062215,1.496138,0.230769,0.074924,4.333333
Annadale-Huguenot-Prince's Bay-Eltingville,22.9,16.5,4.1,0.5,9.6,0.009595,4.797544,0.2,0.1,0.333333
Arden Heights,22.1,18.0,4.0,0.8,12.8,0.026258,4.201261,0.2,0.16,1.666667
Astoria,13.927778,12.422222,7.716667,3.95,27.711111,0.154768,1.841368,0.2,0.038562,6.0
Auburndale,18.6,20.0,7.0,0.7,17.0,0.008854,4.42709,0.2,0.1,0.333333
Baisley Park,21.442105,14.194737,8.505263,67.263158,13.194737,0.190081,7.679759,0.326316,0.2,6.333333
Bath Beach,21.257143,18.104762,7.133333,1.566667,17.366667,0.080967,0.264114,0.27619,0.007138,7.0
Battery Park City-Lower Manhattan,11.322727,8.786364,3.490909,5.595455,9.686364,0.844814,0.175089,0.472727,0.000982,7.333333
Bay Ridge,20.482353,17.758824,5.947059,2.907843,22.609804,0.229324,0.62525,0.262745,0.031692,17.0
Bayside-Bayside Hills,19.033333,20.2,5.05,2.85,15.433333,0.062728,2.74175,0.2,0.052508,2.0


In [19]:
# rate of children affected per 1000 people in the population in each nta
covid['children_impacted_nta_per_1kpop'] = (covid['evictions_nta_per_1kpop'] * (covid['ep_age17'] / 100))
# covid['children_affected_per_1k_2'] = ((covid['average_year_eviction_nta_count'] /covid['e_totpop'])*1000 * (covid['ep_age17'] / 100))
covid[['nta', 'children_impacted_nta_per_1kpop']].head(10)

Unnamed: 0,nta,children_impacted_nta_per_1kpop
0,Kingsbridge Heights,0.060543
1,Forest Hills,0.014359
2,Erasmus,0.042562
3,Crown Heights South,0.065685
4,Kew Gardens,0.053163
5,Washington Heights South,0.070873
6,Highbridge,0.060185
7,East New York,0.06369
8,East Concourse-Concourse Village,0.164604
9,Crown Heights North,0.115657


## **Unemployment**

In [20]:
# Unemployed residents impacted per 1,000 residents of each NTA:
# NTA eviction rate per 1k population times the unemployment percentage (ep_unemp) / 100.
covid['unemployed_impacted_nta_per_1kpop'] = (
    covid['evictions_nta_per_1kpop'].mul(covid['ep_unemp']).div(100)
)
covid['unemployed_impacted_nta_per_1kpop'].head()

Unnamed: 0,unemployed_impacted_nta_per_1kpop
0,0.026602
1,0.003829
2,0.013429
3,0.025959
4,0.01654


## **Elderly**

In [21]:
# Elderly residents (age 65 and above) impacted per 1,000 residents of each NTA:
# NTA eviction rate per 1k population times the elderly percentage (ep_age65) / 100.
covid['elderly_impacted_nta_per_1kpop'] = (
    covid['evictions_nta_per_1kpop'].mul(covid['ep_age65']).div(100)
)
covid['elderly_impacted_nta_per_1kpop'].head()

Unnamed: 0,elderly_impacted_nta_per_1kpop
0,0.025685
1,0.016274
2,0.029816
3,0.060179
4,0.050209


## **black and hispanics impacted**

In [22]:
# Black and Hispanic residents impacted per 1,000 residents of each NTA:
# NTA eviction rate per 1k population times the combined ep_afam + ep_hisp percentage / 100.
covid['bh_impacted_nta_per_1kpop'] = (
    covid['evictions_nta_per_1kpop']
    .mul(covid['ep_afam'].add(covid['ep_hisp']))
    .div(100)
)
covid['bh_impacted_nta_per_1kpop'].head()

Unnamed: 0,bh_impacted_nta_per_1kpop
0,0.214652
1,0.015237
2,0.177758
3,0.253694
4,0.087423


# **housing burden areas impacted**

Definition: Housing cost-burdened occupied housing
units with annual income less than $75,000
(30%+ of income spent on housing costs)
estimate, 2018-2022 ACS
source: https://www.atsdr.cdc.gov/place-health/media/pdfs/2024/10/SVI2022Documentation.pdf

It is the percentage of households that spend more than 30% of their income on housing costs.

In [23]:
# Show the rows with the lowest housing-burden percentage (ep_hburd) to inspect
# the low end of the column.
covid.loc[:, ['nta', 'ep_hburd']].sort_values(by='ep_hburd').head()

Unnamed: 0,nta,ep_hburd
3185,SoHo-TriBeCa-Civic Center-Little Italy,8.1
84,Battery Park City-Lower Manhattan,8.4
3349,Battery Park City-Lower Manhattan,8.4
2897,Battery Park City-Lower Manhattan,8.4
5107,Battery Park City-Lower Manhattan,8.4


In [24]:
# Keep the St. Albans rows aside for inspection (captured before ep_hburd is
# overridden for this NTA in the next cell).
Albans = covid.loc[covid['nta'].eq('St. Albans')]

In [25]:
# Manually set ep_hburd (housing-burden percentage) for two NTAs.
# NOTE(review): presumably the SVI values for these NTAs were missing or
# unusable — confirm. Sources cited by the original author:
# https://anhd.org/report/how-affordable-housing-threatened-your-neighborhood-2020/
# https://www.nyc.gov/assets/doh/downloads/pdf/data/2018chp-qn12.pdf
for nta_name, hburd_pct in (('St. Albans', 64.2), ('East Elmhurst', 54.2)):
    covid.loc[covid['nta'] == nta_name, 'ep_hburd'] = hburd_pct

In [26]:
# Housing-cost-burdened households impacted per 1,000 residents of each NTA:
# NTA eviction rate per 1k population times the housing-burden percentage (ep_hburd) / 100.
covid['hburd_impacted_nta_per_1kpop'] = (
    covid['evictions_nta_per_1kpop'].mul(covid['ep_hburd']).div(100)
)
covid['hburd_impacted_nta_per_1kpop'].head()

Unnamed: 0,hburd_impacted_nta_per_1kpop
0,0.130029
1,0.020263
2,0.089221
3,0.14671
4,0.095397


# **Analysis**

## **Children Analysis: The most vulnerable nta for kids (high likelihood for change of schools or dropping out)**

In [27]:
# Rebind svi_analysis_columns to the five derived "impacted per 1k population"
# columns, one per vulnerable group (this replaces the earlier list of raw columns).
svi_analysis_columns = [
    f'{group}_impacted_nta_per_1kpop'
    for group in ('children', 'elderly', 'unemployed', 'bh', 'hburd')
]

In [28]:
# One row per NTA: the mean of each "impacted per 1k population" column.
# Original note: this runs after the name change, but before the final change.
rows_by_nta = covid.groupby('nta')
svi_analysis_df = rows_by_nta[svi_analysis_columns].mean()
svi_analysis_df.head()

Unnamed: 0_level_0,children_impacted_nta_per_1kpop,elderly_impacted_nta_per_1kpop,unemployed_impacted_nta_per_1kpop,bh_impacted_nta_per_1kpop,hburd_impacted_nta_per_1kpop
nta,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Allerton-Pelham Gardens,0.013779,0.010472,0.005213,0.049237,0.023161
Annadale-Huguenot-Prince's Bay-Eltingville,0.002197,0.001583,0.000393,0.000969,0.00308
Arden Heights,0.005803,0.004726,0.00105,0.003571,0.006039
Astoria,0.020916,0.018749,0.012137,0.047803,0.050998
Auburndale,0.001647,0.001771,0.00062,0.001567,0.003064


In [29]:
# Rank NTAs by children impacted per 1k residents and keep both ends of the ranking.
children_rate = svi_analysis_df['children_impacted_nta_per_1kpop']
children_top_15 = children_rate.sort_values(ascending=False).head(15)    # 15 highest
children_bottom_15 = children_rate.sort_values(ascending=True).head(15)  # 15 lowest
children_top_15

Unnamed: 0_level_0,children_impacted_nta_per_1kpop
nta,Unnamed: 1_level_1
Central Harlem North-Polo Grounds,0.174333
Crown Heights North,0.141017
West New Brighton-New Brighton-St. George,0.13855
Hunts Point,0.137261
East Concourse-Concourse Village,0.122662
Mott Haven-Port Morris,0.112336
Bedford Park-Fordham North,0.108434
Hunters Point-Sunnyside-West Maspeth,0.10635
Williamsbridge-Olinville,0.104108
University Heights-Morris Heights,0.096209


In [30]:
# Display the 15 NTAs with the lowest children-impacted rate.
children_bottom_15

Unnamed: 0_level_0,children_impacted_nta_per_1kpop
nta,Unnamed: 1_level_1
park-cemetery-etc-Bronx,0.001594
Auburndale,0.001647
Queensboro Hill,0.001975
Annadale-Huguenot-Prince's Bay-Eltingville,0.002197
Lindenwood-Howard Beach,0.002381
Stuyvesant Town-Cooper Village,0.002797
Glen Oaks-Floral Park-New Hyde Park,0.002973
Glendale,0.003127
North Corona,0.003666
Maspeth,0.003737


In [31]:
# Wrap each 15-row Series in a single-column DataFrame so that .T lays the NTAs
# out as columns (transposing a Series returns the Series unchanged).
# pd.DataFrame(...) is used instead of Series.to_frame() so this cell can be
# re-run safely: once the names hold DataFrames, .to_frame() would raise
# AttributeError, while pd.DataFrame(...) accepts both a Series and a DataFrame.
children_top_15 = pd.DataFrame(children_top_15)
children_bottom_15 = pd.DataFrame(children_bottom_15)

In [32]:
# Wide layout: one column per NTA, highest children-impacted rates.
children_top_15.transpose()

nta,Central Harlem North-Polo Grounds,Crown Heights North,West New Brighton-New Brighton-St. George,Hunts Point,East Concourse-Concourse Village,Mott Haven-Port Morris,Bedford Park-Fordham North,Hunters Point-Sunnyside-West Maspeth,Williamsbridge-Olinville,University Heights-Morris Heights,East Tremont,Flatbush,Mount Hope,Brownsville,Prospect Lefferts Gardens-Wingate
children_impacted_nta_per_1kpop,0.174333,0.141017,0.13855,0.137261,0.122662,0.112336,0.108434,0.10635,0.104108,0.096209,0.095677,0.094753,0.093656,0.090553,0.089628


In [33]:
# Wide layout: one column per NTA, lowest children-impacted rates.
children_bottom_15.transpose()

nta,park-cemetery-etc-Bronx,Auburndale,Queensboro Hill,Annadale-Huguenot-Prince's Bay-Eltingville,Lindenwood-Howard Beach,Stuyvesant Town-Cooper Village,Glen Oaks-Floral Park-New Hyde Park,Glendale,North Corona,Maspeth,New Springville-Bloomfield-Travis,Whitestone,Woodside,Middle Village,Elmhurst-Maspeth
children_impacted_nta_per_1kpop,0.001594,0.001647,0.001975,0.002197,0.002381,0.002797,0.002973,0.003127,0.003666,0.003737,0.003762,0.00434,0.004369,0.004742,0.004998


## **Elderly analysis: The most vulnerable nta for elderly (more demand for social security/welfare)**

In [34]:
# Rank NTAs by elderly impacted per 1k residents; keep the 15 highest and the
# 15 lowest as single-column DataFrames (so they can be transposed for display).
elderly_rate = svi_analysis_df['elderly_impacted_nta_per_1kpop']
elderly_top_15 = elderly_rate.sort_values(ascending=False).head(15).to_frame()
elderly_bottom_15 = elderly_rate.sort_values(ascending=True).head(15).to_frame()

In [35]:
# Wide layout: one column per NTA, highest elderly-impacted rates.
elderly_top_15.transpose()

nta,Lenox Hill-Roosevelt Island,Central Harlem North-Polo Grounds,Clinton,West New Brighton-New Brighton-St. George,Crown Heights North,Turtle Bay-East Midtown,Prospect Lefferts Gardens-Wingate,Pelham Bay-Country Club-City Island,Seagate-Coney Island,Woodlawn-Wakefield,Hudson Yards-Chelsea-Flatiron-Union Square,Yorkville,East Harlem North,Washington Heights South,Williamsbridge-Olinville
elderly_impacted_nta_per_1kpop,0.207358,0.113483,0.106585,0.085499,0.083226,0.082492,0.081312,0.080753,0.07736,0.07551,0.070112,0.069211,0.068434,0.065013,0.064551


In [36]:
# Wide layout: one column per NTA, lowest elderly-impacted rates.
elderly_bottom_15.transpose()

nta,park-cemetery-etc-Bronx,Williamsburg,North Corona,Annadale-Huguenot-Prince's Bay-Eltingville,Auburndale,Glendale,Maspeth,Lindenwood-Howard Beach,Queensboro Hill,Ozone Park,New Springville-Bloomfield-Travis,Windsor Terrace,Stuyvesant Town-Cooper Village,Woodside,Elmhurst-Maspeth
elderly_impacted_nta_per_1kpop,0.000831,0.001463,0.001552,0.001583,0.001771,0.001895,0.002157,0.002733,0.00278,0.002885,0.003321,0.0035,0.003538,0.003564,0.003653


## **Unemployed analysis: The most vulnerable nta for the unemployed (more demand for social security/welfare, homelessness)**

In [37]:
# Rank NTAs by unemployed impacted per 1k residents; keep the 15 highest and the
# 15 lowest as single-column DataFrames (so they can be transposed for display).
unemployed_rate = svi_analysis_df['unemployed_impacted_nta_per_1kpop']
unemployed_top_15 = unemployed_rate.sort_values(ascending=False).head(15).to_frame()
unemployed_bottom_15 = unemployed_rate.sort_values(ascending=True).head(15).to_frame()

In [38]:
# Wide layout: one column per NTA, highest unemployed-impacted rates.
unemployed_top_15.transpose()

nta,Central Harlem North-Polo Grounds,Crown Heights North,Hunts Point,East Concourse-Concourse Village,Bedford Park-Fordham North,Brownsville,Mott Haven-Port Morris,Washington Heights South,East Tremont,Washington Heights North,University Heights-Morris Heights,Williamsbridge-Olinville,Mount Hope,East Harlem North,West Farms-Bronx River
unemployed_impacted_nta_per_1kpop,0.112148,0.060727,0.06063,0.058827,0.058437,0.055438,0.053624,0.051456,0.049231,0.048914,0.047851,0.045468,0.043793,0.043551,0.039434


In [39]:
# Wide layout: one column per NTA, lowest unemployed-impacted rates.
unemployed_bottom_15.transpose()

nta,Annadale-Huguenot-Prince's Bay-Eltingville,Stuyvesant Town-Cooper Village,Auburndale,park-cemetery-etc-Bronx,Glen Oaks-Floral Park-New Hyde Park,North Corona,New Springville-Bloomfield-Travis,Queensboro Hill,Lindenwood-Howard Beach,Glendale,Arden Heights,Middle Village,Maspeth,Rossville-Woodrow,Woodside
unemployed_impacted_nta_per_1kpop,0.000393,0.000473,0.00062,0.000743,0.000777,0.000783,0.000866,0.000893,0.0009,0.000995,0.00105,0.001105,0.001172,0.00118,0.001242


## **Black + Hispanic analysis: The most vulnerable nta for black and hispanic population**

In [40]:
# Rank NTAs by Black + Hispanic residents impacted per 1k residents; keep the 15
# highest and the 15 lowest as single-column DataFrames (so they can be transposed).
bh_rate = svi_analysis_df['bh_impacted_nta_per_1kpop']
bh_top_15 = bh_rate.sort_values(ascending=False).head(15).to_frame()
bh_bottom_15 = bh_rate.sort_values(ascending=True).head(15).to_frame()

In [41]:
# Wide layout: one column per NTA, highest Black + Hispanic impacted rates.
bh_top_15.transpose()

nta,Central Harlem North-Polo Grounds,Hunts Point,Crown Heights North,East Concourse-Concourse Village,Mott Haven-Port Morris,Williamsbridge-Olinville,Prospect Lefferts Gardens-Wingate,Bedford Park-Fordham North,Brownsville,East Harlem North,University Heights-Morris Heights,East Tremont,Washington Heights South,Mount Hope,West Concourse
bh_impacted_nta_per_1kpop,0.722671,0.461355,0.450266,0.438711,0.396931,0.382665,0.378331,0.371106,0.351374,0.342554,0.334412,0.324178,0.31458,0.313238,0.307516


In [42]:
# Wide layout: one column per NTA, lowest Black + Hispanic impacted rates.
bh_bottom_15.transpose()

nta,Annadale-Huguenot-Prince's Bay-Eltingville,Auburndale,Queensboro Hill,Rossville-Woodrow,Lindenwood-Howard Beach,New Springville-Bloomfield-Travis,Arden Heights,Stuyvesant Town-Cooper Village,Glen Oaks-Floral Park-New Hyde Park,East Flushing,park-cemetery-etc-Bronx,Williamsburg,Middle Village,Whitestone,Windsor Terrace
bh_impacted_nta_per_1kpop,0.000969,0.001567,0.002327,0.002907,0.003106,0.003268,0.003571,0.003764,0.004003,0.005477,0.005504,0.005643,0.005745,0.006267,0.006332


# **Housing Burden areas impacted by evictions**

In [43]:
# Rank NTAs by housing-burdened households impacted per 1k residents; keep the 15
# highest and the 15 lowest as single-column DataFrames (so they can be transposed).
hburd_rate = svi_analysis_df['hburd_impacted_nta_per_1kpop']
housing_burden_top_15 = hburd_rate.sort_values(ascending=False).head(15).to_frame()
housing_burden_bottom_15 = hburd_rate.sort_values(ascending=True).head(15).to_frame()

In [44]:
# Wide layout: one column per NTA, highest housing-burden impacted rates.
housing_burden_top_15.transpose()

nta,Central Harlem North-Polo Grounds,Crown Heights North,Hunts Point,East Concourse-Concourse Village,Bedford Park-Fordham North,Mott Haven-Port Morris,Brownsville,Williamsbridge-Olinville,University Heights-Morris Heights,Prospect Lefferts Gardens-Wingate,East Harlem North,East Tremont,West Concourse,West New Brighton-New Brighton-St. George,Mount Hope
hburd_impacted_nta_per_1kpop,0.37477,0.271629,0.266533,0.25726,0.247611,0.231798,0.208834,0.197318,0.197088,0.195865,0.1954,0.189523,0.187489,0.182046,0.17941


In [45]:
# Wide layout: one column per NTA, lowest housing-burden impacted rates.
housing_burden_bottom_15.transpose()

nta,Auburndale,Annadale-Huguenot-Prince's Bay-Eltingville,Glen Oaks-Floral Park-New Hyde Park,Lindenwood-Howard Beach,park-cemetery-etc-Bronx,Stuyvesant Town-Cooper Village,New Springville-Bloomfield-Travis,Glendale,Williamsburg,Maspeth,Queensboro Hill,Arden Heights,North Corona,Whitestone,Brooklyn Heights-Cobble Hill
hburd_impacted_nta_per_1kpop,0.003064,0.00308,0.003176,0.003314,0.003485,0.003949,0.004699,0.005133,0.005392,0.005639,0.005761,0.006039,0.006194,0.006466,0.007188


In [46]:
# Give the eviction measures more self-explanatory names; the "(building)" and
# "(nta)" suffixes state the level at which each measure was aggregated.
# Caution: the rename is applied in place, so this cell is not re-runnable in
# isolation — earlier cells that reference the old labels (for example
# 'evictions_nta_per_1kpop') will fail if executed again after this one.
clearer_column_names = {
    'average_year_eviction_count': 'average_year_eviction_count(building)',
    'average_year_eviction_unit_count': 'average_year_eviction_count_per_unit(building)',
    'average_year_eviction_nta_count': 'average_year_eviction_count_per_nta(nta)',
    'evictions_nta_per_1kpop': 'evictions_per_nta_1kpop(nta)',
    'evictions_nta_per_1kunit_per_1kpop': 'evictions_per_1kunit_nta_1kpop(nta)',
}
covid.rename(columns=clearer_column_names, inplace=True)

In [47]:
# Confirm the renamed labels and the derived "*_impacted_*" columns are present.
covid.columns

Index(['primary_key', 'bbl', 'court_index_number', 'docket_number',
       'eviction_address', 'eviction_apartment_number', 'executed_date',
       'borough', 'zipcode', 'ejectment',
       ...
       'average_year_eviction_count_per_nta(nta)',
       'evictions_per_nta_1kpop(nta)', 'buildings_affected_per_nta',
       'unit_per_nta', 'evictions_per_1kunit_nta_1kpop(nta)',
       'children_impacted_nta_per_1kpop', 'unemployed_impacted_nta_per_1kpop',
       'elderly_impacted_nta_per_1kpop', 'bh_impacted_nta_per_1kpop',
       'hburd_impacted_nta_per_1kpop'],
      dtype='object', length=102)

In [48]:
# Show the last 12 column labels together with the frame's (rows, columns) shape.
covid.columns[-12:], covid.shape

(Index(['average_year_eviction_count_per_unit(building)',
        'average_year_eviction_count(building)',
        'average_year_eviction_count_per_nta(nta)',
        'evictions_per_nta_1kpop(nta)', 'buildings_affected_per_nta',
        'unit_per_nta', 'evictions_per_1kunit_nta_1kpop(nta)',
        'children_impacted_nta_per_1kpop', 'unemployed_impacted_nta_per_1kpop',
        'elderly_impacted_nta_per_1kpop', 'bh_impacted_nta_per_1kpop',
        'hburd_impacted_nta_per_1kpop'],
       dtype='object'),
 (5386, 102))

In [49]:
# Preview the first five rows of covid, including the renamed and derived columns.
covid.head()

Unnamed: 0,primary_key,bbl,court_index_number,docket_number,eviction_address,eviction_apartment_number,executed_date,borough,zipcode,ejectment,eviction/legal_possession,latitude,longitude,community_board,council_district,census_tract,bin,nta,year,month_year,geometry,yearbuilt,bldgclass,numfloors,unitsres,ownername,bldgarea,building_type,building_category,is_condo,floor_category,rent_era,architectural_style,economic_period,residential_units_category,is_llc,building_size_category,size_quartile,decade,fips,e_totpop,rpl_theme1,rpl_theme2,rpl_theme3,rpl_theme4,rpl_themes,ep_pov150,ep_unemp,ep_nohsdp,ep_uninsur,ep_age65,ep_age17,ep_disabl,ep_limeng,ep_noveh,ep_crowd,ep_hburd,ep_afam,ep_hisp,ep_asian,ep_aian,ep_nhpi,ep_twomore,ep_otherrace,ep_minrty,ep_white,invalid_zip,svi_quartile,air_quality,animal_issues,appliances,building_exterior,doors_windows,electrical_issues,elevator_issues,floors_stairs,general_complaints,graffiti_posting,heat_hot_water,homeless_issues,noise_complaints,other_issues,pest_issues,plumbing_issues,police_matters,public_nuisance,safety_concerns,sanitation_issues,walls_ceilings,total_complaints,average_year_eviction_count_per_unit(building),average_year_eviction_count(building),average_year_eviction_count_per_nta(nta),evictions_per_nta_1kpop(nta),buildings_affected_per_nta,unit_per_nta,evictions_per_1kunit_nta_1kpop(nta),children_impacted_nta_per_1kpop,unemployed_impacted_nta_per_1kpop,elderly_impacted_nta_per_1kpop,bh_impacted_nta_per_1kpop,hburd_impacted_nta_per_1kpop
0,004123/20_209969,2032140141,004123/20,209969,2541 A GRAND AVE,ROOM 3B,2022-08-22,BRONX,10468,Not an Ejectment,Possession,40.865396,-73.901317,7.0,14.0,265.0,2113173,Kingsbridge Heights,2022,2022-08,POINT (-73.901317 40.865396),2004.0,C0,3.0,3.0,MONJU SARKER,3420.0,post-war,walk-up,False,low-rise,"1994–Present, vacancy decontrol","2001-present, New Architecture","1991–2008, modern economic growth",3-5 units,False,medium-small,Q4 (largest 25%),2000-2009,10468,81397.0,0.9954,0.9407,0.987,0.947,0.9874,39.5,11.6,28.3,9.2,11.2,26.4,12.2,26.9,71.8,19.2,56.7,15.6,78.0,2.3,0.0,0.0,0.5,0.5,96.9,3.1,False,Q3,0.0,0.0,0.0,0.0,3.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,2.0,0.0,0.0,2.0,0.0,0.0,0.0,3.0,1.0,14.0,0.066667,0.2,18.666667,0.229329,38,114.0,2.011655,0.060543,0.026602,0.025685,0.214652,0.130029
1,0050153/20_106030,4031560133,0050153/20,106030,98-05 67TH AVENUE,12F,2022-04-14,QUEENS,11375,Not an Ejectment,Possession,40.724241,-73.855552,6.0,29.0,71306.0,4074666,Forest Hills,2022,2022-04,POINT (-73.855552 40.724241),1960.0,D3,13.0,181.0,MARSEILLES LEASING LIMITED PARTNERSHIP,177710.0,post-war,elevator,False,high-rise,"1947–1969, rent-control","1951–1980, the International Style, Alternative Modernism","1946–1975, pst war economic boom",100+ units,False,mega,Q4 (largest 25%),1960-1969,11375,75212.0,0.4759,0.5698,0.8789,0.8057,0.7322,12.0,4.8,6.1,3.7,20.4,18.0,10.5,7.9,41.9,5.8,25.4,2.7,16.4,28.5,0.1,0.0,4.6,0.7,53.0,47.0,False,Q1 (Low),0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,62.0,0.0,34.0,0.0,0.0,4.0,1.0,0.0,0.0,2.0,5.0,112.0,0.001105,0.2,6.0,0.079775,17,3077.0,0.025926,0.014359,0.003829,0.016274,0.015237,0.020263
2,0052002/19_101926,3051370021,0052002/19,101926,199 VERONICA PLACE,1ST FLOOR,2020-03-02,BROOKLYN,11226,Not an Ejectment,Possession,40.645404,-73.952578,17.0,40.0,792.0,3117969,Erasmus,2020,2020-03,POINT (-73.952578 40.645404),1920.0,B3,2.0,2.0,"AANS, LLC.",1496.0,pre-war,two-family,False,low-rise,"Pre-1947, pre-rent-control","1900–1920, Beaux-Arts","Pre-1929, pre-great depression",2-unit,True,very small,Q2 (25-50%),1920-1929,11226,101053.0,0.93,0.4536,0.9639,0.9692,0.922,23.7,5.9,13.9,9.1,13.1,18.7,6.7,5.6,66.1,10.0,39.2,63.2,14.9,3.2,0.3,0.0,4.1,0.7,86.3,13.7,False,Q2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.3,0.6,23.0,0.227603,46,92.0,2.473949,0.042562,0.013429,0.029816,0.177758,0.089221
3,0057757/18_100889,3011850034,0057757/18,100889,302 EASTERN PARKWAY,4B,2020-02-03,BROOKLYN,11225,Not an Ejectment,Possession,40.670832,-73.958843,9.0,35.0,213.0,3029673,Crown Heights South,2020,2020-02,POINT (-73.958843 40.670832),1923.0,D1,6.0,48.0,302 EASTERN CORP,42984.0,pre-war,elevator,False,mid-rise,"Pre-1947, pre-rent-control","1921–1930, Art Deco Skyscrapers","Pre-1929, pre-great depression",21-100 units,False,very large,Q4 (largest 25%),1920-1929,11225,58476.0,0.8905,0.3157,0.933,0.8342,0.8538,23.1,6.6,11.5,5.9,15.3,16.7,9.6,2.2,66.2,6.9,37.3,53.7,10.8,3.3,0.0,0.0,3.9,0.9,72.6,27.4,False,Q1 (Low),0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,12.0,0.0,17.0,0.0,3.0,2.0,1.0,0.0,2.0,2.0,1.0,45.0,0.016667,0.8,23.0,0.393324,43,2064.0,0.190564,0.065685,0.025959,0.060179,0.253694,0.14671
4,0061902/19_117253,4033220043,0061902/19,117253,83-33 118TH STREET,5N,2020-02-14,QUEENS,11415,Not an Ejectment,Possession,40.706235,-73.834603,9.0,29.0,134.0,4079390,Kew Gardens,2020,2020-02,POINT (-73.834603 40.706235),1979.0,D1,6.0,79.0,CIAMPA METROPOLITAN CO,72147.0,post-war,elevator,False,mid-rise,"1970–1993, deregularization","1951–1980, the International Style, Alternative Modernism","1976–1990, fiscal crisis and recovery",21-100 units,False,very large,Q4 (largest 25%),1970-1979,11415,20315.0,0.7661,0.5573,0.898,0.9396,0.8761,14.6,5.6,11.8,4.7,17.0,18.0,10.9,7.5,44.3,8.5,32.3,6.7,22.9,22.3,0.2,0.0,3.4,2.1,57.7,42.3,False,Q1 (Low),0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,9.0,0.0,19.0,0.0,1.0,0.0,0.0,0.0,0.0,6.0,1.0,38.0,0.005063,0.4,6.0,0.295348,12,948.0,0.311549,0.053163,0.01654,0.050209,0.087423,0.095397


# **average_year_eviction_per_building_nta(nta)**

- Measures eviction intensity per building within a neighborhood (NTA).
- Answers the question: "In a given NTA, how many evictions occur per year, on average, for every 10 affected buildings?"

$$
\text{average year eviction per building per neighborhood} = \left( \frac{\text{average year eviction count per nta}}{\text{buildings affected per nta}} \right) \times 10
$$

In [50]:
# Yearly NTA eviction count divided by the number of affected buildings in that NTA,
# scaled by 10 so the value reads as "evictions per 10 buildings" (same as the formula above).
covid['average_year_eviction_per_building_nta(nta)'] = (
    covid['average_year_eviction_count_per_nta(nta)']
    .div(covid['buildings_affected_per_nta'])
    .mul(10)
)
covid['average_year_eviction_per_building_nta(nta)'].head()
# i.e. for every 10 affected buildings in an NTA, the average yearly eviction count

Unnamed: 0,average_year_eviction_per_building_nta(nta)
0,4.912281
1,3.529412
2,5.0
3,5.348837
4,5.0


# **Evictions per unit per building every 3 years**

In [67]:
# Show the last 15 column labels of covid.
# NOTE(review): the recorded output already lists
# 'average_3year_eviction_count_per_unit(building)', which is only assigned in a
# later cell — this suggests out-of-order execution; confirm on a fresh kernel.
covid.columns[-15:]

Index(['average_year_eviction_count_per_unit(building)',
       'average_year_eviction_count(building)',
       'average_year_eviction_count_per_nta(nta)',
       'evictions_per_nta_1kpop(nta)', 'buildings_affected_per_nta',
       'unit_per_nta', 'evictions_per_1kunit_nta_1kpop(nta)',
       'children_impacted_nta_per_1kpop', 'unemployed_impacted_nta_per_1kpop',
       'elderly_impacted_nta_per_1kpop', 'bh_impacted_nta_per_1kpop',
       'hburd_impacted_nta_per_1kpop',
       'average_year_eviction_per_building_nta(nta)',
       'average_5year_eviction_count_per_unit(building)',
       'average_3year_eviction_count_per_unit(building)'],
      dtype='object')

In [68]:
# Preview the yearly per-unit eviction rate; the 3-year column below is derived from it.
covid['average_year_eviction_count_per_unit(building)'].head()

Unnamed: 0,average_year_eviction_count_per_unit(building)
0,0.066667
1,0.001105
2,0.3
3,0.016667
4,0.005063


In [69]:
# Show the last 15 column labels of covid.
# NOTE(review): repeats the identical `covid.columns[-15:]` check from an earlier cell;
# candidate for removal.
covid.columns[-15:]

Index(['average_year_eviction_count_per_unit(building)',
       'average_year_eviction_count(building)',
       'average_year_eviction_count_per_nta(nta)',
       'evictions_per_nta_1kpop(nta)', 'buildings_affected_per_nta',
       'unit_per_nta', 'evictions_per_1kunit_nta_1kpop(nta)',
       'children_impacted_nta_per_1kpop', 'unemployed_impacted_nta_per_1kpop',
       'elderly_impacted_nta_per_1kpop', 'bh_impacted_nta_per_1kpop',
       'hburd_impacted_nta_per_1kpop',
       'average_year_eviction_per_building_nta(nta)',
       'average_5year_eviction_count_per_unit(building)',
       'average_3year_eviction_count_per_unit(building)'],
      dtype='object')

In [70]:
# 3-year per-unit figure = yearly per-unit rate multiplied by 3.
covid['average_3year_eviction_count_per_unit(building)'] = (
    covid['average_year_eviction_count_per_unit(building)'].mul(3)
)

# Show the inputs next to the derived column for the first few rows.
covid[[
    'bin',
    'average_year_eviction_count(building)',
    'unitsres',
    'average_year_eviction_count_per_unit(building)',
    'average_3year_eviction_count_per_unit(building)',
]].head()

Unnamed: 0,bin,average_year_eviction_count(building),unitsres,average_year_eviction_count_per_unit(building),average_3year_eviction_count_per_unit(building)
0,2113173,0.2,3.0,0.066667,0.2
1,4074666,0.2,181.0,0.001105,0.003315
2,3117969,0.6,2.0,0.3,0.9
3,3029673,0.8,48.0,0.016667,0.05
4,4079390,0.4,79.0,0.005063,0.01519


In [72]:
# Display the building-level and NTA-level eviction metrics side by side.
covid[[
    'average_year_eviction_count(building)',
    'average_year_eviction_count_per_unit(building)',
    'average_year_eviction_count_per_nta(nta)',
    'average_year_eviction_per_building_nta(nta)',
    'evictions_per_nta_1kpop(nta)',
    'evictions_per_1kunit_nta_1kpop(nta)',
    'average_5year_eviction_count_per_unit(building)',
]]

Unnamed: 0,average_year_eviction_count(building),average_year_eviction_count_per_unit(building),average_year_eviction_count_per_nta(nta),average_year_eviction_per_building_nta(nta),evictions_per_nta_1kpop(nta),evictions_per_1kunit_nta_1kpop(nta),average_5year_eviction_count_per_unit(building)
0,0.2,0.066667,18.666667,4.912281,0.229329,2.011655,0.200000
1,0.2,0.001105,6.000000,3.529412,0.079775,0.025926,0.003315
2,0.6,0.300000,23.000000,5.000000,0.227603,2.473949,0.900000
3,0.8,0.016667,23.000000,5.348837,0.393324,0.190564,0.050000
4,0.4,0.005063,6.000000,5.000000,0.295348,0.311549,0.015190
...,...,...,...,...,...,...,...
5381,0.6,0.001322,20.333333,4.959350,0.504161,0.027085,0.003965
5382,0.6,0.001322,20.333333,4.959350,0.504161,0.027085,0.003965
5383,0.2,0.012500,5.000000,5.555556,0.088917,0.617482,0.037500
5384,0.2,0.000833,20.333333,4.959350,0.774928,0.078753,0.002500


In [74]:
# Drop the column labelled "5year" from the covid frame. In the preview recorded
# above, its values equal 3 x the yearly per-unit rate, i.e. it duplicates
# 'average_3year_eviction_count_per_unit(building)' under a misleading name.
# The recorded outputs that follow (104 columns, no "5year" label) show this drop
# was executed before being commented out, so it is restored here to keep a
# fresh Restart & Run All consistent with them.
# errors='ignore' makes the cell safe to re-run once the column is gone.
covid = covid.drop(
    columns=['average_5year_eviction_count_per_unit(building)'],
    errors='ignore',
)

In [75]:
# Inspect the column index and the (rows, columns) shape of covid before saving.
covid.columns, covid.shape

(Index(['primary_key', 'bbl', 'court_index_number', 'docket_number',
        'eviction_address', 'eviction_apartment_number', 'executed_date',
        'borough', 'zipcode', 'ejectment',
        ...
        'buildings_affected_per_nta', 'unit_per_nta',
        'evictions_per_1kunit_nta_1kpop(nta)',
        'children_impacted_nta_per_1kpop', 'unemployed_impacted_nta_per_1kpop',
        'elderly_impacted_nta_per_1kpop', 'bh_impacted_nta_per_1kpop',
        'hburd_impacted_nta_per_1kpop',
        'average_year_eviction_per_building_nta(nta)',
        'average_3year_eviction_count_per_unit(building)'],
       dtype='object', length=104),
 (5386, 104))

In [77]:
# Show the last 14 column labels of covid.
covid.columns[-14:]

Index(['average_year_eviction_count_per_unit(building)',
       'average_year_eviction_count(building)',
       'average_year_eviction_count_per_nta(nta)',
       'evictions_per_nta_1kpop(nta)', 'buildings_affected_per_nta',
       'unit_per_nta', 'evictions_per_1kunit_nta_1kpop(nta)',
       'children_impacted_nta_per_1kpop', 'unemployed_impacted_nta_per_1kpop',
       'elderly_impacted_nta_per_1kpop', 'bh_impacted_nta_per_1kpop',
       'hburd_impacted_nta_per_1kpop',
       'average_year_eviction_per_building_nta(nta)',
       'average_3year_eviction_count_per_unit(building)'],
      dtype='object')

In [78]:
# Save the updated covid-times data to Google Drive.
# index=False keeps the DataFrame index out of the CSV.
covid.to_csv('/content/drive/My Drive/X999/bbl_evictions_311_svi_covid_svi.csv', index=False)

In [79]:
# (rows, columns) of the frame that was just written to CSV.
covid.shape

(5386, 104)

In [80]:
# Reload covid from the CSV path used by the save cell above.
# Note: this re-binds both `link2` and `covid`.
link2 = '/content/drive/My Drive/X999/bbl_evictions_311_svi_covid_svi.csv'
covid = pd.read_csv(link2)

In [81]:
# Load the normal-times file; this re-binds `normal`.
link = '/content/drive/My Drive/X999/bbl_evictions_311_svi_normal_times_svi.csv'
normal = pd.read_csv(link)

In [82]:
# Column labels present in normal but missing from covid.
set(normal.columns).difference(covid.columns)

{'average_5year_eviction_count_per_unit(building)', 'svi_group'}

In [84]:
# Compare the (rows, columns) shapes of the two reloaded frames.
normal.shape, covid.shape
# Original expectation: 104 columns in each frame.
# NOTE(review): the recorded output shows 105 columns for normal and 104 for covid;
# the set-difference cell above lists two labels that only normal has — confirm
# which widths are intended.

((66397, 105), (5386, 104))