In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import datetime as dt
import matplotlib
import matplotlib.pyplot as plt
import os
import io
import geopandas as gpd
import seaborn as sns
# suppress warning
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

In [2]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.width', None)
# display all columns
# avoid scientific digit
# pd.options.display.float_format = '{:.2f}'.format

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
link1 = '/content/drive/My Drive/X999/bbl_evictions_311_svi_normal_times_correct_coordinates.csv'
link2 = '/content/drive/My Drive/X999/bbl_evictions_311_svi_covid_correct_coordinates.csv'
normal = pd.read_csv(link1)
covid = pd.read_csv(link2)

In [5]:
normal.columns

Index(['primary_key', 'bbl', 'court_index_number', 'docket_number',
       'eviction_address', 'eviction_apartment_number', 'executed_date',
       'borough', 'zipcode', 'ejectment', 'eviction/legal_possession',
       'latitude', 'longitude', 'community_board', 'council_district',
       'census_tract', 'bin', 'nta', 'year', 'month_year', 'geometry',
       'average_year_eviction_count', 'yearbuilt', 'bldgclass', 'numfloors',
       'unitsres', 'ownername', 'bldgarea', 'building_type',
       'building_category', 'is_condo', 'floor_category', 'rent_era',
       'architectural_style', 'economic_period', 'residential_units_category',
       'is_llc', 'building_size_category', 'size_quartile', 'decade', 'fips',
       'e_totpop', 'rpl_theme1', 'rpl_theme2', 'rpl_theme3', 'rpl_theme4',
       'rpl_themes', 'ep_pov150', 'ep_unemp', 'ep_nohsdp', 'ep_uninsur',
       'ep_age65', 'ep_age17', 'ep_disabl', 'ep_limeng', 'ep_noveh',
       'ep_crowd', 'ep_hburd', 'ep_afam', 'ep_hisp', 'ep_asian',

# **Correct evictions_nta_per_unit_per_1k**

Previously, we did evictions_nta_per_unit_per_1k = average_year_eviction_unit_count/nta_population.

Now, we will correct it to evictions_nta_per_unit_per_1k = average_year_eviction_unit_count * "total units in an nta" / nta_population.

To get total units in an nta, we will create two new columns: "total building counts in nta" and "total units in an nta".

In [6]:
evictions_per_nta = normal.groupby('nta')['bin'].nunique().reset_index(name='buildings_affected')
evictions_per_nta.head()

Unnamed: 0,nta,buildings_affected
0,Allerton-Pelham Gardens,97
1,Annadale-Huguenot-Prince's Bay-Eltingville,15
2,Arden Heights,16
3,Astoria,266
4,Auburndale,24


In [7]:
unique_buildings = normal.drop_duplicates(subset=['nta', 'bin'])
unique_buildings.head()

Unnamed: 0,primary_key,bbl,court_index_number,docket_number,eviction_address,eviction_apartment_number,executed_date,borough,zipcode,ejectment,eviction/legal_possession,latitude,longitude,community_board,council_district,census_tract,bin,nta,year,month_year,geometry,average_year_eviction_count,yearbuilt,bldgclass,numfloors,unitsres,ownername,bldgarea,building_type,building_category,is_condo,floor_category,rent_era,architectural_style,economic_period,residential_units_category,is_llc,building_size_category,size_quartile,decade,fips,e_totpop,rpl_theme1,rpl_theme2,rpl_theme3,rpl_theme4,rpl_themes,ep_pov150,ep_unemp,ep_nohsdp,ep_uninsur,ep_age65,ep_age17,ep_disabl,ep_limeng,ep_noveh,ep_crowd,ep_hburd,ep_afam,ep_hisp,ep_asian,ep_aian,ep_nhpi,ep_twomore,ep_otherrace,ep_minrty,ep_white,invalid_zip,svi_quartile,svi_group,air_quality,animal_issues,appliances,building_exterior,doors_windows,electrical_issues,elevator_issues,floors_stairs,general_complaints,graffiti_posting,heat_hot_water,homeless_issues,noise_complaints,other_issues,pest_issues,plumbing_issues,police_matters,public_nuisance,safety_concerns,sanitation_issues,walls_ceilings,total_complaints,average_year_eviction_unit_count,average_year_eviction_nta_count,evictions_nta_per_1k,evictions_nta_per_unit_per_1k
0,*308072/22_5865,3037420029,*308072/22,5865,356 MILLER AVE,1 AND BASEMENT,2024-12-04,BROOKLYN,11207,Not an Ejectment,Possession,40.672121,-73.891105,5.0,37.0,1152.0,3083989,East New York,2024,2024-12,POINT (-73.891105 40.672121),0.8,1930.0,C0,3.0,3.0,356 MILLER LLC,2700.0,pre-war,walk-up,False,low-rise,"Pre-1947, pre-rent-control","1921–1930, Art Deco Skyscrapers","1930-1945, great depression and WWII",3-5 units,True,small,Q3 (50-75%),1930-1939,11207,96801.0,0.9788,0.914,0.9808,0.9812,0.9839,33.9,11.1,19.1,6.0,13.8,22.5,13.8,5.3,57.8,9.1,44.7,55.9,32.8,1.5,0.0,0.0,2.9,1.6,94.7,5.3,False,Q3,medium-high,0.0,0.0,1.0,0.0,1.0,2.0,0.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,3.0,5.0,19.0,0.266667,266.0,13.739527,0.002755
1,*313639/23_5202,3057940012,*313639/23,5202,710 61ST STREET,2ND FLOOR,2024-03-04,BROOKLYN,11220,Not an Ejectment,Possession,40.635941,-74.011883,7.0,38.0,118.0,3143881,Sunset Park East,2024,2024-03,POINT (-74.011883 40.635941),0.6,1920.0,B2,2.0,2.0,"A.R.M. PARKING, LLC",1204.0,pre-war,two-family,False,low-rise,"Pre-1947, pre-rent-control","1900–1920, Beaux-Arts","Pre-1929, pre-great depression",2-unit,True,very small,Q1 (smallest 25%),1920-1929,11220,93008.0,0.9885,0.7635,0.9594,0.9179,0.9662,37.5,7.5,37.9,11.6,13.1,25.4,8.4,40.2,61.7,23.7,43.6,1.7,40.9,40.7,0.4,0.0,1.2,0.2,85.0,15.0,False,Q3,medium-high,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,4.0,0.3,38.0,2.042835,0.003226
2,*324973/22_5308,3057820030,*324973/22,5308,462 60TH STREET,FOURTH FLOOR APT AKA,2024-08-13,BROOKLYN,11220,Not an Ejectment,Possession,40.640008,-74.017068,7.0,38.0,122.0,3143435,Sunset Park West,2024,2024-08,POINT (-74.017068 40.640008),0.6,1907.0,C3,4.0,4.0,"LIN, RONG LAN",4800.0,pre-war,walk-up,False,mid-rise,"Pre-1947, pre-rent-control","1900–1920, Beaux-Arts","Pre-1929, pre-great depression",3-5 units,False,medium-small,Q4 (largest 25%),1900-1909,11220,93008.0,0.9885,0.7635,0.9594,0.9179,0.9662,37.5,7.5,37.9,11.6,13.1,25.4,8.4,40.2,61.7,23.7,43.6,1.7,40.9,40.7,0.4,0.0,1.2,0.2,85.0,15.0,False,Q3,medium-high,0.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,10.0,0.15,39.6,2.128849,0.001613
3,*53336/16_170279,2032510420,*53336/16,170279,3400 PAUL AVENUE,15D,2018-10-17,BRONX,10468,Not an Ejectment,Possession,40.87719,-73.889569,7.0,11.0,409.0,2015444,Van Cortlandt Village,2018,2018-10,POINT (-73.889569 40.87719),0.8,1967.0,D4,21.0,352.0,SCOTT TOWER HOUSING CO INC,381213.0,post-war,condo-co-op,True,high-rise,"1947–1969, rent-control","1951–1980, the International Style, Alternative Modernism","1946–1975, pst war economic boom",100+ units,False,mega,Q4 (largest 25%),1960-1969,10468,81397.0,0.9954,0.9407,0.987,0.947,0.9874,39.5,11.6,28.3,9.2,11.2,26.4,12.2,26.9,71.8,19.2,56.7,15.6,78.0,2.3,0.0,0.0,0.5,0.5,96.9,3.1,False,Q3,medium-high,6.0,0.0,0.0,0.0,9.0,0.0,2.0,5.0,5.0,0.0,23.0,0.0,145.0,0.0,2.0,41.0,0.0,0.0,1.0,5.0,4.0,248.0,0.002273,172.8,10.614642,2.8e-05
4,*5990/17_2703,2025770038,*5990/17,2703,480 CONCORD AVENUE,4E,2019-08-30,BRONX,10455,Not an Ejectment,Possession,40.811197,-73.90881,1.0,8.0,35.0,2003900,Mott Haven-Port Morris,2019,2019-08,POINT (-73.90881 40.811197),1.6,1928.0,D7,6.0,65.0,480 CONCORD AVE OWNER LLC,69102.0,pre-war,elevator,False,mid-rise,"Pre-1947, pre-rent-control","1921–1930, Art Deco Skyscrapers","Pre-1929, pre-great depression",21-100 units,True,very large,Q4 (largest 25%),1920-1929,10455,44380.0,0.9971,0.9909,0.9972,0.9499,0.9971,48.5,12.5,32.1,9.5,10.1,28.1,19.5,17.9,75.1,14.5,51.9,21.1,74.1,1.1,0.0,0.0,1.4,1.0,98.6,1.4,False,Q4 (High),high,0.0,5.0,8.0,0.0,21.0,8.0,34.0,10.0,9.0,0.0,89.0,0.0,78.0,0.0,5.0,41.0,1.0,0.0,3.0,31.0,13.0,356.0,0.024615,158.8,17.890942,0.000555


In [8]:
total_units_per_nta = unique_buildings.groupby('nta')['unitsres'].sum().reset_index(name='total_units_per_nta')
total_units_per_nta.head()

Unnamed: 0,nta,total_units_per_nta
0,Allerton-Pelham Gardens,8248.0
1,Annadale-Huguenot-Prince's Bay-Eltingville,31.0
2,Arden Heights,27.0
3,Astoria,20664.0
4,Auburndale,225.0


In [9]:
building_units_per_nta = pd.merge(evictions_per_nta, total_units_per_nta, on='nta', how='left')
building_units_per_nta.head()

Unnamed: 0,nta,buildings_affected,total_units_per_nta
0,Allerton-Pelham Gardens,97,8248.0
1,Annadale-Huguenot-Prince's Bay-Eltingville,15,31.0
2,Arden Heights,16,27.0
3,Astoria,266,20664.0
4,Auburndale,24,225.0


In [10]:
normal = normal.merge(building_units_per_nta, on='nta', how='left')
normal.shape

(66397, 98)

In [11]:
normal.columns

Index(['primary_key', 'bbl', 'court_index_number', 'docket_number',
       'eviction_address', 'eviction_apartment_number', 'executed_date',
       'borough', 'zipcode', 'ejectment', 'eviction/legal_possession',
       'latitude', 'longitude', 'community_board', 'council_district',
       'census_tract', 'bin', 'nta', 'year', 'month_year', 'geometry',
       'average_year_eviction_count', 'yearbuilt', 'bldgclass', 'numfloors',
       'unitsres', 'ownername', 'bldgarea', 'building_type',
       'building_category', 'is_condo', 'floor_category', 'rent_era',
       'architectural_style', 'economic_period', 'residential_units_category',
       'is_llc', 'building_size_category', 'size_quartile', 'decade', 'fips',
       'e_totpop', 'rpl_theme1', 'rpl_theme2', 'rpl_theme3', 'rpl_theme4',
       'rpl_themes', 'ep_pov150', 'ep_unemp', 'ep_nohsdp', 'ep_uninsur',
       'ep_age65', 'ep_age17', 'ep_disabl', 'ep_limeng', 'ep_noveh',
       'ep_crowd', 'ep_hburd', 'ep_afam', 'ep_hisp', 'ep_asian',

In [12]:
normal['evictions_nta_per_1k'] = normal['evictions_nta_per_1k'] /5

In [13]:
normal[['nta','average_year_eviction_count', 'average_year_eviction_unit_count']].head(10)
# for the same nta, they likely have different values for these two columns, because they were groupedby bin

Unnamed: 0,nta,average_year_eviction_count,average_year_eviction_unit_count
0,East New York,0.8,0.266667
1,Sunset Park East,0.6,0.3
2,Sunset Park West,0.6,0.15
3,Van Cortlandt Village,0.8,0.002273
4,Mott Haven-Port Morris,1.6,0.024615
5,Bedford Park-Fordham North,1.6,0.038095
6,Bedford Park-Fordham North,1.0,0.5
7,Claremont-Bathgate,1.6,0.048485
8,East New York,0.4,0.08
9,Brighton Beach,1.2,0.007692


In [14]:
normal[['nta', 'average_year_eviction_nta_count', 'evictions_nta_per_1k','evictions_nta_per_unit_per_1k']].head(10)
# for the same nta, 'average_year_eviction_nta_count' must have the same values
# for the same nta, 'evictions_nta_per_1k' likely have the same values, because population was defined on zipcode values and the zipcodes and nta
# could overlap (this is a limitation from the SVI data, zipcode tabulation was the closest unit to nta)
# for the same nta, 'evictions_nta_per_unit_per_1k' likely are the same, for the same reason as 'evictions_nta_per_1k'.
# additionally, we will correct the 'evictions_nta_per_unit_per_1k', because it was previously calculated by 'average_year_eviction_unit_count' aggregated
# by nta and then divided by nta's population [double-averages problem (first per building, then per NTA), losing the true denominator (total units in NTA)],
# we will do it like this: 'average_year_eviction_nta_count' / total units in an nta and then again / nta population

Unnamed: 0,nta,average_year_eviction_nta_count,evictions_nta_per_1k,evictions_nta_per_unit_per_1k
0,East New York,266.0,2.747905,0.002755
1,Sunset Park East,38.0,0.408567,0.003226
2,Sunset Park West,39.6,0.42577,0.001613
3,Van Cortlandt Village,172.8,2.122928,2.8e-05
4,Mott Haven-Port Morris,158.8,3.578188,0.000555
5,Bedford Park-Fordham North,306.0,3.759352,0.000468
6,Bedford Park-Fordham North,306.0,3.759352,0.006048
7,Claremont-Bathgate,125.6,1.5736,0.000607
8,East New York,266.0,2.747905,0.00074
9,Brighton Beach,44.0,0.52968,9.3e-05


sum all evictions per NTA -> divide by years -> average_year_eviction_nta_count. \
sum all units per NTA (deduplicate buildings to avoid overcounting). \
divide average_year_eviction_nta_count by total units -> evictions per unit per year. \
divide by NTA population and multiply by 1,000 -> evictions per unit per 1k residents. \

In [15]:
file_path3 = '/content/drive/My Drive/X999/bbl_cleaned.csv'
bbl_cleaned = pd.read_csv(file_path3)

In [16]:
bbl_cleaned.columns
# there is no nta in this dataset either, so it would be error-prone to approximate the nta data

Index(['borough', 'block', 'lot', 'community board', 'census tract 2010',
       'cb2010', 'schooldist', 'council district', 'postcode', 'firecomp',
       'policeprct', 'healtharea', 'sanitboro', 'sanitsub', 'address',
       'zonedist1', 'zonedist2', 'zonedist3', 'overlay1', 'overlay2',
       'spdist1', 'ltdheight', 'splitzone', 'bldgclass', 'landuse',
       'easements', 'ownertype', 'ownername', 'lotarea', 'bldgarea', 'comarea',
       'resarea', 'officearea', 'retailarea', 'garagearea', 'strgearea',
       'factryarea', 'otherarea', 'areasource', 'numbldgs', 'numfloors',
       'unitsres', 'unitstotal', 'lotfront', 'lotdepth', 'bldgfront',
       'bldgdepth', 'ext', 'proxcode', 'irrlotcode', 'lottype', 'bsmtcode',
       'assessland', 'assesstot', 'exempttot', 'yearbuilt', 'yearalter1',
       'yearalter2', 'histdist', 'landmark', 'builtfar', 'residfar', 'commfar',
       'facilfar', 'borocode', 'bbl', 'condono', 'tract2010', 'xcoord',
       'ycoord', 'latitude', 'longitude', 'z

In [17]:
# the new way:
normal['evictions_nta_per_unit_per_1k'] = (normal['average_year_eviction_nta_count'] / (normal['total_units_per_nta'] * normal['e_totpop'])) * 1000

In [18]:
normal[['nta', 'evictions_nta_per_unit_per_1k', 'average_year_eviction_nta_count', 'total_units_per_nta']].head(10)

# but this is still an inflated number,
# because for the buildings never appeared in the eviction dataset, their buildings and units were not counted at all
# we would have to have this limitation because bbl dataset did not have nta, and if we were to correctly include all buildings in
# each nta, we will need to derive the nta from community_board and census_tract, which will more likely to introduce errors and misjudgements
# than using only the buildings affected and were actually in the eviction dataset.

Unnamed: 0,nta,evictions_nta_per_unit_per_1k,average_year_eviction_nta_count,total_units_per_nta
0,East New York,5.5e-05,266.0,50227.0
1,Sunset Park East,0.000191,38.0,2144.0
2,Sunset Park West,0.000266,39.6,1601.0
3,Van Cortlandt Village,0.000136,172.8,15614.0
4,Mott Haven-Port Morris,9.3e-05,158.8,38627.0
5,Bedford Park-Fordham North,0.000232,306.0,16180.0
6,Bedford Park-Fordham North,0.000229,306.0,16180.0
7,Claremont-Bathgate,9.3e-05,125.6,16920.0
8,East New York,4.9e-05,266.0,50227.0
9,Brighton Beach,8e-05,44.0,6662.0


# **SVI measure analysis**

four types of aggregated and groupedby analysis:

average eviction count per building, per year;

average eviction count per nta population;

average eviction count per unit per building per year;

average eviction count per unit per nta population.

SVI measures:

ep_age17: age 17 or younger.
possible social causes impacted: homelessness, change of schools, education impact; \

ep_age65: age 65 and above.
possible social causes impacted: homelessness;\

ep_unemp: unemployed pct.
possible social causes impacted: homelessness;\





In [19]:
analysis_columns = normal[['primary_key', 'bin', 'bbl', 'latitude', 'longitude', 'eviction_address', 'zipcode', 'borough', 'nta','average_year_eviction_count', 	'average_year_eviction_unit_count',
                              'average_year_eviction_nta_count',	'evictions_nta_per_1k', 'evictions_nta_per_unit_per_1k',
                              'unitsres', 'e_totpop']]
analysis_columns.head(1)

Unnamed: 0,primary_key,bin,bbl,latitude,longitude,eviction_address,zipcode,borough,nta,average_year_eviction_count,average_year_eviction_unit_count,average_year_eviction_nta_count,evictions_nta_per_1k,evictions_nta_per_unit_per_1k,unitsres,e_totpop
0,*308072/22_5865,3083989,3037420029,40.672121,-73.891105,356 MILLER AVE,11207,BROOKLYN,East New York,0.8,0.266667,266.0,2.747905,5.5e-05,3.0,96801.0


In [20]:
normal['ep_age17'].head(1)
# pct

Unnamed: 0,ep_age17
0,22.5


# **Groupby and aggregate first**

In [21]:
svi_analysis_columns = ['ep_age17', 'ep_age65', 'ep_unemp', 'ep_afam', 'ep_hisp', 'evictions_nta_per_1k', 'evictions_nta_per_unit_per_1k',
                        'average_year_eviction_count', 'average_year_eviction_unit_count', 'average_year_eviction_nta_count']

## **Children**

In [22]:
normal[['nta', 'primary_key', 'eviction_address', 'average_year_eviction_nta_count']].sort_values('nta', ascending=True).head(10)

Unnamed: 0,nta,primary_key,eviction_address,average_year_eviction_nta_count
58618,Allerton-Pelham Gardens,B802448/17_389388,1140 BURKE AVENUE,25.4
58619,Allerton-Pelham Gardens,B802450/17_397880,1240 BURKE AVENUE,25.4
16884,Allerton-Pelham Gardens,312913/21_362926,2325 BOUCK AVE,25.4
23563,Allerton-Pelham Gardens,330196/22_363899,1256 EAST GUN HILL ROAD,25.4
7991,Allerton-Pelham Gardens,29536/19_98103,2958 RADCLIFF AVENUE,25.4
8003,Allerton-Pelham Gardens,29596/19_356492,1114 ADEE AVENUE,25.4
8043,Allerton-Pelham Gardens,29730/19_356158,2219 MORGAN AVENUE,25.4
8084,Allerton-Pelham Gardens,29877/17_337087,2934 WILSON AVENUE,25.4
57080,Allerton-Pelham Gardens,B38861/18_108676,3018 PAULDING AVENUE,25.4
27763,Allerton-Pelham Gardens,47208/18_20541,1135 PELHAM PARKWAY NORTH,25.4


In [23]:
normal.groupby('nta')[svi_analysis_columns].mean().head(10)
# using mean() here, because the average_year_eviction_nta_count might be slightly different due to zipcode
# svi measures. Here, we just use mean to offset some tiny deviations and get the most accurate one as much as possible

Unnamed: 0_level_0,ep_age17,ep_age65,ep_unemp,ep_afam,ep_hisp,evictions_nta_per_1k,evictions_nta_per_unit_per_1k,average_year_eviction_count,average_year_eviction_unit_count,average_year_eviction_nta_count
nta,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Allerton-Pelham Gardens,22.211024,16.955906,8.409449,51.682677,29.820472,0.353455,4.3e-05,0.533858,0.198731,25.4
Annadale-Huguenot-Prince's Bay-Eltingville,22.429412,17.382353,4.041176,0.676471,11.482353,0.09787,0.002316,0.247059,0.195294,3.4
Arden Heights,22.1,18.0,4.0,0.8,12.8,0.053566,0.001984,0.223529,0.141176,3.4
Astoria,13.776205,12.541265,7.90753,4.771687,26.680422,1.651823,8.7e-05,0.40241,0.035115,66.4
Auburndale,19.805556,18.638889,6.844444,4.783333,18.633333,0.155092,0.000788,0.9,0.361545,7.2
Baisley Park,21.366667,14.332129,8.388755,68.014056,12.676305,0.730398,0.000515,0.368675,0.196634,49.8
Bath Beach,21.482635,18.013174,7.14491,1.568263,17.340719,0.367858,2.5e-05,0.564072,0.023973,33.4
Battery Park City-Lower Manhattan,12.024603,8.515079,3.301587,5.113492,9.581746,2.849712,0.000168,1.774603,0.004788,25.2
Bay Ridge,20.368932,17.892961,5.900971,2.942961,21.999515,0.885945,0.000106,0.484466,0.035094,82.4
Bayside-Bayside Hills,18.925,20.205,5.136667,2.855,15.426667,0.406174,0.00025,0.62,0.378169,12.0


In [24]:
# rate of children affected per 1000 people in the population in each nta
normal['children_impacted_nta_per_1k'] = (normal['evictions_nta_per_1k'] * (normal['ep_age17'] / 100))
# normal['children_affected_per_1k_2'] = ((normal['average_year_eviction_nta_count'] /normal['e_totpop'])*1000 * (normal['ep_age17'] / 100))
normal[['nta', 'children_impacted_nta_per_1k']].head(10)

Unnamed: 0,nta,children_impacted_nta_per_1k
0,East New York,0.618279
1,Sunset Park East,0.103776
2,Sunset Park West,0.108146
3,Van Cortlandt Village,0.560453
4,Mott Haven-Port Morris,1.005471
5,Bedford Park-Fordham North,0.992469
6,Bedford Park-Fordham North,0.966154
7,Claremont-Bathgate,0.464212
8,East New York,0.728195
9,Brighton Beach,0.090575


In [25]:
# normal[['nta', 'children_affected_per_1k_2']].head(10)
# the same, they should be

In [27]:
# normal.drop(columns=['children_affected_per_1k_nta'], inplace=True)

In [None]:
# normal['children_affected_per_1k'] = (normal['evictions_nta_per_1k'] *   (normal['ep_age17'] / 100))
# normal[['nta','children_affected_per_1k']].head()

## **Unemployment**

In [28]:
# normal['unemployed_impacted_unit_count'] = normal['evictions_nta_per_unit_per_1k'] * normal['ep_unemp']/100
# normal['unemployed_impacted_unit_count'].head()
# normal['children_affected_per_1k'] = (normal['evictions_nta_per_1k'] * (normal['ep_age17'] / 100))
# normal[['nta', 'children_affected_per_1k']].head(10)
normal['unemployed_impacted_nta_per_1k'] = normal['evictions_nta_per_1k'] * normal['ep_unemp']/100
normal['unemployed_impacted_nta_per_1k'].head()

Unnamed: 0,unemployed_impacted_nta_per_1k
0,0.305018
1,0.030643
2,0.031933
3,0.24626
4,0.447274


In [29]:
# normal['unemployed_impacted_nta_count'] = normal['evictions_nta_per_1k'] * normal['ep_unemp']/100
# normal['unemployed_impacted_nta_count'].head()

## **Elderly**

In [30]:
# normal['elderly_impacted_unit_count'] = normal['evictions_nta_per_unit_per_1k'] * normal['ep_age65']/100
# normal['elderly_impacted_unit_count'].head()
# normal['unemployed_impacted_unit_count'] = normal['evictions_nta_per_unit_per_1k'] * normal['ep_unemp']/100
# normal['unemployed_impacted_unit_count'].head()
normal['elderly_impacted_nta_per_1k'] = normal['evictions_nta_per_1k'] * normal['ep_age65']/100
normal['elderly_impacted_nta_per_1k'].head()

Unnamed: 0,elderly_impacted_nta_per_1k
0,0.379211
1,0.053522
2,0.055776
3,0.237768
4,0.361397


In [31]:
# normal['elderly_impacted_nta_count'] = normal['evictions_nta_per_1k'] * normal['ep_age65']/100
# normal['elderly_impacted_nta_count'].head()

## **black and hispanics impacted**

In [32]:
normal['bh_impacted_nta_per_1k'] = normal['evictions_nta_per_1k'] * (normal['ep_afam'] + normal['ep_hisp'])/100
normal['bh_impacted_nta_per_1k'].head()

Unnamed: 0,bh_impacted_nta_per_1k
0,2.437392
1,0.17405
2,0.181378
3,1.987061
4,3.406435


# **Analysis**

## **Children Analysis: The most vulnerable nta for kids (high likelihood for change of schools or dropping out)**

In [33]:
svi_analysis_columns = ['children_impacted_nta_per_1k', 'elderly_impacted_nta_per_1k', 'unemployed_impacted_nta_per_1k', 'bh_impacted_nta_per_1k']

In [34]:
svi_analysis_df = normal.groupby('nta')[svi_analysis_columns].mean()
svi_analysis_df.head()

Unnamed: 0_level_0,children_impacted_nta_per_1k,elderly_impacted_nta_per_1k,unemployed_impacted_nta_per_1k,bh_impacted_nta_per_1k
nta,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Allerton-Pelham Gardens,0.078506,0.059932,0.029724,0.288077
Annadale-Huguenot-Prince's Bay-Eltingville,0.021952,0.017012,0.003955,0.0119
Arden Heights,0.011838,0.009642,0.002143,0.007285
Astoria,0.227559,0.20716,0.130618,0.519533
Auburndale,0.030717,0.028907,0.010615,0.036317


In [35]:
children_top_15 = svi_analysis_df.sort_values('children_impacted_nta_per_1k', ascending=False)['children_impacted_nta_per_1k'].head(15)
children_bottom_15 = svi_analysis_df.sort_values('children_impacted_nta_per_1k', ascending=True)['children_impacted_nta_per_1k'].head(15)
children_top_15

Unnamed: 0_level_0,children_impacted_nta_per_1k
nta,Unnamed: 1_level_1
Central Harlem North-Polo Grounds,2.13808
Woodlawn-Wakefield,1.50911
East Tremont,1.004631
Bedford Park-Fordham North,0.9712
Mott Haven-Port Morris,0.957494
Mount Hope,0.9278
University Heights-Morris Heights,0.891696
East Concourse-Concourse Village,0.871887
Williamsbridge-Olinville,0.867621
Flatbush,0.827058


In [36]:
children_bottom_15

Unnamed: 0_level_0,children_impacted_nta_per_1k
nta,Unnamed: 1_level_1
park-cemetery-etc-Brooklyn,0.001224
Arden Heights,0.011838
Brooklyn Heights-Cobble Hill,0.012111
Westerleigh,0.015868
park-cemetery-etc-Bronx,0.018648
Todt Hill-Emerson Hill-Heartland Village-Lighthouse Hill,0.018958
East Flushing,0.020821
Windsor Terrace,0.021449
Annadale-Huguenot-Prince's Bay-Eltingville,0.021952
Rossville-Woodrow,0.02232


In [37]:
children_top_15 = children_top_15.to_frame()
children_bottom_15 = children_bottom_15.to_frame()
# series can't be transposed

In [38]:
children_top_15.T

nta,Central Harlem North-Polo Grounds,Woodlawn-Wakefield,East Tremont,Bedford Park-Fordham North,Mott Haven-Port Morris,Mount Hope,University Heights-Morris Heights,East Concourse-Concourse Village,Williamsbridge-Olinville,Flatbush,Crown Heights North,Belmont,East New York,Marble Hill-Inwood,West Concourse
children_impacted_nta_per_1k,2.13808,1.50911,1.004631,0.9712,0.957494,0.9278,0.891696,0.871887,0.867621,0.827058,0.815983,0.685758,0.682417,0.670378,0.612278


In [39]:
children_bottom_15.T

nta,park-cemetery-etc-Brooklyn,Arden Heights,Brooklyn Heights-Cobble Hill,Westerleigh,park-cemetery-etc-Bronx,Todt Hill-Emerson Hill-Heartland Village-Lighthouse Hill,East Flushing,Windsor Terrace,Annadale-Huguenot-Prince's Bay-Eltingville,Rossville-Woodrow,Stuyvesant Town-Cooper Village,Queensboro Hill,Great Kills,Glen Oaks-Floral Park-New Hyde Park,Auburndale
children_impacted_nta_per_1k,0.001224,0.011838,0.012111,0.015868,0.018648,0.018958,0.020821,0.021449,0.021952,0.02232,0.023921,0.02494,0.026632,0.028663,0.030717


## **Elderly analysis: The most vulnerable nta for elderly (more demand for social security/welfare)**

In [40]:
elderly_top_15 = svi_analysis_df.sort_values('elderly_impacted_nta_per_1k', ascending=False)['elderly_impacted_nta_per_1k'].head(15)
elderly_bottom_15 = svi_analysis_df.sort_values('elderly_impacted_nta_per_1k', ascending=True)['elderly_impacted_nta_per_1k'].head(15)
elderly_top_15 = elderly_top_15.to_frame()
elderly_bottom_15 = elderly_bottom_15.to_frame()

In [41]:
elderly_top_15.T

nta,Central Harlem North-Polo Grounds,Woodlawn-Wakefield,Starrett City,Flatbush,Lenox Hill-Roosevelt Island,Washington Heights South,Williamsbridge-Olinville,Marble Hill-Inwood,Crown Heights North,Seagate-Coney Island,Washington Heights North,East Flatbush-Farragut,East Tremont,Mott Haven-Port Morris,Hudson Yards-Chelsea-Flatiron-Union Square
elderly_impacted_nta_per_1k,1.425108,1.162729,0.668372,0.563302,0.544139,0.539411,0.521402,0.508453,0.481954,0.468712,0.443924,0.438852,0.391613,0.388382,0.387772


In [42]:
elderly_bottom_15.T

nta,park-cemetery-etc-Brooklyn,Brooklyn Heights-Cobble Hill,Arden Heights,park-cemetery-etc-Bronx,Windsor Terrace,Westerleigh,East Elmhurst,Rossville-Woodrow,Todt Hill-Emerson Hill-Heartland Village-Lighthouse Hill,Williamsburg,Annadale-Huguenot-Prince's Bay-Eltingville,Great Kills,Ocean Parkway South,Elmhurst-Maspeth,Glendale
elderly_impacted_nta_per_1k,0.000346,0.008602,0.009642,0.009719,0.01105,0.011421,0.014501,0.016321,0.01641,0.016714,0.017012,0.021767,0.024077,0.024731,0.025072


## **Unemployed analysis: The most vulnerable nta for elderly (more demand for social security/welfare, homelessness)**

In [43]:
unemployed_top_15 = svi_analysis_df.sort_values('unemployed_impacted_nta_per_1k', ascending=False)['unemployed_impacted_nta_per_1k'].head(15)
unemployed_bottom_15 = svi_analysis_df.sort_values('unemployed_impacted_nta_per_1k', ascending=True)['unemployed_impacted_nta_per_1k'].head(15)
unemployed_top_15 = unemployed_top_15.to_frame()
unemployed_bottom_15 = unemployed_bottom_15.to_frame()

In [44]:
unemployed_top_15.T

nta,Central Harlem North-Polo Grounds,Woodlawn-Wakefield,Bedford Park-Fordham North,East Tremont,Mott Haven-Port Morris,University Heights-Morris Heights,East Concourse-Concourse Village,Mount Hope,Washington Heights South,Williamsbridge-Olinville,Belmont,Washington Heights North,Crown Heights North,Brownsville,Morrisania-Melrose
unemployed_impacted_nta_per_1k,1.325098,0.575166,0.518174,0.517139,0.465228,0.443289,0.440106,0.432798,0.429907,0.384946,0.371907,0.367168,0.346266,0.323836,0.311316


In [45]:
unemployed_bottom_15.T

nta,East Elmhurst,park-cemetery-etc-Brooklyn,Arden Heights,Brooklyn Heights-Cobble Hill,Annadale-Huguenot-Prince's Bay-Eltingville,Rossville-Woodrow,Westerleigh,Todt Hill-Emerson Hill-Heartland Village-Lighthouse Hill,Windsor Terrace,Great Kills,New Springville-Bloomfield-Travis,Glen Oaks-Floral Park-New Hyde Park,East Flushing,Charleston-Richmond Valley-Tottenville,park-cemetery-etc-Bronx
unemployed_impacted_nta_per_1k,-0.023911,0.000346,0.002143,0.003716,0.003955,0.004001,0.004273,0.004697,0.005478,0.006702,0.007374,0.008189,0.008423,0.008661,0.008692


## **Black + Hispanic analysis: The most vulnerable nta for black and hispanic population**

In [46]:
bh_top_15 = svi_analysis_df.sort_values('bh_impacted_nta_per_1k', ascending=False)['bh_impacted_nta_per_1k'].head(15)
bh_bottom_15 = svi_analysis_df.sort_values('bh_impacted_nta_per_1k', ascending=True)['bh_impacted_nta_per_1k'].head(15)
bh_top_15 = bh_top_15.to_frame()
bh_bottom_15 = bh_bottom_15.to_frame()

In [47]:
bh_top_15.T

nta,Central Harlem North-Polo Grounds,Woodlawn-Wakefield,East Tremont,Mott Haven-Port Morris,Bedford Park-Fordham North,Williamsbridge-Olinville,Mount Hope,University Heights-Morris Heights,East Concourse-Concourse Village,Crown Heights North,Washington Heights South,Marble Hill-Inwood,Flatbush,East New York,Belmont
bh_impacted_nta_per_1k,8.87956,5.38217,3.406636,3.389198,3.330076,3.174682,3.102727,3.098451,3.057942,2.656539,2.616197,2.461621,2.454076,2.364387,2.328119


In [48]:
bh_bottom_15.T

nta,park-cemetery-etc-Brooklyn,Arden Heights,Rossville-Woodrow,Annadale-Huguenot-Prince's Bay-Eltingville,Brooklyn Heights-Cobble Hill,Great Kills,Todt Hill-Emerson Hill-Heartland Village-Lighthouse Hill,Windsor Terrace,East Flushing,Westerleigh,Whitestone,New Springville-Bloomfield-Travis,Dyker Heights,New Dorp-Midland Beach,Glen Oaks-Floral Park-New Hyde Park
bh_impacted_nta_per_1k,0.001399,0.007285,0.010288,0.0119,0.015414,0.017913,0.018956,0.020134,0.022112,0.025796,0.026673,0.027842,0.028644,0.029058,0.031004


In [53]:
# change some of the analysis columns' names so they are more obvious
# building based: 'average_year_eviction_count'
# 'average_year_eviction_unit_count',
# 'average_year_eviction_nta_count',
# 'evictions_nta_per_1k',
# 'evictions_nta_per_unit_per_1k',
normal.rename(columns={'average_year_eviction_count':'average_year_eviction_count(building)',
                       'average_year_eviction_unit_count':'average_year_eviction_count_per_unit(building)',
                      'average_year_eviction_nta_count':'average_year_eviction_count_per_nta(nta)',
                      'evictions_nta_per_1k':'evictions_per_nta_1k(nta)',
                      'evictions_nta_per_unit_per_1k':'evictions_per_unit_nta_1k(nta)'}, inplace=True)

In [55]:
normal.head()

Unnamed: 0,primary_key,bbl,court_index_number,docket_number,eviction_address,eviction_apartment_number,executed_date,borough,zipcode,ejectment,eviction/legal_possession,latitude,longitude,community_board,council_district,census_tract,bin,nta,year,month_year,geometry,average_year_eviction_count(building),yearbuilt,bldgclass,numfloors,unitsres,ownername,bldgarea,building_type,building_category,is_condo,floor_category,rent_era,architectural_style,economic_period,residential_units_category,is_llc,building_size_category,size_quartile,decade,fips,e_totpop,rpl_theme1,rpl_theme2,rpl_theme3,rpl_theme4,rpl_themes,ep_pov150,ep_unemp,ep_nohsdp,ep_uninsur,ep_age65,ep_age17,ep_disabl,ep_limeng,ep_noveh,ep_crowd,ep_hburd,ep_afam,ep_hisp,ep_asian,ep_aian,ep_nhpi,ep_twomore,ep_otherrace,ep_minrty,ep_white,invalid_zip,svi_quartile,svi_group,air_quality,animal_issues,appliances,building_exterior,doors_windows,electrical_issues,elevator_issues,floors_stairs,general_complaints,graffiti_posting,heat_hot_water,homeless_issues,noise_complaints,other_issues,pest_issues,plumbing_issues,police_matters,public_nuisance,safety_concerns,sanitation_issues,walls_ceilings,total_complaints,average_year_eviction_count_per_unit(building),average_year_eviction_count_per_nta(nta),evictions_per_nta_1k(nta),evictions_per_unit_nta_1k(nta),buildings_affected,total_units_per_nta,children_impacted_nta_per_1k,unemployed_impacted_nta_per_1k,elderly_impacted_nta_per_1k,bh_impacted_nta_per_1k
0,*308072/22_5865,3037420029,*308072/22,5865,356 MILLER AVE,1 AND BASEMENT,2024-12-04,BROOKLYN,11207,Not an Ejectment,Possession,40.672121,-73.891105,5.0,37.0,1152.0,3083989,East New York,2024,2024-12,POINT (-73.891105 40.672121),0.8,1930.0,C0,3.0,3.0,356 MILLER LLC,2700.0,pre-war,walk-up,False,low-rise,"Pre-1947, pre-rent-control","1921–1930, Art Deco Skyscrapers","1930-1945, great depression and WWII",3-5 units,True,small,Q3 (50-75%),1930-1939,11207,96801.0,0.9788,0.914,0.9808,0.9812,0.9839,33.9,11.1,19.1,6.0,13.8,22.5,13.8,5.3,57.8,9.1,44.7,55.9,32.8,1.5,0.0,0.0,2.9,1.6,94.7,5.3,False,Q3,medium-high,0.0,0.0,1.0,0.0,1.0,2.0,0.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,3.0,5.0,19.0,0.266667,266.0,2.747905,5.5e-05,719,50227.0,0.618279,0.305018,0.379211,2.437392
1,*313639/23_5202,3057940012,*313639/23,5202,710 61ST STREET,2ND FLOOR,2024-03-04,BROOKLYN,11220,Not an Ejectment,Possession,40.635941,-74.011883,7.0,38.0,118.0,3143881,Sunset Park East,2024,2024-03,POINT (-74.011883 40.635941),0.6,1920.0,B2,2.0,2.0,"A.R.M. PARKING, LLC",1204.0,pre-war,two-family,False,low-rise,"Pre-1947, pre-rent-control","1900–1920, Beaux-Arts","Pre-1929, pre-great depression",2-unit,True,very small,Q1 (smallest 25%),1920-1929,11220,93008.0,0.9885,0.7635,0.9594,0.9179,0.9662,37.5,7.5,37.9,11.6,13.1,25.4,8.4,40.2,61.7,23.7,43.6,1.7,40.9,40.7,0.4,0.0,1.2,0.2,85.0,15.0,False,Q3,medium-high,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,4.0,0.3,38.0,0.408567,0.000191,136,2144.0,0.103776,0.030643,0.053522,0.17405
2,*324973/22_5308,3057820030,*324973/22,5308,462 60TH STREET,FOURTH FLOOR APT AKA,2024-08-13,BROOKLYN,11220,Not an Ejectment,Possession,40.640008,-74.017068,7.0,38.0,122.0,3143435,Sunset Park West,2024,2024-08,POINT (-74.017068 40.640008),0.6,1907.0,C3,4.0,4.0,"LIN, RONG LAN",4800.0,pre-war,walk-up,False,mid-rise,"Pre-1947, pre-rent-control","1900–1920, Beaux-Arts","Pre-1929, pre-great depression",3-5 units,False,medium-small,Q4 (largest 25%),1900-1909,11220,93008.0,0.9885,0.7635,0.9594,0.9179,0.9662,37.5,7.5,37.9,11.6,13.1,25.4,8.4,40.2,61.7,23.7,43.6,1.7,40.9,40.7,0.4,0.0,1.2,0.2,85.0,15.0,False,Q3,medium-high,0.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,10.0,0.15,39.6,0.42577,0.000266,147,1601.0,0.108146,0.031933,0.055776,0.181378
3,*53336/16_170279,2032510420,*53336/16,170279,3400 PAUL AVENUE,15D,2018-10-17,BRONX,10468,Not an Ejectment,Possession,40.87719,-73.889569,7.0,11.0,409.0,2015444,Van Cortlandt Village,2018,2018-10,POINT (-73.889569 40.87719),0.8,1967.0,D4,21.0,352.0,SCOTT TOWER HOUSING CO INC,381213.0,post-war,condo-co-op,True,high-rise,"1947–1969, rent-control","1951–1980, the International Style, Alternative Modernism","1946–1975, pst war economic boom",100+ units,False,mega,Q4 (largest 25%),1960-1969,10468,81397.0,0.9954,0.9407,0.987,0.947,0.9874,39.5,11.6,28.3,9.2,11.2,26.4,12.2,26.9,71.8,19.2,56.7,15.6,78.0,2.3,0.0,0.0,0.5,0.5,96.9,3.1,False,Q3,medium-high,6.0,0.0,0.0,0.0,9.0,0.0,2.0,5.0,5.0,0.0,23.0,0.0,145.0,0.0,2.0,41.0,0.0,0.0,1.0,5.0,4.0,248.0,0.002273,172.8,2.122928,0.000136,244,15614.0,0.560453,0.24626,0.237768,1.987061
4,*5990/17_2703,2025770038,*5990/17,2703,480 CONCORD AVENUE,4E,2019-08-30,BRONX,10455,Not an Ejectment,Possession,40.811197,-73.90881,1.0,8.0,35.0,2003900,Mott Haven-Port Morris,2019,2019-08,POINT (-73.90881 40.811197),1.6,1928.0,D7,6.0,65.0,480 CONCORD AVE OWNER LLC,69102.0,pre-war,elevator,False,mid-rise,"Pre-1947, pre-rent-control","1921–1930, Art Deco Skyscrapers","Pre-1929, pre-great depression",21-100 units,True,very large,Q4 (largest 25%),1920-1929,10455,44380.0,0.9971,0.9909,0.9972,0.9499,0.9971,48.5,12.5,32.1,9.5,10.1,28.1,19.5,17.9,75.1,14.5,51.9,21.1,74.1,1.1,0.0,0.0,1.4,1.0,98.6,1.4,False,Q4 (High),high,0.0,5.0,8.0,0.0,21.0,8.0,34.0,10.0,9.0,0.0,89.0,0.0,78.0,0.0,5.0,41.0,1.0,0.0,3.0,31.0,13.0,356.0,0.024615,158.8,3.578188,9.3e-05,322,38627.0,1.005471,0.447274,0.361397,3.406435


In [61]:
normal['average_year_eviction_per_building_nta(nta)'] = normal['average_year_eviction_count_per_nta(nta)'] / normal['buildings_affected']
normal['average_year_eviction_per_building_nta(nta)'].head()

Unnamed: 0,average_year_eviction_per_building_nta(nta)
0,0.369958
1,0.279412
2,0.269388
3,0.708197
4,0.493168


In [63]:
normal[['average_year_eviction_count(building)', 'average_year_eviction_count_per_unit(building)', 'average_year_eviction_count_per_nta(nta)',
        'average_year_eviction_per_building_nta(nta)', 'evictions_per_nta_1k(nta)', 'evictions_per_unit_nta_1k(nta)']]

Unnamed: 0,average_year_eviction_count(building),average_year_eviction_count_per_unit(building),average_year_eviction_count_per_nta(nta),average_year_eviction_per_building_nta(nta),evictions_per_nta_1k(nta),evictions_per_unit_nta_1k(nta)
0,0.8,0.266667,266.0,0.369958,2.747905,0.000055
1,0.6,0.300000,38.0,0.279412,0.408567,0.000191
2,0.6,0.150000,39.6,0.269388,0.425770,0.000266
3,0.8,0.002273,172.8,0.708197,2.122928,0.000136
4,1.6,0.024615,158.8,0.493168,3.578188,0.000093
...,...,...,...,...,...,...
66392,3.8,0.008370,89.6,0.335581,2.221616,0.000150
66393,0.4,0.200000,6.8,0.226667,0.072068,0.000934
66394,0.4,0.200000,6.8,0.226667,0.072068,0.000934
66395,0.2,0.000833,89.6,0.335581,2.221616,0.000230


In [64]:
# save the updated normal times data
normal.to_csv('/content/drive/My Drive/X999/bbl_evictions_311_svi_normal_times_correct_coordinates.csv', index=False)