# **Introduction**

## **Chi-test (boroughs + svi)**
## **Bar-chart with svi as regression/scatterplot (boroughs first)**

source: https://www.atsdr.cdc.gov/place-health/media/pdfs/2024/10/SVI2022Documentation.pdf

source: https://www.atsdr.cdc.gov/place-health/php/svi/svi-interactive-map.html

In [413]:
# !pip install geopandas folium matplotlib seaborn scipy
# !pip install esda
# !pip install splot
# !pip install geopandas contextily
# # for google colab, had to reinstall some pacakges.

In [None]:
# !pip install geopandas folium matplotlib seaborn scipy esda splot

In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import datetime as dt
import scipy

from sklearn.cluster import DBSCAN
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
from shapely.geometry import Point
from sklearn.neighbors import NearestNeighbors

# visualization
import matplotlib.pyplot as plt
from matplotlib import colors as mcolors
import seaborn as sns
import folium
from folium.plugins import HeatMap
from folium import Marker
from folium.plugins import MarkerCluster
import plotly.express as px
import plotly.io as pio
import contextily as ctx
from scipy.stats import f_oneway
from sklearn.decomposition import PCA
from functools import reduce

# spatial statistics
from esda.moran import Moran
from esda.getisord import G_Local
from libpysal.weights import Queen, Rook

# system and utility
import warnings
import os
import io
from IPython.display import IFrame
from google.colab import files

from libpysal.weights import Queen, Rook
from esda.moran import Moran
import matplotlib.pyplot as plt
from splot.esda import moran_scatterplot

# suppress warnings
warnings.filterwarnings('ignore')

# inline
%matplotlib inline

In [None]:
pd.set_option('display.float_format', lambda x: '%.4f' % x)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# **Step 1 Get the Eviction data**

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# data source:
file_path1 = '/content/drive/My Drive/X999/bbl_evictions_311_svi_normal_times.csv'
file_path2 = '/content/drive/My Drive/X999/bbl_evictions_311_svi_covid.csv'

In [None]:
# create a fundtion that sums up all the input
def sums_

In [272]:
evictions_pre_post_raw = pd.read_csv(file_path1)
evictions_covid_raw = pd.read_csv(file_path2)
evictions_covid_raw.shape, evictions_pre_post_raw.shape
# 91 and 92 with normal time + one more analysis column regarding an svi item

((5386, 91), (66397, 92))

In [273]:
evictions_pre_post = evictions_pre_post_raw.copy()
evictions_covid = evictions_covid_raw.copy()

In [274]:
evictions_pre_post.head(2)

Unnamed: 0,primary_key,bbl,court_index_number,docket_number,eviction_address,eviction_apartment_number,executed_date,borough,zipcode,ejectment,eviction/legal_possession,latitude,longitude,community_board,council_district,census_tract,bin,nta,year,month_year,geometry,average_year_eviction_count,yearbuilt,bldgclass,numfloors,unitsres,ownername,bldgarea,building_type,building_category,is_condo,floor_category,rent_era,architectural_style,economic_period,residential_units_category,is_llc,building_size_category,size_quartile,decade,fips,e_totpop,rpl_theme1,rpl_theme2,rpl_theme3,rpl_theme4,rpl_themes,ep_pov150,ep_unemp,ep_nohsdp,ep_uninsur,ep_age65,ep_age17,ep_disabl,ep_limeng,ep_noveh,ep_crowd,ep_hburd,ep_afam,ep_hisp,ep_asian,ep_aian,ep_nhpi,ep_twomore,ep_otherrace,ep_minrty,ep_white,invalid_zip,svi_quartile,svi_group,air_quality,animal_issues,appliances,building_exterior,doors_windows,electrical_issues,elevator_issues,floors_stairs,general_complaints,graffiti_posting,heat_hot_water,homeless_issues,noise_complaints,other_issues,pest_issues,plumbing_issues,police_matters,public_nuisance,safety_concerns,sanitation_issues,walls_ceilings,total_complaints
0,*308072/22_5865,3037420029,*308072/22,5865,356 MILLER AVE,1 AND BASEMENT,2024-12-04,BROOKLYN,11207,Not an Ejectment,Possession,40.6721,-73.8911,5.0,37.0,1152.0,3083989,East New York,2024,2024-12,POINT (-73.891105 40.672121),0.8,1930.0,C0,3.0,3.0,356 MILLER LLC,2700.0,pre-war,walk-up,False,low-rise,"Pre-1947, pre-rent-control","1921–1930, Art Deco Skyscrapers","1930-1945, great depression and WWII",3-5 units,True,small,Q3 (50-75%),1930-1939,11207,96801.0,0.9788,0.914,0.9808,0.9812,0.9839,33.9,11.1,19.1,6.0,13.8,22.5,13.8,5.3,57.8,9.1,44.7,55.9,32.8,1.5,0.0,0.0,2.9,1.6,94.7,5.3,False,Q3,medium-high,0.0,0.0,1.0,0.0,1.0,2.0,0.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,3.0,5.0,19.0
1,*313639/23_5202,3057940012,*313639/23,5202,710 61ST STREET,2ND FLOOR,2024-03-04,BROOKLYN,11220,Not an Ejectment,Possession,40.6359,-74.0119,7.0,38.0,118.0,3143881,Sunset Park East,2024,2024-03,POINT (-74.011883 40.635941),0.6,1920.0,B2,2.0,2.0,"A.R.M. PARKING, LLC",1204.0,pre-war,two-family,False,low-rise,"Pre-1947, pre-rent-control","1900–1920, Beaux-Arts","Pre-1929, pre-great depression",2-unit,True,very small,Q1 (smallest 25%),1920-1929,11220,93008.0,0.9885,0.7635,0.9594,0.9179,0.9662,37.5,7.5,37.9,11.6,13.1,25.4,8.4,40.2,61.7,23.7,43.6,1.7,40.9,40.7,0.4,0.0,1.2,0.2,85.0,15.0,False,Q3,medium-high,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,4.0


In [275]:
evictions_covid.head(2)

Unnamed: 0,primary_key,bbl,court_index_number,docket_number,eviction_address,eviction_apartment_number,executed_date,borough,zipcode,ejectment,eviction/legal_possession,latitude,longitude,community_board,council_district,census_tract,bin,nta,year,month_year,geometry,average_year_eviction_count,yearbuilt,bldgclass,numfloors,unitsres,ownername,bldgarea,building_type,building_category,is_condo,floor_category,rent_era,architectural_style,economic_period,residential_units_category,is_llc,building_size_category,size_quartile,decade,fips,e_totpop,rpl_theme1,rpl_theme2,rpl_theme3,rpl_theme4,rpl_themes,ep_pov150,ep_unemp,ep_nohsdp,ep_uninsur,ep_age65,ep_age17,ep_disabl,ep_limeng,ep_noveh,ep_crowd,ep_hburd,ep_afam,ep_hisp,ep_asian,ep_aian,ep_nhpi,ep_twomore,ep_otherrace,ep_minrty,ep_white,invalid_zip,svi_quartile,air_quality,animal_issues,appliances,building_exterior,doors_windows,electrical_issues,elevator_issues,floors_stairs,general_complaints,graffiti_posting,heat_hot_water,homeless_issues,noise_complaints,other_issues,pest_issues,plumbing_issues,police_matters,public_nuisance,safety_concerns,sanitation_issues,walls_ceilings,total_complaints
0,004123/20_209969,2032140141,004123/20,209969,2541 A GRAND AVE,ROOM 3B,2022-08-22,BRONX,10468,Not an Ejectment,Possession,40.8654,-73.9013,7.0,14.0,265.0,2113173,Kingsbridge Heights,2022,2022-08,POINT (-73.901317 40.865396),0.2,2004.0,C0,3.0,3.0,MONJU SARKER,3420.0,post-war,walk-up,False,low-rise,"1994–Present, vacancy decontrol","2001-present, New Architecture","1991–2008, modern economic growth",3-5 units,False,medium-small,Q4 (largest 25%),2000-2009,10468,81397.0,0.9954,0.9407,0.987,0.947,0.9874,39.5,11.6,28.3,9.2,11.2,26.4,12.2,26.9,71.8,19.2,56.7,15.6,78.0,2.3,0.0,0.0,0.5,0.5,96.9,3.1,False,Q3,0.0,0.0,0.0,0.0,3.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,2.0,0.0,0.0,2.0,0.0,0.0,0.0,3.0,1.0,14.0
1,0050153/20_106030,4031560133,0050153/20,106030,98-05 67TH AVENUE,12F,2022-04-14,QUEENS,11375,Not an Ejectment,Possession,40.7242,-73.8556,6.0,29.0,71306.0,4074666,Forest Hills,2022,2022-04,POINT (-73.855552 40.724241),0.2,1960.0,D3,13.0,181.0,MARSEILLES LEASING LIMITED PARTNERSHIP,177710.0,post-war,elevator,False,high-rise,"1947–1969, rent-control","1951–1980, the International Style, Alternativ...","1946–1975, pst war economic boom",100+ units,False,mega,Q4 (largest 25%),1960-1969,11375,75212.0,0.4759,0.5698,0.8789,0.8057,0.7322,12.0,4.8,6.1,3.7,20.4,18.0,10.5,7.9,41.9,5.8,25.4,2.7,16.4,28.5,0.1,0.0,4.6,0.7,53.0,47.0,False,Q1 (Low),0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,62.0,0.0,34.0,0.0,0.0,4.0,1.0,0.0,0.0,2.0,5.0,112.0


In [276]:
evictions_pre_post.columns, \
evictions_covid.columns, \
evictions_pre_post.shape, \
evictions_covid.shape

(Index(['primary_key', 'bbl', 'court_index_number', 'docket_number',
        'eviction_address', 'eviction_apartment_number', 'executed_date',
        'borough', 'zipcode', 'ejectment', 'eviction/legal_possession',
        'latitude', 'longitude', 'community_board', 'council_district',
        'census_tract', 'bin', 'nta', 'year', 'month_year', 'geometry',
        'average_year_eviction_count', 'yearbuilt', 'bldgclass', 'numfloors',
        'unitsres', 'ownername', 'bldgarea', 'building_type',
        'building_category', 'is_condo', 'floor_category', 'rent_era',
        'architectural_style', 'economic_period', 'residential_units_category',
        'is_llc', 'building_size_category', 'size_quartile', 'decade', 'fips',
        'e_totpop', 'rpl_theme1', 'rpl_theme2', 'rpl_theme3', 'rpl_theme4',
        'rpl_themes', 'ep_pov150', 'ep_unemp', 'ep_nohsdp', 'ep_uninsur',
        'ep_age65', 'ep_age17', 'ep_disabl', 'ep_limeng', 'ep_noveh',
        'ep_crowd', 'ep_hburd', 'ep_afam', 'ep_hisp

In [277]:
link = '/content/drive/My Drive/X999/svi_cleaned.csv'

In [278]:
svi_df = pd.read_csv(link)
svi_df.head(2)

Unnamed: 0,fips,location,area_sqmi,e_totpop,m_totpop,e_hu,m_hu,e_hh,m_hh,e_pov150,m_pov150,e_unemp,m_unemp,e_hburd,m_hburd,e_nohsdp,m_nohsdp,e_uninsur,m_uninsur,e_age65,m_age65,e_age17,m_age17,e_disabl,m_disabl,e_sngpnt,m_sngpnt,e_limeng,m_limeng,e_minrty,m_minrty,e_munit,m_munit,e_mobile,m_mobile,e_crowd,m_crowd,e_noveh,m_noveh,e_groupq,m_groupq,ep_pov150,mp_pov150,ep_unemp,mp_unemp,ep_hburd,mp_hburd,ep_nohsdp,mp_nohsdp,ep_uninsur,mp_uninsur,ep_age65,mp_age65,ep_age17,mp_age17,ep_disabl,mp_disabl,ep_sngpnt,mp_sngpnt,ep_limeng,mp_limeng,ep_minrty,mp_minrty,ep_munit,mp_munit,ep_mobile,mp_mobile,ep_crowd,mp_crowd,ep_noveh,mp_noveh,ep_groupq,mp_groupq,epl_pov150,epl_unemp,epl_hburd,epl_nohsdp,epl_uninsur,spl_theme1,rpl_theme1,epl_age65,epl_age17,epl_disabl,epl_sngpnt,epl_limeng,spl_theme2,rpl_theme2,epl_minrty,spl_theme3,rpl_theme3,epl_munit,epl_mobile,epl_crowd,epl_noveh,epl_groupq,spl_theme4,rpl_theme4,spl_themes,rpl_themes,f_pov150,f_unemp,f_hburd,f_nohsdp,f_uninsur,f_theme1,f_age65,f_age17,f_disabl,f_sngpnt,f_limeng,f_theme2,f_minrty,f_theme3,f_munit,f_mobile,f_crowd,f_noveh,f_groupq,f_theme4,f_total,e_daypop,e_noint,m_noint,e_afam,m_afam,e_hisp,m_hisp,e_asian,m_asian,e_aian,m_aian,e_nhpi,m_nhpi,e_twomore,m_twomore,e_otherrace,m_otherrace,ep_noint,mp_noint,ep_afam,mp_afam,ep_hisp,mp_hisp,ep_asian,mp_asian,ep_aian,mp_aian,ep_nhpi,mp_nhpi,ep_twomore,mp_twomore,ep_otherrace,mp_otherrace
0,10001,ZCTA5 10001,0.6238,27004,1827,16975,831,14375,782,5248,797,761,266,3314,531,1930,534,831,289,3428,432,2694,643,2310,499,501,215,1381,405,13460,2305,15840,898,15,23,389,135,12285,840,2213,218,20.3,2.7,4.3,1.5,23.1,3.5,9.1,2.4,3.1,1.0,12.7,1.6,10.0,2.1,8.6,1.9,3.5,1.5,5.3,1.5,49.8,7.8,93.3,2.7,0.1,0.1,2.7,0.9,85.5,2.8,8.2,0.6,0.6108,0.4574,0.5573,0.5902,0.4436,2.6593,0.5688,0.142,0.1161,0.1891,0.4707,0.8777,1.7956,0.1692,0.867,0.867,0.867,0.9853,0.271,0.7402,0.9949,0.9104,3.9018,0.9806,9.2237,0.7414,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,3,3,239407,1047,389,2220,576,5206,943,5031,774,0,25,0,25,780,326,223,169,7.3,2.6,8.2,2.2,19.3,3.0,18.6,2.9,0.0,0.1,0.0,0.1,2.9,1.2,0.8,0.6
1,10002,ZCTA5 10002,0.8223,76518,2894,39094,1241,36028,1326,27908,2853,2833,574,14688,1367,18301,1376,4074,766,17681,1287,10028,1549,9896,1062,2211,499,18393,1640,56964,3226,35725,1677,16,28,2461,449,29828,1403,2090,39,36.8,3.5,7.6,1.4,40.8,3.5,30.0,2.0,5.4,1.0,23.1,1.7,13.1,1.8,13.0,1.4,6.1,1.4,24.7,2.0,74.4,3.1,91.4,3.2,0.0,0.1,6.8,1.2,82.8,1.8,2.7,0.1,0.9148,0.7946,0.9219,0.9741,0.7207,4.3261,0.9639,0.7296,0.1831,0.5186,0.739,0.9944,3.1647,0.8781,0.9369,0.9369,0.9369,0.979,0.0,0.9105,0.9915,0.773,3.654,0.9254,12.0817,0.9656,1,0,1,1,0,3,0,0,0,0,1,1,1,1,1,0,1,1,0,3,8,64307,8590,1110,6141,1194,19864,2190,28477,1989,74,83,24,45,1810,486,574,394,23.8,2.9,8.0,1.5,26.0,2.5,37.2,2.2,0.1,0.1,0.0,0.1,2.4,0.6,0.8,0.5


In [279]:
# build a fundtion that calculates the sum of input parameters



In [280]:
svi_df.shape

(204, 153)

In [281]:
# list(svi_df.columns)

In [282]:
svi_df.ep_nhpi.unique()

array([ 0.00e+00,  1.00e-01,  3.00e-01,  2.00e-01, -9.99e+02,  8.00e-01,
        1.20e+00,  5.00e-01,  4.00e-01])

# **Step 2: SVI items**

q quick double check

In [283]:
link = "/content/drive/My Drive/X999/NewYork_ZCTA.csv"

In [284]:
svi_raw = pd.read_csv(link)
svi_raw.head(2)

Unnamed: 0,ST,STATE,ST_ABBR,FIPS,LOCATION,AREA_SQMI,E_TOTPOP,M_TOTPOP,E_HU,M_HU,E_HH,M_HH,E_POV150,M_POV150,E_UNEMP,M_UNEMP,E_HBURD,M_HBURD,E_NOHSDP,M_NOHSDP,E_UNINSUR,M_UNINSUR,E_AGE65,M_AGE65,E_AGE17,M_AGE17,E_DISABL,M_DISABL,E_SNGPNT,M_SNGPNT,E_LIMENG,M_LIMENG,E_MINRTY,M_MINRTY,E_MUNIT,M_MUNIT,E_MOBILE,M_MOBILE,E_CROWD,M_CROWD,E_NOVEH,M_NOVEH,E_GROUPQ,M_GROUPQ,EP_POV150,MP_POV150,EP_UNEMP,MP_UNEMP,EP_HBURD,MP_HBURD,EP_NOHSDP,MP_NOHSDP,EP_UNINSUR,MP_UNINSUR,EP_AGE65,MP_AGE65,EP_AGE17,MP_AGE17,EP_DISABL,MP_DISABL,EP_SNGPNT,MP_SNGPNT,EP_LIMENG,MP_LIMENG,EP_MINRTY,MP_MINRTY,EP_MUNIT,MP_MUNIT,EP_MOBILE,MP_MOBILE,EP_CROWD,MP_CROWD,EP_NOVEH,MP_NOVEH,EP_GROUPQ,MP_GROUPQ,EPL_POV150,EPL_UNEMP,EPL_HBURD,EPL_NOHSDP,EPL_UNINSUR,SPL_THEME1,RPL_THEME1,EPL_AGE65,EPL_AGE17,EPL_DISABL,EPL_SNGPNT,EPL_LIMENG,SPL_THEME2,RPL_THEME2,EPL_MINRTY,SPL_THEME3,RPL_THEME3,EPL_MUNIT,EPL_MOBILE,EPL_CROWD,EPL_NOVEH,EPL_GROUPQ,SPL_THEME4,RPL_THEME4,SPL_THEMES,RPL_THEMES,F_POV150,F_UNEMP,F_HBURD,F_NOHSDP,F_UNINSUR,F_THEME1,F_AGE65,F_AGE17,F_DISABL,F_SNGPNT,F_LIMENG,F_THEME2,F_MINRTY,F_THEME3,F_MUNIT,F_MOBILE,F_CROWD,F_NOVEH,F_GROUPQ,F_THEME4,F_TOTAL,E_DAYPOP,E_NOINT,M_NOINT,E_AFAM,M_AFAM,E_HISP,M_HISP,E_ASIAN,M_ASIAN,E_AIAN,M_AIAN,E_NHPI,M_NHPI,E_TWOMORE,M_TWOMORE,E_OTHERRACE,M_OTHERRACE,EP_NOINT,MP_NOINT,EP_AFAM,MP_AFAM,EP_HISP,MP_HISP,EP_ASIAN,MP_ASIAN,EP_AIAN,MP_AIAN,EP_NHPI,MP_NHPI,EP_TWOMORE,MP_TWOMORE,EP_OTHERRACE,MP_OTHERRACE
0,36,New York,NY,6390,ZCTA5 06390,4.0467,53,39,253,49,19,19,17,16,0,13,9,26,0,13,27,34,0,13,6,11,31,33,0,18,9,53,20,51,0,18,4,5,0,18,0,13,17,16,32.1,18.8,0.0,52.7,47.4,100.0,0.0,51.4,50.9,45.7,0.0,45.2,11.3,19.0,58.5,41.4,0.0,94.7,17.0,99.2,37.7,92.1,0.0,7.1,1.6,2.0,0.0,94.7,0.0,75.5,32.1,18.8,0.879,0.0,0.9635,0.0,0.996,2.8385,0.6342,0.0,0.1408,0.9944,0.0,0.9775,2.1127,0.3009,0.8062,0.8062,0.8062,0.0,0.4654,0.0,0.0,0.9735,1.4389,0.2205,7.1963,0.4192,0,0,1,0,1,2,0,0,1,0,1,2,0,0,0,0,0,0,1,1,5,601,9,14,0,13,9,19,0,13,0,13,8,16,3,7,0,13,47.4,51.8,0.0,45.2,17.0,35.0,0.0,45.2,0.0,45.2,15.1,32.1,5.7,12.1,0.0,45.2
1,36,New York,NY,10001,ZCTA5 10001,0.6238,27004,1827,16975,831,14375,782,5248,797,761,266,3314,531,1930,534,831,289,3428,432,2694,643,2310,499,501,215,1381,405,13460,2305,15840,898,15,23,389,135,12285,840,2213,218,20.3,2.7,4.3,1.5,23.1,3.5,9.1,2.4,3.1,1.0,12.7,1.6,10.0,2.1,8.6,1.9,3.5,1.5,5.3,1.5,49.8,7.8,93.3,2.7,0.1,0.1,2.7,0.9,85.5,2.8,8.2,0.6,0.6108,0.4574,0.5573,0.5902,0.4436,2.6593,0.5688,0.142,0.1161,0.1891,0.4707,0.8777,1.7956,0.1692,0.867,0.867,0.867,0.9853,0.271,0.7402,0.9949,0.9104,3.9018,0.9806,9.2237,0.7414,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,3,3,239407,1047,389,2220,576,5206,943,5031,774,0,25,0,25,780,326,223,169,7.3,2.6,8.2,2.2,19.3,3.0,18.6,2.9,0.0,0.1,0.0,0.1,2.9,1.2,0.8,0.6


In [285]:
def is_nyc_zipcode(zipcode):
    zip_int = int(zipcode) if isinstance(zipcode, str) else zipcode

    # Manhattan: 10001-10282
    if 10001 <= zip_int <= 10282:
        return True
    # addition Manhattan: 10300-10499
    if 10300 <= zip_int <= 10499:
        return True
    # Bronx: 10451-10475
    if 10451 <= zip_int <= 10475:
        return True
    # Brooklyn: 11201-11256
    if 11201 <= zip_int <= 11256:
        return True
    # Queens: 11351-11436, 11101-11109
    if (11351 <= zip_int <= 11436) or (11101 <= zip_int <= 11109):
        return True
    # Staten Island: 10301-10314
    if 10301 <= zip_int <= 10314:
        return True
    # additional Queens ZIPs
    if zip_int in [11004, 11005, 11411, 11412, 11413, 11418, 11419, 11420, 11421, 11422, 11423, 11426, 11427, 11428, 11429]:
        return True
    return False

In [286]:
nyc_df = svi_raw[svi_raw['FIPS'].apply(is_nyc_zipcode)]

In [287]:
nyc_df.shape

(204, 156)

In [288]:
nyc_df.EP_NHPI.unique()
# interesting, so there was an error in the source data

array([ 0.00e+00,  1.00e-01,  3.00e-01,  2.00e-01, -9.99e+02,  8.00e-01,
        1.20e+00,  5.00e-01,  4.00e-01])

In [289]:
svi_raw.EP_NHPI.unique()

array([ 1.51e+01,  0.00e+00,  1.00e-01,  3.00e-01,  2.00e-01, -9.99e+02,
        8.00e-01,  1.20e+00,  5.00e-01,  4.00e-01,  1.10e+00,  7.00e-01,
        1.50e+00,  1.80e+00,  9.00e-01,  6.00e-01,  1.40e+00,  2.10e+00,
        2.30e+00,  1.00e+00])

In [290]:
-9.99e+02, 0.00e+00, 9.00e-01, 2.00e-01

(-999.0, 0.0, 0.9, 0.2)

# **Step 3:  Boroughs and their eviction rates**

In [291]:
evictions_pre_post_mean = evictions_pre_post[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
evictions_pre_post_mean
# may need to merge ep_twomore and ep_otherrance toegther

Unnamed: 0,0
ep_afam,29.2346
ep_asian,8.904
ep_hisp,38.7307
ep_nhpi,0.0115
ep_white,19.0236
ep_twomore,2.8025
ep_otherrace,1.0239


In [292]:
evictions_pre_post_mean = evictions_pre_post_mean.reset_index()

In [293]:
evictions_pre_post_mean.rename(columns = {'index':'race_svi', 0: "racial percentage"}, inplace=True)

In [294]:
evictions_pre_post_mean

Unnamed: 0,race_svi,racial percentage
0,ep_afam,29.2346
1,ep_asian,8.904
2,ep_hisp,38.7307
3,ep_nhpi,0.0115
4,ep_white,19.0236
5,ep_twomore,2.8025
6,ep_otherrace,1.0239


In [1]:
# type(evictions_pre_post_mean)
# so this is correct

## **Step 3.1 Selected neighborhoods**

In [296]:
# evictions_pre_post.columns

In [356]:
neighbor_evictions = evictions_pre_post.groupby('nta').agg({'average_year_eviction_count': 'median', 'borough': 'first'}).reset_index()
neighbor_evictions.sort_values('average_year_eviction_count', ascending=False, inplace=True)
neighbor_evictions

Unnamed: 0,nta,average_year_eviction_count,borough
36,Corona,4.8,QUEENS
185,park-cemetery-etc-Bronx,4.2,BRONX
75,Grymes Hill-Clifton-Fox Hills,2.6,STATEN ISLAND
143,Seagate-Coney Island,2.0,BROOKLYN
128,Pelham Parkway,1.6,BRONX
150,Springfield Gardens North,1.5,QUEENS
163,University Heights-Morris Heights,1.4,BRONX
19,Bronxdale,1.4,BRONX
7,Battery Park City-Lower Manhattan,1.4,MANHATTAN
44,East Concourse-Concourse Village,1.2,BRONX


In [298]:
top_10 = neighbor_evictions.head(10)
bottom_10 = neighbor_evictions.tail(10)

In [299]:
top_10

Unnamed: 0,nta,average_year_eviction_count,borough
185,park-cemetery-etc-Bronx,6.4667,BRONX
143,Seagate-Coney Island,4.3904,BROOKLYN
36,Corona,4.2597,QUEENS
163,University Heights-Morris Heights,3.5051,BRONX
125,Park Slope-Gowanus,2.3725,BROOKLYN
75,Grymes Hill-Clifton-Fox Hills,2.336,STATEN ISLAND
106,Morrisania-Melrose,2.255,BRONX
19,Bronxdale,2.2396,BRONX
21,Brownsville,2.0909,BROOKLYN
31,Claremont-Bathgate,1.9783,BRONX


In [300]:
bottom_10

Unnamed: 0,nta,average_year_eviction_count,borough
74,Greenpoint,0.2531,BROOKLYN
29,Charleston-Richmond Valley-Tottenville,0.2516,STATEN ISLAND
73,Great Kills,0.2488,STATEN ISLAND
1,Annadale-Huguenot-Prince's Bay-Eltingville,0.2471,STATEN ISLAND
176,Westerleigh,0.2471,STATEN ISLAND
100,Maspeth,0.2467,QUEENS
2,Arden Heights,0.2235,STATEN ISLAND
68,Glen Oaks-Floral Park-New Hyde Park,0.2235,QUEENS
140,Rossville-Woodrow,0.2235,STATEN ISLAND
186,park-cemetery-etc-Brooklyn,0.2,BROOKLYN


In [360]:
Manhattan = evictions_pre_post[evictions_pre_post['borough'] == 'MANHATTAN']
manhattan_neighbor_evictions = Manhattan.groupby('nta').agg({'average_year_eviction_count': 'median', 'borough': 'first'}).reset_index()
manhattan_neighbor_evictions.sort_values('average_year_eviction_count', ascending=False, inplace=True)
manhattan_bottom_10 = manhattan_neighbor_evictions.tail(10)
manhattan_bottom_10

Unnamed: 0,nta,average_year_eviction_count,borough
15,Manhattanville,0.4,MANHATTAN
14,Lower East Side,0.4,MANHATTAN
25,Upper West Side,0.4,MANHATTAN
23,Turtle Bay-East Midtown,0.4,MANHATTAN
22,Stuyvesant Town-Cooper Village,0.4,MANHATTAN
3,Chinatown,0.2,MANHATTAN
7,East Village,0.2,MANHATTAN
21,SoHo-TriBeCa-Civic Center-Little Italy,0.2,MANHATTAN
24,Upper East Side-Carnegie Hill,0.2,MANHATTAN
28,West Village,0.2,MANHATTAN


In [361]:
evictions_pre_post[evictions_pre_post.nta == "Stuyvesant Town-Cooper Village"].average_year_eviction_count.mean()

np.float64(0.4730158730158729)

In [362]:
# neighbor_evictions.nta

In [363]:
upper_east_side = neighbor_evictions[neighbor_evictions['nta'] == 'Upper East Side-Carnegie Hill']
upper_west_side = neighbor_evictions[neighbor_evictions['nta'] == 'Upper West Side']
yorkville = neighbor_evictions[neighbor_evictions['nta'] == 'Yorkville']
stuyvesant_village = neighbor_evictions[neighbor_evictions['nta'] == 'Stuyvesant Town-Cooper Village']
mornings_hights = neighbor_evictions[neighbor_evictions['nta'] == 'Morningside Heights']
tribeca = neighbor_evictions[neighbor_evictions['nta'] == 'SoHo-TriBeCa-Civic Center-Little Italy']
gramercy = neighbor_evictions[neighbor_evictions['nta'] == 'Gramercy']
riverdale = neighbor_evictions[neighbor_evictions['nta'] == 'North Riverdale-Fieldston-Riverdale']
lenox = neighbor_evictions[neighbor_evictions['nta'] == 'Lenox Hill-Roosevelt Island']
west_village = neighbor_evictions[neighbor_evictions['nta'] == 'West Village']
east_village = neighbor_evictions[neighbor_evictions['nta'] == 'East Village']

In [364]:
stuyvesant_village

Unnamed: 0,nta,average_year_eviction_count,borough
158,Stuyvesant Town-Cooper Village,0.4,MANHATTAN


In [365]:
# another typically "good" manhattan neighborhoods
# may or may not add
fidi = neighbor_evictions[neighbor_evictions['nta'] == 'Battery Park City-Lower Manhattan']
fidi.average_year_eviction_count.values[0]

np.float64(1.4)

In [366]:
nta_names = ['East Village', 'West Village', 'Lenox Hill-Roosevelt Island', 'North Riverdale-Fieldston-Riverdale', 'Gramercy',
             'SoHo-TriBeCa-Civic Center-Little Italy', 'MOrnings Heights', 'Stuyvesant Town-Cooper Village', 'Yorkville',
             'Upper West Side', 'Upper East Side-Carnegie Hill']
nta_df_names = ['east_village', 'west_village', 'lenox', 'riverdale', 'gramercy', 'tribeca', 'mornings_hights',
                'stuyvesant_village', 'yorkville', 'upper_west_side', 'upper_east_side']

In [367]:
# first df for evi
man_nta = {
    'nta': ['east_village', 'west_village', 'lenox', 'riverdale', 'gramercy', 'tribeca', 'mornings_hights',
                'stuyvesant_village', 'yorkville', 'upper_west_side', 'upper_east_side'],
    'eviction_rates': [
        upper_east_side.average_year_eviction_count.values[0],
        upper_west_side.average_year_eviction_count.values[0],
        yorkville.average_year_eviction_count.values[0],
        stuyvesant_village.average_year_eviction_count.values[0],
        mornings_hights.average_year_eviction_count.values[0],
        tribeca.average_year_eviction_count.values[0],
        gramercy.average_year_eviction_count.values[0],
        riverdale.average_year_eviction_count.values[0],
        lenox.average_year_eviction_count.values[0],
        west_village.average_year_eviction_count.values[0],
        east_village.average_year_eviction_count.values[0],
    ]
}
man_nta_df = pd.DataFrame(man_nta)
man_nta_df

Unnamed: 0,nta,eviction_rates
0,east_village,0.2
1,west_village,0.4
2,lenox,0.4
3,riverdale,0.4
4,gramercy,0.4
5,tribeca,0.2
6,mornings_hights,0.4
7,stuyvesant_village,0.8
8,yorkville,0.4
9,upper_west_side,0.2


In [368]:
stuyvesant_village# neighbor_evictions.nta.unique()

Unnamed: 0,nta,average_year_eviction_count,borough
158,Stuyvesant Town-Cooper Village,0.4,MANHATTAN


In [369]:
park_cemetery_bronx = neighbor_evictions[neighbor_evictions['nta'] == 'park-cemetery-etc-Bronx']
seagate_coney_island = neighbor_evictions[neighbor_evictions['nta'] == 'Seagate-Coney Island']
corona = neighbor_evictions[neighbor_evictions['nta'] == 'Corona']
university_heights_morris_heights	 = neighbor_evictions[neighbor_evictions['nta'] == 'University Heights-Morris Heights']
park_slope_gowanus = neighbor_evictions[neighbor_evictions['nta'] == 'Park Slope-Gowanus']
grymes_hill_clifton = neighbor_evictions[neighbor_evictions['nta'] == 'Grymes Hill-Clifton-Fox Hills']
morrisania_melros = neighbor_evictions[neighbor_evictions['nta'] == 'Morrisania-Melrose']
bronxdale = neighbor_evictions[neighbor_evictions['nta'] == 'Bronxdale']
brownsville = neighbor_evictions[neighbor_evictions['nta'] == 'Brownsville']
claremont_bathgate = neighbor_evictions[neighbor_evictions['nta'] == 'Claremont-Bathgate']

In [370]:
nta_names += ['park-cemetery-etc-Bronx', 'Seagate-Coney Island', 'Corona', 'University Heights-Morris Heights', 'Park Slope-Gowanus',
              'Grymes Hill-Clifton-Fox Hills', 'Morrisania-Melrose', 'Bronxdale', 'Brownsville', 'Claremont-Bathgate']
nta_df_names += ['park_cemetery_bronx', 'seagate_coney_island', 'corona', 'university_heights_morris_heights', 'park_slope_gowanus', 'grymes_hill_clifton'
                  'morrisania_melros', 'bronxdale', 'brownsville', 'claremont_bathgate']

In [371]:
bottom_10

Unnamed: 0,nta,average_year_eviction_count,borough
74,Greenpoint,0.2531,BROOKLYN
29,Charleston-Richmond Valley-Tottenville,0.2516,STATEN ISLAND
73,Great Kills,0.2488,STATEN ISLAND
1,Annadale-Huguenot-Prince's Bay-Eltingville,0.2471,STATEN ISLAND
176,Westerleigh,0.2471,STATEN ISLAND
100,Maspeth,0.2467,QUEENS
2,Arden Heights,0.2235,STATEN ISLAND
68,Glen Oaks-Floral Park-New Hyde Park,0.2235,QUEENS
140,Rossville-Woodrow,0.2235,STATEN ISLAND
186,park-cemetery-etc-Brooklyn,0.2,BROOKLYN


In [372]:
brownsville

Unnamed: 0,nta,average_year_eviction_count,borough
21,Brownsville,0.6,BROOKLYN


In [373]:
# second df for evi
highest_evi = {
    'nta': ['park_cemetery_bronx', 'seagate_coney_island', 'corona', 'university_heights_morris_heights', 'park_slope_gowanus', 'grymes_hill_clifton',
                  'morrisania_melros', 'bronxdale', 'brownsville', 'claremont_bathgate'],
    'eviction_rates': [park_cemetery_bronx.average_year_eviction_count.values[0],
                       seagate_coney_island.average_year_eviction_count.values[0],
                       corona.average_year_eviction_count.values[0],
                       university_heights_morris_heights.average_year_eviction_count.values[0],
                       park_slope_gowanus.average_year_eviction_count.values[0] ,
                       grymes_hill_clifton.average_year_eviction_count.values[0],
                       morrisania_melros.average_year_eviction_count.values[0],
                       bronxdale.average_year_eviction_count.values[0],
                       brownsville.average_year_eviction_count.values[0],
                       claremont_bathgate.average_year_eviction_count.values[0]]
}
highest_evi_df = pd.DataFrame(highest_evi)
highest_evi_df

Unnamed: 0,nta,eviction_rates
0,park_cemetery_bronx,4.2
1,seagate_coney_island,2.0
2,corona,4.8
3,university_heights_morris_heights,1.4
4,park_slope_gowanus,0.2
5,grymes_hill_clifton,2.6
6,morrisania_melros,1.0
7,bronxdale,1.4
8,brownsville,0.6
9,claremont_bathgate,1.0


In [374]:
greenpoint = neighbor_evictions[neighbor_evictions['nta'] == 'Greenpoint']
charleston_richmond = neighbor_evictions[neighbor_evictions['nta'] == 'Charleston-Richmond Valley-Tottenville']
great_kills = neighbor_evictions[neighbor_evictions['nta'] == 'Great Kills']
annadale_huguenot = neighbor_evictions[neighbor_evictions['nta'] == "Annadale-Huguenot-Prince's Bay-Eltingville"]
westerleigh = neighbor_evictions[neighbor_evictions['nta'] == 'Westerleigh']
maspeth = neighbor_evictions[neighbor_evictions['nta'] == 'Maspeth']
arden_heights = neighbor_evictions[neighbor_evictions['nta'] == 'Arden Heights']
glen_oask = neighbor_evictions[neighbor_evictions['nta'] == 'Glen Oaks-Floral Park-New Hyde Park']
rossville_woodrow = neighbor_evictions[neighbor_evictions['nta'] == 'Rossville-Woodrow']
park_cemetery_brooklyn = neighbor_evictions[neighbor_evictions['nta'] == 'park-cemetery-etc-Brooklyn']

In [375]:
nta_names += ['Greenpoint', 'Charleston-Richmon', 'Great Kills', "Annadale-Huguenot-Prince's Bay-Eltingville", 'Westerleigh', 'Maspeth',
              'Arden Heights', 'Glen Oaks-Floral Park-New Hyde Park', 'Rossville-Woodrow', 'park-cemeter-etc-Brooklyn']
nta_df_names += ['greenpoint', 'charlestn_richmond', 'great_kills', 'annadale-huguenot', 'westerleigh', 'maspeth', 'arden_heights', 'glen_oask'
                 'rossville_woodrow', 'park_cemetery_brooklyn']

In [376]:
# def count_evictions():

In [377]:
# third and last df for evi
lowest_evi = {
    'nta': ['greenpoint', 'charlestn_richmond', 'great_kills', 'annadale-huguenot', 'westerleigh', 'maspeth', 'arden_heights', 'glen_oask'
                 'rossville_woodrow', 'park_cemetery_brooklyn'],
    'eviction_rates': [
        greenpoint.average_year_eviction_count.values[0],
        charleston_richmond.average_year_eviction_count.values[0],
        great_kills.average_year_eviction_count.values[0],
        annadale_huguenot.average_year_eviction_count.values[0],
        westerleigh.average_year_eviction_count.values[0],
        maspeth.average_year_eviction_count.values[0],
        arden_heights.average_year_eviction_count.values[0],
        glen_oask.average_year_eviction_count.values[0],
        rossville_woodrow.average_year_eviction_count.values[0]
    ]
}
lowest_evi_df = pd.DataFrame(lowest_evi)
lowest_evi_df

Unnamed: 0,nta,eviction_rates
0,greenpoint,0.2
1,charlestn_richmond,0.2
2,great_kills,0.2
3,annadale-huguenot,0.2
4,westerleigh,0.2
5,maspeth,0.2
6,arden_heights,0.2
7,glen_oaskrossville_woodrow,0.2
8,park_cemetery_brooklyn,0.2


In [378]:
avg_per_nta = neighbor_evictions.average_year_eviction_count.mean()
avg_per_nta
# per building, per year, that's why it is similar to the borough one too

np.float64(0.5834224598930482)

In [379]:
# evictions_pre_post.nta.unique()

## **Step 3.2 a datframe that has nta as columns and average_year_eviction_count as rows and contents**

In [380]:
# the original names for assignments. Will use these names for merge
nta_names, \
nta_df_names
# df names for calling

(['East Village',
  'West Village',
  'Lenox Hill-Roosevelt Island',
  'North Riverdale-Fieldston-Riverdale',
  'Gramercy',
  'SoHo-TriBeCa-Civic Center-Little Italy',
  'MOrnings Heights',
  'Stuyvesant Town-Cooper Village',
  'Yorkville',
  'Upper West Side',
  'Upper East Side-Carnegie Hill',
  'park-cemetery-etc-Bronx',
  'Seagate-Coney Island',
  'Corona',
  'University Heights-Morris Heights',
  'Park Slope-Gowanus',
  'Grymes Hill-Clifton-Fox Hills',
  'Morrisania-Melrose',
  'Bronxdale',
  'Brownsville',
  'Claremont-Bathgate',
  'Greenpoint',
  'Charleston-Richmon',
  'Great Kills',
  "Annadale-Huguenot-Prince's Bay-Eltingville",
  'Westerleigh',
  'Maspeth',
  'Arden Heights',
  'Glen Oaks-Floral Park-New Hyde Park',
  'Rossville-Woodrow',
  'park-cemeter-etc-Brooklyn'],
 ['east_village',
  'west_village',
  'lenox',
  'riverdale',
  'gramercy',
  'tribeca',
  'mornings_hights',
  'stuyvesant_village',
  'yorkville',
  'upper_west_side',
  'upper_east_side',
  'park_cemetery_

In [381]:
all_nta = pd.concat([highest_evi_df, lowest_evi_df, man_nta_df])
all_nta

Unnamed: 0,nta,eviction_rates
0,park_cemetery_bronx,4.2
1,seagate_coney_island,2.0
2,corona,4.8
3,university_heights_morris_heights,1.4
4,park_slope_gowanus,0.2
5,grymes_hill_clifton,2.6
6,morrisania_melros,1.0
7,bronxdale,1.4
8,brownsville,0.6
9,claremont_bathgate,1.0


## **Step 3.3 We need a dataframe that has all these intersted neighborhoods as coloumns and race percentages as rows and cells**

In [382]:
nta_df_names, nta_names

(['east_village',
  'west_village',
  'lenox',
  'riverdale',
  'gramercy',
  'tribeca',
  'mornings_hights',
  'stuyvesant_village',
  'yorkville',
  'upper_west_side',
  'upper_east_side',
  'park_cemetery_bronx',
  'seagate_coney_island',
  'corona',
  'university_heights_morris_heights',
  'park_slope_gowanus',
  'grymes_hill_cliftonmorrisania_melros',
  'bronxdale',
  'brownsville',
  'claremont_bathgate',
  'greenpoint',
  'charlestn_richmond',
  'great_kills',
  'annadale-huguenot',
  'westerleigh',
  'maspeth',
  'arden_heights',
  'glen_oaskrossville_woodrow',
  'park_cemetery_brooklyn'],
 ['East Village',
  'West Village',
  'Lenox Hill-Roosevelt Island',
  'North Riverdale-Fieldston-Riverdale',
  'Gramercy',
  'SoHo-TriBeCa-Civic Center-Little Italy',
  'MOrnings Heights',
  'Stuyvesant Town-Cooper Village',
  'Yorkville',
  'Upper West Side',
  'Upper East Side-Carnegie Hill',
  'park-cemetery-etc-Bronx',
  'Seagate-Coney Island',
  'Corona',
  'University Heights-Morris He

In [383]:
# nta with everything
# use the nta_names as the name, and nta_df_names as the actually assignment name
east_village_all = evictions_pre_post[evictions_pre_post['nta'] == 'East Village']
west_village_all = evictions_pre_post[evictions_pre_post['nta'] == 'West Village']
lenox_all = evictions_pre_post[evictions_pre_post['nta'] == 'Lenox Hill-Roosevelt Island']
riverdale_all = evictions_pre_post[evictions_pre_post['nta'] == 'North Riverdale-Fieldston-Riverdale']
gramercy_all = evictions_pre_post[evictions_pre_post['nta'] == 'Gramercy']
tribeca_all = evictions_pre_post[evictions_pre_post['nta'] == 'SoHo-TriBeCa-Civic Center-Little Italy']
mornings_hights_all = evictions_pre_post[evictions_pre_post['nta'] == 'Morningside Heights']
stuyvesant_village_all = evictions_pre_post[evictions_pre_post['nta'] == 'Stuyvesant Town-Cooper Village']
yorkville_all = evictions_pre_post[evictions_pre_post['nta'] == 'Yorkville']
upper_west_side_all = evictions_pre_post[evictions_pre_post['nta'] == 'Upper West Side']
upper_east_side_all = evictions_pre_post[evictions_pre_post['nta'] == 'Upper East Side-Carnegie Hill']
park_cemetery_bronx_all = evictions_pre_post[evictions_pre_post['nta'] == 'park-cemetery-etc-Bronx']
seagate_coney_island_all = evictions_pre_post[evictions_pre_post['nta'] == 'Seagate-Coney Island']
corona_all = evictions_pre_post[evictions_pre_post['nta'] == 'Corona']
university_heights_morris_heights_all = evictions_pre_post[evictions_pre_post['nta'] == 'University Heights-Morris Heights']
park_slope_gowanus_all = evictions_pre_post[evictions_pre_post['nta'] == 'Park Slope-Gowanus']
grymes_hill_clifton_all = evictions_pre_post[evictions_pre_post['nta'] == 'Grymes Hill-Clifton-Fox Hills']
morrisania_melros_all = evictions_pre_post[evictions_pre_post['nta'] == 'Morrisania-Melrose']
bronxdale_all = evictions_pre_post[evictions_pre_post['nta'] == 'Bronxdale']
brownsville_all = evictions_pre_post[evictions_pre_post['nta'] == 'Brownsville']
claremont_bathgate_all = evictions_pre_post[evictions_pre_post['nta'] == 'Claremont-Bathgate']
greenpoint_all = evictions_pre_post[evictions_pre_post['nta'] == 'Greenpoint']
charleston_richmond_all = evictions_pre_post[evictions_pre_post['nta'] == 'Charleston-Richmond']
great_kills_all = evictions_pre_post[evictions_pre_post['nta'] == 'Great Kills']
annadale_huguenot_all = evictions_pre_post[evictions_pre_post['nta'] == "Annadale-Huguenot-Prince's Bay-Eltingville"]
westerleigh_all = evictions_pre_post[evictions_pre_post['nta'] == 'Westerleigh']
maspeth_all = evictions_pre_post[evictions_pre_post['nta'] == 'Maspeth']
arden_heights_all = evictions_pre_post[evictions_pre_post['nta'] == 'Arden Heights']
glen_oaks_all = evictions_pre_post[evictions_pre_post['nta'] == 'Glen Oaks-Floral Park-New Hyde Park']
rossville_woodrow_all = evictions_pre_post[evictions_pre_post['nta'] == 'Rossville-Woodrow']
park_cemetery_brooklyn_all = evictions_pre_post[evictions_pre_post['nta'] == 'park-cemetery-etc-Brooklyn']

### **Step 3.3.1 All neighborhoods racial composite**

In [384]:
# get the racial composite from svi data for each nta as series
east_village_racial = east_village_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
west_village_racial = west_village_all[['ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
lenox_racial = lenox_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
riverdale_racial = riverdale_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
gramercy_racial = gramercy_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
tribeca_racial = tribeca_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
mornings_hights_racial = mornings_hights_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
stuyvesant_village_racial = stuyvesant_village_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
yorkville_racial = yorkville_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
upper_west_side_racial = upper_west_side_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
upper_east_side_racial = upper_east_side_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
park_cemetery_bronx_racial = park_cemetery_bronx_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
seagate_coney_island_racial = seagate_coney_island_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
corona_racial = corona_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
university_heights_morris_heights_racial = university_heights_morris_heights_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
park_slope_gowanus_racial = park_slope_gowanus_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
grymes_hill_clifton_racial = grymes_hill_clifton_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
morrisania_melros_racial = morrisania_melros_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
bronxdale_racial = bronxdale_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
brownsville_racial = brownsville_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
claremont_bathgate_racial = claremont_bathgate_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
greenpoint_racial = greenpoint_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
charleston_richmond_racial = charleston_richmond_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
great_kills_racial = great_kills_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
annadale_huguenot_racial = annadale_huguenot_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
westerleigh_racial = westerleigh_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
maspeth_racial = maspeth_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
arden_heights_racial = arden_heights_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
glen_oaks_racial = glen_oaks_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
rossville_woodrow_racial = rossville_woodrow_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()
park_cemetery_brooklyn_racial = park_cemetery_brooklyn_all[['ep_afam', 'ep_asian', 'ep_hisp', 'ep_nhpi', 'ep_white', 'ep_twomore', 'ep_otherrace']].mean()

In [385]:
# convert those racial series to frames
east_village_racial = east_village_racial.to_frame()
west_village_racial = west_village_racial.to_frame()
lenox_racial = lenox_racial.to_frame()
riverdale_racial = riverdale_racial.to_frame()
gramercy_racial = gramercy_racial.to_frame()
tribeca_racial = tribeca_racial.to_frame()
mornings_hights_racial = mornings_hights_racial.to_frame()
stuyvesant_village_racial = stuyvesant_village_racial.to_frame()
yorkville_racial = yorkville_racial.to_frame()
upper_west_side_racial = upper_west_side_racial.to_frame()
upper_east_side_racial = upper_east_side_racial.to_frame()
park_cemetery_bronx_racial = park_cemetery_bronx_racial.to_frame()
seagate_coney_island_racial = seagate_coney_island_racial.to_frame()
corona_racial = corona_racial.to_frame()
university_heights_morris_heights_racial = university_heights_morris_heights_racial.to_frame()
park_slope_gowanus_racial = park_slope_gowanus_racial.to_frame()
grymes_hill_clifton_racial = grymes_hill_clifton_racial.to_frame()
morrisania_melros_racial = morrisania_melros_racial.to_frame()
bronxdale_racial = bronxdale_racial.to_frame()
brownsville_racial = brownsville_racial.to_frame()
claremont_bathgate_racial = claremont_bathgate_racial.to_frame()
greenpoint_racial = greenpoint_racial.to_frame()
charleston_richmond_racial = charleston_richmond_racial.to_frame()
great_kills_racial = great_kills_racial.to_frame()
annadale_huguenot_racial = annadale_huguenot_racial.to_frame()
westerleigh_racial = westerleigh_racial.to_frame()
maspeth_racial = maspeth_racial.to_frame()
arden_heights_racial = arden_heights_racial.to_frame()
glen_oaks_racial = glen_oaks_racial.to_frame()
rossville_woodrow_racial = rossville_woodrow_racial.to_frame()
park_cemetery_brooklyn_racial = park_cemetery_brooklyn_racial.to_frame()

In [386]:
# conver the dataframe's column name 0 to 'racial_pct'
east_village_racial.rename(columns = {0:"racial_percentage"}, inplace = True)
west_village_racial.rename(columns= {0: "racial_percentage"}, inplace = True)
lenox_racial.rename(columns={0: "racial_percentage"}, inplace=True)
riverdale_racial.rename(columns={0: "racial_percentage"}, inplace=True)
gramercy_racial.rename(columns={0: "racial_percentage"}, inplace=True)
tribeca_racial.rename(columns={0: "racial_percentage"}, inplace=True)
mornings_hights_racial.rename(columns={0: "racial_percentage"}, inplace=True)
stuyvesant_village_racial.rename(columns={0: "racial_percentage"}, inplace=True)
yorkville_racial.rename(columns={0: "racial_percentage"}, inplace=True)
upper_west_side_racial.rename(columns={0: "racial_percentage"}, inplace=True)
upper_east_side_racial.rename(columns={0: "racial_percentage"}, inplace=True)
park_cemetery_bronx_racial.rename(columns={0: "racial_percentage"}, inplace=True)
seagate_coney_island_racial.rename(columns={0: "racial_percentage"}, inplace=True)
corona_racial.rename(columns={0: "racial_percentage"}, inplace=True)
university_heights_morris_heights_racial.rename(columns={0: "racial_percentage"}, inplace=True)
park_slope_gowanus_racial.rename(columns={0: "racial_percentage"}, inplace=True)
grymes_hill_clifton_racial.rename(columns={0: "racial_percentage"}, inplace=True)
morrisania_melros_racial.rename(columns={0: "racial_percentage"}, inplace=True)
bronxdale_racial.rename(columns={0: "racial_percentage"}, inplace=True)
brownsville_racial.rename(columns={0: "racial_percentage"}, inplace=True)
claremont_bathgate_racial.rename(columns={0: "racial_percentage"}, inplace=True)
greenpoint_racial.rename(columns={0: "racial_percentage"}, inplace=True)
charleston_richmond_racial.rename(columns={0: "racial_percentage"}, inplace=True)
great_kills_racial.rename(columns={0: "racial_percentage"}, inplace=True)
annadale_huguenot_racial.rename(columns={0: "racial_percentage"}, inplace=True)
westerleigh_racial.rename(columns={0: "racial_percentage"}, inplace=True)
maspeth_racial.rename(columns={0: "racial_percentage"}, inplace=True)
arden_heights_racial.rename(columns={0: "racial_percentage"}, inplace=True)
glen_oaks_racial.rename(columns={0: "racial_percentage"}, inplace=True)
rossville_woodrow_racial.rename(columns={0: "racial_percentage"}, inplace=True)
park_cemetery_brooklyn_racial.rename(columns={0: "racial_percentage"}, inplace=True)

In [387]:
east_village_racial

Unnamed: 0,racial_percentage
ep_afam,6.2083
ep_asian,14.9575
ep_hisp,17.8939
ep_nhpi,0.0459
ep_white,56.2602
ep_twomore,4.0735
ep_otherrace,0.4602


In [388]:
ev_total = east_village_racial.racial_percentage.sum()
ev_total, 100 - ev_total
# good

(np.float64(99.89944751381218), np.float64(0.10055248618782286))

In [389]:
# reset all those neighborhoods' index
east_village_racial.reset_index(inplace = True)
west_village_racial.reset_index(inplace=True)
lenox_racial.reset_index(inplace=True)
riverdale_racial.reset_index(inplace=True)
gramercy_racial.reset_index(inplace=True)
tribeca_racial.reset_index(inplace=True)
mornings_hights_racial.reset_index(inplace=True)
stuyvesant_village_racial.reset_index(inplace=True)
yorkville_racial.reset_index(inplace=True)
upper_west_side_racial.reset_index(inplace=True)
upper_east_side_racial.reset_index(inplace=True)
park_cemetery_bronx_racial.reset_index(inplace=True)
seagate_coney_island_racial.reset_index(inplace=True)
corona_racial.reset_index(inplace=True)
university_heights_morris_heights_racial.reset_index(inplace=True)
park_slope_gowanus_racial.reset_index(inplace=True)
grymes_hill_clifton_racial.reset_index(inplace=True)
morrisania_melros_racial.reset_index(inplace=True)
bronxdale_racial.reset_index(inplace=True)
brownsville_racial.reset_index(inplace=True)
claremont_bathgate_racial.reset_index(inplace=True)
greenpoint_racial.reset_index(inplace=True)
charleston_richmond_racial.reset_index(inplace=True)
great_kills_racial.reset_index(inplace=True)
annadale_huguenot_racial.reset_index(inplace=True)
westerleigh_racial.reset_index(inplace=True)
maspeth_racial.reset_index(inplace=True)
arden_heights_racial.reset_index(inplace=True)
glen_oaks_racial.reset_index(inplace=True)
rossville_woodrow_racial.reset_index(inplace=True)
park_cemetery_brooklyn_racial.reset_index(inplace=True)

In [390]:
# change index to a column called race"
# foor loop for refactoring
for df in [east_village_racial, west_village_racial, lenox_racial, riverdale_racial,
           gramercy_racial, tribeca_racial, mornings_hights_racial, stuyvesant_village_racial,
           yorkville_racial, upper_west_side_racial, upper_east_side_racial,
           park_cemetery_bronx_racial, seagate_coney_island_racial, corona_racial,
           university_heights_morris_heights_racial, park_slope_gowanus_racial,
           grymes_hill_clifton_racial, morrisania_melros_racial, bronxdale_racial,
           brownsville_racial, claremont_bathgate_racial, greenpoint_racial,
           charleston_richmond_racial, great_kills_racial, annadale_huguenot_racial,
           westerleigh_racial, maspeth_racial, arden_heights_racial, glen_oaks_racial,
           rossville_woodrow_racial, park_cemetery_brooklyn_racial]:
    df.rename(columns={'index': 'race'}, inplace=True)

In [391]:
east_village_racial

Unnamed: 0,race,racial_percentage
0,ep_afam,6.2083
1,ep_asian,14.9575
2,ep_hisp,17.8939
3,ep_nhpi,0.0459
4,ep_white,56.2602
5,ep_twomore,4.0735
6,ep_otherrace,0.4602


In [392]:
dfs = [east_village_racial, west_village_racial, lenox_racial, riverdale_racial,
           gramercy_racial, tribeca_racial, mornings_hights_racial, stuyvesant_village_racial,
           yorkville_racial, upper_west_side_racial, upper_east_side_racial,
           park_cemetery_bronx_racial, seagate_coney_island_racial, corona_racial,
           university_heights_morris_heights_racial, park_slope_gowanus_racial,
           grymes_hill_clifton_racial, morrisania_melros_racial, bronxdale_racial,
           brownsville_racial, claremont_bathgate_racial, greenpoint_racial,
           charleston_richmond_racial, great_kills_racial, annadale_huguenot_racial,
           westerleigh_racial, maspeth_racial, arden_heights_racial, glen_oaks_racial,
           rossville_woodrow_racial, park_cemetery_brooklyn_racial]

In [393]:
suffixes = ['east_village', 'west_village', 'lenox', 'riverdale', 'gramercy', 'tribeca', 'mornings_hights', 'stuyvesant_village',
           'yorkville', 'upper_west_side', 'upper_east_side', 'park_cemetery_bronx', 'seagate_coney_island', 'corona',
           'university_heights_morris_heights', 'park_slope_gowanus', 'grymes_hill_clifton', 'morrisania_melros', 'bronxdale',
           'brownsville', 'claremont_bathgate', 'greenpoint', 'charleston_richmond', 'great_kills', 'annadale_huguenot',
           'westerleigh', 'maspeth', 'arden_heights', 'glen_oaks', 'rossville_woodrow', 'park_cemetery_brooklyn']

In [394]:
west_village_racial

Unnamed: 0,race,racial_percentage
0,ep_hisp,11.26
1,ep_nhpi,0.0097
2,ep_white,72.6042
3,ep_twomore,3.2152
4,ep_otherrace,0.3079


In [395]:
# refactored
def merge_with_suffixes(left_df, right_df, suffix):
    return pd.merge(left_df, right_df, on='race', how='outer', suffixes=('', f'_{suffix}'))

In [396]:
# merge_with_suffixes(dfs)
merged_df = dfs[0]
merged_df

Unnamed: 0,race,racial_percentage
0,ep_afam,6.2083
1,ep_asian,14.9575
2,ep_hisp,17.8939
3,ep_nhpi,0.0459
4,ep_white,56.2602
5,ep_twomore,4.0735
6,ep_otherrace,0.4602


In [397]:
merged_df = dfs[0]
for df, suffix in zip(dfs[1:], suffixes[1:]):
    # merged_df = merge_with_suffixes(merged_df, df, suffix=df.columns[0])
    # merged_df = pd.merge(merged_df, df, on='race_pct', how='outer')
    merged_df = merge_with_suffixes(merged_df, df, suffix=f'{suffix}')

In [398]:
merged_df

Unnamed: 0,race,racial_percentage,racial_percentage_west_village,racial_percentage_lenox,racial_percentage_riverdale,racial_percentage_gramercy,racial_percentage_tribeca,racial_percentage_mornings_hights,racial_percentage_stuyvesant_village,racial_percentage_yorkville,racial_percentage_upper_west_side,racial_percentage_upper_east_side,racial_percentage_park_cemetery_bronx,racial_percentage_seagate_coney_island,racial_percentage_corona,racial_percentage_university_heights_morris_heights,racial_percentage_park_slope_gowanus,racial_percentage_grymes_hill_clifton,racial_percentage_morrisania_melros,racial_percentage_bronxdale,racial_percentage_brownsville,racial_percentage_claremont_bathgate,racial_percentage_greenpoint,racial_percentage_charleston_richmond,racial_percentage_great_kills,racial_percentage_annadale_huguenot,racial_percentage_westerleigh,racial_percentage_maspeth,racial_percentage_arden_heights,racial_percentage_glen_oaks,racial_percentage_rossville_woodrow,racial_percentage_park_cemetery_brooklyn
0,ep_afam,6.2083,,1.73,9.398,6.563,2.9904,19.8254,7.2048,3.7964,6.7846,2.7172,28.6,20.1385,7.3968,24.4407,7.4853,25.1149,35.4504,29.4301,70.0757,30.5182,2.6,,1.2951,0.6765,11.5412,0.945,0.8,5.0824,0.5353,21.1
1,ep_asian,14.9575,,10.931,3.5199,16.4863,22.4956,9.4149,13.819,8.7272,8.9182,9.5957,6.2,9.827,11.305,1.6079,9.501,11.6023,0.6731,6.1401,0.8424,1.0987,4.9,,9.139,6.8059,12.5,15.3967,8.7,47.4706,4.6412,7.5
2,ep_hisp,17.8939,11.26,7.4325,34.0166,10.2918,9.6741,24.2418,21.781,11.3452,18.6,9.5336,52.9,19.4159,75.7668,69.3353,16.3461,22.3858,59.1572,52.1306,19.3509,63.2627,15.6,,12.5707,11.4824,24.2529,36.77,12.8,12.9118,9.9765,14.1
3,ep_nhpi,0.0459,0.0097,0.169,0.0,0.0425,0.0007,0.0,0.0,0.0128,0.0,0.0698,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0002,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,ep_otherrace,0.4602,0.3079,0.2588,0.8099,0.4178,0.3837,0.6995,0.6,0.6875,0.4398,0.4862,0.7,0.0159,0.4997,0.7046,0.5765,0.4703,0.3022,0.7095,0.6034,0.4059,0.6,,0.2366,0.5235,0.4235,0.8917,0.4,2.8941,0.6647,0.9
5,ep_twomore,4.0735,3.2152,2.6845,3.1205,3.8904,3.4215,3.7995,4.2048,2.8557,3.451,2.7974,2.3,2.9144,0.5039,1.8489,5.4206,1.5584,1.5814,2.2915,5.3733,1.4791,6.5,,1.8756,1.3235,2.5235,1.3367,1.2,2.3059,1.4647,4.1
6,ep_white,56.2602,72.6042,76.7173,49.1656,62.1342,60.9415,41.8333,52.2524,72.5574,61.7132,74.7638,8.7,47.3927,4.4275,2.0544,60.6373,38.7568,2.5927,8.6984,3.6573,3.0336,69.6,,74.8878,79.1471,48.4529,44.535,76.1,29.2588,82.6294,52.1


In [399]:
# West village and charleston_richmond has nones, we get rid of them
reduced_df = merged_df.drop(columns=['racial_percentage_west_village',
                                   'racial_percentage_charleston_richmond'])

## **This is for the race composites and neighborhoods bar char use**

In [400]:
reduced_df

Unnamed: 0,race,racial_percentage,racial_percentage_lenox,racial_percentage_riverdale,racial_percentage_gramercy,racial_percentage_tribeca,racial_percentage_mornings_hights,racial_percentage_stuyvesant_village,racial_percentage_yorkville,racial_percentage_upper_west_side,racial_percentage_upper_east_side,racial_percentage_park_cemetery_bronx,racial_percentage_seagate_coney_island,racial_percentage_corona,racial_percentage_university_heights_morris_heights,racial_percentage_park_slope_gowanus,racial_percentage_grymes_hill_clifton,racial_percentage_morrisania_melros,racial_percentage_bronxdale,racial_percentage_brownsville,racial_percentage_claremont_bathgate,racial_percentage_greenpoint,racial_percentage_great_kills,racial_percentage_annadale_huguenot,racial_percentage_westerleigh,racial_percentage_maspeth,racial_percentage_arden_heights,racial_percentage_glen_oaks,racial_percentage_rossville_woodrow,racial_percentage_park_cemetery_brooklyn
0,ep_afam,6.2083,1.73,9.398,6.563,2.9904,19.8254,7.2048,3.7964,6.7846,2.7172,28.6,20.1385,7.3968,24.4407,7.4853,25.1149,35.4504,29.4301,70.0757,30.5182,2.6,1.2951,0.6765,11.5412,0.945,0.8,5.0824,0.5353,21.1
1,ep_asian,14.9575,10.931,3.5199,16.4863,22.4956,9.4149,13.819,8.7272,8.9182,9.5957,6.2,9.827,11.305,1.6079,9.501,11.6023,0.6731,6.1401,0.8424,1.0987,4.9,9.139,6.8059,12.5,15.3967,8.7,47.4706,4.6412,7.5
2,ep_hisp,17.8939,7.4325,34.0166,10.2918,9.6741,24.2418,21.781,11.3452,18.6,9.5336,52.9,19.4159,75.7668,69.3353,16.3461,22.3858,59.1572,52.1306,19.3509,63.2627,15.6,12.5707,11.4824,24.2529,36.77,12.8,12.9118,9.9765,14.1
3,ep_nhpi,0.0459,0.169,0.0,0.0425,0.0007,0.0,0.0,0.0128,0.0,0.0698,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,ep_otherrace,0.4602,0.2588,0.8099,0.4178,0.3837,0.6995,0.6,0.6875,0.4398,0.4862,0.7,0.0159,0.4997,0.7046,0.5765,0.4703,0.3022,0.7095,0.6034,0.4059,0.6,0.2366,0.5235,0.4235,0.8917,0.4,2.8941,0.6647,0.9
5,ep_twomore,4.0735,2.6845,3.1205,3.8904,3.4215,3.7995,4.2048,2.8557,3.451,2.7974,2.3,2.9144,0.5039,1.8489,5.4206,1.5584,1.5814,2.2915,5.3733,1.4791,6.5,1.8756,1.3235,2.5235,1.3367,1.2,2.3059,1.4647,4.1
6,ep_white,56.2602,76.7173,49.1656,62.1342,60.9415,41.8333,52.2524,72.5574,61.7132,74.7638,8.7,47.3927,4.4275,2.0544,60.6373,38.7568,2.5927,8.6984,3.6573,3.0336,69.6,74.8878,79.1471,48.4529,44.535,76.1,29.2588,82.6294,52.1


## **Adding the svi scatterplot line onto the bar chart**

In [401]:
# get the general svi score for each neighborhood
east_village_svi = east_village_all[['rpl_themes']].mean()
west_village_svi = west_village_all[['rpl_themes']].mean()
lenox_svi = lenox_all[['rpl_themes']].mean()
riverdale_svi = riverdale_all[['rpl_themes']].mean()
gramercy_svi = gramercy_all[['rpl_themes']].mean()
tribeca_svi = tribeca_all[['rpl_themes']].mean()
mornings_hights_svi = mornings_hights_all[['rpl_themes']].mean()
stuyvesant_village_svi = stuyvesant_village_all[['rpl_themes']].mean()
yorkville_svi = yorkville_all[['rpl_themes']].mean()
upper_west_side_svi = upper_west_side_all[['rpl_themes']].mean()
upper_east_side_svi = upper_east_side_all[['rpl_themes']].mean()
park_cemetery_bronx_svi = park_cemetery_bronx_all[['rpl_themes']].mean()
seagate_coney_island_svi = seagate_coney_island_all[['rpl_themes']].mean()
corona_svi = corona_all[['rpl_themes']].mean()
university_heights_morris_heights_svi = university_heights_morris_heights_all[['rpl_themes']].mean()
park_slope_gowanus_svi = park_slope_gowanus_all[['rpl_themes']].mean()
grymes_hill_clifton_svi = grymes_hill_clifton_all[['rpl_themes']].mean()
morrisania_melros_svi = morrisania_melros_all[['rpl_themes']].mean()
bronxdale_svi = bronxdale_all[['rpl_themes']].mean()
brownsville_svi = brownsville_all[['rpl_themes']].mean()
claremont_bathgate_svi = claremont_bathgate_all[['rpl_themes']].mean()
greenpoint_svi = greenpoint_all[['rpl_themes']].mean()
charleston_richmond_svi = charleston_richmond_all[['rpl_themes']].mean()
great_kills_svi = great_kills_all[['rpl_themes']].mean()
annadale_huguenot_svi = annadale_huguenot_all[['rpl_themes']].mean()
westerleigh_svi = westerleigh_all[['rpl_themes']].mean()
maspeth_svi = maspeth_all[['rpl_themes']].mean()
arden_heights_svi = arden_heights_all[['rpl_themes']].mean()
glen_oaks_svi = glen_oaks_all[['rpl_themes']].mean()
rossville_woodrow_svi = rossville_woodrow_all[['rpl_themes']].mean()
park_cemetery_brooklyn_svi = park_cemetery_brooklyn_all[['rpl_themes']].mean()

In [402]:
# now, make a df with all these values where the rows represent the svi and the columns are each neighborhood
all_svi_data = {
    'east_village': east_village_svi,
    'west_village': west_village_svi,
    'lenox': lenox_svi,
    'riverdale': riverdale_svi,
    'gramercy': gramercy_svi,
    'tribeca': tribeca_svi,
    'mornings_hights': mornings_hights_svi,
    'stuyvesant_village': stuyvesant_village_svi,
    'yorkville': yorkville_svi,
    'upper_west_side': upper_west_side_svi,
    'upper_east_side': upper_east_side_svi,
    'park_cemetery_bronx': park_cemetery_bronx_svi,
    'seagate_coney_island': seagate_coney_island_svi,
    'corona': corona_svi,
    'university_heights_morris_heights': university_heights_morris_heights_svi,
    'park_slope_gowanus': park_slope_gowanus_svi,
    'grymes_hill_clifton': grymes_hill_clifton_svi,
    'morrisania_melros': morrisania_melros_svi,
    'bronxdale': bronxdale_svi,
    'brownsville': brownsville_svi,
    'claremont_bathgate': claremont_bathgate_svi,
    'greenpoint': greenpoint_svi,
    'charleston_richmond': charleston_richmond_svi,
    'great_kills': great_kills_svi,
    'annadale_huguenot': annadale_huguenot_svi,
    'westerleigh': westerleigh_svi,
    'maspeth': maspeth_svi,
    'arden_heights': arden_heights_svi,
    'glen_oaks': glen_oaks_svi,
    'rossville_woodrow': rossville_woodrow_svi,
    'park_cemetery_brooklyn': park_cemetery_brooklyn_svi
}

In [403]:
svi_df = pd.DataFrame(all_svi_data)
svi_df

Unnamed: 0,east_village,west_village,lenox,riverdale,gramercy,tribeca,mornings_hights,stuyvesant_village,yorkville,upper_west_side,upper_east_side,park_cemetery_bronx,seagate_coney_island,corona,university_heights_morris_heights,park_slope_gowanus,grymes_hill_clifton,morrisania_melros,bronxdale,brownsville,claremont_bathgate,greenpoint,charleston_richmond,great_kills,annadale_huguenot,westerleigh,maspeth,arden_heights,glen_oaks,rossville_woodrow,park_cemetery_brooklyn
rpl_themes,0.7145,0.4954,0.5378,0.8462,0.5312,0.6857,0.8909,0.8059,0.5769,0.7518,0.5451,0.9925,0.9911,0.9632,0.9979,0.6327,0.9444,0.9965,0.9909,0.9933,0.9971,0.637,,0.5291,0.546,0.8489,0.8709,0.5333,0.7721,0.5606,0.9427


In [404]:
svi_df.drop(columns=['charleston_richmond'], inplace=True)

In [405]:
svi_df = svi_df.transpose()
svi_df

Unnamed: 0,rpl_themes
east_village,0.7145
west_village,0.4954
lenox,0.5378
riverdale,0.8462
gramercy,0.5312
tribeca,0.6857
mornings_hights,0.8909
stuyvesant_village,0.8059
yorkville,0.5769
upper_west_side,0.7518


In [406]:
svi_df.reset_index(inplace=True)
svi_df.rename(columns={'index': 'Neighborhood'}, inplace=True)
svi_df

Unnamed: 0,Neighborhood,rpl_themes
0,east_village,0.7145
1,west_village,0.4954
2,lenox,0.5378
3,riverdale,0.8462
4,gramercy,0.5312
5,tribeca,0.6857
6,mornings_hights,0.8909
7,stuyvesant_village,0.8059
8,yorkville,0.5769
9,upper_west_side,0.7518


# **Step 4 We also need a dataframe that has neighborhoods as columns and average_year_eviction_count as rows and contents**

## **Step 4.1 First, we need to find a baseline, the average of eviction rates across all boroughs. This would help with Chi-test**

In [407]:
avg_per_nta = neighbor_evictions.average_year_eviction_count.mean()
avg_per_nta

np.float64(0.5834224598930482)

## **Step 4.2 get the boroughs' series and make a dataframe**

In [408]:
all_nta

Unnamed: 0,nta,eviction_rates
0,park_cemetery_bronx,4.2
1,seagate_coney_island,2.0
2,corona,4.8
3,university_heights_morris_heights,1.4
4,park_slope_gowanus,0.2
5,grymes_hill_clifton,2.6
6,morrisania_melros,1.0
7,bronxdale,1.4
8,brownsville,0.6
9,claremont_bathgate,1.0


In [409]:
all_nta.rename(columns={'nta': 'Neighborhood'}, inplace=True)

In [410]:
all_nta_evi_svi = all_nta.merge(svi_df, on='Neighborhood')
all_nta_evi_svi

Unnamed: 0,Neighborhood,eviction_rates,rpl_themes
0,park_cemetery_bronx,4.2,0.9925
1,seagate_coney_island,2.0,0.9911
2,corona,4.8,0.9632
3,university_heights_morris_heights,1.4,0.9979
4,park_slope_gowanus,0.2,0.6327
5,grymes_hill_clifton,2.6,0.9444
6,morrisania_melros,1.0,0.9965
7,bronxdale,1.4,0.9909
8,brownsville,0.6,0.9933
9,claremont_bathgate,1.0,0.9971


In [412]:
data = {
    'Neighborhood': nta_df_names,
    # booleans, if average eviction rates for each neighborhood > average eviction rates for all neighborhoods
    'Above_eviction_average': [],
    # booleans, if average svi index for each neighborhoods > average eviction rates for all neighborhoods
    'Above_svi_average': []
}
boro_svi_compare_df = pd.DataFrame(data)
boro_svi_compare_df

ValueError: All arrays must be of the same length

# **The following are carried over (TBC)**.



In [None]:
type(average_evictions_man)
# so this is still a series

In [None]:
boro_evictions_df = pd.DataFrame({
    # 'borough': ['Manhattan', 'Brooklyn', 'Queens', 'Staten Island', 'Bronx'],
    'manhattan': average_evictions_man,
    'brooklyn': average_evictions_bk,
    'queens': average_evictions_q,
    'staten island': average_evictions_si,
    'bronx': average_evictions_br
})
boro_evictions_df
# good

## **Step 4.3 An Item for excel use, chi-test**

In [None]:
average_evictions_man, average_evictions_bk, average_evictions_br, average_evictions_si, average_evictions_q, \
average_evictions_man > average_evictions, average_evictions_bk > average_evictions, average_evictions_br > average_evictions, \
average_evictions_si > average_evictions, average_evictions_q > average_evictions

### **In short summary, the only borough that has average eviction rate highger than the one across all five boros was Bronx**

# **Step 5: We also need a dataframe that has boroughs as columns and general svi (the most important svi) as rows and contents**

## **Step 5.1 A baseline (derived but used the official data from the CDC website**

In [None]:
average_svi_eviction = evictions_pre_post[['rpl_themes']].mean()
average_svi_eviction
# really bad
# a bit too high, need to double check
# oh, actually, this is based on eviction rates. Because Bronx has overwhelmingly high eviction rates, so the weight is higher.

In [None]:
svi_df.fips.nunique()

In [None]:
# svi_df.rpl_themes.unique(), 3.997e-01, -9.990e+02
bad_row = svi_df.loc[svi_df.rpl_themes == -9.990e+02]
bad_row.shape

In [None]:
manhattan_svi_df = svi_df[svi_df.fips.isin(range(10001, 10283))]
brooklyn_svi_df = svi_df[svi_df.fips.isin(range(11201, 11257))]
queens_svi_df = svi_df[svi_df.fips.isin(range(11351, 11437)) | svi_df.fips.isin(range(11101, 11110)) | svi_df.fips.isin([11004, 11005, 11411, 11412, 11413, 11418, 11419, 11420, 11421, 11422, 11423, 11426, 11427, 11428, 11429])]
staten_island_svi_df = svi_df[svi_df.fips.isin(range(10301, 10315))]
bronx_svi_df = svi_df[svi_df.fips.isin(range(10451, 10476))]

In [None]:
manhattan_svi_average = manhattan_svi_df[['rpl_themes']].mean()
manhattan_svi_average
# some bad one is in manhattan
manhattan_svi_average = 0.7283
# hard coded this one from https://www.atsdr.cdc.gov/place-health/php/svi/svi-interactive-map.html

In [None]:
brooklyn_svi_average = brooklyn_svi_df[['rpl_themes']].mean()
brooklyn_svi_average
# this is roughly correct

In [None]:
bronx_svi_average = bronx_svi_df[['rpl_themes']].mean()
bronx_svi_average

In [None]:
queens_svi_average = queens_svi_df[['rpl_themes']].mean()
queens_svi_average
queens_svi_average = 0.8024
# queens also has some bad ones

In [None]:
staten_island_svi_average = staten_island_svi_df[['rpl_themes']].mean()
staten_island_svi_average
# bad ones in there too
staten_island_svi_average = 0.5956

In [None]:
(0.9962 + 0.8024 + 0.7283 + 0.8874 + 0.5956)/5
# take this average for now
# interesting
# this is the baseline
# https://www.atsdr.cdc.gov/place-health/php/svi/svi-interactive-map.html

In [None]:
average_svi_all = 0.80198
# official data

## **Step 5.2 get the series and make a dataframe**

In [None]:
Manhattan.columns

In [None]:
average_svi_man = Manhattan[['rpl_themes']].mean()
average_svi_bk = Brooklyn[['rpl_themes']].mean()
average_svi_br = Bronx[['rpl_themes']].mean()
average_svi_si = Staten_Island[['rpl_themes']].mean()
average_svi_q = Queens[['rpl_themes']].mean()

In [None]:
type(average_evictions_man)
# so this is still a series

In [None]:
boro_svi_df = pd.DataFrame({
    # 'borough': ['Manhattan', 'Brooklyn', 'Queens', 'Staten Island', 'Bronx'],
    'manhattan': average_svi_man,
    'brooklyn': average_svi_bk,
    'queens': average_svi_q,
    'staten island': average_svi_si,
    'bronx': average_svi_br
})
boro_svi_df
# good

## **Step 5.3 An item for excel use, Chi-test**

In [None]:
manhattan_svi_average, brooklyn_svi_average, bronx_svi_average, staten_island_svi_average, queens_svi_average, \
manhattan_svi_average > average_svi_all, brooklyn_svi_average > average_svi_all, bronx_svi_average > average_svi_all, \
queens_svi_average > average_svi_all, staten_island_svi_average > average_svi_all

## **For Chi-test, only man and si are below average svi scores across all five boroughs**

# **In summary, repeat step 3 for the selected 11 or less neighborhoods**
# **Also repeat chi-test for some other metrix that requires baseline comparisons.**