# **Introduction**

This notebook builds an interpretable, layered model of evictions under different scenarios (which any interested user can adjust and define with different objectives in mind). Previously, we developed a solid understanding of how geo-temporal factors, structural vulnerability, building conditions, and tenant complaint behavior contribute to evictions (mainly using a Random Forest Regressor, with some help from Linear Regression and Decision Trees). Now, we select one model (which happens to be the Random Forest Regressor again) and build a prediction function around it. This function lets us predict eviction severity independently (in terms of location, feature selection, and variant selection) and interactively (in terms of flexible parameter choices and scenario building). By using this model to evaluate a few neighborhoods (and, in the future, particular buildings), we will be able to answer questions such as "What if ...?" scenarios, "Which types of buildings are most resilient to which features?", and "Which factors had the largest effect on which location/building?", and thereby provide data-based insights for policy-making.


In [None]:
import datetime as dt
import io
import os

import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import shap
import statsmodels.api as sm
from scipy import stats
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import mutual_info_regression
from sklearn.inspection import PartialDependenceDisplay
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from statsmodels.stats.outliers_influence import variance_inflation_factor
from xgboost import XGBRegressor

# suppress warning
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

In [None]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', '{:.5f}'.format)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
file_path = "/content/drive/My Drive/X999/bbl_evictions_311_svi_normal_times_analysis.csv"

In [None]:
df_raw = pd.read_csv(file_path)

In [None]:
df = df_raw.copy()

# **Step 1: some minor adjustment of the data**

In [None]:
df.svi_quartile.unique()
# these are strings, and will not work with the analysis.

array(['Q3', 'Q4 (High)', 'Q2', 'Q1 (Low)'], dtype=object)

In [None]:
# Parse the quartile digit out of labels such as 'Q4 (High)'.
# Read from the untouched raw frame so the cell is idempotent: re-running it after
# df['svi_quartile'] has already become int would otherwise fail on the `.str` accessor.
# expand=False makes str.extract return a Series for the single capture group, and
# bracket assignment makes it explicit that a column (not an attribute) is being set.
df['svi_quartile'] = df_raw['svi_quartile'].str.extract(r'Q(\d)', expand=False).astype(int)

In [None]:
df.svi_quartile.unique(), df.svi_quartile[0].dtype

(array([3, 4, 2, 1]), dtype('int64'))

In [None]:
# list(df.columns)

# **Step 2: Model Selection: Random forest regressors/XGBoost/MLP**

In [None]:
# xgb_model = XGBRegressor().fit()
# xgb_results = xgb_model.predict(X_test)

In [None]:
# from a previous notebook, all in one cell for clarity:
# Feature groups carried over from a previous notebook, kept in one cell for clarity.

# Social-vulnerability (SVI) features.
svi_vars = [
    'rpl_theme1', 'rpl_theme2', 'rpl_theme3', 'rpl_theme4', 'rpl_themes',
    'ep_pov150', 'ep_unemp', 'ep_nohsdp', 'ep_uninsur',
    'ep_age65', 'ep_age17', 'ep_disabl', 'ep_limeng', 'ep_noveh', 'ep_crowd', 'ep_hburd',
    'ep_afam', 'ep_hisp', 'ep_asian', 'ep_aian',
    'ep_nhpi', 'ep_twomore', 'ep_otherrace', 'ep_minrty', 'ep_white',
    'svi_quartile',
]

# Building characteristics.
building_vars = [
    'numfloors', 'building_category_ordinal', 'unitsres', 'bldgarea', 'building_type',
    'is_condo', 'is_llc',
    'style_ordinal', 'building_age', 'floor_category', 'size_quartile',
    'building_size_category', 'residential_units_category',
]

# Complaint counts by category, plus the overall total.
complaints_vars = [
    'air_quality', 'animal_issues', 'appliances', 'building_exterior', 'doors_windows',
    'electrical_issues',
    'elevator_issues', 'floors_stairs', 'general_complaints', 'graffiti_posting',
    'heat_hot_water', 'homeless_issues', 'noise_complaints',
    'other_issues', 'pest_issues', 'plumbing_issues', 'police_matters', 'public_nuisance',
    'safety_concerns', 'sanitation_issues', 'walls_ceilings', 'total_complaints',
]

# 'month_year' is a string whose last two characters are the month; store it as an integer.
df['month'] = df['month_year'].str[-2:].astype(int)

# Encode the borough name as an integer code (1-5).
borough_map = dict(zip(
    ['MANHATTAN', 'BRONX', 'BROOKLYN', 'QUEENS', 'STATEN ISLAND'],
    range(1, 6),
))
df['borough_int'] = df['borough'].map(borough_map)

# Left out on purpose: geometry, borough, month_year.
geo_temporal_vars = ['longitude', 'latitude', 'borough_int', 'year', 'month']

all_vars = svi_vars + building_vars + complaints_vars + geo_temporal_vars

# Build the design matrix / target and the train-test split while we are here.
X_all = df[all_vars]
y_all = df['average_year_eviction_count']
X_train_all, X_test_all, y_train_all, y_test_all = train_test_split(
    X_all, y_all, test_size=0.2, random_state=42
)

In [None]:
all_vars = svi_vars + building_vars + complaints_vars + geo_temporal_vars
target = 'average_year_eviction_count'

In [None]:
nans_total = df[all_vars + [target]].isna().sum().sum()
nans_total

np.int64(0)

In [None]:
df_model = df[all_vars + [target]]
X = df_model[all_vars]
y = df_model[target]

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
xgb_model = XGBRegressor(
    n_estimators=100,
    max_depth=6,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)

xgb_model.fit(X_train, y_train)


In [None]:
xgb_results = xgb_model.predict(X_test)
mse = mean_squared_error(y_test, xgb_results)
r2 = r2_score(y_test, xgb_results)
print(f"xgb test mse: {mse:.4f}; r-squared: {r2:.4f}")

xgb test mse: 1.7103; r-squared: 0.6474


In [None]:
model_rfr_all = RandomForestRegressor(n_estimators=100, random_state=42)
model_rfr_all.fit(X_train_all, y_train_all)

In [None]:
# Score the forest on the held-out split that pairs with the data it was fit on
# (X_train_all / y_train_all). X_test / y_test come from a separate train_test_split
# call and only coincide because both calls use the same rows, test_size and
# random_state; relying on that would silently leak training rows if either changed.
rfr_results = model_rfr_all.predict(X_test_all)
mse = mean_squared_error(y_test_all, rfr_results)
r2 = r2_score(y_test_all, rfr_results)
print(f"rfr test mse: {mse:.4f}; r-squared: {r2:.4f}")
# better than xgb
# better than xgb

rfr test mse: 1.2131; r-squared: 0.7499


**Although XGBoost is often considered a high-performance model, in our experiments, the Random Forest Regressor consistently outperformed XGBoost in both accuracy (MSE = 1.21 vs 1.71) and explanatory power (R² = 0.75 vs 0.65). Given its superior performance and greater interpretability, we adopt Random Forest as our primary model for simulation and analysis.**

In [None]:
# MLPs are sensitive to feature scale, and the features here range from 0-1 percentile
# ranks to building areas in the millions, so standardise them before the network.
# Keeping the scaler inside the pipeline means it is fit on the training split only and
# is re-applied automatically by model.predict(...), so no test statistics leak in.
# Re-run the evaluation cell below after this change to refresh the recorded metrics.
model = make_pipeline(
    StandardScaler(),
    MLPRegressor(hidden_layer_sizes=(100, 50), max_iter=1000, random_state=42),
)
model.fit(X_train, y_train)

In [None]:
mlp_results = model.predict(X_test)
mse = mean_squared_error(y_test, mlp_results)
r2 = r2_score(y_test, mlp_results)
print(f"mlp test mse: {mse:.4f}; r-squared: {r2:.4f}")
# every time it is very different

mlp test mse: 109.9637; r-squared: -21.6703


**The results of a simple MLP were not good at all. However, this is not sufficient to conclude that neural networks would be bad at predicting evictions. In fact, we experimented with a Gaussian Mixture Neural Network (GMNN), and the results were good. With more experiments they might be even better, but given the scope and focus of this study (which is not about how and why a neural network can be fine-tuned to better solve a particular social science question), we redirect our focus to simulation with the Random Forest Regressor (RFR) in order to compare, interpret, understand, and explain the data.**

In [None]:
X_all.dtypes[X_all.dtypes == 'object']

Unnamed: 0,0


# **Step 3: Locate the POI (point of interest) in zipcodes and buildings for simulations**

In [None]:
# Count eviction records per zip code and keep the three zip codes with the most records.
top_zipcodes_df = (
    df.groupby('zipcode')
    .size()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
    .head(3)
)
top_zipcodes_df

Unnamed: 0,zipcode,count
63,10458,2221
72,10467,2192
58,10453,1994


In [None]:
# df.nta.unique()

In [None]:
# top_nta_df = df.groupby('nta').size().reset_index(name='count')
# bottom_nta_df = top_nta_df.sort_values(by='count', ascending=False)
# bottom_nta_df

In [None]:
# Count eviction records per neighborhood (NTA) and keep the three with the most records.
top_nta_df = (
    df.groupby('nta')
    .size()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
    .head(3)
)
top_nta_df
# another two neighborhoods we are going to consider is 'SoHo-TriBeCa-Civic Center-Little Italy' and 'Upper East Side-Carnegie Hill'

Unnamed: 0,nta,count
27,Central Harlem North-Polo Grounds,1632
38,Crown Heights North,1599
11,Bedford Park-Fordham North,1530


In [None]:
# picked 3 worst neighborhoods
nta_list = [#'SoHo-TriBeCa-Civic Center-Little Italy', 'Upper East Side-Carnegie Hill',
            'Central Harlem North-Polo Grounds', 'Crown Heights North', 'Bedford Park-Fordham North']

In [None]:
# Count eviction records per building identification number (bin) and keep the top ten.
top_bbls_df = (
    df.groupby('bin')
    .size()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
    .head(10)
)
top_bbls_df

Unnamed: 0,bin,count
13603,3000000,104
5362,2000000,97
13459,2127134,74
12868,2113629,67
22092,3326600,66
12453,2093973,61
21616,3253907,49
12867,2113628,48
6219,2004223,47
0,1000000,40


In [None]:
# The ten BINs with the most eviction records, copied by hand from the top_bbls_df output above.
# NOTE(review): 1000000, 2000000 and 3000000 look like borough-level placeholder BINs rather
# than real buildings — confirm before interpreting them as single-building hotspots.
hotspot_bins = [3000000, 2000000, 2127134, 2113629, 3326600,2093973, 3253907, 2113628, 2004223, 1000000]

In [None]:
# def reduce_complaints(sample_row, complaint_features):
#     modified = sample_row.copy()
#     for col in complaint_features:
#         modified[col] = 0
#     return modified

In [None]:
hotspot_samples = df[df['bin'].isin(hotspot_bins)]
hotspot_samples.head()

Unnamed: 0.1,Unnamed: 0,primary_key,bbl,court_index_number,docket_number,eviction_address,eviction_apartment_number,executed_date,borough,zipcode,ejectment,eviction/legal_possession,latitude,longitude,community_board,council_district,census_tract,bin,nta,year,month_year,geometry,average_year_eviction_count,yearbuilt,bldgclass,numfloors,unitsres,ownername,bldgarea,building_type,building_category,is_condo,floor_category,rent_era,architectural_style,economic_period,residential_units_category,is_llc,building_size_category,size_quartile,decade,fips,e_totpop,rpl_theme1,rpl_theme2,rpl_theme3,rpl_theme4,rpl_themes,ep_pov150,ep_unemp,ep_nohsdp,ep_uninsur,ep_age65,ep_age17,ep_disabl,ep_limeng,ep_noveh,ep_crowd,ep_hburd,ep_afam,ep_hisp,ep_asian,ep_aian,ep_nhpi,ep_twomore,ep_otherrace,ep_minrty,ep_white,invalid_zip,svi_quartile,svi_group,air_quality,animal_issues,appliances,building_exterior,doors_windows,electrical_issues,elevator_issues,floors_stairs,general_complaints,graffiti_posting,heat_hot_water,homeless_issues,noise_complaints,other_issues,pest_issues,plumbing_issues,police_matters,public_nuisance,safety_concerns,sanitation_issues,walls_ceilings,total_complaints,building_age,style_ordinal,style_freq,style_label,building_category_ordinal,building_category_freq,building_category_le,age_bin,age_group,month,borough_int
122,122,011901/18_83325,2026100012,011901/18,83325,530 EAST 169TH ST,16A,2019-01-17,BRONX,10456,Not an Ejectment,Possession,40.832613,-73.904676,3.0,16.0,145.0,2004223,Claremont-Bathgate,2019,2019-01,POINT (-73.904676 40.832613),9.4,1965.0,D6,17.0,321.0,"FORDHAM FULTON REALTY, CORP.",285056.0,1,elevator,0,3,"1947–1969, rent-control","1951–1980, the International Style, Alternativ...","1946–1975, pst war economic boom",150,0,8,4,1960-1969,10456,88575.0,0.996,0.9903,0.991,0.9972,0.9994,49.1,14.7,33.4,7.3,11.3,27.1,19.3,14.7,76.4,11.1,54.9,38.2,56.3,0.7,0.2,0.0,1.8,0.4,97.6,2.4,False,4,high,0.0,6.0,34.0,2.0,164.0,20.0,127.0,40.0,91.0,0.0,1838.0,0.0,373.0,0.0,21.0,184.0,1.0,2.0,20.0,199.0,62.0,3184.0,60.0,5,0.181213,3,6,0.38443,1,50+,"(30, 60]",1,2
158,158,018569/19_99619,2026100012,018569/19,99619,540 EAST 169TH ST.,18F,2019-11-26,BRONX,10456,Not an Ejectment,Possession,40.832517,-73.904383,3.0,16.0,145.0,2004223,Claremont-Bathgate,2019,2019-11,POINT (-73.904383 40.832517),9.4,1965.0,D6,17.0,321.0,"FORDHAM FULTON REALTY, CORP.",285056.0,1,elevator,0,3,"1947–1969, rent-control","1951–1980, the International Style, Alternativ...","1946–1975, pst war economic boom",150,0,8,4,1960-1969,10456,88575.0,0.996,0.9903,0.991,0.9972,0.9994,49.1,14.7,33.4,7.3,11.3,27.1,19.3,14.7,76.4,11.1,54.9,38.2,56.3,0.7,0.2,0.0,1.8,0.4,97.6,2.4,False,4,high,0.0,6.0,34.0,2.0,164.0,20.0,127.0,40.0,91.0,0.0,1838.0,0.0,373.0,0.0,21.0,184.0,1.0,2.0,20.0,199.0,62.0,3184.0,60.0,5,0.181213,3,6,0.38443,1,50+,"(30, 60]",11,2
201,201,025401/18_84954,2026100012,025401/18,84954,540 EAST 169TH ST,15K,2019-02-01,BRONX,10456,Not an Ejectment,Possession,40.832517,-73.904383,3.0,16.0,145.0,2004223,Claremont-Bathgate,2019,2019-02,POINT (-73.904383 40.832517),9.4,1965.0,D6,17.0,321.0,"FORDHAM FULTON REALTY, CORP.",285056.0,1,elevator,0,3,"1947–1969, rent-control","1951–1980, the International Style, Alternativ...","1946–1975, pst war economic boom",150,0,8,4,1960-1969,10456,88575.0,0.996,0.9903,0.991,0.9972,0.9994,49.1,14.7,33.4,7.3,11.3,27.1,19.3,14.7,76.4,11.1,54.9,38.2,56.3,0.7,0.2,0.0,1.8,0.4,97.6,2.4,False,4,high,0.0,6.0,34.0,2.0,164.0,20.0,127.0,40.0,91.0,0.0,1838.0,0.0,373.0,0.0,21.0,184.0,1.0,2.0,20.0,199.0,62.0,3184.0,60.0,5,0.181213,3,6,0.38443,1,50+,"(30, 60]",2,2
307,307,0302551/23_10953,3033700006,0302551/23,10953,435 WILSON AVENUE,4B,2024-03-27,BROOKLYN,11221,Not an Ejectment,Possession,40.693907,-73.913763,4.0,37.0,435.0,3000000,Bushwick North,2024,2024-03,POINT (-73.913763 40.693907),35.6,2018.0,C1,4.0,9.0,WILSON REALTY USA LLC,8247.0,1,walk-up,0,2,"1994–Present, vacancy decontrol","2001-present, New Architecture","2009–present, post-financial crisis",13,1,5,4,2010-2020,11221,91236.0,0.9599,0.4593,0.9521,0.9624,0.9352,30.0,8.5,15.5,8.4,10.2,17.3,9.8,6.6,62.7,6.2,35.3,39.7,31.0,5.1,0.0,0.1,3.7,0.6,80.2,19.8,False,2,medium-low,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,5.0,7.0,7,0.08651,5,3,0.412549,5,0-10,"(0, 30]",3,3
869,869,047846/16_66997,2026100012,047846/16,66997,540 EAST 169TH ST.,8K,2017-02-15,BRONX,10456,Not an Ejectment,Possession,40.832517,-73.904383,3.0,16.0,145.0,2004223,Claremont-Bathgate,2017,2017-02,POINT (-73.904383 40.832517),9.4,1965.0,D6,17.0,321.0,"FORDHAM FULTON REALTY, CORP.",285056.0,1,elevator,0,3,"1947–1969, rent-control","1951–1980, the International Style, Alternativ...","1946–1975, pst war economic boom",150,0,8,4,1960-1969,10456,88575.0,0.996,0.9903,0.991,0.9972,0.9994,49.1,14.7,33.4,7.3,11.3,27.1,19.3,14.7,76.4,11.1,54.9,38.2,56.3,0.7,0.2,0.0,1.8,0.4,97.6,2.4,False,4,high,0.0,6.0,34.0,2.0,164.0,20.0,127.0,40.0,91.0,0.0,1838.0,0.0,373.0,0.0,21.0,184.0,1.0,2.0,20.0,199.0,62.0,3184.0,60.0,5,0.181213,3,6,0.38443,1,50+,"(30, 60]",2,2


In [None]:
hotspot_samples.shape

(653, 104)

In [None]:
hotspot_samples.average_year_eviction_count.mean()
# original

np.float64(18.28820826952527)

In [None]:
hotspot_samples = df[df['nta'].isin(nta_list)]
hotspot_samples.head()

Unnamed: 0.1,Unnamed: 0,primary_key,bbl,court_index_number,docket_number,eviction_address,eviction_apartment_number,executed_date,borough,zipcode,ejectment,eviction/legal_possession,latitude,longitude,community_board,council_district,census_tract,bin,nta,year,month_year,geometry,average_year_eviction_count,yearbuilt,bldgclass,numfloors,unitsres,ownername,bldgarea,building_type,building_category,is_condo,floor_category,rent_era,architectural_style,economic_period,residential_units_category,is_llc,building_size_category,size_quartile,decade,fips,e_totpop,rpl_theme1,rpl_theme2,rpl_theme3,rpl_theme4,rpl_themes,ep_pov150,ep_unemp,ep_nohsdp,ep_uninsur,ep_age65,ep_age17,ep_disabl,ep_limeng,ep_noveh,ep_crowd,ep_hburd,ep_afam,ep_hisp,ep_asian,ep_aian,ep_nhpi,ep_twomore,ep_otherrace,ep_minrty,ep_white,invalid_zip,svi_quartile,svi_group,air_quality,animal_issues,appliances,building_exterior,doors_windows,electrical_issues,elevator_issues,floors_stairs,general_complaints,graffiti_posting,heat_hot_water,homeless_issues,noise_complaints,other_issues,pest_issues,plumbing_issues,police_matters,public_nuisance,safety_concerns,sanitation_issues,walls_ceilings,total_complaints,building_age,style_ordinal,style_freq,style_label,building_category_ordinal,building_category_freq,building_category_le,age_bin,age_group,month,borough_int
5,5,000098/17_69483,2031770041,000098/17,69483,65 EAST 193RD ST,1B,2017-05-04,BRONX,10468,Not an Ejectment,Possession,40.866075,-73.896515,7.0,14.0,401.0,2013945,Bedford Park-Fordham North,2017,2017-05,POINT (-73.896515 40.866075),1.6,1937.0,D1,6.0,42.0,LSB HULL ASSOCIATES,37800.0,0,elevator,0,2,"Pre-1947, pre-rent-control","1931–1950, Manhattan Modern","1930-1945, great depression and WWII",60,0,7,4,1930-1939,10468,81397.0,0.9954,0.9407,0.987,0.947,0.9874,39.5,11.6,28.3,9.2,11.2,26.4,12.2,26.9,71.8,19.2,56.7,15.6,78.0,2.3,0.0,0.0,0.5,0.5,96.9,3.1,False,3,medium-high,0.0,0.0,3.0,0.0,11.0,9.0,4.0,5.0,9.0,0.0,53.0,0.0,12.0,0.0,14.0,44.0,0.0,0.0,8.0,32.0,28.0,232.0,88.0,4,0.16942,2,6,0.38443,1,50+,"(60, 90]",5,2
6,6,0002959/16_69081,2033020071,0002959/16,69081,2847 BRIGGS AVENUE,SECOND FLOOR,2017-10-03,BRONX,10458,Not an Ejectment,Possession,40.86929,-73.888761,7.0,15.0,40702.0,2017108,Bedford Park-Fordham North,2017,2017-10,POINT (-73.888761 40.86929),1.0,1901.0,B2,2.0,2.0,"MAMONAU, DZMITRY",2358.0,0,two-family,0,1,"Pre-1947, pre-rent-control","1900–1920, Beaux-Arts","Pre-1929, pre-great depression",2,0,3,3,1900-1909,10458,82678.0,0.9983,0.9179,0.9775,0.963,0.9891,45.2,14.3,31.9,9.7,9.7,25.7,11.7,24.6,71.6,18.1,59.3,16.1,71.3,3.3,0.3,0.0,1.7,0.4,93.0,7.0,False,3,medium-high,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,16.0,124.0,2,0.229784,0,2,0.090004,4,,"(120, 200]",10,2
13,13,0012760/18_87617,1021060003,0012760/18,87617,2971 EIGHTH AVENUE,08A,2018-11-01,MANHATTAN,10039,Not an Ejectment,Possession,40.830297,-73.936456,10.0,9.0,24302.0,1084520,Central Harlem North-Polo Grounds,2018,2018-11,POINT (-73.936456 40.830297),3.0,2004.0,D7,30.0,1614.0,NYC HOUSING AUTHORITY,2531670.0,1,elevator,0,3,"1994–Present, vacancy decontrol","2001-present, New Architecture","1991–2008, modern economic growth",150,0,8,4,2000-2009,10039,29887.0,0.9868,0.9242,0.9797,0.8849,0.9748,38.3,16.6,18.2,7.2,11.1,22.0,15.1,7.9,77.9,7.9,41.5,54.7,32.7,1.9,0.1,0.0,2.8,1.1,93.3,6.7,False,3,medium-high,1.0,19.0,0.0,0.0,5.0,0.0,18.0,0.0,0.0,0.0,1.0,7.0,2088.0,1.0,4.0,4.0,2.0,1.0,0.0,4.0,1.0,2156.0,21.0,7,0.08651,5,6,0.38443,1,20-30,"(0, 30]",11,1
24,24,002960/16_66686,2033020071,002960/16,66686,2847 BRIGGS AVENUE,FIRST FLOOR,2017-10-03,BRONX,10458,Not an Ejectment,Possession,40.86929,-73.888761,7.0,15.0,40702.0,2017108,Bedford Park-Fordham North,2017,2017-10,POINT (-73.888761 40.86929),1.0,1901.0,B2,2.0,2.0,"MAMONAU, DZMITRY",2358.0,0,two-family,0,1,"Pre-1947, pre-rent-control","1900–1920, Beaux-Arts","Pre-1929, pre-great depression",2,0,3,3,1900-1909,10458,82678.0,0.9983,0.9179,0.9775,0.963,0.9891,45.2,14.3,31.9,9.7,9.7,25.7,11.7,24.6,71.6,18.1,59.3,16.1,71.3,3.3,0.3,0.0,1.7,0.4,93.0,7.0,False,3,medium-high,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,16.0,124.0,2,0.229784,0,2,0.090004,4,,"(120, 200]",10,2
35,35,0052539/19_97777,3013930066,0052539/19,97777,1325 EASTERN PARKWAY,11C,2019-10-25,BROOKLYN,11233,Not an Ejectment,Possession,40.668448,-73.924235,8.0,41.0,359.0,3037446,Crown Heights North,2019,2019-10,POINT (-73.924235 40.668448),1.4,1924.0,C1,4.0,43.0,"1325 PARKWAY ESTATES, LLC",40790.0,0,walk-up,0,2,"Pre-1947, pre-rent-control","1921–1930, Art Deco Skyscrapers","Pre-1929, pre-great depression",60,1,7,4,1920-1929,11233,83125.0,0.9541,0.7613,0.9673,0.8866,0.9438,33.0,7.3,14.9,6.9,12.2,20.9,12.4,2.4,63.0,5.0,41.3,66.7,15.7,1.3,0.5,0.0,3.6,0.2,88.1,11.9,False,2,medium-low,0.0,0.0,0.0,0.0,13.0,4.0,0.0,6.0,3.0,0.0,224.0,0.0,27.0,0.0,4.0,40.0,0.0,0.0,2.0,30.0,17.0,370.0,101.0,3,0.286052,1,3,0.412549,5,,"(90, 120]",10,3


In [None]:
hotspot_samples.shape
# this is why nta running the function takes much long

(4761, 104)

In [None]:
hotspot_samples.average_year_eviction_count.mean()
# original

np.float64(1.1972694812014282)

In [None]:
# result_df[['bin', 'original_eviction_count', 'average_year_eviction_count_c', 'reduction_in_evictions_c', 'percent_reduction_c', 'average_year_eviction_count_s', 'reduction_in_evictions_s', 'percent_reduction_s', 'original_llc_status', 'llc_change', 'average_year_eviction_count_llc', 'reduction_in_evictions_llc', 'percent_reduction_llc']].head()

Unnamed: 0,bin,original_eviction_count,average_year_eviction_count_c,reduction_in_evictions_c,percent_reduction_c,average_year_eviction_count_s,reduction_in_evictions_s,percent_reduction_s,original_llc_status,llc_change,average_year_eviction_count_llc,reduction_in_evictions_llc,percent_reduction_llc
122,2004223,9.4,3.954198,5.445802,57.93,1.040491,8.359509,88.93,0.0,no to yes,9.433686,-0.033686,-0.36
158,2004223,9.4,2.672542,6.727458,71.57,1.335571,8.064429,85.79,0.0,no to yes,9.255,0.145,1.54
201,2004223,9.4,2.672542,6.727458,71.57,1.335571,8.064429,85.79,0.0,no to yes,9.3725,0.0275,0.29
307,3000000,35.6,32.576744,3.023256,8.49,8.368412,27.231588,76.49,1.0,yes to no,23.721512,11.878488,33.37
869,2004223,9.4,2.672542,6.727458,71.57,1.350137,8.049863,85.64,0.0,no to yes,9.390586,0.009414,0.1


In [None]:
hotspot_bins = [ 3000000, 2000000, 2127134, 2113629, 3326600, 2093973, 3253907, 2113628, 2004223, 1000000]

# **Step 4: Create a function that accepts a variety of geographic inputs, feature selections, and degrees/directions of change. [This is the most important part]**

In [None]:
# the core function:
# anything with None can be replaced or interchanged by another comparable parameter
def process_hotspot_counterfactuals(df, hotspot_bins=None, zip_codes=None, neighborhoods=None, rfr_model = None, feature_cols = None, complaints_vars=None, svi_vars=None, change_llc=False,
                                   # building characteristic changes (a dict of {feature: percent change} on the original float features)
                                   building_change=None, building_age_change=None,
                                   # complaint modifications (a dict of {complaint: percent reduction}), plus an overall percentage
                                   targeted_complaints=None, complaint_reduction_pct=None,
                                   # SVI modifications (a dict of theme definitions), default quantile is 0.25
                                   svi_themes=None, svi_reduction_quantile=0.25):
    """Run "what if" (counterfactual) eviction predictions for a subset of records.

    The rows of ``df`` are filtered by exactly one geographic selector, then every
    requested scenario modifies a copy of the feature matrix, asks the model for new
    predictions, and appends three columns to the result:

    * ``average_year_eviction_count{suffix}`` - the model prediction for the scenario,
    * ``change_in_evictions{suffix}`` - ``original_eviction_count`` minus that prediction
      (positive means the prediction is lower than the observed count),
    * ``percent_change{suffix}`` - that change as a percentage of the observed count,
      rounded to two decimals (rows whose observed count is 0 yield inf/NaN).

    NOTE: the baseline is the *observed* ``average_year_eviction_count``, not the model's
    prediction on the unmodified row, so the reported change also contains the model's
    ordinary prediction error for that row.

    Parameters
    ----------
    df : pandas.DataFrame
        Full data set. Must contain ``average_year_eviction_count``, every column in
        ``feature_cols`` and the column of whichever selector is used.
    hotspot_bins, zip_codes, neighborhoods : list-like, optional
        Row selectors matched against ``bin``, ``zipcode`` and ``nta`` respectively.
        The first one that is not None wins (in that order); if all are None the whole
        frame is used.
    rfr_model : fitted estimator with ``predict``, optional
        Model used for every scenario. When omitted, the notebook-level
        ``model_rfr_all`` is used for backward compatibility.
    feature_cols : list of str
        Model features, in the order the model was fit with. Required as soon as any
        scenario is requested.
    complaints_vars : list of str, optional
        Complaint features. Produces the ``_c`` scenario (all set to 0).
    complaint_reduction_pct : number, optional
        With ``complaints_vars``: reduce all of them by this percentage
        (suffix ``_c{pct}``).
    targeted_complaints : dict, optional
        ``{complaint_feature: percent_reduction}``, suffix ``_targeted``.
    svi_vars : list of str, optional
        SVI features set to their ``svi_reduction_quantile`` quantile over the whole
        ``df`` (suffix ``_s``).
    svi_themes : dict, optional
        ``{theme_name: {'vars': [...], 'quantile': q}}``; one scenario per theme
        (suffix ``_svi_{theme_name}``, quantile defaults to 0.25).
    change_llc : bool
        Flip ``is_llc`` for every row (suffix ``_llc``); also records
        ``original_llc_status`` and ``llc_change``.
    building_change : dict, optional
        ``{feature: percent_change}``; one scenario per feature (suffix ``_{feature}``)
        and, when more than one feature is given, a combined scenario (``_bld_all``)
        that also applies ``building_age_change`` if set.
    building_age_change : number, optional
        Years to subtract from ``building_age``, floored at 0 (suffix ``_age``).

    Returns
    -------
    pandas.DataFrame
        A copy of the selected rows with ``original_eviction_count`` and the scenario
        columns added.

    Raises
    ------
    ValueError
        If a scenario is requested but ``feature_cols`` is missing, or no model is
        available.
    """
    # Features that are counts or sizes and therefore must never go below zero.
    non_negative_features = ('numfloors', 'unitsres', 'bldgarea')

    # use data based on hotspot_bins or zip_codes or nta
    if hotspot_bins is not None:
        hotspot_df = df[df['bin'].isin(hotspot_bins)].copy()
    elif zip_codes is not None:
        hotspot_df = df[df['zipcode'].isin(zip_codes)].copy()
    elif neighborhoods is not None:
        hotspot_df = df[df['nta'].isin(neighborhoods)].copy()
    else:
        # if no filtering parameter is provided, use all data
        hotspot_df = df.copy()
    hotspot_df['original_eviction_count'] = hotspot_df['average_year_eviction_count'].copy()

    scenario_requested = any([
        bool(complaints_vars), bool(targeted_complaints), bool(svi_vars), bool(svi_themes),
        bool(change_llc), bool(building_change), building_age_change is not None,
    ])
    if not scenario_requested:
        # Nothing to simulate: the model and feature list are not needed at all.
        return hotspot_df

    if feature_cols is None:
        raise ValueError("feature_cols must be provided when a scenario is requested")
    feature_cols = list(feature_cols)

    # Honour the model passed by the caller; fall back to the notebook-level forest only
    # when none is given, so existing calls that relied on the global keep working.
    model = rfr_model
    if model is None:
        try:
            model = model_rfr_all
        except NameError:
            raise ValueError("rfr_model must be provided (no global model_rfr_all found)") from None

    # Unmodified feature matrix; every scenario starts from a fresh copy of it.
    base_X = hotspot_df[feature_cols].copy()

    def _record_scenario(X_modified, suffix):
        """Predict one scenario for all rows at once and store its three result columns."""
        if len(X_modified) == 0:
            # Estimators generally reject zero-row input; an empty selection has no predictions.
            predictions = np.array([], dtype=float)
        else:
            predictions = np.asarray(model.predict(X_modified[feature_cols]), dtype=float)
        hotspot_df[f'average_year_eviction_count{suffix}'] = predictions
        hotspot_df[f'change_in_evictions{suffix}'] = hotspot_df['original_eviction_count'] - hotspot_df[f'average_year_eviction_count{suffix}']
        hotspot_df[f'percent_change{suffix}'] = (hotspot_df[f'change_in_evictions{suffix}'] / hotspot_df['original_eviction_count'] * 100).round(2)

    def _scaled_building_value(feature, change_pct):
        """Return the feature changed by change_pct percent (positive increases, negative decreases)."""
        scaled = base_X[feature] * (1 + (change_pct / 100))
        if feature in non_negative_features:
            scaled = scaled.clip(lower=0)
        return scaled

    def _younger_building_age():
        """Return building_age reduced by building_age_change years, never below zero."""
        return (base_X['building_age'] - building_age_change).clip(lower=0)

    # variable group 1: complaints related
    # Columns outside feature_cols never reach the model, so only model features are edited.
    if complaints_vars:
        complaint_features = [col for col in complaints_vars if col in feature_cols]

        # first: zero out all complaints
        zeroed = base_X.copy()
        for col in complaint_features:
            zeroed[col] = 0
        _record_scenario(zeroed, '_c')

        # second: reduce all complaints by a percentage (the percentage can be modified)
        if complaint_reduction_pct:
            reduction_factor = complaint_reduction_pct / 100
            reduced = base_X.copy()
            for col in complaint_features:
                reduced[col] = base_X[col] * (1 - reduction_factor)
            _record_scenario(reduced, f'_c{complaint_reduction_pct}')

    # third: reduce selected complaint types, each by its own percentage
    if targeted_complaints:
        targeted = base_X.copy()
        for complaint_type, reduction_pct in targeted_complaints.items():
            if complaint_type in feature_cols:
                reduction_factor = reduction_pct / 100
                targeted[complaint_type] = base_X[complaint_type] * (1 - reduction_factor)
        _record_scenario(targeted, '_targeted')

    # variable group 2: SVI related
    # fourth: move every SVI variable to one quantile of the full data set
    if svi_vars:
        shifted = base_X.copy()
        for svi in svi_vars:
            if svi in feature_cols:
                # The quantile is taken over all of df (not just the hotspot rows), once per variable.
                shifted[svi] = df[svi].quantile(svi_reduction_quantile)
        _record_scenario(shifted, '_s')

    # fifth: move selected SVI theme groups to a quantile (first quartile by default)
    if svi_themes:
        for theme_name, theme_info in svi_themes.items():
            theme_vars = theme_info['vars']
            theme_quantile = theme_info.get('quantile', 0.25)
            themed = base_X.copy()
            for svi in theme_vars:
                if svi in feature_cols:
                    themed[svi] = df[svi].quantile(theme_quantile)
            _record_scenario(themed, f'_svi_{theme_name}')

    # variable group 3: LLC status
    # sixth: flip the LLC status of every row
    if change_llc:
        was_llc = hotspot_df['is_llc'].eq(True)
        hotspot_df['original_llc_status'] = hotspot_df['is_llc']
        hotspot_df['llc_change'] = np.where(was_llc, 'yes to no', 'no to yes')

        flipped = base_X.copy()
        flipped['is_llc'] = np.where(was_llc, 0, 1)
        _record_scenario(flipped, '_llc')

    # variable group 4: common building features
    # seventh: change each building characteristic separately by its own percentage
    if building_change:
        for feature, change_pct in building_change.items():
            if feature not in feature_cols:
                continue
            modified_values = _scaled_building_value(feature, change_pct)
            hotspot_df[f'original_{feature}'] = base_X[feature].to_numpy()
            hotspot_df[f'modified_{feature}'] = modified_values.to_numpy()

            changed = base_X.copy()
            changed[feature] = modified_values
            _record_scenario(changed, f'_{feature}')

    # variable group 5: building age (special case: expressed in years, not a percentage)
    # eighth: make every building younger by building_age_change years
    if building_age_change is not None:
        aged = base_X.copy()
        if 'building_age' in feature_cols:
            aged['building_age'] = _younger_building_age()
            hotspot_df['original_building_age'] = base_X['building_age'].to_numpy()
            hotspot_df['modified_building_age'] = aged['building_age'].to_numpy()
        _record_scenario(aged, '_age')

    # ninth: group 6 (synthesis). When several building characteristics are changed, the
    # individual scenarios above are still reported, and in addition all of those changes
    # (plus the age change, if any) are applied simultaneously under the suffix '_bld_all'
    # so that interactions between the changes are reflected in a single prediction.
    # With only one building characteristic there is nothing to combine, so this is skipped.
    if building_change and len(building_change) > 1:
        combined = base_X.copy()
        for feature, change_pct in building_change.items():
            if feature in feature_cols:
                combined[feature] = _scaled_building_value(feature, change_pct)
        if building_age_change is not None and 'building_age' in feature_cols:
            combined['building_age'] = _younger_building_age()
        _record_scenario(combined, '_bld_all')

    return hotspot_df

# **Step 5: Run the function with Geo location at some eviction-hard-hitting buildings**

In [None]:
# Run every counterfactual scenario for the hotspot buildings (selected by BIN) and
# collect all per-scenario result columns in one comparison DataFrame.
result_df_bins = process_hotspot_counterfactuals(
    df=df,
    hotspot_bins=hotspot_bins,
    rfr_model= model_rfr_all,
    feature_cols=all_vars,

    # bring all complaints to zero
    complaints_vars=complaints_vars,

    # reduce all complaints by 50% (total, but can also only work on one category at a time)
    complaint_reduction_pct=50,

    # target specific complaint categories with different reduction percentages:
    # heat/hot water by 75%, pest issues by 90%, plumbing issues by 80%, noise complaints by 60%, and sanitation issues by 85%
    targeted_complaints={'heat_hot_water': 75, 'pest_issues': 90,'plumbing_issues': 80, 'noise_complaints': 60,'sanitation_issues': 85},

    # flip LLC status
    change_llc=True,

    # building physical characteristics
    # reduce floors, residential units, building area by 50% (values are percent changes; negative = reduction)
    # more than one feature is listed, so the combined '_bld_all' scenario is produced as well
    building_change={'numfloors': -50,'unitsres': -50,'bldgarea': -50 },

    # reduce building age by 20 years (the value is subtracted from building_age, floored at 0)
    building_age_change=20,

    # all SVI variables to 25th percentile
    svi_vars=svi_vars,
    svi_reduction_quantile=0.25,

    # user-defined SVI themes, each with its own target quantile (all 0.25 here; adjust as needed)
    svi_themes={'economic': {'vars': ['ep_pov150', 'ep_unemp', 'ep_nohsdp', 'ep_uninsur'],'quantile': 0.25}, 'demographic': {'vars': ['ep_age65', 'ep_age17', 'ep_disabl', 'ep_limeng'],'quantile': 0.25},
        'household': {'vars': ['ep_noveh', 'ep_crowd', 'ep_hburd'], 'quantile': 0.25}, 'racial': {'vars': ['ep_afam', 'ep_hisp', 'ep_asian', 'ep_minrty', 'ep_white'],'quantile': 0.25}})

In [None]:
result_df_bins.to_csv('/content/drive/My Drive/X999/bins_results_df.csv')

In [None]:
# base
summary_columns = ['bin', 'original_eviction_count']

In [None]:
# basic intervention results
# c50 can be changed to c25 depending on how we want to reduce it
for intervention in ['c', 'c50', 'targeted', 's', 'llc']:
    summary_columns.extend([ f'average_year_eviction_count_{intervention}', f'change_in_evictions_{intervention}', f'percent_change_{intervention}'])

In [None]:
# SVI theme results
for theme in ['economic', 'demographic', 'household', 'racial']:
    summary_columns.extend([f'average_year_eviction_count_svi_{theme}', f'change_in_evictions_svi_{theme}', f'percent_change_svi_{theme}'])

In [None]:
# Building-feature result columns: for each feature, list the original value, the
# modified value, the new predicted yearly eviction count, and the absolute and
# percentage change in evictions.
building_features = ['numfloors', 'unitsres', 'bldgarea', 'building_age']
for feature in building_features:
    # building_age results are stored under the shorter 'age' suffix
    feature_suffix = 'age' if feature == 'building_age' else feature
    summary_columns += [
        f'original_{feature}',
        f'modified_{feature}',
        f'average_year_eviction_count_{feature_suffix}',
        f'change_in_evictions_{feature_suffix}',
        f'percent_change_{feature_suffix}',
    ]

In [None]:
# combined building changes
summary_columns.extend([ 'average_year_eviction_count_bld_all', 'change_in_evictions_bld_all', 'percent_change_bld_all'])

In [None]:
comprehensive_summary = result_df_bins[summary_columns]

## **Step 5.1 Get the comprehensive df**

In [None]:
comprehensive_summary

Unnamed: 0,bin,original_eviction_count,average_year_eviction_count_c,change_in_evictions_c,percent_change_c,average_year_eviction_count_c50,change_in_evictions_c50,percent_change_c50,average_year_eviction_count_targeted,change_in_evictions_targeted,percent_change_targeted,average_year_eviction_count_s,change_in_evictions_s,percent_change_s,average_year_eviction_count_llc,change_in_evictions_llc,percent_change_llc,average_year_eviction_count_svi_economic,change_in_evictions_svi_economic,percent_change_svi_economic,average_year_eviction_count_svi_demographic,change_in_evictions_svi_demographic,percent_change_svi_demographic,average_year_eviction_count_svi_household,change_in_evictions_svi_household,percent_change_svi_household,average_year_eviction_count_svi_racial,change_in_evictions_svi_racial,percent_change_svi_racial,original_numfloors,modified_numfloors,average_year_eviction_count_numfloors,change_in_evictions_numfloors,percent_change_numfloors,original_unitsres,modified_unitsres,average_year_eviction_count_unitsres,change_in_evictions_unitsres,percent_change_unitsres,original_bldgarea,modified_bldgarea,average_year_eviction_count_bldgarea,change_in_evictions_bldgarea,percent_change_bldgarea,original_building_age,modified_building_age,average_year_eviction_count_age,change_in_evictions_age,percent_change_age,average_year_eviction_count_bld_all,change_in_evictions_bld_all,percent_change_bld_all
122,2004223,9.4,1.444,7.956,84.64,4.672,4.728,50.3,8.94,0.46,4.89,3.46,5.94,63.19,9.13,0.27,2.87,8.014,1.386,14.74,9.324,0.076,0.81,9.438,-0.038,-0.4,7.25,2.15,22.87,17.0,8.5,9.4,1.598721e-14,0.0,321.0,160.5,8.402,0.998,10.62,285056.0,142528.0,9.36,0.04,0.43,60.0,40.0,9.36,0.04,0.43,8.322,1.078,11.47
158,2004223,9.4,3.374,6.026,64.11,4.674,4.726,50.28,8.95,0.45,4.79,3.106,6.294,66.96,9.13,0.27,2.87,8.006,1.394,14.83,9.284,0.116,1.23,9.28,0.12,1.28,6.964,2.436,25.91,17.0,8.5,9.4,1.598721e-14,0.0,321.0,160.5,8.402,0.998,10.62,285056.0,142528.0,9.36,0.04,0.43,60.0,40.0,9.36,0.04,0.43,8.322,1.078,11.47
201,2004223,9.4,1.444,7.956,84.64,4.674,4.726,50.28,8.95,0.45,4.79,3.106,6.294,66.96,9.13,0.27,2.87,8.006,1.394,14.83,9.284,0.116,1.23,9.28,0.12,1.28,6.964,2.436,25.91,17.0,8.5,9.4,1.598721e-14,0.0,321.0,160.5,8.402,0.998,10.62,285056.0,142528.0,9.36,0.04,0.43,60.0,40.0,9.36,0.04,0.43,8.322,1.078,11.47
307,3000000,35.6,25.814,9.786,27.49,26.158,9.442,26.52,25.886,9.714,27.29,17.532,18.068,50.75,27.908,7.692,21.61,27.752,7.848,22.04,23.188,12.412,34.87,25.81,9.79,27.5,25.764,9.836,27.63,4.0,2.0,27.908,7.692,21.61,9.0,4.5,28.886,6.714,18.86,8247.0,4123.5,26.928,8.672,24.36,7.0,0.0,27.686,7.914,22.23,26.622,8.978,25.22
869,2004223,9.4,2.264,7.136,75.91,4.674,4.726,50.28,8.95,0.45,4.79,3.104,6.296,66.98,9.13,0.27,2.87,8.006,1.394,14.83,9.284,0.116,1.23,9.28,0.12,1.28,6.964,2.436,25.91,17.0,8.5,9.4,1.598721e-14,0.0,321.0,160.5,8.402,0.998,10.62,285056.0,142528.0,9.36,0.04,0.43,60.0,40.0,9.36,0.04,0.43,8.322,1.078,11.47
1075,2004223,9.4,1.99,7.41,78.83,4.674,4.726,50.28,8.95,0.45,4.79,3.104,6.296,66.98,9.13,0.27,2.87,8.006,1.394,14.83,9.284,0.116,1.23,9.28,0.12,1.28,6.964,2.436,25.91,17.0,8.5,9.4,1.598721e-14,0.0,321.0,160.5,8.402,0.998,10.62,285056.0,142528.0,9.36,0.04,0.43,60.0,40.0,9.36,0.04,0.43,8.322,1.078,11.47
1416,1000000,13.8,9.478,4.322,31.32,9.476,4.324,31.33,12.044,1.756,12.72,7.598,6.202,44.94,12.178,1.622,11.75,11.906,1.894,13.72,11.5,2.3,16.67,11.908,1.892,13.71,9.228,4.572,33.13,7.0,3.5,11.774,2.026,14.68,42.0,21.0,0.92,12.88,93.33,33058.0,16529.0,11.644,2.156,15.62,125.0,105.0,4.872,8.928,64.7,0.514,13.286,96.28
1931,3253907,9.8,1.82,7.98,81.43,4.31,5.49,56.02,1.564,8.236,84.04,4.944,4.856,49.55,9.77,0.03,0.31,9.8,1.776357e-14,0.0,5.148,4.652,47.47,9.8,1.776357e-14,0.0,9.8,1.776357e-14,0.0,24.0,12.0,8.1,1.7,17.35,334.0,167.0,9.316,0.484,4.94,366000.0,183000.0,9.8,1.776357e-14,0.0,51.0,31.0,11.09,-1.29,-13.16,9.238,0.562,5.73
1932,3253907,9.8,1.82,7.98,81.43,4.33,5.47,55.82,1.564,8.236,84.04,4.944,4.856,49.55,9.77,0.03,0.31,9.8,1.776357e-14,0.0,5.148,4.652,47.47,9.8,1.776357e-14,0.0,9.8,1.776357e-14,0.0,24.0,12.0,8.1,1.7,17.35,334.0,167.0,9.316,0.484,4.94,366000.0,183000.0,9.8,1.776357e-14,0.0,51.0,31.0,11.09,-1.29,-13.16,9.238,0.562,5.73
2541,2000000,27.8,3.176,24.624,88.58,1.894,25.906,93.19,3.284,24.516,88.19,3.716,24.084,86.63,2.734,25.066,90.17,3.004,24.796,89.19,3.176,24.624,88.58,3.418,24.382,87.71,3.842,23.958,86.18,4.0,2.0,3.284,24.516,88.19,8.0,4.0,3.008,24.792,89.18,7909.0,3954.5,2.798,25.002,89.94,9.0,0.0,4.98,22.82,82.09,5.994,21.806,78.44


## **Step 5.2 Define the directional_changes function and get the result for bin df**

In [None]:
def directional_changes(df, feature_changes, metric_cols):
    """Summarize, per metric column, how many rows saw evictions fall, rise, or stay flat.

    Every column named in ``metric_cols`` is read with the convention used by the
    ``percent_change_*`` columns: a positive value means evictions decreased, a
    negative value means they increased, and zero means no change. Rows whose
    value is missing (NaN) fall into none of the three groups.

    Parameters
    ----------
    df : pandas.DataFrame
        Scenario results; must contain every column listed in ``metric_cols``.
    feature_changes : dict
        Accepted for interface compatibility; this function does not read it.
    metric_cols : iterable of str
        Columns to summarize. A leading ``'percent_change_'`` is stripped to form
        the key in the returned dict; other names are used unchanged.

    Returns
    -------
    dict
        ``{intervention: {'total_buildings', 'decreased_evictions',
        'increased_evictions', 'no_change'}}``. Counts are row counts, percentages
        are shares of ``len(df)`` rounded to 2 decimals, and the averages are the
        mean magnitude of the metric within each group (0 when the group is empty).
        Columns that cannot be interpreted as numbers are skipped with a warning
        instead of raising ``TypeError`` on the comparison with 0.
    """
    import warnings  # local import: only used for the skipped-column notice

    total = len(df)

    def _pct_of_total(count):
        # An empty frame has no rows to take a share of; report 0 rather than divide by zero.
        return round((count / total * 100), 2) if total else 0.0

    results = {}

    for metric in metric_cols:
        values = df[metric]

        if not pd.api.types.is_numeric_dtype(values):
            # Object columns that really hold numbers are converted; columns with text
            # labels (e.g. 'no to yes') lose values in the conversion and are skipped.
            coerced = pd.to_numeric(values, errors='coerce')
            if coerced.notna().sum() < values.notna().sum():
                warnings.warn(
                    f"directional_changes: skipping non-numeric column '{metric}'",
                    stacklevel=2,
                )
                continue
            values = coerced

        intervention = metric.replace('percent_change_', '')
        # positive percent_change means evictions decreased
        decreased_evictions = values[values > 0]
        increased_evictions = values[values < 0]
        no_change = values[values == 0]

        results[intervention] = {
            'total_buildings': total,
            'decreased_evictions': {
                'count': len(decreased_evictions),
                'percentage': _pct_of_total(len(decreased_evictions)),
                'avg_reduction': round(decreased_evictions.mean(), 2) if len(decreased_evictions) > 0 else 0,
            },
            'increased_evictions': {
                'count': len(increased_evictions),
                'percentage': _pct_of_total(len(increased_evictions)),
                'avg_increase': round(abs(increased_evictions.mean()), 2) if len(increased_evictions) > 0 else 0,
            },
            'no_change': {
                'count': len(no_change),
                'percentage': _pct_of_total(len(no_change)),
            },
        }

    return results

In [None]:
# Direction of the change applied to each feature group in the scenario runs.
# NOTE(review): this dict is passed to directional_changes as `feature_changes`, but that
# function never reads the argument, so it currently serves as documentation only.
feature_directions = {
    # reduced by 50%
    'numfloors': 'decrease',
    'unitsres': 'decrease',
    'bldgarea': 'decrease',
    # building age reduced by 20 years
    'building_age': 'decrease',
    # all complaints reduced by 50%
    'complaints': 'decrease',
    # all complaints reduced by 100%
    'complaints_100%': 'decrease',
    # SVI variables moved to the 25th percentile
    'svi': 'improve',
    # LLC status flipped
    'llc': 'toggle'
}

In [None]:
metric_columns = ['percent_change_numfloors','percent_change_unitsres', 'percent_change_bldgarea','percent_change_age','percent_change_bld_all','percent_change_c', 'percent_change_c50',
    'percent_change_s', 'percent_change_llc']

In [None]:
# get the directional analysis
trend_analysis = directional_changes(result_df_bins, feature_directions, metric_columns)

In [None]:
# One row per intervention for the trend-summary DataFrame.
# Built with a comprehension whose variable is `direction_stats`: a module-level loop
# variable named `stats` would rebind the `stats` module imported from scipy in the
# first cell and break any later use of it.
summary_rows = [
    {
        'intervention': intervention,
        'buildings with decreased evictions (%)': direction_stats['decreased_evictions']['percentage'],
        'average eviction reduction (%)': direction_stats['decreased_evictions']['avg_reduction'],
        'buildings with increased evictions (%)': direction_stats['increased_evictions']['percentage'],
        'average eviction increase (%)': direction_stats['increased_evictions']['avg_increase'],
    }
    for intervention, direction_stats in trend_analysis.items()
]

In [None]:
trend_summary = pd.DataFrame(summary_rows)
trend_summary = trend_summary.sort_values(by='buildings with decreased evictions (%)', ascending=False)

In [None]:
trend_summary

Unnamed: 0,intervention,buildings with decreased evictions (%),average eviction reduction (%),buildings with increased evictions (%),average eviction increase (%)
7,s,100.0,47.99,0.0,0.0
4,bld_all,98.93,15.64,1.07,11.15
6,c50,98.62,64.89,0.46,18.15
5,c,97.86,68.93,1.07,11.01
0,numfloors,88.82,10.41,0.46,17.16
3,age,86.22,14.22,10.57,13.3
1,unitsres,76.26,11.17,0.46,17.1
2,bldgarea,42.73,14.19,0.61,14.76
8,llc,42.42,14.83,0.77,10.1


### **Short summary: so far, two very important functions have been defined: the core algorithm that generates all kinds of scenarios, and the function that summarizes the direction of changes. Any other scenario that makes sense can be derived from the core function. Now, let's run one with neighborhoods. (The core function is also compatible with zip codes, but we avoid using them here because zip codes overlap heavily with neighborhoods geographically, and neighborhoods usually give a clearer visual representation that is easier to understand.)**

# **Step 6: Get results with the eviction hard hitting neighborhoods**

In [None]:
zip_codes = [10458, 10460, 10453]
# as selected previously

In [None]:
nta_list

['Central Harlem North-Polo Grounds',
 'Crown Heights North',
 'Bedford Park-Fordham North']

## **Step 6.1: run the model**

In [None]:
# Run the same counterfactual scenarios, this time selecting buildings by neighborhood (NTA)
result_df_nta = process_hotspot_counterfactuals(
    df=df,
    # zip_codes = [10458, 10460, 10453],
    neighborhoods= nta_list,
    # hotspot_bins=hotspot_bins,
    rfr_model= model_rfr_all,
    feature_cols=all_vars,

    # bring all complaints to zero
    complaints_vars=complaints_vars,

    # reduce all complaints by 50% (total, but can also only work on one category at a time)
    complaint_reduction_pct=50,

    # target specific complaint categories with different reduction percentages:
    # heat/hot water by 75%, pest issues by 90%, plumbing issues by 80%, noise complaints by 60%, and sanitation issues by 85%
    targeted_complaints={'heat_hot_water': 75, 'pest_issues': 90,'plumbing_issues': 80, 'noise_complaints': 60,'sanitation_issues': 85},

    # flip LLC status
    change_llc=True,

    # building physical characteristics
    # reduce floors, residential units, building area by 50% (values are percent changes; negative = reduction)
    # more than one feature is listed, so the combined '_bld_all' scenario is produced as well
    building_change={'numfloors': -50,'unitsres': -50,'bldgarea': -50 },

    # reduce building age by 20 years (the value is subtracted from building_age, floored at 0)
    building_age_change=20,

    # all SVI variables to 25th percentile
    svi_vars=svi_vars,
    svi_reduction_quantile=0.25,

    # user-defined SVI themes, each with its own target quantile (all 0.25 here; adjust as needed)
    svi_themes={'economic': {'vars': ['ep_pov150', 'ep_unemp', 'ep_nohsdp', 'ep_uninsur'],'quantile': 0.25}, 'demographic': {'vars': ['ep_age65', 'ep_age17', 'ep_disabl', 'ep_limeng'],'quantile': 0.25},
        'household': {'vars': ['ep_noveh', 'ep_crowd', 'ep_hburd'], 'quantile': 0.25}, 'racial': {'vars': ['ep_afam', 'ep_hisp', 'ep_asian', 'ep_minrty', 'ep_white'],'quantile': 0.25}})

In [None]:
# base
# same procedures as the bins
summary_columns = ['nta', 'original_eviction_count']

In [None]:
# basic intervention results
# c50 can be changed to c25 depending on how we want to reduce it
for intervention in ['c', 'c50', 'targeted', 's', 'llc']:
    summary_columns.extend([ f'average_year_eviction_count_{intervention}', f'change_in_evictions_{intervention}', f'percent_change_{intervention}'])

In [None]:
# SVI theme results
for theme in ['economic', 'demographic', 'household', 'racial']:
    summary_columns.extend([f'average_year_eviction_count_svi_{theme}', f'change_in_evictions_svi_{theme}', f'percent_change_svi_{theme}'])

In [None]:
# Building-feature result columns: for each feature, list the original value, the
# modified value, the new predicted yearly eviction count, and the absolute and
# percentage change in evictions.
building_features = ['numfloors', 'unitsres', 'bldgarea', 'building_age']
for feature in building_features:
    # building_age results are stored under the shorter 'age' suffix
    feature_suffix = 'age' if feature == 'building_age' else feature
    summary_columns += [
        f'original_{feature}',
        f'modified_{feature}',
        f'average_year_eviction_count_{feature_suffix}',
        f'change_in_evictions_{feature_suffix}',
        f'percent_change_{feature_suffix}',
    ]

In [None]:
# combined building changes
summary_columns.extend([ 'average_year_eviction_count_bld_all', 'change_in_evictions_bld_all', 'percent_change_bld_all'])

In [None]:
# file_path2 = '/content/drive/My Drive/X999/nta_results_df.csv'
# # result_df_nta takes a long time to run, and therefore we have saved it previously and would avoid running again.

In [None]:
# result_df_nta = pd.read_csv(file_path2)

In [None]:
comprehensive_summary = result_df_nta[summary_columns]

In [None]:
comprehensive_summary.head()

Unnamed: 0,nta,original_eviction_count,average_year_eviction_count_c,change_in_evictions_c,percent_change_c,average_year_eviction_count_c50,change_in_evictions_c50,percent_change_c50,average_year_eviction_count_targeted,change_in_evictions_targeted,percent_change_targeted,average_year_eviction_count_s,change_in_evictions_s,percent_change_s,average_year_eviction_count_llc,change_in_evictions_llc,percent_change_llc,average_year_eviction_count_svi_economic,change_in_evictions_svi_economic,percent_change_svi_economic,average_year_eviction_count_svi_demographic,change_in_evictions_svi_demographic,percent_change_svi_demographic,average_year_eviction_count_svi_household,change_in_evictions_svi_household,percent_change_svi_household,average_year_eviction_count_svi_racial,change_in_evictions_svi_racial,percent_change_svi_racial,original_numfloors,modified_numfloors,average_year_eviction_count_numfloors,change_in_evictions_numfloors,percent_change_numfloors,original_unitsres,modified_unitsres,average_year_eviction_count_unitsres,change_in_evictions_unitsres,percent_change_unitsres,original_bldgarea,modified_bldgarea,average_year_eviction_count_bldgarea,change_in_evictions_bldgarea,percent_change_bldgarea,original_building_age,modified_building_age,average_year_eviction_count_age,change_in_evictions_age,percent_change_age,average_year_eviction_count_bld_all,change_in_evictions_bld_all,percent_change_bld_all
5,Bedford Park-Fordham North,1.6,4.178,-2.578,-161.12,0.644,0.956,59.75,0.846,0.754,47.12,1.04,0.56,35.0,1.592,0.008,0.5,1.528,0.072,4.5,1.454,0.146,9.13,1.506,0.094,5.88,1.524,0.076,4.75,6.0,3.0,1.604,-0.004,-0.25,42.0,21.0,1.244,0.356,22.25,37800.0,18900.0,1.574,0.026,1.63,88.0,68.0,1.624,-0.024,-1.5,1.278,0.322,20.13
6,Bedford Park-Fordham North,1.0,0.334,0.666,66.6,0.414,0.586,58.6,0.602,0.398,39.8,0.33,0.67,67.0,0.41,0.59,59.0,0.678,0.322,32.2,0.906,0.094,9.4,0.838,0.162,16.2,0.53,0.47,47.0,2.0,1.0,0.964,0.036,3.6,2.0,1.0,0.964,0.036,3.6,2358.0,1179.0,0.806,0.194,19.4,124.0,104.0,0.63,0.37,37.0,0.586,0.414,41.4
13,Central Harlem North-Polo Grounds,3.0,2.3,0.7,23.33,2.298,0.702,23.4,2.402,0.598,19.93,2.282,0.718,23.93,2.97,0.03,1.0,2.754,0.246,8.2,2.966,0.034,1.13,2.89,0.11,3.67,2.774,0.226,7.53,30.0,15.0,2.656,0.344,11.47,1614.0,807.0,2.908,0.092,3.07,2531670.0,1265835.0,2.844,0.156,5.2,21.0,1.0,11.68,-8.68,-289.33,11.558,-8.558,-285.27
24,Bedford Park-Fordham North,1.0,0.334,0.666,66.6,0.414,0.586,58.6,0.602,0.398,39.8,0.33,0.67,67.0,0.41,0.59,59.0,0.678,0.322,32.2,0.906,0.094,9.4,0.838,0.162,16.2,0.53,0.47,47.0,2.0,1.0,0.964,0.036,3.6,2.0,1.0,0.964,0.036,3.6,2358.0,1179.0,0.806,0.194,19.4,124.0,104.0,0.63,0.37,37.0,0.586,0.414,41.4
35,Crown Heights North,1.4,3.972,-2.572,-183.71,0.704,0.696,49.71,0.512,0.888,63.43,0.95,0.45,32.14,1.398,0.002,0.14,1.344,0.056,4.0,1.322,0.078,5.57,1.378,0.022,1.57,1.23,0.17,12.14,4.0,2.0,1.366,0.034,2.43,43.0,21.5,1.114,0.286,20.43,40790.0,20395.0,1.29,0.11,7.86,101.0,81.0,1.316,0.084,6.0,0.904,0.496,35.43


In [None]:
# result_df_nta.to_csv('/content/drive/My Drive/X999/nta_results_df.csv')

In [None]:
# Per-neighborhood profile for the NTAs in nta_list: summed yearly eviction counts plus the
# mean building age, residential units, LLC share, total complaints and overall SVI score.
# NOTE(review): df appears to hold one row per eviction record (the same bin shows up on
# several rows of the scenario results), so 'sum' adds a building's yearly average once per
# record and the means are weighted by record count — confirm this is the intended granularity.
neighborhood_profiles = df[df['nta'].isin(nta_list)].groupby('nta').agg({
    'average_year_eviction_count': 'sum',
    'building_age': 'mean',
    'unitsres': 'mean',
    'is_llc': 'mean',
    'total_complaints': 'mean',
    # the overall SVI score
    'rpl_themes': 'mean'
})

In [None]:
neighborhood_profiles

Unnamed: 0_level_0,average_year_eviction_count,building_age,unitsres,is_llc,total_complaints,rpl_themes
nta,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bedford Park-Fordham North,2044.4,92.96797,47.15948,0.62549,269.22876,0.98879
Central Harlem North-Polo Grounds,2300.8,89.05882,214.38848,0.33211,662.56985,0.96937
Crown Heights North,1355.0,94.20888,63.22827,0.46467,202.81926,0.92978


In [None]:
result_df_nta.head()

Unnamed: 0.1,Unnamed: 0,primary_key,bbl,court_index_number,docket_number,eviction_address,eviction_apartment_number,executed_date,borough,zipcode,ejectment,eviction/legal_possession,latitude,longitude,community_board,council_district,census_tract,bin,nta,year,month_year,geometry,average_year_eviction_count,yearbuilt,bldgclass,numfloors,unitsres,ownername,bldgarea,building_type,building_category,is_condo,floor_category,rent_era,architectural_style,economic_period,residential_units_category,is_llc,building_size_category,size_quartile,decade,fips,e_totpop,rpl_theme1,rpl_theme2,rpl_theme3,rpl_theme4,rpl_themes,ep_pov150,ep_unemp,ep_nohsdp,ep_uninsur,ep_age65,ep_age17,ep_disabl,ep_limeng,ep_noveh,ep_crowd,ep_hburd,ep_afam,ep_hisp,ep_asian,ep_aian,ep_nhpi,ep_twomore,ep_otherrace,ep_minrty,ep_white,invalid_zip,svi_quartile,svi_group,air_quality,animal_issues,appliances,building_exterior,doors_windows,electrical_issues,elevator_issues,floors_stairs,general_complaints,graffiti_posting,heat_hot_water,homeless_issues,noise_complaints,other_issues,pest_issues,plumbing_issues,police_matters,public_nuisance,safety_concerns,sanitation_issues,walls_ceilings,total_complaints,building_age,style_ordinal,style_freq,style_label,building_category_ordinal,building_category_freq,building_category_le,age_bin,age_group,month,borough_int,original_eviction_count,average_year_eviction_count_c,change_in_evictions_c,percent_change_c,average_year_eviction_count_c50,change_in_evictions_c50,percent_change_c50,average_year_eviction_count_targeted,change_in_evictions_targeted,percent_change_targeted,average_year_eviction_count_s,change_in_evictions_s,percent_change_s,average_year_eviction_count_svi_economic,change_in_evictions_svi_economic,percent_change_svi_economic,average_year_eviction_count_svi_demographic,change_in_evictions_svi_demographic,percent_change_svi_demographic,average_year_eviction_count_svi_household,change_in_evictions_svi_household,percent_change_svi_household,average_year_eviction_
count_svi_racial,change_in_evictions_svi_racial,percent_change_svi_racial,original_llc_status,llc_change,average_year_eviction_count_llc,change_in_evictions_llc,percent_change_llc,original_numfloors,modified_numfloors,average_year_eviction_count_numfloors,change_in_evictions_numfloors,percent_change_numfloors,original_unitsres,modified_unitsres,average_year_eviction_count_unitsres,change_in_evictions_unitsres,percent_change_unitsres,original_bldgarea,modified_bldgarea,average_year_eviction_count_bldgarea,change_in_evictions_bldgarea,percent_change_bldgarea,original_building_age,modified_building_age,average_year_eviction_count_age,change_in_evictions_age,percent_change_age,average_year_eviction_count_bld_all,change_in_evictions_bld_all,percent_change_bld_all
5,5,000098/17_69483,2031770041,000098/17,69483,65 EAST 193RD ST,1B,2017-05-04,BRONX,10468,Not an Ejectment,Possession,40.86608,-73.89651,7.0,14.0,401.0,2013945,Bedford Park-Fordham North,2017,2017-05,POINT (-73.896515 40.866075),1.6,1937.0,D1,6.0,42.0,LSB HULL ASSOCIATES,37800.0,0,elevator,0,2,"Pre-1947, pre-rent-control","1931–1950, Manhattan Modern","1930-1945, great depression and WWII",60,0,7,4,1930-1939,10468,81397.0,0.9954,0.9407,0.987,0.947,0.9874,39.5,11.6,28.3,9.2,11.2,26.4,12.2,26.9,71.8,19.2,56.7,15.6,78.0,2.3,0.0,0.0,0.5,0.5,96.9,3.1,False,3,medium-high,0.0,0.0,3.0,0.0,11.0,9.0,4.0,5.0,9.0,0.0,53.0,0.0,12.0,0.0,14.0,44.0,0.0,0.0,8.0,32.0,28.0,232.0,88.0,4,0.16942,2,6,0.38443,1,50+,"(60, 90]",5,2,1.6,4.178,-2.578,-161.12,0.644,0.956,59.75,0.846,0.754,47.12,1.04,0.56,35.0,1.528,0.072,4.5,1.454,0.146,9.13,1.506,0.094,5.88,1.524,0.076,4.75,0.0,no to yes,1.592,0.008,0.5,6.0,3.0,1.604,-0.004,-0.25,42.0,21.0,1.244,0.356,22.25,37800.0,18900.0,1.574,0.026,1.63,88.0,68.0,1.624,-0.024,-1.5,1.278,0.322,20.13
6,6,0002959/16_69081,2033020071,0002959/16,69081,2847 BRIGGS AVENUE,SECOND FLOOR,2017-10-03,BRONX,10458,Not an Ejectment,Possession,40.86929,-73.88876,7.0,15.0,40702.0,2017108,Bedford Park-Fordham North,2017,2017-10,POINT (-73.888761 40.86929),1.0,1901.0,B2,2.0,2.0,"MAMONAU, DZMITRY",2358.0,0,two-family,0,1,"Pre-1947, pre-rent-control","1900–1920, Beaux-Arts","Pre-1929, pre-great depression",2,0,3,3,1900-1909,10458,82678.0,0.9983,0.9179,0.9775,0.963,0.9891,45.2,14.3,31.9,9.7,9.7,25.7,11.7,24.6,71.6,18.1,59.3,16.1,71.3,3.3,0.3,0.0,1.7,0.4,93.0,7.0,False,3,medium-high,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,16.0,124.0,2,0.22978,0,2,0.09,4,,"(120, 200]",10,2,1.0,0.334,0.666,66.6,0.414,0.586,58.6,0.602,0.398,39.8,0.33,0.67,67.0,0.678,0.322,32.2,0.906,0.094,9.4,0.838,0.162,16.2,0.53,0.47,47.0,0.0,no to yes,0.41,0.59,59.0,2.0,1.0,0.964,0.036,3.6,2.0,1.0,0.964,0.036,3.6,2358.0,1179.0,0.806,0.194,19.4,124.0,104.0,0.63,0.37,37.0,0.586,0.414,41.4
13,13,0012760/18_87617,1021060003,0012760/18,87617,2971 EIGHTH AVENUE,08A,2018-11-01,MANHATTAN,10039,Not an Ejectment,Possession,40.8303,-73.93646,10.0,9.0,24302.0,1084520,Central Harlem North-Polo Grounds,2018,2018-11,POINT (-73.936456 40.830297),3.0,2004.0,D7,30.0,1614.0,NYC HOUSING AUTHORITY,2531670.0,1,elevator,0,3,"1994–Present, vacancy decontrol","2001-present, New Architecture","1991–2008, modern economic growth",150,0,8,4,2000-2009,10039,29887.0,0.9868,0.9242,0.9797,0.8849,0.9748,38.3,16.6,18.2,7.2,11.1,22.0,15.1,7.9,77.9,7.9,41.5,54.7,32.7,1.9,0.1,0.0,2.8,1.1,93.3,6.7,False,3,medium-high,1.0,19.0,0.0,0.0,5.0,0.0,18.0,0.0,0.0,0.0,1.0,7.0,2088.0,1.0,4.0,4.0,2.0,1.0,0.0,4.0,1.0,2156.0,21.0,7,0.08651,5,6,0.38443,1,20-30,"(0, 30]",11,1,3.0,2.3,0.7,23.33,2.298,0.702,23.4,2.402,0.598,19.93,2.282,0.718,23.93,2.754,0.246,8.2,2.966,0.034,1.13,2.89,0.11,3.67,2.774,0.226,7.53,0.0,no to yes,2.97,0.03,1.0,30.0,15.0,2.656,0.344,11.47,1614.0,807.0,2.908,0.092,3.07,2531670.0,1265835.0,2.844,0.156,5.2,21.0,1.0,11.68,-8.68,-289.33,11.558,-8.558,-285.27
24,24,002960/16_66686,2033020071,002960/16,66686,2847 BRIGGS AVENUE,FIRST FLOOR,2017-10-03,BRONX,10458,Not an Ejectment,Possession,40.86929,-73.88876,7.0,15.0,40702.0,2017108,Bedford Park-Fordham North,2017,2017-10,POINT (-73.888761 40.86929),1.0,1901.0,B2,2.0,2.0,"MAMONAU, DZMITRY",2358.0,0,two-family,0,1,"Pre-1947, pre-rent-control","1900–1920, Beaux-Arts","Pre-1929, pre-great depression",2,0,3,3,1900-1909,10458,82678.0,0.9983,0.9179,0.9775,0.963,0.9891,45.2,14.3,31.9,9.7,9.7,25.7,11.7,24.6,71.6,18.1,59.3,16.1,71.3,3.3,0.3,0.0,1.7,0.4,93.0,7.0,False,3,medium-high,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,16.0,124.0,2,0.22978,0,2,0.09,4,,"(120, 200]",10,2,1.0,0.334,0.666,66.6,0.414,0.586,58.6,0.602,0.398,39.8,0.33,0.67,67.0,0.678,0.322,32.2,0.906,0.094,9.4,0.838,0.162,16.2,0.53,0.47,47.0,0.0,no to yes,0.41,0.59,59.0,2.0,1.0,0.964,0.036,3.6,2.0,1.0,0.964,0.036,3.6,2358.0,1179.0,0.806,0.194,19.4,124.0,104.0,0.63,0.37,37.0,0.586,0.414,41.4
35,35,0052539/19_97777,3013930066,0052539/19,97777,1325 EASTERN PARKWAY,11C,2019-10-25,BROOKLYN,11233,Not an Ejectment,Possession,40.66845,-73.92423,8.0,41.0,359.0,3037446,Crown Heights North,2019,2019-10,POINT (-73.924235 40.668448),1.4,1924.0,C1,4.0,43.0,"1325 PARKWAY ESTATES, LLC",40790.0,0,walk-up,0,2,"Pre-1947, pre-rent-control","1921–1930, Art Deco Skyscrapers","Pre-1929, pre-great depression",60,1,7,4,1920-1929,11233,83125.0,0.9541,0.7613,0.9673,0.8866,0.9438,33.0,7.3,14.9,6.9,12.2,20.9,12.4,2.4,63.0,5.0,41.3,66.7,15.7,1.3,0.5,0.0,3.6,0.2,88.1,11.9,False,2,medium-low,0.0,0.0,0.0,0.0,13.0,4.0,0.0,6.0,3.0,0.0,224.0,0.0,27.0,0.0,4.0,40.0,0.0,0.0,2.0,30.0,17.0,370.0,101.0,3,0.28605,1,3,0.41255,5,,"(90, 120]",10,3,1.4,3.972,-2.572,-183.71,0.704,0.696,49.71,0.512,0.888,63.43,0.95,0.45,32.14,1.344,0.056,4.0,1.322,0.078,5.57,1.378,0.022,1.57,1.23,0.17,12.14,1.0,yes to no,1.398,0.002,0.14,4.0,2.0,1.366,0.034,2.43,43.0,21.5,1.114,0.286,20.43,40790.0,20395.0,1.29,0.11,7.86,101.0,81.0,1.316,0.084,6.0,0.904,0.496,35.43


## **Step 6.2: Change of Directions analysis**

In [None]:
metric_columns

['percent_change_numfloors',
 'percent_change_unitsres',
 'percent_change_bldgarea',
 'percent_change_age',
 'percent_change_bld_all',
 'percent_change_c',
 'percent_change_c50',
 'percent_change_s',
 'percent_change_llc']

In [None]:
# Result columns to run through directional_changes for the neighborhood results.
# The first 24 entries are the (count, absolute change, percent change) triplet for each
# complaint / SVI scenario; the LLC and numfloors columns follow.
# NOTE(review): directional_changes reads every column with the "positive = evictions
# decreased" convention, so the non-percent_change columns listed here produce rows that
# only count positive values — consider keeping just the percent_change_* columns.
metric_columns2 = [
    f'{stem}_{scenario}'
    for scenario in ('c', 'c50', 'targeted', 's',
                     'svi_economic', 'svi_demographic', 'svi_household', 'svi_racial')
    for stem in ('average_year_eviction_count', 'change_in_evictions', 'percent_change')
]
metric_columns2 += [
    'original_llc_status',
    'llc_change',
    'average_year_eviction_count_llc',
    'change_in_evictions_llc',
    'percent_change_llc',
    'original_numfloors',
    'modified_numfloors',
    'average_year_eviction_count_numfloors',
]

In [None]:
missing_cols = [col for col in metric_columns2 if col not in result_df_nta.columns]
missing_cols
# good

[]

In [None]:
# list(result_df_nta.columns)[:-20]

In [None]:
trend_analysis_nta = directional_changes(result_df_nta, feature_directions, metric_columns2)

TypeError: '>' not supported between instances of 'str' and 'int'

In [None]:
result_df_nta[metric_columns2].dtypes

Unnamed: 0,0
average_year_eviction_count_c,float64
change_in_evictions_c,float64
percent_change_c,float64
average_year_eviction_count_c50,float64
change_in_evictions_c50,float64
percent_change_c50,float64
average_year_eviction_count_targeted,float64
change_in_evictions_targeted,float64
percent_change_targeted,float64
average_year_eviction_count_s,float64


In [None]:
# llc_change holds text labels (e.g. 'no to yes'), so it cannot be compared with 0;
# drop it before the trend analysis.
# Guarded so that re-running this cell does not raise ValueError once the entry is gone.
if 'llc_change' in metric_columns2:
    metric_columns2.remove('llc_change')

In [None]:
trend_analysis_nta = directional_changes(result_df_nta, feature_directions, metric_columns2)

In [None]:
# One row per analysed column for the neighborhood trend-summary DataFrame.
# Built with a comprehension whose variable is `direction_stats`: a module-level loop
# variable named `stats` would rebind the `stats` module imported from scipy in the
# first cell and break any later use of it.
summary_rows_nta = [
    {
        'intervention': intervention,
        'buildings with decreased evictions (%)': direction_stats['decreased_evictions']['percentage'],
        'average eviction reduction (%)': direction_stats['decreased_evictions']['avg_reduction'],
        'buildings with increased evictions (%)': direction_stats['increased_evictions']['percentage'],
        'average eviction increase (%)': direction_stats['increased_evictions']['avg_increase'],
    }
    for intervention, direction_stats in trend_analysis_nta.items()
]

In [None]:
trend_summary_nta = pd.DataFrame(summary_rows_nta)
trend_summary_nta = trend_summary_nta.sort_values(by='buildings with decreased evictions (%)', ascending=False)

In [None]:
trend_summary_nta

Unnamed: 0,intervention,buildings with decreased evictions (%),average eviction reduction (%),buildings with increased evictions (%),average eviction increase (%)
0,average_year_eviction_count_c,100.0,1.88,0.0,0.0
6,average_year_eviction_count_targeted,100.0,0.91,0.0,0.0
3,average_year_eviction_count_c50,100.0,0.81,0.0,0.0
12,average_year_eviction_count_svi_economic,100.0,1.16,0.0,0.0
9,average_year_eviction_count_s,100.0,0.94,0.0,0.0
18,average_year_eviction_count_svi_household,100.0,1.17,0.0,0.0
15,average_year_eviction_count_svi_demographic,100.0,1.15,0.0,0.0
25,average_year_eviction_count_llc,100.0,1.19,0.0,0.0
30,average_year_eviction_count_numfloors,100.0,1.19,0.0,0.0
29,modified_numfloors,100.0,3.22,0.0,0.0


## **Step 6.3: Run Sensitivity analysis on neighborhood result**

In [None]:
# the sensitivity analysis covers every neighborhood in nta_list (defined outside this section)
target_neighborhoods = nta_list

In [None]:
# per-neighborhood container for the sensitivity results
# (the sensitivity cell below initialises this dict again before filling it)
intervention_levels = {}

In [None]:
# part 1: run sensitivity analysis for different intervention levels
#
# slow cell: process_hotspot_counterfactuals is called once per variant, per
# intervention type, per neighborhood. the result will be saved afterwards.
#
# NOTE(review): the single-feature sweeps (bldgarea / unitsres / numfloors) read
# 'percent_change_bld_all', and the recorded output shows the same effect at
# 30/50/70% -- confirm this is the column the function writes for single-feature changes.


def _run_counterfactual(buildings, **scenario):
    """Call process_hotspot_counterfactuals on `buildings` with the shared arguments plus `scenario`."""
    return process_hotspot_counterfactuals(
        df=buildings,
        neighborhoods=target_neighborhoods,
        rfr_model=model_rfr_all,
        feature_cols=all_vars,
        **scenario
    )


intervention_levels = {}

for neighborhood in target_neighborhoods:
    neighborhood_df = result_df_nta[result_df_nta['nta'] == neighborhood].copy()
    print(f"inside the outside loop of sensitivity analysis for {neighborhood} with {len(neighborhood_df)} buildings")

    # complaint reduction sensitivity: each level is read from its own percent_change_c<pct> column
    complaint_sensitivity = []
    for pct in (25, 50, 75):
        print(f"inside the loop of complaint reduction at {pct}%")
        outcome = _run_counterfactual(neighborhood_df, complaints_vars=complaints_vars, complaint_reduction_pct=pct)
        complaint_sensitivity.append((pct, outcome[f'percent_change_c{pct}'].mean()))

    # SVI improvement sensitivity
    svi_sensitivity = []
    for quantile in (0.25, 0.5, 0.75):
        print(f"inside the loop of SVI improvement at quantile {quantile}")
        outcome = _run_counterfactual(neighborhood_df, svi_vars=svi_vars, svi_reduction_quantile=quantile)
        svi_sensitivity.append((quantile, outcome['percent_change_s'].mean()))

    # llc status flipping sensitivity; two scenarios are reported separately:
    # from 'is llc' to 'not llc', and from 'not llc' to 'is llc'
    print("inside the loop of LLC status flipping")
    outcome = _run_counterfactual(neighborhood_df, change_llc=True)
    was_llc = outcome['is_llc'] == True
    was_not_llc = outcome['is_llc'] == False
    llc_sensitivity = [
        ('llc_to_non', outcome.loc[was_llc, 'percent_change_llc'].mean()),
        ('non_to_llc', outcome.loc[was_not_llc, 'percent_change_llc'].mean()),
    ]

    # building age sensitivity
    age_sensitivity = []
    for years in (10, 20, 30):
        print(f"inside the loop of building age reduction by {years} years")
        outcome = _run_counterfactual(neighborhood_df, building_age_change=years)
        age_sensitivity.append((years, outcome['percent_change_age'].mean()))

    # combined building features sensitivity: all three size features shrink by the same percentage
    building_features_sensitivity = []
    for pct in (30, 50, 70):
        print(f"inside the loop of combined building feature reduction at {pct}%")
        combined_change = {feature: -pct for feature in ('numfloors', 'unitsres', 'bldgarea')}
        outcome = _run_counterfactual(neighborhood_df, building_change=combined_change)
        building_features_sensitivity.append((pct, outcome['percent_change_bld_all'].mean()))

    # separate single-feature sweeps, kept apart from the combined one because that
    # is easier for gathering all the results into an aggregated dataframe later
    single_feature_sensitivity = {}
    for feature in ('bldgarea', 'unitsres', 'numfloors'):
        feature_records = []
        for pct in (30, 50, 70):
            print(f"inside the loop of {feature} reduction at {pct}%")
            outcome = _run_counterfactual(neighborhood_df, building_change={feature: -pct})
            feature_records.append((pct, outcome['percent_change_bld_all'].mean()))
        single_feature_sensitivity[feature] = feature_records

    # finally, store everything collected for this neighborhood
    intervention_levels[neighborhood] = {
        'complaint_sensitivity': complaint_sensitivity,
        'svi_sensitivity': svi_sensitivity,
        'llc_sensitivity': llc_sensitivity,
        'age_sensitivity': age_sensitivity,
        # combined
        'building_features_sensitivity': building_features_sensitivity,
        # individuals
        'bldgarea_sensitivity': single_feature_sensitivity['bldgarea'],
        'unitsres_sensitivity': single_feature_sensitivity['unitsres'],
        'numfloors_sensitivity': single_feature_sensitivity['numfloors']
    }

    print(f"done with the loop of {neighborhood}")

inside the outside loop of sensitivity analysis for Central Harlem North-Polo Grounds with 1632 buildings
inside the loop of complaint reduction at 25%
inside the loop of complaint reduction at 50%
inside the loop of complaint reduction at 75%
inside the loop of SVI improvement at quantile 0.25
inside the loop of SVI improvement at quantile 0.5
inside the loop of SVI improvement at quantile 0.75
inside the loop of LLC status flipping
inside the loop of building age reduction by 10 years
inside the loop of building age reduction by 20 years
inside the loop of building age reduction by 30 years
inside the loop of combined building feature reduction at 30%
inside the loop of combined building feature reduction at 50%
inside the loop of combined building feature reduction at 70%
inside the loop of bldgarea reduction at 30%
inside the loop of bldgarea reduction at 50%
inside the loop of bldgarea reduction at 70%
inside the loop of unitsres reduction at 30%
inside the loop of unitsres reduct

In [None]:
# display the nested {neighborhood: {sensitivity category: [(variant, mean percent change), ...]}} result
intervention_levels

{'Central Harlem North-Polo Grounds': {'complaint_sensitivity': [(25,
    np.float64(-6.568511029411765)),
   (50, np.float64(-2.2003186274509803)),
   (75, np.float64(1.9561397058823524))],
  'svi_sensitivity': [(0.25, np.float64(-35.873823529411766)),
   (0.5, np.float64(-13.224479166666665)),
   (0.75, np.float64(-23.9684068627451))],
  'llc_sensitivity': [('llc_to_non', np.float64(-17.980793357933578)),
   ('non_to_llc', np.float64(-16.049137614678898))],
  'age_sensitivity': [(10, np.float64(-63.62705269607844)),
   (20, np.float64(-96.53775122549018)),
   (30, np.float64(-107.17489583333332))],
  'building_features_sensitivity': [(30, np.float64(-7.267530637254902)),
   (50, np.float64(-6.284944852941177)),
   (70, np.float64(-7.4486580882352955))],
  'bldgarea_sensitivity': [(30, np.float64(-96.07960171568625)),
   (50, np.float64(-96.07960171568625)),
   (70, np.float64(-96.07960171568625))],
  'unitsres_sensitivity': [(30, np.float64(-96.07960171568625)),
   (50, np.float64(-9

In [None]:
# accumulator for the flattened sensitivity records (one dict per neighborhood / intervention / variant)
flat_records = []

In [None]:
# display label for each sensitivity category key stored in intervention_levels
intervention_name_map = {
    'complaint_sensitivity': 'complaint reduction',
    'svi_sensitivity': 'SVI improvement',
    'llc_sensitivity': 'LLC Status Change',
    'age_sensitivity': 'building age reduction',
    'building_features_sensitivity': 'all building features',
    'bldgarea_sensitivity': 'bldgarea reduction',
    'unitsres_sensitivity': 'unitsres reduction',
    'numfloors_sensitivity': 'numFloors reduction',
}

In [None]:
# Flatten intervention_levels into one record per (neighborhood, intervention, variant).
# The list is rebuilt from scratch in this cell, so re-running it no longer appends
# a second copy of every record to a list that was initialised in a different cell.
flat_records = [
    {
        'neighborhood': neighborhood,
        # human-readable label; falls back to the raw key when no label is mapped
        'intervention': intervention_name_map.get(key, key),
        'variant': change,
        # plain Python float instead of np.float64
        'effect': float(effect),
        'feature': key.replace('_sensitivity', ''),
    }
    for neighborhood, sensitivity_dict in intervention_levels.items()
    for key, values in sensitivity_dict.items()
    for change, effect in values
]

In [None]:
# one row per flattened sensitivity record
intervention_df = pd.DataFrame(flat_records)

In [None]:
# order rows by neighborhood, then intervention label, then variant, and renumber the index
# NOTE(review): 'variant' holds ints, floats and strings (e.g. 'llc_to_non') -- confirm the sort order is as intended
intervention_df = intervention_df.sort_values(by=['neighborhood', 'intervention', 'variant']).reset_index(drop=True)
intervention_df.shape

(69, 5)

In [None]:
# display the full sensitivity table, including the single-feature building sweeps
intervention_df

Unnamed: 0,neighborhood,intervention,variant,effect,feature
0,Bedford Park-Fordham North,LLC Status Change,llc_to_non,-7.40934,llc
1,Bedford Park-Fordham North,LLC Status Change,non_to_llc,-30.21035,llc
2,Bedford Park-Fordham North,SVI improvement,0.25000,-19.06828,svi
3,Bedford Park-Fordham North,SVI improvement,0.50000,-19.06828,svi
4,Bedford Park-Fordham North,SVI improvement,0.75000,-19.06828,svi
5,Bedford Park-Fordham North,all building features,30,-7.7896,building_features
6,Bedford Park-Fordham North,all building features,50,0.13767,building_features
7,Bedford Park-Fordham North,all building features,70,6.81297,building_features
8,Bedford Park-Fordham North,bldgarea reduction,30,-83.0918,bldgarea
9,Bedford Park-Fordham North,bldgarea reduction,50,-83.0918,bldgarea


In [None]:
# as we can see, many of the individual building features are way too "influential",
# so only the combined building-feature rows (which are more moderate) are kept
intervention_df_reduced = intervention_df[
    ~intervention_df['intervention'].isin(['bldgarea reduction', 'unitsres reduction', 'numFloors reduction'])
]

In [None]:
# compare row counts after vs. before dropping the single-feature sweeps
intervention_df_reduced.shape, intervention_df.shape

((42, 5), (69, 5))

In [None]:
# display the reduced table (single-feature building sweeps removed)
intervention_df_reduced

Unnamed: 0,neighborhood,intervention,variant,effect,feature
0,Bedford Park-Fordham North,LLC Status Change,llc_to_non,-7.40934,llc
1,Bedford Park-Fordham North,LLC Status Change,non_to_llc,-30.21035,llc
2,Bedford Park-Fordham North,SVI improvement,0.25000,-19.06828,svi
3,Bedford Park-Fordham North,SVI improvement,0.50000,-19.06828,svi
4,Bedford Park-Fordham North,SVI improvement,0.75000,-19.06828,svi
5,Bedford Park-Fordham North,all building features,30,-7.7896,building_features
6,Bedford Park-Fordham North,all building features,50,0.13767,building_features
7,Bedford Park-Fordham North,all building features,70,6.81297,building_features
11,Bedford Park-Fordham North,building age reduction,10,-70.15365,age
12,Bedford Park-Fordham North,building age reduction,20,-96.98458,age


In [None]:
# column names of the reduced table
intervention_df_reduced.columns

Index(['neighborhood', 'intervention', 'variant', 'effect', 'feature'], dtype='object')

In [None]:
# persist the full sensitivity table (single-feature sweeps included)
# NOTE(review): absolute Google Drive path, and the file name is spelled 'indiviudals' -- any reader of this file must use the same spelling
intervention_df.to_csv('/content/drive/My Drive/X999/intervention_df_indiviudals.csv')

In [None]:
# persist the reduced sensitivity table (single-feature sweeps removed)
# NOTE(review): absolute Google Drive path, and the file name is spelled 'indiviudals' -- any reader of this file must use the same spelling
intervention_df_reduced.to_csv('/content/drive/My Drive/X999/intervention_df_indiviudals_reduced.csv')

In [None]:
# intervention_levels_df is a dataframe obtained from a previous run; the code that built it
# has since been changed and is not kept here (re-running it takes a long time).
# It is basically the same table as the one above, but without the detailed single building-feature
# sweeps, i.e. it only contains the combined building-feature change effects.
# The combined effect makes more sense: when a building shrinks, the number of floors and the
# total number of units must shrink as well, so a prediction based on the three factors together
# is more sensible, and the results are clearer and more moderate.
# This table is outdated, but it is kept here for reference.
#
# Because the variable is not created anywhere in this notebook, a fresh kernel would raise
# NameError on it. It is therefore saved when it is in memory and reloaded from the saved CSV otherwise.
levels_csv_path = '/content/drive/My Drive/X999/intervention_levels_df.csv'

if 'intervention_levels_df' in globals():
    intervention_levels_df.to_csv(levels_csv_path)
elif os.path.exists(levels_csv_path):
    # index_col=0 restores the index that to_csv wrote as the first column
    intervention_levels_df = pd.read_csv(levels_csv_path, index_col=0)
else:
    intervention_levels_df = None
    print(f"intervention_levels_df is not in memory and {levels_csv_path} does not exist; skipping")

intervention_levels_df

Unnamed: 0,neighborhood,category,level,effect
0,Central Harlem North-Polo Grounds,complaint_sensitivity,25,-26.245551
1,Central Harlem North-Polo Grounds,complaint_sensitivity,50,-19.314583
2,Central Harlem North-Polo Grounds,complaint_sensitivity,75,-11.128511
3,Central Harlem North-Polo Grounds,svi_sensitivity,0.25,-36.939694
4,Central Harlem North-Polo Grounds,svi_sensitivity,0.5,-34.810208
5,Central Harlem North-Polo Grounds,svi_sensitivity,0.75,-43.022059
6,Central Harlem North-Polo Grounds,llc_sensitivity,llc_to_non,-34.222251
7,Central Harlem North-Polo Grounds,llc_sensitivity,non_to_llc,-50.641
8,Central Harlem North-Polo Grounds,age_sensitivity,10,-62.087592
9,Central Harlem North-Polo Grounds,age_sensitivity,20,-92.50038


In [None]:
# mini_tables
# accumulator for the per-building-type ranking records filled by a later cell
ranking_records = []

In [None]:
# neighborhood -> per-building-type aggregate table
building_type_analysis = {}
# flat ranking records (one per neighborhood / intervention / building type)
intervention_ranking_records = []

In [None]:
# define mapping for intervention display names
intervention_names = {
    'c': 'complaint reduction',
    's': 'SVI improvement',
    'llc': 'llc status change',
    'age': 'building age reduction',
    'bld_all': 'all building changes'
}

In [None]:
# Per-neighborhood breakdown of intervention effects by building type.
#
# For every neighborhood this cell:
#   1. aggregates original and counterfactual eviction counts per building type,
#   2. converts them into percent reductions (positive = fewer evictions),
#   3. records the best and second-best intervention per building type,
#   4. appends one ranking record per (intervention, building type).
#
# Both accumulators are reset here so that re-running the cell does not duplicate records.
building_type_analysis = {}
intervention_ranking_records = []

INTERVENTION_SUFFIXES = ['c', 's', 'llc', 'age', 'bld_all']


def _row_idxmax(frame):
    """Return, per row, the label of the column holding the maximum; NaN for rows without any value.

    DataFrame.idxmax is only called on rows that contain at least one non-NaN value,
    so all-NaN rows (e.g. building types with zero original evictions) cannot trip it.
    """
    labels = pd.Series(np.nan, index=frame.index, dtype=object)
    has_value = frame.notna().any(axis=1)
    if has_value.any():
        labels.loc[has_value] = frame.loc[has_value].idxmax(axis=1)
    return labels


for neighborhood in target_neighborhoods:
    neighborhood_df = result_df_nta[result_df_nta['nta'] == neighborhood]
    building_col = None

    # identify the correct building type column (first candidate that exists wins)
    for col in ['building_category', 'building_type', 'building_size_category']:
        if col in neighborhood_df.columns:
            building_col = col
            break

    if building_col is None:
        print(f"No building category column found for {neighborhood}. Skipping...")
        continue

    building_analysis = neighborhood_df.groupby(building_col).agg({
        'original_eviction_count': 'sum',
        'bin': 'count'
    }).rename(columns={'bin': 'building_count'})

    # add intervention effect columns (summed predicted evictions per building type)
    for suffix in INTERVENTION_SUFFIXES:
        col_name = f'average_year_eviction_count_{suffix}'
        if col_name in neighborhood_df.columns:
            building_analysis[col_name] = neighborhood_df.groupby(building_col)[col_name].sum()

    # percent reductions relative to the original count; a zero original count yields NaN
    for suffix in INTERVENTION_SUFFIXES:
        col_name = f'average_year_eviction_count_{suffix}'
        if col_name in building_analysis.columns:
            with np.errstate(divide='ignore', invalid='ignore'):
                building_analysis[f'percent_change_{suffix}'] = (
                    (building_analysis['original_eviction_count'] - building_analysis[col_name]) /
                    building_analysis['original_eviction_count'].replace(0, np.nan) * 100
                ).round(2)

    # determine best and second-best interventions
    pct_columns = [col for col in building_analysis.columns if col.startswith('percent_change_')]
    if pct_columns:
        best_labels = _row_idxmax(building_analysis[pct_columns])
        building_analysis['best_intervention'] = best_labels.str.replace('percent_change_', '')
        building_analysis['max_reduction_pct'] = building_analysis[pct_columns].max(axis=1)

        # for second-best intervention: blank out each row's best column, then take the max again.
        # rows without a best column are skipped, otherwise a 'percent_change_nan' column would be created
        pct_df = building_analysis[pct_columns].copy()
        for idx, best_col_full in best_labels.items():
            if pd.notna(best_col_full):
                pct_df.loc[idx, best_col_full] = np.nan

        building_analysis['second_best_intervention'] = _row_idxmax(pct_df).str.replace('percent_change_', '')
        building_analysis['second_max_reduction_pct'] = pct_df.max(axis=1)

    building_type_analysis[neighborhood] = building_analysis

    # now collect full rankings for all building types and interventions
    for intervention in INTERVENTION_SUFFIXES:
        col = f'percent_change_{intervention}'
        if col not in building_analysis.columns:
            continue

        intervention_name = intervention_names.get(intervention, intervention)

        # reset_index gives a 0..n-1 index, so the iterrows label doubles as a zero-based rank
        sorted_building_analysis = building_analysis.sort_values(col, ascending=False).reset_index()

        for rank, row in sorted_building_analysis.iterrows():
            reduction = row[col]
            if not pd.isna(reduction):
                intervention_ranking_records.append({
                    'neighborhood': neighborhood,
                    'intervention_type': intervention_name,
                    'building_type': row[building_col],
                    'reduction_pct': reduction,
                    'rank': rank + 1,
                    'building_count': row['building_count'],
                    'eviction_count': row['original_eviction_count']
                })

In [None]:
# Build the ranking records (one per neighborhood / intervention / building type) from the
# per-neighborhood tables in building_type_analysis. Unlike intervention_ranking_records,
# rows with a missing reduction are kept, with reduction_pct set to None.
#
# The list is reset here so that re-running the cell does not duplicate records.
ranking_records = []

for neighborhood in target_neighborhoods:
    building_analysis = building_type_analysis.get(neighborhood)
    if building_analysis is None:
        # no table was stored for this neighborhood (e.g. it was skipped for lacking a
        # building category column); without this guard `.columns` would fail on None
        continue

    for intervention in ['c', 's', 'llc', 'age', 'bld_all']:
        col = f'percent_change_{intervention}'
        if col not in building_analysis.columns:
            continue
        intervention_name = intervention_names.get(intervention, intervention)
        sorted_df = building_analysis.sort_values(col, ascending=False).reset_index()
        for rank, row in sorted_df.iterrows():
            # after reset_index the building type lives in a column named after the old index
            btype = row[building_analysis.index.name or 'index']
            reduction = row[col]
            record = {'neighborhood': neighborhood, 'intervention_type': intervention_name, 'building_type': btype,
                'reduction_pct': None if pd.isna(reduction) else round(reduction, 2), 'rank': rank + 1, 'building_count': int(row['building_count']),
                'eviction_count': float(row['original_eviction_count'])}
            ranking_records.append(record)

In [None]:
# tabulate the ranking records and check the table size
ranking_df = pd.DataFrame(ranking_records)
ranking_df.shape

(85, 7)

In [None]:
# display the ranking of building types per neighborhood and intervention
ranking_df

Unnamed: 0,neighborhood,intervention_type,building_type,reduction_pct,rank,building_count,eviction_count
0,Central Harlem North-Polo Grounds,complaint reduction,elevator,44.49,1,735,1602.6
1,Central Harlem North-Polo Grounds,complaint reduction,walk-up,24.25,2,801,591.0
2,Central Harlem North-Polo Grounds,complaint reduction,condo-co-op,15.46,3,85,105.0
3,Central Harlem North-Polo Grounds,complaint reduction,two-family,-84.47,4,3,0.6
4,Central Harlem North-Polo Grounds,complaint reduction,primarily_res_with_mixed_use,-105.91,5,8,1.6
5,Central Harlem North-Polo Grounds,SVI improvement,condo-co-op,28.13,1,85,105.0
6,Central Harlem North-Polo Grounds,SVI improvement,elevator,27.38,2,735,1602.6
7,Central Harlem North-Polo Grounds,SVI improvement,walk-up,22.81,3,801,591.0
8,Central Harlem North-Polo Grounds,SVI improvement,two-family,-42.23,4,3,0.6
9,Central Harlem North-Polo Grounds,SVI improvement,primarily_res_with_mixed_use,-59.78,5,8,1.6


In [None]:
# tabulate the ranking records collected in the building-type analysis loop and preview the first rows
intervention_ranking_df = pd.DataFrame(intervention_ranking_records)
intervention_ranking_df.head()

Unnamed: 0,neighborhood,intervention_type,building_type,reduction_pct,rank,building_count,eviction_count
0,Central Harlem North-Polo Grounds,complaint reduction,elevator,44.49,1,735,1602.6
1,Central Harlem North-Polo Grounds,complaint reduction,walk-up,24.25,2,801,591.0
2,Central Harlem North-Polo Grounds,complaint reduction,condo-co-op,15.46,3,85,105.0
3,Central Harlem North-Polo Grounds,complaint reduction,two-family,-84.47,4,3,0.6
4,Central Harlem North-Polo Grounds,complaint reduction,primarily_res_with_mixed_use,-105.91,5,8,1.6


In [None]:
# save the ranking table to the current working directory, without the index column
intervention_ranking_df.to_csv("intervention_ranking_by_building_type.csv", index=False)

In [None]:
# Show the per-building-type table of the last neighborhood in target_neighborhoods,
# ordered by its best achievable reduction. The table is looked up explicitly rather than
# through the `building_analysis` variable left over from the loops above, so the cell
# does not depend on which loop cell happened to run last.
last_neighborhood = list(target_neighborhoods)[-1]
building_type_analysis[last_neighborhood].sort_values('max_reduction_pct', ascending=False)

Unnamed: 0_level_0,original_eviction_count,building_count,average_year_eviction_count_c,average_year_eviction_count_s,average_year_eviction_count_llc,average_year_eviction_count_age,average_year_eviction_count_bld_all,percent_change_c,percent_change_s,percent_change_llc,percent_change_age,percent_change_bld_all,best_intervention,max_reduction_pct,second_best_intervention,second_max_reduction_pct
building_category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
primarily_res_with_mixed_use,8.4,14,17.259916,4.583329,5.758747,58.230602,59.556992,-105.48,45.44,31.44,-593.22,-609.01,s,45.44,llc,31.44
condo-co-op,36.8,42,77.589668,25.427605,42.46814,41.236576,23.534353,-110.84,30.9,-15.4,-12.06,36.05,bld_all,36.05,s,30.9
two-family,14.6,33,16.723236,9.74443,21.720274,79.472794,79.472794,-14.54,33.26,-48.77,-444.33,-444.33,s,33.26,c,-14.54
walk-up,947.2,816,1503.182983,643.492493,917.414001,1017.300964,840.70929,-58.7,32.06,3.14,-7.4,11.24,s,32.06,bld_all,11.24
elevator,1035.2,616,1584.268799,721.17157,992.136963,1208.093628,1224.339111,-53.04,30.34,4.16,-16.7,-18.27,s,30.34,llc,4.16
single-family,2.2,9,5.050557,3.098861,5.821255,4.152866,4.152866,-129.57,-40.86,-164.6,-88.77,-88.77,s,-40.86,age,-88.77


# **Based on these results, here are summaries of how each type of reduction would affect different categories of buildings in three of the neighborhoods hit hardest by evictions: Central Harlem North-Polo Grounds, Crown Heights North, and Bedford Park-Fordham North:**

### *These are the "What if scenarios": What if certain types of complaints go down? What if SVI scores go down? What if buildings were newer? What if the buildings were downsized? What if SVI scores only went down xx% but several complaints went to 0? What if all complaints were reduced to 0? What if the ownership of the building changed? What if these same changes happened to different categories of buildings (such as walk-up and elevator buildings), and how would they react differently to the changes? Which types of buildings would be the most resilient? etc.*

### Previously, we already learned the overall trends when these factors decrease. By using the core function and adjusting the **geo parameters (neighborhoods, zipcodes, or building ids)**, **feature selections (complaints, SVI scores, building age, is_llc, building units, building areas, and building heights)**, and the **degree and direction of change (yes -> no, no -> yes, 25%, 50%, 75% reductions for each feature, etc.)**, we got detailed breakdowns of exactly what the effects would be on these particular eviction hotspots.

### In future notebooks, we will carry out experiments and analysis on eviction hotspots (in terms of building ids) and eviction coldspots (in terms of neighborhoods and building ids), using the same core functions, to come up with scenarios that combine different feature variants, directions of change, and degrees of change (we may or may not upload those experiments here).

## **Intervention Effectiveness by Building Type for Central Harlem North-Polo Grounds**

--- Complaint Reduction ---
1. elevator: 44.49% reduction (735 buildings, 1602.6 evictions)
2. walk-up: 24.25% reduction (801 buildings, 591.0 evictions)
3. condo-co-op: 15.46% reduction (85 buildings, 105.0 evictions)
4. two-family: -84.47% reduction (3 buildings, 0.6 evictions)
5. primarily_res_with_mixed_use: -105.91% reduction (8 buildings, 1.6 evictions)

--- SVI Improvement ---
1. condo-co-op: 28.13% reduction (85 buildings, 105.0 evictions)
2. elevator: 27.38% reduction (735 buildings, 1602.6 evictions)
3. walk-up: 22.81% reduction (801 buildings, 591.0 evictions)
4. two-family: -42.23% reduction (3 buildings, 0.6 evictions)
5. primarily_res_with_mixed_use: -59.78% reduction (8 buildings, 1.6 evictions)

--- LLC Status Change ---
1. walk-up: 3.67% reduction (801 buildings, 591.0 evictions)
2. elevator: -0.78% reduction (735 buildings, 1602.6 evictions)
3. condo-co-op: -23.32% reduction (85 buildings, 105.0 evictions)
4. primarily_res_with_mixed_use: -76.09% reduction (8 buildings, 1.6 evictions)
5. two-family: -107.76% reduction (3 buildings, 0.6 evictions)

--- Building Age Reduction ---
1. walk-up: 5.22% reduction (801 buildings, 591.0 evictions)
2. condo-co-op: -24.69% reduction (85 buildings, 105.0 evictions)
3. elevator: -65.97% reduction (735 buildings, 1602.6 evictions)
4. two-family: -84.09% reduction (3 buildings, 0.6 evictions)
5. primarily_res_with_mixed_use: -690.73% reduction (8 buildings, 1.6 evictions)

--- All Building Changes ---
1. walk-up: 22.21% reduction (801 buildings, 591.0 evictions)
2. condo-co-op: -29.14% reduction (85 buildings, 105.0 evictions)
3. elevator: -58.71% reduction (735 buildings, 1602.6 evictions)
4. two-family: -82.60% reduction (3 buildings, 0.6 evictions)
5. primarily_res_with_mixed_use: -648.54% reduction (8 buildings, 1.6 evictions)

## **Intervention Effectiveness by Building Type for Crown Heights North**

--- Complaint Reduction ---
1. elevator: 23.72% reduction (381 buildings, 610.2 evictions)
2. walk-up: 22.46% reduction (941 buildings, 631.8 evictions)
3. condo-co-op: 12.94% reduction (51 buildings, 31.8 evictions)
4. two-family: 8.14% reduction (126 buildings, 48.0 evictions)
5. primarily_res_with_mixed_use: -3.23% reduction (89 buildings, 30.6 evictions)
6. single-family: -32.17% reduction (11 buildings, 2.6 evictions)

--- SVI Improvement ---
1. elevator: 26.19% reduction (381 buildings, 610.2 evictions)
2. two-family: 23.53% reduction (126 buildings, 48.0 evictions)
3. walk-up: 15.82% reduction (941 buildings, 631.8 evictions)
4. primarily_res_with_mixed_use: 8.48% reduction (89 buildings, 30.6 evictions)
5. condo-co-op: -3.20% reduction (51 buildings, 31.8 evictions)
6. single-family: -17.54% reduction (11 buildings, 2.6 evictions)

--- LLC Status Change ---
1. walk-up: 9.49% reduction (941 buildings, 631.8 evictions)
2. two-family: -4.87% reduction (126 buildings, 48.0 evictions)
3. primarily_res_with_mixed_use: -7.45% reduction (89 buildings, 30.6 evictions)
4. elevator: -7.86% reduction (381 buildings, 610.2 evictions)
5. condo-co-op: -9.10% reduction (51 buildings, 31.8 evictions)
6. single-family: -67.38% reduction (11 buildings, 2.6 evictions)

--- Building Age Reduction ---
1. single-family: -33.96% reduction (11 buildings, 2.6 evictions)
2. elevator: -78.21% reduction (381 buildings, 610.2 evictions)
3. primarily_res_with_mixed_use: -125.37% reduction (89 buildings, 30.6 evictions)
4. condo-co-op: -133.15% reduction (51 buildings, 31.8 evictions)
5. walk-up: -218.60% reduction (941 buildings, 631.8 evictions)
6. two-family: -581.52% reduction (126 buildings, 48.0 evictions)

--- All Building Changes ---
1. single-family: -33.96% reduction (11 buildings, 2.6 evictions)
2. elevator: -69.21% reduction (381 buildings, 610.2 evictions)
3. primarily_res_with_mixed_use: -122.72% reduction (89 buildings, 30.6 evictions)
4. condo-co-op: -174.00% reduction (51 buildings, 31.8 evictions)
5. walk-up: -219.30% reduction (941 buildings, 631.8 evictions)
6. two-family: -574.18% reduction (126 buildings, 48.0 evictions)

## **Intervention Effectiveness by Building Type for Bedford Park-Fordham North**

--- Complaint Reduction ---
1. two-family: -14.54% reduction (33 buildings, 14.6 evictions)
2. elevator: -53.04% reduction (616 buildings, 1035.2 evictions)
3. walk-up: -58.70% reduction (816 buildings, 947.2 evictions)
4. primarily_res_with_mixed_use: -105.48% reduction (14 buildings, 8.4 evictions)
5. condo-co-op: -110.84% reduction (42 buildings, 36.8 evictions)
6. single-family: -129.57% reduction (9 buildings, 2.2 evictions)

--- SVI Improvement ---
1. primarily_res_with_mixed_use: 45.44% reduction (14 buildings, 8.4 evictions)
2. two-family: 33.26% reduction (33 buildings, 14.6 evictions)
3. walk-up: 32.06% reduction (816 buildings, 947.2 evictions)
4. condo-co-op: 30.90% reduction (42 buildings, 36.8 evictions)
5. elevator: 30.34% reduction (616 buildings, 1035.2 evictions)
6. single-family: -40.86% reduction (9 buildings, 2.2 evictions)

--- LLC Status Change ---
1. primarily_res_with_mixed_use: 31.44% reduction (14 buildings, 8.4 evictions)
2. elevator: 4.16% reduction (616 buildings, 1035.2 evictions)
3. walk-up: 3.14% reduction (816 buildings, 947.2 evictions)
4. condo-co-op: -15.40% reduction (42 buildings, 36.8 evictions)
5. two-family: -48.77% reduction (33 buildings, 14.6 evictions)
6. single-family: -164.60% reduction (9 buildings, 2.2 evictions)

--- Building Age Reduction ---
1. walk-up: -7.40% reduction (816 buildings, 947.2 evictions)
2. condo-co-op: -12.06% reduction (42 buildings, 36.8 evictions)
3. elevator: -16.70% reduction (616 buildings, 1035.2 evictions)
4. single-family: -88.77% reduction (9 buildings, 2.2 evictions)
5. two-family: -444.33% reduction (33 buildings, 14.6 evictions)
6. primarily_res_with_mixed_use: -593.22% reduction (14 buildings, 8.4 evictions)

--- All Building Changes ---
1. condo-co-op: 36.05% reduction (42 buildings, 36.8 evictions)
2. walk-up: 11.24% reduction (816 buildings, 947.2 evictions)
3. elevator: -18.27% reduction (616 buildings, 1035.2 evictions)
4. single-family: -88.77% reduction (9 buildings, 2.2 evictions)
5. two-family: -444.33% reduction (33 buildings, 14.6 evictions)
6. primarily_res_with_mixed_use: -609.01% reduction (14 buildings, 8.4 evictions)