# 03 — Predictive Modeling: Eviction Risk

In [None]:

import pandas as pd, numpy as np, matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score, RocCurveDisplay
from sklearn.inspection import permutation_importance

# Load the household table; the path is relative to the working directory.
df = pd.read_csv('data/households.csv')


def _impute_by_group(frame, column, stat, keys=('county', 'year'), integer=False):
    """Return ``frame[column]`` with missing values filled from group statistics.

    Each NaN is replaced by the ``stat`` ('median' or 'mean') of its
    ``keys`` group. Where no group value is available -- the group has no
    observed value, or the row's own key is missing and so it belongs to no
    group -- the statistic over the whole column is used instead, so the
    result only contains NaN if the entire column is empty.

    Observed values are never modified. With ``integer=True`` the fill values
    are rounded to whole numbers before being applied.
    """
    values = frame[column]
    # Built-in aggregation names avoid a Python-level lambda call per group.
    fill = frame.groupby(list(keys))[column].transform(stat)
    # Fallback for rows the grouping could not supply a statistic for.
    fill = fill.fillna(getattr(values, stat)())
    if integer:
        fill = fill.round()
    return values.fillna(fill)


# NOTE(review): these statistics are computed over every row, i.e. before the
# train/test split performed later in this script, so held-out rows contribute
# to the imputed values. Consider fitting the imputation on training rows only.
df['annual_income_usd'] = _impute_by_group(df, 'annual_income_usd', 'median')
df['num_children'] = _impute_by_group(df, 'num_children', 'median', integer=True)

# Bound the ratio to [0, 1] before averaging so out-of-range entries cannot
# pull the imputed means outside that interval.
df['rent_burden_ratio'] = df['rent_burden_ratio'].clip(0, 1)
df['rent_burden_ratio'] = _impute_by_group(df, 'rent_burden_ratio', 'mean')

# Predictor columns fed to both models, in the order used for the design matrix.
features = [
    'annual_income_usd',
    'rent_burden_ratio',
    'eviction_notice',
    'prior_homeless_experience',
    'num_children',
    'any_disability',
]
X = df[features]
y = df['eviction_filed_next_6mo']

# Hold out 25% of rows, stratified on the target so both partitions keep the
# same class proportions; the fixed seed makes the split reproducible.
Xtr, Xte, ytr, yte = train_test_split(
    X,
    y,
    stratify=y,
    random_state=7,
    test_size=0.25,
)

# Learn the scaling parameters from the training rows only, then apply the
# same transformation to both partitions.
sc = StandardScaler()
sc.fit(Xtr)
Xtr_s = sc.transform(Xtr)
Xte_s = sc.transform(Xte)

# Logistic regression is trained on the standardized features.
lr = LogisticRegression(max_iter=200)
lr.fit(Xtr_s, ytr)

# The random forest is trained on the unscaled features.
rf = RandomForestClassifier(
    class_weight='balanced_subsample',
    n_estimators=300,
    random_state=7,
)
rf.fit(Xtr, ytr)

# Second column of predict_proba: the probability each model assigns to the
# class listed second in its classes_ for every held-out row.
plr = lr.predict_proba(Xte_s)[:, 1]
prf = rf.predict_proba(Xte)[:, 1]

# Held-out ROC AUC for each model, reported to three decimals.
auc_lr = round(roc_auc_score(yte, plr), 3)
auc_rf = round(roc_auc_score(yte, prf), 3)
print('AUC — LR:', auc_lr, ' RF:', auc_rf)

# Draw both ROC curves on one shared axes. Without an explicit ``ax``, each
# RocCurveDisplay.from_predictions call opens its own figure, which leaves the
# curves on separate plots, an empty first figure, and the title on only the
# last one.
fig, ax = plt.subplots()
RocCurveDisplay.from_predictions(yte, plr, name='LogReg', ax=ax)
RocCurveDisplay.from_predictions(yte, prf, name='RandomForest', ax=ax)
ax.set_title('ROC — Eviction Risk')
fig.tight_layout()
plt.show()

# Permutation importance of each feature for the random forest on the held-out
# rows: the mean drop in score over 10 shuffles of that feature's column.
# Score with ROC AUC so importances are measured on the same metric the models
# are reported with; leaving ``scoring`` unset would fall back to the forest's
# default scorer (accuracy).
perm = permutation_importance(
    rf,
    Xte,
    yte,
    scoring='roc_auc',
    n_repeats=10,
    random_state=7,
)

# One row per feature, most important first.
importances = pd.DataFrame(
    {'feature': features, 'importance': perm.importances_mean}
).sort_values('importance', ascending=False)
print(importances)

# Horizontal bar chart of the importance table. Bars are drawn in table order
# and the y-axis is flipped so the first row appears at the top.
fig, ax = plt.subplots()
ax.barh(importances['feature'], importances['importance'])
ax.invert_yaxis()
ax.set_title('Permutation Importance (RF)')
fig.tight_layout()
plt.show()
