# Import packages

In [69]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from linearmodels.panel import PanelOLS
import statsmodels.formula.api as smf

# Import dataset

In [70]:
# Read the processed dataset from disk.
# NOTE(review): this is a machine-specific absolute path — adjust it when
# running the notebook on another machine.
DATA_PATH = "C:/Users/trung/CCDS/data/processed/df.csv"
df = pd.read_csv(DATA_PATH)

# Preview the first five rows
df.head(n=5)

Unnamed: 0,name,year,poverty-rate,median-household-income,rent-burden,pct-white,households-threatened,eviction-filing-rate,total-crime
0,Baltimore City,2000,22.92,30078,26.9,30.96,58207.75,124.17,66791
1,Baltimore City,2001,22.92,30078,26.9,30.96,52564.67,111.08,64453
2,Baltimore City,2002,22.92,30078,26.9,30.96,57183.58,121.05,56550
3,Baltimore City,2003,22.92,30078,26.9,30.96,57039.41,116.82,49263
4,Baltimore City,2004,22.92,30078,26.9,30.96,60221.48,109.42,48314


# Data processing

In [71]:
# Drop the households-threatened variable.
# `columns=` already selects the axis, so no separate `axis` argument is needed;
# errors="ignore" keeps this cell safe to re-run once the column is already gone.
df = df.drop(columns="households-threatened", errors="ignore")

In [72]:
# Give the hyphenated columns listed below snake_case names.
# The other hyphenated columns (median-household-income, total-crime) are not
# renamed here; later cells still refer to them by their original names.
snake_case_names = {
    "poverty-rate": "poverty_rate",
    "rent-burden": "rent_burden",
    "pct-white": "pct_white",
    "eviction-filing-rate": "eviction_filing_rate",
}
df = df.rename(columns=snake_case_names)

In [73]:
# Jurisdictions treated as metropolitan in this analysis
metro_jurisdictions = ["Frederick County", "Montgomery County", "Prince George's County"]

# 0/1 indicator: 1 when the row's jurisdiction name is in the list above
is_metro = df['name'].isin(metro_jurisdictions)
df['metropolitan'] = is_metro.astype(int)

In [74]:
# Add natural-log versions of income, crime, poverty rate and eviction filing rate.
# Crime is shifted by +1 before taking the log so that a value of zero stays finite;
# the other three variables are logged as-is.
df = df.assign(
    log_income=np.log(df['median-household-income']),
    log_crime=np.log(df['total-crime'] + 1),
    log_poverty=np.log(df['poverty_rate']),
    log_eviction=np.log(df['eviction_filing_rate']),
)

In [75]:
# Poverty × metro interaction: poverty rate (in levels) times the 0/1 metropolitan flag,
# so the term is zero for every non-metropolitan row.
# NOTE(review): the regressions use log_poverty as the main effect while this
# interaction uses the unlogged poverty_rate — confirm this is intended.
df = df.assign(poverty_metro=df['poverty_rate'] * df['metropolitan'])

In [76]:
# Display the processed frame to check the renamed and derived columns
df

Unnamed: 0,name,year,poverty_rate,median-household-income,rent_burden,pct_white,eviction_filing_rate,total-crime,metropolitan,log_income,log_crime,log_poverty,log_eviction,poverty_metro
0,Baltimore City,2000,22.92,30078,26.9,30.96,124.17,66791,0,10.311549,11.109339,3.132010,4.821652,0.0
1,Baltimore City,2001,22.92,30078,26.9,30.96,111.08,64453,0,10.311549,11.073707,3.132010,4.710251,0.0
2,Baltimore City,2002,22.92,30078,26.9,30.96,121.05,56550,0,10.311549,10.942898,3.132010,4.796204,0.0
3,Baltimore City,2003,22.92,30078,26.9,30.96,116.82,49263,0,10.311549,10.804949,3.132010,4.760634,0.0
4,Baltimore City,2004,22.92,30078,26.9,30.96,109.42,48314,0,10.311549,10.785497,3.132010,4.695194,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
451,Worcester County,2014,7.65,56773,31.5,79.97,17.29,2427,0,10.946816,7.794823,2.034706,2.850128,0.0
452,Worcester County,2015,7.65,56773,31.5,79.97,10.03,2008,0,10.946816,7.605392,2.034706,2.305581,0.0
453,Worcester County,2016,6.41,61145,30.7,80.08,15.95,1987,0,11.021003,7.594884,1.857859,2.769459,0.0
454,Worcester County,2017,6.41,61145,30.7,80.08,14.51,1950,0,11.021003,7.576097,1.857859,2.674838,0.0


# Baseline model: Pooled OLS

In [80]:
# Regressors for the pooled specification (no jurisdiction or year dummies)
ols_vars = ['log_poverty', 'log_income', 'rent_burden', 'log_crime', 'pct_white']
y = df['log_eviction']
X0 = sm.add_constant(df.loc[:, ols_vars])

# Heteroskedasticity-robust (HC3) standard errors.
# Note: these are NOT clustered by county; HC3 is the covariance type requested here.
model0 = sm.OLS(endog=y, exog=X0)
res0 = model0.fit(cov_type='HC3')

print("Model 0: Pooled OLS", res0.summary(), sep="\n")

Model 0: Pooled OLS
                            OLS Regression Results                            
Dep. Variable:           log_eviction   R-squared:                       0.653
Model:                            OLS   Adj. R-squared:                  0.649
Method:                 Least Squares   F-statistic:                     179.3
Date:                Wed, 19 Nov 2025   Prob (F-statistic):          1.14e-104
Time:                        20:27:31   Log-Likelihood:                -305.17
No. Observations:                 456   AIC:                             622.3
Df Residuals:                     450   BIC:                             647.1
Df Model:                           5                                         
Covariance Type:                  HC3                                         
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
const          -5.2068      1.

# Model 1: Jurisdiction and Year Fixed Effects

In [78]:
# Year and jurisdiction dummies; drop_first=True omits the first level of each
# so the remaining dummies are measured against that reference category.
fe_year = pd.get_dummies(df['year'], drop_first=True, dtype=float)
fe_entity = pd.get_dummies(df['name'], drop_first=True, dtype=float)

# Design matrix: constant, covariates, year dummies, jurisdiction dummies (in that order)
covariates_m1 = df[['log_poverty', 'log_income', 'rent_burden', 'log_crime', 'pct_white']]
X1 = sm.add_constant(pd.concat([covariates_m1, fe_year, fe_entity], axis=1))
y = df['log_eviction']

# Fit by OLS with HC3 heteroskedasticity-robust standard errors
# (model1 holds the fitted results object).
model1 = sm.OLS(endog=y, exog=X1).fit(cov_type="HC3")

print(model1.summary())

                            OLS Regression Results                            
Dep. Variable:           log_eviction   R-squared:                       0.976
Model:                            OLS   Adj. R-squared:                  0.974
Method:                 Least Squares   F-statistic:                     415.6
Date:                Wed, 19 Nov 2025   Prob (F-statistic):          5.81e-314
Time:                        20:26:13   Log-Likelihood:                 307.95
No. Observations:                 456   AIC:                            -521.9
Df Residuals:                     409   BIC:                            -328.1
Df Model:                          46                                         
Covariance Type:                  HC3                                         
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                      6

# Model 2: Jurisdiction and Year Fixed Effects with Interaction Term

In [81]:
# Design matrix: constant, covariates plus the poverty × metro interaction,
# then the year and jurisdiction dummies (fe_year / fe_entity must already exist
# from the cell that builds the fixed-effect dummies).
covariates_m2 = df[['log_poverty', 'log_income', 'rent_burden', 'log_crime', 'pct_white', 'poverty_metro']]
X2 = sm.add_constant(pd.concat([covariates_m2, fe_year, fe_entity], axis=1))
y = df['log_eviction']

# Fit by OLS with HC3 heteroskedasticity-robust standard errors
# (model2 holds the fitted results object).
model2 = sm.OLS(endog=y, exog=X2).fit(cov_type="HC3")

print(model2.summary())

                            OLS Regression Results                            
Dep. Variable:           log_eviction   R-squared:                       0.976
Model:                            OLS   Adj. R-squared:                  0.974
Method:                 Least Squares   F-statistic:                     411.8
Date:                Wed, 19 Nov 2025   Prob (F-statistic):          6.29e-314
Time:                        20:38:42   Log-Likelihood:                 308.42
No. Observations:                 456   AIC:                            -520.8
Df Residuals:                     408   BIC:                            -323.0
Df Model:                          47                                         
Covariance Type:                  HC3                                         
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                      6