In [50]:
import pandas as pd
import os
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
import statsmodels.formula.api as smf

In [3]:
df = pd.read_stata("hpd_correction/Eviction_Building_Panel_Gardner_200307to201312_v1.dta")

In [10]:
df['total_evic'] = df['atfault'] + df['nofault']

In [11]:
df_year = df.groupby(
    ['MAP_BLK_LOT', 'year', 'newYRBLT', 'initial_newUNITS', 'initial_rentcontrol']).agg(
    {'atfault': "sum", "nofault": "sum", "total_evic": "sum", "month": "count"}
).reset_index().rename(columns={'month':'months_obs'})

df_year['nofault_per_unit_per_month'] = (df_year['nofault'] / df_year['initial_newUNITS']) / df_year['months_obs']
df_year['atfault_per_unit_per_month'] = (df_year['atfault'] / df_year['initial_newUNITS']) / df_year['months_obs']
df_year['total_evic_per_unit_per_month'] = (df_year['total_evic'] / df_year['initial_newUNITS']) / df_year['months_obs']

df_year['nofault_per_unit_per_yr'] = df_year['nofault_per_unit_per_month'] * 12
df_year['total_evic_per_unit_per_yr'] = df_year['total_evic_per_unit_per_month'] * 12
df_year['atfault_per_unit_per_yr'] = df_year['atfault_per_unit_per_month'] * 12

In [54]:
bandwidth = 23

rd_df = df_year[
    (df_year['newYRBLT'] > 1980 - bandwidth) &  # bandwidth
    (df_year['newYRBLT'] < 1980 + bandwidth) &  # bandwidth
    (df_year['newYRBLT'] != 1985) &  # huge outlier (?)
#     (df_year['newYRBLT'] != 1979) &  # partial RC (?)
#     (df_year['newYRBLT'] != 1980) &  # Should be RC but could be fuzzy boundary (?)
    (df_year['initial_newUNITS'] > 0)
].copy()

In [55]:
rd_df['pre_1980'] = rd_df['newYRBLT'] < 1980
rd_df['rent_control'] = False
rd_df.loc[rd_df['pre_1980'] == True, 'rent_control'] = True
rd_df['year_built_centered'] = rd_df['newYRBLT'] - 1980

### Model 1

In [56]:
rd = smf.ols(
    "total_evic_per_unit_per_yr ~ rent_control + year_built_centered*rent_control",
    data=rd_df)

In [57]:
fitted = rd.fit()

In [58]:
print(fitted.summary())

                                OLS Regression Results                                
Dep. Variable:     total_evic_per_unit_per_yr   R-squared:                       0.001
Model:                                    OLS   Adj. R-squared:                  0.001
Method:                         Least Squares   F-statistic:                     14.89
Date:                        Sun, 04 Dec 2022   Prob (F-statistic):           1.10e-09
Time:                                14:43:33   Log-Likelihood:                 85535.
No. Observations:                       53031   AIC:                        -1.711e+05
Df Residuals:                           53027   BIC:                        -1.710e+05
Df Model:                                   3                                         
Covariance Type:                    nonrobust                                         
                                               coef    std err          t      P>|t|      [0.025      0.975]
---------------------

### Model 2

In [59]:
rd2 = smf.ols(
    "total_evic_per_unit_per_yr ~ rent_control + year_built_centered*rent_control + np.log(initial_newUNITS)",
    data=rd_df)

In [60]:
fitted2 = rd2.fit()
print(fitted2.summary())

                                OLS Regression Results                                
Dep. Variable:     total_evic_per_unit_per_yr   R-squared:                       0.001
Model:                                    OLS   Adj. R-squared:                  0.001
Method:                         Least Squares   F-statistic:                     12.25
Date:                        Sun, 04 Dec 2022   Prob (F-statistic):           5.91e-10
Time:                                14:43:44   Log-Likelihood:                 85537.
No. Observations:                       53031   AIC:                        -1.711e+05
Df Residuals:                           53026   BIC:                        -1.710e+05
Df Model:                                   4                                         
Covariance Type:                    nonrobust                                         
                                               coef    std err          t      P>|t|      [0.025      0.975]
---------------------