In [2]:
# ============================================================
# CAPITALIZATION: MLS prices/DOM on (Δ_actual - Δ_effective)
#   - runs BOTH: pre-cap window and post-cap window
#   - regression mechanics identical to your existing code
# ============================================================

import os
import numpy as np
import pandas as pd
import statsmodels.api as sm

# ---------------- PATHS ----------------
# Association-by-quarter MLS/Redfin panel (Stata file); loaded below with pd.read_stata.
PATH_DTA_MLS = '../../final_datasets/master_datasets/master_dataset_price_dom_assoc_quarter_mls_redfin.dta'
# Destination for everything this cell writes (differenced-delta CSV and LaTeX bodies).
OUTPUT_DIR   = 'exports/delta_all_vars'
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Delta files (already computed)
# NOTE(review): DELTA_DIR is a machine-specific absolute Windows path while the
# other paths are relative; the cell will not run on another machine without
# editing it.
DELTA_DIR = r"C:\Users\ngodin\Dropbox\RESEARCH\active_projects\florida_condo\final_code\18_pre_sb4d_effects\exports\delta_all_vars"
PATH_DELTA_ACTUAL    = os.path.join(DELTA_DIR, "delta_assoc.csv")
PATH_DELTA_EFFECTIVE = os.path.join(DELTA_DIR, "delta_assoc_effect_age.csv")

# ---------------- CORE COLS ----------------
# ASSOC_COL   : merge key between the delta files and the MLS panel
# COUNTY_COL  : text county name, used only for the Miami-Dade/Broward exclusion
# ZIP_COL     : source of the ZIP fixed effects and of the cluster groups
# QUARTER_COL : source of the quarter fixed effects and of the window filter
# STORIES_COL : used to keep associations with 3 or more stories
ASSOC_COL   = 'assoc_name_final'
COUNTY_COL  = 'mm_fips_county_name_attom'
ZIP_COL     = 'zip5_attom'
QUARTER_COL = 'quarter'
STORIES_COL = 'num_stories_final_assoc'

# MLS outcomes
# The two price columns are logged via ln_pos (rows with values <= 0 dropped);
# DOM is transformed with log1p after dropping negative values.
MLS_LIST_PSF = 'list_price_sq_ft_assoc_qtr'
MLS_SOLD_PSF = 'sold_price_sq_ft_assoc_qtr'
MLS_DOM      = 'dom_assoc_qtr'

# When True (and COUNTY_COL exists), Miami-Dade and Broward rows are dropped.
EXCLUDE_MIAMI_BROWARD = True

# ---------------- helpers ----------------
def exclude_mia_broward(df, county_col):
    """Return a copy of `df` without Miami-Dade and Broward rows.

    Matching is a case-insensitive substring search for 'miami' or 'broward'
    in the text of `county_col`; rows whose county is missing are kept.
    """
    county_text = df[county_col].astype(str).str.lower()
    is_miami = county_text.str.contains('miami', na=False)
    is_broward = county_text.str.contains('broward', na=False)
    # De Morgan: keeping rows that match neither name == dropping rows that match either.
    excluded = is_miami | is_broward
    return df.loc[~excluded].copy()

def ensure_numeric_df(X):
    """Coerce every non-numeric column of `X` to a numeric dtype, in place.

    Parameters
    ----------
    X : pandas.DataFrame
        Frame whose columns should all be numeric. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object `X`, returned for call chaining.

    Notes
    -----
    Values that cannot be parsed become NaN (``errors='coerce'``), matching
    the coercion used by the other helpers in this file.
    """
    for c in X.columns:
        # is_numeric_dtype also handles pandas extension dtypes, on which
        # np.issubdtype raises TypeError.
        if not pd.api.types.is_numeric_dtype(X[c]):
            # Previously called the non-existent `pd.to_x_numeric`, which
            # raised AttributeError for any non-numeric column.
            X[c] = pd.to_numeric(X[c], errors='coerce')
    return X

def _to_qstr(q):
    """Convert MLS quarter variable to 'YYYYQx' string."""
    return pd.PeriodIndex(q, freq='Q').astype(str).str.upper()

def ln_pos(df, col_in, col_out):
    """Return a copy of the rows where `col_in` > 0, with log(`col_in`) in `col_out`.

    `col_in` is coerced to numeric first, so unparseable or missing values are
    dropped together with zero and negative values. The input frame itself is
    not modified.
    """
    values = pd.to_numeric(df[col_in], errors='coerce')
    positive = values > 0
    kept = df[positive].copy()
    kept[col_out] = np.log(values[positive].astype(float))
    return kept

def design_cap(df, ycol, delta_col):
    """Build the outcome, design matrix and cluster groups for one regression.

    Parameters
    ----------
    df : pandas.DataFrame
        Estimation sample; must contain `ycol`, `delta_col`, ZIP_COL and
        QUARTER_COL.
    ycol : str
        Name of the outcome column.
    delta_col : str
        Name of the regressor of interest.

    Returns
    -------
    (y, X, groups)
        `y` is the numeric outcome, `X` the float design matrix (constant,
        `delta_col`, ZIP dummies, quarter dummies) and `groups` the ZIP codes
        as strings, all restricted to rows with no missing value in `y` or `X`.
    """
    y = pd.to_numeric(df[ycol], errors='coerce')

    # ZIP FE (first level dropped as the reference category)
    fe_zip = pd.get_dummies(
        df[ZIP_COL].astype(str).str.upper().str.strip(),
        prefix='zip', drop_first=True
    )
    # Quarter FE (YYYYQx strings)
    fe_q = pd.get_dummies(df[QUARTER_COL], prefix='q', drop_first=True)

    X = pd.concat([df[[delta_col]], fe_zip, fe_q], axis=1)
    # numeric coercion
    for c in X.columns:
        X[c] = pd.to_numeric(X[c], errors='coerce')
    # pandas >= 2.0 returns bool dummies and to_numeric leaves them bool; a
    # mixed bool/float frame converts to an object array, which statsmodels
    # rejects. Casting to float gives the same values on every pandas version.
    X = X.astype(float)

    # Complete cases only: outcome and every regressor must be present.
    m = y.notna() & X.notna().all(axis=1)
    Xc = sm.add_constant(X.loc[m], has_constant='add')

    groups = df.loc[m, ZIP_COL].astype(str)  # cluster by ZIP
    return y.loc[m], Xc, groups

def run_cap(df, y_name, y_label, delta_col, tag):
    """Fit one unweighted OLS with ZIP-clustered SE and print its summary.

    The design comes from `design_cap`. Returns the fitted statsmodels results
    object, or None (after printing a notice) when no usable rows remain.
    """
    endog, exog, clusters = design_cap(df, y_name, delta_col)

    # Guard clause: nothing to estimate.
    if endog.empty:
        print(f"[MLS {tag}] {y_label}: no usable observations.")
        return None

    model = sm.OLS(endog, exog)
    fit = model.fit(cov_type='cluster', cov_kwds={'groups': clusters})

    banner = (f"\n[MLS {tag}] {y_label} on {delta_col} + ZIP FE + quarter FE, "
              f"unweighted | n={int(fit.nobs)} R²={fit.rsquared:.3f}")
    print(banner)
    print(fit.summary())
    return fit

def stars(p):
    """Map a p-value to significance stars ('***' <0.01, '**' <0.05, '*' <0.10).

    Returns an empty string for None, NaN or infinite input, and for p >= 0.10.
    """
    if p is None or not np.isfinite(p):
        return ''
    # Thresholds are checked from strictest to loosest; first hit wins.
    for cutoff, marker in ((0.01, '***'), (0.05, '**'), (0.10, '*')):
        if p < cutoff:
            return marker
    return ''

def coef_se_line(res, name, scale=1.0, dec=4):
    """Format regressor `name` from `res` as ('coef+stars', '(se)').

    The coefficient and standard error are multiplied by `scale` and printed
    with `dec` decimals; the stars come from the unscaled p-value. This is
    best-effort: any failure (missing regressor, unusable `res`, ...) yields
    a pair of empty strings instead of an exception.
    """
    try:
        estimate = float(res.params[name]) * scale
        std_err = float(res.bse[name]) * scale
        p_value = float(res.pvalues[name])
        spec = f".{dec}f"
        coef_cell = format(estimate, spec) + stars(p_value)
        se_cell = "(" + format(std_err, spec) + ")"
        return coef_cell, se_cell
    except Exception:
        return "", ""

def export_body_threecols(res_list, res_sold, res_dom,
                          out_path,
                          var_rows,
                          col_labels,
                          notes=None,
                          fe_markers=True,
                          dec=4):
    """Write the body rows of a three-column LaTeX results table to `out_path`.

    Only the rows are written; the surrounding tabular environment is not.

    Parameters
    ----------
    res_list, res_sold, res_dom
        Fitted results for the three outcome columns; any of them may be None,
        in which case that column's cells are left blank.
    out_path : str
        File to write (UTF-8).
    var_rows : iterable of (name, label, scale)
        Regressors to report: coefficient row followed by a standard-error row.
    col_labels : list of str
        Header labels for the three columns.
    notes : str, optional
        Raw LaTeX appended after a midrule.
    fe_markers : bool
        When True, add a "ZIP and Quarter FE" row with "Yes" per fitted column.
    dec : int
        Decimals for coefficients and standard errors.

    If all three results are None, the file contains only a '% empty' comment.
    """
    fits = (res_list, res_sold, res_dom)
    joiner = " & "
    row_end = r" \\"

    if all(r is None for r in fits):
        with open(out_path, 'w', encoding='utf-8') as fh:
            fh.write('% empty\n')
        print(f"[LaTeX] wrote empty {out_path}")
        return

    def _cells(res, name, scale):
        # Blank pair for a missing model, otherwise formatted coef / se.
        if res is None:
            return "", ""
        return coef_se_line(res, name, scale=scale, dec=dec)

    def _r2(res):
        try:
            return f"{res.rsquared:.3f}"
        except Exception:
            return ""

    def _nobs(res):
        try:
            return str(int(round(res.nobs)))
        except Exception:
            return ""

    def _row(head, cells):
        # One table row: leading text, cells joined by '&', LaTeX line break.
        return head + joiner.join(cells) + row_end

    body = [_row(" & ", col_labels), r"\midrule"]

    for name, label, scale in var_rows:
        pairs = [_cells(res, name, scale) for res in fits]
        body.append(_row(label + " & ", [coef for coef, _ in pairs]))
        body.append(_row(" & ", [se for _, se in pairs]))
        body.append(r"\addlinespace")

    body.append(_row(r"$R^2$ & ", [_r2(res) for res in fits]))
    body.append(_row("Observations & ", [_nobs(res) for res in fits]))
    if fe_markers:
        flags = ["" if res is None else "Yes" for res in fits]
        body.append(_row(r"ZIP and Quarter FE & ", flags))

    if notes:
        body.extend([r"\midrule", notes])

    with open(out_path, 'w', encoding='utf-8') as fh:
        fh.write("\n".join(body) + "\n")

    print(f"[LaTeX] wrote {out_path}")

# ---------------- load and build differenced delta ----------------
# Read the two precomputed association-level delta files.
d_act = pd.read_csv(PATH_DELTA_ACTUAL)
d_eff = pd.read_csv(PATH_DELTA_EFFECTIVE)

# both have: assoc_name_final, delta_pct
# Keep only the key and the delta, renaming so the two sources can sit side by side.
d_act = d_act[[ASSOC_COL, 'delta_pct']].rename(columns={'delta_pct': 'delta_actual_pct'})
d_eff = d_eff[[ASSOC_COL, 'delta_pct']].rename(columns={'delta_pct': 'delta_effective_pct'})

# Inner join keeps associations present in both files; validate='1:1' makes the
# merge raise if an association name is duplicated in either file.
delta = d_act.merge(d_eff, on=ASSOC_COL, how='inner', validate='1:1')

# Regressor of interest: actual minus effective delta (NaN if either side is
# missing or non-numeric).
delta['delta_diff_pct'] = (
    pd.to_numeric(delta['delta_actual_pct'], errors='coerce')
    - pd.to_numeric(delta['delta_effective_pct'], errors='coerce')
)

# optional: export for inspection
delta_out_path = os.path.join(OUTPUT_DIR, "delta_assoc_diff_actual_minus_effective.csv")
delta[[ASSOC_COL, 'delta_actual_pct', 'delta_effective_pct', 'delta_diff_pct']].to_csv(delta_out_path, index=False)
print(f"Exported differenced delta to {delta_out_path}")


# ---------------- load MLS and prep (identical restrictions) ----------------
mls = pd.read_stata(PATH_DTA_MLS).copy()

# treated group: 3+ stories
# Rows with a missing or non-numeric story count are dropped too (NaN >= 3 is False).
mls = mls[pd.to_numeric(mls[STORIES_COL], errors='coerce') >= 3].copy()
# Quarter becomes a 'YYYYQx' string; the window filter below relies on these
# strings comparing in chronological order.
mls[QUARTER_COL] = _to_qstr(mls[QUARTER_COL])

# exclude Miami-Dade & Broward (to match your stage-1 exclusion)
# Skipped silently if the county column is absent from the file.
if EXCLUDE_MIAMI_BROWARD and COUNTY_COL in mls.columns:
    mls = exclude_mia_broward(mls, COUNTY_COL)

# merge differenced delta onto MLS (inner join)
# MLS rows whose association has no delta are dropped here.
mls = mls.merge(delta[[ASSOC_COL, 'delta_diff_pct']], on=ASSOC_COL, how='inner')

# ---------------- run BOTH capitalization windows ----------------
# Each spec gives a label, an inclusive quarter window and an output file name.
# Note that 2022Q2 falls in neither window.
SPECS = [
    # PRE capitalization window (your pre script)
    dict(tag="PRE",
         start_q="2019Q1",
         end_q="2022Q1",
         latex_file="cap_mls_deltaDiff_3outcomes_body_pre.tex"),
    # POST capitalization window (your post script uses 2022Q3–2024Q4)
    dict(tag="POST",
         start_q="2022Q3",
         end_q="2024Q4",
         latex_file="cap_mls_deltaDiff_3outcomes_body_post.tex"),
]

for spec in SPECS:
    tag = spec["tag"]
    # Inclusive window filter; string comparison on 'YYYYQx' labels.
    mls_w = mls[(mls[QUARTER_COL] >= spec["start_q"]) & (mls[QUARTER_COL] <= spec["end_q"])].copy()

    # outcomes (same construction as your scripts)
    # Each outcome gets its own sample, so the three regressions can have
    # different numbers of observations.
    df_list = ln_pos(mls_w.copy(), MLS_LIST_PSF, 'ln_list_psf')
    df_sold = ln_pos(mls_w.copy(), MLS_SOLD_PSF, 'ln_sold_psf')

    # DOM: keep non-negative values and use log(1 + DOM) so zero days is allowed.
    df_dom = mls_w.copy()
    df_dom = df_dom[pd.to_numeric(df_dom[MLS_DOM], errors='coerce') >= 0].copy()
    df_dom['ln_dom1p'] = np.log1p(pd.to_numeric(df_dom[MLS_DOM], errors='coerce').astype(float))

    # regressions
    # Each call returns a fitted result, or None when its sample is empty.
    res_list = run_cap(df_list, 'ln_list_psf', 'ln(list/ft²)', 'delta_diff_pct', tag)
    res_sold = run_cap(df_sold, 'ln_sold_psf', 'ln(sold/ft²)', 'delta_diff_pct', tag)
    res_dom  = run_cap(df_dom,  'ln_dom1p',   'ln(1+DOM)',     'delta_diff_pct', tag)

    # LaTeX body (Δ row only)
    # Tuple layout is (regressor name, row label, scale); scale 1 reports the
    # coefficient as estimated.
    rows_common = [('delta_diff_pct', r'$\Delta^{actual}_i - \Delta^{effective}_i$ (pp)', 1)]
    col_labels = [r'Listed Price', r'Sold Price', r'Days on Market']

    latex_path = os.path.join(OUTPUT_DIR, spec["latex_file"])
    export_body_threecols(
        res_list,
        res_sold,
        res_dom,
        latex_path,
        rows_common,
        col_labels,
        notes=(r"\multicolumn{4}{l}{ZIP-clustered SE in parentheses. "
               r"\sym{*} $p<0.10$, \sym{**} $p<0.05$, \sym{***} $p<0.01$} \\")
    )

print("\n[Done] Wrote both PRE and POST capitalization LaTeX bodies using delta_diff_pct.")


Exported differenced delta to exports/delta_all_vars\delta_assoc_diff_actual_minus_effective.csv

[MLS PRE] ln(list/ft²) on delta_diff_pct + ZIP FE + quarter FE, unweighted | n=3915 R²=0.668
                            OLS Regression Results                            
Dep. Variable:            ln_list_psf   R-squared:                       0.668
Model:                            OLS   Adj. R-squared:                  0.652
Method:                 Least Squares   F-statistic:                     4062.
Date:                Mon, 26 Jan 2026   Prob (F-statistic):          1.95e-203
Time:                        14:07:15   Log-Likelihood:                -1360.2
No. Observations:                3915   AIC:                             3086.
Df Residuals:                    3732   BIC:                             4234.
Df Model:                         182                                         
Covariance Type:              cluster                                         
                   




[MLS PRE] ln(sold/ft²) on delta_diff_pct + ZIP FE + quarter FE, unweighted | n=3997 R²=0.651
                            OLS Regression Results                            
Dep. Variable:            ln_sold_psf   R-squared:                       0.651
Model:                            OLS   Adj. R-squared:                  0.634
Method:                 Least Squares   F-statistic:                     4328.
Date:                Mon, 26 Jan 2026   Prob (F-statistic):          9.36e-206
Time:                        14:07:15   Log-Likelihood:                -1518.1
No. Observations:                3997   AIC:                             3402.
Df Residuals:                    3814   BIC:                             4554.
Df Model:                         182                                         
Covariance Type:              cluster                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------




[MLS PRE] ln(1+DOM) on delta_diff_pct + ZIP FE + quarter FE, unweighted | n=4001 R²=0.305
                            OLS Regression Results                            
Dep. Variable:               ln_dom1p   R-squared:                       0.305
Model:                            OLS   Adj. R-squared:                  0.272
Method:                 Least Squares   F-statistic:                     71.10
Date:                Mon, 26 Jan 2026   Prob (F-statistic):           2.07e-61
Time:                        14:07:16   Log-Likelihood:                -7080.8
No. Observations:                4001   AIC:                         1.453e+04
Df Residuals:                    3818   BIC:                         1.568e+04
Df Model:                         182                                         
Covariance Type:              cluster                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------




[MLS POST] ln(list/ft²) on delta_diff_pct + ZIP FE + quarter FE, unweighted | n=2645 R²=0.699
                            OLS Regression Results                            
Dep. Variable:            ln_list_psf   R-squared:                       0.699
Model:                            OLS   Adj. R-squared:                  0.677
Method:                 Least Squares   F-statistic:                     1597.
Date:                Mon, 26 Jan 2026   Prob (F-statistic):          3.28e-163
Time:                        14:07:16   Log-Likelihood:                -598.33
No. Observations:                2645   AIC:                             1561.
Df Residuals:                    2463   BIC:                             2631.
Df Model:                         181                                         
Covariance Type:              cluster                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------




[MLS POST] ln(sold/ft²) on delta_diff_pct + ZIP FE + quarter FE, unweighted | n=2378 R²=0.711
                            OLS Regression Results                            
Dep. Variable:            ln_sold_psf   R-squared:                       0.711
Model:                            OLS   Adj. R-squared:                  0.687
Method:                 Least Squares   F-statistic:                     1736.
Date:                Mon, 26 Jan 2026   Prob (F-statistic):          2.92e-166
Time:                        14:07:16   Log-Likelihood:                -395.42
No. Observations:                2378   AIC:                             1155.
Df Residuals:                    2196   BIC:                             2206.
Df Model:                         181                                         
Covariance Type:              cluster                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------




[MLS POST] ln(1+DOM) on delta_diff_pct + ZIP FE + quarter FE, unweighted | n=2483 R²=0.182
                            OLS Regression Results                            
Dep. Variable:               ln_dom1p   R-squared:                       0.182
Model:                            OLS   Adj. R-squared:                  0.117
Method:                 Least Squares   F-statistic:                     212.3
Date:                Mon, 26 Jan 2026   Prob (F-statistic):           7.21e-91
Time:                        14:07:17   Log-Likelihood:                -4002.6
No. Observations:                2483   AIC:                             8369.
Df Residuals:                    2301   BIC:                             9428.
Df Model:                         181                                         
Covariance Type:              cluster                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------



In [5]:
# ============================================================
# CAPITALIZATION (UNIT OBS): unit-level MLS/Redfin outcomes on
#   delta_diff_pct = delta_actual_pct - delta_effective_pct
#
# Uses: master_dataset_unit_obs_redfin.dta (the file PATH_DTA_MLS_UNIT points to)
# Same folder as your assoc-quarter file.
#
# Key mechanics:
#   - merge association-level delta onto unit observations by assoc_name_final
#   - (optionally) restrict to 3+ story associations if that field exists
#   - exclude Miami/Broward if county field exists
#   - include QUARTER fixed effects (as requested)
#   - (by default) include ZIP fixed effects if zip exists (keeps prior logic)
#   - unweighted OLS with clustered SE (ZIP if present, else association)
#   - runs BOTH PRE and POST windows like before
#   - exports 3-outcome LaTeX bodies (list psf, sold psf, ln(1+DOM))
# ============================================================

import os
import numpy as np
import pandas as pd
import statsmodels.api as sm

# ---------------- PATHS ----------------
# Unit-observation dataset (Stata file); loaded below with pd.read_stata.
PATH_DTA_MLS_UNIT = '../../final_datasets/master_datasets/master_dataset_unit_obs_redfin.dta'
# Destination for the differenced-delta CSV and the LaTeX bodies written by this cell.
OUTPUT_DIR        = 'exports/delta_all_vars'
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Delta files (already computed at association level, for unit-obs merges)
# NOTE(review): DELTA_DIR is a machine-specific absolute Windows path while the
# other paths are relative; the cell will not run on another machine without
# editing it.
DELTA_DIR = r"C:\Users\ngodin\Dropbox\RESEARCH\active_projects\florida_condo\final_code\18_pre_sb4d_effects\exports\delta_all_vars"
PATH_DELTA_ACTUAL    = os.path.join(DELTA_DIR, "delta_assoc_unit.csv")
PATH_DELTA_EFFECTIVE = os.path.join(DELTA_DIR, "delta_assoc_unit_effect_age.csv")

# ---------------- CORE COLS ----------------
# These names (and the outcome names below) reuse the identifiers of the
# association-quarter cell, so running this cell rebinds them in the kernel.
ASSOC_COL   = 'assoc_name_final'
COUNTY_COL  = 'mm_fips_county_name_attom'   # if present
ZIP_COL     = 'zip5_attom'                  # if present
QUARTER_COL = 'quarter'                     # must be present
STORIES_COL = 'num_stories_final_assoc'     # if present

EXCLUDE_MIAMI_BROWARD = True
RESTRICT_3PLUS_STORIES_IF_AVAILABLE = True  # set False if you don't want this restriction

# ---------------- OUTCOMES (try these; adjust if your unit file uses different names) ----------------
# If these are wrong, just edit these three to match your unit-obs column names.
MLS_LIST_PSF = 'listed_price_sq_ft'      # unit-level list $/ft2
MLS_SOLD_PSF = 'sold_price_sq_ft'      # unit-level sold $/ft2
MLS_DOM      = 'dom'                   # unit-level days on market

# ---------------- helpers ----------------
def exclude_mia_broward(df, county_col):
    """Drop rows whose county text mentions Miami or Broward; return a copy.

    The match is a case-insensitive substring test on `county_col`. Rows with
    a missing county are retained.
    """
    names = df[county_col].astype(str).str.lower()
    hits_miami = names.str.contains('miami', na=False)
    hits_broward = names.str.contains('broward', na=False)
    retained = ~(hits_miami | hits_broward)
    return df.loc[retained].copy()

def _to_qstr(q):
    """Convert quarter to 'YYYYQx' string where possible."""
    # Works if q is already a Period/str; if numeric codes, fall back to str
    try:
        return pd.PeriodIndex(q, freq='Q').astype(str).str.upper()
    except Exception:
        return q.astype(str).str.upper()

def ln_pos(df, col_in, col_out):
    """Keep rows with strictly positive `col_in` and add its natural log as `col_out`.

    Values are coerced to numeric first; anything unparseable, missing, zero
    or negative is dropped. A new frame is returned and `df` is left untouched.
    """
    numeric = pd.to_numeric(df[col_in], errors='coerce')
    is_positive = numeric > 0
    result = df[is_positive].copy()
    logged = np.log(numeric[is_positive].astype(float))
    result[col_out] = logged
    return result

def build_X(df, delta_col, use_zip_fe=True):
    """Assemble the regressor matrix: `delta_col` plus fixed-effect dummies.

    Parameters
    ----------
    df : pandas.DataFrame
        Estimation sample; must contain `delta_col` and QUARTER_COL.
    delta_col : str
        Name of the regressor of interest.
    use_zip_fe : bool
        Add ZIP dummies when True and ZIP_COL is present in `df`.

    Returns
    -------
    pandas.DataFrame
        Float matrix without a constant. Missing values are left as NaN for
        the caller to filter out.
    """
    parts = [df[[delta_col]]]

    # ZIP FE (if available and desired)
    if use_zip_fe and (ZIP_COL in df.columns):
        fe_zip = pd.get_dummies(df[ZIP_COL].astype(str).str.upper().str.strip(),
                                prefix='zip', drop_first=True)
        parts.append(fe_zip)

    # Quarter FE (required)
    fe_q = pd.get_dummies(df[QUARTER_COL].astype(str).str.upper().str.strip(),
                          prefix='q', drop_first=True)
    parts.append(fe_q)

    X = pd.concat(parts, axis=1)

    # numeric coercion
    for c in X.columns:
        X[c] = pd.to_numeric(X[c], errors='coerce')

    # pandas >= 2.0 returns bool dummies and to_numeric leaves them bool; a
    # mixed bool/float frame converts to an object array, which statsmodels
    # rejects. Casting to float gives the same values on every pandas version.
    return X.astype(float)

def run_ols_cluster(df, ycol, delta_col, tag, y_label, use_zip_fe=True):
    """Fit an unweighted OLS of `ycol` on `delta_col` plus FE, with clustered SE.

    Parameters
    ----------
    df : pandas.DataFrame
        Estimation sample.
    ycol : str
        Outcome column.
    delta_col : str
        Regressor of interest (also shown in the printed banner).
    tag : str
        Label for the printed banner (e.g. the window name).
    y_label : str
        Human-readable outcome label for the banner.
    use_zip_fe : bool
        Passed to `build_X`; ZIP dummies are added only if ZIP_COL exists.

    Returns
    -------
    statsmodels results object, or None when no complete-case rows remain.

    Standard errors are clustered by ZIP when ZIP_COL is in `df`, otherwise by
    association.
    """
    y = pd.to_numeric(df[ycol], errors='coerce')
    X = build_X(df, delta_col, use_zip_fe=use_zip_fe)

    # Complete cases only.
    m = y.notna() & X.notna().all(axis=1)

    # Bail out before building the constant / cluster vector on an empty sample.
    if not m.any():
        print(f"[{tag}] {y_label}: no usable observations.")
        return None

    y = y.loc[m]
    # The float cast guards against bool dummy columns, which together with
    # float regressors would reach statsmodels as an object array.
    X = sm.add_constant(X.loc[m].astype(float), has_constant='add')

    # cluster choice
    has_zip = ZIP_COL in df.columns
    if has_zip:
        groups = df.loc[m, ZIP_COL].astype(str)
        cluster_label = "ZIP"
    else:
        groups = df.loc[m, ASSOC_COL].astype(str)
        cluster_label = "Association"

    fit = sm.OLS(y, X).fit(
        cov_type='cluster',
        cov_kwds={'groups': groups}
    )

    print("\n" + "="*80)
    # Report the regressor actually used instead of a hard-coded name.
    print(f"[{tag}] {y_label} on {delta_col}")
    print(f"Quarter FE included"
          f"{' + ZIP FE' if (use_zip_fe and has_zip) else ''}")
    print(f"Clustered by {cluster_label} | n={int(fit.nobs)} | R²={fit.rsquared:.3f}")
    print("="*80)
    print(fit.summary())
    print("\n")

    return fit

def stars(p):
    """Return significance stars for p-value `p`.

    '***' below 0.01, '**' below 0.05, '*' below 0.10, otherwise ''. None and
    non-finite values also give ''.
    """
    if p is None:
        return ''
    if not np.isfinite(p):
        return ''
    if p < 0.01:
        return '***'
    if p < 0.05:
        return '**'
    if p < 0.10:
        return '*'
    return ''

def coef_se_line(res, name, scale=1.0, dec=4):
    """Return ('coef+stars', '(se)') strings for regressor `name` in `res`.

    Coefficient and standard error are scaled by `scale` and rounded to `dec`
    decimals. Any error while reading or formatting (for example an absent
    regressor) is swallowed on purpose and reported as two empty strings.
    """
    try:
        beta = float(res.params[name]) * scale
        se_val = float(res.bse[name]) * scale
        pval = float(res.pvalues[name])
        number_fmt = f".{dec}f"
        left = format(beta, number_fmt) + stars(pval)
        right = "(" + format(se_val, number_fmt) + ")"
        return left, right
    except Exception:
        return "", ""

def export_body_threecols(res1, res2, res3, out_path, var_rows, col_labels, notes=None, dec=4):
    """Write the rows of a three-column LaTeX results table to `out_path`.

    Parameters
    ----------
    res1, res2, res3
        Fitted results for the three columns; a None leaves that column blank.
    out_path : str
        Output file (UTF-8).
    var_rows : iterable of (name, label, scale)
        Regressors to report, each as a coefficient row plus an SE row.
    col_labels : list of str
        Column header labels.
    notes : str, optional
        Raw LaTeX appended after a midrule.
    dec : int
        Decimals for coefficients and standard errors.
    """
    models = (res1, res2, res3)
    amp = " & "
    eol = r" \\"

    def _pair(res, name, scale):
        # Missing model -> blank cells; otherwise formatted coefficient and SE.
        if res is None:
            return "", ""
        return coef_se_line(res, name, scale=scale, dec=dec)

    def _fmt_r2(res):
        try:
            return f"{res.rsquared:.3f}"
        except Exception:
            return ""

    def _fmt_n(res):
        try:
            return str(int(round(res.nobs)))
        except Exception:
            return ""

    out_lines = [" & " + amp.join(col_labels) + eol, r"\midrule"]

    for name, label, scale in var_rows:
        cells = [_pair(res, name, scale) for res in models]
        coef_cells = [c[0] for c in cells]
        se_cells = [c[1] for c in cells]
        out_lines.append(label + " & " + amp.join(coef_cells) + eol)
        out_lines.append(" & " + amp.join(se_cells) + eol)
        out_lines.append(r"\addlinespace")

    out_lines.append(r"$R^2$ & " + amp.join(_fmt_r2(res) for res in models) + eol)
    out_lines.append("Observations & " + amp.join(_fmt_n(res) for res in models) + eol)

    if notes:
        out_lines += [r"\midrule", notes]

    with open(out_path, 'w', encoding='utf-8') as handle:
        handle.write("\n".join(out_lines) + "\n")
    print(f"[LaTeX] wrote {out_path}")

# ---------------- load and build differenced delta ----------------
# Read the two precomputed association-level delta files (unit-obs variants).
d_act = pd.read_csv(PATH_DELTA_ACTUAL)
d_eff = pd.read_csv(PATH_DELTA_EFFECTIVE)

# both have: assoc_name_final, delta_pct
# Keep only the key and the delta, renaming so the two sources can sit side by side.
d_act = d_act[[ASSOC_COL, 'delta_pct']].rename(columns={'delta_pct': 'delta_actual_pct'})
d_eff = d_eff[[ASSOC_COL, 'delta_pct']].rename(columns={'delta_pct': 'delta_effective_pct'})

# Inner join keeps associations present in both files; validate='1:1' makes the
# merge raise if an association name is duplicated in either file.
delta = d_act.merge(d_eff, on=ASSOC_COL, how='inner', validate='1:1')
# Regressor of interest: actual minus effective delta (NaN if either side is
# missing or non-numeric).
delta['delta_diff_pct'] = (
    pd.to_numeric(delta['delta_actual_pct'], errors='coerce')
    - pd.to_numeric(delta['delta_effective_pct'], errors='coerce')
)

# optional: export for inspection
delta_out_path = os.path.join(OUTPUT_DIR, "delta_assoc_unit_diff_actual_minus_effective.csv")
delta[[ASSOC_COL, 'delta_actual_pct', 'delta_effective_pct', 'delta_diff_pct']].to_csv(delta_out_path, index=False)
print(f"Exported differenced delta to {delta_out_path}")

# ---------------- load UNIT-OBS dataset ----------------
df = pd.read_stata(PATH_DTA_MLS_UNIT).copy()

# quarter to string
# NOTE(review): if _to_qstr falls back to plain strings (values not parseable
# as quarters), the 'YYYYQx' window comparisons further down are no longer
# chronological — confirm the quarter column's format.
df[QUARTER_COL] = _to_qstr(df[QUARTER_COL])

# optional: restrict to 3+ stories (only if column exists)
# Rows with a missing or non-numeric story count are dropped too (NaN >= 3 is False).
if RESTRICT_3PLUS_STORIES_IF_AVAILABLE and (STORIES_COL in df.columns):
    df = df[pd.to_numeric(df[STORIES_COL], errors='coerce') >= 3].copy()

# optional: exclude Miami/Broward (only if county exists)
if EXCLUDE_MIAMI_BROWARD and (COUNTY_COL in df.columns):
    df = exclude_mia_broward(df, COUNTY_COL)

# merge differenced delta (inner join to keep only matched assocs)
# Each unit observation receives its association's delta_diff_pct.
df = df.merge(delta[[ASSOC_COL, 'delta_diff_pct']], on=ASSOC_COL, how='inner')

# ---------------- run BOTH capitalization windows ----------------
# Each spec gives a label, an inclusive quarter window and an output file name.
# Note that 2022Q2 falls in neither window.
SPECS = [
    dict(tag="PRE",  start_q="2019Q1", end_q="2022Q1",
         latex_file="cap_unitobs_deltaDiff_3outcomes_body_pre.tex"),
    dict(tag="POST", start_q="2022Q3", end_q="2024Q4",
         latex_file="cap_unitobs_deltaDiff_3outcomes_body_post.tex"),
]

for spec in SPECS:
    tag = spec["tag"]
    # Inclusive window filter; string comparison on 'YYYYQx' labels.
    d = df[(df[QUARTER_COL] >= spec["start_q"]) & (df[QUARTER_COL] <= spec["end_q"])].copy()

    # outcomes
    # Each outcome gets its own sample, so the three regressions can have
    # different numbers of observations.
    d_list = ln_pos(d.copy(), MLS_LIST_PSF, 'ln_list_psf')
    d_sold = ln_pos(d.copy(), MLS_SOLD_PSF, 'ln_sold_psf')

    # DOM: keep non-negative values and use log(1 + DOM) so zero days is allowed.
    d_dom = d.copy()
    d_dom = d_dom[pd.to_numeric(d_dom[MLS_DOM], errors='coerce') >= 0].copy()
    d_dom['ln_dom1p'] = np.log1p(pd.to_numeric(d_dom[MLS_DOM], errors='coerce').astype(float))

    # regressions (Quarter FE included; ZIP FE included if zip exists)
    # Each call returns a fitted result, or None when its sample is empty.
    res_list = run_ols_cluster(d_list, 'ln_list_psf', 'delta_diff_pct', tag, 'ln(list/ft²)', use_zip_fe=True)
    res_sold = run_ols_cluster(d_sold, 'ln_sold_psf', 'delta_diff_pct', tag, 'ln(sold/ft²)', use_zip_fe=True)
    res_dom  = run_ols_cluster(d_dom,  'ln_dom1p',   'delta_diff_pct', tag, 'ln(1+DOM)',   use_zip_fe=True)

    # LaTeX body (Δ row only)
    # Tuple layout is (regressor name, row label, scale); scale 1 reports the
    # coefficient as estimated.
    rows = [('delta_diff_pct', r'$\Delta^{actual}_i - \Delta^{effective}_i$ (pp)', 1)]
    col_labels = [r'Listed Price', r'Sold Price', r'Days on Market']

    latex_path = os.path.join(OUTPUT_DIR, spec["latex_file"])
    export_body_threecols(
        res_list, res_sold, res_dom,
        latex_path,
        rows,
        col_labels,
        notes=(r"\multicolumn{4}{l}{Quarter FE included; ZIP FE included when available. "
               r"Clustered SE (ZIP if available, else association) in parentheses. "
               r"\sym{*} $p<0.10$, \sym{**} $p<0.05$, \sym{***} $p<0.01$} \\")
    )

print("\n[Done] UNIT-OBS pre/post capitalization regressions complete using delta_diff_pct.")


Exported differenced delta to exports/delta_all_vars\delta_assoc_unit_diff_actual_minus_effective.csv

[PRE] ln(list/ft²) on delta_diff_pct
Quarter FE included + ZIP FE
Clustered by ZIP | n=4700 | R²=0.748
                            OLS Regression Results                            
Dep. Variable:            ln_list_psf   R-squared:                       0.748
Model:                            OLS   Adj. R-squared:                  0.738
Method:                 Least Squares   F-statistic:                     325.6
Date:                Mon, 26 Jan 2026   Prob (F-statistic):          3.70e-114
Time:                        14:17:16   Log-Likelihood:                -865.01
No. Observations:                4700   AIC:                             2104.
Df Residuals:                    4513   BIC:                             3311.
Df Model:                         186                                         
Covariance Type:              cluster                                         
    




[PRE] ln(sold/ft²) on delta_diff_pct
Quarter FE included + ZIP FE
Clustered by ZIP | n=4693 | R²=0.772
                            OLS Regression Results                            
Dep. Variable:            ln_sold_psf   R-squared:                       0.772
Model:                            OLS   Adj. R-squared:                  0.763
Method:                 Least Squares   F-statistic:                     265.1
Date:                Mon, 26 Jan 2026   Prob (F-statistic):          8.73e-107
Time:                        14:17:16   Log-Likelihood:                -612.45
No. Observations:                4693   AIC:                             1599.
Df Residuals:                    4506   BIC:                             2806.
Df Model:                         186                                         
Covariance Type:              cluster                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
-----------------------




[PRE] ln(1+DOM) on delta_diff_pct
Quarter FE included + ZIP FE
Clustered by ZIP | n=4689 | R²=0.161
                            OLS Regression Results                            
Dep. Variable:               ln_dom1p   R-squared:                       0.161
Model:                            OLS   Adj. R-squared:                  0.126
Method:                 Least Squares   F-statistic:                     203.5
Date:                Mon, 26 Jan 2026   Prob (F-statistic):           1.98e-97
Time:                        14:17:16   Log-Likelihood:                -4992.1
No. Observations:                4689   AIC:                         1.036e+04
Df Residuals:                    4502   BIC:                         1.156e+04
Df Model:                         186                                         
Covariance Type:              cluster                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
--------------------------




[POST] ln(list/ft²) on delta_diff_pct
Quarter FE included + ZIP FE
Clustered by ZIP | n=3428 | R²=0.718
                            OLS Regression Results                            
Dep. Variable:            ln_list_psf   R-squared:                       0.718
Model:                            OLS   Adj. R-squared:                  0.703
Method:                 Least Squares   F-statistic:                     88.52
Date:                Mon, 26 Jan 2026   Prob (F-statistic):           3.33e-61
Time:                        14:17:17   Log-Likelihood:                -580.65
No. Observations:                3428   AIC:                             1515.
Df Residuals:                    3251   BIC:                             2602.
Df Model:                         176                                         
Covariance Type:              cluster                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------




[POST] ln(sold/ft²) on delta_diff_pct
Quarter FE included + ZIP FE
Clustered by ZIP | n=3215 | R²=0.715
                            OLS Regression Results                            
Dep. Variable:            ln_sold_psf   R-squared:                       0.715
Model:                            OLS   Adj. R-squared:                  0.699
Method:                 Least Squares   F-statistic:                     513.0
Date:                Mon, 26 Jan 2026   Prob (F-statistic):          3.48e-118
Time:                        14:17:17   Log-Likelihood:                -448.06
No. Observations:                3215   AIC:                             1246.
Df Residuals:                    3040   BIC:                             2309.
Df Model:                         174                                         
Covariance Type:              cluster                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------




[POST] ln(1+DOM) on delta_diff_pct
Quarter FE included + ZIP FE
Clustered by ZIP | n=3022 | R²=0.096
                            OLS Regression Results                            
Dep. Variable:               ln_dom1p   R-squared:                       0.096
Model:                            OLS   Adj. R-squared:                  0.041
Method:                 Least Squares   F-statistic:                     45.76
Date:                Mon, 26 Jan 2026   Prob (F-statistic):           2.24e-42
Time:                        14:17:17   Log-Likelihood:                -3543.3
No. Observations:                3022   AIC:                             7437.
Df Residuals:                    2847   BIC:                             8489.
Df Model:                         174                                         
Covariance Type:              cluster                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
-------------------------

