In [27]:
from pathlib import Path
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import math

# -----------------------
# Paths (EDIT ONLY IF NEEDED)
# -----------------------
NOTEBOOK_DIR = Path(r"C:\Users\ngodin\Dropbox\RESEARCH\active_projects\florida_condo\final_code\19_delta_regressions")
DATA_PATH = Path(r"C:\Users\ngodin\Dropbox\RESEARCH\active_projects\florida_condo\final_datasets\master_datasets\master_dataset_assoc.dta")

OUT_DIR = NOTEBOOK_DIR / "output"
OUT_DIR.mkdir(parents=True, exist_ok=True)

# -----------------------
# Sample restrictions (tune here, not inline)
# -----------------------
EXCLUDED_COUNTIES = ["MIAMI-DADE", "BROWARD"]  # counties dropped from the analysis sample
MIN_STORIES = 3                                # keep associations with at least this many stories

# -----------------------
# Load
# -----------------------
cols = [
    "assoc_name_final",
    "mm_fips_county_name_attom",
    "delta_pct",
    "num_stories_final_assoc",
    "num_units_final_assoc",
    "num_bldgs_final_assoc",
    "frac_npexcorp_state_attom_assoc",
    "frac_corp_own_attom_assoc",
    "corp_mgmt_city_attom_assoc",
    "log_med_income_nonprimary_assoc",
    "yr_blt_attom_assoc",
    "yr_blt_effect_attom_assoc",
]

df = pd.read_stata(DATA_PATH, columns=cols)

# Apply both restrictions in one mask. Rows with a missing story count fail the
# ">=" comparison and are dropped, exactly as with two sequential filters.
in_sample = (
    ~df["mm_fips_county_name_attom"].isin(EXCLUDED_COUNTIES)
    & (df["num_stories_final_assoc"] >= MIN_STORIES)
)

# .copy() gives later cells an independent frame, so adding columns to `df`
# does not raise SettingWithCopyWarning.
df = df.loc[in_sample].copy()
print(df.shape)
df.head()


(4123, 12)


Unnamed: 0,assoc_name_final,mm_fips_county_name_attom,delta_pct,num_stories_final_assoc,num_units_final_assoc,num_bldgs_final_assoc,frac_npexcorp_state_attom_assoc,frac_corp_own_attom_assoc,corp_mgmt_city_attom_assoc,log_med_income_nonprimary_assoc,yr_blt_attom_assoc,yr_blt_effect_attom_assoc
0,"101 EOLA CONDOMINIUMS ASSOCIATION, INC. 32801",ORANGE,64.352737,12.0,148.0,1.0,0.054054,0.141892,1.0,11.54864,2008.0,2008.0
2,"1350 MAIN RESIDENTIAL CONDOMINIUM ASSOCIATION,...",SARASOTA,-28.327566,16.0,135.0,1.0,0.214815,0.081481,0.0,11.789378,2007.0,2007.0
4,200 EAST PALMETTO PARK CONDOMINIUM ASSOCIATION...,PALM BEACH,-2.068552,9.0,115.0,1.0,0.121739,0.086957,1.0,,2009.0,
5,2560 SOUTH OCEAN BOULEVARD 33480,PALM BEACH,-14.71036,7.0,94.0,1.0,0.078652,0.831461,0.0,11.730022,1970.0,
6,"2700 NORTH OCEAN CONDOMINIUM ASSOCIATION, INC....",PALM BEACH,2.033192,27.0,242.0,1.0,0.348548,0.165975,1.0,11.755589,2008.0,


In [28]:
# -----------------------
# Basic cleaning
# -----------------------

# Tunable thresholds used below (tighten/loosen here).
YEAR_MIN, YEAR_MAX = 1700, 2020   # year-built values outside this range are treated as missing
REFERENCE_YEAR = 2019             # effective age = REFERENCE_YEAR - effective year built
EFFECTIVE_AGE_CAP = 80            # top-code for effective age
RENOV_RESET_CAP = 60              # symmetric cap for renov_reset_years (60 is conservative)

# --------------------------------------------------
# Extract ZIP code
# Assumes last 5 characters of assoc_name_final are ZIP
# --------------------------------------------------
df["zip5"] = df["assoc_name_final"].str[-5:]

# Drop obviously bad ZIPs.
# na=False: a missing name makes str.match return NaN, which would break the
# `~` mask below; treat such rows as "not a ZIP" instead.
is_valid_zip = df["zip5"].str.match(r"^\d{5}$", na=False)
df.loc[~is_valid_zip, "zip5"] = np.nan

# Treat obviously bad year-built info as missing.
# A recorded year of 0 falls below YEAR_MIN, so it is covered by the range check.
for y in ["yr_blt_attom_assoc", "yr_blt_effect_attom_assoc"]:
    df.loc[(df[y] < YEAR_MIN) | (df[y] > YEAR_MAX), y] = np.nan

# Option A renovation signal:
#   R_i = effective_year - built_year, clipped to [-RENOV_RESET_CAP, RENOV_RESET_CAP]
# Negative values are kept (no floor at zero). The clip reduces the influence
# of weird vendor updates (common with ATTOM harmonization).
df["renov_reset_years"] = (
    df["yr_blt_effect_attom_assoc"] - df["yr_blt_attom_assoc"]
).clip(lower=-RENOV_RESET_CAP, upper=RENOV_RESET_CAP)

# Option B renovation signal:
#   effective age relative to REFERENCE_YEAR, clipped to [0, EFFECTIVE_AGE_CAP]
df["effective_age"] = (
    REFERENCE_YEAR - df["yr_blt_effect_attom_assoc"]
).clip(lower=0, upper=EFFECTIVE_AGE_CAP)

# Built-year cohort FE (10-year bins): cohort = 1960, 1970, 1980, ...
df["built_cohort_10"] = (np.floor(df["yr_blt_attom_assoc"] / 10) * 10).astype("float")

# Coordination / maintenance proxies
# - log units handles scale effects (non-positive unit counts become missing)
# - buildings, stories capture physical & governance complexity
df["log_units"] = np.log(df["num_units_final_assoc"].where(df["num_units_final_assoc"] > 0))

# Management indicator is copied through unchanged.
# If it's missing sometimes, leave missing (regression will drop those rows).
df["corp_mgmt_city_ind"] = df["corp_mgmt_city_attom_assoc"]

# Keep only observations with delta and the essentials
needed = ["delta_pct", "renov_reset_years", "built_cohort_10", "zip5"]
df_reg = df.dropna(subset=needed).copy()

print("Regression sample:", df_reg.shape)
df_reg[["delta_pct", "renov_reset_years", "yr_blt_attom_assoc", "yr_blt_effect_attom_assoc"]].describe()

Regression sample: (578, 18)


Unnamed: 0,delta_pct,renov_reset_years,yr_blt_attom_assoc,yr_blt_effect_attom_assoc
count,578.0,578.0,578.0,578.0
mean,6.745,9.762976,1987.728394,1997.491333
std,31.222883,12.161847,14.898641,13.62537
min,-246.196899,-5.0,1915.0,1919.0
25%,-7.269723,0.0,1976.0,1988.0
50%,6.671572,7.0,1985.0,1999.0
75%,21.461691,13.75,2001.0,2007.0
max,182.796829,57.0,2017.0,2020.0


In [30]:
# -----------------------
# Model formulas
# -----------------------
# (1) Renovation signal (Option A: reset years) + built-year cohort FE,
#     with income and association-size controls
f1 = (
    "delta_pct ~ renov_reset_years + C(built_cohort_10)"
    " + log_med_income_nonprimary_assoc"
    " + log_units"
    " + num_bldgs_final_assoc"
    " + num_stories_final_assoc"
#     " + effective_age"
)

# (2) Effective age (Option B) with income and association-size controls
f2 = (
    "delta_pct ~ effective_age"
    " + log_med_income_nonprimary_assoc"
    " + log_units"
    " + num_bldgs_final_assoc"
    " + num_stories_final_assoc"
)

# (3) Effective age (Option B) with association-size controls only
f3 = (
    "delta_pct ~ effective_age"
    " + log_units"
    " + num_bldgs_final_assoc"
    " + num_stories_final_assoc"
)

# -----------------------
# Build per-spec cleaned samples (ensures groups align 1:1 with exog rows)
# -----------------------
# Each list MUST contain every variable used in its formula plus the cluster
# variable; otherwise the formula interface drops extra rows and the cluster
# groups no longer line up with the design matrix.
# NOTE: "C(built_cohort_10)" in the formula just uses built_cohort_10;
# we include built_cohort_10 in the subset list.
# The lists also carry variables that are not in the formulas (ownership
# shares, management indicator); they are left in place so the estimation
# samples do not change.

vars_f1 = [
    "delta_pct",
    "renov_reset_years",
    "built_cohort_10",
    "log_units",
    "num_bldgs_final_assoc",
    "num_stories_final_assoc",
    "frac_npexcorp_state_attom_assoc",
    "frac_corp_own_attom_assoc",
    "corp_mgmt_city_ind",
    "log_med_income_nonprimary_assoc",
    "zip5",
    "effective_age"
]

vars_f2 = [
    "delta_pct",
    "frac_npexcorp_state_attom_assoc",
    "frac_corp_own_attom_assoc",
    "corp_mgmt_city_ind",
    "log_med_income_nonprimary_assoc",
    "zip5",
    "effective_age",
    # Regressors of f2 that were missing from this list:
    "log_units",
    "num_bldgs_final_assoc",
    "num_stories_final_assoc",
]

vars_f3 = [
    "delta_pct",
    "log_units",
    "num_bldgs_final_assoc",
    "num_stories_final_assoc",
    "log_med_income_nonprimary_assoc",
    "zip5",
    "effective_age"
]


def fit_zip_clustered(formula, sample):
    """Fit an OLS formula on `sample` with standard errors clustered on `zip5`.

    Parameters
    ----------
    formula : str
        Formula string passed to `smf.ols`.
    sample : pandas.DataFrame
        Estimation sample; must contain `zip5` and be complete on every
        variable the formula uses.

    Returns
    -------
    The fitted results object returned by `.fit(cov_type="cluster", ...)`.

    Raises
    ------
    ValueError
        If the model kept fewer rows than `sample` has, i.e. the sample still
        contains missing values in a formula variable and the cluster groups
        would be misaligned.
    """
    spec = smf.ols(formula, data=sample)
    if len(spec.endog) != len(sample):
        raise ValueError(
            "Sample has rows with missing formula variables "
            f"({len(sample)} rows in, {len(spec.endog)} used); "
            "add the missing variables to the dropna subset list."
        )
    return spec.fit(cov_type="cluster", cov_kwds={"groups": sample["zip5"]})


df_f1 = df_reg.dropna(subset=vars_f1).copy()
df_f2 = df_reg.dropna(subset=vars_f2).copy()
df_f3 = df_reg.dropna(subset=vars_f3).copy()

print("N (spec 1):", df_f1.shape[0], "| # ZIPs:", df_f1["zip5"].nunique())
print("N (spec 2):", df_f2.shape[0], "| # ZIPs:", df_f2["zip5"].nunique())
print("N (spec 3):", df_f3.shape[0], "| # ZIPs:", df_f3["zip5"].nunique())

# -----------------------
# Estimate with ZIP-clustered SE (now aligned with each spec's sample)
# -----------------------
m1 = fit_zip_clustered(f1, df_f1)
m2 = fit_zip_clustered(f2, df_f2)
m3 = fit_zip_clustered(f3, df_f3)

models = [m1, m2, m3]

for i, m in enumerate(models, 1):
    print(f"\n--- Model {i} (ZIP-clustered SE) ---")
    print(m.summary().tables[1])
    print("N =", int(m.nobs), "| R2 =", round(m.rsquared, 4))

N (spec 1): 367 | # ZIPs: 129
N (spec 2): 367 | # ZIPs: 129
N (spec 3): 367 | # ZIPs: 129

--- Model 1 (ZIP-clustered SE) ---
                                      coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------------------
Intercept                         254.3168    147.920      1.719      0.086     -35.602     544.235
C(built_cohort_10)[T.1940.0]       41.7760      5.342      7.821      0.000      31.307      52.245
C(built_cohort_10)[T.1950.0]       42.7044      1.855     23.026      0.000      39.069      46.339
C(built_cohort_10)[T.1960.0]       11.6641     10.902      1.070      0.285      -9.703      33.031
C(built_cohort_10)[T.1970.0]       20.8629      3.840      5.434      0.000      13.337      28.388
C(built_cohort_10)[T.1980.0]       19.4721      3.581      5.437      0.000      12.453      26.491
C(built_cohort_10)[T.1990.0]       10.3891      8.031      1.294      0.19



In [22]:
# --------------------------------------------------
# Formatting helpers
# --------------------------------------------------
def star(p):
    """Return the LaTeX significance marker for p-value `p`.

    Three stars for p < 0.01, two for p < 0.05, one for p < 0.10, and an
    empty string otherwise (including when `p` is NaN, since every
    comparison is then false).
    """
    markers = (
        (0.01, r"\sym{***}"),
        (0.05, r"\sym{**}"),
        (0.10, r"\sym{*}"),
    )
    # Thresholds are ordered tightest-first, so the first hit is the strongest.
    for cutoff, marker in markers:
        if p < cutoff:
            return marker
    return ""

def fmt(x, nd=4):
    """Format a number with `nd` decimals; return "" for missing values.

    Parameters
    ----------
    x : number or None
        Value to format.
    nd : int, default 4
        Number of digits after the decimal point.

    Returns
    -------
    str
        The fixed-point representation of `x`, or an empty string when `x`
        is None, NaN, or infinite.
    """
    if x is None:
        return ""
    try:
        # math.isfinite accepts any real number (including numpy scalars that
        # are not subclasses of `float`, e.g. np.float32), so NaN/inf are
        # caught regardless of the numeric type.
        if not math.isfinite(x):
            return ""
    except (TypeError, OverflowError):
        # Not convertible to float: let the format call below decide.
        pass
    return f"{x:.{nd}f}"

def latex_escape(s):
    """Backslash-escape the characters `&`, `%` and `_` in `s` for LaTeX."""
    escaped = s
    for ch in ("&", "%", "_"):
        escaped = escaped.replace(ch, "\\" + ch)
    return escaped

# --------------------------------------------------
# Variable labels (only include what appears in each model)
# Values are ready-to-use LaTeX. The "#" labels carry a thin space (\,)
# so they match the labels used for the combined table.
# --------------------------------------------------
var_labels = {
    "renov_reset_years": r"$\text{Reset Years}$",
    "log_med_income_nonprimary_assoc": r"$\log(\text{Nonprimary Income})$",
    "log_units": r"$\log(\text{Units})$",
    "num_bldgs_final_assoc": r"$\#\,\text{Buildings}$",
    "num_stories_final_assoc": r"$\#\,\text{Stories}$",
    "frac_npexcorp_state_attom_assoc": r"$\text{Share Nonprimary}$",
    "frac_corp_own_attom_assoc": r"$\text{Share Corp Owners}$",
    "corp_mgmt_city_ind": r"$\text{Corp Mgmt Indicator}$",
    "effective_age": r"$\text{Effective Age}$"
}

# --------------------------------------------------
# Export function (one model per file)
# --------------------------------------------------
def export_model_body(model, fname, include_cohort_fe=True):
    """Write a single-model LaTeX table body to ``OUT_DIR / fname``.

    The body contains one coefficient row and one standard-error row per
    regressor (the intercept and built-year cohort dummies are skipped),
    followed by R-squared, the observation count, a cohort-FE indicator row
    and a significance footnote.

    Parameters
    ----------
    model : fitted results object
        Must expose `params`, `pvalues`, `bse`, `rsquared` and `nobs`.
    fname : str
        File name, relative to `OUT_DIR`.
    include_cohort_fe : bool, default True
        Whether the FE indicator row reads "Yes" or "No". This flag is not
        checked against the model; the caller must pass the right value.

    Returns
    -------
    None
        The file is written as UTF-8 and its path is printed.
    """
    lines = [r"\midrule"]

    # Coefficients (skip intercept and cohort FE rows)
    for v in model.params.index:
        if v == "Intercept" or v.startswith("C(built_cohort"):
            continue

        # Entries in var_labels are already LaTeX; escaping them would corrupt
        # any label containing "_", "%" or "&". Only raw variable names that
        # have no label need escaping.
        label = var_labels[v] if v in var_labels else latex_escape(v)
        coef = fmt(model.params[v]) + star(model.pvalues[v])
        se = "(" + fmt(model.bse[v]) + ")"

        lines.append(f"{label} & {coef} \\\\")
        lines.append(f" & {se} \\\\")
        lines.append(r"\addlinespace")

    # Fit stats
    lines.append(r"$R^2$ & " + fmt(model.rsquared, 3) + r" \\")
    lines.append(r"Observations & " + str(int(model.nobs)) + r" \\")

    fe_flag = "Yes" if include_cohort_fe else "No"
    lines.append("Built-year cohort FE & " + fe_flag + r" \\")

    lines.append(r"\midrule")
    lines.append(
        r"\multicolumn{2}{l}{ZIP-clustered standard errors in parentheses. "
        r"\sym{*} $p<0.10$, \sym{**} $p<0.05$, \sym{***} $p<0.01$.} \\"
    )

    out_path = OUT_DIR / fname
    out_path.write_text("\n".join(lines), encoding="utf-8")
    print("Wrote:", out_path)

# --------------------------------------------------
# Export all three models
# (only model 1 includes the built-year cohort FE)
# --------------------------------------------------
export_jobs = [
    (m1, "delta_model1_body.tex", True),
    (m2, "delta_model2_body.tex", False),
    (m3, "delta_model3_body.tex", False),
]
for fitted, tex_name, has_cohort_fe in export_jobs:
    export_model_body(fitted, tex_name, include_cohort_fe=has_cohort_fe)

Wrote: C:\Users\ngodin\Dropbox\RESEARCH\active_projects\florida_condo\final_code\19_delta_regressions\output\delta_model1_body.tex
Wrote: C:\Users\ngodin\Dropbox\RESEARCH\active_projects\florida_condo\final_code\19_delta_regressions\output\delta_model2_body.tex
Wrote: C:\Users\ngodin\Dropbox\RESEARCH\active_projects\florida_condo\final_code\19_delta_regressions\output\delta_model3_body.tex


In [23]:
from pathlib import Path
import math

# --------------------------------------------------
# Output directory (where the combined table body is written)
# --------------------------------------------------
OUT_DIR = Path(r"C:\Users\ngodin\Dropbox\RESEARCH\active_projects\florida_condo\final_code\19_delta_regressions\output")

# --------------------------------------------------
# Formatting helpers
# --------------------------------------------------
def star(p):
    """Map a p-value to its LaTeX star marker ("" when not below 0.10)."""
    # Work from the loosest threshold inward; a NaN fails the first
    # comparison and therefore gets no marker.
    if not p < 0.10:
        return ""
    if not p < 0.05:
        return r"\sym{*}"
    if not p < 0.01:
        return r"\sym{**}"
    return r"\sym{***}"

def fmt(x, nd=4):
    """Format a number with `nd` decimals; return "" for missing values.

    Parameters
    ----------
    x : number or None
        Value to format.
    nd : int, default 4
        Number of digits after the decimal point.

    Returns
    -------
    str
        The fixed-point representation of `x`, or an empty string when `x`
        is None, NaN, or infinite.
    """
    if x is None:
        return ""
    try:
        # math.isfinite accepts any real number (including numpy scalars that
        # are not subclasses of `float`, e.g. np.float32), so NaN/inf are
        # caught regardless of the numeric type.
        if not math.isfinite(x):
            return ""
    except (TypeError, OverflowError):
        # Not convertible to float: let the format call below decide.
        pass
    return f"{x:.{nd}f}"

def latex_escape(s):
    """Return `s` with `&`, `%` and `_` each prefixed by a backslash."""
    table = str.maketrans({"&": r"\&", "%": r"\%", "_": r"\_"})
    return s.translate(table)

# --------------------------------------------------
# Models and column labels
# Columns are numbered "(1)", "(2)", ... in the order of `models`.
# --------------------------------------------------
models = [m1, m2, m3]
col_labels = [f"({k})" for k in range(1, len(models) + 1)]

# --------------------------------------------------
# Variable labels (only variables that appear)
# Keys are regressor names; values are ready-to-use LaTeX.
# --------------------------------------------------
var_labels = dict(
    renov_reset_years=r"$\text{Reset Years}$",
    log_med_income_nonprimary_assoc=r"$\log(\text{Nonprimary Income})$",
    log_units=r"$\log(\text{Units})$",
    num_bldgs_final_assoc=r"$\#\,\text{Buildings}$",
    num_stories_final_assoc=r"$\#\,\text{Stories}$",
    effective_age=r"$\text{Effective Age}$",
)

# --------------------------------------------------
# Collect all non-FE, non-intercept regressors
# (first-seen order across models, no duplicates)
# --------------------------------------------------
vars_all = list(
    dict.fromkeys(
        name
        for fitted in models
        for name in fitted.params.index
        if name != "Intercept" and not name.startswith("C(built_cohort")
    )
)

# --------------------------------------------------
# Build LaTeX body
# --------------------------------------------------
n_table_cols = len(models) + 1  # label column + one column per model

lines = []

# Column header
lines.append(" & " + " & ".join(col_labels) + r" \\")
lines.append(r"\midrule")

# Coefficients + SEs
for v in vars_all:
    # Entries in var_labels are already LaTeX; escaping them would corrupt any
    # label containing "_", "%" or "&". Only unlabeled raw names are escaped.
    label = var_labels[v] if v in var_labels else latex_escape(v)

    coef_row = [label]
    se_row = [""]

    for m in models:
        if v in m.params.index:
            coef_row.append(fmt(m.params[v]) + star(m.pvalues[v]))
            se_row.append("(" + fmt(m.bse[v]) + ")")
        else:
            # Regressor not in this model: leave both cells blank.
            coef_row.append("")
            se_row.append("")

    lines.append(" & ".join(coef_row) + r" \\")
    lines.append(" & ".join(se_row) + r" \\")
    lines.append(r"\addlinespace")

# Fit stats
lines.append(r"$R^2$ & " + " & ".join(fmt(m.rsquared, 3) for m in models) + r" \\")
lines.append(r"Observations & " + " & ".join(str(int(m.nobs)) for m in models) + r" \\")

# Built-year cohort FE row: derived from each model's own terms so the row
# cannot drift out of sync with the specifications.
cohort_fe_flags = [
    "Yes" if any(name.startswith("C(built_cohort") for name in m.params.index) else "No"
    for m in models
]
lines.append("Built-year cohort FE & " + " & ".join(cohort_fe_flags) + r" \\")

lines.append(r"\midrule")
# The footnote spans every column of the table.
lines.append(
    r"\multicolumn{" + str(n_table_cols) + r"}{l}{ZIP-clustered standard errors in parentheses. "
    r"\sym{*} $p<0.10$, \sym{**} $p<0.05$, \sym{***} $p<0.01$.} \\"
)

# --------------------------------------------------
# Write file
# --------------------------------------------------
out_path = OUT_DIR / "delta_combined_models_body.tex"
out_path.write_text("\n".join(lines), encoding="utf-8")

print("Wrote:", out_path)


Wrote: C:\Users\ngodin\Dropbox\RESEARCH\active_projects\florida_condo\final_code\19_delta_regressions\output\delta_combined_models_body.tex
