# Housing Supply vs Wage Growth (CBSA-Level Analysis)

This notebook analyzes wage growth and housing permit trends across selected U.S. metropolitan areas
(Core-Based Statistical Areas, CBSAs) using the BLS Quarterly Census of Employment and Wages (QCEW)
and Census Building Permits data. The focus is on reproducibility, data quality, and exploratory
metrics such as zoning pressure and cumulative housing gaps.

In [None]:
#from bls_housing.pipeline.wages import build_annual_wages
from bls_housing.pipeline.ensure import ensure_annual_wages
from bls_housing.pipeline.duck import list_metros, get_analysis_db_connection

# Analysis window: calendar years 2014 through 2024 inclusive.
years = list(range(2014, 2025))

# Area codes of the metros under study (presumably CBSA codes, per the
# notebook title -- confirm against list_metros).
area_codes = [42660, 38900, 33460, 12420, 47900]

#exclude_codes = [13980, 15680, 13780]
con = get_analysis_db_connection()
metros = list_metros(con, area_codes)


# Fetch (or load) the wage data for every metro/year pair and report any
# keys the pipeline flagged as missing.
w_res = ensure_annual_wages(metros, years)
(wages_df, annual_wages_df) = w_res.df_tuple
print("Missing wages keys:", sorted(w_res.missing_keys))
print(annual_wages_df.head(10))

# Completeness check: there must be exactly one distinct (Code, Year) key per
# metro per year. Raised explicitly rather than asserted so the check still
# runs under `python -O` and fails with a readable message.
expected_rows = len(metros) * len(years)
actual_rows = len(annual_wages_df.drop_duplicates(subset=["Code", "Year"]))
if actual_rows != expected_rows:
    raise ValueError(
        f"Annual wages are incomplete: expected {expected_rows} distinct "
        f"(Code, Year) rows, found {actual_rows}"
    )

In [None]:
from bls_housing.pipeline.ensure import ensure_annual_permits

# Fetch (or load) the permit data for every metro/year pair. The result gets
# its own name so it does not overwrite the wages result held in `w_res`.
permits_res = ensure_annual_permits(metros, years)
(permits_df, annual_permits) = permits_res.df_tuple
print("Missing permits keys:", sorted(permits_res.missing_keys))

print(annual_permits.head(10))

# Completeness check: there must be exactly one distinct (Code, Year) key per
# metro per year. Raised explicitly rather than asserted so the check still
# runs under `python -O` and fails with a readable message.
expected_rows = len(metros) * len(years)
actual_rows = len(annual_permits.drop_duplicates(subset=["Code", "Year"]))
if actual_rows != expected_rows:
    raise ValueError(
        f"Annual permits are incomplete: expected {expected_rows} distinct "
        f"(Code, Year) rows, found {actual_rows}"
    )

In [None]:
from bls_housing.pipeline.marts import build_annual_metrics

# Combine the annual wage and permit frames into the per-year metrics table.
final_df = build_annual_metrics(annual_wages_df, annual_permits)

# Quick sanity preview: first ten rows of the zoning-pressure metric,
# ordered by metro and then by year.
zoning_preview = final_df.loc[:, ['Area', 'Year', 'Zoning_Pressure']]
zoning_preview = zoning_preview.sort_values(['Area', 'Year'])
print(zoning_preview.head(10))

In [None]:
import matplotlib.pyplot as plt
from pathlib import Path
import datetime as dt
# Build the "<Area>, <Code>" legend labels on a copy so final_df itself is
# never mutated: a failure part-way through this cell can no longer leave a
# stray Area_Code column on the frame that is later handed to update_db.
labelled_df = final_df.assign(
    Area_Code=final_df['Area'].astype(str) + ', ' + final_df['Code'].astype(str)
)
# One column per metro, one row per year. pivot_table aggregates duplicate
# (Year, Area_Code) rows with its default aggfunc (mean).
pivot_df = labelled_df.pivot_table(index='Year', columns='Area_Code', values='Zoning_Pressure')

plt.figure(figsize=(12, 6))
for column in pivot_df.columns:
    plt.plot(pivot_df.index, pivot_df[column], marker='o', label=column)

# Reference line at 1.0, labelled as balanced growth.
plt.axhline(y=1.0, color='black', linestyle='--', alpha=0.5, label='Balanced Growth (1.0)')

# NOTE(review): the year range in the title is hard-coded -- confirm it still
# matches the years actually present in pivot_df.
plt.title('Zoning Pressure Index (2015-2024 inflation adjusted)')
plt.ylabel('Pressure (>1 means Demand outpaces Supply)')
plt.xlabel('Year')
# Place the legend outside the axes so it does not cover the lines.
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.grid(True, linestyle='--', alpha=0.3)
plt.tight_layout()


# Each run writes its charts into a fresh timestamped folder; out_dir is
# reused by the cumulative-deficit chart cell further down.
out_dir = Path("../outputs/charts") / dt.datetime.now().strftime("%Y%m%d_%H%M%S")
out_dir.mkdir(parents=True, exist_ok=True)

plt.savefig(out_dir / "Zoning.png", dpi=200, bbox_inches="tight")
plt.show()

In [None]:
from bls_housing.pipeline.marts import build_cumulative_metrics

# Build the cumulative metrics table from the same annual wage and permit
# frames; the chart cell below reads its Area, Code, Year and Structural_Gap
# columns.
cumulative_df = build_cumulative_metrics(annual_wages_df, annual_permits)

In [None]:
import matplotlib.pyplot as plt

# Build the "<Area>, <Code>" legend labels on a copy so cumulative_df itself
# is never mutated: a failure part-way through this cell can no longer leave
# a stray Area_Code column on the frame that is later handed to update_db.
labelled_df = cumulative_df.assign(
    Area_Code=cumulative_df['Area'].astype(str) + ', ' + cumulative_df['Code'].astype(str)
)
# Pivot for plotting: one column per metro, one row per year. Unlike
# pivot_table, pivot raises if a (Year, Area_Code) pair appears more than once.
pivot_gap = labelled_df.pivot(index='Year', columns='Area_Code', values='Structural_Gap')

plt.figure(figsize=(12, 6))

# Plot lines
for column in pivot_gap.columns:
    plt.plot(pivot_gap.index, pivot_gap[column], marker='o', linewidth=2, label=column)

# Add Reference Line (1.0 = Supply keeping up with Demand)
plt.axhline(y=1.0, color='black', linestyle='--', alpha=0.5, label='Balanced (1.0)')

# NOTE(review): the base year in the title is hard-coded -- confirm it still
# matches the first year present in pivot_gap.
plt.title('Cumulative Housing Deficit (Since 2015 inflation adjusted)')
plt.ylabel('Deficit Index (1.2 = Demand is 20% ahead of Supply)')
plt.xlabel('Year')
# Place the legend outside the axes so it does not cover the lines.
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.grid(True, linestyle='--', alpha=0.3)
plt.tight_layout()

# out_dir is the timestamped folder created by the zoning chart cell; run that
# cell first so both charts of a run land in the same directory.
plt.savefig(out_dir / "Cumulative_Housing_Deficit.png", dpi=200, bbox_inches="tight")
plt.show()

In [None]:
from bls_housing.pipeline.duck import update_db
# Hand the metric tables and the wage/permit frames unpacked alongside the
# annual ones to update_db, using the connection opened in the first cell.
update_db(
    con,
    final_df,
    cumulative_df,
    wages_df,
    permits_df,
)

In [None]:
# Release the analysis database connection opened in the first cell; any cell
# that uses `con` must be run before this one.
con.close()