# Dark Pebble: The Rent Gap Thesis

## Phase 1: The Narrative

### The Pitch (Satire)
"We identify 'underutilized' communities with 'high upside'. Our proprietary algorithms locate markets where housing is artificially cheap relative to the region, signaling a prime opportunity for value realization through strategic capital injection."

### The Reality (Academic)
We are identifying vulnerable, low-income neighborhoods where the **Rent Gap**—the difference between current capitalized rent and potential future rent—is widest. Targeting the widest gap maximizes displacement pressure: capital flows into these areas to capture the gap, often at the expense of existing residents.

### Actionable Narrative Arc
1.  **Identification**: "Where is housing artificially cheap?" -> **Rent Gap** (High Appreciation Potential, Low Entry Price).
2.  **Catalyst**: "Where is the money flowing?" -> **Economic Tailwind** (Job/Wage Growth).
3.  **Displacement**: "Who are we replacing?" -> **Displacement Cliff** (Rents rising faster than local wages).

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import folium
from folium.plugins import HeatMap
import json

# Setup: input file locations and notebook-wide plotting defaults
DATA_DIR = Path('housing_market_data/processed')
SNAPSHOT_FILE = DATA_DIR.joinpath('dark_pebble_snapshot.csv')
MASTER_FILE = DATA_DIR.joinpath('dark_pebble_master.csv')

plt.style.use('seaborn-v0_8-darkgrid')
plt.rcParams.update({'figure.figsize': (12, 8)})

## Phase 2: Data Loading
We load our "Dark Pebble" proprietary dataset, which combines Zillow Home Values, Market Rents, and BLS Wage/Employment data.

In [None]:
def load_data():
    """Read the snapshot and master CSV files into DataFrames.

    Returns:
        tuple: ``(snapshot, master)``. ``snapshot`` is read from
        ``SNAPSHOT_FILE`` (used for the scatter plot and the map);
        ``master`` is read from ``MASTER_FILE`` (used for the time series)
        with its ``date`` column converted by ``pd.to_datetime``.
    """
    snapshot = pd.read_csv(SNAPSHOT_FILE)

    # The time-series table is sorted and plotted by date, so parse it here.
    master = pd.read_csv(MASTER_FILE).assign(
        date=lambda frame: pd.to_datetime(frame['date'])
    )

    return snapshot, master

# Load both tables once; the plotting cells below reuse df_snap and df_master.
df_snap, df_master = load_data()
# Row counts as a quick sanity check on what was read.
print(f"Loaded {len(df_snap)} counties in snapshot.")
print(f"Loaded {len(df_master)} monthly records.")
# Trailing expression: preview the first snapshot rows.
df_snap.head()

## Phase 3: Data Visualization

### 1. The "Opportunity Matrix" (Scatter Plot)
- **X-Axis**: Median Home Price (Low to High) - *Barrier to Entry*
- **Y-Axis**: 3-Year Appreciation Rate (Low to High) - *Momentum*

**The "Kill Zone" (Top-Left)**: Cheap homes rising fast. This is where the Rent Gap is closing most aggressively.

In [None]:
def plot_opportunity_matrix(df):
    """Scatter home price against 3-year appreciation and label the top targets.

    Points are colored by ``rent_gap_proxy`` and sized by ``job_growth_3y``.
    The shaded "Kill Zone" and the returned targets share the same two
    cut-offs: price below the 30th percentile of ``zhvi`` and appreciation
    above the 70th percentile of ``appreciation_3y``.

    Args:
        df: DataFrame with ``zhvi``, ``appreciation_3y``, ``rent_gap_proxy``,
            ``job_growth_3y`` and ``RegionName`` columns.

    Returns:
        DataFrame of up to five rows inside the Kill Zone, sorted by
        ``appreciation_3y`` descending. When no row has both ``zhvi`` and
        ``appreciation_3y``, nothing is plotted and an empty DataFrame with
        the input's columns is returned.
    """
    # Filter for valid data
    plot_df = df.dropna(subset=['zhvi', 'appreciation_3y']).copy()
    if plot_df.empty:
        # Keep the return type stable so callers can still test `.empty`.
        print("No rows with both zhvi and appreciation_3y. Skipping plot.")
        return plot_df

    # Compute the cut-offs once so the shaded zone and the selection agree.
    price_cut = plot_df['zhvi'].quantile(0.3)
    growth_cut = plot_df['appreciation_3y'].quantile(0.7)

    # Create Scatter Plot
    _, ax = plt.subplots(figsize=(14, 8))
    sns.scatterplot(data=plot_df, x='zhvi', y='appreciation_3y',
                    hue='rent_gap_proxy', palette='RdYlGn', size='job_growth_3y',
                    sizes=(20, 200), alpha=0.7, ax=ax)
    ax.set_xscale('log')  # Log scale for price to see the low end better

    # Highlight the "Kill Zone" in data coordinates. The left edge is the
    # smallest positive price because 0 has no position on a log axis, and the
    # lower edge is the appreciation cut-off rather than a fixed axes fraction.
    zone_left = plot_df.loc[plot_df['zhvi'] > 0, 'zhvi'].min()
    zone_top = plot_df['appreciation_3y'].max()
    ax.fill_between([zone_left, price_cut], growth_cut, zone_top,
                    color='red', alpha=0.1,
                    label='Kill Zone (Low Price, High Growth)')

    ax.set_title('The Opportunity Matrix: Identifying the Rent Gap', fontsize=16, fontweight='bold')
    ax.set_xlabel('Median Home Price ($)', fontsize=12)
    ax.set_ylabel('3-Year Appreciation Rate', fontsize=12)

    # Annotate top targets: low price (bottom 30%) and high appreciation (top 30%)
    in_zone = (plot_df['zhvi'] < price_cut) & (plot_df['appreciation_3y'] > growth_cut)
    targets = plot_df[in_zone].sort_values('appreciation_3y', ascending=False).head(5)

    for _, row in targets.iterrows():
        ax.text(row['zhvi'], row['appreciation_3y'], row['RegionName'],
                fontsize=9, fontweight='bold', ha='right')

    # Place the legend outside the axes so it does not cover the points.
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.show()

    return targets

# Draw the matrix and keep the returned target rows; the Displacement Cliff
# cell at the end of the notebook reads top_targets.
top_targets = plot_opportunity_matrix(df_snap)

### 2. The "Target Map" (Bivariate Analysis)
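We map the intersection of **Economic Tailwind** (Wage Growth) and **Displacement Pressure** (Rent Growth). The two measures are combined into a single score (the average of 3-year wage growth and 3-year rent growth), which is drawn as a county-level choropleth for California only.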

**Logic**: Areas where wages are rising (attracting new residents) AND rents are spiking (displacing old residents) are our prime targets.

In [None]:
def create_target_map(df, geojson_path='california-counties.geojson'):
    """Build a folium choropleth of a wage-plus-rent growth score for CA counties.

    Args:
        df: DataFrame with ``StateName``, ``fips``, ``wage_growth_3y``,
            ``rent_growth_3y`` and ``countycountyequivalent`` columns.
        geojson_path: Path to the county boundaries GeoJSON whose features
            carry the county name in ``properties.name``.

    Returns:
        A ``folium.Map`` with the choropleth and a layer control, or ``None``
        (after printing a notice) when the GeoJSON file does not exist.
    """
    # Filter for California for the map (since we have the geojson)
    ca_df = df[df['StateName'] == 'CA'].dropna(
        subset=['fips', 'wage_growth_3y', 'rent_growth_3y']
    ).copy()

    # "Gentrification Score" for visualization: the mean of wage growth and
    # rent growth, so counties high on both rank highest.
    ca_df['gentrification_score'] = (ca_df['wage_growth_3y'] + ca_df['rent_growth_3y']) / 2

    # Prepare for Map: Match GeoJSON names
    # GeoJSON has "Alameda", DataFrame has "Alameda County".
    # Strip only a trailing " County", and pass regex explicitly because the
    # default of `regex` differs between pandas versions.
    ca_df['match_name'] = ca_df['countycountyequivalent'].str.replace(
        r' County$', '', regex=True
    )

    # Load GeoJSON. Read it as UTF-8 rather than the platform default encoding.
    try:
        with open(geojson_path, 'r', encoding='utf-8') as f:
            geo_data = json.load(f)
    except FileNotFoundError:
        print("GeoJSON not found. Skipping map.")
        return None

    m = folium.Map(location=[37.0, -120.0], zoom_start=6)

    folium.Choropleth(
        geo_data=geo_data,
        name='choropleth',
        data=ca_df,
        columns=['match_name', 'gentrification_score'],
        key_on='feature.properties.name',
        fill_color='YlOrRd',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name='Gentrification Intensity (Wage + Rent Growth)'
    ).add_to(m)

    folium.LayerControl().add_to(m)
    return m

# create_target_map returns None when the GeoJSON file is missing, so
# target_map may be None here.
target_map = create_target_map(df_snap)
# Trailing expression: show the map as the cell output.
target_map

### 3. The "Displacement Cliff" (Time Series)
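We visualize the moment local wages decouple from local rents. Both series are indexed to 100 at the first date that has both a rent and a wage value, so the lines start together and the shaded gap marks where rent growth has outpaced wage growth. This crossover point represents the "Cliff" where displacement becomes inevitable.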

In [None]:
def plot_displacement_cliff(df_master, fips_code, county_name):
    """Plot rent and wage growth for one county, both indexed to 100 at the start.

    Rows for ``fips_code`` are sorted by date and rows missing ``rent`` or
    ``wage`` are dropped. Each series is divided by its first remaining value
    and multiplied by 100, and the area where the rent index is above the wage
    index is shaded.

    Args:
        df_master: DataFrame with ``fips``, ``date``, ``rent`` and ``wage`` columns.
        fips_code: Value matched against the ``fips`` column.
        county_name: Label used in the plot title and in printed notices.

    Returns:
        None. When the county has no rows, no rows with both rent and wage, or
        a zero baseline value, a notice is printed and nothing is plotted.
    """
    county_data = df_master[df_master['fips'] == fips_code].sort_values('date')

    if county_data.empty:
        print(f"No data for {county_name}")
        return

    # Keep only rows where both series are present; the first such row is the
    # baseline for the index.
    plot_data = county_data.dropna(subset=['rent', 'wage']).copy()
    if plot_data.empty:
        print(f"No valid rent/wage data for {county_name}")
        return

    base_rent = plot_data['rent'].iloc[0]
    base_wage = plot_data['wage'].iloc[0]
    if base_rent == 0 or base_wage == 0:
        # Dividing by a zero baseline would produce inf/NaN index values.
        print(f"Cannot index rent/wage for {county_name}: baseline value is zero")
        return

    # Normalize to 100 at start to compare growth rates
    plot_data['norm_rent'] = (plot_data['rent'] / base_rent) * 100
    plot_data['norm_wage'] = (plot_data['wage'] / base_wage) * 100

    plt.figure(figsize=(12, 6))
    plt.plot(plot_data['date'], plot_data['norm_rent'], label='Median Rent', color='#e74c3c', linewidth=3)
    plt.plot(plot_data['date'], plot_data['norm_wage'], label='Local Wages', color='#2ecc71', linewidth=3, linestyle='--')

    # Fill the gap. interpolate=True extends the shading to the point where
    # the two lines actually cross instead of stopping at the nearest sample.
    plt.fill_between(plot_data['date'], plot_data['norm_rent'], plot_data['norm_wage'],
                     where=(plot_data['norm_rent'] > plot_data['norm_wage']),
                     interpolate=True,
                     color='gray', alpha=0.3, label='Displacement Gap')

    plt.title(f"The Displacement Cliff: {county_name}", fontsize=16, fontweight='bold')
    plt.xlabel("Year", fontsize=12)
    plt.ylabel("Growth Index (Start=100)", fontsize=12)
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.show()

# Plot for the top target identified in the Opportunity Matrix: the first row
# of top_targets, which is sorted by appreciation_3y in descending order.
# Skipped entirely when no county met the selection criteria.
if not top_targets.empty:
    target_fips = top_targets.iloc[0]['fips']
    target_name = top_targets.iloc[0]['RegionName']
    plot_displacement_cliff(df_master, target_fips, target_name)