# 05 — Spatial Exploration

Interactive spatial analysis:
- DSNY section maps with tonnage data
- Cluster assignment choropleth maps
- Spatial autocorrelation (Moran's I)
- Interactive Folium maps

**Prerequisites:** Run `make spatial` and `make cluster` first.

In [None]:
# Data, plotting and mapping libraries used by the cells below.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
import folium
from pathlib import Path

# Apply the seaborn white-grid matplotlib style to every figure.
plt.style.use('seaborn-v0_8-whitegrid')
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

import sys
# Put the parent of the current working directory on the import path so that
# `src.config` resolves (presumably the notebook is run from a sub-directory
# of the repository root — confirm).
sys.path.insert(0, str(Path.cwd().parent))
from src.config import DATA_PROCESSED, DATA_SPATIAL, RESULTS_DIR

## 1. Load Spatial Data

In [None]:
# Load the DSNY section polygons, preferring the population-enriched file and
# falling back to the plain geometry file. `sections` stays None when neither
# file exists; every later cell checks for that before using it.
sections = None
candidate_paths = [
    DATA_SPATIAL / 'dsny_sections_with_pop.parquet',
    DATA_SPATIAL / 'dsny_sections.parquet',
]
for path in candidate_paths:
    if path.exists():
        sections = gpd.read_parquet(path)
        print(f'Loaded {len(sections)} sections from {path.name}')
        break

if sections is None:
    # Say so explicitly instead of leaving the cell silently empty.
    print(f'No section geometry found in {DATA_SPATIAL} — run `make spatial` first')
else:
    print(f'CRS: {sections.crs}')
    print(f'Columns: {list(sections.columns)}')
    if 'population' in sections.columns:
        print(f'Total population: {sections["population"].sum():,}')
    # A bare expression is only auto-rendered when it is the last top-level
    # statement of a cell; inside a branch it must be displayed explicitly.
    # `display` is provided by the IPython kernel.
    display(sections.head())

## 2. Base Map

In [None]:
# Plain outline map of every DSNY section; skipped when no geometry was loaded.
if sections is not None:
    fig = plt.figure(figsize=(12, 14))
    ax = fig.add_subplot(1, 1, 1)

    sections.plot(
        ax=ax,
        facecolor='lightblue',
        edgecolor='black',
        linewidth=0.3,
    )

    ax.set_axis_off()
    ax.set_title('DSNY Sections', fontsize=14)

    plt.tight_layout()
    plt.show()

## 3. Tonnage Choropleth

In [None]:
# Average weekly refuse tonnage per section, mapped next to population and
# per-capita refuse when the section file carries a `population` column.
section_data = pd.read_parquet(DATA_PROCESSED / 'weekly_section.parquet')

# `avg_tons` (columns: Section_Code, avg_refuse) is reused by the Moran's I
# and Folium cells further down.
avg_tons = (
    section_data.groupby('Section_Code')['tons_refuse']
    .mean()
    .reset_index(name='avg_refuse')
)

if sections is not None:
    merged = sections.merge(avg_tons, on='Section_Code', how='left')

    # One (column, title, colormap) entry per panel that will be drawn.
    panels = [('avg_refuse', 'Avg Weekly Refuse (Tons)', 'YlOrRd')]

    if 'population' in merged.columns:
        # Tons -> lbs (x2000) per resident; left undefined where the
        # population is zero or missing.
        merged['per_capita'] = np.where(
            merged['population'] > 0,
            merged['avg_refuse'] / merged['population'] * 2000,
            np.nan,
        )
        panels += [
            ('population', 'Population', 'Blues'),
            ('per_capita', 'Avg Weekly Refuse (lbs/capita)', 'Purples'),
        ]

    # Size the grid to the panels that exist so that a section file without
    # population does not leave two empty framed axes. `squeeze=False` keeps
    # `axes` two-dimensional even for a single panel. The 7-inch panel width
    # matches the cluster-assignment maps.
    n_panels = len(panels)
    fig, axes = plt.subplots(1, n_panels, figsize=(7 * n_panels, 14), squeeze=False)

    for ax, (col, title, cmap) in zip(axes[0], panels):
        merged.plot(
            column=col, cmap=cmap, legend=True, ax=ax,
            edgecolor='black', linewidth=0.2,
            legend_kwds={'shrink': 0.5},
            # Sections with no value are drawn grey rather than dropped.
            missing_kwds={'color': 'lightgray'},
        )
        ax.set_title(title)
        ax.set_axis_off()

    plt.tight_layout()
    plt.show()

## 4. Cluster Assignment Maps

In [None]:
# Categorical choropleth for every `cluster_*` column in the section-level
# assignment file, one panel per clustering method.
assign_path = RESULTS_DIR / 'cluster_assignments_section.parquet'
if assign_path.exists() and sections is not None:
    assign = pd.read_parquet(assign_path)
    merged = sections.merge(assign, on='Section_Code', how='left')

    cluster_cols = [c for c in assign.columns if c.startswith('cluster_')]
    n = len(cluster_cols)

    if n == 0:
        # plt.subplots(1, 0) raises, so report the problem instead.
        print(f'No cluster_* columns found in {assign_path.name}')
    else:
        # squeeze=False keeps `axes` two-dimensional, so a single method
        # needs no special-casing.
        fig, axes = plt.subplots(1, n, figsize=(7 * n, 14), squeeze=False)

        for ax, col in zip(axes[0], cluster_cols):
            merged.plot(
                column=col, cmap='Set3', categorical=True, legend=True, ax=ax,
                edgecolor='black', linewidth=0.2,
                # Sections without an assignment are drawn grey.
                missing_kwds={'color': 'lightgray'},
                legend_kwds={'loc': 'lower left', 'fontsize': 7},
            )
            # e.g. 'cluster_k_means' -> 'K Means'
            method = col.replace('cluster_', '').replace('_', ' ').title()
            ax.set_title(method)
            ax.set_axis_off()

        plt.suptitle('Cluster Assignments — Section Level', fontsize=14)
        plt.tight_layout()
        plt.show()
else:
    print('Run clustering and spatial pipelines first')

## 5. Spatial Autocorrelation (Moran's I)

In [None]:
# Global Moran's I of average weekly refuse over Queen-contiguity neighbours.
# libpysal/esda are optional, so their absence is reported, not raised.
if sections is not None:
    try:
        from libpysal.weights import Queen
        from esda.moran import Moran
    except ImportError:
        print('Install esda for spatial autocorrelation analysis: pip install esda')
    else:
        try:
            merged = sections.merge(avg_tons, on='Section_Code', how='left')
            # Moran's I cannot handle missing values. Resetting the index
            # after the drop keeps row positions and index labels identical,
            # so the weights line up with the value array whether libpysal
            # keys them by position or by index.
            merged = merged.dropna(subset=['avg_refuse']).reset_index(drop=True)

            w = Queen.from_dataframe(merged)
            w.transform = 'r'  # row-standardise the weights

            moran = Moran(merged['avg_refuse'].values, w)
            print(f"Moran's I: {moran.I:.4f}")
            print(f"p-value: {moran.p_sim:.4f}")
            print(f"Z-score: {moran.z_sim:.4f}")

            if moran.p_sim < 0.05:
                print('\n→ Significant spatial autocorrelation detected')
                # The direction depends on which side of the null expectation
                # I falls: above means clustering, below means dispersion.
                if moran.I > moran.EI:
                    print('  Nearby sections have similar waste generation patterns')
                else:
                    print('  Nearby sections have dissimilar waste generation patterns')
            else:
                print('\n→ No significant spatial autocorrelation')
        except Exception as e:
            # Notebook boundary: report the failure and let later cells run.
            print(f'Spatial autocorrelation analysis failed: {e}')

## 6. Interactive Folium Maps

`make visualize` also saves these maps as HTML files in `outputs/interactive/`.

In [None]:
# Interactive Folium map: each section is shaded white -> red by its average
# weekly refuse (capped at the 95th percentile) with a popup of its figures.
if sections is not None:
    merged = sections.merge(avg_tons, on='Section_Code', how='left')

    # Leaflet reads GeoJSON coordinates as WGS84 lon/lat, so geometry stored
    # in a projected CRS must be reprojected or it lands off the map.
    if merged.crs is not None:
        merged = merged.to_crs(epsg=4326)

    center = [40.7128, -74.0060]
    m = folium.Map(location=center, zoom_start=11, tiles='CartoDB positron')

    # Cap the colour scale at the 95th percentile so a few very large
    # sections do not wash out the rest.
    max_tons = merged['avg_refuse'].quantile(0.95)

    for _, row in merged.iterrows():
        geom = row.geometry
        if geom is None or geom.is_empty:
            continue

        tons = row.get('avg_refuse', 0)
        if pd.isna(tons):
            tons = 0

        # Clamp to [0, 1] so the channel values always fit two hex digits.
        intensity = max(0.0, min(tons / max_tons, 1.0)) if max_tons > 0 else 0
        fade = int(255 * (1 - intensity))
        color = f'#{255:02x}{fade:02x}{fade:02x}'

        popup = f"<b>{row['Section_Code']}</b><br>Avg tons/week: {tons:.1f}"
        if 'population' in row.index and pd.notna(row.get('population')):
            popup += f"<br>Population: {int(row['population']):,}"

        folium.GeoJson(
            geom.__geo_interface__,
            # Bind the colour as a default argument so each layer keeps its
            # own value instead of the loop's last one.
            style_function=lambda x, c=color: {
                'fillColor': c, 'color': 'black',
                'weight': 0.5, 'fillOpacity': 0.6
            },
            popup=folium.Popup(popup, max_width=200),
        ).add_to(m)

    # A bare `m` inside the `if` block is not auto-rendered by the notebook;
    # display it explicitly. `display` is provided by the IPython kernel.
    display(m)