# Who Lives Where?

First exploratory notebook: mapping people, parcels, zoning, water, and roads.

**Question:** Who lives where in relation to water and land-use constraints?

## Tasks

1. Load the 4 anchor datasets
2. Clip to Santa Fe city limits
3. Simple maps: parcels over census tracts, river, and main roads
4. 2–3 quick stats (e.g., median income by distance to river, % renters by zoning category)


In [2]:
# Notebook setup: make the project's `src` package importable, then import the
# geospatial stack and the project-local loader / map helpers.
import sys
from pathlib import Path

# Add project root to path so we can import src modules.
# Path() is the current working directory, so project_root is the directory two
# levels above wherever the kernel was started.
# NOTE(review): presumably the kernel runs from notebooks/00_exploratory/ — confirm;
# started from another directory, the `src` imports below may not resolve.
project_root = Path().resolve().parent.parent
sys.path.insert(0, str(project_root))

import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Project-local helpers (resolved through the sys.path entry added above)
from src.data.loaders import (
    load_parcels,
    load_census_tracts,
    load_hydrology,
    load_osm_infrastructure,
    get_santa_fe_bounds
)
from src.viz.maps import setup_basemap, save_map


## Load Data

Load the 4 anchor datasets.


In [3]:
# Load the 4 anchor datasets
# Note: Run 000_data_prep.ipynb first to download and process data


def _load_or_none(loader, loaded_name, missing_name, hint=None):
    """Call *loader*, print the outcome, and return its result.

    A FileNotFoundError from the loader is reported (followed by *hint*, when
    given) and turned into a ``None`` return so later cells can skip the layer.
    Any other exception propagates.
    """
    try:
        layer = loader()
        print(f"✓ Loaded {loaded_name}: {len(layer)} features")
    except FileNotFoundError as err:
        print(f"✗ {missing_name} not found: {err}")
        if hint:
            print(hint)
        layer = None
    return layer


parcels = _load_or_none(
    load_parcels,
    "parcels",
    "Parcels",
    hint="  Run notebooks/00_exploratory/000_data_prep.ipynb first",
)
tracts = _load_or_none(load_census_tracts, "census tracts", "Census tracts")
hydrology = _load_or_none(load_hydrology, "hydrology", "Hydrology")
osm = _load_or_none(load_osm_infrastructure, "OSM data", "OSM data")

# Load city limits for reference (a None result is reported, not raised)
from src.data.loaders import load_city_limits
city_limits = load_city_limits()
print(
    "⚠ City limits not found"
    if city_limits is None
    else f"✓ City limits available: {len(city_limits)} feature(s)"
)


✗ Parcels not found: Parcels data not found at /Users/richard/Documents/projects/santa-fe/data/processed/parcels_zoning.gpkg. Download from City of Santa Fe GIS and process first. Expected location: /Users/richard/Documents/projects/santa-fe/data/processed/parcels_zoning.gpkg
  Run notebooks/00_exploratory/000_data_prep.ipynb first
✗ Census tracts not found: Census tracts data not found at /Users/richard/Documents/projects/santa-fe/data/processed/census_tracts_acs.gpkg. Download from Census Bureau and process first. Expected location: /Users/richard/Documents/projects/santa-fe/data/processed/census_tracts_acs.gpkg
✗ Hydrology not found: Hydrology data not found at /Users/richard/Documents/projects/santa-fe/data/processed/hydrology.gpkg. Download from source and process first. Expected location: /Users/richard/Documents/projects/santa-fe/data/processed/hydrology.gpkg
✗ OSM data not found: OSM data not found at /Users/richard/Documents/projects/santa-fe/data/processed/osm_roads_pois.gpkg

## Data Overview

Check which datasets were loaded and verify the CRS of each.


In [None]:
# Note: Data is already clipped to city limits during processing
# Here we only verify the CRS and inspect the structure of each layer.

datasets_to_check = {
    "Parcels": parcels,
    "Census Tracts": tracts,
    "Hydrology": hydrology,
    "OSM": osm,
    "City Limits": city_limits
}

separator = "=" * 60

print("Dataset Summary:")
print(separator)

for label, layer in datasets_to_check.items():
    # Layers that failed to load are None; report them and move on.
    if layer is None:
        print(f"\n{label}: Not loaded")
        continue

    first_columns = list(layer.columns)[:5]  # First 5 columns only, to keep output short
    print(f"\n{label}:")
    print(f"  Features: {len(layer)}")
    print(f"  CRS: {layer.crs}")
    print(f"  Columns: {first_columns}...")
    print(f"  Bounds: {layer.total_bounds}")

print("\n" + separator)


## Exploratory Maps

Create simple maps showing parcels over census tracts, river, and main roads.


In [None]:
# Create overview map: parcels over census tracts, river, and main roads
#
# Layers are drawn bottom-to-top: tracts, parcels, hydrology, main roads, city
# limits. Any layer that is None or empty is skipped. The figure is written out
# through save_map() and then shown.
#
# NOTE(review): setup_basemap() is asked for EPSG:3857 while the layers below are
# plotted in whatever CRS they were loaded with — confirm that the loaders (or
# setup_basemap) already put everything in one CRS, otherwise the layers will
# not line up with the basemap.

from matplotlib.lines import Line2D
from matplotlib.patches import Patch

# OSM road categories that count as "main roads" on this map
MAIN_ROAD_CATEGORIES = ['primary', 'secondary', 'tertiary']

if tracts is not None:
    # Use tracts as base layer
    fig, ax = setup_basemap(tracts, crs="EPSG:3857", add_basemap=True, figsize=(14, 14))

    # geopandas draws polygons as patch collections, which matplotlib's legend
    # cannot pick up from `label=`; build explicit proxy handles instead so
    # every plotted layer appears in the legend.
    legend_handles = []

    # Plot census tracts (flat fill; no demographic choropleth yet)
    if not tracts.empty:
        tracts.plot(ax=ax, color='lightgray', edgecolor='white', linewidth=0.5, alpha=0.6)
        legend_handles.append(
            Patch(facecolor='lightgray', edgecolor='white', alpha=0.6, label='Census Tracts')
        )

    # Plot parcels (outline only to avoid clutter)
    if parcels is not None and not parcels.empty:
        parcels.plot(ax=ax, color='none', edgecolor='gray', linewidth=0.3, alpha=0.4)
        legend_handles.append(
            Patch(facecolor='none', edgecolor='gray', linewidth=0.3, alpha=0.4, label='Parcels')
        )

    # Plot hydrology (all features as loaded; no line/polygon filtering is applied)
    if hydrology is not None and not hydrology.empty:
        hydrology.plot(ax=ax, color='#4A90E2', linewidth=1.5, alpha=0.8)
        legend_handles.append(
            Line2D([0], [0], color='#4A90E2', linewidth=1.5, alpha=0.8, label='Rivers/Streams')
        )

    # Plot main roads from OSM
    if osm is not None:
        # Keep roads only (not POIs). Without a 'feature_type' column roads cannot
        # be told apart from POIs, so the layer is skipped rather than guessed at.
        if 'feature_type' in osm.columns:
            roads = osm[osm['feature_type'] == 'road']
        else:
            print("⚠ OSM data has no 'feature_type' column; skipping roads layer")
            roads = osm.iloc[0:0]

        # Narrow to primary/secondary/tertiary roads if a category column exists
        if 'category' in roads.columns:
            main_roads = roads[roads['category'].isin(MAIN_ROAD_CATEGORIES)]
        else:
            main_roads = roads

        if not main_roads.empty:
            main_roads.plot(ax=ax, color='#E74C3C', linewidth=1, alpha=0.7)
            legend_handles.append(
                Line2D([0], [0], color='#E74C3C', linewidth=1, alpha=0.7, label='Main Roads')
            )

    # Add city limits boundary
    if city_limits is not None and not city_limits.empty:
        city_limits.plot(ax=ax, color='none', edgecolor='black', linewidth=2, linestyle='--')
        legend_handles.append(
            Patch(facecolor='none', edgecolor='black', linewidth=2, linestyle='--', label='City Limits')
        )

    ax.set_title('Santa Fe: Parcels, Census Tracts, River, and Main Roads', fontsize=14, pad=20)
    if legend_handles:
        ax.legend(handles=legend_handles, loc='upper right', fontsize=9)

    # Save map
    save_map(fig, '001_who_lives_where_overview')
    plt.show()
else:
    print("⚠ Cannot create map: census tracts data not loaded")


## Quick Statistics

Calculate 2–3 quick stats:
- Median income by distance to river
- % renters by zoning category


In [None]:
# Quick Statistics
#
# Produces two summaries and collects intermediate results in `stats_results`:
#   1. distance from each tract centroid to the nearest hydrology feature, binned
#      into five equal-width classes (plus the median of an income column per
#      bin, when the tracts carry one)
#   2. parcel counts per zoning category (plus the mean of a tract-level
#      renter/tenure column per zoning category, when the tracts carry one)

stats_results = {}

DISTANCE_LABELS = ['Very Close', 'Close', 'Medium', 'Far', 'Very Far']

# 1. Distance to river for each tract
if (
    tracts is not None
    and hydrology is not None
    and not tracts.empty
    and not hydrology.empty
):
    # Distances are only meaningful in a projected, metre-based CRS. Reproject
    # both layers to the local UTM zone instead of scaling degrees by a constant:
    # that also covers geographic CRSs other than EPSG:4326 (e.g. NAD83) and
    # foot-based projected CRSs, and guarantees both layers share one CRS.
    if tracts.crs is not None:
        metric_crs = tracts.estimate_utm_crs()
        tracts_metric = tracts.to_crs(metric_crs)
        # A hydrology layer without a CRS cannot be reprojected; it is used as-is.
        hydro_metric = hydrology.to_crs(metric_crs) if hydrology.crs is not None else hydrology
    else:
        # No CRS information: fall back to raw coordinates (units unknown).
        tracts_metric, hydro_metric = tracts, hydrology

    # Create union of hydrology features (rivers/streams).
    # union_all() replaces the deprecated unary_union in newer geopandas.
    if hasattr(hydro_metric, 'union_all'):
        hydro_union = hydro_metric.union_all()
    else:
        hydro_union = hydro_metric.unary_union

    # Calculate distance from each tract centroid to nearest river
    centroids_metric = tracts_metric.geometry.centroid

    tracts_with_dist = tracts.copy()
    # Keep the stored centroid in the tracts' own CRS, as the rest of the frame is.
    tracts_with_dist['centroid'] = (
        centroids_metric.to_crs(tracts.crs) if tracts.crs is not None else centroids_metric
    )
    tracts_with_dist['dist_to_river_m'] = centroids_metric.distance(hydro_union)

    # Bin distances into five equal-width classes
    distance_bins = pd.cut(tracts_with_dist['dist_to_river_m'], bins=5, labels=DISTANCE_LABELS)

    print("Distance to River Analysis:")
    print("=" * 50)
    # observed=False keeps empty bins in the table (and avoids the pandas
    # FutureWarning about the changing default for categorical groupers).
    print(tracts_with_dist.groupby(distance_bins, observed=False)['dist_to_river_m'].agg(['count', 'mean']))
    print()

    stats_results['distance_to_river'] = tracts_with_dist

    # If we have income data, analyze by distance. Prefer columns that mention
    # "income"; fall back to "median" columns only when none do, and consider
    # numeric columns only so the median is well defined.
    numeric_cols = [col for col in tracts.columns if pd.api.types.is_numeric_dtype(tracts[col])]
    income_cols = (
        [col for col in numeric_cols if 'income' in str(col).lower()]
        or [col for col in numeric_cols if 'median' in str(col).lower()]
    )
    if income_cols:
        print(f"Income by Distance to River (using {income_cols[0]}):")
        print(tracts_with_dist.groupby(distance_bins, observed=False)[income_cols[0]].median())
        print()
else:
    print("⚠ Cannot calculate distance to river: missing tracts or hydrology data")

# 2. Parcels by zoning category
if parcels is not None:
    # Find zoning column
    zoning_cols = [
        col for col in parcels.columns
        if 'zoning' in str(col).lower() or 'zone' in str(col).lower()
    ]

    if zoning_cols:
        zoning_col = zoning_cols[0]
        print("Parcels by Zoning Category:")
        print("=" * 50)
        zoning_counts = parcels[zoning_col].value_counts()
        print(zoning_counts.head(10))
        print(f"\nTotal zoning categories: {len(zoning_counts)}")
        print()

        stats_results['zoning_counts'] = zoning_counts

        # If we can join parcels to tracts, analyze renters by zoning
        if tracts is not None:
            # A spatial join needs both layers in the same CRS.
            tracts_for_join = tracts
            if (
                parcels.crs is not None
                and tracts.crs is not None
                and tracts.crs != parcels.crs
            ):
                tracts_for_join = tracts.to_crs(parcels.crs)

            # Spatial join parcels to tracts (parcels lying fully inside a tract)
            parcels_with_tracts = gpd.sjoin(parcels, tracts_for_join, how='left', predicate='within')

            # Look for renter/tenure columns in tracts
            renter_cols = [
                col for col in tracts.columns
                if 'renter' in str(col).lower() or 'tenure' in str(col).lower()
            ]
            if renter_cols:
                renter_col = renter_cols[0]
                # sjoin suffixes column names present in both inputs with
                # _left / _right; resolve the names actually present.
                joined_cols = parcels_with_tracts.columns
                renter_key = renter_col if renter_col in joined_cols else f"{renter_col}_right"
                zoning_key = zoning_col if zoning_col in joined_cols else f"{zoning_col}_left"

                print(f"Renters by Zoning (using {renter_col}):")
                print(
                    parcels_with_tracts.groupby(zoning_key)[renter_key]
                    .mean()
                    .sort_values(ascending=False)
                    .head(10)
                )
                print()
    else:
        print("⚠ No zoning column found in parcels data")
        print(f"Available columns: {list(parcels.columns)}")
else:
    print("⚠ Cannot analyze zoning: parcels data not loaded")

print("\n" + "=" * 50)
print("Note: ACS demographic data requires Census API key.")
print("See 000_data_prep.ipynb for instructions on joining ACS data.")


## Next Steps

- [x] Download and process the 4 anchor datasets (via 000_data_prep.ipynb)
- [x] Load and explore datasets
- [x] Create overview map
- [x] Calculate basic statistics
- [ ] Join ACS demographic data (requires Census API key)
- [ ] Refine analysis based on available columns
- [ ] Export figures for first field note
- [ ] Write field note in `stories/drafts/field_note_01.md`
