# Step 3: Building Footprint Generation

**Generate building footprints for the 20 networks matching reference distributions**

This notebook:
1. Loads 20 generated networks from Step 2
2. Generates building footprints matching reference distributions
3. Matches: footprint area, density, compactness, proximity to paths
4. Visualizes all 20 networks with buildings in one SVG

In [None]:
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle, Patch, Polygon as MPLPolygon
from pathlib import Path
from collections import Counter
import math
import pickle
import geopandas as gpd
from shapely.geometry import Point, LineString, Polygon, box
from shapely.ops import unary_union
from shapely.affinity import rotate, translate, scale

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Notebook-wide plotting defaults: figure resolution (dpi) and base font size.
plt.rcParams['figure.dpi'] = 100
plt.rcParams['font.size'] = 10

# Confirmation message printed once the imports and plot settings above have run.
print("✓ Libraries loaded")

## Configuration

In [None]:
WINDOW_SIZE_M = 500  # 500×500m window
MIN_BUILDING_AREA = 30  # Minimum building area in m²
MAX_BUILDING_AREA = 1000  # Maximum building area in m²
RANDOM_SEED = 42  # Seed for NumPy's global random generator

# Building positions, areas, compactness values, shapes and rotations are all
# drawn through np.random, so seed it once here: a fresh Restart & Run All
# then reproduces the same buildings instead of a new random set every time.
np.random.seed(RANDOM_SEED)

# Create output directories
Path("outputs/generated/visualizations").mkdir(parents=True, exist_ok=True)
Path("outputs/generated/buildings").mkdir(parents=True, exist_ok=True)

print(f"Window size: {WINDOW_SIZE_M}m × {WINDOW_SIZE_M}m")
print(f"Building area range: {MIN_BUILDING_AREA}-{MAX_BUILDING_AREA} m²")
print(f"Random seed: {RANDOM_SEED}")
print("✓ Output directories created")

## Load Reference Data and Generated Networks

In [None]:
# Step 1 output: reference measurements keyed by city name.
# NOTE: pickle.load executes whatever the file contains; only load files
# produced by this project's own earlier steps.
with Path('outputs/data/reference_cities_data.pkl').open('rb') as reference_file:
    reference_data = pickle.load(reference_file)

# Step 2 output: the list of generated street networks.
with Path('outputs/generated/networks/generated_networks_20.pkl').open('rb') as networks_file:
    generated_networks = pickle.load(networks_file)

print("✓ Loaded reference data from Step 1")
print(f"✓ Loaded {len(generated_networks)} generated networks from Step 2")

# London supplies the building distributions used in the rest of the notebook.
reference_city = 'london'
ref_building_metrics = reference_data[reference_city]['building_metrics']

# Quick summary of the reference distributions that will be sampled.
print(f"\nUsing {reference_city.upper()} building distributions:")
print(f"  Footprint areas: {len(ref_building_metrics['footprint_areas'])} samples")
print(f"  Avg area: {ref_building_metrics['avg_footprint_area']:.1f} m²")
print(f"  Coverage ratio: {ref_building_metrics['building_coverage_ratio']:.3f}")
print(f"  Avg compactness: {ref_building_metrics['avg_compactness']:.1f}")
print(f"  Avg distance to path: {ref_building_metrics['avg_building_road_proximity']:.1f} m")

## Building Generation Functions

In [None]:
def sample_building_area(reference_areas):
    """Draw one footprint area from the reference sample and clamp it.

    A single value is picked at random from ``reference_areas`` with
    ``np.random.choice`` and then limited to the interval
    ``[MIN_BUILDING_AREA, MAX_BUILDING_AREA]``.

    Args:
        reference_areas: Sequence of reference footprint areas.

    Returns:
        The sampled area, or the nearest bound when the sample falls outside
        the allowed range.
    """
    drawn = np.random.choice(reference_areas)
    # Upper bound first, then lower bound (same order as a nested min/max).
    capped = drawn if drawn < MAX_BUILDING_AREA else MAX_BUILDING_AREA
    return capped if capped > MIN_BUILDING_AREA else MIN_BUILDING_AREA


def sample_building_compactness(reference_compactness):
    """Draw one compactness value at random from the reference sample.

    Args:
        reference_compactness: Sequence of reference compactness values.

    Returns:
        One element of ``reference_compactness`` chosen by ``np.random.choice``.
    """
    drawn = np.random.choice(reference_compactness)
    return drawn


def create_rectangle_building(width, height):
    """Return an axis-aligned ``width`` × ``height`` rectangle centred on the origin."""
    half_w = width / 2
    half_h = height / 2
    return box(-half_w, -half_h, half_w, half_h)


def create_lshape_building(width, height):
    """Return an L-shaped footprint whose centroid sits at the origin.

    The outline is the ``width`` × ``height`` bounding rectangle with its
    upper-right corner removed: the horizontal leg spans the full width and
    60 % of the height, the vertical leg spans the full height and 60 % of
    the width.
    """
    leg_fraction = 0.6
    inner_x = width * leg_fraction   # x where the vertical leg ends
    inner_y = height * leg_fraction  # y where the horizontal leg ends

    outline = Polygon([
        (0, 0),
        (width, 0),
        (width, inner_y),
        (inner_x, inner_y),
        (inner_x, height),
        (0, height),
        (0, 0),
    ])

    # Shift so the centroid lands on (0, 0); callers rotate/translate from there.
    anchor = outline.centroid
    return translate(outline, xoff=-anchor.x, yoff=-anchor.y)


def create_cshape_building(width, height):
    """Return a C-shaped footprint whose centroid sits at the origin.

    The outline is the ``width`` × ``height`` rectangle with a rectangular
    notch cut in from the ``x = 0`` side. A wall of 25 % of the height is
    kept above and below the notch, and the notch runs from ``x = 0`` to
    ``x = width - courtyard_width``.

    NOTE(review): with ``courtyard_ratio = 0.4`` the notch is therefore
    0.6 × width deep and the remaining back wall is 0.4 × width thick, i.e.
    the ratio sets the back wall rather than the notch depth. Confirm this
    is the intended reading of "courtyard width".
    """
    courtyard_ratio = 0.4
    wall_thickness = 0.25

    courtyard_width = width * courtyard_ratio
    notch_x = width - courtyard_width             # inner end of the notch
    notch_top = height * (1 - wall_thickness)     # lower edge of the upper arm
    notch_bottom = height * wall_thickness        # upper edge of the lower arm

    outline = Polygon([
        (0, 0),
        (width, 0),
        (width, height),
        (0, height),
        (0, notch_top),
        (notch_x, notch_top),
        (notch_x, notch_bottom),
        (0, notch_bottom),
        (0, 0),
    ])

    # Shift so the centroid lands on (0, 0); callers rotate/translate from there.
    anchor = outline.centroid
    return translate(outline, xoff=-anchor.x, yoff=-anchor.y)


def create_courtyard_building(width, height):
    """Return a rectangular footprint with a central rectangular hole.

    The hole is centred in the ``width`` × ``height`` rectangle and measures
    40 % of the width by 40 % of the height. The result is shifted so that
    its centroid sits at the origin.
    """
    courtyard_fraction = 0.4
    void_w = width * courtyard_fraction
    void_h = height * courtyard_fraction

    mid_x = width / 2
    mid_y = height / 2

    shell = box(0, 0, width, height)
    void = box(mid_x - void_w / 2, mid_y - void_h / 2,
               mid_x + void_w / 2, mid_y + void_h / 2)

    # Cutting the void out of the shell leaves a polygon with one interior ring.
    ring_shape = shell.difference(void)

    anchor = ring_shape.centroid
    return translate(ring_shape, xoff=-anchor.x, yoff=-anchor.y)


def create_building_polygon(center_x, center_y, area, compactness, rotation_angle=None):
    """
    Build one randomly shaped building footprint of a given area.

    The shape is drawn at random from rectangle (50 %), L-shape (25 %),
    C-shape (15 %) and courtyard (10 %). Whatever shape is drawn, it is
    rescaled uniformly so that its area equals ``area``, rotated about its
    centroid and moved to ``(center_x, center_y)``.

    Args:
        center_x, center_y: Position the finished footprint is translated to
        area: Target area in m²
        compactness: Perimeter² / area ratio; limited to [16, 200] and used
            only to pick the base aspect ratio as ``sqrt(compactness / 16)``
            (a heuristic, so the finished shape's own ratio will differ)
        rotation_angle: Rotation in degrees (drawn uniformly from [0, 90) if None)

    Returns:
        Shapely Polygon
    """
    # Limit compactness to [16, 200]: cap at the top first, then at the bottom.
    bounded = compactness if compactness < 200 else 200
    bounded = bounded if bounded > 16 else 16

    # Base rectangle: height / width == aspect_ratio and width * height == area.
    aspect_ratio = np.sqrt(bounded / 16)
    width = np.sqrt(area / aspect_ratio)
    height = area / width

    # Random shape choice; this must stay the first random draw in the function.
    shape_names = ['rectangle', 'lshape', 'cshape', 'courtyard']
    shape_weights = [0.5, 0.25, 0.15, 0.1]
    shape_type = np.random.choice(shape_names, p=shape_weights)

    # Builder and the enlargement applied to the base dimensions for each shape;
    # anything not listed falls through to the courtyard builder.
    builders = {
        'rectangle': (create_rectangle_building, 1.0),
        'lshape': (create_lshape_building, 1.2),
        'cshape': (create_cshape_building, 1.3),
    }
    builder, enlargement = builders.get(str(shape_type), (create_courtyard_building, 1.4))
    footprint = builder(width * enlargement, height * enlargement)

    # Uniform rescale so the footprint's area equals the requested area.
    raw_area = footprint.area
    if raw_area > 0:
        factor = np.sqrt(area / raw_area)
        footprint = scale(footprint, xfact=factor, yfact=factor, origin='centroid')

    # Orientation: caller-supplied, otherwise a random angle in [0, 90).
    angle = np.random.uniform(0, 90) if rotation_angle is None else rotation_angle
    footprint = rotate(footprint, angle, origin='centroid')

    # Finally move the origin-centred footprint to its position.
    return translate(footprint, xoff=center_x, yoff=center_y)


def generate_buildings_for_network(G, pos, target_coverage, reference_areas, reference_compactness,
                                  max_distance_to_path=50):
    """
    Generate buildings for a network matching reference distributions.

    Candidate centres come from a jittered 20 m grid visited in random order;
    once the grid is used up, centres are drawn uniformly at least 20 m inside
    the window. A candidate is kept when its centre is close enough to a path,
    the footprint (clipped to the window if needed) is still large enough,
    it does not sit on a path and it keeps clear of earlier buildings.
    Placement stops when the target covered area is reached or after 10,000
    attempts, whichever comes first.

    Args:
        G: NetworkX graph
        pos: Node positions (mapping node -> (x, y))
        target_coverage: Target building coverage ratio of the window area
        reference_areas: Reference footprint area distribution
        reference_compactness: Reference compactness distribution
        max_distance_to_path: Maximum distance from a building centre to the
            nearest path

    Returns:
        List of building geometries. A footprint clipped at the window edge
        may be a multi-part geometry rather than a single Polygon.
    """
    buildings = []

    # Network edges as line geometries for the distance / overlap checks
    path_lines = [LineString([pos[u], pos[v]]) for u, v in G.edges()]
    if not path_lines:
        return buildings

    all_paths = unary_union(path_lines)

    # 2 m corridor around every path that buildings must stay out of
    paths_buffer = all_paths.buffer(2.0)

    # The window never changes, so build it once rather than on every attempt
    window = box(0, 0, WINDOW_SIZE_M, WINDOW_SIZE_M)
    window_area = WINDOW_SIZE_M ** 2
    target_total_area = window_area * target_coverage

    current_total_area = 0
    max_attempts = 10000
    attempts = 0

    # Grid-based sampling for even coverage: one jittered point per 20 m cell
    grid_size = 20
    sample_points = []
    for x in range(grid_size, WINDOW_SIZE_M, grid_size):
        for y in range(grid_size, WINDOW_SIZE_M, grid_size):
            px = x + np.random.uniform(-grid_size/2, grid_size/2)
            py = y + np.random.uniform(-grid_size/2, grid_size/2)
            sample_points.append((px, py))

    np.random.shuffle(sample_points)
    point_idx = 0

    while current_total_area < target_total_area and attempts < max_attempts:
        attempts += 1

        # Use grid points first, then uniformly random positions
        if point_idx < len(sample_points):
            x, y = sample_points[point_idx]
            point_idx += 1
        else:
            x = np.random.uniform(20, WINDOW_SIZE_M - 20)
            y = np.random.uniform(20, WINDOW_SIZE_M - 20)

        # Reject centres that are too far from every path
        if Point(x, y).distance(all_paths) > max_distance_to_path:
            continue

        # Sample building properties
        area = sample_building_area(reference_areas)
        compactness = sample_building_compactness(reference_compactness)

        # Best effort: a failed construction just costs one attempt. Catch
        # Exception (not a bare except) so Ctrl-C / kernel interrupt still works.
        try:
            building = create_building_polygon(x, y, area, compactness)
        except Exception:
            continue

        if not building.is_valid or building.is_empty:
            continue

        # Clip footprints that stick out of the window; drop them if too little is left
        if not window.contains(building):
            building = building.intersection(window)
            if building.is_empty or building.area < MIN_BUILDING_AREA:
                continue

        # Keep buildings off the paths: the footprint shrunk by 1 m must not
        # touch the 2 m path corridor. This single test runs before the scan
        # over all existing buildings because it is the cheaper rejection.
        if building.buffer(-1.0).intersects(paths_buffer):
            continue

        # Keep at least 0.5 m clear of every existing building. The buffered
        # footprint is computed once per candidate, not once per comparison.
        clearance_zone = building.buffer(0.5)
        if any(clearance_zone.intersects(existing) for existing in buildings):
            continue

        buildings.append(building)
        current_total_area += building.area

        # Progress update every 50 accepted buildings
        if len(buildings) % 50 == 0:
            progress = current_total_area / target_total_area
            print(f"    Progress: {len(buildings)} buildings, {progress*100:.1f}% coverage")

    return buildings


# Confirmation that the shape builders and the placement routine above are now defined.
print("✓ Building generation functions defined (Rectangle, L-shape, C-shape, Courtyard)")


## Generate Buildings for All 20 Networks

In [None]:
print("Generating buildings for 20 networks...")
print("=" * 70)

# Reference distributions that drive the generator.
target_coverage = ref_building_metrics['building_coverage_ratio']
reference_areas = ref_building_metrics['footprint_areas']
reference_compactness = ref_building_metrics['compactness_values']

for network_data in generated_networks:
    G = network_data['graph']
    pos = network_data['pos']
    net_id = network_data['id']

    # Place buildings; centres must lie within 30 m of a path.
    buildings = generate_buildings_for_network(
        G,
        pos,
        target_coverage,
        reference_areas,
        reference_compactness,
        max_distance_to_path=30,
    )

    # Share of the window actually covered by the accepted footprints.
    total_area = sum(footprint.area for footprint in buildings)
    actual_coverage = total_area / (WINDOW_SIZE_M ** 2)

    # Store the result on the network record for the later cells.
    network_data.update(buildings=buildings, building_coverage=actual_coverage)

    print(f"Network {net_id+1:2d}: {len(buildings):3d} buildings, coverage {actual_coverage:.3f}")

print("=" * 70)
print("\n✓ Generated buildings for all 20 networks")
print(f"\nTarget coverage: {target_coverage:.3f}")

# Mean and spread of the achieved coverage across the networks.
actual_coverages = [record['building_coverage'] for record in generated_networks]
print(f"Actual coverage: {np.mean(actual_coverages):.3f} ± {np.std(actual_coverages):.3f}")

## Visualize All 20 Networks with Buildings

In [None]:
def _polygon_parts(geom):
    """Return the Polygon pieces of a building geometry.

    A footprint clipped at the window edge can be a multi-part geometry;
    its polygonal parts are returned so every counted building is drawn.
    """
    if geom.geom_type == 'Polygon':
        return [geom]
    return [part for part in getattr(geom, 'geoms', []) if part.geom_type == 'Polygon']


# Create 4×5 grid
fig, axes = plt.subplots(4, 5, figsize=(20, 16))
axes = axes.flatten()

# zip() stops at the shorter sequence, so a list longer than the 20 panels
# cannot index past the grid.
for idx, (ax, network_data) in enumerate(zip(axes, generated_networks)):
    G = network_data['graph']
    pos = network_data['pos']
    buildings = network_data['buildings']

    # Window boundary
    ax.add_patch(Rectangle((0, 0), WINDOW_SIZE_M, WINDOW_SIZE_M,
                           fill=False, edgecolor='black', linestyle='-', linewidth=1))

    # Draw buildings first (background)
    for building in buildings:
        for part in _polygon_parts(building):
            xs, ys = part.exterior.xy
            ax.add_patch(MPLPolygon(list(zip(xs, ys)),
                                    facecolor='lightgray',
                                    edgecolor='darkgray',
                                    linewidth=0.3,
                                    alpha=0.8,
                                    zorder=1))
            # Courtyards are interior rings; paint them white on top of the
            # filled exterior so they show as open space instead of solid.
            for hole in part.interiors:
                ax.add_patch(MPLPolygon(list(hole.coords),
                                        facecolor='white',
                                        edgecolor='darkgray',
                                        linewidth=0.3,
                                        zorder=1))

    # Draw edges (on top)
    for u, v in G.edges():
        x = [pos[u][0], pos[v][0]]
        y = [pos[u][1], pos[v][1]]
        ax.plot(x, y, color='steelblue', linewidth=1.2, alpha=0.9, zorder=2)

    # Draw nodes (on top) with one scatter call per panel instead of one per node
    node_points = [pos[node] for node in G.nodes()]
    if node_points:
        ax.scatter([p[0] for p in node_points], [p[1] for p in node_points],
                   s=10, c='darkblue', zorder=3, alpha=0.7)

    ax.set_xlim(-10, WINDOW_SIZE_M + 10)
    ax.set_ylim(-10, WINDOW_SIZE_M + 10)
    ax.set_aspect('equal')

    coverage = network_data['building_coverage']
    ax.set_title(f'Network {idx+1}\n{len(buildings)}b, cov={coverage:.2f}',
                fontsize=9, fontweight='bold')
    ax.set_xticks([])
    ax.set_yticks([])
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)

# Blank any panels that did not receive a network (no-op when there are 20)
for unused_ax in axes[len(generated_networks):]:
    unused_ax.axis('off')

plt.suptitle(f'Generated Networks with Buildings (20 variations)\nCalibrated to {reference_city.upper()} building distributions',
            fontsize=16, fontweight='bold', y=0.995)
plt.tight_layout()

# Save as single SVG
plt.savefig('outputs/generated/visualizations/C1_all_20_networks_with_buildings.svg',
           format='svg', bbox_inches='tight', dpi=300)
print("Saved: outputs/generated/visualizations/C1_all_20_networks_with_buildings.svg")

plt.show()

## Compute Building Metrics

In [None]:
def compute_building_metrics_for_network(buildings, path_lines):
    """Summarise the buildings generated for one network.

    Args:
        buildings: Building geometries of the network.
        path_lines: Line geometries of the network's edges.

    Returns:
        Dict with ``count``, ``avg_area``, ``coverage`` (total footprint area
        divided by the window area), ``avg_compactness`` (mean of
        perimeter² / area) and ``avg_proximity`` (mean distance from building
        centroid to the merged paths). All values are 0 when there are no
        buildings.
    """
    metric_names = ['count', 'avg_area', 'coverage', 'avg_compactness', 'avg_proximity']

    building_count = len(buildings)
    if building_count == 0:
        return dict.fromkeys(metric_names, 0)

    areas = []
    compactness_vals = []
    for footprint in buildings:
        footprint_area = footprint.area
        areas.append(footprint_area)
        # Zero-area geometries get compactness 0 instead of a division by zero.
        if footprint_area > 0:
            compactness_vals.append((footprint.length ** 2) / footprint_area)
        else:
            compactness_vals.append(0)

    # Merge the edges once, then measure each centroid against the merged geometry.
    merged_paths = unary_union(path_lines)
    proximities = [footprint.centroid.distance(merged_paths) for footprint in buildings]

    covered_share = sum(areas) / (WINDOW_SIZE_M ** 2)

    values = [
        building_count,
        np.mean(areas),
        covered_share,
        np.mean(compactness_vals),
        np.mean(proximities),
    ]
    return dict(zip(metric_names, values))


# Attach a metrics dict to every network record.
print("Computing building metrics...\n")

for network_data in generated_networks:
    G = network_data['graph']
    pos = network_data['pos']
    buildings = network_data['buildings']

    # One line geometry per network edge.
    path_lines = []
    for u, v in G.edges():
        path_lines.append(LineString([pos[u], pos[v]]))

    network_data['building_metrics'] = compute_building_metrics_for_network(buildings, path_lines)

print("✓ Building metrics computed")

# Collect each metric across the networks for the summary table.
per_network_metrics = [record['building_metrics'] for record in generated_networks]
all_counts = [m['count'] for m in per_network_metrics]
all_avg_areas = [m['avg_area'] for m in per_network_metrics]
all_coverages = [m['coverage'] for m in per_network_metrics]
all_compactness = [m['avg_compactness'] for m in per_network_metrics]
all_proximities = [m['avg_proximity'] for m in per_network_metrics]

rule = "=" * 70

# Generated buildings: mean ± standard deviation over the networks.
print("\n" + rule)
print("GENERATED BUILDING METRICS (20 networks)")
print(rule)
print(f"\nBuilding count:       {np.mean(all_counts):.1f} ± {np.std(all_counts):.1f}")
print(f"Avg footprint area:   {np.mean(all_avg_areas):.1f} ± {np.std(all_avg_areas):.1f} m²")
print(f"Coverage ratio:       {np.mean(all_coverages):.3f} ± {np.std(all_coverages):.3f}")
print(f"Avg compactness:      {np.mean(all_compactness):.1f} ± {np.std(all_compactness):.1f}")
print(f"Avg proximity:        {np.mean(all_proximities):.1f} ± {np.std(all_proximities):.1f} m")

# Reference city values for side-by-side comparison.
print(f"\n{reference_city.upper()} REFERENCE:")
print(f"Building count:       {len(reference_data[reference_city]['buildings'])}")
print(f"Avg footprint area:   {ref_building_metrics['avg_footprint_area']:.1f} m²")
print(f"Coverage ratio:       {ref_building_metrics['building_coverage_ratio']:.3f}")
print(f"Avg compactness:      {ref_building_metrics['avg_compactness']:.1f}")
print(f"Avg proximity:        {ref_building_metrics['avg_building_road_proximity']:.1f} m")
print(rule)

## Save Networks with Buildings

In [None]:
# Persist the network records, now carrying buildings and building metrics,
# for the later pipeline steps.
output_path = 'outputs/generated/buildings/networks_with_buildings_20.pkl'
with open(output_path, 'wb') as out_file:
    pickle.dump(generated_networks, out_file)

print(f"✓ Saved 20 networks with buildings to: {output_path}")
print("\nEach network now includes:")
for item in (
    "NetworkX graph",
    "Node positions",
    "Network metrics",
    "Building polygons",
    "Building metrics",
    "Generation parameters",
):
    print(f"  - {item}")

## Next Steps

These 20 networks with buildings will be used for:

1. **Step 4**: Space syntax analysis (integration, choice, intelligibility)
2. **Step 5**: Multi-objective optimization and ranking
3. **Step 6**: Final selection and validation
4. **Step 7**: Export to GeoJSON/Shapefile for use in urban planning tools