# STEP 1: Analyze Real Cities (500×500m)
## Extract Urban Metrics & Building Block Library

**Goal**: Analyze three 500×500m urban areas to extract:
- Space syntax metrics (nodes, edges, districts, landmarks, barriers)
- Building geometry distributions
- Reusable building block library

**Cities**:
1. Hanoi, Vietnam (21.0230°N, 105.8560°E) - Dense, organic layout
2. Brussels, Belgium (50.8477°N, 4.3572°E) - European historic core
3. Marrakech, Morocco (31.623811°N, 7.988662°W) - Compact medina

**Outputs**:
- GeoJSON files (nodes, edges, buildings, districts, blocks)
- JSON metrics file (urban_metrics.json)
- Building block library (building_blocks_library.json)
- Visualizations (PNG + SVG) with base maps and clear labels
- Metrics summary table

## 1. Setup & Configuration

In [None]:
# Imports
import osmnx as ox
import networkx as nx
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.colors import LinearSegmentedColormap, Normalize
from matplotlib import cm
from matplotlib.colorbar import ColorbarBase
from shapely.geometry import Point, LineString, Polygon, MultiPolygon, box, MultiLineString
from shapely.ops import polygonize, unary_union, nearest_points, linemerge
from shapely.affinity import rotate, scale, translate
import json
from pathlib import Path
import warnings
from collections import Counter

# Silence every Python warning for the rest of the session. This also hides
# deprecation notices, so comment it out when debugging a library upgrade.
warnings.filterwarnings('ignore')

# Configure OSMnx: enable its response cache and keep its log off the console
ox.settings.use_cache = True
ox.settings.log_console = False

# Set plot style
plt.style.use('seaborn-v0_8-darkgrid')
# IPython magic: only valid when this file is executed as a notebook cell
%matplotlib inline

print("‚úì Libraries imported successfully")

In [None]:
# Configuration
# Study areas, keyed by a short slug that is reused in output file names.
# 'coords' is (latitude, longitude); 'color' is a hex colour stored with the
# city's data.
CITIES = {
    'hanoi': {
        'name': 'Hanoi, Vietnam',
        'coords': (21.0230, 105.8560),
        'color': '#FF6B6B'  # Red
    },
    'brussels': {
        'name': 'Brussels, Belgium',
        'coords': (50.8477, 4.3572),
        'color': '#4ECDC4'  # Teal
    },
    'marrakech': {
        'name': 'Marrakech, Morocco',
        'coords': (31.623811, -7.988662),
        'color': '#FFE66D'  # Yellow
    }
}

# Analysis parameters (adapted for 500×500m)
RADIUS = 250  # meters (to get ~500×500m coverage)
# NOTE: compute_node_metrics uses its own 200 m / 300 m radii rather than
# reading this list - keep the two in sync when changing either.
REACH_RADII = [200, 300]  # Reduced from 400/600 for small scale
LOCAL_LANDMARK_RADIUS = 300  # Reduced from 1500m
# Polygonized blocks outside [MIN_BLOCK_AREA, MAX_BLOCK_AREA] are discarded by extract_blocks
MIN_BLOCK_AREA = 500  # m²
MAX_BLOCK_AREA = 10000  # m²
BLOCKS_PER_CITY = 35  # Target library size

# Output paths (relative to the current working directory)
OUTPUT_DIR = Path('outputs')
GEOJSON_DIR = OUTPUT_DIR / 'geojson'
VIZ_PNG_DIR = OUTPUT_DIR / 'visualizations' / 'png'
VIZ_SVG_DIR = OUTPUT_DIR / 'visualizations' / 'svg'
METRICS_DIR = OUTPUT_DIR / 'metrics'

# Create directories (parents=True also creates OUTPUT_DIR itself; safe to re-run)
for d in [GEOJSON_DIR, VIZ_PNG_DIR, VIZ_SVG_DIR, METRICS_DIR]:
    d.mkdir(parents=True, exist_ok=True)

print("‚úì Configuration complete")
print(f"  Analyzing {len(CITIES)} cities")
print(f"  Coverage radius: {RADIUS}m (~{RADIUS*2}√ó{RADIUS*2}m area)")
print(f"  Output directory: {OUTPUT_DIR.absolute()}")

## 2. Data Acquisition

In [None]:
# Download data for all cities
# city_data maps each city key to its projected graph, cleaned building
# footprints and CRS. A city whose download fails is reported and left out of
# the dict, so every later cell iterates over city_data rather than CITIES.
city_data = {}

for city_key, city_info in CITIES.items():
    print(f"\n{'='*60}")
    print(f"Downloading: {city_info['name']}")
    print(f"{'='*60}")
    
    lat, lon = city_info['coords']
    
    try:
        # Download street network (walk network includes all accessible roads)
        print(f"  ‚Üí Street network...")
        G = ox.graph_from_point(
            (lat, lon),
            dist=RADIUS,
            network_type='walk',
            simplify=True
        )
        
        # Project to local UTM
        G_proj = ox.project_graph(G)
        
        # Download buildings
        print(f"  ‚Üí Buildings...")
        buildings = ox.features_from_point(
            (lat, lon),
            dist=RADIUS,
            tags={'building': True}
        )
        
        # Project buildings into the same CRS as the projected street edges
        buildings_proj = buildings.to_crs(ox.graph_to_gdfs(G_proj, nodes=False).crs)
        
        # Clean building geometries (keep only Polygons/MultiPolygons)
        buildings_proj = buildings_proj[buildings_proj.geometry.type.isin(['Polygon', 'MultiPolygon'])].copy()
        
        # Convert MultiPolygons to Polygons (take largest)
        # NOTE: the smaller parts of a multi-part footprint are discarded here.
        def get_polygon(geom):
            if geom.geom_type == 'Polygon':
                return geom
            elif geom.geom_type == 'MultiPolygon':
                return max(geom.geoms, key=lambda p: p.area)
            return geom
        
        buildings_proj['geometry'] = buildings_proj.geometry.apply(get_polygon)
        buildings_proj = buildings_proj[buildings_proj.geometry.type == 'Polygon'].copy()
        
        # Store data
        city_data[city_key] = {
            'name': city_info['name'],
            'color': city_info['color'],
            'coords': (lat, lon),
            'graph': G_proj,
            'buildings': buildings_proj,
            'crs': ox.graph_to_gdfs(G_proj, nodes=False).crs
        }
        
        print(f"  ‚úì Downloaded:")
        print(f"    - {G_proj.number_of_nodes()} nodes")
        print(f"    - {G_proj.number_of_edges()} edges")
        print(f"    - {len(buildings_proj)} buildings")
        
    except Exception as e:
        # Best effort: report the failure with a traceback and move on to the next city
        print(f"  ‚úó Error downloading {city_key}: {e}")
        import traceback
        traceback.print_exc()
        continue

print(f"\n{'='*60}")
print(f"‚úì Data acquisition complete for {len(city_data)} cities")
print(f"{'='*60}")

## 3. Node Analysis (Centrality Metrics)

In [None]:
def compute_node_metrics(G, reach_radii=(200, 300)):
    """
    Compute centrality metrics for nodes (intersections)

    All centralities are computed on the undirected version of G.

    Args:
        G: Street graph whose edges carry a 'length' attribute.
        reach_radii: Network distances (same units as 'length') for which a
            reach count is computed. Each radius r produces a column named
            'reach_<r>m'; the default yields 'reach_200m' and 'reach_300m'.

    Returns:
        The node GeoDataFrame from ox.graph_to_gdfs(G) with added columns
        'bc_distance', 'bc_information', 'closeness', one 'reach_<r>m' column
        per radius, and 'degree'.
    """
    print("  Computing node centrality metrics...")

    # Convert to undirected for centrality calculations
    G_undir = G.to_undirected()

    # 1. Betweenness Centrality (distance-weighted)
    print("    - Betweenness (distance)...")
    bc_dist = nx.betweenness_centrality(G_undir, weight='length', normalized=True)

    # 2. Betweenness Centrality (information - no weight)
    print("    - Betweenness (information)...")
    bc_info = nx.betweenness_centrality(G_undir, weight=None, normalized=True)

    # 3. Closeness Centrality (distance-weighted)
    print("    - Closeness...")
    closeness = nx.closeness_centrality(G_undir, distance='length')

    # 4. Reach Centrality: number of nodes within each network radius.
    # The count includes the source node itself (distance 0).
    radii = list(dict.fromkeys(reach_radii))  # de-duplicate, keep caller's order
    radii_label = ', '.join(f"{r:g}m" for r in radii)
    print(f"    - Reach centrality ({radii_label})...")
    reach = {r: {} for r in radii}

    if radii:
        widest = max(radii)
        for node in G_undir.nodes():
            # One search at the widest radius serves every smaller radius too:
            # a node within r of the source is also within the widest cutoff,
            # and its shortest distance does not depend on the cutoff.
            lengths = nx.single_source_dijkstra_path_length(
                G_undir, node, cutoff=widest, weight='length'
            )
            distances = list(lengths.values())
            for r in radii:
                reach[r][node] = sum(1 for dist in distances if dist <= r)

    # 5. Degree
    degree = dict(G_undir.degree())

    # Create GeoDataFrame with metrics
    nodes, _ = ox.graph_to_gdfs(G)
    nodes['bc_distance'] = nodes.index.map(bc_dist)
    nodes['bc_information'] = nodes.index.map(bc_info)
    nodes['closeness'] = nodes.index.map(closeness)
    for r in radii:
        nodes[f'reach_{r:g}m'] = nodes.index.map(reach[r])
    nodes['degree'] = nodes.index.map(degree)

    print("  ‚úì Node metrics computed")
    return nodes

# Compute for all cities
# The node metrics are stored on city_data[city_key]['nodes'] (read by the
# district and aggregation cells) and exported as <city>_nodes.geojson.
for city_key in city_data.keys():
    print(f"\n{city_data[city_key]['name']}:")
    city_data[city_key]['nodes'] = compute_node_metrics(city_data[city_key]['graph'])
    
    # Save GeoJSON
    output_file = GEOJSON_DIR / f"{city_key}_nodes.geojson"
    city_data[city_key]['nodes'].to_file(output_file, driver='GeoJSON')
    print(f"  ‚úì Saved to {output_file.name}")

## 4. Edge Analysis (Street Networks & Blocks)

In [None]:
def _undirected_edge_key(u, v, k=None):
    """Return an orientation-independent key for the edge between u and v.

    (u, v, k) and (v, u, k) map to the same tuple, so an undirected edge can
    be found whichever way round a caller names its endpoints. When k is None
    the key is the 2-tuple of endpoints only.
    """
    a, b = (u, v) if u <= v else (v, u)
    return (a, b) if k is None else (a, b, k)


def compute_edge_metrics(G):
    """
    Compute edge (street segment) metrics

    Metrics are computed on the undirected version of G and then mapped back
    onto every row of the edge GeoDataFrame of G. Rows that describe the same
    street in opposite directions, (u, v, k) and (v, u, k), receive the same
    values.

    Args:
        G: Street multigraph whose edges carry a 'length' attribute and whose
            node ids are mutually comparable (needed to normalise edge keys).

    Returns:
        (edges, dual_G):
        - edges: edge GeoDataFrame from ox.graph_to_gdfs(G) with added columns
          'edge_bc' (length-weighted edge betweenness) and 'angular_bc'
          (node betweenness of the corresponding dual-graph node).
        - dual_G: nx.Graph with one integer node per undirected street
          segment (attribute 'primal_edge' = (u, v, key)) and an edge between
          two segments that share an intersection.

    NOTE: despite its name, 'angular_bc' is computed on an unweighted dual
    graph - no turning angles are attached to the dual edges.
    """
    print("  Computing edge metrics...")

    G_undir = G.to_undirected()

    # 1. Edge betweenness (primal - distance weighted)
    print("    - Edge betweenness (primal)...")
    raw_edge_bc = nx.edge_betweenness_centrality(G_undir, weight='length', normalized=True)
    # Depending on the NetworkX version the result is keyed by (u, v) or, for
    # multigraphs, by (u, v, key), in whichever orientation the edge was
    # iterated. Normalise both forms so the lookups below cannot miss.
    edge_bc = {_undirected_edge_key(*key): value for key, value in raw_edge_bc.items()}

    # 2. Create dual graph for angular analysis
    print("    - Building dual graph...")
    dual_G = nx.Graph()
    edge_to_node = {}

    for i, (u, v, k) in enumerate(G_undir.edges(keys=True)):
        edge_to_node[_undirected_edge_key(u, v, k)] = i
        dual_G.add_node(i, primal_edge=(u, v, k))

    # Two street segments are adjacent in the dual graph when they meet at a node
    for node in G_undir.nodes():
        incident = [
            edge_to_node[_undirected_edge_key(u, v, k)]
            for u, v, k in G_undir.edges(node, keys=True)
        ]
        for position, first in enumerate(incident):
            for second in incident[position + 1:]:
                dual_G.add_edge(first, second)

    # 3. Angular betweenness (dual graph)
    print("    - Angular betweenness (dual)...")
    dual_bc = (
        nx.betweenness_centrality(dual_G, weight=None, normalized=True)
        if dual_G.number_of_edges() > 0 else {}
    )
    angular_bc = {
        _undirected_edge_key(*dual_G.nodes[dual_node]['primal_edge']): bc_val
        for dual_node, bc_val in dual_bc.items()
    }

    def _primal_bc(edge_index):
        # Prefer the per-key value; fall back to the endpoint-only key used by
        # NetworkX versions that do not report multigraph edge keys.
        u, v, k = edge_index
        keyed = _undirected_edge_key(u, v, k)
        if keyed in edge_bc:
            return edge_bc[keyed]
        return edge_bc.get(_undirected_edge_key(u, v), 0)

    def _dual_bc(edge_index):
        return angular_bc.get(_undirected_edge_key(*edge_index), 0)

    # Create GeoDataFrame
    _, edges = ox.graph_to_gdfs(G)
    edges['edge_bc'] = edges.index.map(_primal_bc)
    edges['angular_bc'] = edges.index.map(_dual_bc)

    print("  ‚úì Edge metrics computed")
    return edges, dual_G

# Compute for all cities
# Stores the edge GeoDataFrame and the dual graph on city_data and exports the
# edges as <city>_edges.geojson.
for city_key in city_data.keys():
    print(f"\n{city_data[city_key]['name']}:")
    edges, dual_graph = compute_edge_metrics(city_data[city_key]['graph'])
    city_data[city_key]['edges'] = edges
    city_data[city_key]['dual_graph'] = dual_graph
    
    output_file = GEOJSON_DIR / f"{city_key}_edges.geojson"
    edges.to_file(output_file, driver='GeoJSON')
    print(f"  ‚úì Saved to {output_file.name}")

In [None]:
# FIXED: Extract blocks using polygonize with proper geometry handling
_BLOCK_COLUMNS = ['geometry', 'area', 'perimeter', 'compactness', 'aspect_ratio', 'block_id']


def _empty_blocks_frame(crs):
    """Return an empty blocks GeoDataFrame that still carries the expected columns."""
    return gpd.GeoDataFrame(columns=_BLOCK_COLUMNS, crs=crs)


def _block_aspect_ratio(geom):
    """Return the long/short side ratio of geom's minimum rotated rectangle.

    Falls back to 1.0 when the rectangle cannot be measured (for example when
    it has no exterior ring) or when its shorter side has zero length.
    """
    try:
        corners = list(geom.minimum_rotated_rectangle.exterior.coords)
        side_a = Point(corners[0]).distance(Point(corners[1]))
        side_b = Point(corners[1]).distance(Point(corners[2]))
    except Exception:
        # Best effort per geometry, but unlike a bare `except:` this no longer
        # swallows KeyboardInterrupt / SystemExit.
        return 1.0
    shorter = min(side_a, side_b)
    return max(side_a, side_b) / shorter if shorter > 0 else 1.0


def extract_blocks(edges_gdf, buffer_dist=0.1):
    """
    Extract urban blocks by polygonizing street network

    Args:
        edges_gdf: Street-edge GeoDataFrame in a projected (metric) CRS.
        buffer_dist: Buffer distance, in CRS units, used by the fallback
            method when direct polygonization finds nothing.

    Returns:
        GeoDataFrame of blocks whose area lies within
        [MIN_BLOCK_AREA, MAX_BLOCK_AREA], with columns 'geometry', 'area',
        'perimeter', 'compactness' (4*pi*area / perimeter**2), 'aspect_ratio'
        and 'block_id'. An empty frame with the same columns is returned when
        no block is found or when any step raises.
    """
    print("  Extracting blocks...")

    try:
        # Get all line geometries (MultiLineStrings are split into their parts)
        lines = []
        for geom in edges_gdf.geometry:
            if geom.geom_type == 'LineString':
                lines.append(geom)
            elif geom.geom_type == 'MultiLineString':
                lines.extend(geom.geoms)

        if not lines:
            print("  ‚ö† No valid line geometries found")
            return _empty_blocks_frame(edges_gdf.crs)

        # Method 1: Direct polygonize
        print("    - Attempting direct polygonize...")
        polygons = list(polygonize(lines))

        # Method 2: If no polygons, try buffering slightly to close gaps
        if not polygons:
            print("    - Direct polygonize failed, trying with buffered lines...")
            merged = unary_union([line.buffer(buffer_dist) for line in lines])

            # NOTE(review): this erodes the buffered street corridor itself
            # rather than the areas it encloses, so it will rarely yield usable
            # blocks. Behaviour is kept as-is - confirm the intent.
            candidates = merged.geoms if hasattr(merged, 'geoms') else [merged]
            for candidate in candidates:
                if candidate.geom_type != 'Polygon':
                    continue
                # Erode back to get original size
                poly = candidate.buffer(-buffer_dist)
                if poly.is_valid and not poly.is_empty and poly.geom_type == 'Polygon':
                    polygons.append(poly)

        if not polygons:
            print("  ‚ö† No blocks found after both methods")
            return _empty_blocks_frame(edges_gdf.crs)

        print(f"    - Found {len(polygons)} raw polygons")

        # Create GeoDataFrame
        blocks_gdf = gpd.GeoDataFrame(geometry=polygons, crs=edges_gdf.crs)

        # Compute metrics
        blocks_gdf['area'] = blocks_gdf.geometry.area
        blocks_gdf['perimeter'] = blocks_gdf.geometry.length
        blocks_gdf['compactness'] = (4 * np.pi * blocks_gdf['area']) / (blocks_gdf['perimeter'] ** 2)

        # Filter by size
        blocks_gdf = blocks_gdf[
            (blocks_gdf['area'] >= MIN_BLOCK_AREA) &
            (blocks_gdf['area'] <= MAX_BLOCK_AREA)
        ].copy()

        if len(blocks_gdf) == 0:
            print(f"  ‚ö† No blocks within size range ({MIN_BLOCK_AREA}-{MAX_BLOCK_AREA} m¬≤)")
            return _empty_blocks_frame(edges_gdf.crs)

        # Compute aspect ratio
        blocks_gdf['aspect_ratio'] = [_block_aspect_ratio(geom) for geom in blocks_gdf.geometry]
        blocks_gdf['block_id'] = [f"block_{i:03d}" for i in range(len(blocks_gdf))]

        print(f"  ‚úì Extracted {len(blocks_gdf)} valid blocks")
        return blocks_gdf

    except Exception as e:
        # Cell-level boundary: report the failure and let the notebook carry on
        # with an empty result for this city.
        print(f"  ‚úó Error extracting blocks: {e}")
        import traceback
        traceback.print_exc()
        return _empty_blocks_frame(edges_gdf.crs)

# Extract blocks for all cities
# The (possibly empty) blocks frame is always stored on city_data; the GeoJSON
# export is skipped when no block was found.
for city_key in city_data.keys():
    print(f"\n{city_data[city_key]['name']}:")
    blocks = extract_blocks(city_data[city_key]['edges'])
    city_data[city_key]['blocks'] = blocks
    
    if len(blocks) > 0:
        output_file = GEOJSON_DIR / f"{city_key}_blocks.geojson"
        blocks.to_file(output_file, driver='GeoJSON')
        print(f"  ‚úì Saved to {output_file.name}")

## 5. District Analysis (Community Detection)

In [None]:
# Import the Louvain community-detection module under the alias
# `community_louvain`, installing the 'python-louvain' package with pip if
# neither import path is available.
try:
    import community.community_louvain as community_louvain
except ImportError:
    try:
        # Fallback: bind the top-level `community` package to the same alias
        import community as community_louvain
    except ImportError:
        print("Installing python-louvain...")
        import subprocess
        import sys
        # Install with the interpreter that is running this notebook
        # (sys.executable), not whichever `pip` happens to be on PATH.
        subprocess.check_call([sys.executable, "-m", "pip", "install", "python-louvain"])
        import community.community_louvain as community_louvain

In [None]:
# FIXED: District detection with proper weight handling
def detect_districts(G, method='distance'):
    """
    Partition the street graph into districts with Louvain community detection.

    The graph is made undirected and parallel edges are collapsed (the first
    edge seen between a pair of nodes is kept, with its attributes). With
    method='distance' the edge attribute 'length' is passed as the Louvain
    weight; any other method name runs the partitioning unweighted.

    Returns a dict mapping node -> community id. If anything raises, the error
    is printed and every node of G is assigned to community 0.
    """
    print(f"    - Detecting districts ({method})...")

    # Only 'distance' is weighted; every other method name is unweighted.
    louvain_weight = 'length' if method == 'distance' else None

    try:
        undirected = G.to_undirected()

        # Build a simple graph: skip any edge whose endpoints are already linked
        simple_graph = nx.Graph()
        for src, dst, attrs in undirected.edges(data=True):
            if simple_graph.has_edge(src, dst):
                continue
            simple_graph.add_edge(src, dst, **attrs)

        return community_louvain.best_partition(simple_graph, weight=louvain_weight)

    except Exception as e:
        print(f"      ‚úó Error: {e}")
        # Fallback: one community containing every node
        return dict.fromkeys(G.nodes(), 0)

def partition_to_geodataframe(nodes_gdf, partition):
    """Return a copy of nodes_gdf with a 'district' column looked up from partition.

    partition maps node id (the frame's index) to a community id. Index values
    missing from partition get NaN. The input frame is not modified.
    """
    district_labels = nodes_gdf.index.map(partition)
    return nodes_gdf.assign(district=district_labels)

# Detect districts for all cities
# One partition per method is exported as <city>_districts_<method>.geojson
# and kept on city_data[city_key]['partitions'].
# NOTE: detect_districts only special-cases 'distance'; 'angular' and
# 'topological' both run the same unweighted partitioning.
for city_key in city_data.keys():
    print(f"\n{city_data[city_key]['name']}:")
    
    G = city_data[city_key]['graph']
    nodes = city_data[city_key]['nodes']
    
    partitions = {}
    for method in ['distance', 'angular', 'topological']:
        partition = detect_districts(G, method=method)
        partitions[method] = partition
        
        # Attach the community id to a copy of the node frame for export
        nodes_districts = partition_to_geodataframe(nodes, partition)
        
        output_file = GEOJSON_DIR / f"{city_key}_districts_{method}.geojson"
        nodes_districts.to_file(output_file, driver='GeoJSON')
        
        num_districts = len(set(partition.values()))
        print(f"      {method}: {num_districts} districts")
    
    city_data[city_key]['partitions'] = partitions
    print(f"  ‚úì District detection complete")

## 6. Landmark Analysis (Building Scores)

In [None]:
# FIXED: Landmark scoring with proper NaN handling
def safe_normalize(series, default=0.5):
    """Min-max scale a series to [0, 1], substituting `default` where undefined.

    If the series has no usable range (min or max is NaN, or min equals max)
    every element becomes `default`. Otherwise values are rescaled and any
    NaN left in the result is replaced by `default`. The index is preserved.
    """
    lowest, highest = series.min(), series.max()
    has_range = not (pd.isna(lowest) or pd.isna(highest) or lowest == highest)

    if has_range:
        scaled = (series - lowest) / (highest - lowest)
        return scaled.fillna(default)

    # Degenerate input: a constant series of the default value
    return pd.Series([default] * len(series), index=series.index)

def compute_building_landmark_scores(buildings_gdf, edges_gdf):
    """
    Score every building as a potential landmark.

    Works on a copy of buildings_gdf and adds these columns:
    - 'area', 's_area': footprint area and its min-max normalisation
    - 'dist_to_street', 's_visibility': distance to the nearest street edge
      and 1 - distance / max distance (0.5 when the maximum is not positive)
    - 'structural_score': 0.6 * s_area + 0.4 * s_visibility
    - 'visual_score': normalised numeric 'height' tag, or 0.5 without one
    - 'cultural_score': 0.25 per non-null historic/tourism/amenity/heritage tag
    - 'pragmatic_score': 1.0 when the building/amenity/tourism tag mentions
      one of a fixed list of important uses, else 0.0
    - 'global_score': 0.4 structural + 0.2 visual + 0.2 cultural + 0.2 pragmatic

    Returns the scored copy.
    """
    print("  Computing landmark scores...")

    scored = buildings_gdf.copy()

    # --- Structural component: size -------------------------------------
    scored['area'] = scored.geometry.area
    scored['s_area'] = safe_normalize(scored['area'], default=0.5)

    # --- Structural component: 2D visibility (closeness to a street) ----
    print("    - Computing visibility...")
    all_streets = unary_union(edges_gdf.geometry)
    scored['dist_to_street'] = scored.geometry.apply(
        lambda footprint: footprint.distance(all_streets)
    )

    farthest = scored['dist_to_street'].max()
    if farthest > 0:
        # The closer to a street, the more visible
        scored['s_visibility'] = 1 - (scored['dist_to_street'] / farthest)
    else:
        scored['s_visibility'] = 0.5
    scored['s_visibility'] = scored['s_visibility'].fillna(0.5)

    scored['structural_score'] = (
        0.6 * scored['s_area'] + 0.4 * scored['s_visibility']
    ).fillna(0.5)

    # --- Visual component: height ---------------------------------------
    if 'height' in scored.columns:
        # Non-numeric height tags become NaN and end up at the default score
        scored['height'] = pd.to_numeric(scored['height'], errors='coerce')
        scored['visual_score'] = safe_normalize(scored['height'], default=0.5)
    else:
        scored['visual_score'] = 0.5

    # --- Cultural component: a quarter point per tag that is present ----
    cultural_tags = ['historic', 'tourism', 'amenity', 'heritage']
    tags_present = [tag for tag in cultural_tags if tag in scored.columns]
    tag_hits = scored[tags_present].notna().sum(axis=1)
    scored['cultural_score'] = (0.25 * tag_hits).clip(0, 1)

    # --- Pragmatic component: any important use mentioned in a tag ------
    important_uses = ['school', 'hospital', 'university', 'museum', 'church', 'mosque', 'temple', 'government']
    any_important_use = '|'.join(important_uses)
    scored['pragmatic_score'] = 0.0

    for column in ('building', 'amenity', 'tourism'):
        if column not in scored.columns:
            continue
        mentions_use = scored[column].astype(str).str.contains(
            any_important_use, case=False, na=False
        )
        scored.loc[mentions_use, 'pragmatic_score'] = 1.0

    # --- Global score: weighted blend of the four components ------------
    blended = (
        0.4 * scored['structural_score'] +
        0.2 * scored['visual_score'] +
        0.2 * scored['cultural_score'] +
        0.2 * scored['pragmatic_score']
    )
    scored['global_score'] = blended.fillna(0.5)

    print("  ‚úì Landmark scores computed")
    return scored

# Compute for all cities
# The full scored frame is kept on city_data; the GeoJSON export only carries
# the geometry, area and score columns.
for city_key in city_data.keys():
    print(f"\n{city_data[city_key]['name']}:")
    buildings_scored = compute_building_landmark_scores(
        city_data[city_key]['buildings'],
        city_data[city_key]['edges']
    )
    city_data[city_key]['buildings_scored'] = buildings_scored
    
    output_file = GEOJSON_DIR / f"{city_key}_buildings.geojson"
    cols_to_save = ['geometry', 'area', 'structural_score', 'visual_score', 'cultural_score', 'pragmatic_score', 'global_score']
    # Guard against a column that was not produced
    cols_to_save = [c for c in cols_to_save if c in buildings_scored.columns]
    buildings_scored[cols_to_save].to_file(output_file, driver='GeoJSON')
    print(f"  ‚úì Saved to {output_file.name}")

In [None]:
# Compute building geometry metrics
def _footprint_aspect_ratio(geom):
    """Return the long/short side ratio of geom's minimum rotated rectangle.

    Falls back to 1.0 when the rectangle cannot be measured (for example when
    it has no exterior ring) or when its shorter side has zero length.
    """
    try:
        corners = list(geom.minimum_rotated_rectangle.exterior.coords)
        side_a = Point(corners[0]).distance(Point(corners[1]))
        side_b = Point(corners[1]).distance(Point(corners[2]))
    except Exception:
        # Best effort per geometry, but unlike a bare `except:` this no longer
        # swallows KeyboardInterrupt / SystemExit.
        return 1.0
    shorter = min(side_a, side_b)
    return max(side_a, side_b) / shorter if shorter > 0 else 1.0


def compute_building_geometry_metrics(buildings_gdf, blocks_gdf, edges_gdf):
    """Compute additional building metrics

    Args:
        buildings_gdf: Building footprints (polygons) in a projected CRS.
        blocks_gdf: Blocks with an 'area' column; may be empty.
        edges_gdf: Street edges, used for the setback distance when the
            buildings do not already carry 'dist_to_street'.

    Returns:
        (buildings, blocks, summary):
        - buildings: copy of buildings_gdf with 'aspect_ratio',
          'has_courtyard' (footprint has an interior ring) and 'setback_dist'.
        - blocks: copy of blocks_gdf with 'building_coverage' (footprint area
          of the buildings lying within the block / block area) and
          'building_count'; blocks_gdf itself when it is empty.
        - summary: {'courtyard_frequency', 'avg_coverage'}.
    """
    print("  Computing geometry metrics...")

    buildings = buildings_gdf.copy()

    # Aspect ratio
    buildings['aspect_ratio'] = [_footprint_aspect_ratio(geom) for geom in buildings.geometry]

    # Courtyard frequency: share of footprints with at least one interior ring
    buildings['has_courtyard'] = buildings.geometry.apply(
        lambda geom: len(geom.interiors) > 0 if geom.geom_type == 'Polygon' else False
    )
    courtyard_freq = buildings['has_courtyard'].sum() / len(buildings) if len(buildings) > 0 else 0

    # Setback distance (reuse the distance computed during landmark scoring if present)
    if 'dist_to_street' not in buildings.columns:
        street_union = unary_union(edges_gdf.geometry)
        buildings['setback_dist'] = buildings.geometry.apply(lambda geom: geom.distance(street_union))
    else:
        buildings['setback_dist'] = buildings['dist_to_street']

    # Building coverage ratio
    if len(blocks_gdf) > 0:
        print("    - Computing coverage ratios...")
        blocks = blocks_gdf.copy()

        # Join on slim, geometry-only frames. Buildings and blocks both carry
        # 'area' and 'aspect_ratio' columns; sjoin renames such overlapping
        # columns to '<name>_left' / '<name>_right', so looking up a plain
        # 'area' column on the join result raises KeyError.
        footprints = buildings[['geometry']].copy()
        footprints['footprint_area'] = footprints.geometry.area
        block_shapes = blocks[['geometry']].copy()
        block_shapes['block_key'] = block_shapes.index

        # Inner join: buildings that are not within any block contribute nothing
        buildings_in_blocks = gpd.sjoin(footprints, block_shapes, how='inner', predicate='within')
        per_block = buildings_in_blocks.groupby('block_key')['footprint_area']

        covered_area = per_block.sum().reindex(blocks.index, fill_value=0.0)
        blocks['building_coverage'] = covered_area / blocks['area']
        blocks['building_count'] = per_block.size().reindex(blocks.index, fill_value=0)
        avg_coverage = float(blocks['building_coverage'].mean())
    else:
        avg_coverage = 0
        blocks = blocks_gdf

    print("  ‚úì Geometry metrics computed")

    return buildings, blocks, {
        'courtyard_frequency': courtyard_freq,
        'avg_coverage': avg_coverage
    }

# Compute for all cities
# Replaces the scored buildings and the blocks on city_data with their
# enriched versions and stores the summary under 'geometry_metrics'.
for city_key in city_data.keys():
    print(f"\n{city_data[city_key]['name']}:")
    buildings_geom, blocks_geom, metrics = compute_building_geometry_metrics(
        city_data[city_key]['buildings_scored'],
        city_data[city_key]['blocks'],
        city_data[city_key]['edges']
    )
    
    city_data[city_key]['buildings_scored'] = buildings_geom
    city_data[city_key]['blocks'] = blocks_geom
    city_data[city_key]['geometry_metrics'] = metrics

## 7. Building Block Library Extraction

In [None]:
# FIXED: Building block library with proper validation
def extract_building_block_library(blocks_gdf, buildings_gdf, city_key, target_count=35):
    """
    Build a list of representative blocks, each with the buildings inside it.

    Blocks are sorted by area. When there are more than target_count, an
    evenly spaced sample across that ordering is taken. Every entry stores
    the block outline and the footprints of the buildings lying within it,
    both translated so that the block centroid sits at the origin.

    Only exterior rings are exported; interior rings (courtyards) are dropped.

    Returns a list of JSON-serialisable dicts, or [] when blocks_gdf is empty.
    """
    print(f"  Extracting blocks for library...")

    if len(blocks_gdf) == 0:
        print("  ‚ö† No blocks available - skipping library extraction")
        return []

    chosen = blocks_gdf.copy().sort_values('area')
    if len(chosen) > target_count:
        # Evenly spaced positions from the smallest to the largest block
        positions = np.linspace(0, len(chosen)-1, target_count, dtype=int)
        chosen = chosen.iloc[positions]

    def recentre(geom, origin):
        # Shift geom so that `origin` becomes (0, 0)
        return translate(geom, xoff=-origin.x, yoff=-origin.y)

    def as_polygon_dict(polygon):
        # GeoJSON-style polygon holding the exterior ring only
        return {
            'type': 'Polygon',
            'coordinates': [list(polygon.exterior.coords)]
        }

    library = []

    for sequence, (_, block) in enumerate(chosen.iterrows()):
        outline = block.geometry
        origin = outline.centroid

        inside = buildings_gdf[buildings_gdf.geometry.within(outline)]
        footprints = [
            as_polygon_dict(recentre(footprint, origin))
            for footprint in inside.geometry
        ]

        library.append({
            'block_id': f"{city_key}_block_{sequence:03d}",
            'city': city_key,
            'area': float(block['area']),
            'perimeter': float(block['perimeter']),
            'compactness': float(block['compactness']),
            'aspect_ratio': float(block['aspect_ratio']),
            'building_count': len(inside),
            'building_coverage': float(block.get('building_coverage', 0)),
            'block_boundary': as_polygon_dict(recentre(outline, origin)),
            'buildings': footprints
        })

    print(f"  ‚úì Extracted {len(library)} blocks")
    return library

# Extract for all cities
# all_blocks_library is the flat, cross-city list that gets written to disk;
# each city's own entries are also kept on city_data[city_key]['library'].
all_blocks_library = []

for city_key in city_data.keys():
    print(f"\n{city_data[city_key]['name']}:")
    library = extract_building_block_library(
        city_data[city_key]['blocks'],
        city_data[city_key]['buildings_scored'],
        city_key,
        target_count=BLOCKS_PER_CITY
    )
    all_blocks_library.extend(library)
    city_data[city_key]['library'] = library

print(f"\n{'='*60}")
print(f"‚úì Total library size: {len(all_blocks_library)} blocks")
print(f"{'='*60}")

# Save library (skipped entirely when no city produced any block)
if len(all_blocks_library) > 0:
    library_file = METRICS_DIR / 'building_blocks_library.json'
    with open(library_file, 'w') as f:
        json.dump(all_blocks_library, f, indent=2)
    print(f"‚úì Saved library to {library_file.name}")
else:
    print("‚ö† No blocks in library - skipping JSON export")

## 8. Metrics Aggregation & JSON Export

In [None]:
def compute_distribution(values, bins=20):
    """Summarise a numeric sample as a histogram plus descriptive statistics.

    Non-finite entries (NaN, +/-inf) are dropped before anything is computed:
    ``np.histogram`` refuses to auto-detect a bin range from them, and a single
    missing measurement should not abort the whole metrics export.

    Args:
        values: Array-like of numbers (list, NumPy array, ``Series.values``, ...).
        bins: Bin specification forwarded unchanged to ``np.histogram``.

    Returns:
        A JSON-serialisable dict with keys ``bins`` (bin edges), ``counts``,
        ``mean``, ``median``, ``std``, ``min`` and ``max``. When no finite
        value is available, the lists are empty and every statistic is 0.
    """
    sample = np.asarray(values, dtype=float).ravel()
    sample = sample[np.isfinite(sample)]

    if sample.size == 0:
        return {'bins': [], 'counts': [], 'mean': 0, 'median': 0, 'std': 0, 'min': 0, 'max': 0}

    hist, bin_edges = np.histogram(sample, bins=bins)

    return {
        'bins': bin_edges.tolist(),
        'counts': hist.tolist(),
        'mean': float(np.mean(sample)),
        'median': float(np.median(sample)),
        'std': float(np.std(sample)),
        'min': float(np.min(sample)),
        'max': float(np.max(sample))
    }

# Collect every per-city measurement into one nested, JSON-serialisable dict
urban_metrics = {}

for city_key, city_entry in city_data.items():
    print(f"\nAggregating metrics for {city_entry['name']}...")

    nodes = city_entry['nodes']
    edges = city_entry['edges']
    blocks = city_entry['blocks']
    buildings = city_entry['buildings_scored']
    # Partitions and geometry metrics are optional; absent entries yield zeros
    partitions = city_entry.get('partitions', {})
    geom_metrics = city_entry.get('geometry_metrics', {})

    # Cast keys and counts to plain ints so json can serialise them
    degree_dist = {
        int(degree): int(count)
        for degree, count in nodes['degree'].value_counts().items()
    }

    node_summary = {
        'total_count': len(nodes),
        'avg_degree': float(nodes['degree'].mean()),
        'degree_distribution': degree_dist,
    }
    for column in ('bc_distance', 'bc_information', 'reach_200m', 'reach_300m'):
        node_summary[column] = compute_distribution(nodes[column].values)

    total_length_km = edges['length'].sum() / 1000
    edge_summary = {
        'total_count': len(edges),
        'total_length_km': float(total_length_km),
        # 0.25 is the divisor for the 500x500m study window expressed in km2
        'density_km_per_km2': float(total_length_km / 0.25),
        'segment_length_distribution': compute_distribution(edges['length'].values),
        'angular_bc_distribution': compute_distribution(edges['angular_bc'].values),
    }

    # With no blocks the distributions are left as empty dicts
    has_blocks = len(blocks) > 0
    block_summary = {'total_count': len(blocks)}
    for column in ('area', 'compactness', 'aspect_ratio'):
        block_summary[f'{column}_distribution'] = (
            compute_distribution(blocks[column].values) if has_blocks else {}
        )

    building_summary = {
        'total_count': len(buildings),
        'area_distribution': compute_distribution(buildings['area'].values),
        'aspect_ratio_distribution': compute_distribution(buildings['aspect_ratio'].values),
        'setback_distribution': compute_distribution(buildings['setback_dist'].values),
        'avg_coverage_ratio': float(geom_metrics.get('avg_coverage', 0)),
        'courtyard_frequency': float(geom_metrics.get('courtyard_frequency', 0)),
    }

    # Number of distinct district labels per partition method (0 if not computed)
    district_summary = {
        f'count_{method}': len(set(partitions[method].values())) if method in partitions else 0
        for method in ('distance', 'angular', 'topological')
    }

    urban_metrics[city_key] = {
        'name': city_entry['name'],
        'nodes': node_summary,
        'edges': edge_summary,
        'blocks': block_summary,
        'buildings': building_summary,
        'districts': district_summary,
    }

# Write everything under a single top-level 'urban_metrics' key
metrics_file = METRICS_DIR / 'urban_metrics.json'
with open(metrics_file, 'w') as out_handle:
    json.dump({'urban_metrics': urban_metrics}, out_handle, indent=2)

banner = '=' * 60
print(f"\n{banner}")
print(f"‚úì Metrics saved to {metrics_file.name}")
print(banner)

## 9. NEW: Metrics Summary Table

In [None]:
# Comprehensive metrics summary table: one row per metric, one column per city
print("\n" + "="*100)
print("üìä COMPREHENSIVE METRICS SUMMARY TABLE")
print("="*100)


def _format_metric(value, fmt):
    """Render one metric value according to its display format.

    ``fmt`` is either empty (integer-style output), a bare format spec such as
    ``'.2f'``, a spec followed by a space-separated unit (``'.1f m'``), or a
    spec with a trailing ``'%'`` (``'.1f%'``). The percent sign is a literal
    suffix, not part of the format spec: handing ``'.1f%'`` to ``format()``
    raises ValueError, which previously turned every percentage cell into 'N/A'.
    """
    if not fmt:
        return str(int(value)) if isinstance(value, (int, float)) else str(value)

    format_spec, _, unit = fmt.partition(' ')
    if format_spec.endswith('%'):
        # Values are already scaled to 0-100 by the metric lambdas
        return f"{value:{format_spec[:-1]}}%"
    return f"{value:{format_spec}} {unit}".strip()


# (row label, extractor over one city's urban_metrics entry, display format)
metric_definitions = [
    ('Total Nodes', lambda m: m['nodes']['total_count'], ''),
    ('Avg Node Degree', lambda m: m['nodes']['avg_degree'], '.2f'),
    ('Total Edges', lambda m: m['edges']['total_count'], ''),
    ('Total Street Length', lambda m: m['edges']['total_length_km'], '.2f km'),
    ('Street Density', lambda m: m['edges']['density_km_per_km2'], '.1f km/km¬≤'),
    ('Avg Segment Length', lambda m: m['edges']['segment_length_distribution']['mean'], '.1f m'),
    ('Median Segment Length', lambda m: m['edges']['segment_length_distribution']['median'], '.1f m'),
    ('Total Blocks', lambda m: m['blocks']['total_count'], ''),
    ('Avg Block Area', lambda m: m['blocks']['area_distribution'].get('mean', 0), '.0f m¬≤'),
    ('Median Block Area', lambda m: m['blocks']['area_distribution'].get('median', 0), '.0f m¬≤'),
    ('Avg Block Compactness', lambda m: m['blocks']['compactness_distribution'].get('mean', 0), '.2f'),
    ('Total Buildings', lambda m: m['buildings']['total_count'], ''),
    ('Avg Building Area', lambda m: m['buildings']['area_distribution']['mean'], '.0f m¬≤'),
    ('Median Building Area', lambda m: m['buildings']['area_distribution']['median'], '.0f m¬≤'),
    ('Avg Building Aspect Ratio', lambda m: m['buildings']['aspect_ratio_distribution']['mean'], '.2f'),
    ('Avg Setback Distance', lambda m: m['buildings']['setback_distribution']['mean'], '.2f m'),
    ('Building Coverage Ratio', lambda m: m['buildings']['avg_coverage_ratio'] * 100, '.1f%'),
    ('Courtyard Frequency', lambda m: m['buildings']['courtyard_frequency'] * 100, '.1f%'),
    ('Districts (Distance)', lambda m: m['districts']['count_distance'], ''),
    ('Districts (Angular)', lambda m: m['districts']['count_angular'], ''),
    ('Districts (Topological)', lambda m: m['districts']['count_topological'], ''),
]

# Build table
table_data = []
for metric_name, metric_func, fmt in metric_definitions:
    row = {'Metric': metric_name}
    for city_key in city_data.keys():
        column = city_data[city_key]['name']
        try:
            row[column] = _format_metric(metric_func(urban_metrics[city_key]), fmt)
        except (KeyError, TypeError, ValueError):
            # Metric missing or not formattable for this city: show a placeholder
            row[column] = 'N/A'
    table_data.append(row)

metrics_df = pd.DataFrame(table_data)
metrics_df = metrics_df.set_index('Metric')

# Display table
print("\n")
print(metrics_df.to_string())

# Save to CSV
csv_file = METRICS_DIR / 'metrics_summary_table.csv'
metrics_df.to_csv(csv_file)
print(f"\n‚úì Saved metrics table to {csv_file.name}")

print("\n" + "="*100)

## 10. Visualizations (IMPROVED)

### 10.1 NEW: Base Maps (Roads + Buildings)

In [None]:
# Base maps: building footprints drawn under the street network, one panel per city
fig, axes = plt.subplots(1, 3, figsize=(24, 8), facecolor='white')

for idx, (city_key, city_entry) in enumerate(city_data.items()):
    ax = axes[idx]

    edges = city_entry['edges']
    buildings = city_entry['buildings_scored']

    # Footprints first (light gray) so the black street lines stay on top
    buildings.plot(ax=ax, color='#CCCCCC', edgecolor='#666666', linewidth=0.5, alpha=0.7)
    edges.plot(ax=ax, color='black', linewidth=1.5)

    panel_title = f"{city_entry['name']}\n{len(buildings)} buildings, {len(edges)} road segments"
    ax.set_title(panel_title, fontsize=14, color='black', pad=15)
    ax.set_xlabel('Easting (m)', fontsize=10)
    ax.set_ylabel('Northing (m)', fontsize=10)
    ax.tick_params(labelsize=8)
    ax.grid(True, alpha=0.3, linestyle='--')

plt.suptitle('Base Maps: Urban Form (500√ó500m)', fontsize=20, color='black', y=0.98)
plt.tight_layout()

# Export a raster and a vector copy with identical framing
save_opts = dict(bbox_inches='tight', facecolor='white')
plt.savefig(VIZ_PNG_DIR / 'base_maps.png', dpi=300, **save_opts)
plt.savefig(VIZ_SVG_DIR / 'base_maps.svg', **save_opts)
plt.show()

print("‚úì Saved: base_maps (PNG + SVG)")

### 10.2 TIER 1: Comparative Street Network Betweenness Maps (IMPROVED)

In [None]:
# Betweenness maps: street segments coloured by angular betweenness centrality.
# All panels and the colorbar share ONE normalisation so colours are comparable
# across cities and the colorbar actually describes what is drawn. Previously
# each panel was scaled to its own maximum while the colorbar was fixed to 0-0.1.
fig, axes = plt.subplots(1, 3, figsize=(24, 8), facecolor='#1a1a1a')

# Largest positive betweenness over all cities; `m == m` filters out NaN
# (e.g. from an empty edge table). Fall back to 1.0 if nothing usable exists.
city_maxima = [float(city_data[key]['edges']['angular_bc'].max()) for key in city_data]
shared_vmax = max((m for m in city_maxima if m == m and m > 0), default=1.0)
bc_norm = Normalize(vmin=0, vmax=shared_vmax)

for idx, city_key in enumerate(city_data.keys()):
    ax = axes[idx]
    edges = city_data[city_key]['edges']

    # Plot edges colored by angular betweenness on the shared scale
    edges.plot(
        ax=ax,
        column='angular_bc',
        cmap='YlOrRd',
        linewidth=2,
        legend=False,
        vmin=0,
        vmax=shared_vmax
    )

    ax.set_title(city_data[city_key]['name'], fontsize=20, color='white', pad=20)
    ax.set_xlabel('Easting (m)', fontsize=12, color='white')
    ax.set_ylabel('Northing (m)', fontsize=12, color='white')
    ax.tick_params(colors='white', labelsize=10)
    ax.set_facecolor('#1a1a1a')

# Add shared colorbar in a manually placed axis on the right edge of the figure
cbar_ax = fig.add_axes([0.92, 0.15, 0.02, 0.7])
sm = plt.cm.ScalarMappable(cmap='YlOrRd', norm=bc_norm)
sm.set_array([])
cbar = fig.colorbar(sm, cax=cbar_ax)
cbar.set_label('Angular Betweenness Centrality', fontsize=14, color='white')
cbar.ax.tick_params(colors='white', labelsize=10)

plt.suptitle('Angular Betweenness Centrality: Urban Movement Spines', 
             fontsize=24, color='white', y=0.98)
# Leave the right 10% of the figure free for the colorbar
plt.tight_layout(rect=[0, 0, 0.9, 1])

plt.savefig(VIZ_PNG_DIR / 'tier1_betweenness_comparison.png', dpi=300, facecolor='#1a1a1a', bbox_inches='tight')
plt.savefig(VIZ_SVG_DIR / 'tier1_betweenness_comparison.svg', facecolor='#1a1a1a', bbox_inches='tight')
plt.show()

print("‚úì Saved: tier1_betweenness_comparison (PNG + SVG)")

### 10.3 TIER 1: Building Block Library (if available)

In [None]:
# Figure-ground gallery of sample blocks drawn from each city's library
if not all_blocks_library:
    print("‚ö† Skipping block library visualization (no blocks available)")
else:
    fig, axes = plt.subplots(3, 4, figsize=(20, 15), facecolor='white')
    axes = axes.flatten()

    # Four samples per city: first, one-third, two-thirds and last entry.
    # Cities with fewer than four library blocks contribute nothing.
    selected_blocks = []
    for city_key in city_data.keys():
        library = city_data[city_key]['library']
        if len(library) < 4:
            continue
        count = len(library)
        for position in (0, count // 3, 2 * count // 3, -1):
            selected_blocks.append(library[position])

    for ax, block_data in zip(axes, selected_blocks[:12]):
        # Block outline as a light ground plane
        block_poly = Polygon(block_data['block_boundary']['coordinates'][0])
        outline_x, outline_y = block_poly.exterior.xy
        ax.fill(outline_x, outline_y, color='#f0f0f0', edgecolor='black', linewidth=1)

        # Building footprints as solid black figures
        for bldg in block_data['buildings']:
            bldg_poly = Polygon(bldg['coordinates'][0])
            bldg_x, bldg_y = bldg_poly.exterior.xy
            ax.fill(bldg_x, bldg_y, color='black')

        caption = (
            f"{block_data['city'].upper()}\n"
            f"{block_data['area']:.0f} m¬≤ | "
            f"Coverage: {block_data['building_coverage']*100:.0f}% | "
            f"AR: {block_data['aspect_ratio']:.1f}"
        )
        ax.set_title(caption, fontsize=10)

        ax.set_aspect('equal')
        ax.axis('off')

    # Blank out any grid cells that received no block
    for spare_ax in axes[len(selected_blocks):]:
        spare_ax.axis('off')

    plt.suptitle('Building Block Library: Urban DNA Samples', fontsize=24, y=0.98)
    plt.tight_layout()

    plt.savefig(VIZ_PNG_DIR / 'tier1_block_library.png', dpi=300, bbox_inches='tight')
    plt.savefig(VIZ_SVG_DIR / 'tier1_block_library.svg', bbox_inches='tight')
    plt.show()

    print("‚úì Saved: tier1_block_library (PNG + SVG)")

### 10.4 IMPROVED: Comparative Histograms with Clear Labels

In [None]:
# Overlay each city's pre-computed histogram as a line, one panel per metric
fig, axes = plt.subplots(2, 2, figsize=(18, 14), facecolor='white')

# (urban_metrics category, distribution key, panel title, x-axis label)
distributions = [
    ('edges', 'segment_length_distribution', 'Street Segment Length', 'Length (meters)'),
    ('blocks', 'area_distribution', 'Urban Block Area', 'Area (m¬≤)'),
    ('buildings', 'area_distribution', 'Building Footprint Area', 'Area (m¬≤)'),
    ('buildings', 'aspect_ratio_distribution', 'Building Aspect Ratio', 'Aspect Ratio (length/width)')
]

# axes.flat walks the 2x2 grid row by row, matching the order of `distributions`
for ax, (category, metric_key, title, xlabel) in zip(axes.flat, distributions):
    for city_key in city_data.keys():
        metric = urban_metrics[city_key][category].get(metric_key, {})
        bin_edges = metric.get('bins', [])
        # Skip cities whose distribution is absent or empty
        if len(bin_edges) <= 1:
            continue

        bin_centers = [(left + right) / 2 for left, right in zip(bin_edges[:-1], bin_edges[1:])]
        ax.plot(
            bin_centers,
            metric['counts'],
            label=f"{city_data[city_key]['name']} (Œº={metric['mean']:.1f})",
            color=city_data[city_key]['color'],
            linewidth=2.5,
            alpha=0.8
        )

    ax.set_xlabel(xlabel, fontsize=13, fontweight='bold')
    ax.set_ylabel('Frequency (count)', fontsize=13, fontweight='bold')
    ax.set_title(title, fontsize=15, fontweight='bold', pad=15)
    ax.legend(fontsize=11, loc='best', framealpha=0.9)
    ax.grid(True, alpha=0.3, linestyle='--')
    ax.tick_params(labelsize=10)

plt.suptitle('Comparative Distributions: Urban Pattern Analysis (500√ó500m)', 
             fontsize=20, fontweight='bold', y=0.98)
plt.tight_layout()

plt.savefig(VIZ_PNG_DIR / 'comparative_histograms.png', dpi=300, bbox_inches='tight')
plt.savefig(VIZ_SVG_DIR / 'comparative_histograms.svg', bbox_inches='tight')
plt.show()

print("‚úì Saved: comparative_histograms (PNG + SVG)")

### 10.5 IMPROVED: District Identification

In [None]:
# District maps: rows are cities, columns are partition methods. Nodes are
# coloured by the 'district' column of the partition GeoJSON read from GEOJSON_DIR.
fig, axes = plt.subplots(3, 3, figsize=(22, 22), facecolor='#1a1a1a')

methods = ['distance', 'angular', 'topological']
method_titles = ['Distance-Based Partition', 'Angular-Based Partition', 'Topological Partition']

for row, city_key in enumerate(city_data.keys()):
    for col, method in enumerate(methods):
        ax = axes[row, col]
        
        partition_file = GEOJSON_DIR / f"{city_key}_districts_{method}.geojson"
        nodes_districts = gpd.read_file(partition_file)
        edges = city_data[city_key]['edges']
        
        # Street network as a muted backdrop, district-coloured nodes on top
        edges.plot(ax=ax, color='#333333', linewidth=1.2, alpha=0.6)
        nodes_districts.plot(
            ax=ax,
            column='district',
            cmap='tab20',
            markersize=60,
            legend=False,
            alpha=0.9
        )
        
        # Column headers on the top row, city names down the left column
        if row == 0:
            ax.set_title(method_titles[col], fontsize=16, color='white', pad=12, fontweight='bold')
        if col == 0:
            ax.set_ylabel(city_data[city_key]['name'], fontsize=15, color='white', 
                         rotation=90, labelpad=20, fontweight='bold')
        
        # Add district count
        num_districts = len(set(nodes_districts['district']))
        ax.text(0.05, 0.95, f"{num_districts} districts", 
               transform=ax.transAxes, fontsize=12, color='white',
               verticalalignment='top', bbox=dict(boxstyle='round', facecolor='black', alpha=0.7))
        
        # Hide ticks and frame only. ax.axis('off') would also suppress the
        # y-label carrying the city name and the axes background colour.
        ax.set_xticks([])
        ax.set_yticks([])
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_facecolor('#1a1a1a')

plt.suptitle('District Identification: Community Detection Methods', 
             fontsize=24, color='white', y=0.98, fontweight='bold')
plt.tight_layout()

plt.savefig(VIZ_PNG_DIR / 'tier2_districts.png', dpi=300, facecolor='#1a1a1a', bbox_inches='tight')
plt.savefig(VIZ_SVG_DIR / 'tier2_districts.svg', facecolor='#1a1a1a', bbox_inches='tight')
plt.show()

print("‚úì Saved: tier2_districts (PNG + SVG)")

### 10.6 IMPROVED: Landmark Maps

In [None]:
# Landmark maps: rows are score types, columns are cities; buildings are
# coloured by the chosen score column on a fixed 0-1 range
fig, axes = plt.subplots(2, 3, figsize=(24, 16), facecolor='#1a1a1a')

score_types = ['structural_score', 'global_score']
score_titles = ['Structural Landmark Score', 'Global Landmark Score']

for row, score_type in enumerate(score_types):
    # Horizontal colorbar under each panel, labelled with this row's score name
    colorbar_opts = {
        'label': score_titles[row],
        'shrink': 0.8,
        'orientation': 'horizontal',
        'pad': 0.05
    }

    for col, city_key in enumerate(city_data.keys()):
        ax = axes[row, col]
        city_entry = city_data[city_key]

        buildings = city_entry['buildings_scored']
        edges = city_entry['edges']

        # Streets as a dark backdrop, scored buildings drawn over them
        edges.plot(ax=ax, color='#333333', linewidth=0.8)
        buildings.plot(
            ax=ax,
            column=score_type,
            cmap='hot',
            legend=True,
            legend_kwds=colorbar_opts,
            vmin=0,
            vmax=1
        )

        # City names only above the top row
        if row == 0:
            ax.set_title(city_entry['name'], fontsize=17, color='white', 
                        pad=15, fontweight='bold')

        ax.axis('off')
        ax.set_facecolor('#1a1a1a')

plt.suptitle('Landmark Identification: Building Importance Scores (0-1 scale)', 
             fontsize=24, color='white', y=0.98, fontweight='bold')
plt.tight_layout()

save_opts = dict(facecolor='#1a1a1a', bbox_inches='tight')
plt.savefig(VIZ_PNG_DIR / 'tier2_landmarks.png', dpi=300, **save_opts)
plt.savefig(VIZ_SVG_DIR / 'tier2_landmarks.svg', **save_opts)
plt.show()

print("‚úì Saved: tier2_landmarks (PNG + SVG)")

## 11. Final Summary

In [None]:
# Closing report: list the generated files, headline numbers and next steps
rule = "=" * 100

print("\n" + rule)
print("‚úì STEP 1 COMPLETE: URBAN ANALYSIS (500√ó500m)")
print(rule)

print("\nüìÅ OUTPUTS GENERATED:")

# File listings for the two data directories, sorted by path
print(f"\n  GeoJSON Files ({GEOJSON_DIR}):")
for geojson_path in sorted(GEOJSON_DIR.glob('*.geojson')):
    print(f"    - {geojson_path.name}")

print(f"\n  Metrics ({METRICS_DIR}):")
for metrics_path in sorted(METRICS_DIR.glob('*')):
    print(f"    - {metrics_path.name}")

# Visualizations are only counted, not listed one by one
png_count = sum(1 for _ in VIZ_PNG_DIR.glob('*.png'))
svg_count = sum(1 for _ in VIZ_SVG_DIR.glob('*.svg'))
print(f"\n  Visualizations:")
print(f"    PNG ({png_count} files): {VIZ_PNG_DIR}")
print(f"    SVG ({svg_count} files): {VIZ_SVG_DIR}")

print("\nüìä KEY RESULTS:")
for city_key, city_entry in city_data.items():
    m = urban_metrics[city_key]
    counts_line = " | ".join([
        f"Nodes: {m['nodes']['total_count']}",
        f"Edges: {m['edges']['total_count']}",
        f"Blocks: {m['blocks']['total_count']}",
        f"Buildings: {m['buildings']['total_count']}",
    ])
    print(f"\n  {m['name'].upper()}:")
    print(f"    {counts_line}")
    print(f"    Street density: {m['edges']['density_km_per_km2']:.1f} km/km¬≤")
    print(f"    Library blocks: {len(city_entry['library'])}")

print(f"\n  TOTAL BUILDING BLOCK LIBRARY: {len(all_blocks_library)} blocks")

print("\nüéØ NEXT STEPS (STEP 2):")
for step_line in (
    "  1. Generate 500√ó500m road network using tensor field",
    "  2. Use segment length distributions from this analysis",
    "  3. Optimize for space syntax metrics (betweenness, integration)",
    "  4. Place buildings using block library from STEP 1",
):
    print(step_line)

print("\n" + rule)
print("All visualizations include proper axis labels, clear titles, and legends.")
print("Both PNG (high-res) and SVG (vector) formats exported for portfolio use.")
print(rule)