# STEP 1: Analyze Real Cities (500×500m)
## Extract Urban Metrics & Building Block Library

**Goal**: Analyze three 500×500m urban areas to extract:
- Space syntax metrics (nodes, edges, districts, landmarks, barriers)
- Building geometry distributions
- Reusable building block library

**Cities**:
1. Hanoi, Vietnam (21.0230°N, 105.8560°E) - Dense, organic layout
2. Brussels, Belgium (50.8477°N, 4.3572°E) - European historic core
3. Marrakech, Morocco (31.623811°N, 7.988662°W) - Compact medina

**Outputs**:
- GeoJSON files (nodes, edges, buildings, districts, blocks)
- JSON metrics file (urban_metrics.json)
- Building block library (building_blocks_library.json)
- Visualizations (PNG + SVG)

## 1. Setup & Configuration

In [None]:
# Imports
import osmnx as ox
import networkx as nx
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.colors import LinearSegmentedColormap
from matplotlib import cm
from shapely.geometry import Point, LineString, Polygon, MultiPolygon, box
from shapely.ops import polygonize, unary_union, nearest_points
from shapely.affinity import rotate, scale, translate
from shapely import buffer, centroid
import json
from pathlib import Path
import warnings
from collections import Counter
from scipy.stats import gaussian_kde

# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation/CRS warnings from the geo stack — confirm that is intended.
warnings.filterwarnings('ignore')

# Configure OSMnx
ox.settings.use_cache = True  # turn on OSMnx's request cache
ox.settings.log_console = False  # keep OSMnx log output off the console

# Set plot style
plt.style.use('dark_background')
# IPython magic (not plain Python): only valid when this cell runs in a notebook/IPython session.
%matplotlib inline

In [None]:
# Configuration: study sites, analysis parameters and output locations.
# Each site is keyed by a short slug; 'coords' is (latitude, longitude) and
# 'color' is the hex colour associated with that city.
CITIES = dict(
    hanoi=dict(
        name='Hanoi, Vietnam',
        coords=(21.0230, 105.8560),
        color='#FF6B6B',  # Red
    ),
    brussels=dict(
        name='Brussels, Belgium',
        coords=(50.8477, 4.3572),
        color='#4ECDC4',  # Teal
    ),
    marrakech=dict(
        name='Marrakech, Morocco',
        coords=(31.623811, -7.988662),
        color='#FFE66D',  # Yellow
    ),
)

# Analysis parameters (adapted for 500×500m)
RADIUS = 250                 # meters (to get ~500×500m coverage)
REACH_RADII = [200, 300]     # Reduced from 400/600 for small scale
LOCAL_LANDMARK_RADIUS = 300  # Reduced from 1500m
MIN_BLOCK_AREA = 500         # m²
MAX_BLOCK_AREA = 10000       # m²
BLOCKS_PER_CITY = 35         # Target library size

# Output paths, all rooted at ./outputs
OUTPUT_DIR = Path('outputs')
GEOJSON_DIR = OUTPUT_DIR.joinpath('geojson')
VIZ_PNG_DIR = OUTPUT_DIR.joinpath('visualizations', 'png')
VIZ_SVG_DIR = OUTPUT_DIR.joinpath('visualizations', 'svg')
METRICS_DIR = OUTPUT_DIR.joinpath('metrics')

# Create every leaf directory (parents included); existing ones are left alone
for d in (GEOJSON_DIR, VIZ_PNG_DIR, VIZ_SVG_DIR, METRICS_DIR):
    d.mkdir(parents=True, exist_ok=True)

coverage_side = RADIUS * 2
print("‚úì Configuration complete")
print(f"  Analyzing {len(CITIES)} cities")
print(f"  Coverage radius: {RADIUS}m (~{coverage_side}√ó{coverage_side}m area)")
print(f"  Output directory: {OUTPUT_DIR.absolute()}")

## 2. Data Acquisition

In [None]:
# Download the street network and building footprints for every configured city.
# A city whose download or projection fails is reported and left out of
# `city_data`, so later cells only iterate over cities with complete data.
city_data = {}

for city_key, city_info in CITIES.items():
    print(f"\n{'='*60}")
    print(f"Downloading: {city_info['name']}")
    print(f"{'='*60}")

    lat, lon = city_info['coords']

    try:
        # Download street network (walk network includes all accessible roads)
        print(f"  ‚Üí Street network...")
        G = ox.graph_from_point(
            (lat, lon),
            dist=RADIUS,
            network_type='walk',
            simplify=True
        )

        # Project to local UTM
        G_proj = ox.project_graph(G)

        # Read the projected CRS once; it is needed both to reproject the
        # buildings and to record alongside the city's data.
        proj_crs = ox.graph_to_gdfs(G_proj, nodes=False).crs

        # Download buildings
        print(f"  ‚Üí Buildings...")
        buildings = ox.features_from_point(
            (lat, lon),
            dist=RADIUS,
            tags={'building': True}
        )

        # Project buildings into the same CRS as the street graph
        buildings_proj = buildings.to_crs(proj_crs)

        # Clean building geometries (keep only Polygons/MultiPolygons)
        buildings_proj = buildings_proj[buildings_proj.geometry.type.isin(['Polygon', 'MultiPolygon'])]

        # Store data
        city_data[city_key] = {
            'name': city_info['name'],
            'color': city_info['color'],
            'coords': (lat, lon),
            'graph': G_proj,
            'buildings': buildings_proj,
            'crs': proj_crs
        }

        print(f"  ‚úì Downloaded:")
        print(f"    - {G_proj.number_of_nodes()} nodes")
        print(f"    - {G_proj.number_of_edges()} edges")
        print(f"    - {len(buildings_proj)} buildings")

    except Exception as e:
        # Best-effort per city: report the failure and move on to the next one
        print(f"  ‚úó Error downloading {city_key}: {e}")

print(f"\n{'='*60}")
print(f"‚úì Data acquisition complete for {len(city_data)} cities")
print(f"{'='*60}")

## 3. Node Analysis (Centrality Metrics)
Adapted from Notebook 01 - Computing multiple centrality measures for intersections

In [None]:
def compute_node_metrics(G, reach_radii=(200, 300)):
    """
    Compute centrality metrics for nodes (intersections).

    Columns added to the returned node GeoDataFrame:
    - bc_distance: betweenness centrality weighted by edge 'length'
    - bc_information: unweighted (hop-count) betweenness centrality
    - closeness: 'length'-weighted closeness centrality (computed here in
      place of straightness centrality)
    - reach_<r>m: number of nodes within network distance r, one column per
      entry of reach_radii; the count includes the node itself
    - degree: node degree in the undirected graph

    Args:
        G: street graph whose edges carry a 'length' attribute.
        reach_radii: network-distance cutoffs for the reach columns.
            Defaults to (200, 300), giving 'reach_200m' and 'reach_300m'.

    Returns:
        The node GeoDataFrame of G with the metric columns added.
    """
    print("  Computing node centrality metrics...")

    # Convert to undirected for centrality calculations
    G_undir = G.to_undirected()

    # 1. Betweenness Centrality (distance-weighted)
    print("    - Betweenness (distance)...")
    bc_dist = nx.betweenness_centrality(G_undir, weight='length', normalized=True)

    # 2. Betweenness Centrality (unweighted: every edge counts as one step)
    print("    - Betweenness (information)...")
    bc_info = nx.betweenness_centrality(G_undir, weight=None, normalized=True)

    # 3. Closeness Centrality (distance-weighted) - as proxy for straightness
    print("    - Closeness...")
    closeness = nx.closeness_centrality(G_undir, distance='length')

    # 4. Reach Centrality (nodes within each radius)
    radii = list(reach_radii)
    radii_label = ', '.join(f'{r}m' for r in radii)
    print(f"    - Reach centrality ({radii_label})...")
    reach = {r: {} for r in radii}
    if radii:
        widest = max(radii)
        for node in G_undir.nodes():
            # One Dijkstra run at the widest cutoff covers every smaller
            # radius: the smaller counts are just a filter on its distances.
            dists = list(
                nx.single_source_dijkstra_path_length(
                    G_undir, node, cutoff=widest, weight='length'
                ).values()
            )
            for r in radii:
                reach[r][node] = sum(d <= r for d in dists)

    # 5. Degree
    degree = dict(G_undir.degree())

    # Create GeoDataFrame with metrics (only the node table is needed)
    nodes = ox.graph_to_gdfs(G, edges=False)
    nodes['bc_distance'] = nodes.index.map(bc_dist)
    nodes['bc_information'] = nodes.index.map(bc_info)
    nodes['closeness'] = nodes.index.map(closeness)
    for r in radii:
        nodes[f'reach_{r}m'] = nodes.index.map(reach[r])
    nodes['degree'] = nodes.index.map(degree)

    print("  ‚úì Node metrics computed")
    return nodes

# Run the node analysis for each downloaded city and persist the result
for city_key, entry in city_data.items():
    print(f"\n{entry['name']}:")
    entry['nodes'] = compute_node_metrics(entry['graph'])

    # Save GeoJSON
    output_file = GEOJSON_DIR.joinpath(f"{city_key}_nodes.geojson")
    entry['nodes'].to_file(output_file, driver='GeoJSON')
    print(f"  ‚úì Saved to {output_file.name}")

In [None]:
# Report the headline node statistics for each city
print("\n" + "="*80)
print("üìä KEY NODE METRICS (PORTFOLIO HIGHLIGHTS)")
print("="*80)

for city_key, entry in city_data.items():
    nodes = entry['nodes']
    node_total = len(nodes)
    bc_distance = nodes['bc_distance']

    print(f"\n{entry['name'].upper()}:")
    print(f"  Total Nodes: {node_total}")
    print(f"  Avg Degree: {nodes['degree'].mean():.2f}")

    # Share of intersections per degree value, in ascending degree order
    print(f"  Degree Distribution:")
    degree_counts = nodes['degree'].value_counts().sort_index()
    for deg, count in degree_counts.items():
        share = count / node_total * 100
        print(f"    {int(deg)}-way intersections: {count} ({share:.1f}%)")

    print(f"  Avg Reach (200m): {nodes['reach_200m'].mean():.1f} nodes")
    print(f"  Avg Reach (300m): {nodes['reach_300m'].mean():.1f} nodes")
    print(f"  Max Betweenness (distance): {bc_distance.max():.4f}")
    print(f"  Avg Betweenness (distance): {bc_distance.mean():.4f}")

## 4. Edge Analysis (Street Networks & Blocks)

In [None]:
def compute_edge_metrics(G):
    """
    Compute edge (street segment) metrics.

    Columns added to the returned edge GeoDataFrame:
    - edge_bc: 'length'-weighted edge betweenness on the undirected primal graph
    - angular_bc: unweighted node betweenness on the dual graph, in which every
      street segment is a node and two segments are linked when they share an
      intersection (no turning angles enter the computation)

    The edge table comes from the directed graph G while both metrics are
    computed on its undirected version, so every lookup goes through an
    orientation-independent key; a segment present in both directions gets the
    same value on both rows.

    Args:
        G: street multigraph whose edges carry a 'length' attribute.

    Returns:
        (edges, dual_G): the edge GeoDataFrame with the metric columns, and the
        dual graph (nx.Graph) whose nodes carry a 'primal_edge' attribute.
    """
    from itertools import combinations

    print("  Computing edge metrics...")

    def _canonical(u, v, k):
        """Return an orientation-independent identifier for edge (u, v, k)."""
        return tuple(sorted((u, v))) + (k,)

    G_undir = G.to_undirected()

    # 1. Edge betweenness (primal - distance weighted)
    print("    - Edge betweenness (primal)...")
    raw_edge_bc = nx.edge_betweenness_centrality(G_undir, weight='length', normalized=True)
    # Result keys are (u, v, key) for multigraphs and (u, v) for simple graphs;
    # index both shapes so the lookup below works in either case.
    keyed_bc = {}
    pair_bc = {}
    for edge, value in raw_edge_bc.items():
        if len(edge) == 3:
            keyed_bc[_canonical(*edge)] = value
        else:
            pair_bc[tuple(sorted(edge))] = value

    # 2. Create dual graph for angular analysis
    print("    - Building dual graph...")
    dual_G = nx.Graph()

    # Each edge in primal becomes a node in dual
    edge_to_node = {}
    for i, (u, v, k) in enumerate(G_undir.edges(keys=True)):
        edge_to_node[_canonical(u, v, k)] = i
        dual_G.add_node(i, primal_edge=(u, v, k))

    # Connect dual nodes if primal edges share a vertex
    for node in G_undir.nodes():
        incident = [
            edge_to_node[_canonical(*edge)]
            for edge in G_undir.edges(node, keys=True)
        ]
        for a, b in combinations(incident, 2):
            if a != b:
                dual_G.add_edge(a, b)

    # 3. Angular betweenness (dual graph)
    print("    - Angular betweenness (dual)...")
    if dual_G.number_of_edges() > 0:
        dual_bc = nx.betweenness_centrality(dual_G, weight=None, normalized=True)
    else:
        dual_bc = {}

    # Map back to primal edges
    angular_bc = {
        _canonical(*dual_G.nodes[dual_node]['primal_edge']): bc_val
        for dual_node, bc_val in dual_bc.items()
    }

    # Create GeoDataFrame (index entries are (u, v, key) tuples)
    edges = ox.graph_to_gdfs(G, nodes=False)

    # Add metrics; segments without a value default to 0
    edge_keys = [_canonical(*idx) for idx in edges.index]
    edges['edge_bc'] = [
        keyed_bc.get(key, pair_bc.get(key[:2], 0)) for key in edge_keys
    ]
    edges['angular_bc'] = [angular_bc.get(key, 0) for key in edge_keys]

    print("  ‚úì Edge metrics computed")
    return edges, dual_G

# Run the edge analysis for each city, keep both results, and persist the edges
for city_key, entry in city_data.items():
    print(f"\n{entry['name']}:")
    edges, dual_graph = compute_edge_metrics(entry['graph'])
    entry.update(edges=edges, dual_graph=dual_graph)

    # Save GeoJSON
    output_file = GEOJSON_DIR.joinpath(f"{city_key}_edges.geojson")
    edges.to_file(output_file, driver='GeoJSON')
    print(f"  ‚úì Saved to {output_file.name}")

In [None]:
# Extract blocks using polygonize
def extract_blocks(edges_gdf, min_area=None, max_area=None):
    """
    Extract urban blocks by polygonizing the street network.

    Every polygon enclosed by the edge geometries becomes a candidate block;
    candidates whose area lies outside [min_area, max_area] are dropped.

    Args:
        edges_gdf: GeoDataFrame of street segments (line geometries).
        min_area: smallest block area to keep; defaults to MIN_BLOCK_AREA.
        max_area: largest block area to keep; defaults to MAX_BLOCK_AREA.

    Returns:
        GeoDataFrame in the CRS of edges_gdf with columns 'geometry', 'area',
        'perimeter', 'compactness' (4*pi*area / perimeter**2), 'aspect_ratio'
        (long side / short side of the minimum rotated rectangle) and
        'block_id'. The same columns are present when no block is found.
    """
    print("  Extracting blocks...")

    # Resolve the thresholds at call time so the notebook-level settings apply
    if min_area is None:
        min_area = MIN_BLOCK_AREA
    if max_area is None:
        max_area = MAX_BLOCK_AREA

    def _mbr_aspect_ratio(geom):
        """Return long/short side ratio of the minimum rotated rectangle (1 if degenerate)."""
        mbr = geom.minimum_rotated_rectangle
        if mbr.geom_type != 'Polygon':
            # Degenerate input collapses to a line or point: no usable sides
            return 1
        coords = list(mbr.exterior.coords)
        side1 = Point(coords[0]).distance(Point(coords[1]))
        side2 = Point(coords[1]).distance(Point(coords[2]))
        shorter = min(side1, side2)
        return max(side1, side2) / shorter if shorter > 0 else 1

    # Polygonize all line geometries
    polygons = list(polygonize(list(edges_gdf.geometry)))

    # Build the frame even when empty so the result always has the same columns
    blocks_gdf = gpd.GeoDataFrame(geometry=polygons, crs=edges_gdf.crs)

    # Compute metrics
    blocks_gdf['area'] = blocks_gdf.geometry.area
    blocks_gdf['perimeter'] = blocks_gdf.geometry.length
    blocks_gdf['compactness'] = (4 * np.pi * blocks_gdf['area']) / (blocks_gdf['perimeter'] ** 2)

    # Filter by size
    blocks_gdf = blocks_gdf[
        (blocks_gdf['area'] >= min_area) &
        (blocks_gdf['area'] <= max_area)
    ].copy()

    # Compute aspect ratio using minimum bounding rectangle
    blocks_gdf['aspect_ratio'] = [_mbr_aspect_ratio(geom) for geom in blocks_gdf.geometry]
    # IDs are numbered after filtering, so they are contiguous
    blocks_gdf['block_id'] = [f"block_{i:03d}" for i in range(len(blocks_gdf))]

    if not polygons:
        print("  ‚ö† No blocks found")
        return blocks_gdf

    print(f"  ‚úì Extracted {len(blocks_gdf)} blocks")
    return blocks_gdf

# Polygonize each city's street edges into blocks; write a file only when
# at least one block survived
for city_key, entry in city_data.items():
    print(f"\n{entry['name']}:")
    blocks = extract_blocks(entry['edges'])
    entry['blocks'] = blocks

    if len(blocks) <= 0:
        continue

    output_file = GEOJSON_DIR.joinpath(f"{city_key}_blocks.geojson")
    blocks.to_file(output_file, driver='GeoJSON')
    print(f"  ‚úì Saved to {output_file.name}")

In [None]:
# Print key edge/block metrics for portfolio
print("\n" + "="*80)
print("üìä KEY EDGE & BLOCK METRICS (PORTFOLIO HIGHLIGHTS)")
print("="*80)

# Study area in km², derived from the download radius: a square of side
# 2*RADIUS metres (0.25 km² for RADIUS = 250).
study_area_km2 = (2 * RADIUS / 1000) ** 2

for city_key in city_data.keys():
    edges = city_data[city_key]['edges']
    blocks = city_data[city_key]['blocks']

    print(f"\n{city_data[city_key]['name'].upper()}:")

    # Edge metrics
    # NOTE(review): `edges` comes from the directed walk graph, where a two-way
    # street presumably appears once per direction — confirm whether the
    # length-based totals below are double counted.
    total_km = edges['length'].sum() / 1000
    print(f"\n  STREETS:")
    print(f"    Total segments: {len(edges)}")
    print(f"    Total length: {total_km:.2f} km")
    print(f"    Avg segment length: {edges['length'].mean():.1f} m")
    print(f"    Median segment length: {edges['length'].median():.1f} m")
    print(f"    Street density: {total_km / study_area_km2:.1f} km/km¬≤")
    print(f"    Max angular betweenness: {edges['angular_bc'].max():.4f}")

    # Block metrics (skipped for cities where no block was extracted)
    if len(blocks) > 0:
        print(f"\n  BLOCKS:")
        print(f"    Total blocks: {len(blocks)}")
        print(f"    Avg block area: {blocks['area'].mean():.0f} m¬≤")
        print(f"    Median block area: {blocks['area'].median():.0f} m¬≤")
        print(f"    Avg compactness: {blocks['compactness'].mean():.2f}")
        print(f"    Avg aspect ratio: {blocks['aspect_ratio'].mean():.2f}")

## 5. District Analysis (Community Detection)

In [None]:
# Install community detection library if needed
# The package installed as `python-louvain` is imported under the module name `community`.
try:
    import community as community_louvain
except ImportError:
    print("Installing python-louvain...")
    # IPython shell escape (not plain Python): only valid in a notebook/IPython session.
    # NOTE(review): `%pip install` is the form tied to the running kernel's environment — consider switching.
    !pip install python-louvain
    import community as community_louvain

In [None]:
def detect_districts(G, method='distance', random_state=None):
    """
    Detect urban districts using Louvain community detection.

    Methods:
    - 'distance': edges weighted by segment 'length'
    - 'topological': pure connectivity (no weights)
    - 'angular': currently identical to 'topological'; a dual-graph version
      is not implemented. Any other value is treated the same way.

    Args:
        G: street graph (may be directed and may contain parallel edges).
        method: one of the method names above.
        random_state: optional seed forwarded to the Louvain routine so that
            a partition can be reproduced; left out of the call when None.

    Returns:
        dict mapping every node of G to an integer community id.
    """
    print(f"    - Detecting districts ({method})...")

    # Louvain is run on a simple undirected graph: parallel and reciprocal
    # edges are collapsed to the shortest one, so each node pair carries a
    # single, unambiguous 'length' value.
    simple = nx.Graph()
    simple.add_nodes_from(G.nodes())
    for u, v, data in G.edges(data=True):
        length = data.get('length')
        if simple.has_edge(u, v):
            kept = simple[u][v].get('length')
            if length is None or (kept is not None and kept <= length):
                continue
        if length is None:
            simple.add_edge(u, v)
        else:
            simple.add_edge(u, v, length=length)

    # NOTE(review): Louvain treats the weight as tie strength, so longer
    # segments bind their endpoints more strongly — confirm that this is the
    # intended meaning of the 'distance' method.
    weight = 'length' if method == 'distance' else None

    extra = {} if random_state is None else {'random_state': random_state}
    partition = community_louvain.best_partition(simple, weight=weight, **extra)

    return partition

def partition_to_geodataframe(nodes_gdf, partition):
    """
    Return a copy of nodes_gdf with a 'district' column.

    The column holds, for each row, the value that `partition` maps the row's
    index label to. The input frame is left untouched.
    """
    district_labels = nodes_gdf.index.map(partition)
    return nodes_gdf.assign(district=district_labels)

# Partition every city's network with each of the three methods and write one
# GeoJSON of district-labelled nodes per (city, method) pair
for city_key, entry in city_data.items():
    print(f"\n{entry['name']}:")

    G = entry['graph']
    nodes = entry['nodes']

    # Three methods
    partitions = {}
    for method in ('distance', 'angular', 'topological'):
        partition = detect_districts(G, method=method)
        partitions[method] = partition

        # Convert to GeoDataFrame and save
        nodes_districts = partition_to_geodataframe(nodes, partition)
        output_file = GEOJSON_DIR.joinpath(f"{city_key}_districts_{method}.geojson")
        nodes_districts.to_file(output_file, driver='GeoJSON')

        # Number of distinct community ids in this partition
        num_districts = len({*partition.values()})
        print(f"      {method}: {num_districts} districts")

    entry['partitions'] = partitions
    print(f"  ‚úì District detection complete")

In [None]:
# Summarise how many districts each method produced per city
print("\n" + "="*80)
print("üìä DISTRICT METRICS")
print("="*80)

for city_key, entry in city_data.items():
    partitions = entry['partitions']
    print(f"\n{entry['name'].upper()}:")

    for method in partitions:
        num_districts = len({*partitions[method].values()})
        print(f"  {method.capitalize()} partition: {num_districts} districts")

## 6. Landmark Analysis (Building Scores)

In [None]:
def compute_building_landmark_scores(buildings_gdf, edges_gdf):
    """
    Compute landmark scores for buildings.

    Columns added to a copy of buildings_gdf:
    - area, s_area: footprint area and its min-max normalised value
    - dist_to_street, s_visibility: distance to the nearest street geometry
      and 1 - distance / max distance (closer = more visible)
    - structural_score: 0.6 * s_area + 0.4 * s_visibility
    - visual_score: min-max normalised 'height' when that column exists,
      otherwise 0.5; buildings without a numeric height also get 0.5
    - cultural_score: 0.25 per non-null tag among historic / tourism /
      amenity / heritage, clipped to [0, 1]
    - pragmatic_score: 1.0 when 'building', 'amenity' or 'tourism' mentions
      one of the important uses, else 0.0
    - global_score: 0.4 structural + 0.2 visual + 0.2 cultural + 0.2 pragmatic

    When present, the 'height' column is converted to numeric in the result
    (unparseable values become NaN).

    Args:
        buildings_gdf: building footprints (polygon geometries).
        edges_gdf: street segments in the same CRS as the buildings.

    Returns:
        Copy of buildings_gdf with the score columns added.
    """
    print("  Computing landmark scores...")

    def _min_max(series, neutral=0.5):
        """Rescale to [0, 1]; use `neutral` for missing values or a zero range."""
        low = series.min()
        span = series.max() - low
        if not np.isfinite(span) or span == 0:
            # No spread (or no data): every row gets the same neutral score
            return pd.Series(neutral, index=series.index, dtype=float)
        return ((series - low) / span).fillna(neutral)

    buildings = buildings_gdf.copy()

    # 1. Structural Score (area-based)
    buildings['area'] = buildings.geometry.area
    buildings['s_area'] = _min_max(buildings['area'])

    # 2D visibility approximation: Distance to nearest street
    print("    - Computing visibility...")
    street_union = unary_union(edges_gdf.geometry)
    buildings['dist_to_street'] = buildings.geometry.apply(
        lambda geom: geom.distance(street_union)
    )
    # Inverse distance = visibility (closer = more visible)
    max_dist = buildings['dist_to_street'].max()
    if max_dist > 0:
        buildings['s_visibility'] = 1 - (buildings['dist_to_street'] / max_dist)
    else:
        buildings['s_visibility'] = 1

    # Structural score (weighted combination)
    buildings['structural_score'] = 0.6 * buildings['s_area'] + 0.4 * buildings['s_visibility']

    # 2. Visual Score (height if available)
    if 'height' in buildings.columns:
        buildings['height'] = pd.to_numeric(buildings['height'], errors='coerce')
        # Missing heights fall back to the same 0.5 used when there is no
        # height column at all, so they do not turn global_score into NaN
        buildings['visual_score'] = _min_max(buildings['height'])
    else:
        buildings['visual_score'] = 0.5  # Default

    # 3. Cultural Score (tags like historic, tourism, amenity)
    cultural_tags = ['historic', 'tourism', 'amenity', 'heritage']
    buildings['cultural_score'] = 0.0
    for tag in cultural_tags:
        if tag in buildings.columns:
            buildings.loc[buildings[tag].notna(), 'cultural_score'] += 0.25
    buildings['cultural_score'] = buildings['cultural_score'].clip(0, 1)

    # 4. Pragmatic Score (land use)
    # High score for unique/important uses
    important_uses = ['school', 'hospital', 'university', 'museum', 'church', 'mosque', 'temple', 'government']
    use_pattern = '|'.join(important_uses)  # one case-insensitive match per column
    buildings['pragmatic_score'] = 0.0

    for col in ['building', 'amenity', 'tourism']:
        if col in buildings.columns:
            mask = buildings[col].astype(str).str.contains(use_pattern, case=False, na=False)
            buildings.loc[mask, 'pragmatic_score'] = 1.0

    # 5. Global Landmark Score (weighted average)
    buildings['global_score'] = (
        0.4 * buildings['structural_score'] +
        0.2 * buildings['visual_score'] +
        0.2 * buildings['cultural_score'] +
        0.2 * buildings['pragmatic_score']
    )

    print("  ‚úì Landmark scores computed")
    return buildings

# Score the buildings of every city and export a slimmed-down GeoJSON
for city_key, entry in city_data.items():
    print(f"\n{entry['name']}:")
    buildings_scored = compute_building_landmark_scores(entry['buildings'], entry['edges'])
    entry['buildings_scored'] = buildings_scored

    # Save
    output_file = GEOJSON_DIR.joinpath(f"{city_key}_buildings.geojson")
    # Only the geometry and score columns are written, to limit file size;
    # columns missing from the frame are skipped
    wanted_cols = ('geometry', 'area', 'structural_score', 'visual_score',
                   'cultural_score', 'pragmatic_score', 'global_score')
    cols_to_save = [col for col in wanted_cols if col in buildings_scored.columns]
    buildings_scored[cols_to_save].to_file(output_file, driver='GeoJSON')
    print(f"  ‚úì Saved to {output_file.name}")

In [None]:
# Compute building geometry metrics for project brief
def compute_building_geometry_metrics(buildings_gdf, blocks_gdf, edges_gdf):
    """
    Compute building geometry metrics.

    Per building (columns added to a copy of buildings_gdf):
    - aspect_ratio: long side / short side of the minimum rotated rectangle
    - has_courtyard: True when the footprint (or any part of a multi-part
      footprint) has an interior ring
    - setback_dist: distance to the nearest street; reuses 'dist_to_street'
      when that column is already present

    Per block (columns added to a copy of blocks_gdf, only when it is
    non-empty):
    - building_coverage: summed area of the buildings lying within the block,
      divided by the block area
    - building_count: number of buildings lying within the block

    Args:
        buildings_gdf: building footprints.
        blocks_gdf: blocks with an 'area' column; may be empty.
        edges_gdf: street segments, used only if 'dist_to_street' is missing.

    Returns:
        (buildings, blocks, summary) where summary has the keys
        'courtyard_frequency' (share of buildings with a courtyard) and
        'avg_coverage' (mean building_coverage over blocks, 0 without blocks).
    """
    print("  Computing geometry metrics...")

    def _mbr_aspect_ratio(geom):
        """Return long/short side ratio of the minimum rotated rectangle (1 if degenerate)."""
        mbr = geom.minimum_rotated_rectangle
        if mbr.geom_type != 'Polygon':
            # Degenerate footprint collapses to a line or point
            return 1
        coords = list(mbr.exterior.coords)
        side1 = Point(coords[0]).distance(Point(coords[1]))
        side2 = Point(coords[1]).distance(Point(coords[2]))
        shorter = min(side1, side2)
        return max(side1, side2) / shorter if shorter > 0 else 1

    def _has_courtyard(geom):
        """Return True when a Polygon, or any part of a MultiPolygon, has a hole."""
        if geom.geom_type == 'Polygon':
            return len(geom.interiors) > 0
        if geom.geom_type == 'MultiPolygon':
            return any(len(part.interiors) > 0 for part in geom.geoms)
        return False

    buildings = buildings_gdf.copy()

    # Aspect ratio (length/width via minimum rotated rectangle)
    buildings['aspect_ratio'] = [_mbr_aspect_ratio(geom) for geom in buildings.geometry]

    # Courtyard frequency (buildings with holes)
    buildings['has_courtyard'] = buildings.geometry.apply(_has_courtyard)
    courtyard_freq = buildings['has_courtyard'].sum() / len(buildings) if len(buildings) > 0 else 0

    # Setback distance (distance to nearest road)
    if 'dist_to_street' not in buildings.columns:
        street_union = unary_union(edges_gdf.geometry)
        buildings['setback_dist'] = buildings.geometry.apply(lambda geom: geom.distance(street_union))
    else:
        buildings['setback_dist'] = buildings['dist_to_street']

    # Building coverage ratio (compute per block)
    if len(blocks_gdf) > 0:
        print("    - Computing coverage ratios...")
        blocks = blocks_gdf.copy()

        if 'area' not in buildings.columns:
            buildings['area'] = buildings.geometry.area

        # Spatial join on the minimum set of columns: buildings and blocks
        # both carry 'area' and 'aspect_ratio', and a join of the full frames
        # would rename those shared columns with suffixes.
        buildings_in_blocks = gpd.sjoin(
            buildings[['geometry', 'area']],
            blocks[['geometry']],
            how='left',
            predicate='within',
        )

        # Aggregate building area and count per block; blocks without any
        # building get 0 for both
        per_block = buildings_in_blocks.groupby('index_right')['area']
        block_building_area = per_block.sum().reindex(blocks.index, fill_value=0)
        blocks['building_coverage'] = block_building_area / blocks['area']
        blocks['building_count'] = per_block.size().reindex(blocks.index, fill_value=0)

        avg_coverage = blocks['building_coverage'].mean()
    else:
        avg_coverage = 0
        blocks = blocks_gdf

    print("  ‚úì Geometry metrics computed")

    return buildings, blocks, {
        'courtyard_frequency': courtyard_freq,
        'avg_coverage': avg_coverage
    }

# Derive geometry metrics per city; the enriched building and block tables
# replace the ones stored earlier
for city_key, entry in city_data.items():
    print(f"\n{entry['name']}:")
    buildings_geom, blocks_geom, metrics = compute_building_geometry_metrics(
        entry['buildings_scored'], entry['blocks'], entry['edges']
    )

    entry.update(
        buildings_scored=buildings_geom,
        blocks=blocks_geom,
        geometry_metrics=metrics,
    )

In [None]:
# Report building statistics and the ten highest-scoring landmarks per city
print("\n" + "="*80)
print("üìä KEY BUILDING METRICS (PORTFOLIO HIGHLIGHTS)")
print("="*80)

for city_key, entry in city_data.items():
    buildings = entry['buildings_scored']
    geom_metrics = entry['geometry_metrics']
    footprint = buildings['area']
    courtyard_pct = geom_metrics['courtyard_frequency'] * 100
    coverage_pct = geom_metrics['avg_coverage'] * 100

    print(f"\n{entry['name'].upper()}:")
    print(f"  Total buildings: {len(buildings)}")
    print(f"  Avg footprint area: {footprint.mean():.0f} m¬≤")
    print(f"  Median footprint area: {footprint.median():.0f} m¬≤")
    print(f"  Avg aspect ratio: {buildings['aspect_ratio'].mean():.2f}")
    print(f"  Avg setback distance: {buildings['setback_dist'].mean():.2f} m")
    print(f"  Courtyard frequency: {courtyard_pct:.1f}%")
    print(f"  Avg building coverage ratio: {coverage_pct:.1f}%")

    # Ranked list, best first, numbered from 1
    print(f"  Top 10 landmarks (global score):")
    top_landmarks = buildings.nlargest(10, 'global_score')[['area', 'global_score']]
    ranked = zip(top_landmarks['area'], top_landmarks['global_score'])
    for idx, (area, score) in enumerate(ranked, start=1):
        print(f"    {idx}. Area: {area:.0f} m¬≤, Score: {score:.3f}")

## 7. Building Block Library Extraction

In [None]:
def extract_building_block_library(blocks_gdf, buildings_gdf, city_key, target_count=35):
    """
    Extract representative building blocks as reusable templates.

    Blocks are sorted by area and, when there are more than target_count of
    them, sampled at evenly spaced positions of that ordering. Each library
    entry is a plain dict containing:
    - block_id, city
    - area, perimeter, compactness, aspect_ratio of the block
    - building_count: buildings lying within the block
    - building_coverage: taken from the block row (0 when absent)
    - block_boundary and buildings: GeoJSON-like geometries translated so the
      block centroid is the origin; only exterior rings are exported

    Args:
        blocks_gdf: blocks with 'area', 'perimeter', 'compactness' and
            'aspect_ratio' columns; may be empty.
        buildings_gdf: building footprints (Polygon or MultiPolygon).
        city_key: slug used in 'block_id' and 'city'.
        target_count: maximum number of blocks to return.

    Returns:
        List of library entries (empty when there are no blocks).
    """
    print(f"  Extracting {target_count} blocks for library...")

    if len(blocks_gdf) == 0:
        print("  ‚ö† No blocks available")
        return []

    def _relative_geojson(geom, origin):
        """Translate geom so `origin` becomes (0, 0) and return its exterior ring(s)."""
        moved = translate(geom, xoff=-origin.x, yoff=-origin.y)
        if moved.geom_type == 'MultiPolygon':
            # Multi-part footprints have no single exterior: export one per part
            return {
                'type': 'MultiPolygon',
                'coordinates': [[list(part.exterior.coords)] for part in moved.geoms]
            }
        return {
            'type': 'Polygon',
            'coordinates': [list(moved.exterior.coords)]
        }

    # Sort by area for diverse selection
    blocks = blocks_gdf.copy().sort_values('area')

    # Select evenly spaced blocks across area distribution
    if len(blocks) <= target_count:
        selected_blocks = blocks
    else:
        indices = np.linspace(0, len(blocks)-1, target_count, dtype=int)
        selected_blocks = blocks.iloc[indices]

    library = []

    for idx, (_, block_row) in enumerate(selected_blocks.iterrows()):
        block_geom = block_row.geometry
        block_centroid = block_geom.centroid

        # Find buildings within this block
        buildings_in_block = buildings_gdf[buildings_gdf.geometry.within(block_geom)]

        # Building footprints relative to the block centroid
        buildings_relative = [
            _relative_geojson(geom, block_centroid)
            for geom in buildings_in_block.geometry
        ]

        library.append({
            'block_id': f"{city_key}_block_{idx:03d}",
            'city': city_key,
            'area': float(block_row['area']),
            'perimeter': float(block_row['perimeter']),
            'compactness': float(block_row['compactness']),
            'aspect_ratio': float(block_row['aspect_ratio']),
            'building_count': len(buildings_in_block),
            'building_coverage': float(block_row.get('building_coverage', 0)),
            'block_boundary': _relative_geojson(block_geom, block_centroid),
            'buildings': buildings_relative
        })

    print(f"  ‚úì Extracted {len(library)} blocks")
    return library

# Build each city's block library, keep it on the city record, and collect
# everything into one combined list
all_blocks_library = []

for city_key, entry in city_data.items():
    print(f"\n{entry['name']}:")

    library = extract_building_block_library(
        entry['blocks'],
        entry['buildings_scored'],
        city_key,
        target_count=BLOCKS_PER_CITY,
    )

    all_blocks_library += library
    entry['library'] = library

separator = '=' * 60
print(f"\n{separator}")
print(f"‚úì Total library size: {len(all_blocks_library)} blocks")
print(f"{separator}")

# Save library to JSON
library_file = METRICS_DIR.joinpath('building_blocks_library.json')
with library_file.open('w') as f:
    json.dump(all_blocks_library, f, indent=2)

print(f"‚úì Saved library to {library_file.name}")

In [None]:
# Summarise each city's block library; cities with an empty library are skipped
print("\n" + "="*80)
print("üìö BUILDING BLOCK LIBRARY STATISTICS")
print("="*80)

for city_key, entry in city_data.items():
    library = entry['library']
    if len(library) == 0:
        continue

    areas, coverages, building_counts = [], [], []
    for block in library:
        areas.append(block['area'])
        coverages.append(block['building_coverage'])
        building_counts.append(block['building_count'])

    print(f"\n{entry['name'].upper()}:")
    print(f"  Blocks in library: {len(library)}")
    print(f"  Area range: {min(areas):.0f} - {max(areas):.0f} m¬≤")
    print(f"  Avg coverage: {np.mean(coverages)*100:.1f}%")
    print(f"  Avg buildings per block: {np.mean(building_counts):.1f}")

## 8. Metrics Aggregation & JSON Export

In [None]:
def compute_distribution(values, bins=20):
    """
    Compute histogram distribution and summary statistics for metrics JSON

    Non-finite entries (NaN, +inf, -inf) are discarded before anything is
    computed: np.histogram cannot derive a bin range from them and would
    raise, and NaN statistics would not be valid JSON numbers.

    Args:
        values: Array-like of numbers (list, NumPy array, Series values).
        bins: Bin specification forwarded to np.histogram (default 20).

    Returns:
        dict with keys 'bins' (bin edges), 'counts', 'mean', 'median',
        'std', 'min' and 'max', all as plain Python lists/floats. When no
        finite value is available, 'bins' and 'counts' are empty lists and
        every statistic is 0, so the key set is the same in both cases.
    """
    data = np.asarray(values, dtype=float).ravel()
    data = data[np.isfinite(data)]

    if data.size == 0:
        return {
            'bins': [],
            'counts': [],
            'mean': 0,
            'median': 0,
            'std': 0,
            'min': 0,
            'max': 0
        }

    hist, bin_edges = np.histogram(data, bins=bins)

    return {
        'bins': bin_edges.tolist(),
        'counts': hist.tolist(),
        'mean': float(np.mean(data)),
        'median': float(np.median(data)),
        'std': float(np.std(data)),
        'min': float(np.min(data)),
        'max': float(np.max(data))
    }

# Collect every per-city metric into one nested dict keyed by city, then
# write it to urban_metrics.json under a top-level 'urban_metrics' key.
urban_metrics = {}

for city_key, city in city_data.items():
    print(f"\nAggregating metrics for {city['name']}...")

    nodes = city['nodes']
    edges = city['edges']
    blocks = city['blocks']
    buildings = city['buildings_scored']
    partitions = city['partitions']
    geom_metrics = city['geometry_metrics']

    # Node degree -> number of nodes, with plain Python ints on both sides
    degree_dist = {
        int(degree): int(count)
        for degree, count in nodes['degree'].value_counts().to_dict().items()
    }

    node_section = {
        'total_count': len(nodes),
        'avg_degree': float(nodes['degree'].mean()),
        'degree_distribution': degree_dist,
    }
    for column in ('bc_distance', 'bc_information', 'reach_200m', 'reach_300m'):
        node_section[column] = compute_distribution(nodes[column].values)

    # NOTE(review): the 0.25 divisor is a hard-coded area in km²; it
    # presumably stands for the 500 m x 500 m study window - confirm.
    total_length_km = edges['length'].sum() / 1000
    edge_section = {
        'total_count': len(edges),
        'total_length_km': float(total_length_km),
        'density_km_per_km2': float(total_length_km / 0.25),
        'segment_length_distribution': compute_distribution(edges['length'].values),
        'angular_bc_distribution': compute_distribution(edges['angular_bc'].values),
    }

    # The block columns are only read when at least one block exists
    has_blocks = len(blocks) > 0
    block_section = {'total_count': len(blocks)}
    for column in ('area', 'compactness', 'aspect_ratio'):
        block_section[f'{column}_distribution'] = (
            compute_distribution(blocks[column].values) if has_blocks else {}
        )

    building_section = {
        'total_count': len(buildings),
        'area_distribution': compute_distribution(buildings['area'].values),
        'aspect_ratio_distribution': compute_distribution(buildings['aspect_ratio'].values),
        'setback_distribution': compute_distribution(buildings['setback_dist'].values),
        'avg_coverage_ratio': float(geom_metrics['avg_coverage']),
        'courtyard_frequency': float(geom_metrics['courtyard_frequency']),
    }

    # Number of distinct district ids produced by each partition method
    district_section = {
        f'count_{method}': len(set(partitions[method].values()))
        for method in ('distance', 'angular', 'topological')
    }

    urban_metrics[city_key] = {
        'name': city['name'],
        'nodes': node_section,
        'edges': edge_section,
        'blocks': block_section,
        'buildings': building_section,
        'districts': district_section,
    }

# Save to JSON
metrics_file = METRICS_DIR / 'urban_metrics.json'
with open(metrics_file, 'w') as f:
    json.dump({'urban_metrics': urban_metrics}, f, indent=2)

rule = '=' * 60
print(f"\n{rule}")
print(f"‚úì Metrics saved to {metrics_file.name}")
print(rule)

## 9. Visualizations
Creating portfolio-quality visualizations (PNG + SVG)

### 9.1 TIER 1: Comparative Street Network Betweenness Maps

In [None]:
# TIER 1 #1: Comparative Betweenness Maps
# One panel per city; street segments are shaded by their 'angular_bc'
# value on a yellow-to-red ramp over a dark background.
fig, axes = plt.subplots(1, 3, figsize=(24, 8), facecolor='#1a1a1a')

for idx, (city_key, city) in enumerate(city_data.items()):
    ax = axes[idx]

    city['edges'].plot(
        ax=ax,
        column='angular_bc',
        cmap='YlOrRd',
        linewidth=2,
        legend=False,
    )

    ax.set_title(city['name'], fontsize=20, color='white', pad=20)
    ax.axis('off')
    ax.set_facecolor('#1a1a1a')

plt.suptitle(
    'Angular Betweenness Centrality: Urban Movement Spines',
    fontsize=24,
    color='white',
    y=0.98,
)
plt.tight_layout()

# Write the raster (300 dpi) and vector versions before showing the figure
png_path = VIZ_PNG_DIR / 'tier1_betweenness_comparison.png'
svg_path = VIZ_SVG_DIR / 'tier1_betweenness_comparison.svg'
plt.savefig(png_path, dpi=300, facecolor='#1a1a1a', bbox_inches='tight')
plt.savefig(svg_path, facecolor='#1a1a1a', bbox_inches='tight')
plt.show()

print("‚úì Saved: tier1_betweenness_comparison (PNG + SVG)")

### 9.2 TIER 1: Building Block Library Catalog

In [None]:
# TIER 1 #2: Building Block Library (3×4 grid)
# Figure-ground catalog: each panel shows one library block (light fill)
# with its buildings drawn in solid black.
fig, axes = plt.subplots(3, 4, figsize=(20, 15), facecolor='white')
axes = axes.flatten()

# Select up to 12 blocks (up to 4 per city)
selected_blocks = []
for city_key in city_data.keys():
    library = city_data[city_key]['library']
    count = len(library)
    if count >= 4:
        # First, one-third, two-thirds and last entry of the library.
        # NOTE(review): this yields "small, medium, large, very large"
        # only if the library is ordered by area - confirm in
        # extract_building_block_library.
        indices = [0, count // 3, 2 * count // 3, count - 1]
    else:
        # A city with fewer than four blocks still contributes everything
        # it has instead of being dropped from the catalog.
        indices = range(count)
    selected_blocks.extend(library[i] for i in indices)

for idx, block_data in enumerate(selected_blocks[:12]):
    ax = axes[idx]

    # Draw block boundary (outer ring of the stored polygon)
    block_poly = Polygon(block_data['block_boundary']['coordinates'][0])
    x, y = block_poly.exterior.xy
    ax.fill(x, y, color='#f0f0f0', edgecolor='black', linewidth=1)

    # Draw buildings (figure-ground); only the outer ring is filled
    for bldg in block_data['buildings']:
        bldg_poly = Polygon(bldg['coordinates'][0])
        x, y = bldg_poly.exterior.xy
        ax.fill(x, y, color='black')

    # Metadata
    ax.set_title(
        f"{block_data['city'].upper()}\n"
        f"{block_data['area']:.0f} m¬≤ | "
        f"Coverage: {block_data['building_coverage']*100:.0f}% | "
        f"AR: {block_data['aspect_ratio']:.1f}",
        fontsize=10
    )

    ax.set_aspect('equal')
    ax.axis('off')

# Hide unused subplots
for idx in range(len(selected_blocks), 12):
    axes[idx].axis('off')

plt.suptitle('Building Block Library: Urban DNA Samples', fontsize=24, y=0.98)
plt.tight_layout()

plt.savefig(VIZ_PNG_DIR / 'tier1_block_library.png', dpi=300, bbox_inches='tight')
plt.savefig(VIZ_SVG_DIR / 'tier1_block_library.svg', bbox_inches='tight')
plt.show()

print("‚úì Saved: tier1_block_library (PNG + SVG)")

### 9.3 TIER 1: Multi-Metric Comparative Dashboard

In [None]:
# TIER 1 #3: Multi-Metric Dashboard
# 2x3 grid of comparison panels. Each entry of metrics_to_plot is
# (category, metric_key, panel title, stat_key):
#   - stat_key set       -> one vertical line per city at that statistic
#   - degree_distribution -> grouped bar chart of node degrees
#   - otherwise          -> one bar per city for a scalar metric
fig, axes = plt.subplots(2, 3, figsize=(20, 12), facecolor='white')

metrics_to_plot = [
    ('edges', 'segment_length_distribution', 'Segment Length (m)', 'mean'),
    ('blocks', 'area_distribution', 'Block Area (m¬≤)', 'mean'),
    ('nodes', 'degree_distribution', 'Intersection Degree', None),
    ('buildings', 'avg_coverage_ratio', 'Building Coverage (%)', None),
    ('buildings', 'aspect_ratio_distribution', 'Building Aspect Ratio', 'mean'),
    ('edges', 'density_km_per_km2', 'Street Density (km/km¬≤)', None)
]

for idx, (category, metric_key, title, stat_key) in enumerate(metrics_to_plot):
    ax = axes[idx // 3, idx % 3]

    if stat_key:  # Distribution plot
        for city_key in city_data.keys():
            # A city without blocks stores {} here, so the stat may be absent
            metric = urban_metrics[city_key][category].get(metric_key, {})
            if stat_key in metric:
                ax.axvline(
                    metric[stat_key],
                    label=city_data[city_key]['name'],
                    color=city_data[city_key]['color'],
                    linewidth=3,
                    alpha=0.7
                )
    elif metric_key == 'degree_distribution':  # Bar chart
        # Union of all degrees seen in any city, so the groups line up
        degrees = set()
        for city_key in city_data.keys():
            degrees.update(urban_metrics[city_key]['nodes']['degree_distribution'].keys())
        degrees = sorted(degrees)

        x = np.arange(len(degrees))
        width = 0.25

        for i, city_key in enumerate(city_data.keys()):
            counts = [urban_metrics[city_key]['nodes']['degree_distribution'].get(d, 0) for d in degrees]
            ax.bar(
                x + i * width,
                counts,
                width,
                label=city_data[city_key]['name'],
                color=city_data[city_key]['color'],
                alpha=0.7
            )

        # Centre each tick under its group of bars; for three cities this
        # is x + width, and it stays centred for any other city count.
        ax.set_xticks(x + width * (len(city_data) - 1) / 2)
        ax.set_xticklabels([f"{int(d)}-way" for d in degrees])
    else:  # Single value bar
        values = []
        labels = []
        colors = []

        for city_key in city_data.keys():
            val = urban_metrics[city_key][category][metric_key]
            if 'coverage' in metric_key:
                val *= 100  # ratio -> percent, matching the panel title
            values.append(val)
            labels.append(city_data[city_key]['name'].split(',')[0])
            colors.append(city_data[city_key]['color'])

        ax.bar(labels, values, color=colors, alpha=0.7)

    ax.set_title(title, fontsize=14, pad=10)
    # The single-value panels draw no labelled artists; requesting a legend
    # there only triggers a "no artists with labels" warning.
    legend_handles, _ = ax.get_legend_handles_labels()
    if legend_handles:
        ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3)

plt.suptitle('Comparative Urban Metrics: Three City Patterns', fontsize=20, y=0.98)
plt.tight_layout()

plt.savefig(VIZ_PNG_DIR / 'tier1_metrics_dashboard.png', dpi=300, bbox_inches='tight')
plt.savefig(VIZ_SVG_DIR / 'tier1_metrics_dashboard.svg', bbox_inches='tight')
plt.show()

print("‚úì Saved: tier1_metrics_dashboard (PNG + SVG)")

### 9.4 TIER 2: District Identification

In [None]:
# TIER 2 #1: District Identification (3×3 grid)
# Rows are cities, columns are partition methods. Nodes are coloured by
# their 'district' value over a muted street network.
fig, axes = plt.subplots(3, 3, figsize=(20, 20), facecolor='#1a1a1a')

methods = ['distance', 'angular', 'topological']
method_titles = ['Distance-Based', 'Angular-Based', 'Topological']

for row, city_key in enumerate(city_data.keys()):
    for col, method in enumerate(methods):
        ax = axes[row, col]

        # Load partition from the GeoJSON exported for this city/method
        partition_file = GEOJSON_DIR / f"{city_key}_districts_{method}.geojson"
        nodes_districts = gpd.read_file(partition_file)

        # Plot edges in gray
        edges = city_data[city_key]['edges']
        edges.plot(ax=ax, color='#333333', linewidth=1, alpha=0.5)

        # Plot nodes colored by district
        nodes_districts.plot(
            ax=ax,
            column='district',
            cmap='tab20',
            markersize=50,
            legend=False
        )

        if row == 0:
            ax.set_title(method_titles[col], fontsize=16, color='white', pad=10)
        if col == 0:
            ax.set_ylabel(city_data[city_key]['name'], fontsize=14, color='white', rotation=90, labelpad=15)

        # ax.axis('off') would also hide the y-label set above, so the city
        # names would never show. Strip ticks and spines individually to get
        # the same frameless look while keeping the row labels.
        ax.set_xticks([])
        ax.set_yticks([])
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_facecolor('#1a1a1a')

plt.suptitle('District Identification: Community Detection Methods', fontsize=24, color='white', y=0.98)
plt.tight_layout()

plt.savefig(VIZ_PNG_DIR / 'tier2_districts.png', dpi=300, facecolor='#1a1a1a', bbox_inches='tight')
plt.savefig(VIZ_SVG_DIR / 'tier2_districts.svg', facecolor='#1a1a1a', bbox_inches='tight')
plt.show()

print("‚úì Saved: tier2_districts (PNG + SVG)")

### 9.5 TIER 2: Landmark Identification

In [None]:
# TIER 2 #2: Landmark Maps
# Rows are score columns, columns are cities. Buildings are shaded by the
# row's score with a colour bar; streets sit underneath in dark gray.
fig, axes = plt.subplots(2, 3, figsize=(24, 16), facecolor='#1a1a1a')

score_types = ['structural_score', 'global_score']
score_titles = ['Structural Score', 'Global Landmark Score']

for row, (score_type, score_title) in enumerate(zip(score_types, score_titles)):
    for col, (city_key, city) in enumerate(city_data.items()):
        ax = axes[row, col]

        # Street network first so the buildings are drawn on top of it
        city['edges'].plot(ax=ax, color='#333333', linewidth=0.5)

        city['buildings_scored'].plot(
            ax=ax,
            column=score_type,
            cmap='hot',
            legend=True,
            legend_kwds={'label': score_title, 'shrink': 0.8},
        )

        # City names only head the top row
        if row == 0:
            ax.set_title(city['name'], fontsize=16, color='white', pad=10)

        ax.axis('off')
        ax.set_facecolor('#1a1a1a')

plt.suptitle(
    'Landmark Identification: Building Importance Scores',
    fontsize=24,
    color='white',
    y=0.98,
)
plt.tight_layout()

plt.savefig(VIZ_PNG_DIR / 'tier2_landmarks.png', dpi=300, facecolor='#1a1a1a', bbox_inches='tight')
plt.savefig(VIZ_SVG_DIR / 'tier2_landmarks.svg', facecolor='#1a1a1a', bbox_inches='tight')
plt.show()

print("‚úì Saved: tier2_landmarks (PNG + SVG)")

### 9.6 TIER 2: Node Centrality Networks

In [None]:
# TIER 2 #3: Reach Centrality (300m)
# One panel per city. Each node is both coloured and sized by its
# 'reach_300m' value, drawn over a faded street network.
fig, axes = plt.subplots(1, 3, figsize=(24, 8), facecolor='#1a1a1a')

for idx, (city_key, city) in enumerate(city_data.items()):
    ax = axes[idx]
    nodes = city['nodes']

    # Background streets
    city['edges'].plot(ax=ax, color='#333333', linewidth=1, alpha=0.5)

    # Marker area is twice the node's reach value
    reach_legend = {'label': 'Reachable Nodes (300m)', 'shrink': 0.8}
    nodes.plot(
        ax=ax,
        column='reach_300m',
        cmap='viridis',
        markersize=nodes['reach_300m'] * 2,
        legend=True,
        legend_kwds=reach_legend,
    )

    ax.set_title(city['name'], fontsize=20, color='white', pad=20)
    ax.axis('off')
    ax.set_facecolor('#1a1a1a')

plt.suptitle(
    'Reach Centrality: Walkable Access Within 300m',
    fontsize=24,
    color='white',
    y=0.98,
)
plt.tight_layout()

plt.savefig(VIZ_PNG_DIR / 'tier2_reach_centrality.png', dpi=300, facecolor='#1a1a1a', bbox_inches='tight')
plt.savefig(VIZ_SVG_DIR / 'tier2_reach_centrality.svg', facecolor='#1a1a1a', bbox_inches='tight')
plt.show()

print("‚úì Saved: tier2_reach_centrality (PNG + SVG)")

### 9.7 Additional: Comparative Histograms

In [None]:
# Comparative Histograms
# 2x2 grid; each panel overlays one line per city, built from the stored
# histogram (bin midpoints on x, raw counts on y).
fig, axes = plt.subplots(2, 2, figsize=(16, 12), facecolor='white')

distributions = [
    ('edges', 'segment_length_distribution', 'Segment Length (m)'),
    ('blocks', 'area_distribution', 'Block Area (m¬≤)'),
    ('buildings', 'area_distribution', 'Building Area (m¬≤)'),
    ('buildings', 'aspect_ratio_distribution', 'Building Aspect Ratio')
]

# axes.flat walks the grid row by row, matching the order of the list
for ax, (category, metric_key, title) in zip(axes.flat, distributions):
    for city_key, city in city_data.items():
        metric = urban_metrics[city_key][category].get(metric_key, {})
        bin_edges = metric.get('bins', [])

        # At least two edges are needed to form a single bin
        if len(bin_edges) <= 1:
            continue

        bin_centers = [
            (left + right) / 2
            for left, right in zip(bin_edges[:-1], bin_edges[1:])
        ]
        ax.plot(
            bin_centers,
            metric['counts'],
            label=city['name'],
            color=city['color'],
            linewidth=2,
            alpha=0.7,
        )

    ax.set_xlabel(title, fontsize=12)
    ax.set_ylabel('Frequency', fontsize=12)
    ax.set_title(title, fontsize=14, pad=10)
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3)

plt.suptitle('Comparative Distributions: Urban Pattern Analysis', fontsize=18, y=0.98)
plt.tight_layout()

plt.savefig(VIZ_PNG_DIR / 'comparative_histograms.png', dpi=300, bbox_inches='tight')
plt.savefig(VIZ_SVG_DIR / 'comparative_histograms.svg', bbox_inches='tight')
plt.show()

print("‚úì Saved: comparative_histograms (PNG + SVG)")

## 10. Summary & Next Steps

In [None]:
# Final summary
# Lists the files found in each output directory, prints the headline
# metrics per city and closes with the planned next steps.
banner = "=" * 80
print("\n" + banner)
print("‚úì STEP 1 COMPLETE: URBAN ANALYSIS")
print(banner)

print("\nüìÅ OUTPUTS GENERATED:")
print(f"\n  GeoJSON Files ({GEOJSON_DIR}):")
for path in sorted(GEOJSON_DIR.glob('*.geojson')):
    print(f"    - {path.name}")

print(f"\n  Metrics ({METRICS_DIR}):")
for path in sorted(METRICS_DIR.glob('*.json')):
    print(f"    - {path.name}")

png_count = len(list(VIZ_PNG_DIR.glob('*.png')))
svg_count = len(list(VIZ_SVG_DIR.glob('*.svg')))
print("\n  Visualizations:")
print(f"    PNG ({png_count} files): {VIZ_PNG_DIR}")
print(f"    SVG ({svg_count} files): {VIZ_SVG_DIR}")

print("\nüìä KEY PORTFOLIO METRICS SUMMARY:")
for city_key, city in city_data.items():
    m = urban_metrics[city_key]
    edge_metrics = m['edges']

    print(f"\n  {m['name'].upper()}:")
    print(f"    Street density: {edge_metrics['density_km_per_km2']:.1f} km/km¬≤")
    print(f"    Avg segment length: {edge_metrics['segment_length_distribution']['mean']:.1f} m")
    # The block distribution is {} for a city without blocks, hence .get
    print(f"    Avg block area: {m['blocks']['area_distribution'].get('mean', 0):.0f} m¬≤")
    print(f"    Building coverage: {m['buildings']['avg_coverage_ratio']*100:.1f}%")
    print(f"    Library blocks: {len(city['library'])}")

print(f"\n  TOTAL LIBRARY: {len(all_blocks_library)} reusable building blocks")

print("\nüéØ NEXT STEPS (STEP 2):")
next_steps = (
    "  1. Generate 500√ó500m road network using tensor field",
    "  2. Use segment length distributions from Step 1",
    "  3. Optimize network for space syntax metrics (integration/choice)",
    "  4. Target metrics from analyzed cities",
)
for step in next_steps:
    print(step)

print("\n" + banner)