# Step 4: Space Syntax Analysis

**Compute space syntax metrics for all 20 generated networks**

This notebook:
1. Loads 20 networks with buildings from Step 3
2. Computes space syntax metrics for each network
3. Metrics: integration (local/global), choice, mean depth, intelligibility
4. Visualizes distributions and compares to reference cities
5. Saves enriched networks for Step 5 (ranking and selection)

In [None]:
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from pathlib import Path
import pickle
from collections import defaultdict
import math

%matplotlib inline
plt.rcParams['figure.dpi'] = 100
plt.rcParams['font.size'] = 10

print("✓ Libraries loaded")

## Configuration

In [None]:
# Side length of the square analysis window (500×500m).
WINDOW_SIZE_M = 500

# Make sure every folder this notebook writes into exists before any output
# is produced; existing folders are left untouched.
for _out_dir in ("outputs/generated/visualizations", "outputs/generated/syntax"):
    Path(_out_dir).mkdir(parents=True, exist_ok=True)

print(f"Window size: {WINDOW_SIZE_M}m × {WINDOW_SIZE_M}m")
print("✓ Output directories created")

## Load Reference Data and Generated Networks

In [None]:
# NOTE: pickle can execute arbitrary code while loading. Both files below are
# expected to be artefacts written by earlier steps of this pipeline; do not
# point these paths at files from an untrusted source.

# Reference city data
with open('outputs/data/reference_cities_data.pkl', 'rb') as ref_file:
    reference_data = pickle.load(ref_file)

# Generated networks, already carrying buildings
with open('outputs/generated/buildings/networks_with_buildings_20.pkl', 'rb') as net_file:
    generated_networks = pickle.load(net_file)

print("✓ Loaded reference data from Step 1")
print(f"✓ Loaded {len(generated_networks)} networks with buildings from Step 3")

# Keys used to look cities up in reference_data further down the notebook
reference_cities = ['london', 'berlin', 'belgrade', 'torino']
city_labels = ', '.join(name.upper() for name in reference_cities)
print(f"\nReference cities: {city_labels}")

## Space Syntax Functions

In [None]:
def compute_integration(G, node, radius=None):
    """
    Return the integration of ``node``: the reciprocal of its mean depth.

    How depth is measured depends on ``radius``:

    * ``radius is None`` (global): unweighted shortest-path length, i.e. the
      number of edges, to every node reachable from ``node``.
    * ``radius`` given (local): shortest-path distance weighted by the
      ``'length'`` edge attribute, restricted to nodes whose distance from
      ``node`` does not exceed ``radius``.

    The two modes therefore use different depth units, so global and local
    values are not directly comparable. No further normalization is applied.

    Args:
        G: NetworkX graph
        node: Node to compute integration for
        radius: Cutoff on the weighted distance (None = global)

    Returns:
        ``1 / mean depth`` (higher = more integrated), or 0.0 when no other
        node is reachable or the mean depth is not positive.
    """
    if radius is not None:
        depths = nx.single_source_dijkstra_path_length(
            G, node, cutoff=radius, weight='length'
        )
    else:
        depths = nx.single_source_shortest_path_length(G, node)

    # The source node is part of the result (at depth 0) and is left out of
    # the average.
    others = len(depths) - 1
    if others < 1:
        return 0.0

    mean_depth = sum(depths.values()) / others

    # Lower mean depth = more accessible = higher integration.
    return 1.0 / mean_depth if mean_depth > 0 else 0.0


def compute_choice(G, node, radius=None):
    """
    Compute choice (betweenness) for a single node.

    Choice measures how often a location lies on shortest paths between other
    locations. Higher choice = more through-movement potential.

    The value is the node's normalized betweenness centrality over the whole
    graph, with shortest paths weighted by the ``'length'`` edge attribute.

    Radius-constrained (local) choice is NOT implemented. ``radius`` is
    accepted for signature compatibility only; passing a value emits a
    ``RuntimeWarning`` and the global value is returned.

    Betweenness for every node is recomputed on each call, so calling this in
    a loop over nodes is very expensive. compute_space_syntax_metrics computes
    betweenness once and reads all nodes from that single result instead.

    Args:
        G: NetworkX graph
        node: Node to compute choice for
        radius: Ignored (see above). None = global.

    Returns:
        Normalized betweenness centrality of ``node``; 0.0 when the graph has
        two nodes or fewer, or when ``node`` is not in the graph.
    """
    if radius is not None:
        # Make the fallback visible instead of silently returning a global
        # value that the caller may mistake for a local one.
        import warnings

        warnings.warn(
            "compute_choice: radius-constrained choice is not implemented; "
            "returning global choice",
            RuntimeWarning,
            stacklevel=2,
        )

    # With at most two nodes no node can lie between two others.
    if G.number_of_nodes() <= 2:
        return 0.0

    betweenness = nx.betweenness_centrality(G, weight='length', normalized=True)

    return betweenness.get(node, 0.0)


def compute_mean_depth(G, node):
    """
    Return the mean topological depth of ``node``.

    Depth is the unweighted shortest-path length (number of edges) from
    ``node`` to each node reachable from it; the result is the average over
    those nodes, not counting ``node`` itself. Lower mean depth = more central.

    Args:
        G: NetworkX graph
        node: Node to compute mean depth for

    Returns:
        Mean depth value, or 0.0 when no other node is reachable.
    """
    hop_counts = nx.single_source_shortest_path_length(G, node)

    # The source appears in the result at depth 0 and is not averaged over.
    reachable_others = len(hop_counts) - 1
    if reachable_others <= 0:
        return 0.0

    return sum(hop_counts.values()) / reachable_others


def compute_space_syntax_metrics(G, local_radius=200):
    """
    Compute space syntax metrics for the nodes of a graph.

    The graph is treated as undirected and, if it is disconnected, only its
    largest connected component is analysed. Nodes outside that component do
    not appear in the returned mappings.

    Args:
        G: NetworkX graph
        local_radius: Radius for local integration, in the units of the
            'length' edge attribute (presumably meters)

    Returns:
        Dictionary with the keys 'global_integration', 'local_integration',
        'choice' and 'mean_depth', each mapping node -> value. All four
        mappings are empty when G has no nodes.
    """
    metrics = {
        'global_integration': {},
        'local_integration': {},
        'choice': {},
        'mean_depth': {}
    }

    # nx.is_connected raises on a graph without nodes; there is nothing to
    # measure in that case, so hand back the empty mappings.
    if G.number_of_nodes() == 0:
        return metrics

    # Convert to undirected for syntax analysis
    G_undirected = G.to_undirected() if G.is_directed() else G

    # Restrict to the largest connected component so every analysed node can
    # reach every other analysed node
    if not nx.is_connected(G_undirected):
        largest_cc = max(nx.connected_components(G_undirected), key=len)
        G_undirected = G_undirected.subgraph(largest_cc).copy()

    nodes = list(G_undirected.nodes())

    # Betweenness is a whole-graph computation: run it once and look each
    # node up afterwards rather than recomputing per node
    print("  Computing choice (betweenness)...")
    betweenness = nx.betweenness_centrality(G_undirected, weight='length', normalized=True)

    # The remaining metrics are computed from each node individually
    print("  Computing integration and mean depth...")
    for i, node in enumerate(nodes):
        metrics['global_integration'][node] = compute_integration(G_undirected, node, radius=None)
        metrics['local_integration'][node] = compute_integration(G_undirected, node, radius=local_radius)
        metrics['choice'][node] = betweenness.get(node, 0.0)
        metrics['mean_depth'][node] = compute_mean_depth(G_undirected, node)

        # Progress report every 20 nodes
        if (i + 1) % 20 == 0:
            print(f"    Processed {i+1}/{len(nodes)} nodes")

    return metrics


def compute_intelligibility(integration_values, choice_values):
    """
    Compute intelligibility as the squared Pearson correlation (R²) between
    two equally long sequences of per-node values.

    Higher R² = the two measures track each other more closely = more
    intelligible network.

    NOTE(review): the classical space syntax definition correlates
    connectivity with global integration, whereas this notebook passes global
    integration and choice. Confirm that this pairing is intended.

    Args:
        integration_values: Sequence of integration values
        choice_values: Sequence of choice values, in the same node order

    Returns:
        R² as a builtin float in [0, 1]. 0.0 is returned when either sequence
        has fewer than two values, when either sequence is constant (the
        correlation is undefined), or when the correlation is not finite.

    Raises:
        ValueError: if the two sequences have different lengths (raised by
            np.corrcoef).
    """
    if len(integration_values) < 2 or len(choice_values) < 2:
        return 0.0

    x = np.asarray(integration_values, dtype=float)
    y = np.asarray(choice_values, dtype=float)

    # A constant sequence has zero variance, so Pearson's r is undefined.
    # Handle it here instead of letting np.corrcoef divide by zero and emit
    # RuntimeWarnings before returning NaN.
    if x.max() == x.min() or y.max() == y.min():
        return 0.0

    correlation = np.corrcoef(x, y)[0, 1]

    # NaN/inf inputs still propagate to a non-finite correlation.
    if not np.isfinite(correlation):
        return 0.0

    return float(correlation ** 2)


# Printed once the cell has run, confirming the helper definitions above
# executed without error.
print("✓ Space syntax functions defined")

## Compute Space Syntax for All 20 Networks

In [None]:
print("Computing space syntax metrics for 20 networks...")
print("="*70)

# Summary key -> per-node metric it averages. The order fixes the key order
# of the stored summary dictionary.
_AVERAGED_METRICS = (
    ('avg_global_integration', 'global_integration'),
    ('avg_local_integration', 'local_integration'),
    ('avg_choice', 'choice'),
    ('avg_mean_depth', 'mean_depth'),
)

for network_data in generated_networks:
    print(f"\nNetwork {network_data['id']+1}:")

    # Per-node values: metric name -> {node: value}
    node_metrics = compute_space_syntax_metrics(network_data['graph'], local_radius=200)

    # Network-level intelligibility from global integration and choice
    r_squared = compute_intelligibility(
        list(node_metrics['global_integration'].values()),
        list(node_metrics['choice'].values()),
    )

    # Attach the per-node values plus one network-level average per metric;
    # a metric with no values averages to 0
    summary = {'node_metrics': node_metrics}
    for avg_key, metric_key in _AVERAGED_METRICS:
        values = list(node_metrics[metric_key].values())
        summary[avg_key] = np.mean(values) if values else 0
    summary['intelligibility'] = r_squared
    network_data['syntax_metrics'] = summary

    print(f"  Global integration: {summary['avg_global_integration']:.4f}")
    print(f"  Local integration:  {summary['avg_local_integration']:.4f}")
    print(f"  Choice:             {summary['avg_choice']:.4f}")
    print(f"  Mean depth:         {summary['avg_mean_depth']:.2f}")
    print(f"  Intelligibility:    {r_squared:.4f}")

print("\n" + "="*70)
print("✓ Space syntax computed for all 20 networks")

## Summary Statistics

In [None]:
# One network-level value per generated network, for each summary metric
_summaries = [net['syntax_metrics'] for net in generated_networks]
all_global_int = [s['avg_global_integration'] for s in _summaries]
all_local_int = [s['avg_local_integration'] for s in _summaries]
all_choice = [s['avg_choice'] for s in _summaries]
all_mean_depth = [s['avg_mean_depth'] for s in _summaries]
all_intelligibility = [s['intelligibility'] for s in _summaries]

print("\n" + "="*70)
print("GENERATED NETWORKS - SPACE SYNTAX METRICS")
print("="*70)

# Mean ± standard deviation across networks: (label, values, number format)
for _label, _values, _fmt in (
    ("\nGlobal Integration: ", all_global_int, ".4f"),
    ("Local Integration:  ", all_local_int, ".4f"),
    ("Choice:             ", all_choice, ".4f"),
    ("Mean Depth:         ", all_mean_depth, ".2f"),
    ("Intelligibility:    ", all_intelligibility, ".4f"),
):
    print(f"{_label}{np.mean(_values):{_fmt}} ± {np.std(_values):{_fmt}}")

# Stored syntax values of the reference cities, for comparison
print("\n" + "="*70)
print("REFERENCE CITIES - SPACE SYNTAX METRICS")
print("="*70)

for city in reference_cities:
    city_syntax = reference_data[city]['syntax']
    print(f"\n{city.upper()}:")
    print(f"  Intelligibility: {city_syntax['intelligibility']:.4f}")
    print(f"  Mean Depth:      {city_syntax['mean_depth']:.2f}")

print("="*70)

## Visualize Space Syntax Distributions

In [None]:
# 2×3 grid: five histograms of network-level metrics plus one comparison panel
fig, axes = plt.subplots(2, 3, figsize=(18, 12))

# (axes, values, x-axis label, title stem, format of the mean in the legend)
histogram_panels = [
    (axes[0, 0], all_global_int, 'Global Integration', 'Global Integration', '.4f'),
    (axes[0, 1], all_local_int, 'Local Integration', 'Local Integration', '.4f'),
    (axes[0, 2], all_choice, 'Choice (Betweenness)', 'Choice', '.4f'),
    (axes[1, 0], all_mean_depth, 'Mean Depth', 'Mean Depth', '.2f'),
    (axes[1, 1], all_intelligibility, 'Intelligibility (R²)', 'Intelligibility', '.4f'),
]

for ax, values, x_label, title_stem, mean_fmt in histogram_panels:
    mean_value = np.mean(values)
    ax.hist(values, bins=15, color='steelblue', alpha=0.7, edgecolor='black')
    ax.set_xlabel(x_label)
    ax.set_ylabel('Frequency')
    ax.set_title(f'{title_stem} Distribution\n(20 networks)', fontweight='bold')
    # Dashed red line marks the mean across networks
    ax.axvline(mean_value, color='red', linestyle='--', linewidth=2,
               label=f'Mean: {mean_value:{mean_fmt}}')
    ax.legend()
    ax.grid(axis='y', alpha=0.3)

# Last panel: reference-city intelligibility as bars, with the mean of the
# generated networks drawn as a horizontal line
ax = axes[1, 2]
city_intelligibility = [reference_data[city]['syntax']['intelligibility'] for city in reference_cities]
city_tick_labels = [city.upper() for city in reference_cities]
bar_positions = np.arange(len(reference_cities))

ax.bar(bar_positions, city_intelligibility, color='coral', alpha=0.7, edgecolor='black', label='Reference')
ax.axhline(np.mean(all_intelligibility), color='steelblue', linestyle='--', linewidth=2, label='Generated (mean)')
ax.set_xticks(bar_positions)
ax.set_xticklabels(city_tick_labels)
ax.set_ylabel('Intelligibility (R²)')
ax.set_title('Intelligibility Comparison\nReference vs Generated', fontweight='bold')
ax.legend()
ax.grid(axis='y', alpha=0.3)

plt.suptitle('Space Syntax Analysis - 20 Generated Networks', fontsize=16, fontweight='bold', y=0.995)
plt.tight_layout()

# Write the figure to disk before showing it
svg_path = 'outputs/generated/visualizations/D1_space_syntax_distributions.svg'
plt.savefig(svg_path, format='svg', bbox_inches='tight', dpi=300)
print(f"Saved: {svg_path}")

plt.show()

## Save Enriched Networks

In [None]:
# Persist the network records, each now carrying a 'syntax_metrics' entry
syntax_pkl = 'outputs/generated/syntax/networks_with_syntax_20.pkl'
with open(syntax_pkl, 'wb') as out_file:
    pickle.dump(generated_networks, out_file)

print(f"✓ Saved 20 networks with space syntax to: {syntax_pkl}")

# Informational listing only; apart from the space syntax entry, these
# contents come from earlier steps and are not checked here
print("\nEach network now includes:")
for content in (
    "NetworkX graph",
    "Node positions",
    "Network metrics (morphology)",
    "Building polygons",
    "Building metrics",
    "Space syntax metrics (NEW)",
    "Generation parameters",
):
    print(f"  - {content}")

## Next Steps

These 20 networks with space syntax analysis will be used for:

1. **Step 5**: Multi-objective ranking (combine all metrics to score networks)
2. **Step 6**: Final selection and validation (pick best network)
3. **Step 7**: Optimize building generation for final selected network
4. **Step 8**: Export to GeoJSON/Shapefile for use in urban planning tools