# Step 1: Reference Data Analysis

Load and visualize reference districts from 4 cities:
- London, UK
- Berlin, Germany
- Belgrade, Serbia
- Torino, Italy

For each 500 m × 500 m window, we'll compute and visualize:
- Urban morphology metrics
- Space syntax metrics
- Distribution histograms

In [None]:
import sys
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

# Put the current working directory first on the import path so the
# street_network_generator package imported below can be found.
# NOTE(review): assumes the notebook is started from the directory that
# contains street_network_generator — confirm.
sys.path.insert(0, str(Path.cwd()))

from street_network_generator import ReferenceExtractor
from street_network_generator.visualization import (
    plot_network,
    plot_histogram_comparison,
    plot_degree_distribution_comparison
)

# IPython magic: draw figures inline in the notebook output.
%matplotlib inline
# Default resolution for every figure created in this notebook.
plt.rcParams['figure.dpi'] = 100

## Load Reference Data

In [None]:
# Extractor rooted at the project's output directory
extractor = ReferenceExtractor(data_dir="inv_city/outputs")

# Cities to load; `references` maps city key -> loaded reference object
cities = ['london', 'berlin', 'belgrade', 'torino']
references = {}

print("Loading reference data...\n")
for city in cities:
    label = city.capitalize()
    try:
        ref = extractor.load_from_geojson(city, window_size_m=500)
        references[city] = ref
        n_nodes = ref.graph.number_of_nodes()
        n_edges = ref.graph.number_of_edges()
        print(f"✓ {label:12s} - Nodes: {n_nodes:4d}  Edges: {n_edges:4d}")
    except Exception as err:
        # Best-effort: report the failure and carry on with the other cities
        print(f"✗ {label:12s} - Error: {err}")

print(f"\nLoaded {len(references)} reference cities")

## Visualize All Reference Networks

In [None]:
# One panel per reference city on a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(14, 14))
axes = axes.flatten()

# Per-city colour palette; also used by the histogram and bar-chart cells
colors = {
    'london': '#E74C3C',
    'berlin': '#3498DB',
    'belgrade': '#2ECC71',
    'torino': '#F39C12'
}

# Pair each successfully loaded city with the next free panel
for ax, (city, ref) in zip(axes, references.items()):
    plot_network(
        ref.graph,
        ref.pos,
        window_size_m=500,
        ax=ax,
        title=f"{city.capitalize()} ({ref.graph.number_of_nodes()} nodes, {ref.graph.number_of_edges()} edges)",
        show_node_degrees=True,
        node_size=30,
        edge_width=1.5,
        edge_color=colors[city]
    )

# The loading cell skips cities that fail to load, so some panels may be
# unused; hide them instead of showing empty frames with ticks.
for ax in axes[len(references):]:
    ax.set_axis_off()

plt.suptitle("Reference Street Networks (500m × 500m)", fontsize=16, fontweight='bold', y=0.995)
plt.tight_layout()
plt.show()

## Morphology Metrics Comparison

In [None]:
# Tabulate the headline metrics of every loaded city
import pandas as pd

# One row per city; metric values are pre-formatted as strings for printing
summary_data = [
    {
        'City': city.capitalize(),
        'Nodes': ref.graph.number_of_nodes(),
        'Edges': ref.graph.number_of_edges(),
        'Node Density\n(nodes/km²)': f"{ref.node_density:.1f}",
        'Dead-End\nRatio': f"{ref.dead_end_ratio:.3f}",
        'Mean\nDepth': f"{ref.mean_depth:.2f}",
        'Intelligibility': f"{ref.intelligibility:.3f}"
    }
    for city, ref in references.items()
]

df = pd.DataFrame(summary_data)

# Horizontal rule framing the printed table
rule = "=" * 80
print("\n" + rule)
print("REFERENCE CITIES SUMMARY")
print(rule)
print(df.to_string(index=False))
print(rule)

## Degree Distribution Comparison

In [None]:
# One bar chart of node-degree probabilities per reference city
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
axes = axes.flatten()

for idx, (city, ref) in enumerate(references.items()):
    ax = axes[idx]
    # Used below as a mapping of node degree -> count
    degree_dist = ref.degree_distribution

    degrees = sorted(degree_dist.keys())
    counts = [degree_dist[d] for d in degrees]
    total = sum(counts)
    # Normalize to probabilities. Guard against an empty / all-zero
    # distribution (same check as the other histogram cells) so the cell
    # does not fail with ZeroDivisionError.
    probs = [c / total for c in counts] if total > 0 else counts

    ax.bar(degrees, probs, color=colors[city], alpha=0.7, edgecolor='black')
    ax.set_xlabel('Node Degree')
    ax.set_ylabel('Probability')
    ax.set_title(f"{city.capitalize()} - Degree Distribution", fontweight='bold')
    ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

## Segment Length Distribution

In [None]:
# One normalized segment-length histogram per reference city
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
axes = axes.flatten()

for idx, (city, ref) in enumerate(references.items()):
    ax = axes[idx]
    edges, counts = ref.segment_length_hist

    # Bars sit at bin midpoints and cover 80% of the first bin's width
    centers = (edges[:-1] + edges[1:]) / 2
    bar_width = (edges[1] - edges[0]) * 0.8

    # Convert counts to probabilities; leave them untouched if the sum is 0
    total = np.sum(counts)
    if total > 0:
        probs = counts / total
    else:
        probs = counts

    ax.bar(
        centers,
        probs,
        width=bar_width,
        color=colors[city],
        alpha=0.7,
        edgecolor='black',
    )
    ax.set_xlabel('Segment Length (m)')
    ax.set_ylabel('Probability')
    ax.set_title(f"{city.capitalize()} - Segment Lengths", fontweight='bold')
    ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

## Orientation Distribution

In [None]:
# One normalized bearing histogram per reference city, x-axis fixed to 0-180
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
axes = axes.flatten()

for idx, (city, ref) in enumerate(references.items()):
    panel = axes[idx]
    edges, counts = ref.orientation_hist
    midpoints = (edges[:-1] + edges[1:]) / 2

    # Probabilities when there is anything to normalize, raw counts otherwise
    total = np.sum(counts)
    probs = counts
    if total > 0:
        probs = counts / total

    # Bars cover 80% of the first bin's width
    bar_width = (edges[1] - edges[0]) * 0.8
    panel.bar(
        midpoints,
        probs,
        width=bar_width,
        color=colors[city],
        alpha=0.7,
        edgecolor='black',
    )
    panel.set_xlabel('Bearing (degrees)')
    panel.set_ylabel('Probability')
    panel.set_title(f"{city.capitalize()} - Orientation", fontweight='bold')
    panel.set_xlim(0, 180)
    panel.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

## Space Syntax Metrics

In [None]:
# Bar charts of the two scalar space-syntax metrics, one bar per city
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# The loading cell skips cities whose load raised, so `references` may hold
# fewer entries than `cities`. Restrict the chart to the loaded ones (keeping
# the order of `cities`) instead of raising KeyError on a missing city.
loaded_cities = [c for c in cities if c in references]

city_names = [c.capitalize() for c in loaded_cities]
mean_depths = [references[c].mean_depth for c in loaded_cities]
intelligibilities = [references[c].intelligibility for c in loaded_cities]
city_colors = [colors[c] for c in loaded_cities]

axes[0].bar(city_names, mean_depths, color=city_colors, alpha=0.7, edgecolor='black')
axes[0].set_ylabel('Mean Depth')
axes[0].set_title('Mean Depth Comparison', fontweight='bold')
axes[0].grid(True, alpha=0.3, axis='y')

axes[1].bar(city_names, intelligibilities, color=city_colors, alpha=0.7, edgecolor='black')
axes[1].set_ylabel('Intelligibility (correlation)')
axes[1].set_title('Intelligibility Comparison', fontweight='bold')
axes[1].grid(True, alpha=0.3, axis='y')
# Zero reference line for the intelligibility panel
axes[1].axhline(y=0, color='black', linestyle='--', linewidth=0.5)

plt.tight_layout()
plt.show()

## Key Observations

Compare the four reference cities and note:

1. **Network Topology**: Which cities have more/fewer intersections?
2. **Degree Distribution**: Grid-like (many degree-4) vs irregular (mixed degrees)?
3. **Segment Lengths**: Short blocks vs long streets?
4. **Orientation**: Grid-like networks show sharp peaks at a few bearings; irregular networks are spread almost uniformly
5. **Space Syntax**: Higher intelligibility means a stronger correlation between local and global structure

These distributions will be our **targets** for generation.

In [None]:
# Persist the loaded references to disk for the next notebook
import pickle

# Binary mode is required by pickle; the file lands in the working directory
with open('reference_data.pkl', mode='wb') as out_file:
    pickle.dump(references, out_file)

print("✓ Reference data saved to reference_data.pkl")