# Step 3: Batch Generation - 10 Networks

Generate **10 networks** with different seeds for one reference city.

**Goals:**
1. Test generator robustness across different random seeds
2. Visualize all 10 networks in a grid
3. Compare distribution statistics across all runs
4. Identify best/worst matches
5. Compute average divergence metrics

In [None]:
# Notebook setup: third-party/stdlib imports, import path tweak, project
# imports, and plotting defaults used by every cell below.
import sys
import pickle
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm.notebook import tqdm

# Put the current working directory first on the import path before the
# street_network_generator imports below run.
# NOTE(review): presumably this is so the local package checkout is found when
# the notebook is launched from the project root — confirm.
sys.path.insert(0, str(Path.cwd()))

from street_network_generator import (
    GeneratorConfig,
    StreetNetworkGenerator,
)
from street_network_generator.metrics import MorphologyMetrics, SpaceSyntaxMetrics
from street_network_generator.visualization import (
    plot_network_grid,
    plot_histogram_comparison,
)

# IPython magic (not plain Python): draw figures inline in the notebook output.
%matplotlib inline
# Default resolution applied to the figures created after this point.
plt.rcParams['figure.dpi'] = 100

print("✓ Modules loaded")

## Configuration

In [None]:
# Load the reference data; it is indexed by city name below.
# NOTE(security): pickle.load can run arbitrary code while unpickling, so only
# open a reference_data.pkl that you produced yourself or otherwise trust.
with open('reference_data.pkl', 'rb') as f:
    references = pickle.load(f)

# CONFIGURATION
TARGET_CITY = 'london'  # Change to: 'london', 'berlin', 'belgrade', 'torino'
NUM_NETWORKS = 10
SEEDS = range(100, 100 + NUM_NETWORKS)  # Consecutive seeds starting at 100
MAX_ITERATIONS = 1000  # Reduce for faster batch processing

# Fail here, with an actionable message, instead of several cells later.
if NUM_NETWORKS < 1:
    raise ValueError(f"NUM_NETWORKS must be at least 1, got {NUM_NETWORKS}")
if TARGET_CITY not in references:
    raise KeyError(
        f"No reference data for {TARGET_CITY!r}; "
        f"available cities: {sorted(references)}"
    )

reference = references[TARGET_CITY]

print(f"Target: {TARGET_CITY.capitalize()}")
print(f"Number of networks: {NUM_NETWORKS}")
print(f"Seeds: {list(SEEDS)}")
print(f"Max iterations per network: {MAX_ITERATIONS}")

## Generate 10 Networks

In [None]:
# Generator settings shared by every run; only the seed changes per network.
shared_settings = dict(
    window_size_m=500,
    max_iterations=MAX_ITERATIONS,
    min_iterations=150,
    syntax_recompute_interval=60,
    candidate_per_step=12,
)
banner = "=" * 70

# One dict per generated network, in seed order; later cells read this list.
results = []

print(banner)
print(f" GENERATING {NUM_NETWORKS} NETWORKS FOR {TARGET_CITY.upper()}")
print(banner)

for seed in tqdm(SEEDS, desc="Generating networks"):
    # Build a fresh config and generator for this seed, then run it.
    run_config = GeneratorConfig(seed=seed, **shared_settings)
    graph, pos, metadata = StreetNetworkGenerator(reference, run_config).generate()

    # Keep the network together with its morphology and space-syntax metrics.
    run = {
        'seed': seed,
        'graph': graph,
        'pos': pos,
        'metadata': metadata,
        'morph': MorphologyMetrics.compute_all_morphology(graph, pos, 500),
        'syntax': SpaceSyntaxMetrics.compute_all_syntax(graph, radius=3),
    }
    results.append(run)

    # Per-seed progress line: size of the network and its final score.
    n_nodes = graph.number_of_nodes()
    n_edges = graph.number_of_edges()
    print(f"  Seed {seed}: {n_nodes} nodes, {n_edges} edges, "
          f"score={metadata['final_score']:.4f}")

print("\n✓ All networks generated!")
print("\n✓ All networks generated!")

## Visualize All 10 Networks

In [None]:
import math

# Grid layout: fixed column count, as many rows as the batch needs.
# With the default 10 networks this gives 2 rows and the original 20x8 figure.
GRID_COLS = 5
n_networks = len(results)
n_rows = max(1, math.ceil(n_networks / GRID_COLS))

# One (graph, positions, caption) tuple per generated network; the caption
# shows the seed plus node and edge counts.
networks = [
    (
        r['graph'],
        r['pos'],
        f"Seed {r['seed']}\n{r['graph'].number_of_nodes()}N/{r['graph'].number_of_edges()}E"
    )
    for r in results
]

fig = plot_network_grid(
    networks,
    window_size_m=500,
    figsize=(20, 4 * n_rows),
    cols=GRID_COLS,
)
# Title reports the number of networks actually plotted rather than a
# hard-coded 10, so it stays correct when NUM_NETWORKS is changed.
plt.suptitle(
    f"{n_networks} Generated Networks - {TARGET_CITY.capitalize()}",
    fontsize=16, fontweight='bold', y=0.98
)
plt.show()

## Statistics Across All Runs

In [None]:
import pandas as pd

# One table row per generated network: size, morphology, space syntax, score.
stats_data = [
    {
        'Seed': r['seed'],
        'Nodes': r['graph'].number_of_nodes(),
        'Edges': r['graph'].number_of_edges(),
        'Density': r['morph']['node_density'],
        'Dead-End': r['morph']['dead_end_ratio'],
        'Mean Depth': r['syntax']['mean_depth'],
        'Intelligibility': r['syntax']['intelligibility'],
        'Final Score': r['metadata']['final_score'],
    }
    for r in results
]

# Rows keep the order of `results`, so row i describes results[i].
df = pd.DataFrame(stats_data)

rule = "=" * 90
print("\n" + rule)
# Report the real number of rows instead of a hard-coded 10.
print(f" STATISTICS FOR {len(df)} NETWORKS - {TARGET_CITY.upper()}")
print(rule)
print(df.to_string(index=False))
print(rule)

# Summary layout: (column, number format, reference value, reference format).
# A reference value of None means the column has no reference counterpart.
summary_rows = [
    ('Nodes', '.1f', reference.graph.number_of_nodes(), ''),
    ('Edges', '.1f', reference.graph.number_of_edges(), ''),
    ('Density', '.1f', reference.node_density, '.1f'),
    ('Dead-End', '.3f', reference.dead_end_ratio, '.3f'),
    ('Mean Depth', '.3f', reference.mean_depth, '.3f'),
    ('Intelligibility', '.3f', reference.intelligibility, '.3f'),
    ('Final Score', '.4f', None, ''),
]

# Summary statistics. pandas' std() is the sample standard deviation, so it is
# NaN when only a single network was generated.
print("\nSUMMARY (Mean ± Std):")
for column, value_fmt, ref_value, ref_fmt in summary_rows:
    label = f"{column}:"
    mean_txt = format(df[column].mean(), value_fmt)
    std_txt = format(df[column].std(), value_fmt)
    line = f"  {label:<15} {mean_txt} ± {std_txt}"
    if ref_value is not None:
        line += f"  (Ref: {format(ref_value, ref_fmt)})"
    print(line)

## Best and Worst Networks

In [None]:
# plot_network is used below but is not imported by the setup cell, so the
# original cell failed with NameError; import it here.
# NOTE(review): assumes plot_network lives in the visualization module next to
# the other plot helpers — confirm.
from street_network_generator.visualization import plot_network

# Find best and worst by final score.
# NOTE(review): treats the lowest final_score as best — confirm the score is a
# distance/divergence where smaller means a closer match to the reference.
# idxmin/idxmax return index labels; df was built from `results` with the
# default 0..n-1 index, so the labels double as positions in `results`.
best_idx = df['Final Score'].idxmin()
worst_idx = df['Final Score'].idxmax()

best = results[best_idx]
worst = results[worst_idx]

print(f"BEST Network:  Seed {best['seed']}, Score = {best['metadata']['final_score']:.4f}")
print(f"WORST Network: Seed {worst['seed']}, Score = {worst['metadata']['final_score']:.4f}")

# Plot side by side: best on the left, worst on the right.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

for ax, (tag, net) in zip(axes, [('BEST', best), ('WORST', worst)]):
    plot_network(
        net['graph'], net['pos'], 500,
        ax=ax,
        title=f"{tag} (Seed {net['seed']}, Score {net['metadata']['final_score']:.4f})",
        show_node_degrees=True,
    )

plt.tight_layout()
plt.show()

## Distribution Boxplots: Generated vs Reference

In [None]:
# One panel per metric: (df column, panel title, y-axis label, reference value).
# Each panel shows the spread across generated networks as a boxplot and the
# reference city's value as a dashed red line.
panels = [
    ('Nodes', 'Nodes', 'Node Count', reference.graph.number_of_nodes()),
    ('Edges', 'Edges', 'Edge Count', reference.graph.number_of_edges()),
    ('Density', 'Node Density', 'Nodes/km²', reference.node_density),
    ('Dead-End', 'Dead-End Ratio', 'Ratio', reference.dead_end_ratio),
    ('Mean Depth', 'Mean Depth', 'Mean Depth', reference.mean_depth),
    ('Intelligibility', 'Intelligibility', 'Correlation', reference.intelligibility),
]

fig, axes = plt.subplots(2, 3, figsize=(15, 8))
axes = axes.flatten()

for ax, (column, title, ylabel, ref_value) in zip(axes, panels):
    # NOTE(review): newer matplotlib releases prefer `tick_labels` over
    # `labels`; `labels` is kept so older installations keep working — confirm
    # against the pinned matplotlib version.
    ax.boxplot([df[column]], labels=['Generated'])
    ax.axhline(y=ref_value, color='red', linestyle='--', label='Reference')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)

# Title reports the number of networks in df rather than a hard-coded 10.
plt.suptitle(
    f"Metric Distributions - {len(df)} Networks ({TARGET_CITY.capitalize()})",
    fontsize=14, fontweight='bold'
)
plt.tight_layout()
plt.show()

## Aggregated Histogram Comparison

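Pooled histograms: segment lengths and node degrees from all generated networks are combined into a single distribution and compared with the reference: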

In [None]:
from collections import Counter

from street_network_generator.visualization import plot_degree_distribution_comparison

n_networks = len(results)

# Reference segment-length histogram, read as (bin edges, counts).
ref_bins = reference.segment_length_hist[0]
ref_counts = reference.segment_length_hist[1]

# Pool the segment lengths of every generated network and bin them on the
# reference bin edges so both histograms share the same x-axis.
all_gen_lengths = [
    length
    for r in results
    for length in r['morph']['segment_lengths']
]
gen_counts, _ = np.histogram(all_gen_lengths, bins=ref_bins)
# NOTE(review): gen_counts are raw counts summed over all networks, while
# ref_counts come from a single reference; confirm plot_histogram_comparison
# normalizes both, otherwise the generated bars are inflated by the batch size.

# Plot
fig, axes = plt.subplots(1, 2, figsize=(14, 4))

plot_histogram_comparison(
    (ref_bins, ref_counts),
    (ref_bins, gen_counts),
    ax=axes[0],
    title=f"Segment Length Distribution (Aggregated {n_networks} Networks)",
    xlabel="Length (m)"
)

# Degree distribution: pooled node degrees, mapped degree -> number of nodes.
all_degrees = [d for r in results for _, d in r['graph'].degree()]
gen_degree_dist = dict(Counter(all_degrees))
# NOTE(review): same scale caveat as above — these are pooled raw counts;
# confirm how reference.degree_distribution is scaled.

plot_degree_distribution_comparison(
    reference.degree_distribution,
    gen_degree_dist,
    ax=axes[1]
)
axes[1].set_title(f"Degree Distribution (Aggregated {n_networks} Networks)")

plt.tight_layout()
plt.show()

## Save Batch Results

In [None]:
from street_network_generator.validation import NetworkValidator

# Save all results: reference, every generated network, and the stats table.
batch_output = {
    'city': TARGET_CITY,
    'reference': reference,
    'results': results,
    'stats_df': df,
}

# Build the file name once so the file written and the message printed cannot
# drift apart; the count comes from the data instead of a hard-coded 10
# (the name is unchanged for the default batch of 10).
batch_path = Path(f'{TARGET_CITY}_batch_{len(results)}_results.pkl')

with open(batch_path, 'wb') as f:
    pickle.dump(batch_output, f)

print(f"✓ Batch results saved to {batch_path}")

# Export best network (`best` is set by the best/worst cell, which must have
# been run first).
output_dir = Path(f"outputs_generated/{TARGET_CITY}_batch_best")
output_dir.mkdir(parents=True, exist_ok=True)

# The validator is constructed from a GeneratorConfig; only the seed and window
# size are set here, every other field keeps its default.
# NOTE(review): if the export depends on other generation settings, pass the
# same values that were used in the generation loop.
config_dummy = GeneratorConfig(seed=best['seed'], window_size_m=500)
validator = NetworkValidator(config_dummy)

validator.validate_and_export(
    best['graph'], best['pos'], reference, best['metadata'],
    str(output_dir),
    prefix=f"{TARGET_CITY}_best"
)

print(f"✓ Best network exported to {output_dir}/")

## Summary

**Key Findings:**
1. Variability across seeds: Check std deviations in summary table
2. Consistency: Do most networks match the reference well?
3. Best vs worst: How much difference does the random seed make?
4. Aggregated histograms: Do combined distributions match better?

**Next Steps:**
- Try batch generation for other cities
- Adjust generator parameters to reduce variability
- Analyze which metrics have highest/lowest variance
- Use best seed for final production runs