In [2]:
"""
Jupyter notebook cell — Version 2 (Nodes only)
Population density → city nuclei via gravity-like clustering

Goal: Start from a total population spread over space with higher/lower density areas,
then let density peaks attract nearby population into city nuclei until only cities with
≥ min_city_pop remain (e.g., 1,000 inhabitants).

- Field: Mixture-of-Gaussians density + low‑frequency noise + uniform baseline
- Allocation: Multinomial over grid cells (so total people is exact)
- Peaks: Local maxima of the density with non‑maximum suppression (min separation)
- Assignment: Each cell’s population goes to the peak with max attractiveness
             A_j = weight_j / (distance + eps)^gamma, weight_j ∝ peak density
- Pruning: Iteratively remove cities with pop < min_city_pop and reassign their cells
- City center: Population‑weighted centroid of assigned cells
- Outputs: nodes.csv, meta.json, preview.png (color = population; top‑3 annotated),
           population_heatmap.png (people per grid cell)

Usage: run this cell. Edit `V2Config` at the end.
"""
from __future__ import annotations

import json
import os
import time
import hashlib
from dataclasses import dataclass
from typing import Tuple, Dict, Any, List

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import StrMethodFormatter


# ------------------------------
# Config
# ------------------------------
@dataclass
class V2Config:
    """Parameters of the V2 generator (density field → gravity-clustered cities)."""

    # --- Reproducibility and domain -------------------------------------
    seed: int = 42                        # passed to set_seed() before any random draw
    total_population: int = 5_000_000     # people allocated over the grid (multinomial)
    bbox_km: Tuple[float, float, float, float] = (0.0, 0.0, 200.0, 200.0)  # (minx, miny, maxx, maxy)

    # --- Grid -----------------------------------------------------------
    grid_res_km: float = 2.0              # cell edge in km; 200 km / 2 km gives a 100×100 grid

    # --- Mixture-of-Gaussians density (centers are drawn at random) -------
    n_centers: int = 4                    # number of Gaussian bumps
    center_sigma_km_min: float = 12.0     # lower bound of each bump's sigma (km)
    center_sigma_km_max: float = 35.0     # upper bound of each bump's sigma (km)
    baseline_frac: float = 0.05           # uniform baseline = this fraction of the mean Gaussian field

    # --- Low-frequency noise (coarse random grid, bilinearly upsampled) ---
    noise_amp: float = 0.30               # multiplicative amplitude; 0.3 scales the field by (1 ± 0.3)
    noise_grid: Tuple[int, int] = (10, 10)  # (rows, cols) of the coarse noise grid

    # --- Peak detection on the density field ------------------------------
    peaks_percentile: float = 92.0        # local maxima below this percentile are ignored
    min_peak_separation_km: float = 8.0   # minimum distance between accepted peaks

    # --- Gravity assignment: A_j = weight_j / (distance + eps)^gamma ------
    gamma: float = 1.7                    # distance exponent
    eps_km: float = 0.5                   # distance softening (km)

    # --- City pruning -----------------------------------------------------
    min_city_pop: int = 1_000             # cities below this are merged into survivors

    # --- Target city count and threshold relaxation -----------------------
    n_cities: int = 8                     # target minimum number of cities
    peaks_percentile_floor: float = 60.0  # lowest percentile the relaxation may reach
    separation_shrink: float = 0.85       # factor applied to the peak separation per relax step
    max_relax_iters: int = 10             # upper bound on relax attempts

    # --- Output & metadata ------------------------------------------------
    out_dir: str = "maps/sv1.2/dv0.1_v2_density_cities"
    crs: str = "EPSG:3857"
    schema_version: str = "1.2"           # optional new cols (`radius_km`, `n_cells_assigned`)
    dataset_version: str = "0.1"


# ------------------------------
# Helpers
# ------------------------------

def set_seed(seed: int) -> None:
    """Seed NumPy's global random state so later `np.random.*` draws repeat."""
    np.random.seed(seed=seed)


def _bbox_arrays(cfg: V2Config):
    """Build cell-centre coordinates for the grid covering ``cfg.bbox_km``.

    Returns ``(X, Y, x, y, nx, ny)``: ``x``/``y`` are the 1-D centre
    coordinates (km), ``X``/``Y`` their meshgrid with shape ``(ny, nx)``,
    and ``nx``/``ny`` the number of columns/rows.
    """
    x_lo, y_lo, x_hi, y_hi = cfg.bbox_km
    step = cfg.grid_res_km
    # Round up so the grid always covers the whole extent.
    nx = int(np.ceil((x_hi - x_lo) / step))
    ny = int(np.ceil((y_hi - y_lo) / step))
    # Centres sit half a cell inside each cell's lower edge.
    x = x_lo + (np.arange(nx) + 0.5) * step
    y = y_lo + (np.arange(ny) + 0.5) * step
    X, Y = np.meshgrid(x, y)
    return X, Y, x, y, nx, ny


def _dirichlet_weights(k: int) -> np.ndarray:
    """Return ``k`` positive random weights that sum to 1.

    Weights are uniform draws from the global NumPy RNG, shifted by 0.01
    so none is zero, then normalised.
    """
    raw = np.random.rand(k) + 0.01
    return raw / raw.sum()


def _upsample_bilinear(coarse: np.ndarray, ny: int, nx: int) -> np.ndarray:
    """Bilinearly resample ``coarse`` to shape ``(ny, nx)`` without SciPy.

    Two separable passes of 1-D linear interpolation: first along x for
    every coarse row, then along y for every output column. Both axes are
    parameterised on [0, 1].
    """
    n_rows, n_cols = coarse.shape
    src_x, dst_x = np.linspace(0.0, 1.0, n_cols), np.linspace(0.0, 1.0, nx)
    src_y, dst_y = np.linspace(0.0, 1.0, n_rows), np.linspace(0.0, 1.0, ny)

    # Pass 1: widen each coarse row to nx samples -> (n_rows, nx)
    widened = np.array([np.interp(dst_x, src_x, row) for row in coarse])

    # Pass 2: stretch each column to ny samples; columns are stacked as rows,
    # so transpose back to (ny, nx).
    columns = [np.interp(dst_y, src_y, widened[:, c]) for c in range(nx)]
    return np.array(columns).T


def generate_density_field(cfg: V2Config) -> Tuple[np.ndarray, Dict[str, Any]]:
    """Create the synthetic population-density field on the configured grid.

    The field is a weighted sum of isotropic Gaussians plus a uniform
    baseline, optionally modulated by low-frequency multiplicative noise.
    All randomness comes from the global NumPy RNG, so the order of the
    draws below determines the result for a given seed.

    Returns ``(field, info)`` where ``field`` has shape ``(ny, nx)`` and
    ``info`` records the drawn centers, sigmas, weights and noise settings.
    """
    X, Y, x, y, nx, ny = _bbox_arrays(cfg)
    k = cfg.n_centers

    # Draw order: x-centres, y-centres, sigmas, weights (then noise below).
    centre_x = np.random.uniform(x.min(), x.max(), size=k)
    centre_y = np.random.uniform(y.min(), y.max(), size=k)
    centers = np.column_stack([centre_x, centre_y])
    sigmas = np.random.uniform(cfg.center_sigma_km_min, cfg.center_sigma_km_max, size=k)
    weights = _dirichlet_weights(k)

    # Accumulate the Gaussian mixture one bump at a time.
    G = np.zeros((ny, nx), dtype=float)
    for (cx, cy), sigma, weight in zip(centers, sigmas, weights):
        sq_dist = (X - cx) ** 2 + (Y - cy) ** 2
        G += weight * np.exp(-sq_dist / (2 * sigma * sigma))

    # Uniform floor relative to the mean bump level (1e-9 avoids an exact zero).
    baseline = cfg.baseline_frac * (G.mean() + 1e-9)

    field = G + baseline
    if cfg.noise_amp > 0:
        coarse_rows, coarse_cols = cfg.noise_grid
        noise = _upsample_bilinear(np.random.rand(coarse_rows, coarse_cols), ny, nx)
        noise = (noise - 0.5) * 2.0  # rescale [0, 1) draws to roughly [-1, 1]
        field = field * (1.0 + cfg.noise_amp * noise)
        # Keep the field strictly positive even for large amplitudes.
        field = np.clip(field, a_min=baseline * 0.1, a_max=None)

    info = {
        "centers": centers.tolist(),
        "sigmas": sigmas.tolist(),
        "weights": weights.tolist(),
        "baseline": baseline,
        "noise_amp": cfg.noise_amp,
        "noise_grid": cfg.noise_grid,
    }
    return field, info


def _find_local_maxima(field: np.ndarray, percentile: float, min_sep_cells: int) -> List[Tuple[int, int, float]]:
    """Find well-separated local maxima of ``field``.

    A candidate is an interior cell (border rows/columns are not examined)
    whose value is not below the given percentile of the whole field and
    is at least as large as every value in its 3×3 neighbourhood.
    Candidates are then accepted greedily from strongest to weakest,
    skipping any closer than ``min_sep_cells`` (Euclidean, in cells) to an
    already accepted peak.

    Returns a list of ``(row, col, value)`` tuples, strongest first.
    """
    ny, nx = field.shape
    thr = np.percentile(field, percentile)

    # Neighbourhood maximum of every interior cell from the nine shifted views.
    interior = field[1:ny - 1, 1:nx - 1]
    window_max = interior
    for di in range(3):
        for dj in range(3):
            window_max = np.maximum(window_max, field[di:ny - 2 + di, dj:nx - 2 + dj])

    is_peak = ~(interior < thr) & (interior >= window_max)
    # argwhere yields row-major order, matching a row-by-row scan.
    candidates = [
        (int(r) + 1, int(c) + 1, float(interior[r, c]))
        for r, c in np.argwhere(is_peak)
    ]

    # Greedy non-maximum suppression, strongest candidate first (stable sort).
    candidates.sort(key=lambda peak: peak[2], reverse=True)
    min_sep_sq = min_sep_cells ** 2
    kept: List[Tuple[int, int, float]] = []
    for i, j, v in candidates:
        far_enough = all(
            (i - ki) ** 2 + (j - kj) ** 2 >= min_sep_sq for ki, kj, _ in kept
        )
        if far_enough:
            kept.append((i, j, v))
    return kept


def _assign_cells_to_peaks(counts: np.ndarray, field: np.ndarray, peaks: List[Tuple[int, int, float]], cfg: V2Config) -> np.ndarray:
    """Assign every populated cell to its most attractive peak.

    Attractiveness of peak ``j`` for a cell is
    ``weight_j / (dist² + eps²)^(gamma/2)``, where ``weight_j`` is the peak
    value divided by the largest peak value. ``field`` is accepted for
    signature compatibility but not read here.

    Returns an integer array of shape ``(ny, nx)`` holding the index into
    ``peaks`` for each cell; cells with no population (and every cell when
    ``peaks`` is empty) hold −1.
    """
    ny, nx = counts.shape
    assign = np.full((ny, nx), -1, dtype=int)
    if not peaks:
        return assign

    X, Y, x, y, grid_nx, grid_ny = _bbox_arrays(cfg)
    assert grid_nx == nx and grid_ny == ny

    # Peaks are stored as (row, col, value): the column indexes x, the row indexes y.
    peak_x = np.array([x[int(col)] for (_, col, _) in peaks])
    peak_y = np.array([y[int(row)] for (row, _, _) in peaks])
    strength = np.array([value for (_, _, value) in peaks], dtype=float)
    strength = strength / (strength.max() + 1e-12)

    # Only cells that actually hold people take part in the assignment.
    populated = counts > 0
    cell_x = X[populated][:, None]
    cell_y = Y[populated][:, None]

    # Squared distances, shape (n_populated_cells, n_peaks).
    off_x = cell_x - peak_x[None, :]
    off_y = cell_y - peak_y[None, :]
    sq_dist = off_x * off_x + off_y * off_y

    softening_sq = cfg.eps_km ** 2
    pull = strength[None, :] / np.power(sq_dist + softening_sq, cfg.gamma / 2.0)

    assign[populated] = np.argmax(pull, axis=1)
    return assign


def _city_stats_from_assignment(assign: np.ndarray, counts: np.ndarray, cfg: V2Config) -> Tuple[pd.DataFrame, Dict[int, np.ndarray]]:
    """Summarise each city implied by a cell → city assignment.

    Args:
        assign: ``(ny, nx)`` integer array of city ids; negative means unassigned.
        counts: ``(ny, nx)`` array of people per cell.
        cfg: supplies the grid (via ``_bbox_arrays``) and ``grid_res_km``.

    Returns:
        ``(df, masks)``. ``df`` has one row per city with columns
        ``city_id, x_km, y_km, pop, n_cells_assigned, radius_km``, sorted by
        ``pop`` descending; it is an empty frame with those columns when no
        city has population. ``masks`` maps city id → boolean cell mask.
    """
    columns = ["city_id", "x_km", "y_km", "pop", "n_cells_assigned", "radius_km"]
    X, Y, *_ = _bbox_arrays(cfg)
    cell_area_km2 = cfg.grid_res_km ** 2

    masks: Dict[int, np.ndarray] = {}
    rows = []
    for cid in np.unique(assign[assign >= 0]):
        mask = assign == cid
        pop = int(counts[mask].sum())
        if pop <= 0:
            continue
        masks[int(cid)] = mask

        # Population-weighted centroid of the city's cells.
        w = counts[mask].astype(float)
        x_c = float((w * X[mask]).sum() / w.sum())
        y_c = float((w * Y[mask]).sum() / w.sum())

        # Radius of the circle with the same area as the assigned cells.
        n_cells = int(mask.sum())
        radius_km = float(np.sqrt(n_cells * cell_area_km2 / np.pi))

        rows.append({
            "city_id": int(cid),
            "x_km": x_c,
            "y_km": y_c,
            "pop": pop,
            "n_cells_assigned": n_cells,
            "radius_km": radius_km,
        })

    # Explicit columns keep sort_values("pop") valid when `rows` is empty;
    # without them an empty frame has no "pop" column and raises KeyError.
    df = (
        pd.DataFrame(rows, columns=columns)
        .sort_values("pop", ascending=False)
        .reset_index(drop=True)
    )
    return df, masks


def _prune_and_reassign(assign: np.ndarray, counts: np.ndarray, peaks: List[Tuple[int, int, float]], cfg: V2Config) -> Tuple[np.ndarray, List[int]]:
    """Iteratively dissolve cities below ``cfg.min_city_pop``.

    On each pass the smallest under-threshold city is removed and its cells
    are handed to the most attractive surviving peak, using the same
    gravity rule as the initial assignment. City ids are indices into
    ``peaks``. The loop stops when no city is below the threshold or only
    one city is left (which may itself be below the threshold).

    ``assign`` is modified in place; pass a copy to keep the original.

    Returns:
        ``(assign, survivors)`` — the updated assignment and the remaining
        city ids ordered by population, largest first.

    Raises:
        RuntimeError: if the assignment contains no populated city.
    """
    # The grid does not change between passes, so build it once.
    X, Y, x, y, _, _ = _bbox_arrays(cfg)
    eps2 = cfg.eps_km ** 2
    half_gamma = cfg.gamma / 2.0

    while True:
        df, masks = _city_stats_from_assignment(assign, counts, cfg)
        if df.empty:
            raise RuntimeError("No cities formed — check parameters.")
        low = df[df["pop"] < cfg.min_city_pop]
        if low.empty or len(df) == 1:
            return assign, df["city_id"].tolist()

        # Dissolve the smallest city under the threshold.
        remove_id = int(low.sort_values("pop").iloc[0]["city_id"])
        survivors = [int(cid) for cid in df["city_id"].tolist() if cid != remove_id]
        survivor_ids = np.array(survivors, dtype=int)

        # Peaks are (row, col, value): the column indexes x, the row indexes y.
        surv_peaks = [peaks[cid] for cid in survivors]
        px = np.array([x[int(j)] for (_, j, _) in surv_peaks])
        py = np.array([y[int(i)] for (i, _, _) in surv_peaks])
        peak_weight = np.array([v for (_, _, v) in surv_peaks], dtype=float)
        peak_weight = peak_weight / (peak_weight.max() + 1e-12)

        # Attractiveness of each survivor for each orphaned cell.
        rem_mask = masks[remove_id]
        dx = X[rem_mask][:, None] - px[None, :]
        dy = Y[rem_mask][:, None] - py[None, :]
        dist2 = dx * dx + dy * dy
        attractiveness = peak_weight[None, :] / np.power(dist2 + eps2, half_gamma)
        best = np.argmax(attractiveness, axis=1)

        # Boolean-mask reads and writes both walk cells in row-major order,
        # so best[k] lines up with the k-th orphaned cell.
        assign[rem_mask] = survivor_ids[best]


# ------------------------------
# Main generator / validator / saver
# ------------------------------

def generate_nodes_v2(cfg: V2Config) -> Tuple[pd.DataFrame, Dict[str, Any]]:
    """Generate city nodes from a synthetic density field.

    Aims for at least ``cfg.n_cities`` cities, each with at least
    ``cfg.min_city_pop`` inhabitants, by:
      1) detecting density peaks (relaxing percentile/separation if needed),
      2) assigning populated cells to peaks by gravity,
      3) pruning cities below the threshold (their cells go to survivors),
      4) if still short, splitting the largest splittable city in two along
         its widest axis and repeating assignment + pruning.

    Returns:
        ``(df_nodes, extras)``. ``df_nodes`` has columns
        ``id, x_km, y_km, pop, n_cells_assigned, radius_km``. ``extras``
        holds grid info, the number of peaks used, relaxation results,
        ``field_info`` and the raw ``counts`` / ``field`` arrays.

    Raises:
        RuntimeError: if no peak is found or the target count cannot be reached.
    """
    set_seed(cfg.seed)

    # ----- Density + allocation -----
    field, field_info = generate_density_field(cfg)
    probs = field / field.sum()
    X, Y, x, y, nx, ny = _bbox_arrays(cfg)
    # Multinomial allocation keeps the total population exact.
    counts = np.random.multinomial(cfg.total_population, probs.reshape(-1)).reshape(ny, nx)

    def find_peaks(percentile: float, sep_cells: int) -> List[Tuple[int, int, float]]:
        return _find_local_maxima(field, percentile, max(1, sep_cells))

    def assign_and_prune(peak_list: List[Tuple[int, int, float]]):
        """Gravity-assign cells to `peak_list`, merge tiny cities, return (df, masks)."""
        assign = _assign_cells_to_peaks(counts, field, peak_list, cfg)
        assign, _ = _prune_and_reassign(assign, counts, peak_list, cfg)
        return _city_stats_from_assignment(assign, counts, cfg)

    def split_city_mask(mask: np.ndarray) -> List[Tuple[int, int, float]] | None:
        """Split one city in two along its widest axis.

        Cells are ordered along the axis with the larger coordinate
        variance and cut as close to the population median as possible
        while keeping both halves at or above ``cfg.min_city_pop``.
        Returns two peak tuples ``(i, j, value)`` at the grid cells nearest
        the halves' weighted centroids, or None if no valid cut exists.
        """
        w = counts[mask].astype(float)
        n = w.size
        total = w.sum()
        if n < 2 or total < 2 * cfg.min_city_pop:
            return None
        xs, ys = X[mask], Y[mask]
        coord = xs if np.var(xs) >= np.var(ys) else ys
        order = np.argsort(coord)
        w_sorted, xs_sorted, ys_sorted = w[order], xs[order], ys[order]
        csum = np.cumsum(w_sorted)

        # A cut at k gives left = cells [:k] with population csum[k-1] and
        # right = cells [k:] with population total - csum[k-1].
        # csum is non-decreasing, so the valid cuts form one interval.
        k_lo = int(np.searchsorted(csum, cfg.min_city_pop, side="left")) + 1
        k_hi = int(np.searchsorted(csum, total - cfg.min_city_pop, side="right"))
        k_lo, k_hi = max(k_lo, 1), min(k_hi, n - 1)  # both halves non-empty
        if k_lo > k_hi:
            return None  # cannot split with thresholds
        k_half = int(np.searchsorted(csum, total / 2.0))
        k = min(max(k_half, k_lo), k_hi)

        # Weighted centroids of the two parts.
        wL, wR = w_sorted[:k], w_sorted[k:]
        xL = float(np.average(xs_sorted[:k], weights=wL))
        yL = float(np.average(ys_sorted[:k], weights=wL))
        xR = float(np.average(xs_sorted[k:], weights=wR))
        yR = float(np.average(ys_sorted[k:], weights=wR))
        # Snap to the nearest grid cells.
        jL, iL = int(np.argmin(np.abs(x - xL))), int(np.argmin(np.abs(y - yL)))
        jR, iR = int(np.argmin(np.abs(x - xR))), int(np.argmin(np.abs(y - yR)))
        if (iL, jL) == (iR, jR):
            return None  # identical peaks can never yield two cities
        return [(iL, jL, float(field[iL, jL])), (iR, jR, float(field[iR, jR]))]

    # ---- 1) initial peaks with relaxation ----
    current_pct = float(getattr(cfg, "peaks_percentile", 92.0))
    current_sep = int(round(cfg.min_peak_separation_km / cfg.grid_res_km))
    pct_floor = float(getattr(cfg, "peaks_percentile_floor", 60.0))
    shrink = float(getattr(cfg, "separation_shrink", 0.85))
    max_relax = int(getattr(cfg, "max_relax_iters", 10))

    peaks = find_peaks(current_pct, current_sep)
    for _ in range(max_relax):
        if peaks:
            df, _ = assign_and_prune(peaks)
            if len(df) >= cfg.n_cities:
                break
        # Relax detection if we can.
        new_pct = max(pct_floor, current_pct - 5.0)
        # NOTE(review): with ceil, separations of 6 cells or fewer never
        # shrink at the default factor 0.85 (e.g. ceil(4 * 0.85) == 4) —
        # confirm whether floor was intended. Left as is to keep results stable.
        new_sep = max(1, int(np.ceil(current_sep * shrink)))
        new_peaks = find_peaks(new_pct, new_sep)
        if len(new_peaks) <= len(peaks):
            break  # no further improvement
        peaks, current_pct, current_sep = new_peaks, new_pct, new_sep

    if not peaks:
        raise RuntimeError("No density peaks detected — check parameters.")

    # ---- 2) enforce minimum by splitting largest cities if needed ----
    # Always recompute with the peaks finally selected above.
    df, masks = assign_and_prune(peaks)

    attempts = 0
    while len(df) < cfg.n_cities and attempts < 200:  # 200 = safety cap
        attempts += 1
        split_done = False
        # Try cities from largest to smallest until one can be split.
        for cid in df.sort_values("pop", ascending=False)["city_id"].tolist():
            cid = int(cid)
            new_two = split_city_mask(masks[cid])
            if new_two is None:
                continue
            # Replace this city's peak with the two new peaks, then redo
            # the assignment from scratch.
            peaks = [p for idx, p in enumerate(peaks) if idx != cid] + new_two
            df, masks = assign_and_prune(peaks)
            split_done = True
            # City ids index into `peaks`, which just changed: the ids still
            # queued in this loop are stale, so restart with fresh stats.
            break
        if not split_done:
            break  # no city can be split further while respecting min_city_pop

    if len(df) < cfg.n_cities:
        raise RuntimeError(f"Could not reach the target number of cities (got {len(df)}, want {cfg.n_cities}). "
                           f"Try decreasing min_city_pop, increasing total_population, or using finer grid_res_km.")

    # ---- finalize nodes ----
    df_cities = df
    # Internal invariant: every populated cell belongs to exactly one city.
    assert int(df_cities["pop"].sum()) == int(cfg.total_population)

    df_nodes = df_cities.rename(columns={"city_id": "id"})[
        ["id", "x_km", "y_km", "pop", "n_cells_assigned", "radius_km"]
    ].copy()
    df_nodes["id"] = df_nodes["id"].astype(int)

    extras = {
        "grid": {"nx": nx, "ny": ny, "res_km": cfg.grid_res_km},
        "peaks_count_used": int(len(peaks)),
        "relaxation": {
            "final_percentile": current_pct,
            "final_sep_cells": current_sep,
            "target_n_cities": int(cfg.n_cities),
            "achieved_n_cities": int(len(df_nodes)),
        },
        "field_info": field_info,
        "counts": counts,
        "field": field,
    }
    return df_nodes, extras

def validate_nodes(df_nodes: pd.DataFrame, cfg: V2Config) -> Dict[str, Any]:
    """Check the node table and return summary metrics.

    Raises AssertionError when the table is empty or any city's ``pop`` is
    below ``cfg.min_city_pop``. Otherwise returns a dict with the city
    count, the total population and integer population percentiles.
    """
    n_cities = len(df_nodes)
    if n_cities == 0:
        raise AssertionError("No cities produced")

    populations = df_nodes["pop"]
    if (populations < cfg.min_city_pop).any():
        raise AssertionError("Found a city below min_city_pop after pruning")

    percentile_levels = (5, 25, 50, 75, 90, 95, 99)
    return {
        "n_cities": int(n_cities),
        "total_population": int(populations.sum()),
        "pop_percentiles": {
            level: int(np.percentile(populations, level)) for level in percentile_levels
        },
    }

def preview_nodes(df_nodes: pd.DataFrame, cfg: V2Config, save_path: str) -> None:
    """Save a scatter preview of the cities to ``save_path``.

    Marker size and colour both encode population; the three most populous
    cities are labelled with their population. Axes span ``cfg.bbox_km``.
    """
    x_lo, y_lo, x_hi, y_hi = cfg.bbox_km
    populations = df_nodes["pop"].values
    largest = df_nodes["pop"].max()

    fig, ax = plt.subplots(figsize=(6, 6))
    points = ax.scatter(
        df_nodes["x_km"], df_nodes["y_km"],
        s=10 + 90 * np.sqrt(populations / largest),
        c=populations.astype(float),
    )
    colorbar = fig.colorbar(points, ax=ax)
    colorbar.set_label("Population")
    try:
        # Thousands separators on the colour scale; purely cosmetic.
        colorbar.ax.yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))
    except Exception:
        pass

    # Label the three largest cities, nudged by 1% of the extent.
    shift_x = 0.01 * (x_hi - x_lo)
    shift_y = 0.01 * (y_hi - y_lo)
    label_box = dict(boxstyle="round,pad=0.2", fc="white", ec="none", alpha=0.7)
    for _, city in df_nodes.nlargest(3, "pop").copy().iterrows():
        ax.text(
            city["x_km"] + shift_x,
            city["y_km"] + shift_y,
            f"{int(city['pop']):,}",
            fontsize=8,
            ha="left",
            va="bottom",
            bbox=label_box,
        )

    ax.set_title("Nodes — V2 (density → gravity cities)")
    ax.set_xlabel("x (km)")
    ax.set_ylabel("y (km)")
    ax.set_xlim(x_lo, x_hi)
    ax.set_ylim(y_lo, y_hi)
    ax.set_aspect("equal", adjustable="box")
    fig.tight_layout()
    fig.savefig(save_path, dpi=150)
    plt.close(fig)

def compute_metrics_hash(metrics: Dict[str, Any]) -> str:
    """Return a 16-hex-character fingerprint of ``metrics``.

    The dict is serialised as JSON with sorted keys, so key order does not
    affect the result, then hashed with SHA-256 and truncated.
    """
    canonical = json.dumps(metrics, sort_keys=True)
    digest = hashlib.sha256(canonical.encode("utf-8"))
    return digest.hexdigest()[:16]

def save_artifacts(df_nodes: pd.DataFrame, cfg: V2Config, metrics: Dict[str, Any], extras: Dict[str, Any]) -> Dict[str, str]:
    """Write the node table, preview images and metadata into ``cfg.out_dir``.

    Files written: ``nodes.csv``, ``preview.png``, ``meta.json`` and — when
    ``extras`` contains ``"counts"`` — ``population_heatmap.png``.

    Returns:
        Paths keyed by ``nodes``, ``preview``, ``heatmap`` and ``meta``;
        ``heatmap`` is None when no heatmap was written.
    """
    os.makedirs(cfg.out_dir, exist_ok=True)
    nodes_path = os.path.join(cfg.out_dir, "nodes.csv")
    preview_path = os.path.join(cfg.out_dir, "preview.png")
    heatmap_path = os.path.join(cfg.out_dir, "population_heatmap.png")
    meta_path = os.path.join(cfg.out_dir, "meta.json")

    # Save nodes CSV
    df_nodes.to_csv(nodes_path, index=False)

    # Scatter preview of cities
    preview_nodes(df_nodes, cfg, preview_path)

    # Heatmap of the per-cell population counts
    counts = extras.get("counts", None)  # expected shape (ny, nx)
    if counts is not None:
        minx, miny, maxx, maxy = cfg.bbox_km
        plt.figure(figsize=(6, 6))
        # imshow with spatial extent to align axes with km coordinates
        plt.imshow(
            counts,
            origin="lower",
            extent=[minx, maxx, miny, maxy],
            aspect="equal",
        )
        cbar = plt.colorbar()
        cbar.set_label("People per cell")
        try:
            # Thousands separators on the colour scale; purely cosmetic.
            cbar.ax.yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))
        except Exception:
            pass
        plt.title("Population distribution (heatmap)")
        plt.xlabel("x (km)")
        plt.ylabel("y (km)")
        plt.tight_layout()
        plt.savefig(heatmap_path, dpi=150)
        plt.close()

    # The generator reports the peak count as "peaks_count_used"; accept
    # "peaks_count" too so either producer key ends up in the metadata.
    peaks_count = extras.get("peaks_count", extras.get("peaks_count_used"))

    # Meta
    meta = {
        "schema_version": cfg.schema_version,
        "dataset_version": cfg.dataset_version,
        "crs": cfg.crs,
        "seed": cfg.seed,
        "generator": {
            "name": "nodes_v2_density_to_cities",
            "params": {
                "total_population": cfg.total_population,
                "bbox_km": cfg.bbox_km,
                "grid_res_km": cfg.grid_res_km,
                "n_centers": cfg.n_centers,
                "center_sigma_km_min": cfg.center_sigma_km_min,
                "center_sigma_km_max": cfg.center_sigma_km_max,
                "baseline_frac": cfg.baseline_frac,
                "noise_amp": cfg.noise_amp,
                "noise_grid": cfg.noise_grid,
                "peaks_percentile": cfg.peaks_percentile,
                "min_peak_separation_km": cfg.min_peak_separation_km,
                "gamma": cfg.gamma,
                "eps_km": cfg.eps_km,
                "min_city_pop": cfg.min_city_pop,
                # These also shape the result, so record them for reproducibility.
                "n_cities": cfg.n_cities,
                "peaks_percentile_floor": cfg.peaks_percentile_floor,
                "separation_shrink": cfg.separation_shrink,
                "max_relax_iters": cfg.max_relax_iters,
            },
        },
        "extras_summary": {
            "grid": extras.get("grid"),
            "peaks_count": peaks_count,
        },
        "artifacts": {
            "nodes_csv": nodes_path,
            "preview_png": preview_path,
            "population_heatmap_png": heatmap_path if counts is not None else None,
        },
        "metrics": metrics,
        "metrics_hash": compute_metrics_hash(metrics),
        "created_at_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
    }
    with open(meta_path, "w", encoding="utf-8") as f:
        json.dump(meta, f, indent=2)

    return {"nodes": nodes_path, "preview": preview_path, "heatmap": (heatmap_path if counts is not None else None), "meta": meta_path}

# ------------------------------
# Orchestration
# ------------------------------

def main(cfg: V2Config | None = None) -> pd.DataFrame:
    """Run the V2 pipeline end to end: generate, validate, save, report.

    Uses a default ``V2Config`` when ``cfg`` is not given. Returns the
    node table.
    """
    if not cfg:
        cfg = V2Config()
    set_seed(cfg.seed)

    df_nodes, extras = generate_nodes_v2(cfg)
    metrics = validate_nodes(df_nodes, cfg)
    paths = save_artifacts(df_nodes, cfg, metrics, extras)

    # Console summary
    header = "\n[Nodes V2] Build complete:\n" + "-" * 40
    saved_lines = [
        f"Saved: nodes → {paths['nodes']}",
        f"       preview → {paths['preview']}",
        f"       meta → {paths['meta']}",
    ]
    print(header)
    print(f"Cities: {len(df_nodes)} | Total pop: {metrics['total_population']:,}")
    print("\n".join(saved_lines))
    print(f"Metrics hash: {compute_metrics_hash(metrics)}")
    return df_nodes


# ------------------------------
# Run
# ------------------------------
# Run configuration for this notebook cell. Every value is spelled out so it
# can be edited in place; only `n_cities` differs from the V2Config defaults.
_cfg = V2Config(
    seed=42,
    total_population=5_000_000,
    bbox_km=(0.0, 0.0, 200.0, 200.0),
    grid_res_km=2.0,
    n_centers=4,
    center_sigma_km_min=12.0,
    center_sigma_km_max=35.0,
    baseline_frac=0.05,
    noise_amp=0.30,
    noise_grid=(10, 10),
    peaks_percentile=92.0,
    min_peak_separation_km=8.0,
    gamma=1.7,
    eps_km=0.5,
    min_city_pop=1_000,
    n_cities=20,  # class default is 8
    peaks_percentile_floor=60.0,
    separation_shrink=0.85,
    max_relax_iters=10,
    out_dir="maps/sv1.2/dv0.1_v2_density_cities",
)

# Jupyter notebook cell — Animation builder (≤ 15s)
# Requires: matplotlib, numpy, pandas, tqdm, opencv-python (for MP4), imageio (optional for GIF)

from __future__ import annotations
import os, math, gc
from typing import List, Tuple
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import StrMethodFormatter
from tqdm.auto import tqdm

# Optional encoders
# Probe for OpenCV; _HAS_CV2 records whether the import succeeded.
# The broad `except` also covers non-ImportError failures while loading
# the package.
try:
    import cv2  # for MP4
    _HAS_CV2 = True
except Exception:
    _HAS_CV2 = False

# Probe for imageio; _HAS_IMAGEIO records whether the import succeeded.
try:
    import imageio.v2 as imageio  # for GIF (optional)
    _HAS_IMAGEIO = True
except Exception:
    _HAS_IMAGEIO = False


def _ease_in_out(t: np.ndarray, power: float = 2.0) -> np.ndarray:
    """Smooth easing 0→1 (s-curve): ``t^p / (t^p + (1 - t)^p)``.

    ``t`` may be an array, a sequence or a scalar; values are clipped to
    [0, 1] first. Positions where the ratio is undefined (NaN input, or
    inf/inf for non-positive powers) map to 0.0. Returns an ndarray of the
    same shape as ``t`` (0-d for a scalar).
    """
    # asarray lets scalars and lists through; previously a scalar failed on
    # the in-place NaN assignment.
    t = np.clip(np.asarray(t, dtype=float), 0.0, 1.0)
    rise = np.power(t, power)
    fall = np.power(1 - t, power)
    with np.errstate(invalid="ignore", divide="ignore"):
        a = rise / (rise + fall)
    return np.where(np.isnan(a), 0.0, a)


def _plot_heatmap(counts: np.ndarray, cfg: V2Config, save_path: str,
                  vmin=None, vmax=None) -> None:
    """Render ``counts`` as a heatmap over ``cfg.bbox_km`` and save it.

    ``vmin``/``vmax`` fix the colour scale so successive frames are
    comparable; None lets matplotlib choose.
    """
    x_lo, y_lo, x_hi, y_hi = cfg.bbox_km

    fig, ax = plt.subplots(figsize=(6, 6))
    image = ax.imshow(
        counts,
        origin="lower",
        extent=[x_lo, x_hi, y_lo, y_hi],
        aspect="equal",
        vmin=vmin,
        vmax=vmax,
        cmap="viridis",
    )
    colorbar = fig.colorbar(image, ax=ax)
    colorbar.set_label("People per cell")
    try:
        # Thousands separators on the colour scale; purely cosmetic.
        colorbar.ax.yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))
    except Exception:
        pass

    ax.set_title("Population distribution (heatmap)")
    ax.set_xlabel("x (km)")
    ax.set_ylabel("y (km)")
    fig.tight_layout()
    fig.savefig(save_path, dpi=150)
    plt.close(fig)


# --- UPDATED helpers ---

def _plot_scatter(df_nodes: pd.DataFrame, cfg: V2Config, save_path: str,
                  grow: float = 1.0,
                  heatmap_underlay: np.ndarray | None = None,
                  heatmap_vmin: float | None = None,
                  heatmap_vmax: float | None = None,
                  heatmap_alpha: float = 0.25) -> None:
    """Save a scatter plot of the cities, optionally over a faded heatmap.

    ``grow`` scales the marker sizes (clipped to [0.05, 1.0]) so an
    animation can grow the cities in; population labels for the top three
    cities appear once ``grow`` reaches 0.85. When ``heatmap_underlay`` is
    given it is drawn beneath the markers with ``heatmap_alpha``.
    """
    x_lo, y_lo, x_hi, y_hi = cfg.bbox_km
    fig, ax = plt.subplots(figsize=(6, 6))

    # Density underlay (bottom layer)
    if heatmap_underlay is not None:
        ax.imshow(
            heatmap_underlay,
            origin="lower",
            extent=[x_lo, x_hi, y_lo, y_hi],
            aspect="equal",
            cmap="viridis",
            vmin=heatmap_vmin, vmax=heatmap_vmax,
            alpha=heatmap_alpha,
            zorder=0,
        )

    # City markers (middle layer)
    populations = df_nodes["pop"].values
    full_size = 10 + 90 * np.sqrt(populations / df_nodes["pop"].max())
    markers = ax.scatter(
        df_nodes["x_km"], df_nodes["y_km"],
        s=full_size * np.clip(grow, 0.05, 1.0),
        c=populations.astype(float),
        cmap="viridis", zorder=1,
    )
    colorbar = fig.colorbar(markers, ax=ax)
    colorbar.set_label("Population")
    try:
        # Thousands separators on the colour scale; purely cosmetic.
        colorbar.ax.yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))
    except Exception:
        pass

    # Population labels (top layer), only near the end of the growth
    if grow >= 0.85:
        shift_x = 0.01 * (x_hi - x_lo)
        shift_y = 0.01 * (y_hi - y_lo)
        label_box = dict(boxstyle="round,pad=0.2", fc="white", ec="none", alpha=0.7)
        for _, city in df_nodes.nlargest(3, "pop").copy().iterrows():
            ax.text(city["x_km"] + shift_x, city["y_km"] + shift_y,
                    f"{int(city['pop']):,}",
                    fontsize=8, ha="left", va="bottom",
                    bbox=label_box, zorder=2)

    ax.set_title("Nodes — V2 (density → gravity cities)")
    ax.set_xlabel("x (km)")
    ax.set_ylabel("y (km)")
    ax.set_xlim(x_lo, x_hi)
    ax.set_ylim(y_lo, y_hi)
    ax.set_aspect("equal", adjustable="box")
    fig.tight_layout()
    fig.savefig(save_path, dpi=150)
    plt.close(fig)



def make_frames_density_to_cities(cfg: V2Config,
                                  total_duration_s: float = 15.0,
                                  fps: int = 15,
                                  frames_dir_name: str = "frames",
                                  also_gif: bool = False) -> Tuple[List[str], str, str]:
    """Render the density → cities animation and encode it.

    Frames are produced in two stages:
      1) heatmap build-up (ends exactly at the full heatmap),
      2) city scatter growth over the heatmap (ends at the final preview).

    Args:
        cfg: generator configuration; outputs go under ``cfg.out_dir``.
        total_duration_s: requested length; capped at 15 seconds.
        fps: frames per second of the encoded video.
        frames_dir_name: sub-directory of ``cfg.out_dir`` for the PNG frames.
        also_gif: additionally write a GIF when imageio is available.

    Returns:
        ``(frame_paths, mp4_path, gif_path)``; ``gif_path`` is ``""`` when
        no GIF was written.

    Raises:
        RuntimeError: if OpenCV is unavailable, the first frame cannot be
            read back, or the video writer cannot be opened.
    """
    # Fail before the expensive rendering if the MP4 cannot be encoded anyway.
    if not _HAS_CV2:
        raise RuntimeError("opencv-python is required to generate MP4. Install with: pip install opencv-python")

    # --- Generate the data (generate_nodes_v2 seeds the RNG from cfg.seed) ---
    df_nodes, extras = generate_nodes_v2(cfg)
    counts = extras["counts"]  # (ny, nx)

    # Fixed colour scale so stage 1 ends matching the static heatmap asset.
    vmin, vmax = np.min(counts), np.max(counts)

    # Where to save things
    out_dir = cfg.out_dir
    os.makedirs(out_dir, exist_ok=True)
    frames_dir = os.path.join(out_dir, frames_dir_name)
    os.makedirs(frames_dir, exist_ok=True)

    # Timing and partition (≈60% heatmap, 40% scatter; at least 8 frames each)
    total_frames = max(1, int(min(total_duration_s, 15.0) * fps))
    n_heat = max(8, int(total_frames * 0.6))
    n_scatter = max(8, total_frames - n_heat)

    paths: List[str] = []
    fidx = 0

    # ---------- Stage 1: heatmap build ----------
    t_heat = _ease_in_out(np.linspace(0.0, 1.0, n_heat), power=2.5)
    for r in tqdm(t_heat, total=n_heat, desc="Building heatmap frames"):
        # cumulative reveal (monotone increase) using easing ratio
        partial = np.floor(counts * r).astype(counts.dtype)
        frame_path = os.path.join(frames_dir, f"frame_{fidx:04d}.png")
        _plot_heatmap(partial, cfg, frame_path, vmin=vmin, vmax=vmax)
        paths.append(frame_path)
        fidx += 1

    # ---------- Stage 2: nodes growth ----------
    t_sc = _ease_in_out(np.linspace(0.0, 1.0, n_scatter), power=2.0)

    # Set overlay_in_final = False to draw the very last frame WITHOUT the
    # heatmap underlay.
    overlay_in_final = True

    for k, g in enumerate(t_sc):
        use_underlay = counts if (overlay_in_final or k < len(t_sc) - 1) else None
        frame_path = os.path.join(frames_dir, f"frame_{fidx:04d}.png")
        _plot_scatter(
            df_nodes, cfg, frame_path,
            grow=float(g),
            heatmap_underlay=use_underlay,
            heatmap_vmin=vmin, heatmap_vmax=vmax,
            heatmap_alpha=0.66
        )
        paths.append(frame_path)
        fidx += 1

    # ---------- Encode MP4 (always) ----------
    mp4_path = os.path.join(out_dir, "density_to_cities.mp4")

    # Get frame size from first image
    first = cv2.imread(paths[0])
    if first is None:
        raise RuntimeError("Failed to read first frame for video encoding.")
    h, w = first.shape[:2]
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    writer = cv2.VideoWriter(mp4_path, fourcc, fps, (w, h))
    try:
        if not writer.isOpened():
            raise RuntimeError("Failed to open VideoWriter. Check OpenCV codecs.")
        for p in tqdm(paths, desc="Encoding MP4"):
            img = cv2.imread(p)
            if img is None:
                continue  # skip unreadable frames rather than abort the encode
            # Ensure size matches exactly
            if img.shape[1] != w or img.shape[0] != h:
                img = cv2.resize(img, (w, h), interpolation=cv2.INTER_AREA)
            writer.write(img)
    finally:
        # Release the writer even if encoding fails part-way.
        writer.release()

    # ---------- Optional GIF ----------
    gif_path = None
    if also_gif and _HAS_IMAGEIO:
        gif_path = os.path.join(out_dir, "density_to_cities.gif")
        imgs = [imageio.imread(p) for p in tqdm(paths, desc="Encoding GIF")]
        # NOTE(review): `duration` is passed in seconds per frame; some
        # imageio versions may interpret it in milliseconds — confirm
        # against the installed version.
        imageio.mimsave(gif_path, imgs, duration=1.0 / fps)

    return paths, mp4_path, (gif_path or "")


# ------------------------------
# Run the animation builder
# ------------------------------
# You can tweak fps (≤ 30 recommended) and total_duration_s (capped at 15 s
# inside make_frames_density_to_cities).
anim_cfg = _cfg  # reuse the config block above
frame_list, mp4_out, gif_out = make_frames_density_to_cities(
    anim_cfg,
    total_duration_s=15.0,   # hard cap
    fps=15,                  # 15 fps → up to 225 frames in 15s
    frames_dir_name="frames",# frames saved here
    also_gif=True            # set False if you only want MP4
)

# Report where the outputs went; gif_out is "" when no GIF was written.
print(f"\nSaved {len(frame_list)} frames in: {os.path.join(anim_cfg.out_dir, 'frames')}")
print(f"MP4: {mp4_out}")
if gif_out:
    print(f"GIF: {gif_out}")


Building heatmap frames: 100%|██████████| 135/135 [00:24<00:00,  5.49it/s]
Encoding MP4: 100%|██████████| 225/225 [00:04<00:00, 45.01it/s]
Encoding GIF: 100%|██████████| 225/225 [00:01<00:00, 187.13it/s]



Saved 225 frames in: maps/sv1.2/dv0.1_v2_density_cities\frames
MP4: maps/sv1.2/dv0.1_v2_density_cities\density_to_cities.mp4
GIF: maps/sv1.2/dv0.1_v2_density_cities\density_to_cities.gif


In [4]:
"""
Jupyter notebook cell — Version 1 (Nodes only)
Probabilistic city classes + uniform population ranges per class.
- Placement: uniform within bbox (same as V0)
- Class assignment: per-node categorical draw with probabilities
- Population: uniform integer in [min, max] for the node’s class
- Outputs: nodes.csv, meta.json, preview.png (color = population; marker shape = class)

Usage: run this cell. Edit `V1Config` to change class probabilities or ranges.
"""
from __future__ import annotations

import json
import os
import time
import hashlib
from dataclasses import dataclass, field
from typing import Tuple, Dict, Any, List

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import StrMethodFormatter
from matplotlib.lines import Line2D


# ------------------------------
# Config
# ------------------------------
@dataclass
class V1Config:
    """Parameters for the V1 node generator (probabilistic classes, uniform populations).

    Every field has a default, so ``V1Config()`` is a complete configuration.
    The dict-valued fields use ``default_factory`` so instances never share a dict.
    """

    # Seed passed to ``set_seed`` before any random draw.
    seed: int = 42
    # Number of nodes (rows) to generate.
    n_nodes: int = 120
    bbox_km: Tuple[float, float, float, float] = (0.0, 0.0, 200.0, 200.0)  # (minx, miny, maxx, maxy)

    # Per-class probabilities (will be normalized to sum 1). Each node draws its class independently.
    class_prob: Dict[str, float] = field(
        default_factory=lambda: {"large": 0.10, "medium": 0.30, "small": 0.60}
    )

    # Uniform population ranges per class (inclusive of min/max)
    class_ranges: Dict[str, Dict[str, int]] = field(
        default_factory=lambda: {
            # Choose non-overlapping ranges so medians satisfy large > medium > small
            "large":  {"min": 600_000, "max": 1_500_000},
            "medium": {"min": 150_000, "max":   500_000},
            "small":  {"min":   1_000, "max":   120_000},
        }
    )

    # Output & metadata
    # Directory that receives nodes.csv, preview.png and meta.json.
    out_dir: str = "maps/sv1.1/dv0.2_v1_classes_prob_uniform"
    crs: str = "EPSG:3857"  # Synthetic planar; coordinates stored in km for simplicity
    schema_version: str = "1.1"  # still includes optional `class` column
    dataset_version: str = "0.2"


# ------------------------------
# Core helpers
# ------------------------------

def set_seed(seed: int) -> None:
    """Seed NumPy's global random state so later ``np.random.*`` draws are repeatable."""
    np.random.seed(seed=seed)


def generate_positions_uniform(bbox_km: Tuple[float, float, float, float], n: int) -> np.ndarray:
    """Draw ``n`` points uniformly inside a rectangle.

    Args:
        bbox_km: ``(minx, miny, maxx, maxy)`` of the rectangle.
        n: number of points to draw.

    Returns:
        Array of shape ``(n, 2)``; column 0 is x, column 1 is y.

    Raises:
        ValueError: if the box does not satisfy ``minx < maxx`` and ``miny < maxy``.
    """
    x_lo, y_lo, x_hi, y_hi = bbox_km
    bbox_ok = x_lo < x_hi and y_lo < y_hi
    if not bbox_ok:
        raise ValueError("Invalid bbox: must satisfy minx<maxx and miny<maxy")

    # All x values are drawn before any y value; this order fixes the random stream.
    x_coords = np.random.uniform(x_lo, x_hi, size=n)
    y_coords = np.random.uniform(y_lo, y_hi, size=n)
    return np.column_stack((x_coords, y_coords))


def _sample_classes(n: int, class_prob: Dict[str, float]) -> List[str]:
    """Draw a class label for each of ``n`` nodes, independently.

    Args:
        n: number of labels to draw.
        class_prob: mapping ``label -> weight``. Weights are normalised to sum to 1,
            so they only need to be non-negative with a positive total.

    Returns:
        List of ``n`` labels taken from the keys of ``class_prob``.

    Raises:
        ValueError: if ``class_prob`` is empty, contains a negative or non-finite
            weight, or its weights sum to zero.
    """
    labels = list(class_prob)
    weights = np.array([class_prob[k] for k in labels], dtype=float)

    # Validate up front: normalising a zero-sum table would divide by zero and hand
    # NaN probabilities to NumPy, which then fails with an unrelated message.
    if weights.size == 0:
        raise ValueError("class_prob must contain at least one class")
    if not np.all(np.isfinite(weights)) or np.any(weights < 0):
        raise ValueError(f"class_prob weights must be finite and non-negative: {class_prob}")
    total = weights.sum()
    if total <= 0:
        raise ValueError(f"class_prob weights must sum to a positive value: {class_prob}")

    draws = np.random.choice(labels, size=n, p=weights / total)
    return draws.tolist()


def _sample_uniform_int(low: int, high: int, size: int) -> np.ndarray:
    """Draw ``size`` integers uniformly from the closed interval ``[low, high]``.

    Raises:
        ValueError: if ``high`` is smaller than ``low``.
    """
    if low > high:
        raise ValueError(f"Invalid range: [{low},{high}]")
    # randint excludes its upper bound, so shift it by one to include ``high``.
    upper_exclusive = high + 1
    return np.random.randint(low, upper_exclusive, size=size, dtype=int)


def generate_nodes_v1(cfg: V1Config) -> pd.DataFrame:
    """Build the V1 node table: uniform positions, a random class, a class-ranged population.

    Random draws happen in a fixed order (positions, then classes, then populations
    class by class in ``cfg.class_prob`` key order), so a seeded run is reproducible.

    Returns:
        DataFrame with columns ``id``, ``x_km``, ``y_km``, ``class``, ``pop``.

    Raises:
        KeyError: if a class that was actually drawn has no entry in ``cfg.class_ranges``.
    """
    n = cfg.n_nodes
    coords = generate_positions_uniform(cfg.bbox_km, n)
    labels = _sample_classes(n, cfg.class_prob)

    # Every node's label is a key of class_prob, so the loop below fills every slot.
    populations = np.empty(n, dtype=int)
    for cls in cfg.class_prob:
        members = np.flatnonzero([lab == cls for lab in labels])
        if members.size == 0:
            continue  # class never drawn: no range lookup and no random draw
        bounds = cfg.class_ranges.get(cls)
        if bounds is None:
            raise KeyError(f"Missing class range for '{cls}'")
        populations[members] = _sample_uniform_int(
            int(bounds["min"]), int(bounds["max"]), size=len(members)
        )

    columns = {
        "id": np.arange(n, dtype=int),
        "x_km": coords[:, 0],
        "y_km": coords[:, 1],
        "class": labels,
        "pop": populations,
    }
    return pd.DataFrame(columns)


def validate_nodes(df: pd.DataFrame, cfg: V1Config) -> Dict[str, Any]:
    """Check a V1 node table against its config and collect summary metrics.

    Hard failures (wrong row count, any node outside the bbox) raise
    ``AssertionError``. The class-median ordering is only reported, never enforced.

    Returns:
        Dict with keys ``n_nodes``, ``bbox``, ``class_counts``, ``class_medians``,
        ``median_order_ok``, ``pop_range_observed`` and ``pop_percentiles``.
    """
    minx, miny, maxx, maxy = cfg.bbox_km

    # Row count
    observed = len(df)
    if observed != cfg.n_nodes:
        raise AssertionError(f"Node count mismatch: expected {cfg.n_nodes}, got {observed}")
    metrics: Dict[str, Any] = {"n_nodes": observed}

    # Bounds (inclusive on every edge)
    x_ok = (df["x_km"] >= minx) & (df["x_km"] <= maxx)
    y_ok = (df["y_km"] >= miny) & (df["y_km"] <= maxy)
    outside = ~(x_ok & y_ok)
    violations = int(outside.sum())
    if violations:
        raise AssertionError(f"{violations} nodes fall outside bbox")
    metrics["bbox"] = {"minx": minx, "miny": miny, "maxx": maxx, "maxy": maxy}

    # Class counts are reported only; there is no quota to enforce.
    metrics["class_counts"] = df["class"].value_counts().to_dict()

    # Per-class medians and whether they are ordered large > medium > small.
    medians = df.groupby("class")["pop"].median().to_dict()
    metrics["class_medians"] = {name: int(value) for name, value in medians.items()}
    try:
        ordered = bool(medians["large"] > medians["medium"] > medians["small"])
    except KeyError:
        # A class with no nodes has no median, so the ordering cannot hold.
        ordered = False
    metrics["median_order_ok"] = ordered

    # Global population range and percentiles
    pops = df["pop"]
    metrics["pop_range_observed"] = {"min": int(pops.min()), "max": int(pops.max())}
    percentiles = {}
    for q in (5, 25, 50, 75, 95):
        percentiles[q] = int(np.percentile(pops, q))
    metrics["pop_percentiles"] = percentiles

    return metrics


def preview_nodes(df: pd.DataFrame, cfg: V1Config, save_path: str) -> None:
    """Save a scatter preview of the V1 nodes to ``save_path``.

    Marker area and colour both encode population (a colourbar is drawn), marker
    shape encodes class (large = square, medium = triangle, small = circle), and
    the three most populated nodes are labelled with their population.
    """
    minx, miny, maxx, maxy = cfg.bbox_km
    pop_lo = df["pop"].min()
    pop_hi = df["pop"].max()

    class_order = ["large", "medium", "small"]
    shape_by_class = {"large": "s", "medium": "^", "small": "o"}

    fig, ax = plt.subplots(figsize=(6, 6))

    # One scatter call per class because a scatter takes a single marker shape;
    # all calls share vmin/vmax so colours are comparable across classes.
    last_scatter = None
    for cls in class_order:
        rows = df[df["class"] == cls]
        if rows.empty:
            continue
        pops = rows["pop"].values
        last_scatter = ax.scatter(
            rows["x_km"],
            rows["y_km"],
            s=10 + 90 * np.sqrt(pops / pop_hi),  # sqrt compresses the size range
            c=pops.astype(float),
            vmin=pop_lo,
            vmax=pop_hi,
            marker=shape_by_class.get(cls, "o"),
            label=cls.title(),
        )

    if last_scatter is not None:
        colorbar = fig.colorbar(last_scatter, ax=ax)
        colorbar.set_label("Population")
        try:
            colorbar.ax.yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))
        except Exception:
            # Best effort: keep the default tick labels if the formatter cannot be set.
            pass

    # Legend built from proxy handles so every class is listed, drawn or not.
    legend_handles = []
    for cls in class_order:
        legend_handles.append(
            Line2D([], [], marker=shape_by_class.get(cls, "o"), linestyle="None", label=cls.title())
        )
    ax.legend(handles=legend_handles, title="Class", loc="best", framealpha=0.8)

    # Label the three largest nodes, nudged by 1% of the bbox so text clears the marker.
    offset_x = 0.01 * (maxx - minx)
    offset_y = 0.01 * (maxy - miny)
    label_box = dict(boxstyle="round,pad=0.2", fc="white", ec="none", alpha=0.7)
    for _, node in df.nlargest(3, "pop").copy().iterrows():
        ax.text(
            node["x_km"] + offset_x,
            node["y_km"] + offset_y,
            f"{int(node['pop']):,}",
            fontsize=8,
            ha="left",
            va="bottom",
            bbox=label_box,
        )

    ax.set_title("Nodes — V1 (probabilistic classes; uniform ranges)")
    ax.set_xlabel("x (km)")
    ax.set_ylabel("y (km)")
    ax.set_xlim(minx, maxx)
    ax.set_ylim(miny, maxy)
    ax.set_aspect("equal", adjustable="box")
    fig.tight_layout()
    fig.savefig(save_path, dpi=150)
    plt.close(fig)


def compute_metrics_hash(metrics: Dict[str, Any]) -> str:
    """Return a 16-hex-character fingerprint of ``metrics``.

    The dict is serialised as JSON with sorted keys, so key order does not change
    the result; the fingerprint is the first 16 characters of the SHA-256 hex digest.
    """
    canonical = json.dumps(metrics, sort_keys=True)
    digest = hashlib.sha256(canonical.encode("utf-8"))
    return digest.hexdigest()[:16]


def save_artifacts(df: pd.DataFrame, cfg: V1Config, metrics: Dict[str, Any]) -> Dict[str, str]:
    """Write nodes.csv, preview.png and meta.json into ``cfg.out_dir``.

    The directory is created if needed. ``meta.json`` records the config values
    used, the validation metrics, their hash and a UTC creation timestamp.

    Returns:
        Mapping with keys ``nodes``, ``preview`` and ``meta`` to the written paths.
    """
    target_dir = cfg.out_dir
    os.makedirs(target_dir, exist_ok=True)

    written = {
        "nodes": os.path.join(target_dir, "nodes.csv"),
        "preview": os.path.join(target_dir, "preview.png"),
        "meta": os.path.join(target_dir, "meta.json"),
    }

    df.to_csv(written["nodes"], index=False)
    preview_nodes(df, cfg, written["preview"])

    generator_info = {
        "name": "nodes_v1_classes_prob_uniform",
        "params": {
            "n_nodes": cfg.n_nodes,
            "bbox_km": cfg.bbox_km,
            "class_prob": cfg.class_prob,
            "class_ranges": cfg.class_ranges,
        },
    }
    meta = {
        "schema_version": cfg.schema_version,
        "dataset_version": cfg.dataset_version,
        "crs": cfg.crs,
        "seed": cfg.seed,
        "generator": generator_info,
        "region_bbox": list(cfg.bbox_km),
        "metrics": metrics,
        "metrics_hash": compute_metrics_hash(metrics),
        "created_at_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
    }
    with open(written["meta"], "w", encoding="utf-8") as handle:
        json.dump(meta, handle, indent=2)

    return written


# ------------------------------
# Orchestration
# ------------------------------

def main(cfg: V1Config | None = None) -> pd.DataFrame:
    """Run the V1 pipeline end to end: seed, generate, validate, save, print a summary.

    Args:
        cfg: configuration to use; a default ``V1Config()`` is built when omitted.

    Returns:
        The generated node table.
    """
    active = cfg if cfg else V1Config()
    set_seed(active.seed)

    nodes = generate_nodes_v1(active)
    report = validate_nodes(nodes, active)
    written = save_artifacts(nodes, active, report)

    rule = "-" * 40
    print("\n[Nodes V1] Build complete:\n" + rule)
    print(f"Nodes: {len(nodes)} | bbox: {active.bbox_km}")
    print(f"Class counts: {report['class_counts']}")
    print(f"Class medians: {report['class_medians']} (order ok = {report.get('median_order_ok')})")
    print(f"Saved: nodes → {written['nodes']}\n       preview → {written['preview']}\n       meta → {written['meta']}")
    print(f"Metrics hash: {compute_metrics_hash(report)}")
    return nodes


# ------------------------------
# Run (notebook-friendly)
# ------------------------------
# Config instance used by this notebook run. Compared with the V1Config defaults it
# overrides seed, n_nodes, the medium/small population ranges and out_dir.
# The animation cell below reads it as `_cfg`; main(_cfg) is not called here.
_cfg = V1Config(
    seed=30,
    n_nodes=30,
    bbox_km=(0.0, 0.0, 200.0, 200.0),
    class_prob={"large": 0.10, "medium": 0.30, "small": 0.60},
    class_ranges={
        "large":  {"min": 600_000, "max": 1_500_000},
        "medium": {"min": 100_000, "max":   300_000},
        "small":  {"min":   1_000, "max":   60_000},
    },
    out_dir="maps/sv1.1/dv0.2_v1_classes",
)

# Jupyter notebook cell — Animation V1 (classes → scatter growth, 15s, MP4+GIF)
from __future__ import annotations
import os, gc
from typing import List, Tuple
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import StrMethodFormatter
from matplotlib.lines import Line2D
from tqdm.auto import tqdm

# --- encoders (required) ---
import cv2                  # MP4
import imageio.v2 as imageio  # GIF

# ---------- helpers ----------
def _ease_in_out(t: np.ndarray, power: float = 2.2) -> np.ndarray:
    """Map progress values in [0, 1] onto a smooth ease-in/ease-out curve.

    Computes ``t**p / (t**p + (1 - t)**p)`` after clamping ``t`` to [0, 1].

    Args:
        t: progress value(s); a scalar, sequence or array. Values outside
            [0, 1] are clamped.
        power: curve exponent ``p``.

    Returns:
        Float array with the same shape as ``t`` (0-d for a scalar). Any position
        where the formula yields NaN is returned as 0.0.
    """
    # asarray lets scalars through: the previous in-place NaN masking raised
    # TypeError for a scalar because np.clip then returns a NumPy scalar.
    t = np.clip(np.asarray(t, dtype=float), 0.0, 1.0)
    rise = t**power
    fall = (1.0 - t)**power
    # NaN/inf can only arise from NaN input or a non-positive power; silence the
    # warning because those positions are replaced just below.
    with np.errstate(invalid="ignore", divide="ignore"):
        eased = rise / (rise + fall)
    return np.where(np.isnan(eased), 0.0, eased)

def _plot_partial_v1(df, cfg, save_path, reveal_frac: float, grow: float,
                     vmin: float, vmax: float) -> None:
    """
    Draw one intermediate animation frame and save it to ``save_path``.

      - reveal_frac: fraction of nodes to draw (0..1). Nodes are revealed class by
        class (small → medium → large) and, within a class, by ascending population.
        The number drawn is max(1, floor(reveal_frac * n)).
      - grow: marker-size multiplier, clamped to [0.05, 1.0].
      - vmin / vmax: colour-scale limits, passed in so every frame shares one scale.
    """
    minx, miny, maxx, maxy = cfg.bbox_km
    class_order = ["small", "medium", "large"]
    shape_by_class = {"large": "s", "medium": "^", "small": "o"}

    # Reveal order: classes in class_order, ascending population inside each class.
    per_class_index = [
        df[df["class"] == cls].sort_values("pop").index.values for cls in class_order
    ]
    reveal_order = df.loc[np.concatenate(per_class_index)]

    n_visible = max(1, int(np.floor(reveal_frac * len(reveal_order))))
    visible = reveal_order.iloc[:n_visible].copy()

    # Sizes are scaled against the largest population in the full table, not just
    # the visible subset, so a node keeps its relative size as more nodes appear.
    largest_pop = df["pop"].max()
    size_scale = np.clip(grow, 0.05, 1.0)

    fig, ax = plt.subplots(figsize=(6, 6))
    last_scatter = None
    for cls in class_order:
        rows = visible[visible["class"] == cls]
        if rows.empty:
            continue
        pops = rows["pop"].values
        last_scatter = ax.scatter(
            rows["x_km"], rows["y_km"],
            s=(10 + 90 * np.sqrt(pops / largest_pop)) * size_scale,
            c=pops.astype(float), vmin=vmin, vmax=vmax,
            marker=shape_by_class.get(cls, "o"), cmap="viridis", label=cls.title()
        )

    if last_scatter is not None:
        colorbar = fig.colorbar(last_scatter, ax=ax)
        colorbar.set_label("Population")
        try:
            colorbar.ax.yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))
        except Exception:
            # Best effort: keep the default tick labels if the formatter cannot be set.
            pass

    # Proxy handles so the legend lists every class even before it is revealed.
    legend_handles = []
    for cls in class_order:
        legend_handles.append(
            Line2D([], [], marker=shape_by_class.get(cls, "o"), linestyle="None", label=cls.title())
        )
    ax.legend(handles=legend_handles, title="Class", loc="best", framealpha=0.8)

    ax.set_title("Nodes — V1 (probabilistic classes; uniform ranges)")
    ax.set_xlabel("x (km)")
    ax.set_ylabel("y (km)")
    ax.set_xlim(minx, maxx)
    ax.set_ylim(miny, maxy)
    ax.set_aspect("equal", adjustable="box")
    fig.tight_layout()
    fig.savefig(save_path, dpi=150)
    plt.close(fig)

def make_v1_animation(cfg,
                      total_duration_s: float = 15.0,
                      fps: int = 15,
                      frames_dir_name: str = "frames_v1") -> Tuple[List[str], str, str]:
    """
    Render the V1 build-up animation: PNG frames plus an MP4 and a GIF.

    - Duration is min(total_duration_s, 15 s); the frame count is duration * fps
      (at least 2).
    - All frames but the last are partial "build" frames; the last frame is the
      full preview drawn by ``preview_nodes``.

    Args:
        cfg: V1 configuration (reads ``seed``, ``out_dir`` and whatever the node
            generator and plotting helpers read).
        total_duration_s: requested duration in seconds, capped at 15.
        fps: frames per second; must be positive.
        frames_dir_name: sub-directory of ``cfg.out_dir`` that receives the PNGs.

    Returns:
        ``(frame_paths, mp4_path, gif_path)``.

    Raises:
        ValueError: if ``fps`` is not positive.
        RuntimeError: if the first frame cannot be read back or the video writer
            cannot be opened.
    """
    # Fail before rendering anything: fps <= 0 previously surfaced only at the GIF
    # step as a ZeroDivisionError, after every frame had already been drawn.
    if fps <= 0:
        raise ValueError(f"fps must be positive, got {fps}")

    # 1) Deterministic data (re-seeded from cfg.seed)
    set_seed(cfg.seed)
    df = generate_nodes_v1(cfg)

    # 2) Output folders
    out_dir = cfg.out_dir
    os.makedirs(out_dir, exist_ok=True)
    frames_dir = os.path.join(out_dir, frames_dir_name)
    os.makedirs(frames_dir, exist_ok=True)

    # 3) Timing: (N - 1) build frames + 1 final frame.
    duration = min(15.0, float(total_duration_s))
    # The small tolerance keeps float round-off (e.g. 0.29 * 100 = 28.999...) from
    # dropping a frame when truncating.
    total_frames = max(2, int(duration * fps + 1e-9))
    n_build = total_frames - 1

    # Global colour scale shared by every build frame
    vmin, vmax = float(df["pop"].min()), float(df["pop"].max())

    # 4) Build frames: the eased progress drives both reveal fraction and marker growth
    paths = []
    t = _ease_in_out(np.linspace(0.0, 1.0, n_build), power=2.0)
    for i, r in enumerate(tqdm(t, total=n_build, desc="Building V1 frames")):
        frame_path = os.path.join(frames_dir, f"frame_{i:04d}.png")
        _plot_partial_v1(df, cfg, frame_path, reveal_frac=float(r), grow=float(r),
                         vmin=vmin, vmax=vmax)
        paths.append(frame_path)

    # 5) Last frame = full preview, rendered by the same function that writes preview.png
    final_path = os.path.join(frames_dir, f"frame_{n_build:04d}.png")
    preview_nodes(df, cfg, final_path)
    paths.append(final_path)

    # 6) MP4 (always)
    mp4_path = os.path.join(out_dir, "nodes_v1_classes.mp4")
    first = cv2.imread(paths[0])
    if first is None:
        raise RuntimeError("Cannot read first frame for video encoding.")
    H, W = first.shape[:2]
    writer = cv2.VideoWriter(mp4_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (W, H))
    try:
        if not writer.isOpened():
            raise RuntimeError("Failed to open VideoWriter for MP4.")
        for p in tqdm(paths, desc="Encoding MP4"):
            im = cv2.imread(p)
            if im is None:
                continue  # best effort: an unreadable frame is skipped, not fatal
            if im.shape[:2] != (H, W):
                # The final preview may differ in pixel size from the build frames.
                im = cv2.resize(im, (W, H), interpolation=cv2.INTER_AREA)
            writer.write(im)
    finally:
        # Release even if encoding raises, so the output file handle is not leaked.
        writer.release()

    # 7) GIF (always)
    # NOTE(review): whether imageio reads `duration` as seconds or milliseconds
    # depends on the installed imageio version/plugin — confirm the playback speed.
    gif_path = os.path.join(out_dir, "nodes_v1_classes.gif")
    with imageio.get_writer(gif_path, mode="I", duration=1.0 / fps) as gifw:
        for p in tqdm(paths, desc="Encoding GIF"):
            gifw.append_data(imageio.imread(p))

    # Cleanup
    del first
    gc.collect()

    return paths, mp4_path, gif_path


# ------------------------------
# Ejecutar el builder (15s exactos)
# ------------------------------
anim_cfg_v1 = _cfg  # reuse the V1 config defined above
frame_list_v1, mp4_out_v1, gif_out_v1 = make_v1_animation(
    anim_cfg_v1,
    total_duration_s=15.0,   # exactly 15 s
    fps=15,                  # 225 frames → 15 s
    frames_dir_name="frames_v1"
)

# Report where the frames, MP4 and GIF were written.
print(f"\nSaved {len(frame_list_v1)} frames in: {os.path.join(anim_cfg_v1.out_dir, 'frames_v1')}")
print(f"MP4: {mp4_out_v1}")
print(f"GIF: {gif_out_v1}")


Building V1 frames: 100%|██████████| 224/224 [00:44<00:00,  5.03it/s]
Encoding MP4: 100%|██████████| 225/225 [00:04<00:00, 50.79it/s]
Encoding GIF: 100%|██████████| 225/225 [00:01<00:00, 200.00it/s]



Saved 225 frames in: maps/sv1.1/dv0.2_v1_classes\frames_v1
MP4: maps/sv1.1/dv0.2_v1_classes\nodes_v1_classes.mp4
GIF: maps/sv1.1/dv0.2_v1_classes\nodes_v1_classes.gif


In [6]:
"""
Jupyter notebook cell — Version 0 (Nodes only)
Minimal, reproducible node set in a rectangular region (planar coordinates in km).
- Placement: uniform within bbox
- Population: uniform in [pop_min, pop_max]
- Outputs: nodes.csv, meta.json, preview.png

Usage (in a single notebook cell): just run this cell. Edit `V0Config` as needed.
"""
from __future__ import annotations

import json
import os
import time
import hashlib
from dataclasses import asdict, dataclass
from typing import Tuple, Dict, Any

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import StrMethodFormatter


# ------------------------------
# Config
# ------------------------------
@dataclass
class V0Config:
    """Parameters for the V0 node generator (uniform placement, uniform population).

    Every field has a default, so ``V0Config()`` is a complete configuration.
    """

    # Seed passed to ``set_seed`` before any random draw.
    seed: int = 42
    # Number of nodes (rows) to generate.
    n_nodes: int = 50
    bbox_km: Tuple[float, float, float, float] = (0.0, 0.0, 200.0, 200.0)  # (minx, miny, maxx, maxy)
    # Inclusive population bounds; the generator draws integers in [pop_min, pop_max].
    pop_min: int = 1_000
    pop_max: int = 500_000
    # Directory that receives nodes.csv, preview.png and meta.json.
    out_dir: str = "maps/sv1.0/dv0.1_v0_uniform"
    crs: str = "EPSG:3857"  # Synthetic planar meters; we store km here for simplicity
    schema_version: str = "1.0"  # node schema version
    dataset_version: str = "0.1"  # dataset/edition version


# ------------------------------
# Core functions
# ------------------------------

def set_seed(seed: int) -> None:
    """Seed NumPy's global random state so later ``np.random.*`` draws are repeatable."""
    np.random.seed(seed=seed)


def generate_uniform_nodes(cfg: V0Config) -> pd.DataFrame:
    """Build the V0 node table: uniform positions and uniform integer populations.

    Draw order is fixed (all x, then all y, then populations), so a seeded run is
    reproducible.

    Returns:
        DataFrame with columns ``id``, ``x_km``, ``y_km``, ``pop``.

    Raises:
        ValueError: if the bbox does not satisfy ``minx < maxx`` and ``miny < maxy``.
    """
    x_lo, y_lo, x_hi, y_hi = cfg.bbox_km
    bbox_ok = x_lo < x_hi and y_lo < y_hi
    if not bbox_ok:
        raise ValueError("Invalid bbox: must satisfy minx<maxx and miny<maxy")

    count = cfg.n_nodes

    # Coordinates, uniform over the box (km)
    x_coords = np.random.uniform(x_lo, x_hi, size=count)
    y_coords = np.random.uniform(y_lo, y_hi, size=count)

    # Integer populations; randint excludes its upper bound, hence the + 1
    populations = np.random.randint(cfg.pop_min, cfg.pop_max + 1, size=count)

    columns = {
        "id": np.arange(count, dtype=int),
        "x_km": x_coords,
        "y_km": y_coords,
        "pop": populations,
    }
    return pd.DataFrame(columns)


def validate_nodes(df: pd.DataFrame, cfg: V0Config) -> Dict[str, Any]:
    """Check a V0 node table against its config and collect summary metrics.

    Raises ``AssertionError`` when the row count differs from ``cfg.n_nodes``, when
    any node lies outside the bbox, or when any population falls outside
    ``[cfg.pop_min, cfg.pop_max]``.

    Returns:
        Dict with keys ``n_nodes``, ``bbox``, ``pop_range_observed`` and
        ``pop_percentiles``.
    """
    minx, miny, maxx, maxy = cfg.bbox_km

    # Row count
    observed = len(df)
    if observed != cfg.n_nodes:
        raise AssertionError(f"Node count mismatch: expected {cfg.n_nodes}, got {observed}")
    metrics: Dict[str, Any] = {"n_nodes": observed}

    # Bounds (inclusive on every edge)
    x_ok = (df["x_km"] >= minx) & (df["x_km"] <= maxx)
    y_ok = (df["y_km"] >= miny) & (df["y_km"] <= maxy)
    outside = ~(x_ok & y_ok)
    violations = int(outside.sum())
    if violations:
        raise AssertionError(f"{violations} nodes fall outside bbox")
    metrics["bbox"] = {"minx": minx, "miny": miny, "maxx": maxx, "maxy": maxy}

    # Population range
    pops = df["pop"]
    pmin = int(pops.min())
    pmax = int(pops.max())
    if pmin < cfg.pop_min or pmax > cfg.pop_max:
        raise AssertionError(
            f"Population out of range: observed [{pmin},{pmax}] vs cfg [{cfg.pop_min},{cfg.pop_max}]"
        )
    metrics["pop_range_observed"] = {"min": pmin, "max": pmax}

    # Distribution summary
    percentiles = {}
    for q in (5, 25, 50, 75, 95):
        percentiles[q] = int(np.percentile(pops, q))
    metrics["pop_percentiles"] = percentiles

    return metrics

def preview_nodes(df: pd.DataFrame, cfg: V0Config, save_path: str) -> None:
    """Save a scatter preview of the V0 nodes to ``save_path``.

    Marker area and colour both encode population (a colourbar is drawn), and the
    three most populated nodes are labelled with their population.
    """
    minx, miny, maxx, maxy = cfg.bbox_km

    pops = df["pop"].values
    # sqrt compresses the dynamic range of marker sizes
    marker_sizes = 10 + 90 * np.sqrt(pops / df["pop"].max())
    # colour values; the colormap is matplotlib's default
    colour_values = pops.astype(float)

    fig, ax = plt.subplots(figsize=(6, 6))
    scatter = ax.scatter(df["x_km"], df["y_km"], s=marker_sizes, c=colour_values)

    # Colourbar doubles as the population legend
    colorbar = fig.colorbar(scatter, ax=ax)
    colorbar.set_label("Population")
    try:
        colorbar.ax.yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))
    except Exception:
        # Best effort: keep the default tick labels if the formatter cannot be set.
        pass

    # Label the three largest nodes, nudged by 1% of the bbox so text clears the marker.
    offset_x = 0.01 * (maxx - minx)
    offset_y = 0.01 * (maxy - miny)
    label_box = dict(boxstyle="round,pad=0.2", fc="white", ec="none", alpha=0.7)
    for _, node in df.nlargest(3, "pop").copy().iterrows():
        ax.text(
            node["x_km"] + offset_x,
            node["y_km"] + offset_y,
            f"{int(node['pop']):,}",
            fontsize=8,
            ha="left",
            va="bottom",
            bbox=label_box,
        )

    ax.set_title("Nodes — V0 (uniform placement) — color = population")
    ax.set_xlabel("x (km)")
    ax.set_ylabel("y (km)")
    ax.set_xlim(minx, maxx)
    ax.set_ylim(miny, maxy)
    ax.set_aspect("equal", adjustable="box")
    fig.tight_layout()
    fig.savefig(save_path, dpi=150)
    plt.close(fig)

def compute_metrics_hash(metrics: Dict[str, Any]) -> str:
    """Return a 16-hex-character fingerprint of ``metrics``.

    The dict is serialised as JSON with sorted keys, so key order does not change
    the result; the fingerprint is the first 16 characters of the SHA-256 hex digest.
    """
    serialised = json.dumps(metrics, sort_keys=True)
    hasher = hashlib.sha256()
    hasher.update(serialised.encode("utf-8"))
    return hasher.hexdigest()[:16]


def save_artifacts(df: pd.DataFrame, cfg: V0Config, metrics: Dict[str, Any]) -> Dict[str, str]:
    """Write nodes.csv, preview.png and meta.json into ``cfg.out_dir``.

    The directory is created if needed. ``meta.json`` records the config values
    used, the validation metrics, their hash and a UTC creation timestamp.

    Returns:
        Mapping with keys ``nodes``, ``preview`` and ``meta`` to the written paths.
    """
    target_dir = cfg.out_dir
    os.makedirs(target_dir, exist_ok=True)

    written = {
        "nodes": os.path.join(target_dir, "nodes.csv"),
        "preview": os.path.join(target_dir, "preview.png"),
        "meta": os.path.join(target_dir, "meta.json"),
    }

    # Node table, then the preview image
    df.to_csv(written["nodes"], index=False)
    preview_nodes(df, cfg, written["preview"])

    # Metadata
    generator_info = {
        "name": "nodes_v0_uniform",
        "params": {
            "n_nodes": cfg.n_nodes,
            "bbox_km": cfg.bbox_km,
            "pop_min": cfg.pop_min,
            "pop_max": cfg.pop_max,
        },
    }
    meta = {
        "schema_version": cfg.schema_version,
        "dataset_version": cfg.dataset_version,
        "crs": cfg.crs,
        "seed": cfg.seed,
        "generator": generator_info,
        "region_bbox": list(cfg.bbox_km),
        "metrics": metrics,
        "metrics_hash": compute_metrics_hash(metrics),
        "created_at_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
    }
    with open(written["meta"], "w", encoding="utf-8") as handle:
        json.dump(meta, handle, indent=2)

    return written


# ------------------------------
# Orchestration
# ------------------------------

def main(cfg: V0Config | None = None) -> pd.DataFrame:
    """Run the V0 pipeline end to end: seed, generate, validate, save, print a summary.

    Args:
        cfg: configuration to use; a default ``V0Config()`` is built when omitted.

    Returns:
        The generated node table.
    """
    active = cfg if cfg else V0Config()
    set_seed(active.seed)

    nodes = generate_uniform_nodes(active)
    report = validate_nodes(nodes, active)
    written = save_artifacts(nodes, active, report)

    # Summary printout
    rule = "-" * 40
    print("\n[Nodes V0] Build complete:\n" + rule)
    print(f"Nodes: {len(nodes)} | bbox: {active.bbox_km} | pop ∈ [{active.pop_min},{active.pop_max}]")
    print(f"Saved: nodes → {written['nodes']}\n       preview → {written['preview']}\n       meta → {written['meta']}")
    print(f"Metrics hash: {compute_metrics_hash(report)}")
    return nodes


# ------------------------------
# Config instance used by this notebook run. Compared with the V0Config defaults it
# overrides n_nodes (30) and pop_max (1,000,000); the other values repeat the defaults.
# The animation cell below reads it as `_cfg`.
_cfg = V0Config(
    seed=42,
    n_nodes=30,
    bbox_km=(0.0, 0.0, 200.0, 200.0),
    pop_min=1_000,
    pop_max=1_000_000,
    out_dir="maps/sv1.0/dv0.1_v0_uniform",
)

# The static build (nodes.csv / preview.png / meta.json) is disabled in this cell.
# _ = main(_cfg)

# Jupyter notebook cell — Animation V0 (uniform nodes), 15s, MP4+GIF, frames saved
from __future__ import annotations
import os, gc
from typing import List, Tuple
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import StrMethodFormatter
from tqdm.auto import tqdm

# Encoders (requeridos)
import cv2
import imageio.v2 as imageio

# ---------- helpers ----------
def _ease_in_out(t: np.ndarray, power: float = 2.2) -> np.ndarray:
    """Map progress values in [0, 1] onto a smooth ease-in/ease-out curve.

    Computes ``t**p / (t**p + (1 - t)**p)`` after clamping ``t`` to [0, 1].

    Args:
        t: progress value(s); a scalar, sequence or array. Values outside
            [0, 1] are clamped.
        power: curve exponent ``p``.

    Returns:
        Float array with the same shape as ``t`` (0-d for a scalar). Any position
        where the formula yields NaN is returned as 0.0.
    """
    # asarray lets scalars through: the previous in-place NaN masking raised
    # TypeError for a scalar because np.clip then returns a NumPy scalar.
    t = np.clip(np.asarray(t, dtype=float), 0.0, 1.0)
    rise = t**power
    fall = (1.0 - t)**power
    # NaN/inf can only arise from NaN input or a non-positive power; silence the
    # warning because those positions are replaced just below.
    with np.errstate(invalid="ignore", divide="ignore"):
        eased = rise / (rise + fall)
    return np.where(np.isnan(eased), 0.0, eased)

def _plot_partial_v0(df, cfg: V0Config, save_path: str,
                     reveal_frac: float, grow: float,
                     vmin: float, vmax: float) -> None:
    """
    Draw one intermediate animation frame and save it to ``save_path``.

      - reveal_frac ∈ [0..1]: fraction of nodes to draw, revealed in ascending
        population order. The number drawn is max(1, floor(reveal_frac * n)).
      - grow: marker-size multiplier, clamped to [0.05, 1.0].
      - vmin / vmax: colour-scale limits, passed in so every frame shares one scale.
    """
    minx, miny, maxx, maxy = cfg.bbox_km

    # Reveal order: ascending population
    reveal_order = df.sort_values("pop")
    n_visible = max(1, int(np.floor(reveal_frac * len(reveal_order))))
    visible = reveal_order.iloc[:n_visible].copy()

    # Sizes are scaled against the largest population in the full table, not just
    # the visible subset, so a node keeps its relative size as more nodes appear.
    visible_pops = visible["pop"].values
    full_sizes = 10 + 90 * np.sqrt(visible_pops / df["pop"].max())
    marker_sizes = full_sizes * np.clip(grow, 0.05, 1.0)

    fig, ax = plt.subplots(figsize=(6, 6))
    scatter = ax.scatter(
        visible["x_km"], visible["y_km"],
        s=marker_sizes, c=visible_pops.astype(float),
        vmin=vmin, vmax=vmax, cmap="viridis"
    )

    colorbar = fig.colorbar(scatter, ax=ax)
    colorbar.set_label("Population")
    try:
        colorbar.ax.yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))
    except Exception:
        # Best effort: keep the default tick labels if the formatter cannot be set.
        pass

    ax.set_title("Nodes — V0 (uniform placement) — color = population")
    ax.set_xlabel("x (km)")
    ax.set_ylabel("y (km)")
    ax.set_xlim(minx, maxx)
    ax.set_ylim(miny, maxy)
    ax.set_aspect("equal", adjustable="box")
    fig.tight_layout()
    fig.savefig(save_path, dpi=150)
    plt.close(fig)

def make_v0_animation(cfg: V0Config,
                      total_duration_s: float = 15.0,
                      fps: int = 15,
                      frames_dir_name: str = "frames_v0") -> Tuple[List[str], str, str]:
    """
    Render the V0 build-up animation: PNG frames plus an MP4 and a GIF.

    - Duration is min(total_duration_s, 15 s); the frame count is duration * fps
      (at least 2).
    - All frames but the last are partial "build" frames; the last frame is the
      full preview drawn by ``preview_nodes`` (including the top-3 annotations).

    Args:
        cfg: V0 configuration (reads ``seed``, ``out_dir`` and whatever the node
            generator and plotting helpers read).
        total_duration_s: requested duration in seconds, capped at 15.
        fps: frames per second; must be positive.
        frames_dir_name: sub-directory of ``cfg.out_dir`` that receives the PNGs.

    Returns:
        ``(frame_paths, mp4_path, gif_path)``.

    Raises:
        ValueError: if ``fps`` is not positive.
        RuntimeError: if the first frame cannot be read back or the video writer
            cannot be opened.
    """
    # Fail before rendering anything: fps <= 0 previously surfaced only at the GIF
    # step as a ZeroDivisionError, after every frame had already been drawn.
    if fps <= 0:
        raise ValueError(f"fps must be positive, got {fps}")

    # 1) Deterministic data (re-seeded from cfg.seed)
    set_seed(cfg.seed)
    df = generate_uniform_nodes(cfg)

    # 2) Output folders
    out_dir = cfg.out_dir
    os.makedirs(out_dir, exist_ok=True)
    frames_dir = os.path.join(out_dir, frames_dir_name)
    os.makedirs(frames_dir, exist_ok=True)

    # 3) Timing: (N - 1) build frames + 1 final frame.
    duration = min(15.0, float(total_duration_s))
    # The small tolerance keeps float round-off (e.g. 0.29 * 100 = 28.999...) from
    # dropping a frame when truncating.
    total_frames = max(2, int(duration * fps + 1e-9))
    n_build = total_frames - 1  # the last frame is the full preview

    # Fixed global colour scale shared by every build frame
    vmin, vmax = float(df["pop"].min()), float(df["pop"].max())

    # 4) Build frames: the eased progress drives both reveal fraction and marker growth
    paths: List[str] = []
    t = _ease_in_out(np.linspace(0.0, 1.0, n_build), power=2.0)
    for i, r in enumerate(tqdm(t, total=n_build, desc="Building V0 frames")):
        frame_path = os.path.join(frames_dir, f"frame_{i:04d}.png")
        _plot_partial_v0(df, cfg, frame_path, reveal_frac=float(r), grow=float(r),
                         vmin=vmin, vmax=vmax)
        paths.append(frame_path)

    # 5) Last frame = full preview (with top-3 annotations)
    final_path = os.path.join(frames_dir, f"frame_{n_build:04d}.png")
    preview_nodes(df, cfg, final_path)
    paths.append(final_path)

    # 6) MP4 (always)
    mp4_path = os.path.join(out_dir, "nodes_v0_uniform.mp4")
    first = cv2.imread(paths[0])
    if first is None:
        raise RuntimeError("Cannot read first frame for MP4 encoding.")
    H, W = first.shape[:2]
    writer = cv2.VideoWriter(mp4_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (W, H))
    try:
        if not writer.isOpened():
            raise RuntimeError("Failed to open VideoWriter.")
        for p in tqdm(paths, desc="Encoding MP4"):
            im = cv2.imread(p)
            if im is None:
                continue  # best effort: an unreadable frame is skipped, not fatal
            if im.shape[:2] != (H, W):
                # The final preview may differ in pixel size from the build frames.
                im = cv2.resize(im, (W, H), interpolation=cv2.INTER_AREA)
            writer.write(im)
    finally:
        # Release even if encoding raises, so the output file handle is not leaked.
        writer.release()

    # 7) GIF (always)
    # NOTE(review): whether imageio reads `duration` as seconds or milliseconds
    # depends on the installed imageio version/plugin — confirm the playback speed.
    gif_path = os.path.join(out_dir, "nodes_v0_uniform.gif")
    with imageio.get_writer(gif_path, mode="I", duration=1.0 / fps) as gifw:
        for p in tqdm(paths, desc="Encoding GIF"):
            gifw.append_data(imageio.imread(p))

    # Cleanup
    del first
    gc.collect()
    return paths, mp4_path, gif_path


# ------------------------------
# Ejecutar (15s exactos)
# ------------------------------
anim_cfg_v0 = _cfg  # reuse the V0Config defined above
frame_list_v0, mp4_out_v0, gif_out_v0 = make_v0_animation(
    anim_cfg_v0,
    total_duration_s=15.0,   # EXACT 15s
    fps=15,                  # 225 frames
    frames_dir_name="frames_v0"
)

# Report where the frames, MP4 and GIF were written.
print(f"\nSaved {len(frame_list_v0)} frames in: {os.path.join(anim_cfg_v0.out_dir, 'frames_v0')}")
print(f"MP4: {mp4_out_v0}")
print(f"GIF: {gif_out_v0}")


Building V0 frames: 100%|██████████| 224/224 [00:24<00:00,  9.19it/s]
Encoding MP4: 100%|██████████| 225/225 [00:05<00:00, 44.97it/s]
Encoding GIF: 100%|██████████| 225/225 [00:01<00:00, 208.55it/s]



Saved 225 frames in: maps/sv1.0/dv0.1_v0_uniform\frames_v0
MP4: maps/sv1.0/dv0.1_v0_uniform\nodes_v0_uniform.mp4
GIF: maps/sv1.0/dv0.1_v0_uniform\nodes_v0_uniform.gif


In [7]:
# Jupyter notebook cell — Collage V0+V1+V2 → MP4+GIF (15s), frames guardados
from __future__ import annotations
import os, gc
import numpy as np
from tqdm.auto import tqdm
import cv2
import imageio.v2 as imageio

# Default input paths (adjust them if you changed out_dir / the frames folder names)
DIR_V0 = "maps/sv1.0/dv0.1_v0_uniform/frames_v0"
DIR_V1 = "maps/sv1.1/dv0.2_v1_classes/frames_v1"
DIR_V2 = "maps/sv1.2/dv0.1_v2_density_cities/frames"

# Output locations and timing for the side-by-side collage
OUT_DIR = "maps/collages/v0_v1_v2"
FRAMES_OUT = os.path.join(OUT_DIR, "frames_collage")
FPS = 15
DURATION_S = 15.0  # exact

# Create the output folders up front (no error if they already exist)
os.makedirs(OUT_DIR, exist_ok=True)
os.makedirs(FRAMES_OUT, exist_ok=True)

def _list_pngs(d):
    """Return the sorted full paths of every ``.png`` file (any letter case) directly in ``d``.

    Raises:
        RuntimeError: if the directory contains no PNG files.
    """
    found = []
    for entry in os.listdir(d):
        if entry.lower().endswith(".png"):
            found.append(os.path.join(d, entry))
    found.sort()  # lexicographic; zero-padded frame names sort in frame order
    if len(found) == 0:
        raise RuntimeError(f"No PNG frames found in: {d}")
    return found

def _sample_indices(n, target):
    """Pick ``target`` evenly spaced integer indices spanning ``0 .. n - 1``.

    Indices repeat when ``target > n`` and skip when ``target < n``, so a source
    with any number of frames can be mapped onto exactly ``target`` output frames.
    """
    positions = np.linspace(0, n - 1, target)
    nearest = np.rint(positions)
    return nearest.astype(int)

def _resize_to_height(img, H):
    """Scale ``img`` to height ``H`` while keeping its aspect ratio.

    The width is scaled by the same factor as the height and rounded to the
    nearest pixel. An image that is already ``H`` pixels tall is returned
    as-is (same object, no copy). Resizing uses ``cv2.INTER_AREA``.
    """
    src_h, src_w = img.shape[:2]
    if src_h != H:
        scale = H / src_h
        scaled_w = int(round(src_w * scale))
        return cv2.resize(img, (scaled_w, H), interpolation=cv2.INTER_AREA)
    return img

def _read_collage_frame(path):
    """Read an image with OpenCV, raising instead of silently returning None.

    ``cv2.imread`` signals a missing or unreadable file by returning ``None``;
    turning that into an error here names the offending path.
    """
    img = cv2.imread(path)
    if img is None:
        raise RuntimeError(f"Failed to read frame: {path}")
    return img


# 1) Load the frame lists
f0 = _list_pngs(DIR_V0)
f1 = _list_pngs(DIR_V1)
f2 = _list_pngs(DIR_V2)

# Number of collage frames. The duration is capped at 15 s. round() rather than
# a bare int() so that float error in the product (e.g. 56.99999999999999)
# does not drop a frame.
target_frames = int(round(FPS * min(15.0, float(DURATION_S))))
if target_frames < 1:
    raise ValueError(
        f"FPS={FPS} and DURATION_S={DURATION_S} give no frames to render."
    )
# Evenly spaced indices so every source is resampled to exactly target_frames
# frames even when the sources have different frame counts.
idx0 = _sample_indices(len(f0), target_frames)
idx1 = _sample_indices(len(f1), target_frames)
idx2 = _sample_indices(len(f2), target_frames)

# 2) Determine the final size by reading the first trio of frames
im0 = cv2.imread(f0[idx0[0]])
im1 = cv2.imread(f1[idx1[0]])
im2 = cv2.imread(f2[idx2[0]])
if im0 is None or im1 is None or im2 is None:
    raise RuntimeError("Failed to read first frames to determine canvas size.")
base_H = min(im0.shape[0], im1.shape[0], im2.shape[0])  # normalise by height
gutter_w = 6  # vertical separator width (px)


def gutter(H):
    """Return a white separator strip of height ``H`` and width ``gutter_w``."""
    return np.full((H, gutter_w, 3), 255, dtype=np.uint8)


# The separator is identical for every frame, so build it once.
separator = gutter(base_H)

# 3) Generate the collage frames: V0 | V1 | V2 side by side at a common height
frame_paths = []
for i in tqdm(range(target_frames), desc="Building collage frames"):
    a = _resize_to_height(_read_collage_frame(f0[idx0[i]]), base_H)
    b = _resize_to_height(_read_collage_frame(f1[idx1[i]]), base_H)
    c = _resize_to_height(_read_collage_frame(f2[idx2[i]]), base_H)
    canvas = np.concatenate([a, separator, b, separator, c], axis=1)
    out_path = os.path.join(FRAMES_OUT, f"frame_{i:04d}.png")
    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(out_path, canvas):
        raise RuntimeError(f"Failed to write collage frame: {out_path}")
    frame_paths.append(out_path)

# 4) MP4 (always)
first = _read_collage_frame(frame_paths[0])
H, W = first.shape[:2]
mp4_path = os.path.join(OUT_DIR, "collage_uniform_classes_density.mp4")
vw = cv2.VideoWriter(mp4_path, cv2.VideoWriter_fourcc(*"mp4v"), FPS, (W, H))
try:
    if not vw.isOpened():
        raise RuntimeError("Failed to open VideoWriter for MP4.")
    for p in tqdm(frame_paths, desc="Encoding MP4"):
        img = _read_collage_frame(p)
        # The writer only accepts frames of the size it was opened with.
        if img.shape[:2] != (H, W):
            img = cv2.resize(img, (W, H), interpolation=cv2.INTER_AREA)
        vw.write(img)
finally:
    # Release even when encoding fails so the file handle is not leaked.
    vw.release()

# 5) GIF (always)
gif_path = os.path.join(OUT_DIR, "collage_uniform_classes_density.gif")
with imageio.get_writer(gif_path, mode="I", duration=1.0 / FPS) as gifw:
    for p in tqdm(frame_paths, desc="Encoding GIF"):
        gifw.append_data(imageio.imread(p))

del first
gc.collect()
print(f"\nFrames: {len(frame_paths)} → {FRAMES_OUT}")
print(f"MP4:    {mp4_path}")
print(f"GIF:    {gif_path}")


Building collage frames: 100%|██████████| 225/225 [00:06<00:00, 32.16it/s]
Encoding MP4: 100%|██████████| 225/225 [00:07<00:00, 30.46it/s]
Encoding GIF: 100%|██████████| 225/225 [00:03<00:00, 66.48it/s]



Frames: 225 → maps/collages/v0_v1_v2\frames_collage
MP4:    maps/collages/v0_v1_v2\collage_uniform_classes_density.mp4
GIF:    maps/collages/v0_v1_v2\collage_uniform_classes_density.gif
