# Partitioning trade-offs in QuASAr

This notebook explores how QuASAr's cost estimator responds to different partitioning strategies.  Synthetic fragments are used to probe backend feasibility, runtime and memory projections, and the conversion primitives required to glue heterogeneous plans together.

## Goals

* Provide an editable sandbox for fragment parameters (size, sparsity, locality, resource ceilings) and visualise backend feasibility.
* Compare monolithic execution against two- and three-segment plans that incorporate conversion costs, highlighting when partitioning improves the projected runtime or memory footprint.
* Map how gate mix, boundary widths and conversion primitives interact to make partitioning advantageous according to the model.

In [None]:
from __future__ import annotations

from pathlib import Path
from typing import Iterable, Sequence

import itertools
import math

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from IPython.display import display

import sys

# Locate the repository root: the nearest directory, starting at the current
# working directory and walking upwards, that contains a pyproject.toml.  When
# no marker is found the working directory itself is used.
_start_dir = Path.cwd().resolve()
PROJECT_ROOT = next(
    (
        candidate
        for candidate in (_start_dir, *_start_dir.parents)
        if (candidate / "pyproject.toml").exists()
    ),
    _start_dir,
)

# Put the root at the front of sys.path (once) so the project imports that
# follow resolve regardless of where the notebook was launched from.
_root_entry = str(PROJECT_ROOT)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)

from docs.utils.partitioning_analysis import (
    FragmentStats,
    BoundarySpec,
    evaluate_fragment_backends,
    aggregate_partitioned_plan,
    aggregate_single_backend_plan,
)
from quasar.cost import Backend, CostEstimator

# Plot appearance shared by every figure in the notebook.
plt.style.use("seaborn-v0_8")
plt.rcParams.update({"figure.figsize": (10, 4), "axes.grid": True})

# DataFrame rendering: column limit option set to 0, floats shown with three
# significant digits and thousands separators.
pd.set_option("display.max_columns", 0)
pd.set_option("display.float_format", "{:,.3g}".format)

# Single estimator instance reused by all cells below.
ESTIMATOR = CostEstimator()


In [None]:
def synthesise_fragment(
    num_qubits: int,
    depth: int,
    entangling_ratio: float,
    measurement_ratio: float = 0.0,
    *,
    is_clifford: bool = False,
    is_local: bool = False,
    frontier: int | None = None,
    frontier_scale: float | None = None,
    chi: int | Sequence[int] | None = None,
) -> FragmentStats:
    """Build a ``FragmentStats`` record from coarse circuit descriptors.

    The circuit is modelled as ``depth`` layers (at least one).  A rounded
    ``entangling_ratio`` share of them are entangling layers contributing
    ``max(num_qubits - 1, 1)`` two-qubit gates each; every remaining layer
    contributes ``num_qubits`` single-qubit gates.  The measurement count is
    ``measurement_ratio * num_qubits`` rounded to an integer.

    ``frontier`` is forwarded as given; when it is ``None`` and
    ``frontier_scale`` is provided, it is derived as
    ``round(num_qubits * frontier_scale)`` with a minimum of 1.  The flags
    ``is_clifford`` / ``is_local`` and ``chi`` are passed through unchanged.
    """

    layer_count = max(1, int(depth))
    requested_layers = int(round(layer_count * float(entangling_ratio)))
    # Clamp the entangling layer count into [0, layer_count].
    two_qubit_layers = min(max(requested_layers, 0), layer_count)
    single_qubit_layers = layer_count - two_qubit_layers

    pairs_per_layer = max(1, num_qubits - 1)
    two_qubit_gates = two_qubit_layers * pairs_per_layer
    single_qubit_gates = single_qubit_layers * num_qubits
    measurement_count = int(round(measurement_ratio * num_qubits))

    # An explicit frontier always wins; the scale is only a fallback.
    derive_frontier = frontier is None and frontier_scale is not None
    resolved_frontier = (
        max(1, int(round(num_qubits * frontier_scale))) if derive_frontier else frontier
    )

    return FragmentStats(
        num_qubits=num_qubits,
        num_1q_gates=single_qubit_gates,
        num_2q_gates=two_qubit_gates,
        num_measurements=measurement_count,
        is_clifford=is_clifford,
        is_local=is_local,
        frontier=resolved_frontier,
        chi=chi,
    )


def _to_iterable(value) -> list:
    if isinstance(value, (list, tuple, set, range)):
        return list(value)
    if hasattr(value, "tolist"):
        return list(value)
    return [value]


def _limit_to_bytes(value: float | None) -> float | None:
    if value is None:
        return None
    return float(value) * (1024**3)


def safe_log10(values: Iterable[float], *, floor: float = -12.0) -> np.ndarray:
    """Return ``log10`` of *values* after raising every entry to at least ``10**floor``.

    Zero and negative entries therefore map to ``floor`` instead of producing
    ``-inf`` or NaN.  The input is materialised into a float array first, so
    any iterable is accepted.
    """
    samples = np.asarray(list(values), dtype=float)
    lower_bound = 10 ** floor
    return np.log10(np.maximum(samples, lower_bound))


def evaluate_parameter_grid(
    fragment_axes: dict,
    metric_axes: dict,
    resource_limits: dict,
    *,
    allow_tableau: bool = True,
    estimator: CostEstimator | None = None,
) -> pd.DataFrame:
    """Sweep synthetic fragment parameters and record backend selections.

    Parameters
    ----------
    fragment_axes:
        Maps ``synthesise_fragment`` argument names to the value(s) to sweep.
    metric_axes:
        Maps metric names to the value(s) to sweep.  Only ``sparsity``,
        ``phase_rotation_diversity`` and ``amplitude_rotation_diversity`` are
        forwarded to ``evaluate_fragment_backends``.  May be empty or
        ``None``, in which case every fragment is evaluated once with all
        metrics left as ``None``.
    resource_limits:
        Optional ``max_memory_gb`` (scaled by 1024**3 before use) and
        ``max_time_s`` entries.  ``None`` is treated as no limits.
    allow_tableau, estimator:
        Forwarded to ``evaluate_fragment_backends``; a fresh
        ``CostEstimator`` is created when no estimator is supplied.

    Returns
    -------
    pandas.DataFrame
        One row per (fragment, metric) combination holding the swept
        parameters, ``selected_backend`` / ``selected_time`` /
        ``selected_memory`` (NaN when nothing was selected) and, for each
        candidate backend, ``<name>_feasible``, ``<name>_time`` and
        ``<name>_memory`` columns.
    """

    estimator = estimator or CostEstimator()
    resource_limits = resource_limits or {}
    metric_axes = metric_axes or {}
    max_memory = _limit_to_bytes(resource_limits.get("max_memory_gb"))
    max_time = resource_limits.get("max_time_s")

    fragment_keys = list(fragment_axes)
    fragment_choices = [_to_iterable(fragment_axes[key]) for key in fragment_keys]
    metric_keys = list(metric_axes)
    metric_choices = [_to_iterable(metric_axes[key]) for key in metric_keys]
    # With no metric axes, product() yields exactly one empty tuple, so each
    # fragment is still evaluated once.  (Substituting ``[[]]`` here would
    # make the product empty and silently drop every row.)
    metric_combos = list(itertools.product(*metric_choices))

    rows: list[dict] = []
    for frag_values in itertools.product(*fragment_choices):
        frag_params = dict(zip(fragment_keys, frag_values))
        stats = synthesise_fragment(**frag_params)
        for combo in metric_combos:
            metrics = dict(zip(metric_keys, combo))
            backend, diag = evaluate_fragment_backends(
                stats,
                sparsity=metrics.get("sparsity"),
                phase_rotation_diversity=metrics.get("phase_rotation_diversity"),
                amplitude_rotation_diversity=metrics.get("amplitude_rotation_diversity"),
                allow_tableau=allow_tableau,
                max_memory=max_memory,
                max_time=max_time,
                estimator=estimator,
            )
            if backend is None:
                # No backend selected: keep the row but mark costs as missing.
                selected_time = math.nan
                selected_memory = math.nan
            else:
                selected_time = diag["selected_cost"].time
                selected_memory = diag["selected_cost"].memory
            row = {**frag_params, **metrics}
            row["selected_backend"] = backend.name if backend else None
            row["selected_time"] = selected_time
            row["selected_memory"] = selected_memory
            for cand_backend, entry in diag["backends"].items():
                label = cand_backend.name.lower()
                # Entries that are not dicts carry no usable diagnostics.
                is_mapping = isinstance(entry, dict)
                feasible = entry.get("feasible") if is_mapping else None
                cost = entry.get("cost") if is_mapping else None
                row[f"{label}_feasible"] = feasible
                row[f"{label}_time"] = cost.time if cost else math.nan
                row[f"{label}_memory"] = cost.memory if cost else math.nan
            rows.append(row)
    return pd.DataFrame(rows)


def run_plan(
    fragment_stats: Sequence[FragmentStats],
    fragment_metrics: Sequence[dict],
    *,
    boundaries: Sequence[BoundarySpec] | None = None,
    resource_limits: dict | None = None,
    allow_tableau: bool = True,
    estimator: CostEstimator | None = None,
) -> dict:
    """Select a backend for every fragment and aggregate the plan cost.

    ``fragment_metrics[i]`` supplies the optional ``sparsity``,
    ``phase_rotation_diversity`` and ``amplitude_rotation_diversity`` values
    for ``fragment_stats[i]``.  ``resource_limits`` may carry
    ``max_memory_gb`` (scaled by 1024**3 before use) and ``max_time_s``.

    When ``boundaries`` is non-empty the selections are combined with
    ``aggregate_partitioned_plan``; otherwise
    ``aggregate_single_backend_plan`` is used and no conversions are reported.

    Returns a dict with the keys ``fragments`` (per-fragment diagnostics),
    ``selections`` (``(backend, cost)`` pairs), ``total_cost`` and
    ``conversions``.

    Raises ``ValueError`` when the two input sequences differ in length and
    ``RuntimeError`` when no backend is selected for a fragment.
    """

    if len(fragment_stats) != len(fragment_metrics):
        raise ValueError("metrics must align with fragment list")

    estimator = estimator or CostEstimator()
    limits = resource_limits or {}
    # Settings shared by every per-fragment evaluation.
    shared_options = {
        "allow_tableau": allow_tableau,
        "max_memory": _limit_to_bytes(limits.get("max_memory_gb")),
        "max_time": limits.get("max_time_s"),
        "estimator": estimator,
    }

    selections: list[tuple[Backend, object]] = []
    diagnostics: list[dict] = []
    for stats, metrics in zip(fragment_stats, fragment_metrics):
        chosen, diag = evaluate_fragment_backends(
            stats,
            sparsity=metrics.get("sparsity"),
            phase_rotation_diversity=metrics.get("phase_rotation_diversity"),
            amplitude_rotation_diversity=metrics.get("amplitude_rotation_diversity"),
            **shared_options,
        )
        if chosen is None:
            raise RuntimeError("fragment infeasible under the selected limits")
        selections.append((chosen, diag["selected_cost"]))
        diagnostics.append(diag)

    if not boundaries:
        # No boundaries given: aggregate without any conversion steps.
        return {
            "fragments": diagnostics,
            "selections": selections,
            "total_cost": aggregate_single_backend_plan(selections),
            "conversions": [],
        }

    aggregated = aggregate_partitioned_plan(selections, boundaries, estimator=estimator)
    return {
        "fragments": diagnostics,
        "selections": selections,
        "total_cost": aggregated["total_cost"],
        "conversions": aggregated["conversions"],
    }


def plan_overview(label: str, plan: dict) -> pd.DataFrame:
    """Summarise a plan as a one-row frame.

    The row holds the given label, the selected backend names joined with
    " → ", and the ``time``, ``memory`` and ``conversion`` attributes of
    ``plan["total_cost"]``.
    """
    total = plan["total_cost"]
    backend_names = [selection[0].name for selection in plan["selections"]]
    record = {
        "plan": label,
        "backends": " → ".join(backend_names),
        "total_time": total.time,
        "peak_memory": total.memory,
        "conversion_time": total.conversion,
    }
    return pd.DataFrame([record])


def fragment_breakdown(plan: dict) -> pd.DataFrame:
    """Tabulate the per-fragment backend choice and cost of a plan.

    Each row pairs ``plan["selections"][i]`` (a ``(backend, cost)`` tuple)
    with the ``metrics`` entry of ``plan["fragments"][i]``; metric values that
    are not present are reported as missing.

    The column set is fixed, so a plan without fragments yields an empty frame
    that still has the expected columns (matching ``conversion_breakdown``).
    """
    columns = ["fragment", "backend", "num_qubits", "num_gates", "sparsity", "time", "memory"]
    rows = []
    for idx, (backend, cost) in enumerate(plan["selections"]):
        metrics = plan["fragments"][idx].get("metrics", {})
        rows.append(
            {
                "fragment": idx,
                "backend": backend.name,
                "num_qubits": metrics.get("num_qubits"),
                "num_gates": metrics.get("num_gates"),
                "sparsity": metrics.get("sparsity"),
                "time": cost.time,
                "memory": cost.memory,
            }
        )
    return pd.DataFrame(rows, columns=columns)


def conversion_breakdown(plan: dict) -> pd.DataFrame:
    """Tabulate the conversions recorded in ``plan["conversions"]``.

    Every entry contributes its ``index``, the names of its ``source`` and
    ``target`` backends, the ``primitive`` and the ``time`` / ``memory`` of
    its ``cost``.  A plan without conversions yields an empty frame that
    still carries those column names.
    """
    conversions = plan["conversions"]
    if not conversions:
        return pd.DataFrame(columns=["index", "source", "target", "primitive", "time", "memory"])
    return pd.DataFrame(
        [
            {
                "index": step["index"],
                "source": step["source"].name,
                "target": step["target"].name,
                "primitive": step["primitive"],
                "time": step["cost"].time,
                "memory": step["cost"].memory,
            }
            for step in conversions
        ]
    )


def evaluate_partition_advantage(
    gate_mixes: Sequence[float],
    boundary_qubits: Sequence[int],
    ranks: Sequence[int],
    *,
    total_qubits: int = 34,
    total_depth: int = 72,
    local_threshold: float = 0.32,
    estimator: CostEstimator | None = None,
) -> pd.DataFrame:
    """Map when a two-fragment plan beats a monolithic execution.

    For every ``gate_mix`` a monolithic fragment (``total_qubits`` wide,
    ``total_depth`` deep) is compared against a two-fragment plan whose first
    fragment takes 55% of the depth.  When ``gate_mix <= local_threshold`` the
    first fragment is an 18-qubit local fragment with a reduced entangling
    ratio; otherwise it spans all qubits.  The second fragment always spans
    all qubits.  Each (``boundary_qubits``, ``ranks``) pair defines the
    ``BoundarySpec`` used to join the two fragments.

    Returns one row per (gate_mix, boundary, rank) combination, ordered with
    ``gate_mix`` varying slowest and ``rank`` fastest, with the columns
    ``gate_mix``, ``boundary_qubits``, ``rank``, ``fragment_a_backend``,
    ``fragment_b_backend``, ``monolithic_backend``, ``partition_time``,
    ``monolithic_time``, ``speedup``, ``primitive`` and ``partition_wins``.

    If no backend is selected for the monolithic run or for either fragment,
    the affected backend name is ``None``, the corresponding time and the
    speedup are NaN and ``partition_wins`` is ``False`` (the comparison is
    undefined) instead of raising ``AttributeError``.
    """

    estimator = estimator or CostEstimator()
    depth_a = int(round(total_depth * 0.55))
    depth_b = total_depth - depth_a
    # Materialised once so the same pairs can be replayed for each gate mix.
    boundary_grid = list(itertools.product(boundary_qubits, ranks))

    rows: list[dict] = []
    for gate_mix in gate_mixes:
        # Everything up to the boundary loop depends on gate_mix only, so it
        # is evaluated once per gate mix rather than once per grid point.
        monolithic = synthesise_fragment(total_qubits, total_depth, gate_mix, is_local=False)
        mono_backend, mono_diag = evaluate_fragment_backends(
            monolithic,
            sparsity=max(0.35, 1.0 - gate_mix * 1.4),
            estimator=estimator,
        )
        mono_feasible = mono_backend is not None
        mono_time = mono_diag["selected_cost"].time if mono_feasible else math.nan

        if gate_mix <= local_threshold:
            frag_a = synthesise_fragment(
                18,
                depth_a,
                max(0.08, gate_mix * 0.55),
                is_local=True,
                frontier_scale=0.24,
                chi=64,
            )
            sparsity_a = min(0.95, 0.8 + (0.26 - gate_mix) * 1.5)
        else:
            frag_a = synthesise_fragment(
                total_qubits,
                depth_a,
                gate_mix * 0.9,
                is_local=False,
            )
            sparsity_a = max(0.35, 1.0 - gate_mix * 1.1)

        frag_b = synthesise_fragment(total_qubits, depth_b, gate_mix, is_local=False)
        sparsity_b = max(0.35, 1.0 - gate_mix * 1.3)

        sel_a, diag_a = evaluate_fragment_backends(
            frag_a,
            sparsity=sparsity_a,
            estimator=estimator,
        )
        sel_b, diag_b = evaluate_fragment_backends(
            frag_b,
            sparsity=sparsity_b,
            estimator=estimator,
        )
        partition_feasible = sel_a is not None and sel_b is not None

        for boundary, rank in boundary_grid:
            if partition_feasible:
                boundary_spec = BoundarySpec(
                    num_qubits=boundary,
                    rank=rank,
                    frontier=max(boundary, 12),
                    window=min(10, boundary // 2 + 2),
                    window_1q_gates=boundary * 4,
                    window_2q_gates=boundary * 2,
                )
                plan = aggregate_partitioned_plan(
                    [(sel_a, diag_a["selected_cost"]), (sel_b, diag_b["selected_cost"])],
                    [boundary_spec],
                    estimator=estimator,
                )
                partition_time = plan["total_cost"].time
                primitive = plan["conversions"][0]["primitive"] if plan["conversions"] else "None"
            else:
                # Without a backend for both fragments there is nothing to aggregate.
                partition_time = math.nan
                primitive = "None"

            # NaN is truthy, so missing times propagate as a NaN speedup; only
            # an exact zero partition time maps to an infinite speedup.
            speedup = mono_time / partition_time if partition_time else float("inf")
            rows.append(
                {
                    "gate_mix": gate_mix,
                    "boundary_qubits": boundary,
                    "rank": rank,
                    "fragment_a_backend": sel_a.name if sel_a is not None else None,
                    "fragment_b_backend": sel_b.name if sel_b is not None else None,
                    "monolithic_backend": mono_backend.name if mono_feasible else None,
                    "partition_time": partition_time,
                    "monolithic_time": mono_time,
                    "speedup": speedup,
                    "primitive": primitive,
                    # Comparisons against NaN are False, i.e. "no win".
                    "partition_wins": bool(partition_time < mono_time),
                }
            )
    return pd.DataFrame(rows)


## Parameter grid (edit me)

The dictionaries below define the parameter sweep for the feasibility study.  Update the lists to explore different fragment sizes, sparsities or resource ceilings.

In [None]:
# Axes forwarded to synthesise_fragment: every combination of the listed
# values is evaluated.
fragment_axes = dict(
    num_qubits=[18, 24, 30],
    depth=[48, 64],  # number of layers per fragment
    entangling_ratio=[0.18, 0.28, 0.38],  # share of layers that are entangling
    measurement_ratio=[0.0],  # measurements per qubit
    is_clifford=[False],
    is_local=[False, True],
    frontier=[None],  # None -> derived from frontier_scale
    frontier_scale=[0.25],  # frontier = round(num_qubits * frontier_scale), min 1
    chi=[None],
)

# Metrics forwarded to evaluate_fragment_backends for each fragment.
metric_axes = dict(
    sparsity=np.linspace(0.55, 0.9, 4),
    phase_rotation_diversity=[6],
    amplitude_rotation_diversity=[8],
)

# max_memory_gb is scaled by 1024**3 before use; None means "not set".
resource_limits = dict(max_memory_gb=64, max_time_s=None)
ALLOW_TABLEAU = True


In [None]:
# Run the sweep, then append log10 views of the selected time and memory
# (clamped by safe_log10) for the heatmaps further down.
grid_results = evaluate_parameter_grid(
    fragment_axes,
    metric_axes,
    resource_limits,
    allow_tableau=ALLOW_TABLEAU,
    estimator=ESTIMATOR,
).assign(
    log_selected_time=lambda frame: safe_log10(frame["selected_time"]),
    log_selected_memory=lambda frame: safe_log10(frame["selected_memory"]),
)
grid_results.head()


### Backend feasibility maps

The heatmaps below show the preferred backend across fragment sizes and sparsities, separated by the locality flag.  Only the first depth and the first rotation-diversity values of the sweep are shown.  The accompanying plot visualises the log10 runtime estimate for the selected backend.

In [None]:
# Fixed ordering of backend labels; the list index doubles as the colour index.
backend_order = ["None", "STATEVECTOR", "DECISION_DIAGRAM", "MPS", "TABLEAU"]
backend_to_idx = {name: idx for idx, name in enumerate(backend_order)}
colors = ["#dcdcdc", "#4477aa", "#66c2a5", "#ffa600", "#aa3377"]
backend_cmap = ListedColormap(colors)

# The heatmaps only have room for two axes (qubits x sparsity), so the other
# swept axes are pinned to their first value.  The entangling ratio is pinned
# explicitly: otherwise several rows share each (qubits, sparsity) cell and
# the plot silently shows whichever of them happens to come first.
selected_depth = _to_iterable(fragment_axes["depth"])[0]
selected_entangling_ratio = _to_iterable(fragment_axes["entangling_ratio"])[0]
subset = grid_results[
    (grid_results["depth"] == selected_depth)
    & (grid_results["entangling_ratio"] == selected_entangling_ratio)
]

# Diversity metrics are optional; filter on them only when the sweep set them.
for diversity_axis in ("phase_rotation_diversity", "amplitude_rotation_diversity"):
    if diversity_axis in subset and not subset[diversity_axis].isna().all():
        subset = subset[subset[diversity_axis] == _to_iterable(metric_axes[diversity_axis])[0]]

def _heatmap_payload(frame: pd.DataFrame):
    """Arrange *frame* into sparsity-by-qubits grids for plotting.

    Returns ``(qubits, sparsities, data, log_time)``: the sorted unique
    ``num_qubits`` and ``sparsity`` values plus two float arrays of shape
    ``(len(sparsities), len(qubits))``.  ``data`` holds the
    ``backend_to_idx`` index of the selected backend (a missing selection is
    looked up as "None") and ``log_time`` the ``log_selected_time`` value.
    Only the first matching row of each cell is used; cells without a
    matching row, and backends absent from ``backend_to_idx``, stay NaN.
    """
    qubits = sorted(frame["num_qubits"].unique())
    sparsities = sorted(frame["sparsity"].unique())
    grid_shape = (len(sparsities), len(qubits))
    data = np.full(grid_shape, np.nan)
    log_time = np.full_like(data, np.nan)

    cells = itertools.product(enumerate(sparsities), enumerate(qubits))
    for (row, sparsity), (col, nq) in cells:
        matches = frame[(frame["num_qubits"] == nq) & (frame["sparsity"] == sparsity)]
        if matches.empty:
            continue
        first = matches.iloc[0]
        label = first["selected_backend"] or "None"
        data[row, col] = backend_to_idx.get(label, np.nan)
        log_time[row, col] = first["log_selected_time"]
    return qubits, sparsities, data, log_time

fig, axes = plt.subplots(2, 2, figsize=(12, 8), sharex=True, sharey=True)

# Build both panels first so the runtime maps can share one colour scale.
panels = [
    (flag, *_heatmap_payload(subset[subset["is_local"] == flag]))
    for flag in (False, True)
]
finite_logs = np.concatenate([panel[4][np.isfinite(panel[4])].ravel() for panel in panels])
if finite_logs.size:
    time_vmin, time_vmax = float(finite_logs.min()), float(finite_logs.max())
else:
    # Nothing finite to show; any fixed range keeps the colorbar well defined.
    time_vmin, time_vmax = 0.0, 1.0

for ax_idx, (is_local, qubits, sparsities, data, log_time) in enumerate(panels):
    top_ax, bottom_ax = axes[0, ax_idx], axes[1, ax_idx]
    # Half-integer limits centre every backend index inside its colour band.
    backend_image = top_ax.imshow(
        data,
        cmap=backend_cmap,
        aspect="auto",
        origin="lower",
        vmin=-0.5,
        vmax=len(backend_order) - 0.5,
    )
    top_ax.set_title(f"is_local = {is_local}")
    top_ax.set_xticks(range(len(qubits)), qubits)
    top_ax.set_yticks(range(len(sparsities)), [f"{val:.2f}" for val in sparsities])
    top_ax.set_ylabel("sparsity")
    time_image = bottom_ax.imshow(
        log_time,
        cmap="viridis",
        aspect="auto",
        origin="lower",
        vmin=time_vmin,
        vmax=time_vmax,
    )
    bottom_ax.set_xticks(range(len(qubits)), qubits)
    bottom_ax.set_yticks(range(len(sparsities)), [f"{val:.2f}" for val in sparsities])
    bottom_ax.set_xlabel("fragment qubits")
    bottom_ax.set_ylabel("sparsity")

# Colorbars are built from the plotted images so their scale matches the data
# (a bare ScalarMappable would default to a 0-1 range unrelated to either map).
backend_bar = fig.colorbar(
    backend_image,
    ax=axes[0, :],
    ticks=list(range(len(backend_order))),
    orientation="horizontal",
    pad=0.1,
    label="selected backend",
)
backend_bar.set_ticklabels(backend_order)
fig.colorbar(time_image, ax=axes[1, :], orientation="horizontal", pad=0.1, label="log10 runtime estimate")
plt.suptitle("Backend selection and runtime across the parameter sweep")
plt.tight_layout()
plt.show()


### Case study: two segments with a decision diagram prefix

A sparse, locally entangling prefix can often be simulated efficiently with the decision-diagram backend before converting into a dense statevector section.

In [None]:
resource_case1 = {"max_memory_gb": 64}

# Baseline: the whole circuit as a single non-local fragment.
monolithic_stats = synthesise_fragment(34, 70, 0.35, is_local=False)
monolithic_plan = run_plan(
    [monolithic_stats],
    [{"sparsity": 0.62}],
    resource_limits=resource_case1,
    estimator=ESTIMATOR,
)

# Partitioned alternative: a narrow local prefix followed by a full-width
# fragment, joined by a single boundary.
fragment_a = synthesise_fragment(18, 44, 0.18, is_local=True, frontier_scale=0.25, chi=48)
fragment_b = synthesise_fragment(34, 32, 0.32, is_local=False)
boundary_ab = BoundarySpec(
    num_qubits=14,
    rank=48,
    frontier=20,
    window=8,
    window_1q_gates=60,
    window_2q_gates=16,
)
partition_plan = run_plan(
    [fragment_a, fragment_b],
    [{"sparsity": 0.88}, {"sparsity": 0.58}],
    boundaries=[boundary_ab],
    resource_limits=resource_case1,
    estimator=ESTIMATOR,
)

# Side-by-side summary; row 0 (the monolithic plan) is the reference for the ratios.
overview = pd.concat(
    [
        plan_overview(plan_label, plan)
        for plan_label, plan in (
            ("Monolithic statevector", monolithic_plan),
            ("DD → statevector", partition_plan),
        )
    ],
    ignore_index=True,
)
overview["time_speedup_vs_monolithic"] = overview["total_time"].iloc[0] / overview["total_time"]
overview["memory_ratio_vs_monolithic"] = overview["peak_memory"].iloc[0] / overview["peak_memory"]
display(overview)

display(fragment_breakdown(partition_plan))
conversion_df = conversion_breakdown(partition_plan)
if not conversion_df.empty:
    display(conversion_df)

# One bar chart per metric, on a log10 scale.
fig, axes = plt.subplots(1, 2, figsize=(10, 3))
for axis, column, ylabel in zip(
    axes,
    ("total_time", "peak_memory"),
    ("log10 total time", "log10 peak memory"),
):
    axis.bar(overview["plan"], safe_log10(overview[column]))
    axis.set_ylabel(ylabel)
plt.suptitle("Two-segment plan compared to the monolithic execution")
plt.tight_layout()
plt.show()


### Case study: three segments with conversions

A Clifford initialisation, a local MPS window and a dense finale illustrate how multiple conversions accumulate while still reducing projected runtime.

In [None]:
resource_case2 = {"max_memory_gb": 128}

# Baseline: one non-local fragment covering the full circuit.
mono_stats = synthesise_fragment(48, 90, 0.32, is_local=False)
mono_plan = run_plan(
    [mono_stats],
    [{"sparsity": 0.58}],
    resource_limits=resource_case2,
    estimator=ESTIMATOR,
)

# Three fragments: a Clifford opening without entangling layers, a local
# middle section and a full-width non-local tail.
frag1 = synthesise_fragment(48, 18, 0.0, is_clifford=True, is_local=False)
frag2 = synthesise_fragment(32, 50, 0.18, is_local=True, frontier_scale=0.2, chi=48)
frag3 = synthesise_fragment(48, 36, 0.34, is_local=False)

# One boundary between each pair of consecutive fragments.
boundary_12 = BoundarySpec(
    num_qubits=12,
    rank=32,
    frontier=24,
    window=6,
    window_1q_gates=40,
    window_2q_gates=12,
)
boundary_23 = BoundarySpec(
    num_qubits=18,
    rank=64,
    frontier=30,
    window=8,
    window_1q_gates=60,
    window_2q_gates=20,
)
three_plan = run_plan(
    [frag1, frag2, frag3],
    [{"sparsity": level} for level in (0.95, 0.88, 0.55)],
    boundaries=[boundary_12, boundary_23],
    resource_limits=resource_case2,
    estimator=ESTIMATOR,
)

# Row 0 (the monolithic plan) is the reference for both ratio columns.
overview_three = pd.concat(
    [
        plan_overview("Monolithic statevector", mono_plan),
        plan_overview("Tableau → MPS → statevector", three_plan),
    ],
    ignore_index=True,
)
reference_time = overview_three["total_time"].iloc[0]
reference_memory = overview_three["peak_memory"].iloc[0]
overview_three["time_speedup_vs_monolithic"] = reference_time / overview_three["total_time"]
overview_three["memory_ratio_vs_monolithic"] = reference_memory / overview_three["peak_memory"]
display(overview_three)

display(fragment_breakdown(three_plan))
conv_three = conversion_breakdown(three_plan)
if not conv_three.empty:
    display(conv_three)

# Log-scaled bars: total time on the left, peak memory on the right.
fig, axes = plt.subplots(1, 2, figsize=(10, 3))
time_ax, memory_ax = axes
time_ax.bar(overview_three["plan"], safe_log10(overview_three["total_time"]))
time_ax.set_ylabel("log10 total time")
memory_ax.bar(overview_three["plan"], safe_log10(overview_three["peak_memory"]))
memory_ax.set_ylabel("log10 peak memory")
plt.suptitle("Three-segment plan compared to the monolithic execution")
plt.tight_layout()
plt.show()


### Feature map for partition advantage

The following sweep varies the gate mix (fraction of entangling layers), conversion boundary width and Schmidt-rank cap.  It highlights where the model predicts a win for the two-fragment plan and which conversion primitive is selected.

In [None]:
# Sweep definition: entangling fractions, boundary widths (8 to 28 in steps
# of 4) and the rank values handed to each BoundarySpec.
partition_sweep = {
    "gate_mixes": [0.22, 0.28, 0.34, 0.4],
    "boundary_qubits": list(range(8, 29, 4)),
    "ranks": [16, 32, 64],
}
partition_df = evaluate_partition_advantage(**partition_sweep, estimator=ESTIMATOR)
partition_df.head()


In [None]:
# Split the sweep into configurations where partitioning wins and where it loses.
wins_mask = partition_df["partition_wins"]
advantage = partition_df.loc[wins_mask].copy()
losses = partition_df.loc[~wins_mask].copy()

# Per gate mix and conversion primitive: the narrowest winning boundary and
# the best speedup observed.
summary = (
    advantage.groupby(["gate_mix", "primitive"])
    .agg(
        min_boundary=pd.NamedAgg(column="boundary_qubits", aggfunc="min"),
        max_speedup=pd.NamedAgg(column="speedup", aggfunc="max"),
    )
    .reset_index()
    .sort_values(by=["gate_mix", "min_boundary"])
)
summary


In [None]:
if losses.empty:
    print("Partitioning won for every sampled configuration.")
else:
    # Per gate mix and conversion primitive: the range of losing boundary
    # widths and the lowest speedup observed.
    loss_summary = (
        losses.groupby(["gate_mix", "primitive"])
        .agg(
            min_boundary=pd.NamedAgg(column="boundary_qubits", aggfunc="min"),
            max_boundary=pd.NamedAgg(column="boundary_qubits", aggfunc="max"),
            worst_speedup=pd.NamedAgg(column="speedup", aggfunc="min"),
        )
        .reset_index()
        .sort_values(by=["gate_mix", "min_boundary"])
    )
    display(loss_summary)


In [None]:
rank_values = sorted(partition_df["rank"].unique())
# squeeze=False keeps `axes` an array even when only one rank was swept, so
# the indexing below works for any number of panels.
fig, axes = plt.subplots(1, len(rank_values), figsize=(15, 3), sharey=True, squeeze=False)
axes = axes.ravel()

# One gate-mix x boundary grid of mean speedups per rank.
pivots = [
    partition_df[partition_df["rank"] == rank].pivot_table(
        index="gate_mix", columns="boundary_qubits", values="speedup", aggfunc="mean"
    )
    for rank in rank_values
]
# Zero or missing speedups become -inf / NaN here; silence the warnings and
# leave those cells out of the colour range below.
with np.errstate(divide="ignore", invalid="ignore"):
    log_grids = [np.log10(pivot.to_numpy(dtype=float)) for pivot in pivots]

# Common colour limits so the single colorbar is valid for every panel.
finite_logs = np.concatenate([grid[np.isfinite(grid)].ravel() for grid in log_grids])
if finite_logs.size:
    vmin, vmax = float(finite_logs.min()), float(finite_logs.max())
else:
    vmin, vmax = 0.0, 1.0

for ax, rank, pivot, log_grid in zip(axes, rank_values, pivots, log_grids):
    im = ax.imshow(log_grid, cmap="viridis", aspect="auto", origin="lower", vmin=vmin, vmax=vmax)
    ax.set_title(f"rank ≤ {rank}")
    ax.set_xticks(range(len(pivot.columns)), pivot.columns)
    ax.set_yticks(range(len(pivot.index)), [f"{v:.2f}" for v in pivot.index])
    ax.set_xlabel("boundary qubits")
axes[0].set_ylabel("gate mix (entangling fraction)")
fig.colorbar(im, ax=axes, orientation="horizontal", fraction=0.04, pad=0.1, label="log10 speedup")
plt.suptitle("Speedup heatmap across boundary sizes and gate mixes")
plt.tight_layout()
plt.show()


The summary tables above characterise winning and losing configurations and the conversion primitives chosen.  When the initial fragment is forced onto the dense statevector backend, the conversions disappear and the partition offers no benefit, underscoring the importance of sparsity or locality for heterogeneous plans.