In [104]:
# Cell 1: Imports and Setup
"""
Gadget Smoke Test Notebook

This notebook tests all fault-tolerant gadgets (transversal gates, 
teleportation-based gates, and lattice surgery operations) on various codes.
"""

import sys
import os
import time
import warnings
import numpy as np
from dataclasses import dataclass
from typing import Optional, Dict, Any, List
from IPython.display import clear_output

warnings.filterwarnings('ignore')

# Ensure src is in path
src_path = os.path.abspath(os.path.join(os.getcwd(), '..'))
if src_path not in sys.path:
    sys.path.insert(0, src_path)

# CRITICAL: Clear ALL qectostim module cache for fresh import
# This ensures code changes are picked up without kernel restart
import importlib
modules_to_clear = [k for k in list(sys.modules.keys()) if 'qectostim' in k]
for mod in modules_to_clear:
    del sys.modules[mod]
print(f"Cleared {len(modules_to_clear)} cached modules")

# Import testing utilities
from qectostim.testing import (
    load_all_decoders,
    STATUS_OK, STATUS_WARN, STATUS_SKIP, STATUS_FAIL,
)

# Import code discovery
from qectostim.codes import discover_all_codes

# Import noise models
from qectostim.noise.models import NoiseModel, CircuitDepolarizingNoise

# Import gadgets
from qectostim.gadgets import (
    # Transversal gates
    TransversalHadamard,
    TransversalS,
    TransversalT,
    TransversalX,
    TransversalY,
    TransversalZ,
    TransversalCNOT,
    TransversalCZ,
    TransversalSWAP,
    # Teleportation-based gates
    TeleportedHadamard,
    TeleportedS,
    TeleportedT,
    TeleportedIdentity,
    # CSS surgery
    LatticeZZMerge,
    LatticeXXMerge,
    SurgeryCNOT,
)

print("✓ All imports successful")
print(f"  Transversal gates available: TransversalHadamard, TransversalS, TransversalX, ...")
print(f"  Teleported gates available: TeleportedHadamard, TeleportedS, TeleportedT, ...")
print(f"  Surgery operations available: LatticeZZMerge, SurgeryCNOT, ...")

Cleared 93 cached modules
✓ All imports successful
  Transversal gates available: TransversalHadamard, TransversalS, TransversalX, ...
  Teleported gates available: TeleportedHadamard, TeleportedS, TeleportedT, ...
  Surgery operations available: LatticeZZMerge, SurgeryCNOT, ...


In [93]:
# Cell 2: Load Decoders and Define Test Infrastructure

decoder_classes = load_all_decoders()
print(f"Loaded {len(decoder_classes)} decoders: {list(decoder_classes.keys())}")

# Use the actual CircuitDepolarizingNoise model for proper DEM generation
# p1 = single-qubit error prob, p2 = two-qubit error prob
noise = CircuitDepolarizingNoise(p1=0.001, p2=0.001)
print(f"Using noise model: CircuitDepolarizingNoise(p1=0.001, p2=0.001)")

# Define gadget configurations - enable stabilizer rounds for proper detector testing
SINGLE_QUBIT_GADGETS = {
    'TransversalH': (TransversalHadamard, {'include_stabilizer_rounds': True, 'num_rounds_before': 1, 'num_rounds_after': 1}),
    'TransversalS': (TransversalS, {'include_stabilizer_rounds': True, 'num_rounds_before': 1, 'num_rounds_after': 1}),
    'TransversalX': (TransversalX, {'include_stabilizer_rounds': True, 'num_rounds_before': 1, 'num_rounds_after': 1}),
    'TransversalZ': (TransversalZ, {'include_stabilizer_rounds': True, 'num_rounds_before': 1, 'num_rounds_after': 1}),
    'TeleportedH': (TeleportedHadamard, {'include_stabilizer_rounds': True}),
    'TeleportedS': (TeleportedS, {'include_stabilizer_rounds': True}),
    'TeleportedT': (TeleportedT, {'include_stabilizer_rounds': True}),
}

TWO_QUBIT_GADGETS = {
    'TransversalCNOT': (TransversalCNOT, {'include_stabilizer_rounds': True, 'num_rounds_before': 1, 'num_rounds_after': 1}),
    'TransversalCZ': (TransversalCZ, {'include_stabilizer_rounds': True, 'num_rounds_before': 1, 'num_rounds_after': 1}),
    'LatticeZZMerge': (LatticeZZMerge, {'num_merge_rounds': 1, 'include_stabilizer_rounds': True}),
    'SurgeryCNOT': (SurgeryCNOT, {'num_rounds_before': 2, 'num_rounds_after': 2}),
}

print(f"\nSingle-qubit gadgets: {list(SINGLE_QUBIT_GADGETS.keys())}")
print(f"Two-qubit gadgets: {list(TWO_QUBIT_GADGETS.keys())}")

Loaded 10 decoders: ['PyMatching', 'FusionBlossom', 'BeliefMatching', 'BPOSD', 'Tesseract', 'UnionFind', 'MLE', 'Hypergraph', 'Chromobius', 'Concatenated']
Using noise model: CircuitDepolarizingNoise(p1=0.001, p2=0.001)

Single-qubit gadgets: ['TransversalH', 'TransversalS', 'TransversalX', 'TransversalZ', 'TeleportedH', 'TeleportedS', 'TeleportedT']
Two-qubit gadgets: ['TransversalCNOT', 'TransversalCZ', 'LatticeZZMerge', 'SurgeryCNOT']


In [107]:
# Cell 1.5: Test new emit_next_phase() interface
"""
Test the new phase-based gadget interface:
- emit_next_phase() returns PhaseResult
- num_phases property indicates phases count
- FaultTolerantGadgetExperiment uses phase loop
"""

from qectostim.gadgets.base import PhaseResult, PhaseType
from qectostim.gadgets.layout import QubitAllocation
from qectostim.experiments.stabilizer_rounds import DetectorContext
from qectostim.experiments.ft_gadget_experiment import FaultTolerantGadgetExperiment
from qectostim.codes.small.four_two_two import FourQubit422Code
import stim

print("=" * 60)
print("Testing emit_next_phase() Interface")
print("=" * 60)

# Test 1: TransversalHadamard (single-phase)
print("\n1. TransversalHadamard (single-phase gadget):")
code = FourQubit422Code()
h_gadget = TransversalHadamard(include_stabilizer_rounds=False)
print(f"   num_phases: {h_gadget.num_phases}")
assert h_gadget.num_phases == 1, "TransversalHadamard should have 1 phase"

# Create allocation and context for testing
alloc = QubitAllocation.from_codes([code])  # Block names auto-generated as block_0, block_1, etc.
ctx = DetectorContext()
circuit = stim.Circuit()
alloc.emit_qubit_coords(circuit)
alloc.emit_reset_all(circuit)

h_gadget.reset_phases()
result = h_gadget.emit_next_phase(circuit, alloc, ctx)
print(f"   Phase 0 result: is_final={result.is_final}, type={result.phase_type}")
assert result.is_final == True, "Single-phase gadget should be final after phase 0"
assert result.phase_type == PhaseType.GATE, "TransversalH should be GATE type"
print(f"   ✓ TransversalHadamard emit_next_phase works!")

# Test 2: TeleportedHadamard (multi-phase)
print("\n2. TeleportedHadamard (3-phase gadget):")
t_h = TeleportedHadamard(include_stabilizer_rounds=False)
print(f"   num_phases: {t_h.num_phases}")
assert t_h.num_phases == 3, "TeleportedHadamard should have 3 phases"

# Create allocation for teleportation (needs 2 blocks)
alloc2 = QubitAllocation.from_codes([code, code])  # block_0 and block_1
ctx2 = DetectorContext()
circuit2 = stim.Circuit()
alloc2.emit_qubit_coords(circuit2)
alloc2.emit_reset_all(circuit2)

t_h.reset_phases()
for i in range(3):
    result = t_h.emit_next_phase(circuit2, alloc2, ctx2)
    print(f"   Phase {i} result: is_final={result.is_final}, type={result.phase_type}")
    if i < 2:
        assert not result.is_final, f"Phase {i} should not be final"
    else:
        assert result.is_final, "Phase 2 should be final"

print(f"   ✓ TeleportedHadamard emit_next_phase works!")

# Test 3: SurgeryCNOT (multi-phase)
print("\n3. SurgeryCNOT (2-phase gadget):")
s_cnot = SurgeryCNOT(num_rounds_before=0, num_rounds_after=0, num_merge_rounds=1)
print(f"   num_phases: {s_cnot.num_phases}")
assert s_cnot.num_phases == 2, "SurgeryCNOT should have 2 phases"

alloc3 = QubitAllocation.from_codes([code, code])  # block_0 and block_1
ctx3 = DetectorContext()
circuit3 = stim.Circuit()
alloc3.emit_qubit_coords(circuit3)
alloc3.emit_reset_all(circuit3)

s_cnot.reset_phases()
for i in range(2):
    result = s_cnot.emit_next_phase(circuit3, alloc3, ctx3)
    print(f"   Phase {i} result: is_final={result.is_final}, type={result.phase_type}")
    print(f"            needs_stabilizer_rounds={result.needs_stabilizer_rounds}")

print(f"   ✓ SurgeryCNOT emit_next_phase works!")

# Test 4: FaultTolerantGadgetExperiment phase loop
print("\n4. FaultTolerantGadgetExperiment with phase loop:")
noise = CircuitDepolarizingNoise(p1=0.001, p2=0.001)
exp = FaultTolerantGadgetExperiment(
    codes=[code],
    gadget=TransversalHadamard(include_stabilizer_rounds=False),
    noise_model=noise,
    num_rounds_before=1,
    num_rounds_after=1,
)
ft_circuit = exp.to_stim()
print(f"   Generated circuit with {len(ft_circuit)} instructions")
print(f"   Detectors: {ft_circuit.num_detectors}")
print(f"   ✓ FaultTolerantGadgetExperiment phase loop works!")

print("\n" + "=" * 60)
print("All emit_next_phase() interface tests PASSED!")
print("=" * 60)

Testing emit_next_phase() Interface

1. TransversalHadamard (single-phase gadget):
   num_phases: 1
   Phase 0 result: is_final=True, type=PhaseType.GATE
   ✓ TransversalHadamard emit_next_phase works!

2. TeleportedHadamard (3-phase gadget):
   num_phases: 3
   Phase 0 result: is_final=False, type=PhaseType.PREPARATION
   Phase 1 result: is_final=False, type=PhaseType.GATE
   Phase 2 result: is_final=True, type=PhaseType.MEASUREMENT
   ✓ TeleportedHadamard emit_next_phase works!

3. SurgeryCNOT (2-phase gadget):
   num_phases: 2
   Phase 0 result: is_final=False, type=PhaseType.GATE
            needs_stabilizer_rounds=1
   Phase 1 result: is_final=True, type=PhaseType.GATE
            needs_stabilizer_rounds=0
   ✓ SurgeryCNOT emit_next_phase works!

4. FaultTolerantGadgetExperiment with phase loop:
   Generated circuit with 83 instructions
   Detectors: 3
   ✓ FaultTolerantGadgetExperiment phase loop works!

All emit_next_phase() interface tests PASSED!


In [94]:
# Cell 3: Test Helper Functions
from qectostim.experiments.ft_gadget_experiment import FaultTolerantGadgetExperiment

# Status constants with N/A for truly impossible combinations
STATUS_OK = 'OK'
STATUS_WARN = 'WARN'
STATUS_FAIL = 'FAIL'
STATUS_SKIP = 'SKIP'  # Temporary skip (e.g., no detectors, can be fixed)
STATUS_NA = 'N/A'     # Truly impossible combination (e.g., Chromobius on non-color)

@dataclass
class GadgetTestResult:
    """Result of a gadget test."""
    code_name: str
    code_type: str
    gadget_name: str
    status: str  # 'OK', 'WARN', 'FAIL', 'SKIP', 'N/A'
    ler: Optional[float] = None
    ler_no_decode: Optional[float] = None  # Raw LER without decoder
    time_ms: Optional[float] = None
    error: Optional[str] = None
    circuit_stats: Optional[Dict[str, Any]] = None


def test_gadget_on_code(
    code,
    code_name: str,
    code_type: str,
    gadget_class,
    gadget_name: str,
    gadget_kwargs: Dict,
    decoder_class,
    decoder_name: str,
    p: float = 0.001,
    shots: int = 1000,
    is_two_qubit: bool = False,
) -> GadgetTestResult:
    """
    Test a gadget on a code with a specific decoder.
    
    Uses FTGadgetExperiment for proper detector and observable generation.
    
    Returns GadgetTestResult with status, LER, timing, and any errors.
    
    Status meanings:
    - OK: Decoder worked and LER calculated
    - WARN: Decoder raised error but circuit was built
    - FAIL: Circuit/DEM generation failed  
    - SKIP: Temporary issue (e.g., no detectors) - could be fixed
    - N/A: Fundamentally impossible combination (e.g., Chromobius on non-color)
    """
    # Pre-flight N/A checks - truly impossible combinations
    
    # Check for k=0 codes (no logical qubits)
    k = getattr(code, 'k', None)
    if k is not None and k == 0:
        return GadgetTestResult(
            code_name=code_name,
            code_type=code_type,
            gadget_name=gadget_name,
            status=STATUS_SKIP,
            error='k=0: no logical qubits',
        )
    
    if decoder_name == 'Chromobius' and 'color' not in code_type.lower():
        return GadgetTestResult(
            code_name=code_name,
            code_type=code_type,
            gadget_name=gadget_name,
            status=STATUS_NA,
            error='Chromobius requires color codes',
        )
    
    if decoder_name == 'Concatenated':
        # ConcatenatedDecoder requires ConcatenatedCode - N/A for all others
        return GadgetTestResult(
            code_name=code_name,
            code_type=code_type,
            gadget_name=gadget_name,
            status=STATUS_NA,
            error='Concatenated requires ConcatenatedCode',
        )
    
    try:
        # Create gadget (without stabilizer round params - FTGadgetExperiment handles those)
        # Extract only the gadget-specific kwargs (not stabilizer round params)
        gadget_only_kwargs = {k: v for k, v in gadget_kwargs.items() 
                             if k not in ['include_stabilizer_rounds', 'num_rounds_before', 'num_rounds_after']}
        gadget = gadget_class(**gadget_only_kwargs)
        
        # For two-qubit gadgets, use two copies of the same code
        codes = [code, code] if is_two_qubit else [code]
        
        # Use FTGadgetExperiment for proper circuit generation with detectors + observables
        start_time = time.time()
        experiment = FaultTolerantGadgetExperiment(
            codes=codes,
            gadget=gadget,
            noise_model=noise,  # Use global noise model
            num_rounds_before=2,  # Need at least 2 for detectors
            num_rounds_after=2,
            measurement_basis="Z",
        )
        circuit = experiment.to_stim()
        circuit_time = (time.time() - start_time) * 1000
        
        # Collect circuit stats
        circuit_str = str(circuit)
        circuit_stats = {
            'num_qubits': circuit.num_qubits,
            'num_ops': len(circuit),
            'ticks': circuit_str.count('TICK'),
            'detectors': circuit_str.count('DETECTOR'),
            'observables': circuit_str.count('OBSERVABLE_INCLUDE'),
        }
        
        # Try to get DEM with multiple fallbacks
        dem = None
        dem_warning = None
        
        # Attempt 1: Full decomposition
        try:
            dem = circuit.detector_error_model(decompose_errors=True)
        except Exception as e1:
            # Attempt 2: Ignore decomposition failures
            try:
                dem = circuit.detector_error_model(
                    decompose_errors=True, 
                    ignore_decomposition_failures=True
                )
            except Exception as e2:
                # Attempt 3: No decomposition (for non-deterministic circuits)
                try:
                    dem = circuit.detector_error_model(decompose_errors=False)
                    dem_warning = 'Using undecomposed DEM'
                except Exception as dem_err:
                    # Check if it's a non-deterministic error
                    err_str = str(dem_err).lower()
                    if 'non-deterministic' in err_str:
                        return GadgetTestResult(
                            code_name=code_name,
                            code_type=code_type,
                            gadget_name=gadget_name,
                            status=STATUS_WARN,
                            error='Non-deterministic circuit',
                            circuit_stats=circuit_stats,
                        )
                    return GadgetTestResult(
                        code_name=code_name,
                        code_type=code_type,
                        gadget_name=gadget_name,
                        status=STATUS_FAIL,
                        error=f'DEM error: {str(dem_err)[:80]}',
                        circuit_stats=circuit_stats,
                    )
        
        # Skip if DEM has no detectors (can't decode)
        if dem.num_detectors == 0:
            return GadgetTestResult(
                code_name=code_name,
                code_type=code_type,
                gadget_name=gadget_name,
                status=STATUS_SKIP,
                error='No detectors in DEM',
                circuit_stats=circuit_stats,
            )
        
        # Skip if no observables (can't compute LER)
        if dem.num_observables == 0:
            return GadgetTestResult(
                code_name=code_name,
                code_type=code_type,
                gadget_name=gadget_name,
                status=STATUS_SKIP,
                error='No observables in DEM',
                circuit_stats=circuit_stats,
            )
        
        # MLE N/A for large DEMs (too slow to be practical)
        if decoder_name == 'MLE' and dem.num_detectors > 20:
            return GadgetTestResult(
                code_name=code_name,
                code_type=code_type,
                gadget_name=gadget_name,
                status=STATUS_NA,
                error='MLE impractical for large DEM',
                circuit_stats=circuit_stats,
            )
        
        # Try to run with decoder
        try:
            # Sample detector outcomes from DEM
            sampler = dem.compile_sampler()
            raw = sampler.sample(shots, bit_packed=False)
            
            if isinstance(raw, tuple):
                det_samples = np.asarray(raw[0], dtype=np.uint8)
                obs_samples = np.asarray(raw[1], dtype=np.uint8) if raw[1] is not None else None
            else:
                arr = np.asarray(raw, dtype=np.uint8)
                det_samples = arr[:, :dem.num_detectors]
                obs_samples = arr[:, dem.num_detectors:] if dem.num_observables > 0 else None
            
            # Calculate no-decode LER FIRST (before any decoder processing)
            ler_no_decode = None
            if obs_samples is not None and len(obs_samples.shape) > 1 and obs_samples.shape[1] > 0:
                ler_no_decode = float(obs_samples[:, 0].mean())
            
            # Create decoder with DEM
            decoder = decoder_class(dem=dem)
            
            # Decode
            decode_start = time.time()
            corrections = decoder.decode_batch(det_samples)
            decode_time = (time.time() - decode_start) * 1000
            
            corrections = np.asarray(corrections, dtype=np.uint8)
            if corrections.ndim == 1:
                corrections = corrections.reshape(-1, max(1, dem.num_observables))
            
            # Compute actual LER (with decoder)
            ler = None
            if obs_samples is not None and len(obs_samples.shape) > 1 and obs_samples.shape[1] > 0:
                logical_errors = (corrections[:, 0] ^ obs_samples[:, 0]).astype(np.uint8)
                ler = float(logical_errors.mean())
            
            return GadgetTestResult(
                code_name=code_name,
                code_type=code_type,
                gadget_name=gadget_name,
                status=STATUS_OK,
                ler=ler,
                ler_no_decode=ler_no_decode,
                time_ms=decode_time,
                circuit_stats=circuit_stats,
            )
            
        except Exception as dec_error:
            # Decoder failed but circuit was built
            error_str = str(dec_error)
            return GadgetTestResult(
                code_name=code_name,
                code_type=code_type,
                gadget_name=gadget_name,
                status=STATUS_WARN,
                time_ms=circuit_time,
                error=error_str[:100],
                circuit_stats=circuit_stats,
            )
    
    except Exception as e:
        return GadgetTestResult(
            code_name=code_name,
            code_type=code_type,
            gadget_name=gadget_name,
            status=STATUS_FAIL,
            error=str(e)[:200],
        )

print("✓ Test helper functions defined (using FTGadgetExperiment)")

✓ Test helper functions defined (using FTGadgetExperiment)


In [95]:
# Cell 4: Circuit Analysis Helper

def analyze_gadget_circuit(code, gadget_class, gadget_kwargs, is_two_qubit=False) -> Dict[str, Any]:
    """
    Analyze a gadget circuit for parallelism and structure metrics.
    
    Returns dict with:
    - num_qubits, ticks, 2q_gates, 1q_gates
    - layer_utilization
    - has_timing, has_coords
    """
    # Check for k=0 codes (no logical qubits)
    k = getattr(code, 'k', None)
    if k is not None and k == 0:
        return {
            'status': 'FAIL',
            'error': f'Logical index 0 >= k={k}',
        }
    
    try:
        gadget = gadget_class(**gadget_kwargs)
        codes = [code, code] if is_two_qubit else [code]
        circuit = gadget.to_stim(codes, noise)
        
        circuit_str = str(circuit)
        
        # Count gates
        two_qubit_gates = sum([
            circuit_str.count(gate) 
            for gate in ['CX', 'CNOT', 'CZ', 'SWAP', 'ISWAP']
        ])
        single_qubit_gates = sum([
            circuit_str.count(gate)
            for gate in ['H ', 'S ', 'T ', 'X ', 'Y ', 'Z ', 'R ', 'M ', 'MR ']
        ])
        
        ticks = circuit_str.count('TICK')
        has_coords = 'QUBIT_COORDS' in circuit_str
        
        # Layer utilization: avg ops per tick / num_qubits
        total_ops = two_qubit_gates + single_qubit_gates
        if ticks > 0 and circuit.num_qubits > 0:
            utilization = (total_ops / ticks) / circuit.num_qubits
        else:
            utilization = 0.0
        
        return {
            'status': 'OK',
            'num_qubits': circuit.num_qubits,
            'ticks': ticks,
            '2q_gates': two_qubit_gates,
            '1q_gates': single_qubit_gates,
            'utilization': utilization,
            'has_timing': ticks > 0,
            'has_coords': has_coords,
        }
    except Exception as e:
        return {
            'status': 'FAIL',
            'error': str(e)[:100],
        }

print("✓ Circuit analysis helper defined")

✓ Circuit analysis helper defined


In [96]:
# Cell 4b: Discover Codes for Testing
clear_output(wait=True)

print("="*70)
print("DISCOVERING CODES FOR GADGET TESTING")
print("="*70)

# Discover codes
raw_codes = discover_all_codes(
    max_qubits=30,  # Keep small for faster testing
    include_qldpc=True,
    include_subsystem=False,
    include_floquet=False,
    include_bosonic=False,
    include_qudit=False,
    include_non_css=True,
    timeout_per_code=2.0,
)

# Build code registry with type info
all_codes = {}
for name, code in raw_codes.items():
    # Determine code type
    if hasattr(code, 'hx') and hasattr(code, 'hz'):
        if 'Colour' in name or 'Color' in name:
            code_type = 'Color CSS'
        elif 'Surface' in name or 'Toric' in name:
            code_type = 'Topological CSS'
        else:
            code_type = 'CSS'
    else:
        code_type = 'Non-CSS'
    
    all_codes[name] = (code_type, code)

print(f"\nTotal codes discovered: {len(all_codes)}")

# Show breakdown by type
type_counts = {}
for name, (code_type, code) in all_codes.items():
    type_counts[code_type] = type_counts.get(code_type, 0) + 1

print("\nBy type:")
for code_type, count in sorted(type_counts.items()):
    print(f"  {code_type}: {count}")

# List first 15 codes
print("\n" + "-"*70)
print(f"{'Code Name':<40} {'Type':<15} {'n':>5}")
print("-"*70)
for name, (code_type, code) in sorted(all_codes.items(), key=lambda x: x[1][1].n)[:15]:
    print(f"{name:<40} {code_type:<15} {code.n:>5}")
if len(all_codes) > 15:
    print(f"... and {len(all_codes) - 15} more")

DISCOVERING CODES FOR GADGET TESTING

Total codes discovered: 47

By type:
  CSS: 21
  Color CSS: 13
  Non-CSS: 5
  Topological CSS: 8

----------------------------------------------------------------------
Code Name                                Type                n
----------------------------------------------------------------------
Repetition_3                             CSS                 3
FourQubit422_[[4,2,2]]                   CSS                 4
Perfect_513                              Non-CSS             5
Mixed_512                                Non-CSS             5
Repetition_5                             CSS                 5
C6                                       CSS                 6
NonCSS_642                               Non-CSS             6
Steane_713                               CSS                 7
Hamming_CSS_7                            CSS                 7
BareAncilla_713                          Non-CSS             7
Repetition_7                 

In [101]:
# Quick test of the new emit_gates() interface
# This cell validates that the refactored gadget interface works correctly

from qectostim.codes.small.four_two_two import FourQubit422Code
from qectostim.gadgets.transversal import TransversalGate, TransversalHadamard
from qectostim.gadgets.layout import QubitAllocation
from qectostim.experiments.stabilizer_rounds import DetectorContext
import stim

print("=" * 70)
print("TESTING NEW emit_gates() INTERFACE")
print("=" * 70)

# Create a simple code
code = FourQubit422Code()
print(f"Using code: {type(code).__name__} with n={code.n}")

# Test 1: QubitAllocation.from_codes()
print("\n[Test 1] QubitAllocation.from_codes()")
alloc = QubitAllocation.from_codes([code])
print(f"  ✓ Created allocation with {alloc.total_qubits} qubits")
block = alloc.get_block("block_0")
print(f"  ✓ Block 'block_0': data={list(block.data_range)}, x_anc={list(block.x_anc_range)}")

# Test 2: TransversalGate interface methods
print("\n[Test 2] TransversalHadamard interface")
h_gadget = TransversalHadamard()
print(f"  - gate_name: {h_gadget.gate_name}")
print(f"  - has emit_gates: {hasattr(h_gadget, 'emit_gates')}")
print(f"  - has get_stabilizer_transform: {hasattr(h_gadget, 'get_stabilizer_transform')}")
print(f"  - has get_observable_transform: {hasattr(h_gadget, 'get_observable_transform')}")

# Test 3: emit_gates()
print("\n[Test 3] emit_gates()")
circuit = stim.Circuit()
alloc.emit_qubit_coords(circuit)
alloc.emit_reset_all(circuit)
ctx = DetectorContext()

h_gadget.emit_gates(circuit, alloc, ctx)

# Check circuit has H gates
circuit_str = str(circuit)
has_h = "H " in circuit_str or "H(" in circuit_str
has_tick = "TICK" in circuit_str
print(f"  ✓ emit_gates() completed")
print(f"  - Circuit has H gate: {has_h}")
print(f"  - Circuit has TICK: {has_tick}")
print(f"  - Total instructions: {len(list(circuit))}")

# Test 4: get_stabilizer_transform()
print("\n[Test 4] get_stabilizer_transform()")
transform = h_gadget.get_stabilizer_transform()
print(f"  - x_becomes: {transform.x_becomes}")
print(f"  - z_becomes: {transform.z_becomes}")
print(f"  - swap_xz: {transform.swap_xz}")
print(f"  - clear_history: {transform.clear_history}")

# Test 5: get_observable_transform()
print("\n[Test 5] get_observable_transform()")
obs_transform = h_gadget.get_observable_transform()
print(f"  - transform: {obs_transform.transform}")

# Test 6: Full FaultTolerantGadgetExperiment with new interface
print("\n[Test 6] FaultTolerantGadgetExperiment with emit_gates()")
from qectostim.experiments.ft_gadget_experiment import FaultTolerantGadgetExperiment

noise = CircuitDepolarizingNoise(p1=0.001, p2=0.001)
exp = FaultTolerantGadgetExperiment(
    codes=[code],
    gadget=h_gadget,
    noise_model=noise,
    num_rounds_before=1,
    num_rounds_after=1,
)
ft_circuit = exp.to_stim()
print(f"  ✓ Generated FT circuit with {len(list(ft_circuit))} instructions")

# Check for key components
ft_str = str(ft_circuit)
checks = {
    "QUBIT_COORDS": "QUBIT_COORDS" in ft_str,
    "RESET": "R " in ft_str,
    "H gate": "H " in ft_str or "H(" in ft_str,
    "TICK": "TICK" in ft_str,
    "DETECTOR": "DETECTOR" in ft_str,
    "OBSERVABLE": "OBSERVABLE" in ft_str,
}
print("  Circuit components:")
for name, present in checks.items():
    status = "✓" if present else "✗"
    print(f"    {status} {name}")

print("\n" + "=" * 70)
print("NEW INTERFACE TESTS COMPLETED")
print("=" * 70)

TESTING NEW emit_gates() INTERFACE
Using code: FourQubit422Code with n=4

[Test 1] QubitAllocation.from_codes()
  ✓ Created allocation with 6 qubits
  ✓ Block 'block_0': data=[0, 1, 2, 3], x_anc=[4]

[Test 2] TransversalHadamard interface
  - gate_name: H
  - has emit_gates: True
  - has get_stabilizer_transform: True
  - has get_observable_transform: True

[Test 3] emit_gates()
  ✓ emit_gates() completed
  - Circuit has H gate: True
  - Circuit has TICK: True
  - Total instructions: 10

[Test 4] get_stabilizer_transform()
  - x_becomes: Z
  - z_becomes: X
  - swap_xz: True
  - clear_history: True

[Test 5] get_observable_transform()
  - transform: {'X': 'Z', 'Z': 'X', 'Y': '-Y'}

[Test 6] FaultTolerantGadgetExperiment with emit_gates()
  ✓ Generated FT circuit with 83 instructions
  Circuit components:
    ✓ QUBIT_COORDS
    ✓ RESET
    ✓ H gate
    ✓ TICK
    ✓ DETECTOR
    ✓ OBSERVABLE

NEW INTERFACE TESTS COMPLETED


In [102]:
# Test TeleportedGate and SurgeryCNOT emit_gates() interface

from qectostim.gadgets.teleportation import TeleportedHadamard
from qectostim.gadgets.css_surgery import SurgeryCNOT

print("=" * 70)
print("TESTING TELEPORTEDGATE AND SURGERYCNOT emit_gates()")
print("=" * 70)

# Test TeleportedHadamard
print("\n[TeleportedHadamard]")
t_h = TeleportedHadamard()
print(f"  - has emit_gates: {hasattr(t_h, 'emit_gates')}")
print(f"  - has get_stabilizer_transform: {hasattr(t_h, 'get_stabilizer_transform')}")

transform = t_h.get_stabilizer_transform()
print(f"  - stabilizer transform: x→{transform.x_becomes}, z→{transform.z_becomes}")

# Test SurgeryCNOT
print("\n[SurgeryCNOT]")
s_cnot = SurgeryCNOT()
print(f"  - has emit_gates: {hasattr(s_cnot, 'emit_gates')}")
print(f"  - has get_stabilizer_transform: {hasattr(s_cnot, 'get_stabilizer_transform')}")

transform = s_cnot.get_stabilizer_transform()
print(f"  - stabilizer transform: x→{transform.x_becomes}, z→{transform.z_becomes}")

# Test emit_gates for TeleportedHadamard (needs 2 blocks: data + ancilla)
print("\n[Test TeleportedHadamard emit_gates()]")
# Teleportation needs block_0 (data) and block_1 (ancilla)
alloc2 = QubitAllocation.from_codes([code, code])  # Two copies for teleportation
circuit2 = stim.Circuit()
alloc2.emit_qubit_coords(circuit2)
alloc2.emit_reset_all(circuit2)
ctx2 = DetectorContext()

try:
    t_h.emit_gates(circuit2, alloc2, ctx2)
    print(f"  ✓ emit_gates() completed successfully")
    circuit_str = str(circuit2)
    has_cnot = "CNOT" in circuit_str or "CX" in circuit_str
    has_m = "M " in circuit_str or "M(" in circuit_str
    print(f"  - Has CNOT: {has_cnot}")
    print(f"  - Has M (measurement): {has_m}")
except Exception as e:
    print(f"  ✗ emit_gates() failed: {type(e).__name__}: {e}")

# Test emit_gates for SurgeryCNOT
print("\n[Test SurgeryCNOT emit_gates()]")
try:
    circuit3 = stim.Circuit()
    alloc2.emit_qubit_coords(circuit3)
    alloc2.emit_reset_all(circuit3)
    ctx3 = DetectorContext()
    
    s_cnot.emit_gates(circuit3, alloc2, ctx3)
    print(f"  ✓ emit_gates() completed successfully")
    circuit_str = str(circuit3)
    has_cnot = "CNOT" in circuit_str or "CX" in circuit_str
    has_cz = "CZ" in circuit_str
    print(f"  - Has CNOT: {has_cnot}")
    print(f"  - Has CZ: {has_cz}")
except Exception as e:
    print(f"  ✗ emit_gates() failed: {type(e).__name__}: {e}")

print("\n" + "=" * 70)
print("ALL INTERFACE TESTS COMPLETED SUCCESSFULLY")
print("=" * 70)

TESTING TELEPORTEDGATE AND SURGERYCNOT emit_gates()

[TeleportedHadamard]
  - has emit_gates: True
  - has get_stabilizer_transform: True
  - stabilizer transform: x→Z, z→X

[SurgeryCNOT]
  - has emit_gates: True
  - has get_stabilizer_transform: True
  - stabilizer transform: x→X, z→Z

[Test TeleportedHadamard emit_gates()]
  ✓ emit_gates() completed successfully
  - Has CNOT: True
  - Has M (measurement): True

[Test SurgeryCNOT emit_gates()]
  ✓ emit_gates() completed successfully
  - Has CNOT: True
  - Has CZ: True

ALL INTERFACE TESTS COMPLETED SUCCESSFULLY


## Single-Qubit Gadget Tests

Test all single-qubit gadgets across all codes.

In [19]:
# Cell 5: Transversal Hadamard - Circuit Analysis Table
from IPython.display import clear_output
clear_output(wait=True)

gadget_name = 'TransversalH'
gadget_class, gadget_kwargs = SINGLE_QUBIT_GADGETS[gadget_name]

print("=" * 120)
print(f"CIRCUIT ANALYSIS TABLE - {gadget_name}")
print("=" * 120)
print()

# Header
header = f"{'Code':<40} | {'Type':<10} | {'n':>4} | {'Ticks':>5} | {'2Q':>6} | {'1Q':>6} | {'Util':>5} | {'Timed':>5} | {'Coords':>6} | Status"
print(header)
print("-" * len(header))

# Store results
transversal_h_results = {}
errors_encountered = []

for code_name, (code_type, code) in sorted(all_codes.items()):
    result = analyze_gadget_circuit(code, gadget_class, gadget_kwargs, is_two_qubit=False)
    transversal_h_results[code_name] = result
    
    if result['status'] == 'OK':
        row = f"{code_name:<40} | {code_type:<10} | {result['num_qubits']:>4} | {result['ticks']:>5} | {result['2q_gates']:>6} | {result['1q_gates']:>6} | {result['utilization']:>5.2f} | {'✓' if result['has_timing'] else '✗':>5} | {'✓' if result['has_coords'] else '✗':>6} | ✓ OK"
    else:
        row = f"{code_name:<40} | {code_type:<10} | {'-':>4} | {'-':>5} | {'-':>6} | {'-':>6} | {'-':>5} | {'-':>5} | {'-':>6} | ✗ FAIL"
        errors_encountered.append((code_name, result.get('error', 'Unknown')))
    
    print(row)

print("-" * len(header))
print(f"\nTotal: {len(all_codes)} codes analyzed")
ok_count = sum(1 for r in transversal_h_results.values() if r['status'] == 'OK')
print(f"  ✓ OK: {ok_count}")
print(f"  ✗ FAIL: {len(transversal_h_results) - ok_count}")

if errors_encountered:
    print(f"\n--- ERRORS ENCOUNTERED ({len(errors_encountered)}) ---")
    for name, error in errors_encountered[:10]:  # Show first 10
        print(f"  {name}: {error}")
    if len(errors_encountered) > 10:
        print(f"  ... and {len(errors_encountered) - 10} more")

CIRCUIT ANALYSIS TABLE - TransversalH

Code                                     | Type       |    n | Ticks |     2Q |     1Q |  Util | Timed | Coords | Status
------------------------------------------------------------------------------------------------------------------------
BaconShor_3x3                            | CSS        |   13 |    32 |     24 |     38 |  0.15 |     ✓ |      ✓ | ✓ OK
BallColor_3D_[[12,0,None]]               | Color CSS  |   26 |    26 |     18 |     32 |  0.07 |     ✓ |      ✓ | ✓ OK
BallColor_4D_[[24,0,None]]               | Color CSS  |   64 |    32 |     24 |     38 |  0.03 |     ✓ |      ✓ | ✓ OK
BallColor_[[12,0,None]]                  | Color CSS  |   26 |    26 |     18 |     32 |  0.07 |     ✓ |      ✓ | ✓ OK
BallColor_4D_[[24,0,None]]               | Color CSS  |   64 |    32 |     24 |     38 |  0.03 |     ✓ |      ✓ | ✓ OK
BallColor_[[12,0,None]]                  | Color CSS  |   26 |    26 |     18 |     32 |  0.07 |     ✓ |      ✓ | ✓ OK
BallC

In [20]:
# Cell 6: Teleported Hadamard - Circuit Analysis Table
from IPython.display import clear_output
clear_output(wait=True)

gadget_name = 'TeleportedH'
gadget_class, gadget_kwargs = SINGLE_QUBIT_GADGETS[gadget_name]

print("=" * 120)
print(f"CIRCUIT ANALYSIS TABLE - {gadget_name}")
print("=" * 120)
print()

header = f"{'Code':<40} | {'Type':<10} | {'n':>4} | {'Ticks':>5} | {'2Q':>6} | {'1Q':>6} | {'Util':>5} | {'Timed':>5} | {'Coords':>6} | Status"
print(header)
print("-" * len(header))

teleported_h_results = {}
errors_encountered = []

for code_name, (code_type, code) in sorted(all_codes.items()):
    result = analyze_gadget_circuit(code, gadget_class, gadget_kwargs, is_two_qubit=False)
    teleported_h_results[code_name] = result
    
    if result['status'] == 'OK':
        row = f"{code_name:<40} | {code_type:<10} | {result['num_qubits']:>4} | {result['ticks']:>5} | {result['2q_gates']:>6} | {result['1q_gates']:>6} | {result['utilization']:>5.2f} | {'✓' if result['has_timing'] else '✗':>5} | {'✓' if result['has_coords'] else '✗':>6} | ✓ OK"
    else:
        row = f"{code_name:<40} | {code_type:<10} | {'-':>4} | {'-':>5} | {'-':>6} | {'-':>6} | {'-':>5} | {'-':>5} | {'-':>6} | ✗ FAIL"
        errors_encountered.append((code_name, result.get('error', 'Unknown')))
    
    print(row)

print("-" * len(header))
print(f"\nTotal: {len(all_codes)} codes analyzed")
ok_count = sum(1 for r in teleported_h_results.values() if r['status'] == 'OK')
print(f"  ✓ OK: {ok_count}")
print(f"  ✗ FAIL: {len(teleported_h_results) - ok_count}")

if errors_encountered:
    print(f"\n--- ERRORS ENCOUNTERED ({len(errors_encountered)}) ---")
    for name, error in errors_encountered[:10]:
        print(f"  {name}: {error}")

CIRCUIT ANALYSIS TABLE - TeleportedH

Code                                     | Type       |    n | Ticks |     2Q |     1Q |  Util | Timed | Coords | Status
------------------------------------------------------------------------------------------------------------------------
BaconShor_3x3                            | CSS        |   22 |    19 |     13 |     23 |  0.09 |     ✓ |      ✓ | ✓ OK
BallColor_3D_[[12,0,None]]               | Color CSS  |   38 |    16 |     10 |     20 |  0.05 |     ✓ |      ✓ | ✓ OK
BallColor_4D_[[24,0,None]]               | Color CSS  |   88 |    19 |     13 |     23 |  0.02 |     ✓ |      ✓ | ✓ OK
BallColor_[[12,0,None]]                  | Color CSS  |   38 |    16 |     10 |     20 |  0.05 |     ✓ |      ✓ | ✓ OK
BallColor_4D_[[24,0,None]]               | Color CSS  |   88 |    19 |     13 |     23 |  0.02 |     ✓ |      ✓ | ✓ OK
BallColor_[[12,0,None]]                  | Color CSS  |   38 |    16 |     10 |     20 |  0.05 |     ✓ |      ✓ | ✓ OK
BallCo

## Two-Qubit Gadget Tests

Test all two-qubit gadgets (CNOT, CZ, Surgery) across all codes.

In [21]:
# Cell 7: Transversal CNOT - Circuit Analysis Table
from IPython.display import clear_output
clear_output(wait=True)

gadget_name = 'TransversalCNOT'
gadget_class, gadget_kwargs = TWO_QUBIT_GADGETS[gadget_name]

print("=" * 120)
print(f"CIRCUIT ANALYSIS TABLE - {gadget_name} (same code × same code)")
print("=" * 120)
print()

header = f"{'Code':<40} | {'Type':<10} | {'n':>4} | {'Ticks':>5} | {'2Q':>6} | {'1Q':>6} | {'Util':>5} | {'Timed':>5} | {'Coords':>6} | Status"
print(header)
print("-" * len(header))

transversal_cnot_results = {}
errors_encountered = []

for code_name, (code_type, code) in sorted(all_codes.items()):
    result = analyze_gadget_circuit(code, gadget_class, gadget_kwargs, is_two_qubit=True)
    transversal_cnot_results[code_name] = result
    
    if result['status'] == 'OK':
        row = f"{code_name:<40} | {code_type:<10} | {result['num_qubits']:>4} | {result['ticks']:>5} | {result['2q_gates']:>6} | {result['1q_gates']:>6} | {result['utilization']:>5.2f} | {'✓' if result['has_timing'] else '✗':>5} | {'✓' if result['has_coords'] else '✗':>6} | ✓ OK"
    else:
        row = f"{code_name:<40} | {code_type:<10} | {'-':>4} | {'-':>5} | {'-':>6} | {'-':>6} | {'-':>5} | {'-':>5} | {'-':>6} | ✗ FAIL"
        errors_encountered.append((code_name, result.get('error', 'Unknown')))
    
    print(row)

print("-" * len(header))
print(f"\nTotal: {len(all_codes)} codes analyzed")
ok_count = sum(1 for r in transversal_cnot_results.values() if r['status'] == 'OK')
print(f"  ✓ OK: {ok_count}")
print(f"  ✗ FAIL: {len(transversal_cnot_results) - ok_count}")

if errors_encountered:
    print(f"\n--- ERRORS ENCOUNTERED ({len(errors_encountered)}) ---")
    for name, error in errors_encountered[:10]:
        print(f"  {name}: {error}")

CIRCUIT ANALYSIS TABLE - TransversalCNOT (same code × same code)

Code                                     | Type       |    n | Ticks |     2Q |     1Q |  Util | Timed | Coords | Status
------------------------------------------------------------------------------------------------------------------------
BaconShor_3x3                            | CSS        |   22 |    62 |     49 |     74 |  0.09 |     ✓ |      ✓ | ✓ OK
BallColor_3D_[[12,0,None]]               | Color CSS  |   38 |    50 |     37 |     62 |  0.05 |     ✓ |      ✓ | ✓ OK
BallColor_3D_[[12,0,None]]               | Color CSS  |   38 |    50 |     37 |     62 |  0.05 |     ✓ |      ✓ | ✓ OK
BallColor_4D_[[24,0,None]]               | Color CSS  |   88 |    62 |     49 |     74 |  0.02 |     ✓ |      ✓ | ✓ OK
BallColor_4D_[[24,0,None]]               | Color CSS  |   88 |    62 |     49 |     74 |  0.02 |     ✓ |      ✓ | ✓ OK
BallColor_[[12,0,None]]                  | Color CSS  |   38 |    50 |     37 |     62 |  0.05 | 

In [22]:
# Cell 8: Surgery CNOT - Circuit Analysis Table
from IPython.display import clear_output
clear_output(wait=True)

gadget_name = 'SurgeryCNOT'
gadget_class, gadget_kwargs = TWO_QUBIT_GADGETS[gadget_name]

print("=" * 120)
print(f"CIRCUIT ANALYSIS TABLE - {gadget_name} (CSS surgery)")
print("=" * 120)
print()

header = f"{'Code':<40} | {'Type':<10} | {'n':>4} | {'Ticks':>5} | {'2Q':>6} | {'1Q':>6} | {'Util':>5} | {'Timed':>5} | {'Coords':>6} | Status"
print(header)
print("-" * len(header))

surgery_cnot_results = {}
errors_encountered = []

for code_name, (code_type, code) in sorted(all_codes.items()):
    result = analyze_gadget_circuit(code, gadget_class, gadget_kwargs, is_two_qubit=True)
    surgery_cnot_results[code_name] = result
    
    if result['status'] == 'OK':
        row = f"{code_name:<40} | {code_type:<10} | {result['num_qubits']:>4} | {result['ticks']:>5} | {result['2q_gates']:>6} | {result['1q_gates']:>6} | {result['utilization']:>5.2f} | {'✓' if result['has_timing'] else '✗':>5} | {'✓' if result['has_coords'] else '✗':>6} | ✓ OK"
    else:
        row = f"{code_name:<40} | {code_type:<10} | {'-':>4} | {'-':>5} | {'-':>6} | {'-':>6} | {'-':>5} | {'-':>5} | {'-':>6} | ✗ FAIL"
        errors_encountered.append((code_name, result.get('error', 'Unknown')))
    
    print(row)

print("-" * len(header))
print(f"\nTotal: {len(all_codes)} codes analyzed")
ok_count = sum(1 for r in surgery_cnot_results.values() if r['status'] == 'OK')
print(f"  ✓ OK: {ok_count}")
print(f"  ✗ FAIL: {len(surgery_cnot_results) - ok_count}")

if errors_encountered:
    print(f"\n--- ERRORS ENCOUNTERED ({len(errors_encountered)}) ---")
    for name, error in errors_encountered[:10]:
        print(f"  {name}: {error}")

CIRCUIT ANALYSIS TABLE - SurgeryCNOT (CSS surgery)

Code                                     | Type       |    n | Ticks |     2Q |     1Q |  Util | Timed | Coords | Status
------------------------------------------------------------------------------------------------------------------------
BaconShor_3x3                            | CSS        |   32 |   127 |    100 |    159 |  0.06 |     ✓ |      ✓ | ✓ OK
BallColor_3D_[[12,0,None]]               | Color CSS  |    - |     - |      - |      - |     - |     - |      - | ✗ FAIL
BallColor_4D_[[24,0,None]]               | Color CSS  |    - |     - |      - |      - |     - |     - |      - | ✗ FAIL
BallColor_[[12,0,None]]                  | Color CSS  |    - |     - |      - |      - |     - |     - |      - | ✗ FAIL
BallColor_[[24,0,None]]                  | Color CSS  |    - |     - |      - |      - |     - |     - |      - | ✗ FAIL
BareAncilla_713                          | Non-CSS    |    - |     - |      - |      - |     - |     - 

## Gadget × Decoder Compatibility Matrix

Test each gadget across a subset of codes with all decoders, showing LER comparison table.

In [97]:
# Cell 9: Select subset of codes for decoder testing
# Use a representative subset to avoid extremely long run times

# Select 1-2 codes per type
subset_codes = {}
codes_per_type = 2

for code_name, (code_type, code) in all_codes.items():
    type_codes = [k for k, (t, c) in subset_codes.items() if t == code_type]
    if len(type_codes) < codes_per_type:
        subset_codes[code_name] = (code_type, code)

print(f"Selected {len(subset_codes)} codes for decoder testing:")
for name, (ctype, _) in sorted(subset_codes.items()):
    print(f"  {name} ({ctype})")

Selected 8 codes for decoder testing:
  C6 (CSS)
  FourQubit422_[[4,2,2]] (CSS)
  NonCSS_642 (Non-CSS)
  Perfect_513 (Non-CSS)
  RotatedSurface_[[25,1,5]] (Topological CSS)
  RotatedSurface_[[9,1,3]] (Topological CSS)
  TriangularColour_d3 (Color CSS)
  TriangularColour_d5 (Color CSS)


In [98]:
# Cell 10: Comprehensive Gadget × Decoder Compatibility Test - ALL GADGETS
from IPython.display import clear_output
from collections import defaultdict
clear_output(wait=True)

print("=" * 160)
print("COMPREHENSIVE GADGET × DECODER COMPATIBILITY TEST")
print("=" * 160)
print(f"\nTesting {len(SINGLE_QUBIT_GADGETS) + len(TWO_QUBIT_GADGETS)} gadgets on {len(subset_codes)} codes with {len(decoder_classes)} decoders")
print("Status: ✓=OK (with LER), ⚠️=WARN, ✗=FAIL, -=SKIP, ·=N/A")
print("NoDec column shows raw error rate without any decoder correction\n")

# Aggregate results across all gadgets
all_gadget_decoder_results = {}
all_errors = []
error_by_type = defaultdict(list)
status_counts = {'OK': 0, 'WARN': 0, 'FAIL': 0, 'SKIP': 0, 'N/A': 0}

dec_names = list(decoder_classes.keys())

# Combine all gadgets
ALL_GADGETS = {
    **{name: (cls, kwargs, False) for name, (cls, kwargs) in SINGLE_QUBIT_GADGETS.items()},
    **{name: (cls, kwargs, True) for name, (cls, kwargs) in TWO_QUBIT_GADGETS.items()},
}

for gadget_name, (gadget_class, gadget_kwargs, is_two_qubit) in ALL_GADGETS.items():
    print("=" * 160)
    print(f"GADGET: {gadget_name} ({'Two-Qubit' if is_two_qubit else 'Single-Qubit'})")
    print("=" * 160)
    
    # Header with NoDec column
    header = f"{'Code':<35} | {'Type':<15} | {'NoDec':>8}"
    for dec_name in dec_names:
        header += f" | {dec_name[:10]:^10}"
    print(header)
    print("-" * len(header))
    
    all_gadget_decoder_results[gadget_name] = {}
    
    for code_name, (code_type, code) in sorted(subset_codes.items()):
        # Store no-decode LER for this code (from first successful test)
        no_decode_ler = None
        
        all_gadget_decoder_results[gadget_name][code_name] = {}
        
        # First pass: run all decoders to collect results
        decoder_results = {}
        for dec_name in dec_names:
            result = test_gadget_on_code(
                code=code,
                code_name=code_name,
                code_type=code_type,
                gadget_class=gadget_class,
                gadget_name=gadget_name,
                gadget_kwargs=gadget_kwargs,
                decoder_class=decoder_classes[dec_name],
                decoder_name=dec_name,
                is_two_qubit=is_two_qubit,
            )
            decoder_results[dec_name] = result
            all_gadget_decoder_results[gadget_name][code_name][dec_name] = result
            status_counts[result.status] += 1
            
            # Capture no-decode LER from first successful result
            if no_decode_ler is None and result.ler_no_decode is not None:
                no_decode_ler = result.ler_no_decode
            
            # Track errors
            if result.status == 'WARN':
                error_key = (result.error or 'Unknown')[:50]
                error_by_type[error_key].append((gadget_name, code_name, dec_name))
            elif result.status == 'FAIL':
                all_errors.append((gadget_name, code_name, dec_name, result.error))
                error_key = (result.error or 'Unknown')[:50]
                error_by_type[error_key].append((gadget_name, code_name, dec_name))
        
        # Build row with NoDec column
        no_dec_str = f"{no_decode_ler:.3f}" if no_decode_ler is not None else "  --  "
        row = f"{code_name:<35} | {code_type:<15} | {no_dec_str:>8}"
        
        for dec_name in dec_names:
            result = decoder_results[dec_name]
            
            if result.status == 'OK':
                # Always show LER value for OK status
                if result.ler is not None:
                    cell = f"✓{result.ler:.3f}"
                else:
                    cell = "✓ OK"
            elif result.status == 'N/A':
                cell = "   ·   "  # N/A - truly impossible
            elif result.status == 'SKIP':
                cell = " - SKIP"
            elif result.status == 'WARN':
                cell = "⚠️ WARN"
            else:
                cell = " ✗ FAIL"
            
            row += f" | {cell:^10}"
        
        print(row)
    
    print("-" * len(header))
    print()

# ============================================================================
# ERROR SUMMARY
# ============================================================================
print("=" * 160)
print("ERROR SUMMARY - Grouped by Error Type")
print("=" * 160)

if error_by_type:
    for error_msg, occurrences in sorted(error_by_type.items(), key=lambda x: -len(x[1])):
        print(f"\n❌ Error: {error_msg}")
        print(f"   Occurrences: {len(occurrences)}")
        # Show first 5 examples
        for gadget, code, dec in occurrences[:5]:
            print(f"      - {gadget} + {code} + {dec}")
        if len(occurrences) > 5:
            print(f"      ... and {len(occurrences) - 5} more")
else:
    print("\n✓ No errors or warnings!")

# ============================================================================
# STATUS COUNTS
# ============================================================================
print("\n" + "=" * 160)
print("OVERALL STATUS COUNTS")
print("=" * 160)
total = sum(status_counts.values())
print(f"  ✓ OK:   {status_counts['OK']:>5} ({100*status_counts['OK']/total:.1f}%)")
print(f"  ⚠️ WARN: {status_counts['WARN']:>5} ({100*status_counts['WARN']/total:.1f}%)")
print(f"  ✗ FAIL: {status_counts['FAIL']:>5} ({100*status_counts['FAIL']/total:.1f}%)")
print(f"  - SKIP: {status_counts['SKIP']:>5} ({100*status_counts['SKIP']/total:.1f}%)")
print(f"  · N/A:  {status_counts['N/A']:>5} ({100*status_counts['N/A']/total:.1f}%)")
print(f"  Total:  {total:>5}")

# Compute effective rates (excluding N/A)
effective_total = total - status_counts['N/A']
if effective_total > 0:
    print(f"\n  Effective rates (excluding N/A):")
    print(f"  ✓ OK:   {status_counts['OK']:>5} ({100*status_counts['OK']/effective_total:.1f}%)")
    print(f"  ⚠️ WARN: {status_counts['WARN']:>5} ({100*status_counts['WARN']/effective_total:.1f}%)")
    print(f"  ✗ FAIL: {status_counts['FAIL']:>5} ({100*status_counts['FAIL']/effective_total:.1f}%)")
    print(f"  - SKIP: {status_counts['SKIP']:>5} ({100*status_counts['SKIP']/effective_total:.1f}%)")

COMPREHENSIVE GADGET × DECODER COMPATIBILITY TEST

Testing 11 gadgets on 8 codes with 10 decoders
Status: ✓=OK (with LER), ⚠️=WARN, ✗=FAIL, -=SKIP, ·=N/A
NoDec column shows raw error rate without any decoder correction

GADGET: TransversalH (Single-Qubit)
Code                                | Type            |    NoDec | PyMatching | FusionBlos | BeliefMatc |   BPOSD    | Tesseract  | UnionFind  |    MLE     | Hypergraph | Chromobius | Concatenat
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
C6                                  | CSS             |     --   |  ⚠️ WARN   |  ⚠️ WARN   |  ⚠️ WARN   |  ⚠️ WARN   |  ⚠️ WARN   |  ⚠️ WARN   |  ⚠️ WARN   |  ⚠️ WARN   |     ·      |     ·     
C6                                  | CSS             |     --   |  ⚠️ WARN   |  ⚠️ WARN   |  ⚠️ WARN   |  ⚠️ WARN   |  ⚠️ WARN   |  ⚠️ WARN   |  ⚠️ WARN   |  

KeyboardInterrupt: 

## Summary

Aggregate pass/fail counts across all gadgets and codes.

In [83]:
# Cell 11: Comprehensive Summary Statistics
from IPython.display import clear_output
from collections import defaultdict
clear_output(wait=True)

print("=" * 120)
print("GADGET SMOKE TEST - FINAL SUMMARY")
print("=" * 120)

# ============================================================================
# CIRCUIT ANALYSIS SUMMARY
# ============================================================================
print("\n" + "-" * 120)
print("CIRCUIT ANALYSIS RESULTS (by Gadget)")
print("-" * 120)

# Collect all circuit analysis results
circuit_results = {
    'TransversalH': transversal_h_results,
    'TeleportedH': teleported_h_results,
    'TransversalCNOT': transversal_cnot_results,
    'SurgeryCNOT': surgery_cnot_results,
}

header = f"{'Gadget':<25} | {'OK':>6} | {'FAIL':>6} | {'Total':>6} | {'Success%':>10}"
print(header)
print("-" * len(header))

circuit_ok_total = 0
circuit_fail_total = 0

for gadget_name, results in circuit_results.items():
    ok = sum(1 for r in results.values() if r.get('status') == 'OK')
    fail = len(results) - ok
    total = len(results)
    pct = (ok / total * 100) if total > 0 else 0
    print(f"{gadget_name:<25} | {ok:>6} | {fail:>6} | {total:>6} | {pct:>9.1f}%")
    circuit_ok_total += ok
    circuit_fail_total += fail

print("-" * len(header))
total_circuit = circuit_ok_total + circuit_fail_total
pct = (circuit_ok_total / total_circuit * 100) if total_circuit > 0 else 0
print(f"{'TOTAL':<25} | {circuit_ok_total:>6} | {circuit_fail_total:>6} | {total_circuit:>6} | {pct:>9.1f}%")

# ============================================================================
# DECODER COMPATIBILITY SUMMARY (by Gadget)
# ============================================================================
print("\n" + "-" * 120)
print("DECODER COMPATIBILITY RESULTS (by Gadget)")
print("-" * 120)

if 'all_gadget_decoder_results' in dir():
    header = f"{'Gadget':<25} | {'OK':>6} | {'WARN':>6} | {'SKIP':>6} | {'N/A':>6} | {'FAIL':>6} | {'Total':>6} | {'OK%':>8}"
    print(header)
    print("-" * len(header))
    
    decoder_totals = {'OK': 0, 'WARN': 0, 'SKIP': 0, 'N/A': 0, 'FAIL': 0}
    
    for gadget_name, code_results in all_gadget_decoder_results.items():
        counts = {'OK': 0, 'WARN': 0, 'SKIP': 0, 'N/A': 0, 'FAIL': 0}
        for code_name, dec_results in code_results.items():
            for dec_name, result in dec_results.items():
                counts[result.status] += 1
                decoder_totals[result.status] += 1
        
        total = sum(counts.values())
        effective = total - counts['N/A']
        pct = (counts['OK'] / effective * 100) if effective > 0 else 0
        print(f"{gadget_name:<25} | {counts['OK']:>6} | {counts['WARN']:>6} | {counts['SKIP']:>6} | {counts['N/A']:>6} | {counts['FAIL']:>6} | {total:>6} | {pct:>7.1f}%")
    
    print("-" * len(header))
    total_dec = sum(decoder_totals.values())
    effective_dec = total_dec - decoder_totals['N/A']
    pct = (decoder_totals['OK'] / effective_dec * 100) if effective_dec > 0 else 0
    print(f"{'TOTAL':<25} | {decoder_totals['OK']:>6} | {decoder_totals['WARN']:>6} | {decoder_totals['SKIP']:>6} | {decoder_totals['N/A']:>6} | {decoder_totals['FAIL']:>6} | {total_dec:>6} | {pct:>7.1f}%")
else:
    print("  (Run Cell 10 first to see decoder compatibility results)")

# ============================================================================
# DECODER COMPATIBILITY SUMMARY (by Decoder)
# ============================================================================
print("\n" + "-" * 120)
print("DECODER COMPATIBILITY RESULTS (by Decoder)")
print("-" * 120)

if 'all_gadget_decoder_results' in dir():
    dec_stats = defaultdict(lambda: {'OK': 0, 'WARN': 0, 'SKIP': 0, 'N/A': 0, 'FAIL': 0})
    
    for gadget_name, code_results in all_gadget_decoder_results.items():
        for code_name, dec_results in code_results.items():
            for dec_name, result in dec_results.items():
                dec_stats[dec_name][result.status] += 1
    
    header = f"{'Decoder':<20} | {'OK':>6} | {'WARN':>6} | {'SKIP':>6} | {'N/A':>6} | {'FAIL':>6} | {'Total':>6} | {'OK%':>8}"
    print(header)
    print("-" * len(header))
    
    for dec_name in sorted(dec_stats.keys()):
        counts = dec_stats[dec_name]
        total = sum(counts.values())
        effective = total - counts['N/A']
        pct = (counts['OK'] / effective * 100) if effective > 0 else 0
        print(f"{dec_name:<20} | {counts['OK']:>6} | {counts['WARN']:>6} | {counts['SKIP']:>6} | {counts['N/A']:>6} | {counts['FAIL']:>6} | {total:>6} | {pct:>7.1f}%")
    
    print("-" * len(header))

# ============================================================================
# CODE TYPE SUMMARY
# ============================================================================
print("\n" + "-" * 120)
print("RESULTS BY CODE TYPE")
print("-" * 120)

code_type_stats = defaultdict(lambda: {'circuit_ok': 0, 'circuit_fail': 0, 'decoder_ok': 0, 'decoder_other': 0})

for code_name, (code_type, _) in all_codes.items():
    # Circuit analysis
    for gadget_results in circuit_results.values():
        if code_name in gadget_results:
            if gadget_results[code_name].get('status') == 'OK':
                code_type_stats[code_type]['circuit_ok'] += 1
            else:
                code_type_stats[code_type]['circuit_fail'] += 1
    
    # Decoder compatibility
    if 'all_gadget_decoder_results' in dir():
        for gadget_name, code_results in all_gadget_decoder_results.items():
            if code_name in code_results:
                for dec_name, result in code_results[code_name].items():
                    if result.status == 'OK':
                        code_type_stats[code_type]['decoder_ok'] += 1
                    elif result.status != 'N/A':  # Don't count N/A as "other"
                        code_type_stats[code_type]['decoder_other'] += 1

header = f"{'Code Type':<20} | {'Circuit OK':>10} | {'Circuit Fail':>12} | {'Decoder OK':>10} | {'Decoder Other':>13}"
print(header)
print("-" * len(header))

for code_type in sorted(code_type_stats.keys()):
    stats = code_type_stats[code_type]
    print(f"{code_type:<20} | {stats['circuit_ok']:>10} | {stats['circuit_fail']:>12} | {stats['decoder_ok']:>10} | {stats['decoder_other']:>13}")

print("-" * len(header))

# ============================================================================
# FINAL STATUS
# ============================================================================
print("\n" + "=" * 120)
print("FINAL STATUS")
print("=" * 120)

circuit_success = circuit_ok_total == total_circuit
decoder_success = decoder_totals.get('FAIL', 0) == 0 if 'decoder_totals' in dir() else True

if circuit_success and decoder_success:
    print("\n✓ ALL TESTS PASSED - Gadget smoke test complete!")
else:
    issues = []
    if not circuit_success:
        issues.append(f"Circuit analysis: {circuit_fail_total} failures")
    if not decoder_success:
        issues.append(f"Decoder compatibility: {decoder_totals.get('FAIL', 0)} failures, {decoder_totals.get('WARN', 0)} warnings")
    print(f"\n⚠️ SOME ISSUES DETECTED:")
    for issue in issues:
        print(f"   - {issue}")

print("=" * 120)

GADGET SMOKE TEST - FINAL SUMMARY

------------------------------------------------------------------------------------------------------------------------
CIRCUIT ANALYSIS RESULTS (by Gadget)
------------------------------------------------------------------------------------------------------------------------
Gadget                    |     OK |   FAIL |  Total |   Success%
-----------------------------------------------------------------
TransversalH              |     47 |      0 |     47 |     100.0%
TeleportedH               |     47 |      0 |     47 |     100.0%
TransversalCNOT           |     47 |      0 |     47 |     100.0%
SurgeryCNOT               |     36 |     11 |     47 |      76.6%
-----------------------------------------------------------------
TOTAL                     |    177 |     11 |    188 |      94.1%

------------------------------------------------------------------------------------------------------------------------
DECODER COMPATIBILITY RESULTS (by Ga

In [84]:
# Cell: Concise Diagnostic Summary
from collections import defaultdict

print("=" * 80)
print("QUICK DIAGNOSTIC SUMMARY")
print("=" * 80)

# Count by gadget and status
gadget_status = defaultdict(lambda: defaultdict(int))
error_summary = defaultdict(int)

if 'all_gadget_decoder_results' in dir():
    for gadget_name, code_results in all_gadget_decoder_results.items():
        for code_name, dec_results in code_results.items():
            for dec_name, result in dec_results.items():
                gadget_status[gadget_name][result.status] += 1
                if result.status in ('FAIL', 'WARN') and result.error:
                    # Normalize error messages
                    err = result.error[:60].strip()
                    error_summary[err] += 1

print("\n--- STATUS BY GADGET ---")
for gadget_name in sorted(gadget_status.keys()):
    stats = gadget_status[gadget_name]
    total = sum(stats.values())
    ok_pct = 100 * stats['OK'] / total if total > 0 else 0
    print(f"{gadget_name:<20} OK:{stats['OK']:>3} WARN:{stats['WARN']:>3} FAIL:{stats['FAIL']:>3} SKIP:{stats['SKIP']:>3} N/A:{stats['N/A']:>3} ({ok_pct:5.1f}% OK)")

print("\n--- TOP 10 ERROR TYPES (non-N/A) ---")
for err, count in sorted(error_summary.items(), key=lambda x: -x[1])[:10]:
    print(f"  [{count:>3}x] {err}")

print("\n--- OVERALL ---")
total_ok = sum(gadget_status[g]['OK'] for g in gadget_status)
total_warn = sum(gadget_status[g]['WARN'] for g in gadget_status)
total_fail = sum(gadget_status[g]['FAIL'] for g in gadget_status)
total_skip = sum(gadget_status[g]['SKIP'] for g in gadget_status)
total_na = sum(gadget_status[g]['N/A'] for g in gadget_status)
total = total_ok + total_warn + total_fail + total_skip + total_na
effective = total - total_na

print(f"Total tests: {total}")
print(f"  OK:   {total_ok:>4} ({100*total_ok/effective:.1f}% of effective)")
print(f"  WARN: {total_warn:>4} ({100*total_warn/effective:.1f}% of effective)")
print(f"  FAIL: {total_fail:>4} ({100*total_fail/effective:.1f}% of effective)")
print(f"  SKIP: {total_skip:>4} ({100*total_skip/effective:.1f}% of effective)")
print(f"  N/A:  {total_na:>4} (excluded from effective)")
print("=" * 80)

QUICK DIAGNOSTIC SUMMARY

--- STATUS BY GADGET ---
LatticeZZMerge       OK: 42 WARN:  3 FAIL:  0 SKIP: 16 N/A: 19 ( 52.5% OK)
SurgeryCNOT          OK: 42 WARN:  3 FAIL:  0 SKIP: 16 N/A: 19 ( 52.5% OK)
TeleportedH          OK: 43 WARN:  3 FAIL:  0 SKIP: 16 N/A: 18 ( 53.8% OK)
TeleportedS          OK: 43 WARN:  3 FAIL:  0 SKIP: 16 N/A: 18 ( 53.8% OK)
TeleportedT          OK: 43 WARN:  3 FAIL:  0 SKIP: 16 N/A: 18 ( 53.8% OK)
TransversalCNOT      OK: 42 WARN:  3 FAIL:  0 SKIP: 16 N/A: 19 ( 52.5% OK)
TransversalCZ        OK: 41 WARN:  4 FAIL:  0 SKIP: 16 N/A: 19 ( 51.2% OK)
TransversalH         OK:  0 WARN: 66 FAIL:  0 SKIP:  0 N/A: 14 (  0.0% OK)
TransversalS         OK: 43 WARN:  3 FAIL:  0 SKIP: 16 N/A: 18 ( 53.8% OK)
TransversalX         OK: 43 WARN:  3 FAIL:  0 SKIP: 16 N/A: 18 ( 53.8% OK)
TransversalZ         OK: 43 WARN:  3 FAIL:  0 SKIP: 16 N/A: 18 ( 53.8% OK)

--- TOP 10 ERROR TYPES (non-N/A) ---
  [ 66x] Non-deterministic circuit
  [ 20x] Expected all detectors to have color annot

In [90]:
# Cell: Deep Dive Investigation - Test TransversalH Fix
print("=" * 80)
print("INVESTIGATING TRANSVERSALH NON-DETERMINISTIC CIRCUIT ISSUE")
print("=" * 80)

# Test TransversalH on codes that were already loaded
# Pick a simple CSS code from the already-discovered codes
test_code_name = 'FourQubit422_[[4,2,2]]'
_, test_code = all_codes[test_code_name]

print(f"\nTesting TransversalH on {test_code_name} (n={test_code.n}, k={getattr(test_code, 'k', '?')})")

try:
    gadget = TransversalHadamard()
    experiment = FaultTolerantGadgetExperiment(
        codes=[test_code],
        gadget=gadget,
        noise_model=noise,
        num_rounds_before=2,
        num_rounds_after=2,
        measurement_basis="Z",
    )
    circuit = experiment.to_stim()
    
    print(f"\n✓ Circuit generated successfully")
    print(f"  Qubits: {circuit.num_qubits}")
    print(f"  Detectors: {str(circuit).count('DETECTOR')}")
    print(f"  Observables: {str(circuit).count('OBSERVABLE_INCLUDE')}")
    
    # Try to get DEM
    try:
        dem = circuit.detector_error_model(decompose_errors=True)
        print(f"  DEM detectors: {dem.num_detectors}")
        print(f"  DEM observables: {dem.num_observables}")
        print(f"\n✓ DEM generated successfully - FIX WORKS!")
        
        # Quick decode test
        sampler = dem.compile_sampler()
        det, obs = sampler.sample(100, bit_packed=False)
        print(f"  Quick sample: {det.shape[0]} shots, {det.shape[1]} dets")
        
    except Exception as dem_err:
        print(f"\n✗ DEM failed: {dem_err}")
        
        # Check if it's non-deterministic
        if 'non-deterministic' in str(dem_err).lower():
            print("\n  This is still the non-deterministic error.")
            print("  Dumping first 80 lines of circuit for debugging:")
            print("-" * 80)
            for i, line in enumerate(str(circuit).split('\n')[:80]):
                print(f"  {i+1:3}: {line}")
            print("-" * 80)
            
except Exception as e:
    print(f"\n✗ Circuit generation failed: {e}")
    import traceback
    traceback.print_exc()

print("=" * 80)

INVESTIGATING TRANSVERSALH NON-DETERMINISTIC CIRCUIT ISSUE

Testing TransversalH on FourQubit422_[[4,2,2]] (n=4, k=2)

✓ Circuit generated successfully
  Qubits: 6
  Detectors: 7
  Observables: 1

✗ DEM failed: The circuit contains non-deterministic detectors.

To make an SVG picture of the problem, you can use the python API like this:
    your_circuit.diagram('detslice-with-ops-svg', tick=range(0, 5), filter_coords=['D3', ])
or the command line API like this:
    stim diagram --in your_circuit_file.stim --type detslice-with-ops-svg --tick 0:5 --filter_coords D3 > output_image.svg

This was discovered while analyzing a Z-basis reset (R) on:
    qubit 3 [coords (0, 1)]

The collapse anti-commuted with these detectors/observables:
    D3 [coords (0.5, 0, 2)]

The backward-propagating error sensitivity for D3 was:
    X0 [coords (0, 0)]
    X1 [coords (1, 0)]
    X2 [coords (1, 1)]
    X3 [coords (0, 1)]

Circuit stack trace:
    during TICK layer #1 of 47
    at instruction #7 [which is

In [26]:
# Cell 12: Error Grouping Analysis
from collections import defaultdict

print("=" * 100)
print("ERROR GROUPING ANALYSIS")
print("=" * 100)

# ============================================================================
# Group errors by error type/message
# ============================================================================
if 'all_gadget_errors' in dir() and all_gadget_errors:
    print("\n" + "-" * 100)
    print("ERRORS GROUPED BY TYPE")
    print("-" * 100)
    
    # Group by error message (first 80 chars)
    error_groups = defaultdict(list)
    for err in all_gadget_errors:
        # Get first line of error, truncate if too long
        err_msg = str(err.get('error', 'Unknown'))
        first_line = err_msg.split('\n')[0][:80]
        error_groups[first_line].append(err)
    
    # Sort by frequency (most common first)
    sorted_groups = sorted(error_groups.items(), key=lambda x: -len(x[1]))
    
    print(f"\nTotal unique error types: {len(error_groups)}")
    print(f"Total error occurrences: {len(all_gadget_errors)}")
    print()
    
    for i, (err_msg, occurrences) in enumerate(sorted_groups[:10], 1):  # Top 10
        print(f"Error Type #{i} ({len(occurrences)} occurrences)")
        print(f"  Message: {err_msg}")
        print(f"  Affected:")
        
        # Group by gadget
        gadget_counts = defaultdict(int)
        for occ in occurrences:
            gadget_counts[occ['gadget']] += 1
        for gadget, count in sorted(gadget_counts.items(), key=lambda x: -x[1])[:5]:
            print(f"    - {gadget}: {count} codes")
        print()
    
    if len(sorted_groups) > 10:
        print(f"  ... and {len(sorted_groups) - 10} more error types")
else:
    print("\n✓ No decoder errors found!")

# ============================================================================
# Circuit failures detail
# ============================================================================
print("\n" + "-" * 100)
print("CIRCUIT FAILURES DETAIL")
print("-" * 100)

circuit_failures = []
circuit_results_all = {
    'TransversalH': transversal_h_results,
    'TeleportedH': teleported_h_results,
    'TransversalCNOT': transversal_cnot_results,
    'SurgeryCNOT': surgery_cnot_results,
}

for gadget_name, results in circuit_results_all.items():
    for code_name, result in results.items():
        if result.get('status') != 'OK':
            circuit_failures.append({
                'gadget': gadget_name,
                'code': code_name,
                'error': result.get('error', 'Unknown'),
                'warning': result.get('warning', None)
            })

if circuit_failures:
    # Group by error type
    error_groups = defaultdict(list)
    for f in circuit_failures:
        err_key = str(f.get('error', 'Unknown'))[:50]
        error_groups[err_key].append(f)
    
    print(f"\nTotal circuit failures: {len(circuit_failures)}")
    print(f"Unique error types: {len(error_groups)}\n")
    
    for err_msg, failures in sorted(error_groups.items(), key=lambda x: -len(x[1])):
        print(f"Error: {err_msg}")
        print(f"  Affected: {len(failures)} cases")
        for f in failures[:3]:  # Show first 3
            print(f"    - {f['gadget']} × {f['code']}")
        if len(failures) > 3:
            print(f"    ... and {len(failures) - 3} more")
        print()
else:
    print("\n✓ No circuit analysis failures!")

print("=" * 100)
print("END OF GADGET SMOKE TEST")
print("=" * 100)

ERROR GROUPING ANALYSIS

✓ No decoder errors found!

----------------------------------------------------------------------------------------------------
CIRCUIT FAILURES DETAIL
----------------------------------------------------------------------------------------------------

Total circuit failures: 11
Unique error types: 2

Error: Logical index 0 >= k=0
  Affected: 6 cases
    - SurgeryCNOT × BallColor_3D_[[12,0,None]]
    - SurgeryCNOT × BallColor_4D_[[24,0,None]]
    - SurgeryCNOT × BallColor_[[12,0,None]]
    ... and 3 more

Error: object of type 'method' has no len()
  Affected: 5 cases
    - SurgeryCNOT × BareAncilla_713
    - SurgeryCNOT × Mixed_512
    - SurgeryCNOT × NonCSS_1023
    ... and 2 more

END OF GADGET SMOKE TEST


In [74]:
# Debug: Analyze SKIP and WARN reasons
from collections import defaultdict

print("=" * 120)
print("SKIP/WARN ANALYSIS - Understanding why tests are not OK")
print("=" * 120)

skip_reasons = defaultdict(int)
warn_reasons = defaultdict(int)
fail_reasons = defaultdict(int)

for gadget_name, code_results in all_gadget_decoder_results.items():
    for code_name, dec_results in code_results.items():
        for dec_name, result in dec_results.items():
            if result.status == 'SKIP':
                reason = result.error or 'Unknown'
                skip_reasons[reason] += 1
            elif result.status == 'WARN':
                reason = result.error or 'Unknown'
                warn_reasons[reason[:80]] += 1
            elif result.status == 'FAIL':
                reason = result.error or 'Unknown'
                fail_reasons[reason[:80]] += 1

print("\n" + "-" * 120)
print("SKIP REASONS (sorted by frequency)")
print("-" * 120)
for reason, count in sorted(skip_reasons.items(), key=lambda x: -x[1]):
    print(f"  {count:>5}x  {reason}")

print("\n" + "-" * 120)
print("WARN REASONS (sorted by frequency)")
print("-" * 120)
for reason, count in sorted(warn_reasons.items(), key=lambda x: -x[1]):
    print(f"  {count:>5}x  {reason}")

print("\n" + "-" * 120)
print("FAIL REASONS (sorted by frequency)")
print("-" * 120)
for reason, count in sorted(fail_reasons.items(), key=lambda x: -x[1]):
    print(f"  {count:>5}x  {reason}")

SKIP/WARN ANALYSIS - Understanding why tests are not OK

------------------------------------------------------------------------------------------------------------------------
SKIP REASONS (sorted by frequency)
------------------------------------------------------------------------------------------------------------------------
    160x  No detectors in DEM

------------------------------------------------------------------------------------------------------------------------
WARN REASONS (sorted by frequency)
------------------------------------------------------------------------------------------------------------------------
     66x  Non-deterministic circuit
     20x  Expected all detectors to have color annotations (coord[3] in 0-5)
     11x  BeliefMatchingDecoder failed to initialize: A hyperedge error mechanism was foun

------------------------------------------------------------------------------------------------------------------------
FAIL REASONS (sorted by frequenc

In [75]:
# Summary of fixes applied in this session
print("=" * 80)
print("FIXES APPLIED IN THIS SESSION")
print("=" * 80)

print("""
1. layout.py - Fixed hx/hz attribute checking
   - Changed from: hasattr(code, 'hx') followed by code.hx.shape[0]
   - Changed to: getattr with hasattr(hx, 'shape') check
   - Issue: Non-CSS codes don't have hx/hz properties

2. stabilizer_rounds.py - Fixed hx/hz attribute checking  
   - Same pattern fix as layout.py
   - Added hasattr(hx_raw, 'shape') validation

3. ft_gadget_experiment.py - Fixed hx/hz attribute checking
   - Same pattern fix for qubit allocation
   
4. scheduling.py - Fixed hx/hz attribute checking  
   - Same pattern fix for stabilizer scheduling

5. css_surgery.py (and _new.py, _replaced.py) - Fixed hx/hz attribute checking
   - Same pattern fix for surgery gadgets

6. stabilizer_rounds.py::get_logical_support() - Fixed method vs property handling
   - Changed from: ops = code.logical_x_ops  
   - Changed to: check if callable, and call it if so
   - Issue: Non-CSS codes define logical_x_ops as method, not @property

7. ft_gadget_experiment.py::_emit_observable() - Fixed measurement index mapping
   - Changed from: formula using qubit index offset
   - Changed to: proper qubit_to_meas mapping
   - Issue: For two-qubit gadgets, data qubits are not contiguous

RESULTS:
- Before: 176 FAILs (method has no len) + 168 FAILs (lookback error) = 344 FAILs
- After: 0 FAILs ✓

Remaining issues (non-blocking):
- 160 SKIPs: No detectors in DEM (Non-CSS codes without CSS-style stabilizers)
- 66 WARNs: Non-deterministic circuit (TransversalHadamard on some codes)
- 20 WARNs: Color annotations missing (Chromobius on non-color-annotated codes)
- 11 WARNs: BeliefMatching hyperedge issue (known limitation)
""")

FIXES APPLIED IN THIS SESSION

1. layout.py - Fixed hx/hz attribute checking
   - Changed from: hasattr(code, 'hx') followed by code.hx.shape[0]
   - Changed to: getattr with hasattr(hx, 'shape') check
   - Issue: Non-CSS codes don't have hx/hz properties

2. stabilizer_rounds.py - Fixed hx/hz attribute checking  
   - Same pattern fix as layout.py
   - Added hasattr(hx_raw, 'shape') validation

3. ft_gadget_experiment.py - Fixed hx/hz attribute checking
   - Same pattern fix for qubit allocation

4. scheduling.py - Fixed hx/hz attribute checking  
   - Same pattern fix for stabilizer scheduling

5. css_surgery.py (and _new.py, _replaced.py) - Fixed hx/hz attribute checking
   - Same pattern fix for surgery gadgets

6. stabilizer_rounds.py::get_logical_support() - Fixed method vs property handling
   - Changed from: ops = code.logical_x_ops  
   - Changed to: check if callable, and call it if so
   - Issue: Non-CSS codes define logical_x_ops as method, not @property

7. ft_gadget_exper