In [2]:
# Cell 1: Setup and Imports
#
# Single consolidated import cell. The original cell contained the same
# purge-and-import sequence pasted twice; one pass is sufficient and binds
# exactly the same names.
import importlib
import sys
from typing import Any, Dict, Optional, Tuple

import matplotlib.pyplot as plt
import numpy as np
import stim

# Force reload of the (possibly modified) qectostim modules: drop every cached
# entry so the imports below re-execute the package from disk. This must run
# BEFORE the qectostim imports, otherwise the names bound below would point at
# stale module objects.
for mod in list(sys.modules.keys()):
    if 'qectostim' in mod:
        del sys.modules[mod]

# Re-import
from qectostim.codes.base.repetition_codes import RepetitionCode
from qectostim.codes.base.four_two_two import FourQubit422Code
from qectostim.codes.base.six_two_two import SixQubit622Code
from qectostim.codes.base.toric_code import ToricCode33
from qectostim.codes.base.rotated_surface import RotatedSurfaceCode
from qectostim.codes.base.css_generic import GenericCSSCode
from qectostim.codes.base.steane_713 import SteanCode713
from qectostim.codes.base.shor_code import ShorCode91
from qectostim.codes.base.reed_muller_code import ReedMullerCode151
from qectostim.decoders.decoder_selector import select_decoder
from qectostim.noise.models import CircuitDepolarizingNoise
from qectostim.experiments.memory import CSSMemoryExperiment
print("Modules reloaded successfully")

print("✓ All modules loaded successfully")

Modules reloaded successfully
✓ All modules loaded successfully


In [3]:
# Cell 2: Helper Functions

def validate_css_structure(code, name: str) -> Dict[str, Any]:
    """Check the CSS commutation condition Hx @ Hz.T == 0 (mod 2) for a code.

    Args:
        code: Object exposing ``n``, ``k``, a ``metadata`` mapping and the
            parity-check matrices ``hx`` / ``hz``.
            (assumes hx/hz are dense array-likes — TODO confirm for sparse codes)
        name: Label stored in the returned record.

    Returns:
        Dict with keys ``name``, ``n``, ``k``, ``distance`` (``'unknown'`` when
        the metadata has no ``'distance'`` entry), ``hx_shape``, ``hz_shape``,
        ``css_orthogonal`` (plain ``bool``) and ``status`` (``'PASS'`` when the
        matrices are orthogonal mod 2, otherwise ``'FAIL'``).
    """
    # Cast to a real integer dtype before multiplying: np.dot on boolean
    # matrices performs logical OR/AND, which destroys the parity information
    # and would flag an even overlap as non-orthogonal.
    hx = np.asarray(code.hx, dtype=np.int64)
    hz = np.asarray(code.hz, dtype=np.int64)
    css_orthogonal = bool(np.all(np.dot(hx, hz.T) % 2 == 0))

    return {
        'name': name,
        'n': code.n,
        'k': code.k,
        'distance': code.metadata.get('distance', 'unknown'),
        'hx_shape': hx.shape,
        'hz_shape': hz.shape,
        'css_orthogonal': css_orthogonal,
        'status': 'PASS' if css_orthogonal else 'FAIL',
    }

def run_memory_test(code, name: str, p: float = 0.01, shots: int = 5000,
                    threshold: float = 0.15, rounds: int = 3) -> Dict[str, Any]:
    """Run a CSS memory experiment and compare its logical error rate to a threshold.

    Args:
        code: Code object handed to ``CSSMemoryExperiment``.
        name: Label stored in the returned record.
        p: Probability used for both ``p1`` and ``p2`` of
            ``CircuitDepolarizingNoise``.
        shots: Number of shots passed to ``run_decode``.
        threshold: The test passes when the logical error rate is strictly
            below this value.
        rounds: Number of rounds passed to ``CSSMemoryExperiment``. Defaults
            to 3, the value that used to be hard-coded here.

    Returns:
        Dict with keys ``name``, ``ler``, ``threshold`` and ``status``
        (``'PASS'`` or ``'FAIL'``).
    """
    noise = CircuitDepolarizingNoise(p1=p, p2=p)
    exp = CSSMemoryExperiment(code=code, rounds=rounds, noise_model=noise)
    result = exp.run_decode(shots=shots)
    ler = result['logical_error_rate']
    status = 'PASS' if ler < threshold else 'FAIL'
    return {'name': name, 'ler': ler, 'threshold': threshold, 'status': status}

print("Helper functions defined")

Helper functions defined


In [4]:
# Cell 3: Instantiate All Codes using discover_all_codes()

from qectostim.codes import discover_all_codes

# Ask the library for every code it can build within the qubit budget, then
# keep only the ones exposing both parity-check matrices (hx and hz).
discovered = discover_all_codes(max_qubits=50, include_qldpc=False)
codes = {
    name: code
    for name, code in discovered.items()
    if all(hasattr(code, attr) for attr in ('hx', 'hz'))
}

print("All codes instantiated via discover_all_codes():")
print("{:<40} {:>4} {:>3} {:>3}".format('Code Name', 'n', 'k', 'd'))
print("-" * 55)
for name, code in codes.items():
    # '?' is shown when the code's metadata carries no 'distance' entry.
    d = code.metadata.get('distance', '?')
    print("  {:<38} {:>4} {:>3} {:>3}".format(name, code.n, code.k, d))
print("\n{} CSS codes ready for testing".format(len(codes)))

All codes instantiated via discover_all_codes():
Code Name                                   n   k   d
-------------------------------------------------------
  FourQubit422_[[4,2,?]]                    4   2   2
  C6                                        6   2   2
  Steane_713                                7   1   3
  Shor_91                                   9   1   3
  ReedMuller_15_1_3                        15   1   3
  ToricCode_3x3                            18   2   3
  Hamming_CSS_7                             7   1   3
  Code_832                                  8   3   2
  RotatedSurface_[[9,1,3]]                  9   1   3
  RotatedSurface_[[25,1,5]]                25   1   5
  TriangularColour_d3                       7   1   3
  HexagonalColour_d3                       17   7   3
  XZZX_Surface_3                            9   1   3
  XZZX_Surface_5                           25   1   5
  GaugeColor_3                             22  18   3
  Repetition_3                 

In [5]:
# Cell 4: CSS Structure Validation - ALL CODES

print("CSS Structure Validation - ALL DISCOVERED CODES")
print("=" * 90)

# One validate_css_structure() record per discovered code, printed as we go.
structure_results = []
for name, code in codes.items():
    result = validate_css_structure(code, name)
    structure_results.append(result)
    if result['status'] == 'PASS':
        status_icon = "[PASS]"
    else:
        status_icon = "[FAIL]"
    print(
        "{} {:<35}: [[{},{},{}]] Hx={} Hz={} CSS-ortho={}".format(
            status_icon,
            name,
            result['n'],
            result['k'],
            result['distance'],
            result['hx_shape'],
            result['hz_shape'],
            result['css_orthogonal'],
        )
    )

passes = [r['status'] for r in structure_results].count('PASS')
print("\n" + "=" * 90)
print("Structure validation: {}/{} passed".format(passes, len(structure_results)))

CSS Structure Validation - ALL DISCOVERED CODES
[PASS] FourQubit422_[[4,2,?]]             : [[4,2,2]] Hx=(1, 4) Hz=(1, 4) CSS-ortho=True
[PASS] C6                                 : [[6,2,2]] Hx=(2, 6) Hz=(2, 6) CSS-ortho=True
[PASS] Steane_713                         : [[7,1,3]] Hx=(3, 7) Hz=(3, 7) CSS-ortho=True
[PASS] Shor_91                            : [[9,1,3]] Hx=(2, 9) Hz=(6, 9) CSS-ortho=True
[PASS] ReedMuller_15_1_3                  : [[15,1,3]] Hx=(7, 15) Hz=(7, 15) CSS-ortho=True
[PASS] ToricCode_3x3                      : [[18,2,3]] Hx=(8, 18) Hz=(8, 18) CSS-ortho=True
[PASS] Hamming_CSS_7                      : [[7,1,3]] Hx=(3, 7) Hz=(3, 7) CSS-ortho=True
[PASS] Code_832                           : [[8,3,2]] Hx=(1, 8) Hz=(4, 8) CSS-ortho=True
[PASS] RotatedSurface_[[9,1,3]]           : [[9,1,3]] Hx=(4, 9) Hz=(4, 9) CSS-ortho=True
[PASS] RotatedSurface_[[25,1,5]]          : [[25,1,5]] Hx=(12, 25) Hz=(12, 25) CSS-ortho=True
[PASS] TriangularColour_d3                : [[7,1,3

In [6]:
# Cell 5: Distance-2 Codes (Detection Only) - ALL DISCOVERED

print("Distance-2 Codes (Error Detection) - ALL DISCOVERED")
print("=" * 80)
print("Note: These codes can detect but not correct errors.")
print("Expected: LER < 0.20 at p=0.01 (detection-based post-selection)\n")

# Keep CSS codes whose metadata declares an integer distance below 3.
# Codes with a missing or non-integer distance are excluded.
d2_codes = {
    name: code
    for name, code in codes.items()
    if hasattr(code, 'hx')
    and hasattr(code, 'hz')
    and isinstance(code.metadata.get('distance'), int)
    and code.metadata.get('distance') < 3
}

d2_results = []
print("Found {} CSS codes with distance < 3\n".format(len(d2_codes)))
print("{:<40} {:>3} {:>10} {:<15}".format('Code', 'd', 'LER', 'Status'))
print("-" * 75)

for name, code in d2_codes.items():
    try:
        d = code.metadata.get('distance', '?')
        result = run_memory_test(code, name, p=0.01, threshold=0.20)
        d2_results.append(result)
        if result['status'] == 'PASS':
            status = "[PASS]"
        else:
            status = "[FAIL]"
        print("{:<40} {:>3} {:>10.4f} {}".format(name, d, result['ler'], status))
    except Exception as e:
        # Best effort: a code that raises is reported on its own row and does
        # not get an entry in d2_results.
        print("{:<40} {:>3} {:<10} {}".format(name, '?', 'ERROR', str(e)[:25]))

passed = sum(r['status'] == 'PASS' for r in d2_results)
print("\nDistance < 3 tests completed: {}/{} passed".format(passed, len(d2_results)))

Distance-2 Codes (Error Detection) - ALL DISCOVERED
Note: These codes can detect but not correct errors.
Expected: LER < 0.20 at p=0.01 (detection-based post-selection)

Found 3 CSS codes with distance < 3

Code                                       d        LER Status         
---------------------------------------------------------------------------
[run_decode] Code distance: 2
[run_decode] Distance 2 <= 2: Using detection-only path
[run_decode/detection] --- starting detection path ---
[run_decode/detection] shots = 5000
[run_decode/detection] circuit length = 48
[run_decode/detection] det_samples.shape = (5000, 7)
[run_decode/detection] obs_samples.shape = (5000, 1)
[run_decode/detection] logical_error_count = 139
[run_decode/detection] syndrome_nonzero_count = 918
[run_decode/detection] undetected_errors = 2
[run_decode/detection] detection_efficiency = 0.9856115107913669
[run_decode/detection] logical_error_rate = 0.0278
FourQubit422_[[4,2,?]]                     2     0.0278 [

In [7]:
# Cell 6: Distance-3+ Codes (Error Correction) - ALL DISCOVERED

print("Distance-3+ Codes (Error Correction) - ALL DISCOVERED")
print("=" * 80)
print("Expected: LER < 0.12 at p=0.01 with decoder\n")

# Keep codes whose metadata declares an integer distance of at least 3.
d3_codes = {
    name: code
    for name, code in codes.items()
    if isinstance(code.metadata.get('distance'), int)
    and code.metadata.get('distance') >= 3
}

d3_results = []
print("Found {} distance >= 3 codes\n".format(len(d3_codes)))
print("{:<40} {:>3} {:>10} {:<15}".format('Code', 'd', 'LER', 'Status'))
print("-" * 75)

for name, code in d3_codes.items():
    try:
        # Skip (without recording a result) any code whose Hx/Hz pair fails
        # the CSS commutation condition.
        if np.any(np.dot(code.hx, code.hz.T) % 2 != 0):
            print("{:<40} {:>3} {:>10} {:<15}".format(name, '?', 'N/A', 'SKIP-ortho'))
            continue
        d = code.metadata.get('distance', '?')
        # d == 3 is held to 0.12; every larger distance to the stricter 0.08.
        if d == 3:
            threshold = 0.12
        else:
            threshold = 0.08
        result = run_memory_test(code, name, p=0.01, threshold=threshold)
        d3_results.append(result)
        # A miss is displayed as [WARN] here although result['status'] is 'FAIL'.
        status = "[WARN]" if result['status'] != 'PASS' else "[PASS]"
        print("{:<40} {:>3} {:>10.4f} {}".format(name, d, result['ler'], status))
    except Exception as e:
        # Best effort: report the failure on its own row and move on.
        print("{:<40} {:>3} {:<10} {}".format(name, '?', 'ERROR', str(e)[:20]))

passed = sum(r['status'] == 'PASS' for r in d3_results)
print("\nDistance >= 3 tests completed: {}/{} passed".format(passed, len(d3_results)))

Distance-3+ Codes (Error Correction) - ALL DISCOVERED
Expected: LER < 0.12 at p=0.01 with decoder

Found 16 distance >= 3 codes

Code                                       d        LER Status         
---------------------------------------------------------------------------
[run_decode] Code distance: 3
[run_decode] Distance 3 >= 3: Using correction path
[run_decode/correction] --- starting correction path ---
[run_decode/correction] shots           = 5000
[run_decode/correction] decoder_name    = None
[run_decode/correction] base circuit    = 62 instructions
[run_decode/correction] noisy circuit   = 67 instructions
[run_decode/correction] DEM: detectors   = 21
[run_decode/correction] DEM: errors      = 9
[run_decode/correction] DEM: observables = 1
[run_decode/correction] decoder type    = <class 'qectostim.decoders.pymatching_decoder.PyMatchingDecoder'>
[run_decode/correction] sampling DEM directly...
[run_decode/correction] type(raw)       = <class 'tuple'>
[run_decode/correction]

In [8]:
# Cell 7: Higher Distance Codes (d>=5) - ALL DISCOVERED

print("Higher Distance Codes (d>=5) - ALL DISCOVERED")
print("=" * 80)
print("Expected: Very low LER at p=0.01 (below threshold)\n")

# Keep codes whose metadata declares an integer distance of at least 5.
high_d_codes = {
    name: code
    for name, code in codes.items()
    if isinstance(code.metadata.get('distance'), int)
    and code.metadata.get('distance') >= 5
}

high_d_results = []
print("Found {} distance >= 5 codes\n".format(len(high_d_codes)))
print("{:<40} {:>3} {:>10} {:<15}".format('Code', 'd', 'LER', 'Status'))
print("-" * 75)

for name, code in high_d_codes.items():
    try:
        # Skip (without recording a result) any code whose Hx/Hz pair fails
        # the CSS commutation condition.
        if np.any(np.dot(code.hx, code.hz.T) % 2 != 0):
            print("{:<40} {:>3} {:>10} {:<15}".format(name, '?', 'N/A', 'SKIP-ortho'))
            continue
        d = code.metadata.get('distance', '?')
        result = run_memory_test(code, name, p=0.01, threshold=0.05)
        high_d_results.append(result)
        # A miss is displayed as [WARN] here although result['status'] is 'FAIL'.
        status = "[WARN]" if result['status'] != 'PASS' else "[PASS]"
        print("{:<40} {:>3} {:>10.4f} {}".format(name, d, result['ler'], status))
    except Exception as e:
        # Best effort: report the failure on its own row and move on.
        print("{:<40} {:>3} {:<10} {}".format(name, '?', 'ERROR', str(e)[:20]))

passed = sum(r['status'] == 'PASS' for r in high_d_results)
print("\nHigher distance tests completed: {}/{} passed".format(passed, len(high_d_results)))

Higher Distance Codes (d>=5) - ALL DISCOVERED
Expected: Very low LER at p=0.01 (below threshold)

Found 5 distance >= 5 codes

Code                                       d        LER Status         
---------------------------------------------------------------------------
[run_decode] Code distance: 5
[run_decode] Distance 5 >= 3: Using correction path
[run_decode/correction] --- starting correction path ---
[run_decode/correction] shots           = 5000
[run_decode/correction] decoder_name    = None
[run_decode/correction] base circuit    = 176 instructions
[run_decode/correction] noisy circuit   = 193 instructions
[run_decode/correction] DEM: detectors   = 84
[run_decode/correction] DEM: errors      = 333
[run_decode/correction] DEM: observables = 1
[run_decode/correction] decoder type    = <class 'qectostim.decoders.pymatching_decoder.PyMatchingDecoder'>
[run_decode/correction] sampling DEM directly...
[run_decode/correction] type(raw)       = <class 'tuple'>
[run_decode/correctio

In [9]:
# Cell 8: GenericCSSCode Validation

print("GenericCSSCode Validation")
print("=" * 70)

# Build a Steane-style code directly from raw check matrices; the same matrix
# is used for both the X and the Z checks.
hx_steane = np.array([
    [1, 0, 0, 1, 0, 1, 1],
    [0, 1, 0, 1, 1, 0, 1],
    [0, 0, 1, 0, 1, 1, 1],
])
hz_steane = hx_steane.copy()

generic_steane = GenericCSSCode(
    hx=hx_steane,
    hz=hz_steane,
    metadata={'name': 'GenericSteane', 'distance': 3},
)
print("Generic Steane: n={}, k={}".format(generic_steane.n, generic_steane.k))
print("  CSS orthogonal: {}".format(np.all(np.dot(hx_steane, hz_steane.T) % 2 == 0)))
# NOTE(review): reads private attributes of GenericCSSCode; switch to a public
# accessor if the class offers one.
print("  Logical X inferred: {}".format(generic_steane._logical_x))
print("  Logical Z inferred: {}".format(generic_steane._logical_z))

generic_result = run_memory_test(code=generic_steane, name="GenericSteane", p=0.01, threshold=0.10)
if generic_result['status'] == 'PASS':
    status = "[PASS]"
else:
    status = "[WARN]"
print("  Memory test: LER={:.4f} {}".format(generic_result['ler'], status))

# Same exercise for a [[4,2,2]] code described by a single weight-4 check of
# each type.
hx_422 = np.array([[1, 1, 1, 1]])
hz_422 = np.array([[1, 1, 1, 1]])
generic_422 = GenericCSSCode(
    hx=hx_422,
    hz=hz_422,
    metadata={'name': 'Generic422', 'distance': 2},
)
print("\nGeneric [[4,2,2]]: n={}, k={}".format(generic_422.n, generic_422.k))
generic_422_result = run_memory_test(code=generic_422, name="Generic422", p=0.01, threshold=0.15)
if generic_422_result['status'] == 'PASS':
    status = "[PASS]"
else:
    status = "[WARN]"
print("  Memory test: LER={:.4f} {}".format(generic_422_result['ler'], status))

print("\nGenericCSSCode validation completed")

GenericCSSCode Validation
Generic Steane: n=7, k=1
  CSS orthogonal: True
  Logical X inferred: ['XIIIXIX']
  Logical Z inferred: ['ZIIIZIZ']
[run_decode] Code distance: 3
[run_decode] Distance 3 >= 3: Using correction path
[run_decode/correction] --- starting correction path ---
[run_decode/correction] shots           = 5000
[run_decode/correction] decoder_name    = None
[run_decode/correction] base circuit    = 65 instructions
[run_decode/correction] noisy circuit   = 85 instructions
[run_decode/correction] DEM: detectors   = 21
[run_decode/correction] DEM: errors      = 96
[run_decode/correction] DEM: observables = 1
[run_decode/correction] decoder type    = <class 'qectostim.decoders.pymatching_decoder.PyMatchingDecoder'>
[run_decode/correction] sampling DEM directly...
[run_decode/correction] type(raw)       = <class 'tuple'>
[run_decode/correction] det_samples.shape = (5000, 21)
[run_decode/correction] obs_samples.shape = (5000, 1)
[run_decode/correction] decoding detector sample

In [10]:
# Cell 9: Comprehensive Summary Table
#
# Prints one row per discovered code plus the two GenericCSSCode runs, then a
# tally. Codes that have no recorded result (they raised, were skipped for
# non-orthogonality, or had no integer distance and were never tested) are
# counted as skipped and prevent the "ALL CODES FUNCTIONAL" verdict; before,
# they were silently left out of every counter.

all_results = d2_results + d3_results + high_d_results + [generic_result, generic_422_result]

# Index results by name, keeping the FIRST record for each name. Codes with
# d >= 5 appear in both d3_results and high_d_results; the earlier (d3) record
# wins, exactly as the previous linear search did.
results_by_name = {}
for r in all_results:
    results_by_name.setdefault(r['name'], r)

print("\n" + "="*70)
print("COMPREHENSIVE TEST SUMMARY")
print("="*70)
print(f"\n{'Code':<28}{'d':>5}{'LER':>10} {'Status':<30}")
print("-"*70)

passed = 0
warned = 0   # kept for the summary line; no stage currently records a warning status
failed = 0
skipped = 0

for name, code in codes.items():
    d = code.metadata.get('distance', '?')
    result = results_by_name.get(name)
    if result is not None:
        status = result['status']
        ler_str = f"{result['ler']:.4f}"
    else:
        ler_str = "N/A"
        status = "SKIP"
    print(f"{name:<28}{d:>5}{ler_str:>10} {status:<30}")
    if status == 'PASS':
        passed += 1
    elif status == 'FAIL':
        failed += 1
    else:
        skipped += 1

# Add extra codes tested (the GenericCSSCode runs are not part of `codes`).
for r in [generic_result, generic_422_result]:
    d = 3 if 'Steane' in r['name'] else 2
    print(f"{r['name']:<28}{d:>5}{r['ler']:>10.4f} {r['status']:<30}")
    if r['status'] == 'PASS':
        passed += 1
    elif r['status'] == 'FAIL':
        failed += 1

# The former extra pass over high_d_results looking for 'RotatedSurface5' was
# removed: every high-distance code is drawn from `codes` and is therefore
# already counted in the main loop above, so a match would have double-counted.

total = passed + warned + failed + skipped
print("-"*70)
print(f"\nTotal: {total} | Passed: {passed} | Warnings: {warned} | "
      f"Failed: {failed} | Skipped: {skipped}")
print("\nALL CODES FUNCTIONAL" if failed == 0 and skipped == 0 else "\nSOME CODES NEED ATTENTION")


COMPREHENSIVE TEST SUMMARY

Code                            d       LER Status                        
----------------------------------------------------------------------
FourQubit422_[[4,2,?]]          2    0.0278 PASS                          
C6                              2    0.0222 PASS                          
Steane_713                      3    0.0000 PASS                          
Shor_91                         3    0.0034 PASS                          
ReedMuller_15_1_3               3    0.0808 PASS                          
ToricCode_3x3                   3    0.0084 PASS                          
Hamming_CSS_7                   3    0.0826 PASS                          
Code_832                        2    0.0468 PASS                          
RotatedSurface_[[9,1,3]]        3    0.0148 PASS                          
RotatedSurface_[[25,1,5]]       5    0.0120 PASS                          
TriangularColour_d3             3    0.0000 PASS                          


In [11]:
# Cell 10: Stim Reference Comparison

banner = "=" * 70
print("\n" + banner)
print("STIM REFERENCE COMPARISON (Rotated Surface Code)")
print(banner)

p = 0.001
shots = 10000

# --- Reference: Stim's built-in rotated surface-code memory circuit ---------
stim_circuit = stim.Circuit.generated(
    "surface_code:rotated_memory_z",
    distance=3,
    rounds=3,
    after_clifford_depolarization=p,
    before_measure_flip_probability=p,
    before_round_data_depolarization=p,
)
stim_dem = stim_circuit.detector_error_model(decompose_errors=True)
stim_decoder = select_decoder(stim_dem)
stim_sampler = stim_dem.compile_sampler()
raw = stim_sampler.sample(shots, bit_packed=False)
# Element 0 holds the detector samples, element 1 the observable samples.
stim_det, stim_obs = raw[0], raw[1]
stim_pred = stim_decoder.decode_batch(stim_det)
# A shot is a logical error when the prediction disagrees with observable 0.
mismatches = stim_pred[:, 0] ^ stim_obs[:, 0]
stim_ler = float(mismatches.mean())

print("\nStim native: LER = {:.6f}".format(stim_ler))

# --- Same experiment through QECToStim ---------------------------------------
qec_code = RotatedSurfaceCode(distance=3)
qec_noise = CircuitDepolarizingNoise(p1=p, p2=p)
qec_exp = CSSMemoryExperiment(code=qec_code, rounds=3, noise_model=qec_noise)
qec_result = qec_exp.run_decode(shots=shots)
qec_ler = qec_result['logical_error_rate']

print("QECToStim:   LER = {:.6f}".format(qec_ler))
print("Difference:      {:.6f}".format(abs(stim_ler - qec_ler)))

# NOTE(review): the 0.02 absolute tolerance is much larger than the error
# rates involved at p = 0.001, so this check is very permissive — confirm
# whether a relative or statistical tolerance is intended.
if not abs(stim_ler - qec_ler) < 0.02:
    print("\n[WARN] Results differ - may need investigation")
else:
    print("\n[PASS] QECToStim matches Stim reference within tolerance")

print("\n" + banner)
print("TESTING COMPLETE")
print(banner)


STIM REFERENCE COMPARISON (Rotated Surface Code)

Stim native: LER = 0.000700
[run_decode] Code distance: 3
[run_decode] Distance 3 >= 3: Using correction path
[run_decode/correction] --- starting correction path ---
[run_decode/correction] shots           = 10000
[run_decode/correction] decoder_name    = None
[run_decode/correction] base circuit    = 88 instructions
[run_decode/correction] noisy circuit   = 105 instructions
[run_decode/correction] DEM: detectors   = 28
[run_decode/correction] DEM: errors      = 106
[run_decode/correction] DEM: observables = 1
[run_decode/correction] decoder type    = <class 'qectostim.decoders.pymatching_decoder.PyMatchingDecoder'>
[run_decode/correction] sampling DEM directly...
[run_decode/correction] type(raw)       = <class 'tuple'>
[run_decode/correction] det_samples.shape = (10000, 28)
[run_decode/correction] obs_samples.shape = (10000, 1)
[run_decode/correction] decoding detector samples...
[run_decode/correction] corrections.shape = (10000, 1

In [12]:
def _sample_dem_polyfill(dem: stim.DetectorErrorModel, shots: int, tag: str) -> Tuple[np.ndarray, Optional[np.ndarray]]:
    """
    Sample from a DEM in a way that is compatible with the stim polyfill.
    
    Handles both tuple (det_samples, obs_samples) and array formats.
    """
    sampler = dem.compile_sampler()
    raw = sampler.sample(shots=shots)

    if isinstance(raw, tuple):
        if len(raw) < 2:
            raise ValueError(f"{tag} DEM sampler returned tuple of length {len(raw)}; expected >= 2.")
        det_samples = np.asarray(raw[0], dtype=np.uint8)
        obs_samples = np.asarray(raw[1], dtype=np.uint8) if raw[1] is not None else None
    else:
        arr = np.asarray(raw, dtype=np.uint8)
        if arr.ndim != 2:
            raise ValueError(f"{tag} DEM sampler returned array with ndim={arr.ndim}; expected 2.")
        num_det = dem.num_detectors
        num_obs = dem.num_observables
        if arr.shape[1] != num_det + num_obs:
            raise ValueError(
                f"{tag} DEM sampler array has shape {arr.shape}, but DEM has "
                f"{num_det} detectors and {num_obs} observables."
            )
        det_samples = arr[:, :num_det]
        obs_samples = arr[:, num_det:]

    if dem.num_observables > 0 and obs_samples is None:
        raise ValueError(f"{tag} DEM sampler didn't return observable samples.")

    return det_samples, obs_samples


def decode_from_dem_with_debug(dem: stim.DetectorErrorModel, shots: int, tag: str) -> float:
    """
    Sample ``dem``, decode the samples, and print diagnostics along the way.

    Steps: print the DEM sizes, draw a small preview sample (at most 200
    shots) and show its first rows, build a decoder with ``select_decoder``,
    draw the full sample, decode it, and compare the decoder's prediction with
    the sampled value of observable L0.

    Args:
        dem: Detector error model to sample and decode.
        shots: Number of shots in the full sample.
        tag: Label used in the printed header and in error messages.

    Returns:
        Fraction of shots where prediction and sample disagree on observable
        L0; ``0.0`` when the DEM has no observables.

    Raises:
        ValueError: if the decoder returns a different number of rows than
            ``shots``.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("DEM SAMPLING DEBUG ({})".format(tag))
    print(rule)
    print("  Detectors  : {}".format(dem.num_detectors))
    print("  Errors     : {}".format(dem.num_errors))
    print("  Observables: {}".format(dem.num_observables))

    # Nothing to decode against without at least one observable.
    if dem.num_observables == 0:
        print("  ⚠ WARNING: DEM has no observables; returning LER=0.0")
        return 0.0

    # Small sample for inspection
    preview_shots = min(200, shots)
    det_small, obs_small = _sample_dem_polyfill(dem, shots=preview_shots, tag=tag)
    print("\n  Sample inspection (first 200 shots):")
    print("    det_small.shape: {}".format(det_small.shape))
    print("    obs_small.shape: {}".format(obs_small.shape if obs_small is not None else None))
    if det_small.shape[0] > 0:
        print("    first 3 detector rows: {}".format(det_small[:3]))
    if obs_small is not None and obs_small.shape[0] > 0:
        print("    first 3 observable rows: {}".format(obs_small[:3]))

    # Build decoder
    decoder = select_decoder(dem, preferred=None)
    print("\n  Decoder: {}".format(type(decoder).__name__))

    # Full sample for decoding (drawn independently of the preview above)
    print("\n  Full sampling ({} shots)...".format(shots))
    det_samples, obs_samples = _sample_dem_polyfill(dem, shots=shots, tag=tag)
    print("    det_samples.shape: {}".format(det_samples.shape))
    print("    obs_samples.shape: {}".format(obs_samples.shape if obs_samples is not None else None))

    # Decode
    print("\n  Decoding...")
    corrections = np.asarray(decoder.decode_batch(det_samples), dtype=np.uint8)

    # A flat prediction vector is reshaped into (rows, num_observables).
    if corrections.ndim == 1:
        corrections = corrections.reshape(-1, dem.num_observables)
    returned_shots = corrections.shape[0]
    if returned_shots != shots:
        raise ValueError(f"{tag} Decoder returned {corrections.shape[0]} shots but we asked for {shots}.")

    # Compute logical error rate
    can_compare = not (obs_samples is None or dem.num_observables == 0)
    if can_compare:
        pred_log = corrections[:, 0]
        true_log = obs_samples[:, 0]
        # XOR marks the shots where prediction and truth disagree.
        logical_errors = (pred_log ^ true_log).astype(np.uint8)
        ler = float(logical_errors.mean())
        print("    Logical errors (L0): {} / {}".format(logical_errors.sum(), shots))
        print("    Logical error rate: {:.6f}".format(ler))
    else:
        print("    ⚠ No observables to compare")
        ler = 0.0

    print(rule + "\n")
    return ler

In [13]:
def run_pure_stim_pipeline(distance: int, p: float, rounds: int, shots: int) -> float:
    """Generate Stim's rotated surface-code memory circuit and decode its DEM.

    Args:
        distance: Code distance passed to ``stim.Circuit.generated``.
        p: Probability used for all three noise settings of the generated
            circuit (after-Clifford depolarization, measurement flip, and
            before-round data depolarization).
        rounds: Number of rounds passed to the generator.
        shots: Number of shots forwarded to ``decode_from_dem_with_debug``.

    Returns:
        The value returned by ``decode_from_dem_with_debug`` for the circuit's
        detector error model.
    """
    hashes = "#" * 60
    print("\n" + hashes)
    print("# PURE STIM PIPELINE (d={}, p={}, rounds={})".format(distance, p, rounds))
    print(hashes + "\n")

    generator_options = dict(
        distance=distance,
        rounds=rounds,
        after_clifford_depolarization=p,
        before_measure_flip_probability=p,
        before_round_data_depolarization=p,
    )
    circ = stim.Circuit.generated("surface_code:rotated_memory_z", **generator_options)

    print("Circuit: {} instructions".format(len(circ)))
    print("Building DEM...\n")
    dem = circ.detector_error_model(decompose_errors=True)

    print("DEM Summary:")
    print("  Detectors: {}".format(dem.num_detectors))
    print("  Errors: {}".format(dem.num_errors))
    print("  Observables: {}\n".format(dem.num_observables))

    return decode_from_dem_with_debug(dem, shots=shots, tag="[Stim]")


def run_qectostim_pipeline(distance: int, p: float, rounds: int, shots: int) -> float:
    """Run a Z-basis memory experiment on QECToStim's RotatedSurfaceCode and decode it.

    Builds the experiment circuit, applies ``CircuitDepolarizingNoise`` with
    ``p1 = p2 = p``, extracts the detector error model and hands it to
    ``decode_from_dem_with_debug``.

    Args:
        distance: Distance of the ``RotatedSurfaceCode``.
        p: Probability used for both ``p1`` and ``p2`` of the noise model.
        rounds: Number of rounds of the memory experiment.
        shots: Number of shots forwarded to ``decode_from_dem_with_debug``.

    Returns:
        A single float: the value returned by ``decode_from_dem_with_debug``.
        (The annotation previously advertised a 2-tuple, which this function
        never returned.)
    """
    print(f"\n{'#'*60}")
    print(f"# QECTOSTIM PIPELINE (d={distance}, p={p}, rounds={rounds})")
    print(f"{'#'*60}\n")

    code = RotatedSurfaceCode(distance=distance)
    noise_model = CircuitDepolarizingNoise(p1=p, p2=p)

    exp = CSSMemoryExperiment(
        code=code,
        rounds=rounds,
        noise_model=noise_model,
        basis="Z",
    )

    # Get the circuits: the noiseless experiment and its noisy counterpart.
    base_circuit = exp.to_stim()
    noisy_circuit = noise_model.apply(base_circuit)

    print(f"Base circuit: {len(base_circuit)} instructions")
    print(f"Noisy circuit: {len(noisy_circuit)} instructions")
    print("Building DEM...\n")

    # Note: QECToStim may generate error graphs that don't decompose perfectly for large distances
    # Use ignore_decomposition_failures to allow sampling/decoding to continue
    dem = noisy_circuit.detector_error_model(decompose_errors=True, ignore_decomposition_failures=True)

    print("DEM Summary:")
    print(f"  Detectors: {dem.num_detectors}")
    print(f"  Errors: {dem.num_errors}")
    print(f"  Observables: {dem.num_observables}\n")

    qec_ler = decode_from_dem_with_debug(dem, shots=shots, tag="[QECToStim]")
    return qec_ler


print("✓ Pipeline functions defined")

✓ Pipeline functions defined


In [14]:
# Error-suppression check: run the Stim reference and the QECToStim pipeline
# at several distances and compare how the logical error rate changes.
print("="*70)
print("TEST 3: Error Suppression Curve - Rotated Surface Code")
print("="*70)
print("\nTesting multiple distances to verify error suppression...")

suppression_results = []
distances = [3, 5, 7]
p = 1e-3
shots = 1_000_000

for d in distances:
    print(f"\n{'-'*60}")
    print(f"Distance d={d}")
    print(f"{'-'*60}")

    # Use fewer rounds for speed (caps the round count at 2).
    rounds = min(d, 2)

    stim_ler = run_pure_stim_pipeline(distance=d, p=p, rounds=rounds, shots=shots)
    qec_ler = run_qectostim_pipeline(distance=d, p=p, rounds=rounds, shots=shots)

    suppression_results.append({
        'distance': d,
        'stim_ler': stim_ler,
        'qec_ler': qec_ler,
    })

    print(f"\nDistance d={d} Results:")
    print(f"  Stim LER    : {stim_ler:.6f}")
    print(f"  QECToStim LER: {qec_ler:.6f}")

print(f"\n{'='*70}")
print("SUPPRESSION ANALYSIS")
print(f"{'='*70}")

# Check if both show suppression.
# NOTE(review): only the first two distances are compared; larger distances
# are listed below but do not influence the verdict — confirm this is intended.
if len(suppression_results) >= 2:
    stim_suppresses = suppression_results[0]['stim_ler'] > suppression_results[1]['stim_ler']
    qec_suppresses = suppression_results[0]['qec_ler'] > suppression_results[1]['qec_ler']

    print(f"\nStim shows error suppression: {stim_suppresses}")
    for res in suppression_results:
        print(f"  d={res['distance']}: LER = {res['stim_ler']:.6f}")

    print(f"\nQECToStim shows error suppression: {qec_suppresses}")
    for res in suppression_results:
        print(f"  d={res['distance']}: LER = {res['qec_ler']:.6f}")

    if stim_suppresses and qec_suppresses:
        print("\n✓ Both implementations show proper error suppression!")
    elif stim_suppresses and not qec_suppresses:
        print("\n⚠ Stim shows suppression but QECToStim doesn't - possible circuit bug")
    elif qec_suppresses and not stim_suppresses:
        # Previously this case fell through to the "Neither shows suppression"
        # message, which contradicted the QECToStim line printed just above.
        print("\n⚠ QECToStim shows suppression but Stim doesn't - possible reference or sampling issue")
    else:
        print("\n⚠ Neither shows suppression - possible noise or decoding issue")

TEST 3: Error Suppression Curve - Rotated Surface Code

Testing multiple distances to verify error suppression...

------------------------------------------------------------
Distance d=3
------------------------------------------------------------

############################################################
# PURE STIM PIPELINE (d=3, p=0.001, rounds=2)
############################################################

Circuit: 82 instructions
Building DEM...

DEM Summary:
  Detectors: 16
  Errors: 145
  Observables: 1


DEM SAMPLING DEBUG ([Stim])
  Detectors  : 16
  Errors     : 145
  Observables: 1

  Sample inspection (first 200 shots):
    det_small.shape: (200, 16)
    obs_small.shape: (200, 1)
    first 3 detector rows: [[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]
    first 3 observable rows: [[0]
 [0]
 [0]]

  Decoder: PyMatchingDecoder

  Full sampling (1000000 shots)...
    det_samples.shape: (1000000, 16)
    obs_samp

In [15]:
# ============================================================================
# COMPREHENSIVE CODE DIAGNOSTIC - ALL DISCOVERED CODES
# ============================================================================
# This cell tests every discovered code to understand what's broken

import sys
import numpy as np

# Force reload to get latest discover_all_codes
for mod in list(sys.modules.keys()):
    if 'qectostim' in mod:
        del sys.modules[mod]

from qectostim.codes import discover_all_codes
from qectostim.experiments.memory import CSSMemoryExperiment
from qectostim.noise.models import CircuitDepolarizingNoise

print("="*100)
print("COMPREHENSIVE DIAGNOSTIC: Testing ALL DISCOVERED codes")
print("="*100)

# Discover ALL codes; only objects exposing both `hx` and `hz` are kept,
# which is the marker this notebook uses for "CSS code".
discovered = discover_all_codes(max_qubits=50, include_qldpc=False)
codes_diagnostic = {name: code for name, code in discovered.items()
                    if hasattr(code, 'hx') and hasattr(code, 'hz')}

print(f"\nDiscovered {len(codes_diagnostic)} CSS codes for diagnostic testing\n")

# Test parameters
SHOTS = 2000
ROUNDS = 3  # Use 3 rounds to avoid final-gate suppression
NOISE_LEVEL = 0.01

noise = CircuitDepolarizingNoise(p1=NOISE_LEVEL, p2=NOISE_LEVEL)

print(f"Test parameters: shots={SHOTS}, rounds={ROUNDS}, p={NOISE_LEVEL}")
print("-"*100)
print(f"{'Code':<40} | {'n':>3} | {'k':>2} | {'d':>3} | {'LER':>8} | {'Status':<25}")
print("-"*100)

# code name -> {'n', 'k', 'd', 'ler', 'status'} on success, or {'error': msg} on failure
diagnostic_results = {}

for code_name, code in codes_diagnostic.items():
    try:
        n = code.n
        k = code.k
        d = code.metadata.get('distance', '?')
        # Format the distance through str(): a value such as None rejects the
        # '>3' width spec, and that TypeError would otherwise be caught below and
        # overwrite a perfectly good result with an ERROR row.
        d_label = str(d)

        # Check CSS orthogonality first (Hx . Hz^T must vanish mod 2)
        if not np.all(np.dot(code.hx, code.hz.T) % 2 == 0):
            diagnostic_results[code_name] = {'n': n, 'k': k, 'd': d, 'status': '⊘ CSS non-orthogonal'}
            print(f"{code_name:<40} | {n:>3} | {k:>2} | {d_label:>3} | {'N/A':>8} | ⊘ CSS non-orthogonal")
            continue

        # Run Z-basis memory experiment
        exp = CSSMemoryExperiment(code=code, rounds=ROUNDS, noise_model=noise, basis='Z')
        result = exp.run_decode(shots=SHOTS)
        ler = result.get('logical_error_rate', -1)

        # Determine status based on distance. numpy integers are accepted as well
        # as Python ints; anything else (e.g. '?') is treated as "unknown" -> 0.
        d_int = int(d) if isinstance(d, (int, np.integer)) else 0
        if ler < 0 or np.isnan(ler):
            status = "❌ INVALID LER"
        elif d_int >= 3 and ler < 1e-6:
            status = "⚠️ LER ≈ 0"
        elif d_int >= 3 and ler > 0.15:
            status = "⚠️ LER too high"
        elif d_int < 3 and ler > 0.25:
            status = "⚠️ LER too high (d<3)"
        else:
            status = "✓ OK"

        diagnostic_results[code_name] = {
            'n': n, 'k': k, 'd': d, 'ler': ler, 'status': status
        }

        print(f"{code_name:<40} | {n:>3} | {k:>2} | {d_label:>3} | {ler:>8.5f} | {status}")

    except Exception as e:
        # Best-effort sweep: record the failure and continue with the next code.
        diagnostic_results[code_name] = {'error': str(e)}
        print(f"{code_name:<40} | ERROR: {str(e)[:45]}")

print("-"*100)

# Summary: classify every entry by the marker at the start of / inside its status
ok_count = sum(1 for r in diagnostic_results.values() if r.get('status', '').startswith('✓'))
warn_count = sum(1 for r in diagnostic_results.values() if '⚠️' in r.get('status', ''))
error_count = sum(1 for r in diagnostic_results.values() if '❌' in r.get('status', '') or 'error' in r)
skip_count = sum(1 for r in diagnostic_results.values() if '⊘' in r.get('status', ''))

print(f"\nDiagnostic Summary:")
print(f"  ✓ OK: {ok_count}")
print(f"  ⚠️ Warnings: {warn_count}")
print(f"  ❌ Errors: {error_count}")
print(f"  ⊘ Skipped: {skip_count}")
print(f"\nDiagnostic complete. Codes with ⚠️ or ❌ may need investigation.")

COMPREHENSIVE DIAGNOSTIC: Testing ALL DISCOVERED codes

Discovered 19 CSS codes for diagnostic testing

Test parameters: shots=2000, rounds=3, p=0.01
----------------------------------------------------------------------------------------------------
Code                                     |   n |  k |   d |      LER | Status                   
----------------------------------------------------------------------------------------------------
[run_decode] Code distance: 2
[run_decode] Distance 2 <= 2: Using detection-only path
[run_decode/detection] --- starting detection path ---
[run_decode/detection] shots = 2000
[run_decode/detection] circuit length = 48
[run_decode/detection] det_samples.shape = (2000, 7)
[run_decode/detection] obs_samples.shape = (2000, 1)
[run_decode/detection] logical_error_count = 60
[run_decode/detection] syndrome_nonzero_count = 351
[run_decode/detection] undetected_errors = 1
[run_decode/detection] detection_efficiency = 0.9833333333333333
[run_decode/det

In [16]:
# ============================================================================
# COMPREHENSIVE LER/NDR DIAGNOSTIC TABLE - ALL DISCOVERED CODES
# ============================================================================
# Tests ALL codes from discover_all_codes() at multiple noise levels with:
# - LER (with decoding)
# - LER-no-decode (raw logical error rate without decoder correction)
# - NDR (non-detection rate: errors that don't trigger any syndrome)

import sys
import numpy as np
from typing import Dict, List, Any

# Force reload to get latest discover_all_codes
for mod in list(sys.modules.keys()):
    if 'qectostim' in mod:
        del sys.modules[mod]

from qectostim.codes import discover_all_codes
from qectostim.experiments.memory import CSSMemoryExperiment
from qectostim.noise.models import CircuitDepolarizingNoise
from qectostim.decoders.decoder_selector import select_decoder

# Banner for the LER/NDR sweep
print(
    "=" * 120,
    "COMPREHENSIVE LER/NDR DIAGNOSTIC - ALL DISCOVERED CODES",
    "=" * 120,
    sep="\n",
)

# Discover every code, then keep the ones carrying both parity-check
# attributes (`hx` and `hz`); only those are fed to the memory experiments.
discovered = discover_all_codes(max_qubits=50, include_qldpc=False)
test_codes = dict(
    (name, code)
    for name, code in discovered.items()
    if all(hasattr(code, attr) for attr in ('hx', 'hz'))
)

print(f"\nDiscovered {len(test_codes)} CSS codes for testing\n")

# Sweep configuration
noise_levels = [0.001, 0.005, 0.01]
SHOTS_DIAG, ROUNDS_DIAG = 3000, 3  # shot count reduced for faster testing with more codes


def run_comprehensive_test(code, p: float, shots: int, rounds: int) -> Dict[str, Any]:
    """
    Sample one code's noisy memory circuit and summarise three error metrics.

    Args:
        code: object exposing `hx` and `hz`; passed on to CSSMemoryExperiment.
        p: probability used for both `p1` and `p2` of CircuitDepolarizingNoise.
        shots: number of samples drawn from the detector error model.
        rounds: `rounds` argument forwarded to CSSMemoryExperiment.

    Returns:
        Dict with keys 'ler' (observable 0 mismatch rate after decoding),
        'ler_no_decode' (raw flip rate of observable 0), 'ndr' (fraction of
        shots with an all-zero syndrome and observable 0 flipped) and
        'warnings' (list of str). A metric stays None when it was not computed.
        Exceptions are never raised; they are reported as an "Error: ..."
        warning truncated to 50 characters of the message.
    """
    outcome = {
        'ler': None,           # logical error rate with decoding
        'ler_no_decode': None, # logical error rate without decoding
        'ndr': None,           # non-detection rate
        'warnings': []
    }

    try:
        # Guard: the X and Z check matrices must be orthogonal mod 2.
        commutes = np.all(np.dot(code.hx, code.hz.T) % 2 == 0)
        if not commutes:
            outcome['warnings'].append("CSS non-orthogonal")
            return outcome

        noise = CircuitDepolarizingNoise(p1=p, p2=p)
        exp = CSSMemoryExperiment(code=code, rounds=rounds, noise_model=noise)

        # Build the noisy circuit, then its detector error model.
        noisy_circuit = noise.apply(exp.to_stim())
        try:
            dem = noisy_circuit.detector_error_model(decompose_errors=True)
        except Exception:
            # Second attempt tolerating errors that cannot be decomposed.
            dem = noisy_circuit.detector_error_model(decompose_errors=True, ignore_decomposition_failures=True)

        # Draw samples straight from the DEM.
        raw = dem.compile_sampler().sample(shots, bit_packed=False)

        if isinstance(raw, tuple):
            # Tuple form: detectors first, observables second.
            det_samples = np.asarray(raw[0], dtype=np.uint8)
            obs_samples = np.asarray(raw[1], dtype=np.uint8)
        else:
            # Single-array form: detector columns followed by observable columns.
            arr = np.asarray(raw, dtype=np.uint8)
            num_det = dem.num_detectors
            det_samples, obs_samples = arr[:, :num_det], arr[:, num_det:]

        has_observable = obs_samples is not None and obs_samples.shape[1] > 0

        # 1. Raw flip rate of observable 0 (no decoder involved).
        if has_observable:
            outcome['ler_no_decode'] = float(obs_samples[:, 0].mean())

        # 2. Non-detection rate: silent syndrome but observable 0 flipped.
        quiet_shots = (det_samples.sum(axis=1) == 0)
        if has_observable:
            flipped = obs_samples[:, 0].astype(bool)
            silent_failures = quiet_shots & flipped
            outcome['ndr'] = float(silent_failures.sum()) / shots

        # 3. Residual error rate after applying the decoder's prediction.
        decoder = select_decoder(dem)
        corrections = np.asarray(decoder.decode_batch(det_samples), dtype=np.uint8)
        if corrections.ndim == 1:
            corrections = corrections.reshape(-1, max(1, dem.num_observables))

        if has_observable:
            mismatches = (corrections[:, 0] ^ obs_samples[:, 0]).astype(np.uint8)
            outcome['ler'] = float(mismatches.mean())

    except Exception as exc:
        outcome['warnings'].append(f"Error: {str(exc)[:50]}")

    return outcome


def analyze_result(code_name: str, code, p: float, result: Dict) -> List[str]:
    """Analyze one test result for suspicious patterns.

    Args:
        code_name: name of the code (currently unused; kept for the call signature).
        code: object whose `metadata` dict may carry a 'distance' entry.
        p: physical noise level the result was produced at.
        result: dict with optional keys 'ler', 'ler_no_decode', 'ndr', 'warnings'.

    Returns:
        A new list: the warnings already present in `result` followed by any
        warnings raised by the checks below. `result` itself is not modified.
    """
    findings = list(result.get('warnings', []))

    # The distance may be missing, a placeholder string such as '?', None, or
    # any other non-numeric value. All of those mean "unknown" and are mapped
    # to 0 so the `d >= 3` comparisons below cannot raise TypeError.
    d = code.metadata.get('distance', 0)
    if not isinstance(d, (int, float, np.integer, np.floating)):
        d = 0

    ler = result.get('ler')
    ler_no_decode = result.get('ler_no_decode')
    ndr = result.get('ndr')

    # Check LER validity
    if ler is None:
        findings.append("❌ LER is None")
    elif np.isnan(ler):
        findings.append("❌ LER is NaN")
    elif ler < 0:
        findings.append("❌ LER < 0")
    elif ler < 1e-6 and p > 0.001:
        findings.append("⚠️ LER ≈ 0")

    # Check LER-no-decode validity (NaN is reported, mirroring the LER check;
    # without it a NaN slips through every comparison below unnoticed)
    if ler_no_decode is None:
        findings.append("❌ LER-no-decode None")
    elif np.isnan(ler_no_decode):
        findings.append("❌ LER-no-decode NaN")
    elif ler_no_decode < 1e-6 and p > 0.001:
        findings.append("⚠️ LER-no-decode ≈ 0")

    # Check if decoding helps (should reduce LER for d >= 3)
    if ler is not None and ler_no_decode is not None:
        if d >= 3 and ler >= ler_no_decode and p > 0.001:
            findings.append("⚠️ Decoding not helping")
        if ler > ler_no_decode * 1.5:
            findings.append("❌ Decoding hurts!")

    # Check NDR
    if ndr is not None:
        if ndr > 0.5:
            findings.append("❌ NDR > 50%")
        elif ndr > 0.1 and d >= 3:
            findings.append("⚠️ High NDR")

    # Check if LER is reasonable given noise
    if ler is not None and ler > p * 5 and d >= 3:
        findings.append("⚠️ LER >> p")

    return findings


# Run tests: every discovered CSS code at every noise level.
# all_diagnostic_results maps code name -> {noise level -> result dict}.
all_diagnostic_results = {}

for p in noise_levels:
    print(f"\n{'='*120}")
    print(f"NOISE LEVEL p = {p}")
    print(f"{'='*120}")

    # Header
    header = f"{'Code':<35} | {'d':>2} | {'LER':>10} | {'LER-no-dec':>10} | {'NDR':>10} | {'Warnings':<40}"
    print(header)
    print("-"*120)

    for code_name, code in test_codes.items():
        d = code.metadata.get('distance', '?')
        result = run_comprehensive_test(code, p, SHOTS_DIAG, ROUNDS_DIAG)
        warnings = analyze_result(code_name, code, p, result)

        # Store result; the stored dict's 'warnings' entry is replaced by the
        # full list returned by analyze_result.
        result['warnings'] = warnings
        all_diagnostic_results.setdefault(code_name, {})[p] = result

        # Format output
        ler_str = f"{result['ler']:.6f}" if result['ler'] is not None else "N/A"
        ler_nd_str = f"{result['ler_no_decode']:.6f}" if result['ler_no_decode'] is not None else "N/A"
        ndr_str = f"{result['ndr']:.6f}" if result['ndr'] is not None else "N/A"
        # Only the first two warnings fit in the table; the summary below lists all.
        warn_str = "; ".join(warnings[:2]) if warnings else "✓ OK"
        # str() first: this print is not inside a try, so a distance value that
        # rejects the '>2' width spec (e.g. None) would abort the entire sweep.
        d_label = str(d)

        print(f"{code_name:<35} | {d_label:>2} | {ler_str:>10} | {ler_nd_str:>10} | {ndr_str:>10} | {warn_str:<40}")

# Summary
print(f"\n{'='*120}")
print("SUMMARY")
print(f"{'='*120}")

total_tests = len(test_codes) * len(noise_levels)
# A test counts as "with warnings" when its stored warning list is non-empty.
warnings_count = sum(
    1 for code_results in all_diagnostic_results.values()
    for p_result in code_results.values()
    if p_result.get('warnings')
)

print(f"\nTotal codes tested: {len(test_codes)}")
print(f"Total tests (codes × noise levels): {total_tests}")
print(f"Tests with warnings: {warnings_count}")
print(f"Tests passing: {total_tests - warnings_count}")

# List all warnings
if warnings_count > 0:
    print(f"\n⚠️ CODES WITH WARNINGS:")
    for code_name, code_results in all_diagnostic_results.items():
        for p, p_result in code_results.items():
            if p_result.get('warnings'):
                print(f"  {code_name} @ p={p}: {'; '.join(p_result['warnings'])}")
else:
    print("\n✓ ALL CODES PASS VALIDATION")

COMPREHENSIVE LER/NDR DIAGNOSTIC - ALL DISCOVERED CODES

Discovered 19 CSS codes for testing


NOISE LEVEL p = 0.001
------------------------------------------------------------------------------------------------------------------------
FourQubit422_[[4,2,?]]              |  2 |   0.005333 |   0.005333 |   0.000000 | ✓ OK                                    
C6                                  |  2 |   0.007667 |   0.007667 |   0.000000 | ✓ OK                                    
Steane_713                          |  3 |   0.000000 |   0.000000 |   0.000000 | ✓ OK                                    
Shor_91                             |  3 |   0.000333 |   0.029000 |   0.000000 | ✓ OK                                    
ReedMuller_15_1_3                   |  3 |   0.007000 |   0.015333 |   0.000000 | ⚠️ LER >> p                             
ToricCode_3x3                       |  3 |   0.000000 |   0.013333 |   0.000000 | ✓ OK                                    
Hamming_CSS_7           

In [17]:
# Cell: Comprehensive Test Using discover_all_codes()
# This cell tests ALL discovered codes in the codebase

import sys
import numpy as np

# Force reload of modules to get discover_all_codes
for mod in list(sys.modules.keys()):
    if 'qectostim' in mod:
        del sys.modules[mod]

from qectostim.codes import discover_all_codes
from qectostim.experiments.memory import CSSMemoryExperiment
from qectostim.noise.models import CircuitDepolarizingNoise

print("="*70)
print("COMPREHENSIVE CODE DISCOVERY AND TESTING")
print("="*70)

# Discover all codes - returns Dict[str, Code]
discovered = discover_all_codes(max_qubits=50, include_qldpc=False)
print(f"\nDiscovered {len(discovered)} codes:\n")

# Separate CSS and non-CSS codes (CSS == exposes both `hx` and `hz`)
css_codes = {name: code for name, code in discovered.items() if hasattr(code, 'hx') and hasattr(code, 'hz')}
non_css_codes = {name: code for name, code in discovered.items() if not (hasattr(code, 'hx') and hasattr(code, 'hz'))}

print(f"CSS codes: {len(css_codes)}")
print(f"Non-CSS codes: {len(non_css_codes)}")

# Test all CSS codes with memory experiment
print("\n" + "="*70)
print("MEMORY EXPERIMENT RESULTS (CSS Codes)")
print("="*70)
print(f"\n{'Code':<35} {'[[n,k,d]]':<12} {'LER':<10} {'Status':<10}")
print("-"*70)

# One dict per CSS code; 'status' is one of PASS / WARN / ERROR / SKIP.
test_results = []
p = 0.01
shots = 3000  # Reduced for faster testing

for name, code in css_codes.items():
    n = code.n
    k = code.k
    d = code.metadata.get('distance', '?')
    code_params = f"[[{n},{k},{d}]]"

    try:
        # Check CSS orthogonality first
        orthogonal = np.all(np.dot(code.hx, code.hz.T) % 2 == 0)
        if not orthogonal:
            print(f"{name:<35} {code_params:<12} {'N/A':<10} {'SKIP-ortho':<10}")
            test_results.append({'name': name, 'status': 'SKIP', 'reason': 'CSS orthogonality'})
            continue

        # Run memory experiment
        noise = CircuitDepolarizingNoise(p1=p, p2=p)
        exp = CSSMemoryExperiment(code=code, rounds=3, noise_model=noise)
        result = exp.run_decode(shots=shots)
        ler = result['logical_error_rate']

        # Determine threshold based on distance. Any non-numeric distance
        # ('?', None, ...) is "unknown" and gets the lenient threshold; comparing
        # it with `< 3` would raise TypeError and mislabel a successful run as ERROR.
        distance_known = isinstance(d, (int, float, np.integer, np.floating))
        if not distance_known or d < 3:
            threshold = 0.20  # Lenient for detection codes
        elif d == 3:
            threshold = 0.12
        else:
            threshold = 0.08

        status = 'PASS' if ler < threshold else 'WARN'
        print(f"{name:<35} {code_params:<12} {ler:<10.4f} {status:<10}")
        test_results.append({'name': name, 'ler': ler, 'status': status, 'threshold': threshold})

    except Exception as e:
        # Best-effort sweep: record the failure and move on to the next code.
        error_msg = str(e)[:30]
        print(f"{name:<35} {code_params:<12} {'ERROR':<10} {error_msg}")
        test_results.append({'name': name, 'status': 'ERROR', 'error': str(e)})

# Summary
print("\n" + "="*70)
print("SUMMARY")
print("="*70)
passed = sum(1 for r in test_results if r['status'] == 'PASS')
warned = sum(1 for r in test_results if r['status'] == 'WARN')
errors = sum(1 for r in test_results if r['status'] == 'ERROR')
skipped = sum(1 for r in test_results if r['status'] == 'SKIP')

print(f"Passed: {passed}")
print(f"Warned: {warned}")
print(f"Errors: {errors}")
print(f"Skipped: {skipped}")
print(f"Total CSS codes tested: {len(test_results)}")

if errors == 0:
    print("\n✓ ALL CSS CODES FUNCTIONAL")
else:
    print(f"\n⚠ {errors} codes had errors - investigate")

COMPREHENSIVE CODE DISCOVERY AND TESTING

Discovered 24 codes:

CSS codes: 19
Non-CSS codes: 5

MEMORY EXPERIMENT RESULTS (CSS Codes)

Code                                [[n,k,d]]    LER        Status    
----------------------------------------------------------------------
[run_decode] Code distance: 2
[run_decode] Distance 2 <= 2: Using detection-only path
[run_decode/detection] --- starting detection path ---
[run_decode/detection] shots = 3000
[run_decode/detection] circuit length = 48
[run_decode/detection] det_samples.shape = (3000, 7)
[run_decode/detection] obs_samples.shape = (3000, 1)
[run_decode/detection] logical_error_count = 73
[run_decode/detection] syndrome_nonzero_count = 498
[run_decode/detection] undetected_errors = 0
[run_decode/detection] detection_efficiency = 1.0
[run_decode/detection] logical_error_rate = 0.024333333333333332
FourQubit422_[[4,2,?]]              [[4,2,2]]    0.0243     PASS      
[run_decode] Code distance: 2
[run_decode] Distance 2 <= 2: Using 

In [18]:
# Recap of the discovery / memory-experiment cell.
# Reads the variables that cell left behind: discovered, css_codes,
# non_css_codes, passed, warned, errors, skipped and test_results.
print("=" * 50, "COMPREHENSIVE TEST SUMMARY", "=" * 50, sep="\n")

# Discovery counts
print(f"Total discovered: {len(discovered)} codes")
print(f"CSS codes: {len(css_codes)}")
print(f"Non-CSS codes: {len(non_css_codes)}")
print()

# Outcome tally of the memory experiments
print(f"Memory experiment results:")
print(f"  Passed: {passed}")
print(f"  Warned: {warned}")
print(f"  Errors: {errors}")
print(f"  Skipped: {skipped}")
print()

# Verdict; on failure, list each failing code with up to 50 chars of its message
if errors != 0:
    print(f"⚠ {errors} codes failed")
    for r in test_results:
        if r['status'] != 'ERROR':
            continue
        print(f"   - {r['name']}: {r.get('error', 'unknown')[:50]}")
else:
    print("✓ ALL CSS CODES WORK WITH MEMORY EXPERIMENTS")

COMPREHENSIVE TEST SUMMARY
Total discovered: 24 codes
CSS codes: 19
Non-CSS codes: 5

Memory experiment results:
  Passed: 19
  Warned: 0
  Errors: 0
  Skipped: 0

✓ ALL CSS CODES WORK WITH MEMORY EXPERIMENTS
