In [1]:
# ==========================================================================
# Cell 1a: SETUP AND IMPORTS - COMPUTATION
# ==========================================================================
import sys
import time
import numpy as np
import warnings
from IPython.display import clear_output

clear_output(wait=True)
warnings.filterwarnings('ignore')

# Import testing utilities - provides all shared infrastructure
from qectostim.testing import (
    # Status indicators
    STATUS_OK, STATUS_WARN, STATUS_SKIP, STATUS_FAIL,
    # Module management
    clear_qectostim_modules,
    # Test function
    test_decoder_on_code,
    # Decoder/code loading utilities
    load_all_decoders,
    discover_and_categorize_codes,
    print_code_summary,
    # Results formatting
    format_status,
    compute_summary_stats,
)

# Clear module cache for fresh testing
clear_qectostim_modules()

# Discover and categorize all codes WITH FAILURE REPORTING
# NOTE: Bosonic (GKP, rotor), qudit (Galois), and fracton (XCube, Haah) codes
# are excluded by default as they don't work with standard qubit decoders
categories, all_codes, failures = discover_and_categorize_codes(
    max_qubits=100,
    include_qldpc=True,
    include_subsystem=True,
    include_floquet=True,
    include_bosonic=False,   # Exclude - continuous variables
    include_qudit=False,     # Exclude - d>2 dimensions
    include_fracton=False,   # Exclude - exotic excitations
    report_failures=True,    # Report codes that failed to instantiate
)

print(f"✓ Discovered {len(all_codes)} codes in {len(categories)} categories")

✓ Discovered 76 codes in 6 categories


In [2]:
# ==========================================================================
# Cell 1b: SETUP AND IMPORTS - DISPLAY
# ==========================================================================

# Print summary
print_code_summary(categories, "DECODER SMOKE TEST - ALL CODE TYPES")

# Show failed codes if any
if failures:
    print("\n" + "=" * 70)
    print("FAILED TO INSTANTIATE")
    print("=" * 70)
    print(f"\n{len(failures)} codes failed to load:\n")
    
    # Group by error type for readability
    skipped = {k: v for k, v in failures.items() if v.startswith("SKIPPED:")}
    timeout = {k: v for k, v in failures.items() if v.startswith("TIMEOUT:")}
    not_found = {k: v for k, v in failures.items() if v.startswith("NOT_FOUND:")}
    errors = {k: v for k, v in failures.items() 
              if not v.startswith(("SKIPPED:", "TIMEOUT:", "NOT_FOUND:"))}
    
    if errors:
        print(f"ERRORS ({len(errors)} codes - bugs to fix):")
        for name, error in sorted(errors.items()):
            print(f"  ❌ {name}: {error}")
    
    if not_found:
        print(f"\nNOT FOUND ({len(not_found)} codes):")
        for name in sorted(not_found.keys()):
            print(f"  ❓ {name}")
    
    if timeout:
        print(f"\nTIMEOUT ({len(timeout)} codes):")
        for name in sorted(timeout.keys()):
            print(f"  ⏱️ {name}")
    
    if skipped:
        print(f"\nSKIPPED ({len(skipped)} codes - known slow/special):")
        for name in sorted(skipped.keys()):
            print(f"  ⏭️ {name}")

DECODER SMOKE TEST - ALL CODE TYPES

Total discovered: 76 codes
  CSS: 47
  Color: 4
  Non-CSS: 5
  Subsystem: 4
  Floquet: 6
  QLDPC: 10

CSS Codes:
Code Name                                   n   k   d
-------------------------------------------------------
  FourQubit422_[[4,2,2]]                    4   2   2
  C6                                        6   2   2
  Steane_713                                7   1   3
  Shor_91                                   9   1   3
  ReedMuller_15_1_3                        15   1   3
  Hamming_CSS_7                             7   1   3
  Hamming_CSS_15                           15   7   3
  Hamming_CSS_31                           31  21   3
  Code_832                                  8   3   2
  Repetition_3                              3   1   3
  Repetition_5                              5   1   5
  Repetition_7                              7   1   7
  RotatedSurface_[[9,1,3]]                  9   1   3
  RotatedSurface_[[25,1,5]]           

In [3]:
# Cell 2: Load Available Decoders - Using testing_utils

decoder_classes = load_all_decoders()
print(f"Loaded {len(decoder_classes)} decoders: {list(decoder_classes.keys())}")

Loaded 13 decoders: ['PyMatching', 'FusionBlossom', 'BeliefMatching', 'BPOSD', 'Tesseract', 'UnionFind', 'MLE', 'Hypergraph', 'Chromobius', 'Concatenated', 'FlatConcat', 'Hierarchical', 'SingleShot']


In [4]:
# ==========================================================================
# Cell 3a: DECODER × CODE COMPATIBILITY MATRIX - COMPUTATION
# ==========================================================================
from IPython.display import clear_output
clear_output(wait=True)

p = 0.001
shots = 5000
rounds = 1

# Store results
full_results = {}
dec_names = list(decoder_classes.keys())

# Process each code
for code_name, (code_type, code) in all_codes.items():
    full_results[code_name] = {'type': code_type}
    
    for dec_name in dec_names:
        result = test_decoder_on_code(
            code=code,
            decoder_class=decoder_classes[dec_name],
            decoder_name=dec_name,
            code_type=code_type,
            p=p,
            shots=shots,
            rounds=rounds
        )
        full_results[code_name][dec_name] = result.to_dict()

print(f"✓ Tested {len(all_codes)} codes × {len(dec_names)} decoders = {len(all_codes) * len(dec_names)} combinations")

✓ Tested 76 codes × 13 decoders = 988 combinations
✓ Tested 76 codes × 13 decoders = 988 combinations
✓ Tested 76 codes × 13 decoders = 988 combinations


In [5]:
# ==========================================================================
# Cell 3b: DECODER × CODE COMPATIBILITY MATRIX - DISPLAY
# ==========================================================================

print("="*130)
print("DECODER × CODE COMPATIBILITY MATRIX - ALL CODE TYPES")
print("="*130)
print(f"\nTested {len(all_codes)} codes at p={p}, {shots} shots")
print("Code types: CSS, Non-CSS, Subsystem, Floquet, QLDPC, Color")
print("Note: Chromobius requires color-code-like DEMs (shows SKIP for other codes)\n")

# Header
header = f"{'Code':<35} {'Type':<10}"
for dec_name in dec_names:
    header += f" | {dec_name[:10]:^12}"
print(header)
print("-" * len(header))

# Display each code
for code_name, code_results in full_results.items():
    code_type = code_results.get('type', 'Unknown')
    row = f"{code_name:<35} {code_type:<10}"
    
    for dec_name in dec_names:
        res = code_results.get(dec_name, {})
        status = res.get('status') if isinstance(res, dict) else None
        ler = res.get('ler') if isinstance(res, dict) else None
        warnings_list = res.get('warnings', []) if isinstance(res, dict) else []
        
        if status == 'OK':
            if warnings_list:
                cell = f"⚠️{ler:.4f}" if ler is not None else "⚠️N/A"
            else:
                cell = f"✓ {ler:.4f}" if ler is not None else "✓ N/A"    
        elif status == 'WARN':
            cell = f"⚠️WARN"
        elif status == 'SKIP':
            cell = f"- SKIP"
        else:
            cell = f"✗ FAIL"
        
        row += f" | {cell:^12}"
    
    print(row)

print("-" * len(header))
print(f"\nLegend: ✓=pass, ⚠️=warning, -=skip (expected), ✗=fail")
print(f"Total codes tested: {len(full_results)}")

DECODER × CODE COMPATIBILITY MATRIX - ALL CODE TYPES

Tested 76 codes at p=0.001, 5000 shots
Code types: CSS, Non-CSS, Subsystem, Floquet, QLDPC, Color
Note: Chromobius requires color-code-like DEMs (shows SKIP for other codes)

Code                                Type       |  PyMatching  |  FusionBlos  |  BeliefMatc  |    BPOSD     |  Tesseract   |  UnionFind   |     MLE      |  Hypergraph  |  Chromobius  |  Concatenat  |  FlatConcat  |  Hierarchic  |  SingleShot 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
FourQubit422_[[4,2,2]]              CSS        |   ✓ 0.0022   |   ✓ 0.0010   |   ✓ 0.0008   |   ✓ 0.0022   |   ✓ 0.0028   |   ✓ 0.0022   |   ✓ 0.0014   |   ✓ 0.0024   |    - SKIP    |    - SKIP    |    - SKIP    |    - SKIP    |    - SKIP   
C6                                  CSS      

In [6]:
# Cell 4: LER COMPARISON TABLE - ALL CODES
from IPython.display import clear_output
clear_output(wait=True)

print("="*140)
print("LER COMPARISON TABLE (p=0.01) - ALL CODE TYPES")
print("="*140)
print(f"\nTotal codes in all_codes: {len(all_codes)}")
print(f"Total codes in full_results: {len(full_results)}")
print("\nLower is better. Best decoder for each code highlighted.")
print("SKIP = decoder incompatible (e.g., Chromobius requires color-code DEMs)\n")

# Header
dec_names = list(decoder_classes.keys())
header = f"{'Code':<35} | {'Type':<10} | {'d':>2} | {'No-decode':>10}"
for dec_name in dec_names:
    header += f" | {dec_name[:10]:>10}"
header += " | Best"
print(header)
print("-" * len(header))

# Use full_results directly since it has all the data from Cell 4
for code_name in sorted(full_results.keys()):
    code_results = full_results[code_name]
    code_type = code_results.get('type', 'Unknown')
    
    # Get code object for distance
    code = all_codes.get(code_name, (None, None))[1]
    d = code.metadata.get('distance', '?') if code else '?'
    
    # Get no-decode LER
    ler_no_decode = None
    for key, res in code_results.items():
        if isinstance(res, dict) and res.get('ler_no_decode') is not None:
            ler_no_decode = res['ler_no_decode']
            break
    
    nd_str = f"{ler_no_decode:.4f}" if ler_no_decode else 'N/A'
    row = f"{code_name:<35} | {code_type:<10} | {str(d):>2} | {nd_str:>10}"
    
    # Find best decoder
    best_ler = float('inf')
    best_decoder = None
    
    for dec_name in dec_names:
        res = code_results.get(dec_name, {})
        status = res.get('status') if isinstance(res, dict) else None
        ler = res.get('ler') if isinstance(res, dict) else None
        
        if status == 'SKIP':
            ler_str = 'SKIP'
        elif ler is not None:
            ler_str = f"{ler:.4f}"
            if ler < best_ler:
                best_ler = ler
                best_decoder = dec_name
        else:
            ler_str = 'FAIL'
        row += f" | {ler_str:>10}"
    
    best_str = best_decoder[:10] if best_decoder else 'N/A'
    row += f" | {best_str}"
    print(row)

print("-" * len(header))
print(f"\nTotal rows: {len(full_results)}")

LER COMPARISON TABLE (p=0.01) - ALL CODE TYPES

Total codes in all_codes: 76
Total codes in full_results: 76

Lower is better. Best decoder for each code highlighted.
SKIP = decoder incompatible (e.g., Chromobius requires color-code DEMs)

Code                                | Type       |  d |  No-decode | PyMatching | FusionBlos | BeliefMatc |      BPOSD |  Tesseract |  UnionFind |        MLE | Hypergraph | Chromobius | Concatenat | FlatConcat | Hierarchic | SingleShot | Best
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
BB_6x6                              | QLDPC      |  ? |        N/A |       FAIL |       FAIL |       FAIL |       FAIL |       FAIL |       FAIL |       FAIL |       FAIL |       FAIL |       FAIL |       FAIL |       FAIL |       FAIL | N/A
BaconShor_3x3                   

In [7]:
# ==========================================================================
# Cell 5a: STIM REFERENCE VALIDATION - COMPUTATION
# ==========================================================================
"""
Compare our implementation vs Stim's built-in Circuit.generated()

Available Stim generated circuits:
- surface_code:rotated_memory_z / rotated_memory_x - Rotated surface codes
- repetition_code:memory - Repetition codes
- color_code:memory_xyz - ONLY XYZ color code (joint measurements with C_XYZ gates)

NOTE: Stim does NOT have a CSS color code variant (memory_z/memory_x).
Our CSS color code (TriangularColourCode + ColorCodeMemoryExperiment + Chromobius)
has no direct Stim reference for comparison but works correctly with Chromobius.

For XYZ color codes:
- Use TriangularColourCodeXYZ + XYZColorCodeMemoryExperiment + PyMatching
- Chromobius CANNOT decode XYZ codes (requires separate X/Z stabilizers)
"""

import stim
import sys
import numpy as np

# Clear module cache to ensure fresh imports with latest fixes
for m in [m for m in sys.modules if m.startswith('qectostim')]:
    del sys.modules[m]

from qectostim.codes.surface import RotatedSurfaceCode
from qectostim.codes.small import RepetitionCode
from qectostim.codes.color import TriangularColourCodeXYZ
from qectostim.experiments.memory import CSSMemoryExperiment, XYZColorCodeMemoryExperiment
from qectostim.noise.models import StimStyleDepolarizingNoise
from qectostim.decoders import PyMatchingDecoder, FusionBlossomDecoder

# QUICK_MODE: Set to True for fast CI testing, False for comprehensive testing
QUICK_MODE = True  # Fast validation for smoke test

# Configuration
if QUICK_MODE:
    STIM_SHOTS = 2000
    STIM_ROUNDS = 5
    STIM_CODES = [
        ("surface_code:rotated_memory_z", [3, 5]),
        ("repetition_code:memory", [5, 7]),
        ("color_code:memory_xyz", [3, 5]),    # XYZ color code - PyMatching only!
    ]
    STIM_NOISE_LEVELS = [0.005, 0.01]
else:
    STIM_SHOTS = 10000
    STIM_ROUNDS = 7
    STIM_CODES = [
        ("surface_code:rotated_memory_z", [3, 5, 7]),
        ("surface_code:rotated_memory_x", [3, 5]),
        ("repetition_code:memory", [5, 7, 9]),
        ("color_code:memory_xyz", [3, 5, 7]), # XYZ color code - PyMatching only!
    ]
    STIM_NOISE_LEVELS = [0.001, 0.005, 0.01]

def get_our_code(stim_task: str, distance: int):
    """Return the appropriate code class for comparison with Stim."""
    if "surface_code:rotated" in stim_task:
        return RotatedSurfaceCode(distance=distance)
    elif "repetition_code" in stim_task:
        return RepetitionCode(N=distance)
    elif "color_code:memory_xyz" in stim_task:
        return TriangularColourCodeXYZ(distance=distance)
    return None

def get_stim_circuit(stim_task: str, distance: int, rounds: int, p: float):
    return stim.Circuit.generated(
        stim_task,
        distance=distance,
        rounds=rounds,
        after_clifford_depolarization=p,
    )

def get_our_circuit(code, rounds: int, p: float, basis: str = "Z", code_type: str = "css"):
    """Build our circuit for comparison."""
    noise = StimStyleDepolarizingNoise(p=p)
    if code_type == "color_xyz":
        exp = XYZColorCodeMemoryExperiment(code=code, rounds=rounds, noise_model=None, basis=basis)
    else:
        exp = CSSMemoryExperiment(code=code, rounds=rounds, noise_model=None, basis=basis)
    return noise.apply(exp.to_stim())

def compute_ler(circuit, decoder_class, shots: int, ignore_decomp: bool = False):
    try:
        if ignore_decomp:
            dem = circuit.detector_error_model(decompose_errors=True, ignore_decomposition_failures=True)
        else:
            dem = circuit.detector_error_model(decompose_errors=True)
        decoder = decoder_class(dem=dem)
        sampler = circuit.compile_detector_sampler()
        samples = sampler.sample(shots, append_observables=True)
        det_samples = samples[:, :circuit.num_detectors]
        obs_samples = samples[:, circuit.num_detectors:]
        predictions = decoder.decode_batch(det_samples)
        if len(predictions.shape) == 1:
            predictions = predictions.reshape(-1, 1)
        errors = np.any(predictions != obs_samples, axis=1)
        return float(np.mean(errors))
    except Exception as e:
        return None

def compute_no_decode_ler(circuit, shots: int):
    try:
        sampler = circuit.compile_detector_sampler()
        samples = sampler.sample(shots, append_observables=True)
        obs_samples = samples[:, circuit.num_detectors:]
        return float(np.mean(obs_samples))
    except:
        return None

# Run all comparisons and store results
print("Running Stim reference validation...")
validation_results = {}
validation_details = []  # Store detailed results for display

for stim_task, distances in STIM_CODES:
    basis = "X" if "_x" in stim_task else "Z"
    is_xyz_color = "color_code:memory_xyz" in stim_task
    
    if is_xyz_color:
        code_type = "color_xyz"
        decoders_to_test = [("PyMatching", PyMatchingDecoder)]
        ignore_decomp = True
    else:
        code_type = "css"
        decoders_to_test = [("PyMatching", PyMatchingDecoder), ("FusionBlossom", FusionBlossomDecoder)]
        ignore_decomp = False
    
    for distance in distances:
        for p in STIM_NOISE_LEVELS:
            result_row = {
                'stim_task': stim_task,
                'distance': distance,
                'p': p,
                'decoders': [],
            }
            
            try:
                stim_circuit = get_stim_circuit(stim_task, distance, STIM_ROUNDS, p)
                our_code = get_our_code(stim_task, distance)
                
                if our_code is None:
                    result_row['error'] = 'NO CODE'
                    validation_details.append(result_row)
                    continue
                
                our_circuit = get_our_circuit(our_code, STIM_ROUNDS, p, basis, code_type)
                
                # No-decode comparison
                stim_nd = compute_no_decode_ler(stim_circuit, STIM_SHOTS)
                our_nd = compute_no_decode_ler(our_circuit, STIM_SHOTS)
                result_row['stim_nd'] = stim_nd
                result_row['our_nd'] = our_nd
                result_row['nd_ratio'] = our_nd / stim_nd if stim_nd and our_nd and stim_nd > 0 else None
                
                # DEM error count comparison
                if ignore_decomp:
                    stim_dem = stim_circuit.detector_error_model(decompose_errors=True, ignore_decomposition_failures=True)
                    our_dem = our_circuit.detector_error_model(decompose_errors=True, ignore_decomposition_failures=True)
                else:
                    stim_dem = stim_circuit.detector_error_model(decompose_errors=True)
                    our_dem = our_circuit.detector_error_model(decompose_errors=True)
                
                stim_errs = len([e for e in stim_dem.flattened() if e.type == 'error'])
                our_errs = len([e for e in our_dem.flattened() if e.type == 'error'])
                result_row['stim_errs'] = stim_errs
                result_row['our_errs'] = our_errs
                result_row['dem_match'] = stim_errs == our_errs
                
                # Decoder comparison
                for dec_name, dec_class in decoders_to_test:
                    stim_ler = compute_ler(stim_circuit, dec_class, STIM_SHOTS, ignore_decomp)
                    our_ler = compute_ler(our_circuit, dec_class, STIM_SHOTS, ignore_decomp)
                    
                    dec_result = {'name': dec_name, 'stim_ler': stim_ler, 'our_ler': our_ler}
                    
                    if stim_ler is not None and our_ler is not None and stim_ler > 0:
                        ratio = our_ler / stim_ler
                        dec_result['ratio'] = ratio
                        key = (stim_task, distance, p, dec_name)
                        validation_results[key] = {'stim_ler': stim_ler, 'our_ler': our_ler, 'ratio': ratio}
                    elif stim_ler == 0 and our_ler == 0:
                        dec_result['ratio'] = 'BOTH_0'
                    else:
                        dec_result['ratio'] = None
                    
                    result_row['decoders'].append(dec_result)
                    
            except Exception as e:
                result_row['error'] = str(e)[:50]
            
            validation_details.append(result_row)

print(f"✓ Completed {len(validation_details)} test configurations")

Running Stim reference validation...
✓ Completed 12 test configurations
✓ Completed 12 test configurations
✓ Completed 12 test configurations


In [8]:
# ==========================================================================
# Cell 5b: STIM REFERENCE VALIDATION - DISPLAY
# ==========================================================================

# Pretty print validation results
print("=" * 80)
print("STIM REFERENCE VALIDATION RESULTS")
print("=" * 80)

for row in validation_details:
    task_short = row['stim_task'].replace("_code:", "/").replace("rotated_memory", "rot")
    print(f"\n{task_short} d={row['distance']} p={row['p']:.4f}")
    
    if 'error' in row:
        print(f"  ❌ ERROR: {row['error']}")
        continue
    
    # DEM comparison
    dem_icon = "✓" if row.get('dem_match') else "✗"
    print(f"  DEM errors: {row.get('our_errs', '?')}/{row.get('stim_errs', '?')} {dem_icon}")
    
    # Decoder results
    for dec in row.get('decoders', []):
        if dec.get('ratio') is not None and dec.get('ratio') not in ['BOTH_0', None]:
            ratio = dec['ratio']
            icon = "✓" if 0.5 < ratio < 2.0 else "?"
            print(f"  {dec['name']}: LER ratio={ratio:.3f} {icon}")
        elif dec.get('ratio') == 'BOTH_0':
            print(f"  {dec['name']}: LER=0 for both ✓")
        else:
            stim_ler = dec.get('stim_ler', 'None')
            our_ler = dec.get('our_ler', 'None')
            print(f"  {dec['name']}: Stim={stim_ler}, Ours={our_ler}")

# Summary statistics
if validation_results:
    ratios = [v['ratio'] for v in validation_results.values() if isinstance(v.get('ratio'), (int, float))]
    if ratios:
        print("\n" + "=" * 80)
        print("SUMMARY")
        print("=" * 80)
        print(f"  Tests completed: {len(validation_results)}")
        print(f"  Average LER ratio: {np.mean(ratios):.3f} ± {np.std(ratios):.3f}")
        print(f"  Range: {min(ratios):.3f} - {max(ratios):.3f}")
        within_2x = sum(1 for r in ratios if 0.5 < r < 2.0) / len(ratios) * 100
        print(f"  Within 2x: {within_2x:.0f}%")
        status = "GOOD" if np.mean(ratios) > 0.5 and np.mean(ratios) < 2.0 and within_2x > 80 else "CHECK"
        print(f"  Status: {status}")

STIM REFERENCE VALIDATION RESULTS

surface/rot_z d=3 p=0.0050
  DEM errors: 570/570 ✓
  PyMatching: LER ratio=1.353 ✓
  FusionBlossom: LER ratio=0.881 ✓

surface/rot_z d=3 p=0.0100
  DEM errors: 570/570 ✓
  PyMatching: LER ratio=1.090 ✓
  FusionBlossom: LER ratio=0.877 ✓

surface/rot_z d=5 p=0.0050
  DEM errors: 1953/1953 ✓
  PyMatching: LER ratio=0.667 ✓
  FusionBlossom: LER ratio=0.924 ✓

surface/rot_z d=5 p=0.0100
  DEM errors: 1953/1953 ✓
  PyMatching: LER ratio=1.204 ✓
  FusionBlossom: LER ratio=0.971 ✓

repetition/memory d=5 p=0.0050
  DEM errors: 95/95 ✓
  PyMatching: LER=0 for both ✓
  FusionBlossom: LER=0 for both ✓

repetition/memory d=5 p=0.0100
  DEM errors: 95/95 ✓
  PyMatching: LER=0 for both ✓
  FusionBlossom: LER=0 for both ✓

repetition/memory d=7 p=0.0050
  DEM errors: 135/135 ✓
  PyMatching: LER=0 for both ✓
  FusionBlossom: LER=0 for both ✓

repetition/memory d=7 p=0.0100
  DEM errors: 135/135 ✓
  PyMatching: LER=0 for both ✓
  FusionBlossom: LER=0 for both ✓

color

In [9]:
# Cell 6: DECODER SPEED COMPARISON
from IPython.display import clear_output
clear_output(wait=True)

print("="*110)
print("DECODER SPEED COMPARISON (time in ms for 1000 shots)")
print("="*110)
print(f"\nTotal codes in full_results: {len(full_results)}\n")

# Header
dec_names = list(decoder_classes.keys())
header = f"{'Code':<30} {'Type':<10}"
for dec_name in dec_names:
    header += f" | {dec_name[:10]:>10}"
print(header)
print("-" * len(header))

# Use full_results directly since it has all the data from Cell 4
for code_name in sorted(full_results.keys()):
    code_results = full_results[code_name]
    code_type = code_results.get('type', 'Unknown')
    row = f"{code_name:<30} {code_type:<10}"
    
    for dec_name in dec_names:
        res = code_results.get(dec_name, {})
        time_ms = res.get('time_ms') if isinstance(res, dict) else None
        time_str = f"{time_ms:.1f}" if time_ms else 'FAIL'
        row += f" | {time_str:>10}"
    
    print(row)

print("-" * len(header))
print(f"\nTotal rows: {len(full_results)}")

DECODER SPEED COMPARISON (time in ms for 1000 shots)

Total codes in full_results: 76

Code                           Type       | PyMatching | FusionBlos | BeliefMatc |      BPOSD |  Tesseract |  UnionFind |        MLE | Hypergraph | Chromobius | Concatenat | FlatConcat | Hierarchic | SingleShot
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
BB_6x6                         QLDPC      |       FAIL |       FAIL |       FAIL |       FAIL |       FAIL |       FAIL |       FAIL |       FAIL |       FAIL |       FAIL |       FAIL |       FAIL |       FAIL
BaconShor_3x3                  Subsystem  |        0.1 |        6.2 |       16.4 |       16.2 |       42.7 |        0.1 |        1.7 |        2.2 |       FAIL |       FAIL |       FAIL |       FAIL |       FAIL
BalancedProduct_5x5_G1         QLDPC      |       FAIL |       FAIL |

In [10]:
# ==========================================================================
# Cell 7a: NOISE LEVEL SCALING TEST - COMPUTATION
# ==========================================================================

# Pick a representative CSS code (RotatedSurface d=3) 
test_code_name = None
test_code_type = None
for name, (ctype, code) in all_codes.items():
    if 'RotatedSurface' in name and code.metadata.get('distance') == 3:
        test_code_name = name
        test_code_type = ctype
        break

if test_code_name is None:
    test_code_name = list(all_codes.keys())[0]
    test_code_type = all_codes[test_code_name][0]

test_code = all_codes[test_code_name][1]
noise_levels = [0.001, 0.005, 0.01, 0.02]

# Store results
noise_scaling_results = {}

for p_val in noise_levels:
    noise_scaling_results[p_val] = {}
    ler_no_decode = None
    
    for dec_name in dec_names:
        result = test_decoder_on_code(
            code=test_code,
            code_type=test_code_type,
            decoder_name=dec_name,
            decoder_class=decoder_classes[dec_name],
            p=p_val,
            shots=2000,
            rounds=3
        )
        
        if ler_no_decode is None and result.ler_no_decode is not None:
            ler_no_decode = result.ler_no_decode
        
        noise_scaling_results[p_val][dec_name] = result.ler
    
    noise_scaling_results[p_val]['no_decode'] = ler_no_decode

print(f"✓ Tested {test_code_name} at {len(noise_levels)} noise levels")

✓ Tested RotatedSurface_[[9,1,3]] at 4 noise levels


In [11]:
# ==========================================================================
# Cell 7b: NOISE LEVEL SCALING TEST - DISPLAY
# ==========================================================================

print("="*110)
print("NOISE LEVEL SCALING TEST")
print("="*110)
print(f"\nTesting {test_code_name} ({test_code_type}) across noise levels with all decoders...\n")

# Header
header = f"{'p':<10}"
for dec_name in dec_names:
    header += f" | {dec_name[:12]:>12}"
header += " | No-decode"
print(header)
print("-" * len(header))

for p_val in noise_levels:
    row = f"{p_val:<10.4f}"
    
    for dec_name in dec_names:
        ler = noise_scaling_results[p_val].get(dec_name)
        ler_str = f"{ler:.6f}" if ler is not None else 'FAIL'
        row += f" | {ler_str:>12}"
    
    nd = noise_scaling_results[p_val].get('no_decode')
    nd_str = f"{nd:.6f}" if nd else 'N/A'
    row += f" | {nd_str}"
    print(row)

print("-" * len(header))

NOISE LEVEL SCALING TEST

Testing RotatedSurface_[[9,1,3]] (CSS) across noise levels with all decoders...

p          |   PyMatching | FusionBlosso | BeliefMatchi |        BPOSD |    Tesseract |    UnionFind |          MLE |   Hypergraph |   Chromobius | Concatenated |   FlatConcat | Hierarchical |   SingleShot | No-decode
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
0.0010     |     0.000000 |     0.008500 |     0.003500 |     0.000500 |     0.001000 |     0.000000 |     0.000000 |     0.018500 |         FAIL |         FAIL |         FAIL |         FAIL |         FAIL | 0.009500
0.0050     |     0.002000 |     0.050500 |     0.005000 |     0.006000 |     0.006000 |     0.006000 |     0.013000 |     0.075000 |         FAIL |         FAIL |         FAIL |         FAIL |         FAIL | 0.047000
0.0100     |     0.0180

In [12]:
# ==========================================================================
# Cell 8a: MULTI-CODE NOISE SCALING (Threshold Behavior) - COMPUTATION
# ==========================================================================
# Test multiple codes at multiple distances to demonstrate threshold behavior
# Below threshold: LER decreases with distance
# Above threshold: LER increases with distance

# Clear module cache to ensure fresh imports with latest TICK fixes
import sys
for m in [m for m in sys.modules if m.startswith('qectostim')]:
    del sys.modules[m]

from qectostim.codes.surface import RotatedSurfaceCode
from qectostim.codes.surface import ToricCode
from qectostim.codes.small import SteanCode713, ShorCode91
from qectostim.experiments.memory import CSSMemoryExperiment
from qectostim.noise.models import CircuitDepolarizingNoise
from qectostim.decoders import PyMatchingDecoder

# QUICK_MODE inherited from Stim validation cell (or define here if running standalone)
try:
    QUICK_MODE
except NameError:
    QUICK_MODE = True

# Configuration based on mode
if QUICK_MODE:
    SCALING_SHOTS = 100_000
    SCALING_ROUNDS = 3
    SCALING_CODES = {
        "RotatedSurface": [3, 5],
        "Toric": [(3, 3), (5, 5)],
    }
    SCALING_NOISE = [0.002, 0.005, 0.01, 0.015]
else:
    SCALING_SHOTS = 300_000
    SCALING_ROUNDS = 5
    SCALING_CODES = {
        "RotatedSurface": [3, 5, 7],
        "Toric": [(3, 3), (5, 5), (7, 7)],
        "Steane": [7],
        "Shor": [9],
    }
    SCALING_NOISE = [0.001, 0.002, 0.005, 0.008, 0.01, 0.012, 0.015, 0.02]

def instantiate_code(code_family: str, param):
    """Instantiate a code by family name and parameter."""
    if code_family == "RotatedSurface":
        return RotatedSurfaceCode(distance=param), f"d={param}"
    elif code_family == "Toric":
        return ToricCode(Lx=param[0], Ly=param[1]), f"{param[0]}x{param[1]}"
    elif code_family == "Steane":
        return SteanCode713(), "[[7,1,3]]"
    elif code_family == "Shor":
        return ShorCode91(), "[[9,1,3]]"
    else:
        return None, "Unknown"

def compute_ler_for_scaling(code, p: float, rounds: int, shots: int):
    """Compute LER for a code at given noise level."""
    try:
        noise = CircuitDepolarizingNoise(p1=p, p2=p)
        exp = CSSMemoryExperiment(code=code, rounds=rounds, noise_model=None)
        circuit = noise.apply(exp.to_stim())
        
        if circuit.num_detectors == 0:
            return None
        
        dem = circuit.detector_error_model(decompose_errors=True)
        decoder = PyMatchingDecoder(dem=dem)
        
        sampler = circuit.compile_detector_sampler()
        samples = sampler.sample(shots, append_observables=True)
        
        det_samples = samples[:, :circuit.num_detectors]
        obs_samples = samples[:, circuit.num_detectors:]
        
        predictions = decoder.decode_batch(det_samples)
        if len(predictions.shape) == 1:
            predictions = predictions.reshape(-1, 1)
        
        errors = np.any(predictions != obs_samples, axis=1)
        return float(np.mean(errors))
    except Exception as e:
        return None

# Store results for analysis
scaling_results = {}
scaling_trends = {}  # Store trends for display

for code_family, params in SCALING_CODES.items():
    scaling_trends[code_family] = []
    
    for p in SCALING_NOISE:
        lers = []
        for param in params:
            code, label = instantiate_code(code_family, param)
            ler = compute_ler_for_scaling(code, p, SCALING_ROUNDS, SCALING_SHOTS)
            lers.append(ler)
            if ler is not None:
                key = (code_family, label, p)
                scaling_results[key] = ler
        
        # Determine trend
        valid_lers = [l for l in lers if l is not None]
        if len(valid_lers) >= 2:
            if valid_lers[-1] < valid_lers[0] * 0.8:
                trend = "↓ (sub-thresh)"
            elif valid_lers[-1] > valid_lers[0] * 1.2:
                trend = "↑ (super-thresh)"
            else:
                trend = "~ (threshold)"
        else:
            trend = "N/A"
        
        scaling_trends[code_family].append((p, lers, trend))

print(f"✓ Computed scaling for {len(SCALING_CODES)} code families × {len(SCALING_NOISE)} noise levels")

✓ Computed scaling for 2 code families × 4 noise levels


In [13]:
# ==========================================================================
# Cell 8b: MULTI-CODE NOISE SCALING - DISPLAY
# ==========================================================================

print("="*130)
print("MULTI-CODE NOISE SCALING TEST - Threshold Behavior Analysis")
print("="*130)
print(f"\nMode: {'QUICK' if QUICK_MODE else 'FULL'}")
print(f"Shots: {SCALING_SHOTS}, Rounds: {SCALING_ROUNDS}")
print(f"Decoder: PyMatching")
print("\nExpected behavior:")
print("  - Below threshold: LER decreases as distance increases (↓ with d)")
print("  - Above threshold: LER increases as distance increases (↑ with d)")
print("  - At threshold: LER roughly constant with distance (~)")

for code_family, params in SCALING_CODES.items():
    print(f"\n{'='*100}")
    print(f"Code Family: {code_family}")
    print(f"{'='*100}")
    
    # Header
    code_labels = [instantiate_code(code_family, param)[1] for param in params]
    header = f"{'p':<10}"
    for label in code_labels:
        header += f" | {label:>12}"
    header += " | Trend"
    print(header)
    print("-" * len(header))
    
    for p, lers, trend in scaling_trends[code_family]:
        row = f"{p:<10.4f}"
        for ler in lers:
            if ler is not None:
                row += f" | {ler:>12.6f}"
            else:
                row += f" | {'FAIL':>12}"
        row += f" | {trend}"
        print(row)

# Summary analysis
print("\n" + "="*130)
print("THRESHOLD ANALYSIS SUMMARY")
print("="*130)

for code_family, params in SCALING_CODES.items():
    if len(params) < 2:
        continue
        
    print(f"\n{code_family}:")
    
    for i, p in enumerate(SCALING_NOISE[:-1]):
        p_next = SCALING_NOISE[i + 1]
        
        code_small, label_small = instantiate_code(code_family, params[0])
        code_large, label_large = instantiate_code(code_family, params[-1])
        
        key_small_p = (code_family, label_small, p)
        key_large_p = (code_family, label_large, p)
        key_small_pn = (code_family, label_small, p_next)
        key_large_pn = (code_family, label_large, p_next)
        
        ler_small_p = scaling_results.get(key_small_p)
        ler_large_p = scaling_results.get(key_large_p)
        ler_small_pn = scaling_results.get(key_small_pn)
        ler_large_pn = scaling_results.get(key_large_pn)
        
        if all(x is not None for x in [ler_small_p, ler_large_p, ler_small_pn, ler_large_pn]):
            sub_thresh_at_p = ler_large_p < ler_small_p
            sub_thresh_at_pn = ler_large_pn < ler_small_pn
            
            if sub_thresh_at_p and not sub_thresh_at_pn:
                print(f"  Threshold estimate: between p={p} and p={p_next}")
                break
    else:
        print(f"  Threshold not crossed in tested range")

print("\n" + "="*130)

MULTI-CODE NOISE SCALING TEST - Threshold Behavior Analysis

Mode: QUICK
Shots: 100000, Rounds: 3
Decoder: PyMatching

Expected behavior:
  - Below threshold: LER decreases as distance increases (↓ with d)
  - Above threshold: LER increases as distance increases (↑ with d)
  - At threshold: LER roughly constant with distance (~)

Code Family: RotatedSurface
p          |          d=3 |          d=5 | Trend
------------------------------------------------
0.0020     |     0.000940 |     0.000060 | ↓ (sub-thresh)
0.0050     |     0.004790 |     0.001650 | ↓ (sub-thresh)
0.0100     |     0.019060 |     0.011540 | ↓ (sub-thresh)
0.0150     |     0.039800 |     0.033690 | ~ (threshold)

Code Family: Toric
p          |          3x3 |          5x5 | Trend
------------------------------------------------
0.0020     |         FAIL |         FAIL | N/A
0.0050     |         FAIL |         FAIL | N/A
0.0100     |         FAIL |         FAIL | N/A
0.0150     |         FAIL |         FAIL | N/A

THRE

In [14]:
# ==========================================================================
# Cell 9a: SUB-THRESHOLD ROUNDS SCALING MATRIX - COMPUTATION
# ==========================================================================
# Test Code × Decoder compatibility at low noise with increasing rounds
# Expected: Below threshold, more rounds → lower LER (error correction working!)

# Clear module cache
import sys
for m in [m for m in sys.modules if m.startswith('qectostim')]:
    del sys.modules[m]

from qectostim.codes.surface import RotatedSurfaceCode
from qectostim.codes.surface import ToricCode
from qectostim.codes.small import SteanCode713, ShorCode91, FourQubit422Code
from qectostim.experiments.memory import CSSMemoryExperiment
from qectostim.noise.models import CircuitDepolarizingNoise

try:
    QUICK_MODE
except NameError:
    QUICK_MODE = True

# Configuration
if QUICK_MODE:
    ROUNDS_SHOTS = 100_000
    SUB_THRESH_P = 0.003
    ROUNDS_LIST = [3, 5, 7]
    ROUNDS_CODES = {
        "FourQubit422": FourQubit422Code(),
        "Steane713": SteanCode713(),
        "RotatedSurface_d3": RotatedSurfaceCode(distance=3),
        "RotatedSurface_d5": RotatedSurfaceCode(distance=5),
    }
else:
    ROUNDS_SHOTS = 200_000
    SUB_THRESH_P = 0.003
    ROUNDS_LIST = [3, 5, 7, 11]
    ROUNDS_CODES = {
        "FourQubit422": FourQubit422Code(),
        "Steane713": SteanCode713(),
        "Shor91": ShorCode91(),
        "RotatedSurface_d3": RotatedSurfaceCode(distance=3),
        "RotatedSurface_d5": RotatedSurfaceCode(distance=5),
        "Toric_3x3": ToricCode(Lx=3, Ly=3),
        "Toric_5x5": ToricCode(Lx=5, Ly=5),
    }

# Get decoders
try:
    decoder_classes
except NameError:
    from qectostim.testing import load_all_decoders
    decoder_classes = load_all_decoders()

ROUNDS_DECODERS = ['PyMatching', 'FusionBlossom']
available_decoders = {k: v for k, v in decoder_classes.items() if k in ROUNDS_DECODERS}

def test_rounds_scaling(code, decoder_class, decoder_name: str, rounds_list: list, p: float, shots: int):
    """Test LER at multiple round counts."""
    results = {}
    for rounds in rounds_list:
        try:
            noise = CircuitDepolarizingNoise(p1=p, p2=p)
            exp = CSSMemoryExperiment(code=code, rounds=rounds, noise_model=None)
            circuit = noise.apply(exp.to_stim())
            
            if circuit.num_detectors == 0:
                results[rounds] = None
                continue
            
            dem = circuit.detector_error_model(decompose_errors=True)
            decoder = decoder_class(dem=dem)
            
            sampler = circuit.compile_detector_sampler()
            samples = sampler.sample(shots, append_observables=True)
            
            det_samples = samples[:, :circuit.num_detectors]
            obs_samples = samples[:, circuit.num_detectors:]
            
            predictions = decoder.decode_batch(det_samples)
            if len(predictions.shape) == 1:
                predictions = predictions.reshape(-1, 1)
            
            errors = np.any(predictions != obs_samples, axis=1)
            results[rounds] = float(np.mean(errors))
        except Exception as e:
            results[rounds] = None
    
    return results

def analyze_trend(lers: dict):
    """Analyze trend in LER vs rounds."""
    valid = [(r, l) for r, l in sorted(lers.items()) if l is not None]
    if len(valid) < 2:
        return "N/A", None
    
    first_ler = valid[0][1]
    last_ler = valid[-1][1]
    
    if last_ler < first_ler * 0.7:
        return "✓↓", last_ler / first_ler if first_ler > 0 else None
    elif last_ler > first_ler * 1.3:
        return "⚠↑", last_ler / first_ler if first_ler > 0 else None
    else:
        return "→", last_ler / first_ler if first_ler > 0 else None

# Store all results
rounds_results = {}

for dec_name in ROUNDS_DECODERS:
    if dec_name not in available_decoders:
        continue
    dec_class = available_decoders[dec_name]
    
    for code_name, code in ROUNDS_CODES.items():
        lers = test_rounds_scaling(code, dec_class, dec_name, ROUNDS_LIST, SUB_THRESH_P, ROUNDS_SHOTS)
        rounds_results[(code_name, dec_name)] = lers

print(f"✓ Tested {len(ROUNDS_CODES)} codes × {len(ROUNDS_DECODERS)} decoders × {len(ROUNDS_LIST)} rounds")

✓ Tested 4 codes × 2 decoders × 3 rounds


In [15]:
# ==========================================================================
# Cell 9b: SUB-THRESHOLD ROUNDS SCALING MATRIX - DISPLAY
# ==========================================================================

print("="*140)
print("SUB-THRESHOLD ROUNDS SCALING MATRIX")
print("="*140)
print(f"\nMode: {'QUICK' if QUICK_MODE else 'FULL'}")
print(f"Physical error rate: p = {SUB_THRESH_P} (sub-threshold for most codes)")
print(f"Rounds tested: {ROUNDS_LIST}")
print(f"Shots per test: {ROUNDS_SHOTS}")
print("\nExpected behavior: Below threshold, more rounds → LOWER LER (error correction succeeds!)")
print("  ✓↓ = LER decreasing with rounds (healthy sub-threshold behavior)")
print("  ⚠↑ = LER increasing with rounds (above threshold or bug)")
print("  →  = LER flat (threshold region)")
print()

for dec_name in ROUNDS_DECODERS:
    if dec_name not in available_decoders:
        continue
        
    print(f"\n{'='*100}")
    print(f"Decoder: {dec_name}")
    print(f"{'='*100}")
    
    # Header
    header = f"{'Code':<25}"
    for r in ROUNDS_LIST:
        header += f" | R={r:>3}"
    header += " | Trend | Ratio"
    print(header)
    print("-" * len(header))
    
    for code_name in ROUNDS_CODES.keys():
        row = f"{code_name:<25}"
        lers = rounds_results.get((code_name, dec_name), {})
        
        for r in ROUNDS_LIST:
            ler = lers.get(r)
            if ler is not None:
                row += f" | {ler:>6.4f}"
            else:
                row += f" | {'FAIL':>6}"
        
        trend, ratio = analyze_trend(lers)
        ratio_str = f"{ratio:.2f}" if ratio is not None else "N/A"
        row += f" | {trend:^5} | {ratio_str:>5}"
        
        print(row)
    
    print("-" * len(header))

# Summary
print("\n" + "="*140)
print("ROUNDS SCALING SUMMARY")
print("="*140)

for dec_name in ROUNDS_DECODERS:
    if dec_name not in available_decoders:
        continue
        
    improving = worsening = flat = failed = 0
    
    for code_name in ROUNDS_CODES.keys():
        lers = rounds_results.get((code_name, dec_name), {})
        trend, _ = analyze_trend(lers)
        
        if trend == "✓↓":
            improving += 1
        elif trend == "⚠↑":
            worsening += 1
        elif trend == "→":
            flat += 1
        else:
            failed += 1
    
    total = improving + worsening + flat + failed
    print(f"\n{dec_name}:")
    print(f"  ✓↓ Improving (sub-threshold): {improving}/{total}")
    print(f"  →  Flat (threshold region):   {flat}/{total}")
    print(f"  ⚠↑ Worsening (super-thresh):  {worsening}/{total}")
    print(f"  N/A Failed:                   {failed}/{total}")
    
    if worsening > 0:
        print(f"  ⚠️ WARNING: {worsening} codes showing above-threshold behavior at p={SUB_THRESH_P}")
    elif improving == total - failed:
        print(f"  ✓ All codes showing healthy sub-threshold behavior!")

print("\n" + "="*140)

SUB-THRESHOLD ROUNDS SCALING MATRIX

Mode: QUICK
Physical error rate: p = 0.003 (sub-threshold for most codes)
Rounds tested: [3, 5, 7]
Shots per test: 100000

Expected behavior: Below threshold, more rounds → LOWER LER (error correction succeeds!)
  ✓↓ = LER decreasing with rounds (healthy sub-threshold behavior)
  ⚠↑ = LER increasing with rounds (above threshold or bug)
  →  = LER flat (threshold region)


Decoder: PyMatching
Code                      | R=  3 | R=  5 | R=  7 | Trend | Ratio
-----------------------------------------------------------------
FourQubit422              | 0.0243 | 0.0376 | 0.0521 |  ⚠↑   |  2.15
Steane713                 | 0.0311 | 0.0537 | 0.0758 |  ⚠↑   |  2.44
RotatedSurface_d3         | 0.0017 | 0.0034 | 0.0055 |  ⚠↑   |  3.15
RotatedSurface_d5         | 0.0004 | 0.0008 | 0.0012 |  ⚠↑   |  2.95
-----------------------------------------------------------------

Decoder: FusionBlossom
Code                      | R=  3 | R=  5 | R=  7 | Trend | Ratio
----

In [16]:
# ==========================================================================
# Cell 10a: SURFACE CODE THRESHOLD SCALING - COMPUTATION
# ==========================================================================
import numpy as np
import stim
import time
import sys

for mod_name in list(sys.modules.keys()):
    if 'qectostim' in mod_name:
        del sys.modules[mod_name]

from qectostim.codes.surface.rotated_surface import RotatedSurfaceCode
from qectostim.codes.color.triangular_colour import TriangularColourCode
from qectostim.experiments.memory import CSSMemoryExperiment, ColorCodeMemoryExperiment
from qectostim.noise.models import CircuitDepolarizingNoise
from qectostim.decoders.decoder_selector import select_decoder

distances = [3, 5, 7]
p_values = [0.001, 0.005, 0.01]
shots = 1000

results_surface = {}

for d in distances:
    code = RotatedSurfaceCode(distance=d)
    results_surface[d] = {}
    
    for p in p_values:
        noise = CircuitDepolarizingNoise(p1=p, p2=p)
        exp = CSSMemoryExperiment(code=code, noise_model=noise, rounds=d, basis="Z")
        
        start = time.time()
        circuit = noise.apply(exp.to_stim())
        dem = circuit.detector_error_model(decompose_errors=True)
        decoder = select_decoder(dem, preferred="pymatching")
        
        sampler = dem.compile_sampler()
        raw = sampler.sample(shots=shots)
        det_samples = np.asarray(raw[0], dtype=np.uint8)
        obs_samples = np.asarray(raw[1], dtype=np.uint8)
        
        corrections = decoder.decode_batch(det_samples)
        corrections = np.asarray(corrections, dtype=np.uint8).reshape(-1, dem.num_observables)
        
        logical_errors = (corrections[:, 0] ^ obs_samples[:, 0]).astype(np.uint8)
        ler = float(logical_errors.mean())
        elapsed = time.time() - start
        
        results_surface[d][p] = ler

print(f"✓ Computed surface code LER for {len(distances)} distances × {len(p_values)} noise levels")

✓ Computed surface code LER for 3 distances × 3 noise levels


In [17]:
# ==========================================================================
# Cell 10b: SURFACE CODE THRESHOLD SCALING - DISPLAY
# ==========================================================================

print("=" * 70)
print("THRESHOLD SCALING: SURFACE CODES")
print("=" * 70)

for d in distances:
    for p in p_values:
        print(f"d={d}, p={p:.3f}: LER = {results_surface[d][p]:.4f}")

print("\n" + "-" * 50)
print("SURFACE CODE THRESHOLD SUMMARY")
print("-" * 50)
print(f"{'p':>8} | " + " | ".join(f"d={d:2d}" for d in distances))
print("-" * 50)
for p in p_values:
    row = f"{p:>8.3f} | " + " | ".join(f"{results_surface[d].get(p, float('nan')):.4f}" for d in distances)
    print(row)

# Threshold analysis
p_low = 0.001
lers = [results_surface[d][p_low] for d in distances]
print(f"\nAt p={p_low}:")
arrow1 = '↓' if lers[0] > lers[1] else ('→' if lers[0] == lers[1] else '↑')
arrow2 = '↓' if lers[1] > lers[2] else ('→' if lers[1] == lers[2] else '↑')
print(f"  d=3→d=5: {lers[0]:.4f} → {lers[1]:.4f} {arrow1}")
print(f"  d=5→d=7: {lers[1]:.4f} → {lers[2]:.4f} {arrow2}")

THRESHOLD SCALING: SURFACE CODES
d=3, p=0.001: LER = 0.0000
d=3, p=0.005: LER = 0.0070
d=3, p=0.010: LER = 0.0120
d=5, p=0.001: LER = 0.0000
d=5, p=0.005: LER = 0.0020
d=5, p=0.010: LER = 0.0270
d=7, p=0.001: LER = 0.0000
d=7, p=0.005: LER = 0.0040
d=7, p=0.010: LER = 0.0230

--------------------------------------------------
SURFACE CODE THRESHOLD SUMMARY
--------------------------------------------------
       p | d= 3 | d= 5 | d= 7
--------------------------------------------------
   0.001 | 0.0000 | 0.0000 | 0.0000
   0.005 | 0.0070 | 0.0020 | 0.0040
   0.010 | 0.0120 | 0.0270 | 0.0230

At p=0.001:
  d=3→d=5: 0.0000 → 0.0000 →
  d=5→d=7: 0.0000 → 0.0000 →


In [18]:
# ==========================================================================
# Cell 11a: COLOR CODE THRESHOLD SCALING - COMPUTATION
# ==========================================================================

results_color = {}

for d in distances:
    code = TriangularColourCode(distance=d)
    results_color[d] = {}
    
    for p in p_values:
        noise = CircuitDepolarizingNoise(p1=p, p2=p)
        exp = ColorCodeMemoryExperiment(code=code, noise_model=noise, rounds=d, basis="Z")
        
        start = time.time()
        circuit = noise.apply(exp.to_stim())
        dem = circuit.detector_error_model(decompose_errors=True, ignore_decomposition_failures=True)
        decoder = select_decoder(dem, preferred="chromobius")
        
        sampler = dem.compile_sampler()
        raw = sampler.sample(shots=shots)
        det_samples = np.asarray(raw[0], dtype=np.uint8)
        obs_samples = np.asarray(raw[1], dtype=np.uint8)
        
        corrections = decoder.decode_batch(det_samples)
        corrections = np.asarray(corrections, dtype=np.uint8).reshape(-1, dem.num_observables)
        
        logical_errors = (corrections[:, 0] ^ obs_samples[:, 0]).astype(np.uint8)
        ler = float(logical_errors.mean())
        elapsed = time.time() - start
        
        results_color[d][p] = ler

print(f"✓ Computed color code LER for {len(distances)} distances × {len(p_values)} noise levels")

✓ Computed color code LER for 3 distances × 3 noise levels


In [19]:
# ==========================================================================
# Cell 11b: COLOR CODE THRESHOLD SCALING - DISPLAY
# ==========================================================================

print("=" * 70)
print("THRESHOLD SCALING: COLOR CODES (Chromobius)")
print("=" * 70)

for d in distances:
    for p in p_values:
        print(f"d={d}, p={p:.3f}: LER = {results_color[d][p]:.4f}")

print("\n" + "-" * 50)
print("COLOR CODE THRESHOLD SUMMARY")
print("-" * 50)
print(f"{'p':>8} | " + " | ".join(f"d={d:2d}" for d in distances))
print("-" * 50)
for p in p_values:
    row = f"{p:>8.3f} | " + " | ".join(f"{results_color[d].get(p, float('nan')):.4f}" for d in distances)
    print(row)

# Threshold analysis
lers = [results_color[d][p_low] for d in distances]
print(f"\nAt p={p_low}:")
arrow1 = '↓' if lers[0] > lers[1] else ('→' if lers[0] == lers[1] else '↑')
arrow2 = '↓' if lers[1] > lers[2] else ('→' if lers[1] == lers[2] else '↑')
print(f"  d=3→d=5: {lers[0]:.4f} → {lers[1]:.4f} {arrow1}")
print(f"  d=5→d=7: {lers[1]:.4f} → {lers[2]:.4f} {arrow2}")

THRESHOLD SCALING: COLOR CODES (Chromobius)
d=3, p=0.001: LER = 0.0030
d=3, p=0.005: LER = 0.0290
d=3, p=0.010: LER = 0.0700
d=5, p=0.001: LER = 0.0010
d=5, p=0.005: LER = 0.0190
d=5, p=0.010: LER = 0.0810
d=7, p=0.001: LER = 0.0000
d=7, p=0.005: LER = 0.0220
d=7, p=0.010: LER = 0.1370

--------------------------------------------------
COLOR CODE THRESHOLD SUMMARY
--------------------------------------------------
       p | d= 3 | d= 5 | d= 7
--------------------------------------------------
   0.001 | 0.0030 | 0.0010 | 0.0000
   0.005 | 0.0290 | 0.0190 | 0.0220
   0.010 | 0.0700 | 0.0810 | 0.1370

At p=0.001:
  d=3→d=5: 0.0030 → 0.0010 ↓
  d=5→d=7: 0.0010 → 0.0000 ↓


In [20]:
# ==========================================================================
# COMBINED THRESHOLD COMPARISON: Surface vs Color
# ==========================================================================

print("=" * 70)
print("COMBINED THRESHOLD COMPARISON")
print("=" * 70)

print("\n" + "=" * 50)
print("SURFACE CODE (PyMatching)")
print("=" * 50)
print(f"{'p':>8} | " + " | ".join(f"d={d:2d}" for d in distances))
print("-" * 50)
for p in p_values:
    row = f"{p:>8.3f} | " + " | ".join(f"{results_surface[d].get(p, float('nan')):.4f}" for d in distances)
    print(row)

print("\n" + "=" * 50)
print("COLOR CODE (Chromobius)")
print("=" * 50)
print(f"{'p':>8} | " + " | ".join(f"d={d:2d}" for d in distances))
print("-" * 50)
for p in p_values:
    row = f"{p:>8.3f} | " + " | ".join(f"{results_color[d].get(p, float('nan')):.4f}" for d in distances)
    print(row)

# Threshold analysis
print("\n" + "=" * 50)
print("THRESHOLD BEHAVIOR ANALYSIS (p=0.001)")
print("=" * 50)

surface_lers = [results_surface[d][p_low] for d in distances]
color_lers = [results_color[d][p_low] for d in distances]

print(f"\nSurface Code: d=3→d=5→d=7")
print(f"  LER: {surface_lers[0]:.4f} → {surface_lers[1]:.4f} → {surface_lers[2]:.4f}")
decreasing_surface = surface_lers[0] >= surface_lers[1] >= surface_lers[2]
print(f"  Threshold behavior: {'✅ PASS' if decreasing_surface else '❌ FAIL'}")

print(f"\nColor Code: d=3→d=5→d=7")
print(f"  LER: {color_lers[0]:.4f} → {color_lers[1]:.4f} → {color_lers[2]:.4f}")
decreasing_color = color_lers[0] >= color_lers[1] >= color_lers[2]
print(f"  Threshold behavior: {'✅ PASS' if decreasing_color else '❌ FAIL'}")

if decreasing_surface and decreasing_color:
    print("\n" + "=" * 50)
    print("🎉 BOTH CODE FAMILIES EXHIBIT CORRECT THRESHOLD SCALING!")
    print("=" * 50)

COMBINED THRESHOLD COMPARISON

SURFACE CODE (PyMatching)
       p | d= 3 | d= 5 | d= 7
--------------------------------------------------
   0.001 | 0.0000 | 0.0000 | 0.0000
   0.005 | 0.0070 | 0.0020 | 0.0040
   0.010 | 0.0120 | 0.0270 | 0.0230

COLOR CODE (Chromobius)
       p | d= 3 | d= 5 | d= 7
--------------------------------------------------
   0.001 | 0.0030 | 0.0010 | 0.0000
   0.005 | 0.0290 | 0.0190 | 0.0220
   0.010 | 0.0700 | 0.0810 | 0.1370

THRESHOLD BEHAVIOR ANALYSIS (p=0.001)

Surface Code: d=3→d=5→d=7
  LER: 0.0000 → 0.0000 → 0.0000
  Threshold behavior: ✅ PASS

Color Code: d=3→d=5→d=7
  LER: 0.0030 → 0.0010 → 0.0000
  Threshold behavior: ✅ PASS

🎉 BOTH CODE FAMILIES EXHIBIT CORRECT THRESHOLD SCALING!
