In [2]:
import h5py
import numpy as np
from optimization.analyze_codes.decoder_performance_from_state import evaluate_performance_of_state
from optimization.experiments_settings import from_edgelist


def validate_and_find_best_code(
    filepath,
    code_name,
    p=0.03,
    k=20,
    MC_budget_validation=int(1e6),
    verbose=True
):
    """
    Re-evaluate the best-screened codes of one HDF5 group and return the winner.

    The (up to) k entries with the smallest positive, finite stored logical
    error rate (LER) are read from the group ``code_name`` in ``filepath``.
    Each one is rebuilt with ``from_edgelist`` and re-evaluated through
    ``evaluate_performance_of_state`` at the single error rate ``p`` with
    ``MC_budget_validation`` as Monte Carlo budget. The candidate with the
    smallest re-evaluated LER is returned.

    Stored LERs that are non-positive, NaN or infinite are never selected,
    even when fewer than k usable entries exist (in that case fewer than k
    candidates are validated).

    Args:
        filepath: Path of the HDF5 results file.
        code_name: Name of the group inside the file; it must contain the
            datasets 'logical_error_rates' and 'states'.
        p: Error rate passed to the evaluation as ``p_vals=[p]``.
        k: Maximum number of candidates to re-evaluate; must be >= 1.
        MC_budget_validation: Monte Carlo budget used for each re-evaluation.
        verbose: When True, print progress and a per-candidate table.

    Returns:
        tuple: (best_state, best_stats_dict). ``best_stats_dict`` has the keys
        'index', 'ler', 'stderr', 'distance', 'state' and 'old_ler'.

    Raises:
        ValueError: If k < 1, or if the group holds no usable stored LER.
        KeyError: If the group or one of the required datasets is missing.
    """
    # Fail fast: with k < 1 there would be nothing to validate, and the
    # function would otherwise end in an opaque IndexError.
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k}")

    if verbose:
        print(f"\nProcessing: {code_name} from {filepath}")
        print(f"--- Step 1: Screening Top {k} Candidates ---")

    # --- 1. Load Data ---
    top_candidates = _load_top_candidates(filepath, code_name, k, verbose)

    # --- 2. Validation Loop ---
    if verbose:
        print(
            f"\n--- Step 2: Validating with High Budget ({MC_budget_validation}) ---")
        print(f"{'Index':<8} | {'Old LER':<12} | {'New LER':<12} | {'Stderr':<10} | {'Dist':<5} | {'Change'}")
        print("-" * 75)

    validated_results = []

    for idx, edgelist, old_ler in top_candidates:
        # Reconstruct state
        current_state = from_edgelist(edgelist)

        # High-precision evaluation.
        # NOTE(review): canskip=False presumably forces a full evaluation
        # instead of an early exit -- confirm against the evaluator.
        cost_result = evaluate_performance_of_state(
            state=current_state,
            p_vals=[p],
            MC_budget=MC_budget_validation,
            canskip=False
        )

        # Only one p value was requested, so the per-p lists have one entry.
        new_ler = cost_result['logical_error_rates'][0]
        new_stderr = cost_result['stderrs'][0]
        distance = cost_result['d_quantum']

        validated_results.append({
            'index': idx,
            'ler': new_ler,
            'stderr': new_stderr,
            'distance': distance,
            'state': current_state,
            'old_ler': old_ler
        })

        if verbose:
            change = new_ler - old_ler
            print(
                f"{idx:<8} | {old_ler:.5f}      | {new_ler:.5f}      | {new_stderr:.5f}    | {distance:<5} | {change:+.5f}")

    # --- 3. Pick Winner ---
    # Rank by the NEW high-precision LER; on ties the earlier candidate wins.
    best_result = min(validated_results, key=lambda res: res['ler'])

    if verbose:
        print("\n" + "="*35)
        print(f"REAL Best Code Found for {code_name}")
        print(f"Original Index: {best_result['index']}")
        print(
            f"Validated LER:  {best_result['ler']} ± {best_result['stderr']}")
        print(f"Distance:       {best_result['distance']}")
        print("="*35)

    return best_result['state'], best_result


def _load_top_candidates(filepath, code_name, k, verbose):
    """Return up to k (index, edgelist, stored_ler) tuples, best stored LER first."""
    with h5py.File(filepath, "r") as f:
        if code_name not in f:
            raise KeyError(f"Code group '{code_name}' not found in {filepath}")

        grp = f[code_name]

        # Check both datasets before doing any work.
        if 'logical_error_rates' not in grp:
            raise KeyError("Dataset 'logical_error_rates' missing.")
        if 'states' not in grp:
            raise KeyError("Could not find 'states' dataset in HDF5 group.")

        # Non-positive and NaN entries become inf so they sort last ...
        lers = grp['logical_error_rates'][:]
        lers = np.where(lers > 0, lers, np.inf)

        # ... and are then dropped, so they are never re-evaluated when fewer
        # than k usable entries exist.
        ranking = np.argsort(lers)
        ranking = ranking[np.isfinite(lers[ranking])]
        sorted_indices = ranking[:k]

        if sorted_indices.size == 0:
            raise ValueError(
                f"No positive, finite logical error rate stored for '{code_name}' in {filepath}")

        if verbose:
            print(f"Top {len(sorted_indices)} indices found: {sorted_indices}")

        # Load all states (be mindful if dataset is massive, but usually fine for code search)
        all_edgelists = grp['states'][:]

        return [(idx, all_edgelists[idx], lers[idx]) for idx in sorted_indices]

In [4]:
# Validation configuration for this run
p_val = 0.03
# NOTE: use 1e6 or more for actual research results; a larger budget reduces the error bars
validation_budget = 5_000_000
target_files = [
    # One (HDF5 results file, code group name) pair per code to validate;
    # append further pairs for other files/codes.
    ("optimization/results/best_neighbor_search_early_stop.hdf5", "[625,25]"),
]

for fpath, cname in target_files:
    try:
        best_state, stats = validate_and_find_best_code(
            fpath,
            cname,
            p=p_val,
            k=20,
            MC_budget_validation=validation_budget,
        )

        # Placeholder: post-process the winning code here,
        # e.g. store it in a separate "champions" file:
        # save_champion(best_state, stats)

    except Exception as e:
        # Best effort: report the failure and continue with the next target.
        print(f"Skipping {cname}: {e}")


Processing: [625,25] from optimization/results/best_neighbor_search_early_stop.hdf5
--- Step 1: Screening Top 20 Candidates ---
Top 20 indices found: [234 259 192 211 243 293 304 173 392 294 199 286 252 385 361 354 332 289
 267 384]

--- Step 2: Validating with High Budget (5000000) ---
Index    | Old LER      | New LER      | Stderr     | Dist  | Change
---------------------------------------------------------------------------
H: [20, 5, 8]
H^T: [15, 0, inf]
Q: [[625, 25, 8]]
Hx, Hz, Lx, Lz: (300, 625), (300, 625), (25, 625), (25, 625)
BP max iterations: 62, OSD order: 2, MS scaling factor: 0.625
Decoder Best neighbor search finished in 57.0m 29.13s with 18326 failures out of 5000000 runs.
Logical error rate for Best neighbor search: 0.0036652 ± 0.0000270 (stderr)
234      | 0.00332      | 0.00367      | 0.00003    | 8     | +0.00035
H: [20, 5, 8]
H^T: [15, 0, inf]
Q: [[625, 25, 8]]
Hx, Hz, Lx, Lz: (300, 625), (300, 625), (25, 625), (25, 625)
BP max iterations: 62, OSD order: 2, MS 

In [7]:
# Validation configuration for the second search run
p_val = 0.03
# NOTE: use 1e6 or more for actual research results; a larger budget reduces the error bars
validation_budget = 5_000_000
target_files = [
    # One (HDF5 results file, code group name) pair per code to validate;
    # append further pairs for other files/codes.
    ("optimization/results/best_neighbor_search_early_stop_run2.hdf5", "[625,25]"),
]

for fpath, cname in target_files:
    try:
        best_state, stats = validate_and_find_best_code(
            fpath,
            cname,
            p=p_val,
            k=10,
            MC_budget_validation=validation_budget,
        )

        # Placeholder: post-process the winning code here,
        # e.g. store it in a separate "champions" file:
        # save_champion(best_state, stats)

    except Exception as e:
        # Best effort: report the failure and continue with the next target.
        print(f"Skipping {cname}: {e}")


Processing: [625,25] from optimization/results/best_neighbor_search_early_stop_run2.hdf5
--- Step 1: Screening Top 10 Candidates ---
Top 10 indices found: [246 219 335 373 234 399 368 354 303 334]

--- Step 2: Validating with High Budget (5000000) ---
Index    | Old LER      | New LER      | Stderr     | Dist  | Change
---------------------------------------------------------------------------
H: [20, 5, 8]
H^T: [15, 0, inf]
Q: [[625, 25, 8]]
Hx, Hz, Lx, Lz: (300, 625), (300, 625), (25, 625), (25, 625)
BP max iterations: 62, OSD order: 2, MS scaling factor: 0.625




Decoder Best neighbor search finished in 57.0m 50.71s with 22011 failures out of 5000000 runs.
Logical error rate for Best neighbor search: 0.0044022 ± 0.0000296 (stderr)
246      | 0.00385      | 0.00440      | 0.00003    | 8     | +0.00055
H: [20, 5, 8]
H^T: [15, 0, inf]
Q: [[625, 25, 8]]
Hx, Hz, Lx, Lz: (300, 625), (300, 625), (25, 625), (25, 625)
BP max iterations: 62, OSD order: 2, MS scaling factor: 0.625
Decoder Best neighbor search finished in 57.0m 20.05s with 21824 failures out of 5000000 runs.
Logical error rate for Best neighbor search: 0.0043648 ± 0.0000295 (stderr)
219      | 0.00385      | 0.00436      | 0.00003    | 8     | +0.00051
H: [20, 5, 8]
H^T: [15, 0, inf]
Q: [[625, 25, 8]]
Hx, Hz, Lx, Lz: (300, 625), (300, 625), (25, 625), (25, 625)
BP max iterations: 62, OSD order: 2, MS scaling factor: 0.625
Decoder Best neighbor search finished in 67.0m 44.98s with 22149 failures out of 5000000 runs.
Logical error rate for Best neighbor search: 0.0044298 ± 0.0000297 (stderr)