In [None]:
import os
import pickle
import matplotlib
import numpy as np
from scipy.stats import sem
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
from matplotlib.ticker import FormatStrFormatter

# Global matplotlib configuration for every figure in this notebook:
# render all text through LaTeX and use a serif font family.
plt.rcParams.update(
    {
        "text.usetex": True,
        "font.family": "serif",
    }
)

In [None]:
def load_experiment_data_pickle(file_key):
    """Read one pickled experiment record from disk and return it.

    Parameters
    ----------
    file_key : str or path-like
        Path of the pickle file; it is opened in binary read mode.

    Returns
    -------
    object
        Whatever object was stored in the pickle file.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code while deserialising, so this
    helper should only be pointed at files from a trusted source.
    """
    with open(file_key, "rb") as stream:
        return pickle.load(stream)

In [None]:
# Aggregate the pickled failure statistics found in `directory`.
#
# For every (system size, max bond dimension) pair, the samples of all
# matching files are pooled per error rate (the rate is parsed from the file
# name), and the mean failure rate and its standard error are stored.
#
# Results (module-level names used by the plotting cells):
#   failure_rates[system_size, chi_max, error_rate] -> mean of pooled samples
#   error_bars[system_size, chi_max, error_rate]    -> scipy.stats.sem of them
#   error_rates_dict[(system_size, chi_max)]        -> sorted rates WITH data
system_sizes = [24]  # , 48, 96, 192, 384]
max_bond_dims = [4, 8, 16, 32, 64, 128, 256, 512, 1024]

failure_rates = {}
error_bars = {}
directory = "data-classical-ldpc"

# Dictionary to store error rates for each system size and bond dimension combination
error_rates_dict = {}

# List the directory once instead of once per parameter combination. Only
# ".pkl" files are considered: other files that follow the same naming scheme
# (e.g. seeded ".npy" files) would otherwise be handed to pickle.load and
# abort the cell. Sorting makes the pooling order reproducible.
pickle_file_names = sorted(
    file_name for file_name in os.listdir(directory) if file_name.endswith(".pkl")
)

for system_size in system_sizes:
    for chi_max in max_bond_dims:
        file_prefix = f"numbits{system_size}_bonddim{chi_max}_errorrate"
        all_failures_statistics = {}  # error rate -> pooled samples

        for file_name in pickle_file_names:
            if file_prefix not in file_name:
                continue

            try:
                # Extract the error rate from the filename
                file_error_rate_part = file_name.split("_errorrate")[1]
                if "seed" in file_error_rate_part:
                    rate_text = file_error_rate_part.split("_seed")[0]
                else:
                    rate_text = file_error_rate_part.split(".pkl")[0]
                # Round to 12 decimal places so that rates differing only by
                # floating-point noise are pooled under the same key
                file_error_rate = round(float(rate_text), 12)
            except (IndexError, ValueError):
                # The file name does not follow the expected pattern
                continue

            # Register the rate before loading so that a rate whose files all
            # turn out to be unusable is still reported as "No data found"
            pooled_samples = all_failures_statistics.setdefault(file_error_rate, [])

            try:
                failures_statistics = load_experiment_data_pickle(
                    os.path.join(directory, file_name)
                )["failures"]
            except (IndexError, ValueError) as load_error:
                # Best effort, as before, but no longer silent
                print(f"Skipping unreadable file {file_name}: {load_error}")
                continue
            pooled_samples.extend(failures_statistics)

        # Only rates that actually have samples are exposed to the plotting
        # cells, which index failure_rates/error_bars for every listed rate
        error_rates = []
        for error_rate in sorted(all_failures_statistics):
            failures_statistics = all_failures_statistics[error_rate]
            if failures_statistics:
                # Calculate mean failure rate and error bar over all seeds
                failure_rates[system_size, chi_max, error_rate] = np.mean(
                    failures_statistics
                )
                error_bars[system_size, chi_max, error_rate] = sem(failures_statistics)
                error_rates.append(error_rate)
            else:
                print(
                    f"No data found for numbits={system_size}, bonddim={chi_max}, errorrate={error_rate}"
                )

        error_rates_dict[(system_size, chi_max)] = error_rates

In [None]:
# One figure per system size; each curve shows the failure rate against the
# error rate for one maximum bond dimension.
green_cmap = matplotlib.colormaps["viridis_r"]
# Map the position of each bond dimension in `max_bond_dims` onto the colormap
norm = Normalize(vmin=0, vmax=len(max_bond_dims) - 1)

for system_size in system_sizes:
    fig, ax = plt.subplots(figsize=(5, 4))

    for index, chi_max in enumerate(max_bond_dims):
        # Only plot error rates that have statistics: the loading cell prints
        # "No data found ..." and stores nothing for a rate without samples,
        # so an unconditional lookup would raise KeyError here.
        error_rates = [
            error_rate
            for error_rate in error_rates_dict.get((system_size, chi_max), [])
            if (system_size, chi_max, error_rate) in failure_rates
        ]
        curve_keys = [(system_size, chi_max, error_rate) for error_rate in error_rates]

        ax.errorbar(
            error_rates,
            [failure_rates[key] for key in curve_keys],
            yerr=[error_bars[key] for key in curve_keys],
            fmt="o--",
            label=f"System size: {system_size}, max bond dim: {chi_max}",
            linewidth=3,
            color=green_cmap(norm(index)),
        )

    ax.set_title(f"System size: {system_size}")
    ax.legend(fontsize=7)
    ax.set_xlabel("Error rate")
    ax.set_ylabel("Failure rate")
    ax.grid()

    # Optional tweaks (uncomment as needed):
    # ax.set_xticks(np.linspace(min(error_rates), max(error_rates), len(error_rates)))
    # ax.xaxis.set_major_formatter(FormatStrFormatter("%.2f"))
    # ax.set_xscale("log")
    # ax.set_yscale("log")
    # fig.savefig(f"ldpc-failure-rate-system-size-{system_size}.pdf")
    plt.show()

In [None]:
# One figure per maximum bond dimension; each curve shows the failure rate
# against the error rate for one system size.
green_cmap = matplotlib.colormaps["viridis_r"]
# Map the position of each system size in `system_sizes` onto the colormap
norm = Normalize(vmin=0, vmax=len(system_sizes) - 1)

for chi_max in max_bond_dims:
    fig, ax = plt.subplots(figsize=(5, 4))

    for index, system_size in enumerate(system_sizes):
        # Only plot error rates that have statistics: the loading cell prints
        # "No data found ..." and stores nothing for a rate without samples,
        # so an unconditional lookup would raise KeyError here.
        error_rates = [
            error_rate
            for error_rate in error_rates_dict.get((system_size, chi_max), [])
            if (system_size, chi_max, error_rate) in failure_rates
        ]
        curve_keys = [(system_size, chi_max, error_rate) for error_rate in error_rates]

        ax.errorbar(
            error_rates,
            [failure_rates[key] for key in curve_keys],
            yerr=[error_bars[key] for key in curve_keys],
            fmt="o--",
            label=f"System size: {system_size}, max bond dim: {chi_max}",
            linewidth=3,
            color=green_cmap(norm(index)),
        )

    ax.set_title(f"Max bond dimension: {chi_max}")
    ax.legend(fontsize=7)
    ax.set_xlabel("Error rate")
    ax.set_ylabel("Failure rate")
    ax.grid()

    # Optional tweaks (uncomment as needed):
    # ax.set_xticks(np.linspace(min(error_rates), max(error_rates), len(error_rates)))
    # ax.xaxis.set_major_formatter(FormatStrFormatter("%.2f"))
    # ax.set_xscale("log")
    # ax.set_yscale("log")
    # fig.savefig(f"ldpc-failure-rate-max-bond-dim-{chi_max}.pdf")
    plt.show()

In [None]:
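# Repeat the failure-rate analysis on a fixed grid of error rates, reading the
# failure statistics from the .npy files in the same data directory.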

In [None]:
# Aggregate the ".npy" failure statistics on a fixed grid of error rates.
#
# For every (system size, max bond dimension, error rate) triple, the samples
# of all files whose name encodes that error rate are pooled, and the mean
# failure rate and its standard error are stored in `failure_rates` and
# `error_bars` under the key (system_size, chi_max, error_rate). Triples
# without any sample are reported and get no entry.
#
# NOTE: this cell rebinds system_sizes, max_bond_dims, failure_rates and
# error_bars, replacing the values produced by the pickle-based cell above.
system_sizes = [24]  # , 48, 96, 192, 384]
max_bond_dims = [8, 16, 32, 64, 128, 256, 512]
error_rates = np.linspace(0.1, 0.3, 10)

failure_rates = {}
error_bars = {}
directory = "data-classical-ldpc"

# Largest |file rate - grid rate| for a file to count as belonging to a grid point
ERROR_RATE_TOLERANCE = 1e-15

# List the directory once instead of twice per (size, bond dim, error rate)
# triple. Files without "seed" in their name come first, then the seeded
# ones, which keeps the accumulation order of the former two-pass scan.
npy_file_names = [
    file_name for file_name in os.listdir(directory) if file_name.endswith(".npy")
]
ordered_file_names = [name for name in npy_file_names if "seed" not in name] + [
    name for name in npy_file_names if "seed" in name
]

for system_size in system_sizes:
    for chi_max in max_bond_dims:
        file_prefix = f"numbits{system_size}_bonddim{chi_max}_errorrate"

        # Parse the error rate of every matching file once per
        # (system size, bond dimension) pair
        candidate_files = []
        for file_name in ordered_file_names:
            if file_prefix not in file_name:
                continue
            # The rate is terminated by "_seed" in seeded file names and by
            # the ".npy" extension otherwise
            rate_terminator = "_seed" if "seed" in file_name else ".npy"
            try:
                file_error_rate_part = file_name.split("_errorrate")[1]
                file_error_rate = float(file_error_rate_part.split(rate_terminator)[0])
            except (IndexError, ValueError):
                # The file name does not follow the expected pattern
                continue
            candidate_files.append((file_error_rate, file_name))

        for error_rate in error_rates:
            all_failures_statistics = []

            for file_error_rate, file_name in candidate_files:
                # Check if the error rate is close enough
                if abs(file_error_rate - error_rate) > ERROR_RATE_TOLERANCE:
                    continue
                try:
                    failures_statistics = np.load(os.path.join(directory, file_name))
                except (IndexError, ValueError) as load_error:
                    # Best effort, as before, but no longer silent
                    print(f"Skipping unreadable file {file_name}: {load_error}")
                    continue
                all_failures_statistics.extend(failures_statistics)

            if all_failures_statistics:
                # Calculate mean failure rate and error bar over all seeds
                failure_rates[system_size, chi_max, error_rate] = np.mean(
                    all_failures_statistics
                )
                # Standard error of the mean with the sample standard
                # deviation (ddof=1), the same estimator (scipy.stats.sem)
                # as the pickle-based analysis; axis=None pools every sample.
                # A single sample therefore yields NaN rather than 0.
                error_bars[system_size, chi_max, error_rate] = sem(
                    all_failures_statistics, axis=None
                )
            else:
                print(
                    f"No data found for numbits={system_size}, bonddim={chi_max}, errorrate={error_rate}"
                )

In [None]:
# One figure per system size on the fixed error-rate grid; each curve shows
# the failure rate against the error rate for one maximum bond dimension.
green_cmap = matplotlib.colormaps["viridis_r"]
# Map the position of each bond dimension in `max_bond_dims` onto the colormap
norm = Normalize(vmin=0, vmax=len(max_bond_dims) - 1)

for system_size in system_sizes:
    fig, ax = plt.subplots(figsize=(5, 4))

    for index, chi_max in enumerate(max_bond_dims):
        # The loading cell stores nothing for grid points without samples
        # (it prints "No data found ..." instead), so restrict the curve to
        # the grid points that have an entry rather than raising KeyError.
        plotted_rates = [
            error_rate
            for error_rate in error_rates
            if (system_size, chi_max, error_rate) in failure_rates
        ]
        curve_keys = [
            (system_size, chi_max, error_rate) for error_rate in plotted_rates
        ]

        ax.errorbar(
            plotted_rates,
            [failure_rates[key] for key in curve_keys],
            yerr=[error_bars[key] for key in curve_keys],
            fmt="o--",
            label=f"System size: {system_size}, max bond dim: {chi_max}",
            linewidth=3,
            color=green_cmap(norm(index)),
        )

    ax.set_title(f"System size: {system_size}")
    ax.legend(fontsize=7)
    ax.set_xlabel("Error rate")
    ax.set_ylabel("Failure rate")
    ax.grid()

    plt.show()