In [None]:
import subprocess
import os
from datetime import datetime
import glob
import re
import os        
from scipy.optimize import curve_fit
import numpy as np
import plotly.graph_objects as go
import pandas as pd

In [229]:
cwd = os.getcwd()
base_dir = os.path.join(cwd, "data/initial_paper_results")
os.makedirs(base_dir, exist_ok=True)

In [231]:
import re

decoders = ["uf", "clayg"]

distances = [2]

probabilities = [0.0001, 0.0005, 0.001]
start = 0.001
end = 10**-2
step = 1.2
use_probability_list = True

existing_ids = [
    int(match.group(1))
    for f in os.listdir(base_dir)
    if os.path.isdir(os.path.join(base_dir, f))
    and (match := re.match(r"(\d+)-", f))
]
next_id = max(existing_ids, default=0) + 1
if next_id not in existing_ids:
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    output_dir = os.path.join(base_dir, f"{next_id}-{timestamp}")
    os.makedirs(output_dir, exist_ok=True)
else:
    output_dir = next(
        os.path.join(base_dir, f)
        for f in os.listdir(base_dir)
        if f.startswith(str(next_id))
    )

if use_probability_list:
    for d in distances:
        for p in probabilities:
            exit(1)
            command = f"/home/tommaso-peduzzi/Documents/clayg/cmake-build-debug/clayg {d} {d} {p} {p*1.5} {','.join(decoders)} {output_dir} p_step=*2 dump=false runs=200000; sleep 1; exit"
            subprocess.Popen(["xterm", "-e", f'cd "{cwd}" && {command}; bash'])
else:
    for d in distances:
        exit(1)
        command = f"/home/tommaso-peduzzi/Documents/clayg/cmake-build-debug/clayg {d} {d} {start} {end} {','.join(decoders)} {output_dir} p_step=*{step} dump=false runs=200000; sleep 1; exit"
        subprocess.Popen(["xterm", "-e", f'cd "{cwd}" && {command}; bash'])


In [None]:
decoder_colors = {
    'uf': [
        "#deebf7", "#c6dbef", "#9ecae1", "#6baed6", "#4292c6",
        "#3182bd", "#1f77b4", "#2171b5", "#08519c", "#08306b"
    ],
    'clayg': [
        "#fdae6b", "#ffbb78", "#ff8c00", "#fd8d3c", "#ffa726",
        "#f16913", "#ff7f0e", "#d95f02", "#d94801", "#a63603",    
    ]
}

decoder_names = {
    'uf': 'UF',
    'clayg': 'ClAYG'
}

In [222]:
plot_ids = [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]

collected_data = {
    "average_operations": {},
    "results": {}
}

data = pd.DataFrame(columns=["metric", "decoder", "depth", "p", "value"])

for plot_id in plot_ids:
    # in base_dir look for folders starting with plot_id
    plot_folders = [f for f in glob.glob(os.path.join(base_dir, f"{plot_id}-*")) if os.path.isdir(f)]
    
    if not plot_folders:
        print(f"No folders found for plot_id {plot_id}")
        continue
    
    folder = plot_folders[0]
    files = glob.glob(os.path.join(folder, "*.txt"))

    pattern = re.compile(r"(average_operations|results)_(\w+)_d=(\d+)\.txt")
    for file in files:
        match = pattern.match(os.path.basename(file))
        if not match:
            continue
        metric, decoder, depth = match.groups()
        depth = int(depth)
        with open(file, 'r') as f:
            lines = f.readlines()
            for line in lines:
                if line.strip():
                    parts = line.strip().split()
                    if len(parts) == 2:
                        key, value = parts
                        # check if line is header or not
                        if key == "p":
                            continue
                        try:
                            p = float(key)
                            value = float(value)
                        except ValueError:
                            print(f"Skipping line with non-numeric key or value: {line.strip()}")
                            continue
                        # add data to dataframe
                        data.loc[len(data)] = {
                            "metric": metric,
                            "decoder": decoder,
                            "depth": depth,
                            "p": p,
                            "value": value
                        }

No folders found for plot_id 17
No folders found for plot_id 24
No folders found for plot_id 25
No folders found for plot_id 26
No folders found for plot_id 27
No folders found for plot_id 28


In [224]:
data

Unnamed: 0,metric,decoder,depth,p,value
0,results,uf,8,1.000000e-04,0.0
1,results,uf,8,1.500000e-04,0.0
2,results,uf,8,2.000000e-04,0.0
3,results,uf,8,2.500000e-04,0.0
4,results,uf,8,3.000000e-04,0.0
...,...,...,...,...,...
231,results,clayg,2,5.000000e-09,0.0
232,results,clayg,2,2.500000e-13,0.0
233,results,uf,2,1.000000e-04,0.0
234,results,uf,2,5.000000e-09,0.0


In [226]:
# Plot results
results = data.loc[(data['metric'] == 'results') & (data['p'] > 0) & (data['value'] > 0)].groupby(['decoder', 'depth']).apply(
    lambda x: x.set_index('p')['value'].to_dict()
).to_dict()

fig = go.Figure()

print(results)

for (decoder, depth), values in results.items():
    print(decoder)
    colors = decoder_colors.get(decoder)
    decoder_name = decoder_names.get(decoder)
    x = list(values.keys())
    y = list(values.values())
    sorted_indices = np.argsort(x)
    x = np.array(x)[sorted_indices]
    y = np.array(y)[sorted_indices]
    print(f"Plotting {decoder_name} d={depth}, data points: {len(x)}")
    fig.add_trace(go.Scatter(
        x=x,
        y=y,
        mode='lines+markers',
        name=f"{decoder_name} d={depth}",
        line=dict(color=colors[depth % len(colors)], width=2),
        marker=dict(size=5),
        legendgroup=decoder_name,
        legendgrouptitle_text=decoder_name,
        hovertemplate=f"{decoder_name} d={depth}<br>p: %{{x:.2e}}<br>L: %{{y:.2e}}",
        showlegend=True,
    ))

fig.update_layout(
    title=f"Results",
    legend_title="Decoder",
    template="plotly_white",
    xaxis=dict(type='log', title='p (log scale)'),
    yaxis=dict(type='log', title='L (log scale)'),
)

fig.show(renderer="browser")

{('clayg', 4): {0.000158489: 5e-06, 0.000251189: 5e-06, 0.000630957: 4.5e-05, 0.001: 0.00011, 0.00158489: 0.000325, 0.00251189: 0.000735, 0.00398107: 0.001775, 0.00630957: 0.0044, 0.01: 0.011705, 0.0158489: 0.028875, 0.0251189: 0.067185, 0.0005: 2e-05}, ('clayg', 6): {0.000630957: 1e-05, 0.00158489: 2.5e-05, 0.00251189: 9.5e-05, 0.00398107: 0.000445, 0.00630957: 0.001555, 0.01: 0.006775, 0.0158489: 0.025755, 0.0251189: 0.087285, 0.025: 0.08806, 0.001: 5e-06}, ('clayg', 8): {0.001: 1e-05}, ('uf', 4): {0.000251189: 5e-06, 0.000630957: 4.5e-05, 0.001: 9e-05, 0.00158489: 0.000275, 0.00251189: 0.00059, 0.00398107: 0.00146, 0.00630957: 0.00344, 0.01: 0.009135, 0.0158489: 0.02311, 0.0251189: 0.055305, 0.0005: 1e-05}, ('uf', 6): {0.000630957: 5e-06, 0.00158489: 1.5e-05, 0.00251189: 2.5e-05, 0.00398107: 0.000115, 0.00630957: 0.00055, 0.01: 0.00235, 0.0158489: 0.0114, 0.0251189: 0.048265, 0.025: 0.048565}, ('uf', 8): {0.00205: 5e-06, 0.00255: 5e-06, 0.00265: 5e-06, 0.0027: 5e-06, 0.00275: 5e-06,





In [228]:
# Perform regression on results
results = data.loc[data['metric'] == 'results'].groupby(['decoder', 'depth'], group_keys=False).apply(
    lambda x: x.set_index('p')['value'].to_dict()
).to_dict()

for (decoder, depth), values in results.items():
    decoder_name = decoder_names.get(decoder)
    # Filter out non-positive values
    filtered_data = [(xi, yi) for xi, yi in zip(values.keys(), values.values()) if xi > 0 and yi > 0]
    x = np.array([float(xi) for xi, _ in filtered_data])
    y = np.array([float(yi) for _, yi in filtered_data])
    if len(x) < 2:
        print(f"Not enough data points for fitting for {decoder_name} d={depth}")
        continue

    def power_law(x, a, b):
        return a * np.power(x, b)
    try:
        popt, pcov = curve_fit(power_law, x, y)
        a, b = popt
        print(f"{decoder_name} d={depth}, (d+1)/2={(depth+1)/2}, b={b:.4f}")
    except Exception as e:
        print(f"Error fitting data for {decoder_name} d={depth}: {e}")

Not enough data points for fitting for ClAYG d=2
ClAYG d=4, (d+1)/2=2.5, b=1.8834
ClAYG d=6, (d+1)/2=3.5, b=2.7091
Not enough data points for fitting for ClAYG d=8
Not enough data points for fitting for ClAYG d=10
Not enough data points for fitting for UF d=2
UF d=4, (d+1)/2=2.5, b=1.9350
UF d=6, (d+1)/2=3.5, b=3.1777
UF d=8, (d+1)/2=4.5, b=2.5233
Not enough data points for fitting for UF d=10






In [None]:
# Plot average operations
average_operations = data.loc[data['metric'] == 'average_operations'].groupby(['decoder', 'depth'], group_keys=False).apply(
    lambda x: x.set_index('p')['value'].to_dict()
).to_dict()

fig = go.Figure()

for (decoder, depth), values in average_operations.items():
    colors = decoder_colors.get(decoder)
    decoder_name = decoder_names.get(decoder)
    x = list(values.keys())
    y = list(values.values())
    # sort by x
    sorted_indices = np.argsort(x)
    x = np.array(x)[sorted_indices]
    y = np.array(y)[sorted_indices]
    print(f"Plotting {decoder_name} d={depth}, data points: {len(x)}")
    fig.add_trace(go.Scatter(
        x=x,
        y=y,
        mode='lines+markers',
        name=f"{decoder_name} d={depth}",
        line=dict(color=colors[depth % len(colors)], width=2),
        marker=dict(size=5),
        legendgroup=decoder_name,
        legendgrouptitle_text=decoder_name,
        hovertemplate=f"{decoder_name} d={depth}<br>p: %{{x:.2e}}<br>avg. # of operations: %{{y:.3}}",
        showlegend=True,
    ))

fig.update_layout(
    title=f"Average Operations",
    legend_title="Decoder",
    template="plotly_white",
    xaxis=dict(type='linear', title='p (linear scale)'),
    yaxis=dict(type='linear', title='Average Operations (linear scale)'),
)

fig.show(renderer="browser")





Plotting ClAYG d=2, data points: 3
Plotting ClAYG d=4, data points: 3
Plotting ClAYG d=6, data points: 3
Plotting ClAYG d=8, data points: 3
Plotting ClAYG d=10, data points: 3
Plotting ClAYG d=12, data points: 3
Plotting ClAYG d=14, data points: 3
Plotting ClAYG d=16, data points: 3
Plotting ClAYG d=18, data points: 3
Plotting ClAYG d=20, data points: 3
Plotting ClAYG d=22, data points: 3
Plotting ClAYG d=24, data points: 3
Plotting ClAYG d=26, data points: 3
Plotting UF d=2, data points: 3
Plotting UF d=4, data points: 3
Plotting UF d=6, data points: 3
Plotting UF d=8, data points: 3
Plotting UF d=10, data points: 3
Plotting UF d=12, data points: 3
Plotting UF d=14, data points: 3
Plotting UF d=16, data points: 3
Plotting UF d=18, data points: 3
Plotting UF d=20, data points: 3
Plotting UF d=22, data points: 3
Plotting UF d=24, data points: 3
Plotting UF d=26, data points: 3
Opening in existing browser session.


In [None]:
# Plot average operations
average_operations = data.loc[data['metric'] == 'average_operations'].groupby(['decoder', 'p'], group_keys=False).apply(
    lambda x: x.set_index('depth')['value'].to_dict()
).to_dict()

fig = go.Figure()

for (decoder, p), values in average_operations.items():
    colors = decoder_colors.get(decoder)
    decoder = decoder_names.get(decoder)
    x = list(values.keys())
    y = list(values.values())
    # sort by x
    sorted_indices = np.argsort(x)
    x = np.array(x)[sorted_indices]
    y = np.array(y)[sorted_indices]
    print(f"Plotting {decoder} p={p}, data points: {len(x)}")
    fig.add_trace(go.Scatter(
        x=x,
        y=y,
        mode='lines+markers',
        name=f"{decoder} p={p}",
        line=dict(color=colors[i % len(colors)], width=2),
        marker=dict(size=5),
        legendgroup=decoder,
        legendgrouptitle_text=decoder,
        hovertemplate=f"{decoder} p={p}<br>d: %{{x}}<br>avg. # of operations: %{{y:.3}}",
        showlegend=True,
    ))

fig.update_layout(
    title=f"Average Operations",
    legend_title="Decoder",
    template="plotly_white",
    xaxis=dict(type='linear', title='distance d (linear scale)'),
    yaxis=dict(type='linear', title='Average Operations (linear scale)'),
)

fig.show(renderer="browser")





Plotting ClAYG p=0.0001, data points: 13
Plotting ClAYG p=0.0005, data points: 13
Plotting ClAYG p=0.001, data points: 13
Plotting UF p=0.0001, data points: 13
Plotting UF p=0.0005, data points: 13
Plotting UF p=0.001, data points: 13
Opening in existing browser session.
