In [40]:
import subprocess
import os
from datetime import datetime
import glob
import re
import os        
from scipy.optimize import curve_fit
from scipy.stats import norm
import numpy as np
import plotly.graph_objects as go
import pandas as pd

In [None]:
# Parameters for plotting

decoder_colors = {
    'uf': [
        "#6baed6", "#4292c6","#3182bd", "#1f77b4", "#2171b5", 
        "#08519c", "#08306b", "#08519c", "#08306b", "#08306b",
    ],
    'clayg': [
        "#fdae6b", "#ffbb78", "#ff8c00", "#fd8d3c", "#ffa726",
        "#f16913", "#ff7f0e", "#d95f02", "#d94801", "#a63603",    
    ],
    'sl_clayg': [
        "#31a354", "#74c476", "#238b45", "#31a354", "#74c476",
        "#006d2c", "#00441b", "#006d2c", "#00441b", "#006d2c",
    ],
    'other': [
        "#e377c2", "#d62728", "#ff9896", "#c51b7d", "#8c564b",
        "#e377c2", "#d62728", "#ff9896", "#c51b7d", "#8c564b",
    ]
}

other_decoders = ['clayg_third_growth', 'clayg_faster_backwards_growth', 'sl_clayg_third_growth']
for decoder in other_decoders:
    decoder_colors[decoder] = decoder_colors['other']

decoder_names = {
    'uf': 'UF',
    'clayg': 'ClAYG',
    'sl_clayg': 'Single Layer ClAYG',
    'clayg_third_growth': 'ClAYG ⅓ Growth',
    'clayg_faster_backwards_growth': 'ClAYG w/ Faster Backwards Growth',
    'sl_clayg_third_growth': 'Single Layer ClAYG ⅓ Growth'
}

In [6]:
def collect_data_old(base_dir, plot_ids):
    data = pd.DataFrame(columns=["metric", "decoder", "depth", "p", "value"])

    for plot_id in plot_ids:
        plot_folders = [f for f in glob.glob(os.path.join(base_dir, f"{plot_id}-*")) if os.path.isdir(f)]

        if not plot_folders:
            print(f"No folders found for plot_id {plot_id}")
            continue
        
        folder = plot_folders[0]
        files = glob.glob(os.path.join(folder, "*.txt"))

        pattern = re.compile(r"(average_operations|results)_(\w+)_d=(\d+)\.txt")
        for file in files:
            match = pattern.match(os.path.basename(file))
            if not match:
                continue
            metric, decoder, depth = match.groups()
            depth = int(depth)
            with open(file, 'r') as f:
                lines = f.readlines()
                for line in lines:
                    if line.strip():
                        parts = line.strip().split()
                        if len(parts) == 2:
                            key, value = parts
                            # check if line is header or not
                            if key == "p":
                                continue
                            try:
                                p = float(key)
                                value = float(value)
                            except ValueError:
                                print(f"Skipping line with non-numeric key or value: {line.strip()}")
                                continue
                            # add data to dataframe
                            data.loc[len(data)] = {
                                "metric": metric,
                                "decoder": decoder,
                                "depth": depth,
                                "p": p,
                                "value": value
                            }
    return data

class Data:
    results : pd.DataFrame
    steps : pd.DataFrame

def collect_data(base_dir, plot_ids):
    data = Data()
    data.results = pd.DataFrame(columns=["decoder", "depth", "p", "l"])
    data.steps = pd.DataFrame(columns=["decoder", "depth", "p", "value", "occurences"])

    for plot_id in plot_ids:
        plot_folders = [f for f in glob.glob(os.path.join(base_dir, f"{plot_id}-*")) if os.path.isdir(f)]

        if not plot_folders:
            print(f"No folders found for plot_id {plot_id}")
            continue
        
        folder = plot_folders[0]
        results_files = glob.glob(os.path.join(folder, "results", "*.txt"))
        steps_files = glob.glob(os.path.join(folder, "steps", "*.txt"))  
        
        results_file_pattern = re.compile(r"(\w+)_d=(\d+)\.txt")
        for file in results_files:
            results_match = results_file_pattern.match(os.path.basename(file))
            if not results_match:
                continue
            decoder, depth = results_match.groups()
            depth = int(depth)
            with open(file, 'r') as f:
                lines = f.readlines()
                for line in lines:
                    if line.strip():
                        parts = line.strip().split()
                        if len(parts) == 2:
                            key, value = parts
                            # check if line is header or not
                            if key == "p":
                                continue
                            try:
                                p = float(key)
                                value = float(value)
                            except ValueError:
                                print(f"Skipping line with non-numeric key or value: {line.strip()}")
                                continue
                            # add data to dataframe
                            data.results.loc[len(data.results)] = {
                                "decoder": decoder,
                                "depth": depth,
                                "p": p,
                                "value": value
                            }
                                
        steps_file_pattern = re.compile(r"(\w+)_d=(\d+)_p=(\d+.\d+)\.txt")
        for file in steps_files: 
            steps_match = steps_file_pattern.match(os.path.basename(file))
            if not steps_match:
                continue
            decoder, depth, p = steps_match.groups()
            depth = int(depth)
            p = float(p)
            with open(file, 'r') as f:
                lines = f.readlines()
                for line in lines:
                    if line.strip():
                        parts = line.strip().split()
                        if len(parts) == 2:
                            key, value = parts
                            try:
                                steps = float(key)
                                occurences = int(value)
                            except ValueError:
                                print(f"Skipping line with non-numeric key or value: {line.strip()}")
                                continue
                            # add data to dataframe
                            data.steps.loc[len(data.steps)] = {
                                "decoder": decoder,
                                "depth": depth,
                                "p": p,
                                "value": steps,
                                "occurences": occurences
                            }
                
    return data

In [52]:
def threshold_plot(results):
    fig = go.Figure()

    for (decoder, depth), values in results.items():
        colors = decoder_colors.get(decoder)
        decoder_name = decoder_names.get(decoder)
        x = list(values.keys())
        y = list(values.values())
        sorted_indices = np.argsort(x)
        
        x = np.array(x)[sorted_indices]
        y = np.array(y)[sorted_indices]
        
        # Compute Wilson score uncertainties
        # phat is the estimated proportion of failures
        z = norm.ppf(1 - 0.05 / 2)  # for 95% confidence
        n = 200000 
        sigma = [1 / (1 + z**2 / n) * (phat + z/(2*n)*(z + np.sqrt(4*n*phat*1-phat)+z**2)) for phat in y]
        sigma = np.array(sigma)
        
        fig.add_trace(go.Scatter(
            x=x,
            y=y,
            error_y=dict(
                type='data',
                array=sigma,
                visible=False,
                thickness=1.5,
                width=3
            ),
            mode='lines+markers',
            name=f"{decoder_name} d={depth}",
            line=dict(color=colors[depth % len(colors)], width=2),
            marker=dict(size=5),
            legendgroup=decoder_name,
            legendgrouptitle_text=decoder_name,
            hovertemplate=f"{decoder_name} d={depth}<br>p: %{{x:.2e}}<br>L: %{{y:.2e}}",
            showlegend=True,
        ))
                

        if len(x) < 2:
            print(f"Not enough data points for fitting for {decoder_name} d={depth}")
            continue

        def power_law(x, a, b):
            return a * np.power(x, b)

        try:
            popt, pcov = curve_fit(
                power_law, x, y, sigma=sigma, absolute_sigma=True
            )
            a, b = popt
            print(f"{decoder_name} d={depth}, (d+1)/2={(depth+1)/2}, b={b:.4f}")
            fit_x = np.linspace(min(x), max(x), 100)
            fit_y = power_law(fit_x, *popt)
            fig.add_trace(go.Scatter(
                x=fit_x,
                y=fit_y,
                mode='lines',
                name=f"{decoder_name} fit d={depth}",
                line=dict(color=colors[depth % len(colors)], width=1, dash='dash'),
                legendgroup=f"{decoder_name} fit",
                legendgrouptitle_text=f"{decoder_name} fit",
                hovertemplate=f"{decoder_name} fit d={depth}: <br> parameters: c={b:.4f}<br> d/2={(depth)/2}",
                showlegend=True,
            ))
        except Exception as e:
            print(f"Error fitting data for {decoder_name} d={depth}: {e}")
        
    # Set legend groups ending with fit to not selected by default
    for trace in fig.data:
        if 'fit' in trace.name:
            trace.visible = 'legendonly'

    fig.update_layout(
        title=f"Results",
        legend_title="Decoder",
        template="plotly_white",
        xaxis=dict(type='log', title='p (log scale)'),
        yaxis=dict(type='log', title='L (log scale)'),
    )
    
    return fig

In [11]:
# Plot results for the tests with different growth rates
base_dir = "data/old/special_clayg_tests"
average_operations = collect_data_old(base_dir, [2])
results = average_operations.loc[(average_operations['metric'] == 'results') & (average_operations['p'] > 0) & (average_operations['value'] > 0)].groupby(['decoder', 'depth']).apply(
    lambda x: x.set_index('p')['value'].to_dict()
).to_dict()



fig = threshold_plot(results)

fig.show(renderer="browser")





ClAYG d=4, (d+1)/2=2.5, b=1.7267
ClAYG d=6, (d+1)/2=3.5, b=1.9987
ClAYG d=8, (d+1)/2=4.5, b=1.9733
ClAYG ⅓ Growth d=4, (d+1)/2=2.5, b=0.9211
ClAYG ⅓ Growth d=6, (d+1)/2=3.5, b=1.2076
ClAYG ⅓ Growth d=8, (d+1)/2=4.5, b=1.3670
Opening in existing browser session.


[9407:9407:0716/191001.580930:ERROR:ui/gl/gl_surface_presentation_helper.cc:260] GetVSyncParametersIfAvailable() failed for 1 times!
[9407:9407:0716/191004.030244:ERROR:ui/gl/gl_surface_presentation_helper.cc:260] GetVSyncParametersIfAvailable() failed for 2 times!
[9407:9407:0716/191054.842409:ERROR:ui/gl/gl_surface_presentation_helper.cc:260] GetVSyncParametersIfAvailable() failed for 3 times!


In [None]:
# Plot results for the tests with clayg with faster backwards growth
base_dir = "data/old/special_clayg_grow_faster_backwards"
average_operations = collect_data_old(base_dir, [1])
print(len(average_operations))
average_operations = average_operations.loc[(average_operations['metric'] == 'results') & (average_operations['p'] > 0) & (average_operations['value'] > 0)]
results = average_operations.groupby(['decoder', 'depth'], group_keys=False).apply(
    lambda x: x.set_index('p')['value'].to_dict()
).to_dict()

fig = threshold_plot(results)

fig.show(renderer="browser")

In [53]:
# Plot results for the tests with single layer clayg
base_dir = "data/old/single_layer_clayg_tests"
average_operations = collect_data_old(base_dir, [1,2,3])
average_operations = average_operations.loc[(average_operations['metric'] == 'results') & (average_operations['p'] > 0) & (average_operations['value'] > 0)]
results = average_operations.groupby(['decoder', 'depth'], group_keys=False).apply(
    lambda x: x.set_index('p')['value'].to_dict()
).to_dict()

fig = threshold_plot(results)

fig.show(renderer="browser")

ClAYG d=4, (d+1)/2=2.5, b=1.8692
ClAYG d=6, (d+1)/2=3.5, b=2.4928
ClAYG d=8, (d+1)/2=4.5, b=2.7345
ClAYG d=10, (d+1)/2=5.5, b=4.1724
Single Layer ClAYG d=4, (d+1)/2=2.5, b=1.8643
Single Layer ClAYG d=6, (d+1)/2=3.5, b=2.4920
Single Layer ClAYG d=8, (d+1)/2=4.5, b=2.6218
Single Layer ClAYG d=10, (d+1)/2=5.5, b=3.7079
UF d=4, (d+1)/2=2.5, b=1.9729
UF d=6, (d+1)/2=3.5, b=3.1165
UF d=8, (d+1)/2=4.5, b=4.0940
UF d=10, (d+1)/2=5.5, b=5.8422
Opening in existing browser session.






In [None]:
from collections import defaultdict

base_dir = "data/new_average_operations"

data = collect_data(base_dir, [1,2,3,4])

steps = data.steps.groupby(['decoder', 'depth', 'p']).apply(lambda x: x.set_index('value')['occurences'].to_dict()).to_dict()

avg_steps = (
    data.steps
    .groupby(['decoder', 'depth', 'p'])
    .apply(lambda x: np.average(x['value'], weights=x['occurences']))
    .reset_index(name='avg_steps')
)

average_steps_by_d = (
    avg_steps
    .pivot_table(index=['decoder', 'p'], columns='depth', values='avg_steps')
    .apply(lambda row: row.dropna().to_dict(), axis=1)
    .to_dict()
)

average_steps_by_p = (
    avg_steps
    .pivot_table(index=['decoder', 'depth'], columns='p', values='avg_steps')
    .apply(lambda row: row.dropna().to_dict(), axis=1)
    .to_dict()
)

results = data.results.groupby(['decoder', 'depth']).apply(lambda x: x.set_index('p')['l'].to_dict()).to_dict()

#threshold_plot(results).show()
plot_step_distribution_by_p(steps).show()
plot_average_operations_against_d(average_steps_by_d).show()
#plot_average_operations_against_p(average_steps_by_p).show()









In [33]:
from plotly.subplots import make_subplots
from collections import defaultdict

def plot_average_operations_against_p(average_operations):
    fig = go.Figure()

    for (decoder, depth), values in average_operations.items():
        colors = decoder_colors.get(decoder)
        decoder_name = decoder_names.get(decoder)
        x = list(values.keys())
        y = list(values.values())
        # sort by x
        sorted_indices = np.argsort(x)
        x = np.array(x)[sorted_indices]
        y = np.array(y)[sorted_indices]
        print(f"Plotting {decoder_name} d={depth}, data points: {len(x)}")
        fig.add_trace(go.Scatter(
            x=x,
            y=y,
            mode='lines+markers',
            name=f"{decoder_name} d={depth}",
            line=dict(color=colors[depth % len(colors)], width=2),
            marker=dict(size=5),
            legendgroup=decoder_name,
            legendgrouptitle_text=decoder_name,
            hovertemplate=f"{decoder_name} d={depth}<br>p: %{{x:.2e}}<br>avg. # of operations: %{{y:.3}}",
            showlegend=True,
        ))

    fig.update_layout(
        title=f"Average Operations",
        legend_title="Decoder",
        template="plotly_white",
        xaxis=dict(type='linear', title='p (linear scale)'),
        yaxis=dict(type='linear', title='Average Operations (linear scale)'),
    )
    
    return fig

def plot_average_operations_against_d(average_operations):
    fig = go.Figure()

    for (decoder, p), values in average_operations.items():
        colors = decoder_colors.get(decoder)
        decoder = decoder_names.get(decoder)
        x = list(values.keys())
        y = list(values.values())
        # sort by x
        sorted_indices = np.argsort(x)
        x = np.array(x)[sorted_indices]
        y = np.array(y)[sorted_indices]
        print(f"Plotting {decoder} p={p}, data points: {len(x)}")
        fig.add_trace(go.Scatter(
            x=x,
            y=y,
            mode='lines+markers',
            name=f"{decoder} p={p}",
            line=dict(color=colors[int(p*1000) % len(colors)], width=2),
            marker=dict(size=5),
            legendgroup=decoder,
            legendgrouptitle_text=decoder,
            hovertemplate=f"{decoder} p={p}<br>d: %{{x}}<br>avg. # of operations: %{{y:.3}}",
            showlegend=True,
        ))

    fig.update_layout(
        title=f"Average Operations",
        legend_title="Decoder",
        template="plotly_white",
        xaxis=dict(type='linear', title='distance d (linear scale)'),
        yaxis=dict(type='linear', title='Average Operations (log scale)'),
    )
    
    return fig

def plot_step_distribution_by_p(steps):
    # Get all unique p values
    ps = set(p for (_, _, p) in steps.keys())
    if len(ps) > 1:
        # Multiple p: group by (decoder, depth), columns for each p
        group = defaultdict(list)
        for decoder, depth, p in steps.keys():
            group[(decoder, depth)].append(p)
        row_keys = list(group.keys())
        max_cols = max(len(ps) for ps in group.values())
        fig = make_subplots(
            rows=len(row_keys), cols=max_cols,
            subplot_titles=[
                f"{decoder_names.get(decoder, decoder)} d={depth}, p={p:.1e}"
                for (decoder, depth), ps in group.items() for p in sorted(ps)
            ],
            horizontal_spacing=0.08, vertical_spacing=0.02
        )
        for row, (decoder, depth) in enumerate(row_keys, start=1):
            ps_sorted = sorted(group[(decoder, depth)])
            for col, p in enumerate(ps_sorted, start=1):
                values = steps[(decoder, depth, p)]
                x = []
                for step_val, count in sorted(values.items()):
                    x.extend([step_val] * count)
                colors = decoder_colors.get(decoder)
                fig.add_trace(
                    go.Histogram(
                        x=x,
                        marker_color=colors[depth % len(colors)],
                        showlegend=False
                    ),
                    row=row, col=col
                )
    else:
        # Only one p: group by decoder, columns for each depth
        group = defaultdict(list)
        for decoder, depth, p in steps.keys():
            group[decoder].append(depth)
        row_keys = list(group.keys())
        max_cols = max(len(ds) for ds in group.values())
        fig = make_subplots(
            rows=len(row_keys), cols=max_cols,
            subplot_titles=[
                f"{decoder_names.get(decoder, decoder)} d={depth}"
                for decoder, ds in group.items() for depth in sorted(ds)
            ],
            horizontal_spacing=0.08, vertical_spacing=0.04
        )
        for row, decoder in enumerate(row_keys, start=1):
            ds_sorted = sorted(group[decoder])
            for col, depth in enumerate(ds_sorted, start=1):
                # Since only one p, get it
                p = next(p for (d, dep, p) in steps.keys() if d == decoder and dep == depth)
                values = steps[(decoder, depth, p)]
                x = []
                for step_val, count in sorted(values.items()):
                    x.extend([step_val] * count)
                colors = decoder_colors.get(decoder)
                fig.add_trace(
                    go.Histogram(
                        x=x,
                        marker_color=colors[depth % len(colors)],
                        showlegend=False
                    ),
                    row=row, col=col
                )
    fig.update_layout(
        title="Step Distribution by Decoder, Depth, and p",
        template="plotly_white",
        height=300 * len(row_keys),
        width=500 * max_cols
    )
    for i in range(1, len(row_keys) + 1):
        fig.update_yaxes(title_text="Count", row=i, col=1)
    for j in range(1, max_cols + 1):
        fig.update_xaxes(title_text="Steps", row=1, col=j)
    return fig

In [70]:
# Plot average operations against p
base_dir = "data/treshold_plots"
average_operations = collect_data_old(base_dir, [18,19,20,21, 22])
average_operations = average_operations.loc[average_operations['metric'] == 'average_operations'].groupby(['decoder', 'depth'], group_keys=False).apply(
    lambda x: x.set_index('p')['value'].to_dict()
).to_dict()

fig = plot_average_operations_against_p(average_operations)

fig.show(renderer="browser")

No folders found for plot_id 18
No folders found for plot_id 19
No folders found for plot_id 20
No folders found for plot_id 21
No folders found for plot_id 22






NameError: name 'plot_average_operations_against_p' is not defined

In [None]:
# Plot average operations against p
base_dir = "data/special_clayg_grow_faster_backwards"
average_operations = collect_data_old(base_dir, [1,2,3])
average_operations = average_operations.loc[average_operations['metric'] == 'average_operations'].groupby(['decoder', 'depth'], group_keys=False).apply(
    lambda x: x.set_index('p')['value'].to_dict()
).to_dict()

fig = plot_average_operations_against_p(average_operations)

fig.show(renderer="browser")

No folders found for plot_id 2
No folders found for plot_id 3
Plotting ClAYG d=4, data points: 35
Plotting ClAYG d=6, data points: 35
Plotting ClAYG d=8, data points: 35
Plotting ClAYG w/ Faster Backwards Growth d=4, data points: 35
Plotting ClAYG w/ Faster Backwards Growth d=6, data points: 35
Plotting ClAYG w/ Faster Backwards Growth d=8, data points: 35
Opening in existing browser session.






In [None]:
# Plot average operations against d
base_dir = "data/average_operations_initial"
data = collect_data_old(base_dir, [2,3])
average_operations = data.loc[data['metric'] == 'average_operations'].groupby(['decoder', 'p'], group_keys=False).apply(
    lambda x: x.set_index('depth')['value'].to_dict()
).to_dict()

fig = plot_average_operations_against_d(average_operations)

fig.show(renderer="browser")

Plotting ClAYG p=0.0001, data points: 13
Plotting ClAYG p=0.0005, data points: 13
Plotting ClAYG p=0.001, data points: 13
Plotting UF p=0.0001, data points: 13
Plotting UF p=0.0005, data points: 13
Plotting UF p=0.001, data points: 13
Opening in existing browser session.




