In [2]:
import os
import glob
import re
import os        
from scipy.optimize import curve_fit
from scipy.stats import norm
from scipy.spatial import ConvexHull
import numpy as np
import plotly.graph_objects as go
import pandas as pd
from plotly.subplots import make_subplots
from collections import defaultdict

In [12]:
# Parameters for plotting

decoder_colors = {
    'uf': [
        "#6baed6", "#4292c6","#3182bd", "#1f77b4", "#2171b5", 
        "#08519c", "#08306b", "#08519c", "#08306b", "#08306b",
    ],
    'clayg': [
        "#fdae6b", "#ffbb78", "#ff8c00", "#fd8d3c", "#ffa726",
        "#f16913", "#ff7f0e", "#d95f02", "#d94801", "#a63603",    
    ],
    'sl_clayg': [
        "#31a354", "#74c476", "#238b45", "#31a354", "#74c476",
        "#006d2c", "#00441b", "#006d2c", "#00441b", "#006d2c",
    ],
    'clayg_stop_early' : [
        "#525252", "#252525", "#737373", "#525252", "#252525",
        "#000000",
    ],
    'other': [
        "#e377c2", "#d62728", "#ff9896", "#c51b7d", "#8c564b",
        "#e377c2", "#d62728", "#ff9896", "#c51b7d", "#8c564b",
    ]
}

decoder_names = {
    'uf': 'UF',
    'clayg': 'ClAYG',
    'sl_clayg': 'Single Layer ClAYG',
    'clayg_third_growth': 'ClAYG ⅓ Growth',
    'clayg_faster_backwards_growth': 'ClAYG w/ Faster Backwards Growth',
    'sl_clayg_third_growth': 'Single Layer ClAYG ⅓ Growth',
    'clayg_stop_early': 'ClAYG Stop Early',
    'sl_clayg_stop_early': 'Single Layer ClAYG Stop Early',
}

In [4]:
class Plot:
    fig : go.Figure
    title : str
    def __init__(self, fig, title):
        self.fig = fig
        self.title = title
        self.fig.update_layout(
            title=self.title,
        )
    
    def show(self):
        # render into html
        html = self.fig.to_html(full_html=True, include_plotlyjs='cdn')
        html = html.replace('<head>', f'<head><title>{self.title}</title><meta name="viewport" content="width=device-width, initial-scale=1">')
        # open in browser
        file_name = self.title.replace(',', '').replace(' ', '_').lower()
        file_name = f'plots/{file_name}.html'
        with open(file_name, 'w') as f:
            f.write(html)
        os.system(f'xdg-open "{file_name}"')
    
    def set_title(self, title):
        self.title = title
        self.fig.update_layout(title=self.title)

In [23]:
def collect_data_old(base_dir, plot_ids) -> pd.DataFrame:
    data = pd.DataFrame(columns=["metric", "decoder", "distance", "p", "value"])

    for plot_id in plot_ids:
        plot_folders = [f for f in glob.glob(os.path.join(base_dir, f"{plot_id}-*")) if os.path.isdir(f)]

        if not plot_folders:
            print(f"No folders found for plot_id {plot_id}")
            continue
        
        folder = plot_folders[0]
        files = glob.glob(os.path.join(folder, "*.txt"))

        pattern = re.compile(r"(average_operations|results)_(\w+)_d=(\d+)\.txt")
        for file in files:
            match = pattern.match(os.path.basename(file))
            if not match:
                continue
            metric, decoder, distance = match.groups()
            distance = int(distance)
            with open(file, 'r') as f:
                lines = f.readlines()
                for line in lines:
                    if line.strip():
                        parts = line.strip().split()
                        if len(parts) == 2:
                            key, value = parts
                            # check if line is header or not
                            if key == "p":
                                continue
                            try:
                                p = float(key)
                                value = float(value)
                            except ValueError:
                                print(f"Skipping line with non-numeric key or value: {line.strip()}")
                                continue
                            # add data to dataframe
                            data.loc[len(data)] = {
                                "metric": metric,
                                "decoder": decoder,
                                "distance": distance,
                                "p": p,
                                "value": value
                            }
    return data

class Data:
    results : pd.DataFrame
    steps : pd.DataFrame

def collect_data(base_dirs, plot_ids) -> Data:
    data = Data()
    data.results = pd.DataFrame(columns=["decoder", "distance", "p", "l", "n", "N"])
    data.steps = pd.DataFrame(columns=["decoder", "distance", "p", "value", "occurences"])

    plot_folders = []
    for base_dir in base_dirs:
        if not plot_ids:
            plot_folders.extend([f for f in glob.glob(os.path.join(base_dir, "*")) if os.path.isdir(f)])
        else:
            for plot_id in plot_ids:
                plot_folders.extend([f for f in glob.glob(os.path.join(base_dir, f"{plot_id}*")) if os.path.isdir(f)])
    
    for folder in plot_folders:
        results_files = glob.glob(os.path.join(folder, "results", "*.txt"))
        steps_files = glob.glob(os.path.join(folder, "steps", "*.txt"))
        
        results_file_pattern = re.compile(
            r"^(?P<decoder>(\w+(?:_\w+)*(?:_\d+(?:\.\d+)*)?))_d=(?P<distance>\d+)(?:_(?:idlingtimeconstant|N)=(?P<N>\d+(?:\.\d+)?))?\.txt$"
        )

        for file in results_files:
            match = results_file_pattern.match(os.path.basename(file))
            if not match:
                continue

            decoder = match["decoder"]
            distance = int(match["distance"])
            N = float(match["N"]) if match["N"] else np.nan
            
            with open(file, 'r') as f:
                lines = f.readlines()
                for line in lines:
                    if line.strip():
                        parts = line.strip().split()
                        try:
                            if len(parts) == 2:
                                key, value = parts
                                n = None
                            elif len(parts) == 3:
                                key, value, n = parts
                            else:
                                raise ValueError("Unexpected number of parts in line")
                            p = float(key)
                            l = float(value)
                            n = float(n) if not n == None else None
                        except ValueError:
                            print(f"Skipping line with NaN: {line.strip()}")
                            continue
                        # add data to dataframe
                        data.results.loc[len(data.results)] = {
                            "decoder": decoder,
                            "distance": distance,
                            "p": p,
                            "l": l,
                            "n": n,
                            "N": N,  
                        }
                                
        steps_file_pattern = re.compile(r"([\w\._]+)_d=(\d+)_p=([\d\.]+)\.txt")
        for file in steps_files: 
            steps_match = steps_file_pattern.match(os.path.basename(file))
            if not steps_match:
                continue
            decoder, distance, p = steps_match.groups()
            distance = int(distance)
            p = float(p)
            with open(file, 'r') as f:
                lines = f.readlines()
                for line in lines:
                    if line.strip():
                        parts = line.strip().split()
                        if len(parts) == 2:
                            key, value = parts
                            try:
                                steps = float(key)
                                occurences = int(value)
                            except ValueError:
                                print(f"Skipping line with NaN: {line.strip()}")
                                continue
                            # add data to dataframe
                            data.steps.loc[len(data.steps)] = {
                                "decoder": decoder,
                                "distance": distance,
                                "p": p,
                                "value": steps,
                                "occurences": occurences
                            }
                
    return data

In [None]:
def logical_vs_p_and_idlingtimconstant_plot(data: pd.DataFrame) -> Plot:
    # 3d plot: l on z axis, p on y axis, 1/N on x axis, different plots for different distances
    distances = sorted(data['distance'].unique())
    cols = 2
    rows = int(np.ceil(len(distances) / cols))

    # create 3D subplot grid (one scene per distance)
    fig = make_subplots(
        rows=rows, cols=cols,
        specs=[[{'type': 'scene'} for _ in range(cols)] for _ in range(rows)],
        subplot_titles=[f"d={d}" for d in distances]
    )

    seen_decoders = set()
    for idx, distance in enumerate(distances):
        row = idx // cols + 1
        col = idx % cols + 1
        df_d = data[data['distance'] == distance]
        for decoder in df_d['decoder'].unique():
            df_dec = df_d[df_d['decoder'] == decoder]
            x = [1.0/val  if not np.isnan(val) else 0 for val in df_dec['N'].values]
            y = df_dec['p'].values
            z = df_dec['l'].values
            colors = decoder_colors.get(decoder, ['#000000'])
            color = colors[distance % len(colors)]
            showlegend = decoder not in seen_decoders
            fig.add_trace(
                go.Scatter3d(
                    x=x, y=y, z=z,
                    mode='markers',
                    marker=dict(size=5, color=color),
                    name=decoder_names.get(decoder, decoder),
                    showlegend=showlegend
                ),
                row=row, col=col
            )
            seen_decoders.add(decoder)

    # set axis titles for every scene
    for i in range(1, len(distances) + 1):
        scene_key = 'scene' if i == 1 else f'scene{i}'
        fig.update_layout(**{
            scene_key: dict(
                xaxis=dict(title='1/N'),
                yaxis=dict(title='p'),
                zaxis=dict(title='l')
            )
        })

    fig.update_layout(title='Logical error rate vs p and 1/N (by distance)', showlegend=True)
    fig.update_traces(marker=dict(size=5))
    fig.update_layout(scene=dict(
        xaxis_title='1/N',
        yaxis_title='p',
        zaxis_title='l',
    ))
    
    return Plot(fig, "Logical error rate vs p and idling time constant N⁻¹ (by distance)")

def idling_threshold_plots(data: pd.DataFrame, decoder_a: str, decoder_b: str, display_thresholds: bool = False) -> Plot:
    # Expect columns: distance, decoder, N, p, l
    distances = sorted(data['distance'].unique())
    cols = 2
    rows = int(np.ceil(len(distances) / cols))

    fig = make_subplots(
        rows=rows, cols=cols,
        specs=[[{'type': 'heatmap'} for _ in range(cols)] for _ in range(rows)],
        subplot_titles=[f"d={d}" for d in distances]
    )

    for idx, distance in enumerate(distances):
        row, col = divmod(idx, cols)
        row += 1
        col += 1
        df_d = data[data['distance'] == distance]

        # Filter for just the two decoders
        df_a = df_d[df_d['decoder'] == decoder_a].copy()
        df_b = df_d[df_d['decoder'] == decoder_b].copy()
        if df_a.empty or df_b.empty:
            continue  # skip if one of them missing

        # Merge on p and N to align values
        merged = pd.merge(df_a, df_b, on=['p', 'N'], suffixes=('_a', '_b'))

        # Compute relative difference (A vs B)
        merged['inv_N'] = 1 / merged['N']
        merged['rel_diff'] = (merged['l_a'] - merged['l_b']) / (0.5*(merged['l_a'] + merged['l_b']))

        # Pivot for heatmap
        pivot = merged.pivot_table(index='p', columns='inv_N', values='rel_diff')

        # Add the heatmap
        fig.add_trace(
            go.Heatmap(
                z=pivot.values,
                x=pivot.columns,
                y=pivot.index,
                coloraxis = "coloraxis",
                zmid=0,
                name=f"{decoder_a} vs {decoder_b}",
            ),
            row=row, col=col
        )

        # --- Compute threshold (zero-crossing line) ---
        thresholds = []
        for invN, group in merged.groupby('inv_N'):
            group = group.sort_values('p')
            sign = np.sign(group['rel_diff'].values)
            change_indices = np.where(np.diff(sign))[0]
            if len(change_indices) > 0:
                i0 = change_indices[0]
                p1, p2 = group['p'].iloc[i0], group['p'].iloc[i0 + 1]
                y1, y2 = group['rel_diff'].iloc[i0], group['rel_diff'].iloc[i0 + 1]
                # Linear interpolation to find p where rel_diff = 0
                p_thresh = p1 - y1 * (p2 - p1) / (y2 - y1)
                thresholds.append((invN, p_thresh))

        if thresholds and display_thresholds:
            thresholds = np.array(sorted(thresholds, key=lambda t: t[0]))
            thresholds = np.array(sorted(thresholds, key=lambda t: t[0]))
            # rolling average (smooth p thresholds vs 1/N)
            x = thresholds[:, 0].astype(float)
            y = thresholds[:, 1].astype(float)
            y_smooth = pd.Series(y).rolling(window=20, center=True, min_periods=1).mean().values

            # plot raw (faint dashed) and smoothed (solid) threshold lines
            fig.add_trace(
                go.Scatter(
                    x=x,
                    y=y,
                    mode='lines',
                    line=dict(color='grey', width=1, dash='dash'),
                    name=f"threshold (raw) d={distance}"
                ),
                row=row, col=col
            )
            fig.add_trace(
                go.Scatter(
                    x=x,
                    y=y_smooth,
                    mode='lines',
                    line=dict(color='grey', width=2, shape='spline'),
                    name=f"threshold (rolling avg.) d={distance}"
                ),
                row=row, col=col
            )

    fig.update_layout(
        title=f"Relative logical error rate and thresholds: {decoder_a} vs {decoder_b}",
        height=400 * rows,
        showlegend=False,
        coloraxis=dict(colorscale='RdBu', cmin=-1, cmax=1, colorbar=dict(title=f"({decoder_a} - {decoder_b}) / ⟨{decoder_a}, {decoder_b}⟩")),
    )
    fig.update_layout()

    # Common axis labels
    for i in range(1, len(distances) + 1):
        fig.update_xaxes(title_text='1/N', row=(i - 1)//cols + 1, col=(i - 1)%cols + 1)
        fig.update_yaxes(title_text='p', row=(i - 1)//cols + 1, col=(i - 1)%cols + 1)

    return Plot(fig, f"Relative logical error rate: {decoder_a} vs {decoder_b}")

def idling_threshold_comparison_plot(data: pd.DataFrame, decoder_a: str, decoder_b: str, eps: float = 0.1) -> Plot:
    # Expect columns: distance, decoder, N, p, l
    df_a = data[data['decoder'] == decoder_a].copy()
    df_b = data[data['decoder'] == decoder_b].copy()

    merged = pd.merge(df_a, df_b, on=['distance', 'p', 'N'], suffixes=('_a', '_b'))
    merged['inv_N'] = 1 / merged['N']
    merged['rel_diff'] = (merged['l_a'] - merged['l_b']) / (0.5 * (merged['l_a'] + merged['l_b']))

    fig = go.Figure()
    distances = sorted(merged['distance'].unique())
    colors = [
        "rgb(255,0,0)",    # red
        "rgb(0,128,0)",    # green
        "rgb(0,0,255)",    # blue
    ]

    for i, d in enumerate(distances):
        df_d = merged[merged['distance'] == d]
        color = colors[i % len(colors)]

        # --- near-zero filled region (|rel_diff| < eps) ---
        band_points = []
        for invN, group in df_d.groupby('inv_N'):
            group = group.sort_values('p')
            near = group[np.abs(group['rel_diff']) < eps]
            if not near.empty:
                band_points.append((invN, near['p'].min(), near['p'].max()))

        if band_points:
            band_points = np.array(sorted(band_points, key=lambda x: x[0]))
            x = band_points[:, 0]
            y_low = band_points[:, 1]
            y_high = band_points[:, 2]
            y_low = pd.Series(y_low).rolling(window=20, center=True, min_periods=1).mean().values
            y_high = pd.Series(y_high).rolling(window=20, center=True, min_periods=1).mean().values
            
            fig.add_trace(go.Scatter(
                x=np.concatenate([x, x[::-1]]),
                y=np.concatenate([y_high, y_low[::-1]]),
                fill='toself',
                fillcolor=color.replace('rgb', 'rgba').replace(')', ',0.15)'),  # ~15% opacity
                line=dict(color='rgba(0,0,0,0)'),
                hoverinfo='skip',
                name=f"d={d} ɛ={eps}",
                legendgroup=f"d={d}",
                showlegend=False
            ))

        # --- threshold points (zero crossing) ---
        thresholds = []
        for invN, group in df_d.groupby('inv_N'):
            group = group.sort_values('p')
            sign = np.sign(group['rel_diff'].values)
            change_indices = np.where(np.diff(sign))[0]
            if len(change_indices) > 0:
                i0 = change_indices[0]
                p1, p2 = group['p'].iloc[i0], group['p'].iloc[i0 + 1]
                y1, y2 = group['rel_diff'].iloc[i0], group['rel_diff'].iloc[i0 + 1]
                p_thresh = p1 - y1 * (p2 - p1) / (y2 - y1)
                thresholds.append((invN, p_thresh))

        if thresholds:
            thresholds = np.array(sorted(thresholds, key=lambda t: t[0]))
            x = thresholds[:, 0].astype(float)
            y = thresholds[:, 1].astype(float)

            # --- raw dashed threshold line ---
            fig.add_trace(go.Scatter(
                x=x,
                y=y,
                mode='lines',
                line=dict(color=color, width=1, dash='dash'),
                name=f"d={d} (raw)",
                legendgroup=f"d={d}",
                showlegend=False,
            ))

            # --- smooth line (rolling average) ---
            window = min(20, max(3, int(len(y) / 5)))
            y_smooth = pd.Series(y).rolling(window=window, center=True, min_periods=1).mean().values

            fig.add_trace(go.Scatter(
                x=x,
                y=y_smooth,
                mode='lines',
                line=dict(color=color, width=3, shape='spline'),
                name=f"d={d}",
                legendgroup=f"d={d}",
            ))

    fig.update_layout(
        title=f"Threshold comparison: {decoder_a} vs {decoder_b}",
        xaxis_title="1/N",
        yaxis_title="p (threshold where rel_diff≈0)",
        height=650,
        template="plotly_white",
        legend_title="Distance",
    )

    return Plot(fig, f"Threshold comparison: {decoder_a} vs {decoder_b}")

SyntaxError: invalid syntax. Perhaps you forgot a comma? (1161008530.py, line 210)

In [32]:
base_dirs = [
    "../data/ccluster/results_idling_d6",
]

data = collect_data(base_dirs, [])
# group by decoder, p and N, take mean of l
data_mean = data.results.groupby(['decoder', 'distance', 'N', 'p'], dropna=False).mean().reset_index()

# Choose all lines where N is NaN or 1/N <= 0.05
data_mean = data_mean[(data_mean['N'].isna()) | (1.0/data_mean['N'] <= 0.005)]

In [32]:
plot = idling_threshold_plots(data_mean, 'uf', 'clayg', display_thresholds=True)
plot.show()

In [55]:
plot = idling_threshold_comparison_plot(data_mean, 'uf', 'clayg', eps=0.1)
plot.show()

Gtk-Message: 12:43:48.718: Not loading module "atk-bridge": The functionality is provided by GTK natively. Please try to not load it.


In [7]:
def threshold_plot(results) -> Plot:
    fig = go.Figure()

    distances = set()
    
    for (decoder, distance), values in results.items():
        distances.add(distance)
        colors = decoder_colors.get(decoder, decoder_colors['other'])
        decoder_name = decoder_names.get(decoder, decoder)
        ps = list(values.keys())
        ls, ns = zip(*(values.values()))
        ls = list(ls)
        ns = list(ns)

        sorted_indices = np.argsort(ps)
        ps = np.array(ps)[sorted_indices]
        ls = np.array(ls)[sorted_indices]
        
        # Compute Wilson score uncertainties
        # phat is the estimated proportion of failures
        z = norm.ppf(1 - 0.05 / 2)  # for 95% confidence
        ns = [n if not n == None and not np.isnan(n) else 200000 for n in ns]
        sigma = [1 / (1 + z**2 / n) * (l + z/(2*n)*(z + np.sqrt(4*n*l*(1-l))+z**2)) for l, n in zip(ls, ns)]
        sigma = np.array(sigma)
        
        fig.add_trace(go.Scatter(
            x=ps,
            y=ls,
            error_y=dict(
                type='data',
                array=sigma,
                visible=False,
                thickness=1.5,
                width=3
            ),
            mode='lines+markers',
            name=f"{decoder_name} d={distance}",
            line=dict(color=colors[distance % len(colors)], width=2),
            marker=dict(size=5),
            legendgroup=decoder_name,
            legendgrouptitle_text=decoder_name,
            hovertemplate=f"{decoder_name} d={distance}<br>p: %{{x:.2e}}<br>L: %{{y:.2e}}",
            showlegend=True,
        ))
                

        if len(ps) < 2:
            print(f"Not enough data points for fitting for {decoder_name} d={distance}")
            continue

        def power_law(x, a, b):
            return a * np.power(x, b)

        try:
            popt, pcov = curve_fit(
                power_law, ps, ls, sigma=sigma, absolute_sigma=True
            )
            a, b = popt
            print(f"{decoder_name} d={distance}, d/2={(distance)/2}, b={b:.4f}")
            fit_x = np.linspace(min(ps), max(ps), 100)
            fit_y = power_law(fit_x, *popt)
            fig.add_trace(go.Scatter(
                x=fit_x,
                y=fit_y,
                mode='lines',
                name=f"{decoder_name} fit d={distance}",
                line=dict(color=colors[distance % len(colors)], width=1, dash='dash'),
                legendgroup=f"{decoder_name} fit",
                legendgrouptitle_text=f"{decoder_name} fit",
                hovertemplate=f"{decoder_name} fit d={distance}: <br> parameters: c={b:.4f}<br> d/2={(distance)/2}",
                showlegend=True,
            ))
        except Exception as e:
            print(f"Error fitting data for {decoder_name} d={distance}: {e}")
    
    # Set legend groups ending with fit to not selected by default
    for trace in fig.data:
        if 'fit' in trace.name:
            trace.visible = 'legendonly'

    fig.update_layout(
        title=f"Results",
        legend_title="Decoder",
        template="plotly_white",
        xaxis=dict(type='log', title='p (log scale)'),
        yaxis=dict(type='log', title='L (log scale)'),
    )
    
    return Plot(fig, "Threshold Plot")

In [None]:
base_dirs = [
    "../data/ccluster/test_cluster_lifetime/results_test_cluster_lifetime/",
]

data = collect_data(base_dirs, [])
results = data.results.groupby(['decoder', 'distance']).apply(
    lambda x: x.set_index('p')[['l', 'n']].apply(lambda row: (row['l'], row['n']), axis=1).to_dict()
).to_dict()
threshold_plot(results).show()

  results = data.results.groupby(['decoder', 'distance']).apply(


clayg_lifetime_0.0 d=6, d/2=3.0, b=2.8883
clayg_lifetime_0.0 d=8, d/2=4.0, b=3.5399
clayg_lifetime_0.0 d=10, d/2=5.0, b=3.9664
clayg_lifetime_0.0 d=12, d/2=6.0, b=4.3972
clayg_lifetime_0.0 d=14, d/2=7.0, b=4.6170
clayg_lifetime_0.0 d=16, d/2=8.0, b=5.0506
clayg_lifetime_0.25 d=6, d/2=3.0, b=2.8883
clayg_lifetime_0.25 d=8, d/2=4.0, b=3.7604
clayg_lifetime_0.25 d=10, d/2=5.0, b=4.3626
clayg_lifetime_0.25 d=12, d/2=6.0, b=5.0272
clayg_lifetime_0.25 d=14, d/2=7.0, b=5.5669
clayg_lifetime_0.25 d=16, d/2=8.0, b=6.1331
clayg_lifetime_0.5 d=6, d/2=3.0, b=3.0518
clayg_lifetime_0.5 d=8, d/2=4.0, b=3.8243
clayg_lifetime_0.5 d=10, d/2=5.0, b=4.5583
clayg_lifetime_0.5 d=12, d/2=6.0, b=5.1197
clayg_lifetime_0.5 d=14, d/2=7.0, b=5.7101
clayg_lifetime_0.5 d=16, d/2=8.0, b=6.3825
clayg_lifetime_0.75 d=6, d/2=3.0, b=3.0403
clayg_lifetime_0.75 d=8, d/2=4.0, b=3.9240
clayg_lifetime_0.75 d=10, d/2=5.0, b=4.6447
clayg_lifetime_0.75 d=12, d/2=6.0, b=5.3838
clayg_lifetime_0.75 d=14, d/2=7.0, b=6.0080
clayg_li

Gtk-Message: 10:10:38.039: Not loading module "atk-bridge": The functionality is provided by GTK natively. Please try to not load it.


In [None]:
# Plot average and median operations grouped by distance and p
base_dirs = [
    "../data/ccluster/test_cluster_lifetime/results_test_cluster_lifetime",
]

data = collect_data(base_dirs, [])

steps = data.steps.groupby(['decoder', 'distance', 'p']).apply(lambda x: x.set_index('value')['occurences'].to_dict()).to_dict()

def weighted_median(df):
    expanded = []
    for value, count in zip(df['value'], df['occurences']):
        expanded.extend([value] * count)
    return np.median(expanded)

median_steps = (
    data.steps
    .groupby(['decoder', 'distance', 'p'])
    .apply(weighted_median)
    .reset_index(name='avg_steps')
)

median_steps_by_d = (
    median_steps
    .pivot_table(index=['decoder', 'p'], columns='distance', values='avg_steps')
    .apply(lambda row: row.dropna().to_dict(), axis=1)
    .to_dict()
)

median_steps_by_p = (
    median_steps
    .pivot_table(index=['decoder', 'distance'], columns='p', values='avg_steps')
    .apply(lambda row: row.dropna().to_dict(), axis=1)
    .to_dict()
)


avg_steps = (
    data.steps
    .groupby(['decoder', 'distance', 'p'])
    .apply(lambda x: np.average(x['value'], weights=x['occurences']))
    .reset_index(name='avg_steps')
)

average_steps_by_d = (
    avg_steps
    .pivot_table(index=['decoder', 'p'], columns='distance', values='avg_steps')
    .apply(lambda row: row.dropna().to_dict(), axis=1)
    .to_dict()
)

average_steps_by_p = (
    avg_steps
    .pivot_table(index=['decoder', 'distance'], columns='p', values='avg_steps')
    .apply(lambda row: row.dropna().to_dict(), axis=1)
    .to_dict()
)


plot_step_distribution_by_p(steps).show()









Plotting None p=0.001, data points: 6


TypeError: object of type 'NoneType' has no len()

Gtk-Message: 16:57:55.739: Not loading module "atk-bridge": The functionality is provided by GTK natively. Please try to not load it.


In [31]:
average_plot = plot_operations_against_d(average_steps_by_d)
average_plot.set_title("Average Operations against d, grouped by p")
average_plot.show()

average_plot = plot_operations_against_p(average_steps_by_p)
average_plot.set_title("Average Operations against p, grouped by d")
average_plot.show()


median_plot = plot_operations_against_d(median_steps_by_d)
median_plot.set_title("Median Operations against d, grouped by p")
median_plot.show()

median_plot = plot_operations_against_p(median_steps_by_p)
median_plot.set_title("Median Operations against p, grouped by d")
median_plot.show()

Plotting clayg_lifetime_0.0 p=0.001, data points: 6
Plotting clayg_lifetime_0.0 p=0.00128, data points: 6
Plotting clayg_lifetime_0.0 p=0.001638, data points: 6
Plotting clayg_lifetime_0.0 p=0.002097, data points: 6
Plotting clayg_lifetime_0.0 p=0.002684, data points: 6
Plotting clayg_lifetime_0.0 p=0.003436, data points: 6
Plotting clayg_lifetime_0.0 p=0.004398, data points: 6
Plotting clayg_lifetime_0.0 p=0.005629, data points: 6
Plotting clayg_lifetime_0.0 p=0.007206, data points: 6
Plotting clayg_lifetime_0.0 p=0.009223, data points: 6
Plotting clayg_lifetime_0.0 p=0.011806, data points: 6
Plotting clayg_lifetime_0.0 p=0.015112, data points: 6
Plotting clayg_lifetime_0.0 p=0.019343, data points: 6
Plotting clayg_lifetime_0.0 p=0.024, data points: 6
Plotting clayg_lifetime_0.0 p=0.0288, data points: 6
Plotting clayg_lifetime_0.0 p=0.03456, data points: 6
Plotting clayg_lifetime_0.25 p=0.001, data points: 6
Plotting clayg_lifetime_0.25 p=0.00128, data points: 6
Plotting clayg_lifetim

Gtk-Message: 17:00:43.584: Not loading module "atk-bridge": The functionality is provided by GTK natively. Please try to not load it.


Gtk-Message: 17:00:43.664: Not loading module "atk-bridge": The functionality is provided by GTK natively. Please try to not load it.
Gtk-Message: 17:00:43.858: Not loading module "atk-bridge": The functionality is provided by GTK natively. Please try to not load it.
Gtk-Message: 17:00:43.887: Not loading module "atk-bridge": The functionality is provided by GTK natively. Please try to not load it.


In [25]:
data.steps

Unnamed: 0,decoder,distance,p,value,occurences
0,clayg_lifetime_0.0,10,0.00100,0.0,84530
1,clayg_lifetime_0.0,10,0.00100,1.0,14136
2,clayg_lifetime_0.0,10,0.00100,2.0,1310
3,clayg_lifetime_0.0,10,0.00100,3.0,16
4,clayg_lifetime_0.0,10,0.00100,4.0,8
...,...,...,...,...,...
4377,uf,14,0.03456,4.0,15671
4378,uf,14,0.03456,5.0,67854
4379,uf,14,0.03456,6.0,117
4380,uf,14,0.03456,7.0,4706


In [30]:
def plot_operations_against_p(average_operations) -> Plot:
    fig = go.Figure()

    for (decoder, distance), values in average_operations.items():
        colors = decoder_colors.get(decoder, decoder_colors['other'])
        decoder_name = decoder_names.get(decoder, decoder)
        x = list(values.keys())
        y = list(values.values())
        # sort by x
        sorted_indices = np.argsort(x)
        x = np.array(x)[sorted_indices]
        y = np.array(y)[sorted_indices]
        print(f"Plotting {decoder_name} d={distance}, data points: {len(x)}")
        fig.add_trace(go.Scatter(
            x=x,
            y=y,
            mode='lines+markers',
            name=f"{decoder_name} d={distance}",
            line=dict(color=colors[distance % len(colors)], width=2),
            marker=dict(size=5),
            legendgroup=decoder_name,
            legendgrouptitle_text=decoder_name,
            hovertemplate=f"{decoder_name} d={distance}<br>p: %{{x:.2e}}<br>avg. # of operations: %{{y:.3}}",
            showlegend=True,
        ))

    fig.update_layout(
        legend_title="Decoder",
        template="plotly_white",
        xaxis=dict(type='linear', title='p (linear scale)'),
        yaxis=dict(type='linear', title='Operations (linear scale)'),
    )
    
    return Plot(fig, "Operations against p, grouped by distance")

def plot_operations_against_d(average_operations) -> Plot:
    fig = go.Figure()

    for (decoder, p), values in average_operations.items():
        colors = decoder_colors.get(decoder, decoder_colors['other'])
        decoder = decoder_names.get(decoder, decoder)
        x = list(values.keys())
        y = list(values.values())
        # sort by x
        sorted_indices = np.argsort(x)
        x = np.array(x)[sorted_indices]
        y = np.array(y)[sorted_indices]
        print(f"Plotting {decoder} p={p}, data points: {len(x)}")
        fig.add_trace(go.Scatter(
            x=x,
            y=y,
            mode='lines+markers',
            name=f"{decoder} p={p}",
            line=dict(color=colors[int(p*1000) % len(colors)], width=2),
            marker=dict(size=5),
            legendgroup=decoder,
            legendgrouptitle_text=decoder,
            hovertemplate=f"{decoder} p={p}<br>d: %{{x}}<br>avg. # of operations: %{{y:.3}}",
            showlegend=True,
        ))

    fig.update_layout(
        legend_title="Decoder",
        template="plotly_white",
        xaxis=dict(type='linear', title='distance d (linear scale)'),
        yaxis=dict(type='linear', title='Operations (linear scale)'),
    )
    
    return Plot(fig, "Operations against d, grouped by p")

def plot_step_distribution_by_p(steps) -> Plot:
    # Get all unique p values
    ps = set(p for (_, _, p) in steps.keys())
    if len(ps) > 1:
        # Multiple p: group by (decoder, distance), columns for each p
        group = defaultdict(list)
        for decoder, distance, p in steps.keys():
            group[(decoder, distance)].append(p)
        row_keys = list(group.keys())
        max_cols = max(len(ps) for ps in group.values())
        fig = make_subplots(
            rows=len(row_keys), cols=max_cols,
            subplot_titles=[
                f"{decoder_names.get(decoder, decoder)} d={distance}, p={p:.1e}" if p is not None else ""
                for (decoder, distance), ps in group.items() for p in (sorted(ps) if len(ps) == max_cols else sorted(ps) + [None] * (max_cols - len(ps)))
            ],
        )
        for row, (decoder, distance) in enumerate(row_keys, start=1):
            ps_sorted = sorted(group[(decoder, distance)])
            for col, p in enumerate(ps_sorted, start=1):
                values = steps[(decoder, distance, p)]
                x = []
                for step_val, count in sorted(values.items()):
                    x.extend([step_val] * count)
                colors = decoder_colors.get(decoder, decoder_colors['other'])
                fig.add_trace(
                    go.Histogram(
                        x=x,
                        marker_color=colors[distance % len(colors)],
                        showlegend=False
                    ),
                    row=row, col=col
                )
    else:
        # Only one p: group by decoder, columns for each distance
        group = defaultdict(list)
        for decoder, distance, p in steps.keys():
            group[decoder].append(distance)
        row_keys = list(group.keys())
        max_cols = max(len(ds) for ds in group.values())
        fig = make_subplots(
            rows=len(row_keys), cols=max_cols,
            subplot_titles=[
                f"{decoder_names.get(decoder, decoder)} d={distance}" if distance is not None else ""
                for decoder, ds in group.items() for distance in (sorted(ds) if len(ds) == max_cols else sorted(ds)+ [None]*(max_cols - len(ds)))
            ],
        )
        for row, decoder in enumerate(row_keys, start=1):
            ds_sorted = sorted(group[decoder])
            for col, distance in enumerate(ds_sorted, start=1):
                # Since only one p, get it
                p = next(p for (d, dep, p) in steps.keys() if d == decoder and dep == distance)
                values = steps[(decoder, distance, p)]
                x = []
                for step_val, count in sorted(values.items()):
                    x.extend([step_val] * count)
                colors = decoder_colors.get(decoder)
                fig.add_trace(
                    go.Histogram(
                        x=x,
                        marker_color=colors[distance % len(colors)],
                        showlegend=False
                    ),
                    row=row, col=col
                )
    fig.update_layout(
        template="plotly_white",
        height=300 * len(row_keys),
        width=500 * max_cols
    )
    for i in range(1, len(row_keys) + 1):
        fig.update_yaxes(title_text="Count", row=i, col=1)
    for j in range(1, max_cols + 1):
        fig.update_xaxes(title_text="Steps", row=1, col=j)
    
    return Plot(fig, "Step Distribution by Decoder, Distance, and p")

In [None]:
# Plot average and median operations grouped by distance and p
base_dirs = [
    "../data/ccluster/steps_new",
]

data = collect_data(base_dirs, [])

steps = data.steps.groupby(['decoder', 'distance', 'p']).apply(lambda x: x.set_index('value')['occurences'].to_dict()).to_dict()

def weighted_median(df):
    expanded = []
    for value, count in zip(df['value'], df['occurences']):
        expanded.extend([value] * count)
    return np.median(expanded)

median_steps = (
    data.steps
    .groupby(['decoder', 'distance', 'p'])
    .apply(weighted_median)
    .reset_index(name='avg_steps')
)

median_steps_by_d = (
    median_steps
    .pivot_table(index=['decoder', 'p'], columns='distance', values='avg_steps')
    .apply(lambda row: row.dropna().to_dict(), axis=1)
    .to_dict()
)

median_steps_by_p = (
    median_steps
    .pivot_table(index=['decoder', 'distance'], columns='p', values='avg_steps')
    .apply(lambda row: row.dropna().to_dict(), axis=1)
    .to_dict()
)


avg_steps = (
    data.steps
    .groupby(['decoder', 'distance', 'p'])
    .apply(lambda x: np.average(x['value'], weights=x['occurences']))
    .reset_index(name='avg_steps')
)

average_steps_by_d = (
    avg_steps
    .pivot_table(index=['decoder', 'p'], columns='distance', values='avg_steps')
    .apply(lambda row: row.dropna().to_dict(), axis=1)
    .to_dict()
)

average_steps_by_p = (
    avg_steps
    .pivot_table(index=['decoder', 'distance'], columns='p', values='avg_steps')
    .apply(lambda row: row.dropna().to_dict(), axis=1)
    .to_dict()
)


plot_step_distribution_by_p(steps).show()

average_plot = plot_operations_against_d(average_steps_by_d)
average_plot.set_title("Average Operations against d, grouped by p")
average_plot.show()

average_plot = plot_operations_against_p(average_steps_by_p)
average_plot.set_title("Average Operations against p, grouped by d")
average_plot.show()


median_plot = plot_operations_against_d(median_steps_by_d)
median_plot.set_title("Median Operations against d, grouped by p")
median_plot.show()

median_plot = plot_operations_against_p(median_steps_by_p)
median_plot.set_title("Median Operations against p, grouped by d")
median_plot.show()









NameError: name 'plot_step_distribution_by_p' is not defined

In [None]:
# Threshold plots for new peeling decoder
base_dirs = [
    "../data/ccluster/results_new_peeling",
]

data = collect_data(base_dirs, [])
results = data.results.groupby(['decoder', 'distance']).apply(
    lambda x: x.set_index('p')[['l', 'n']].apply(lambda row: (row['l'], row['n']), axis=1).to_dict()
).to_dict()
threshold_plot(results).show()

ClAYG d=6, d/2=3.0, b=2.8623
ClAYG d=8, d/2=4.0, b=3.5901
ClAYG d=10, d/2=5.0, b=4.0491
ClAYG d=12, d/2=6.0, b=4.3566
ClAYG d=14, d/2=7.0, b=4.5684
ClAYG d=16, d/2=8.0, b=5.2777
ClAYG Stop Early d=6, d/2=3.0, b=2.8657
ClAYG Stop Early d=8, d/2=4.0, b=3.2075
ClAYG Stop Early d=10, d/2=5.0, b=3.4306
ClAYG Stop Early d=12, d/2=6.0, b=3.5557
ClAYG Stop Early d=14, d/2=7.0, b=3.6406
ClAYG Stop Early d=16, d/2=8.0, b=3.6499
Single Layer ClAYG d=6, d/2=3.0, b=2.7617
Single Layer ClAYG d=8, d/2=4.0, b=3.3287
Single Layer ClAYG d=10, d/2=5.0, b=3.4472
Single Layer ClAYG d=12, d/2=6.0, b=3.6408
Single Layer ClAYG d=14, d/2=7.0, b=3.6280
Single Layer ClAYG d=16, d/2=8.0, b=3.7053
Single Layer ClAYG Stop Early d=6, d/2=3.0, b=2.7767
Single Layer ClAYG Stop Early d=8, d/2=4.0, b=3.0895
Single Layer ClAYG Stop Early d=10, d/2=5.0, b=3.1983
Single Layer ClAYG Stop Early d=12, d/2=6.0, b=3.2839
Single Layer ClAYG Stop Early d=14, d/2=7.0, b=3.3253
Single Layer ClAYG Stop Early d=16, d/2=8.0, b=3.2992






Gtk-Message: 16:09:50.609: Not loading module "atk-bridge": The functionality is provided by GTK natively. Please try to not load it.


In [None]:
# Plot results for the tests with different growth rates
base_dir = "data/old/special_clayg_tests"
average_operations = collect_data_old(base_dir, [2])
results = average_operations.loc[(average_operations['metric'] == 'results') & (average_operations['p'] > 0) & (average_operations['value'] > 0)].groupby(['decoder', 'distance']).apply(
    lambda x: x.set_index('p')['value'].to_dict()
).to_dict()



fig = threshold_plot(results)

fig.show(renderer="browser")

In [None]:
# Plot results for the tests with clayg with faster backwards growth
base_dir = "data/old/special_clayg_grow_faster_backwards"
average_operations = collect_data_old(base_dir, [1])
print(len(average_operations))
average_operations = average_operations.loc[(average_operations['metric'] == 'results') & (average_operations['p'] > 0) & (average_operations['value'] > 0)]
results = average_operations.groupby(['decoder', 'distance'], group_keys=False).apply(
    lambda x: x.set_index('p')['value'].to_dict()
).to_dict()

fig = threshold_plot(results)

fig.show(renderer="browser")

In [None]:
# Plot results for the tests with single layer clayg
base_dir = "data/old/single_layer_clayg_tests"
average_operations = collect_data_old(base_dir, [1,2,3])
average_operations = average_operations.loc[(average_operations['metric'] == 'results') & (average_operations['p'] > 0) & (average_operations['value'] > 0)]
results = average_operations.groupby(['decoder', 'distance'], group_keys=False).apply(
    lambda x: x.set_index('p')['value'].to_dict()
).to_dict()

fig = threshold_plot(results)

fig.show(renderer="browser")

In [None]:
# Plot average operations against p
base_dir = "data/treshold_plots"
average_operations = collect_data_old(base_dir, [18,19,20,21, 22])
average_operations = average_operations.loc[average_operations['metric'] == 'average_operations'].groupby(['decoder', 'distance'], group_keys=False).apply(
    lambda x: x.set_index('p')['value'].to_dict()
).to_dict()

fig = plot_operations_against_p(average_operations)

fig.show(renderer="browser")

In [None]:
# Plot average operations against p
base_dir = "data/special_clayg_grow_faster_backwards"
average_operations = collect_data_old(base_dir, [1,2,3])
average_operations = average_operations.loc[average_operations['metric'] == 'average_operations'].groupby(['decoder', 'distance'], group_keys=False).apply(
    lambda x: x.set_index('p')['value'].to_dict()
).to_dict()

fig = plot_operations_against_p(average_operations)

fig.show(renderer="browser")

In [None]:
# Plot average operations against d
base_dir = "data/average_operations_initial"
data = collect_data_old(base_dir, [2,3])
average_operations = data.loc[data['metric'] == 'average_operations'].groupby(['decoder', 'p'], group_keys=False).apply(
    lambda x: x.set_index('distance')['value'].to_dict()
).to_dict()

fig = plot_operations_against_d(average_operations)

fig.show(renderer="browser")