# Process Clifford annealing output

In [24]:
import os
import re
import glob
import numpy as np
import pandas as pd

def load_and_summarize(path):
    """Load every ``CSA_<n>_<m>_<k>.npy`` file in *path* and summarize it.

    Each file is read with ``np.load``, multiplied by 100 and expanded into
    one row per array entry, the entry's index being recorded as ``trial``.
    The integers ``n``, ``m`` and ``k`` are parsed from the filename and the
    rows are keyed by ``n``, ``r = m // n`` and ``k``.

    Args:
        path: Directory that contains the ``.npy`` files.

    Returns:
        A ``(df, summary)`` tuple. ``df`` has the columns
        ``n, r, k, trial, value`` (one row per loaded entry). ``summary`` has
        the columns ``n, r, k, mean, std`` holding the per-group mean and
        pandas' default ``std`` of ``value``, rounded to two decimals.
        When no matching file exists both frames are empty but still carry
        these columns.

    Raises:
        ZeroDivisionError: If a filename encodes ``n == 0``.
    """
    # Regex to extract n, m, k from filename CSA_<n>_<m>_<k>.npy
    pattern = re.compile(r"CSA_(\d+)_(\d+)_(\d+)\.npy")
    group_keys = ["n", "r", "k"]
    row_columns = group_keys + ["trial", "value"]

    rows = []

    # Sort so the row order of ``df`` does not depend on directory order.
    for fname in sorted(glob.glob(os.path.join(path, "CSA_*_*_*.npy"))):
        # fullmatch: the glob is looser than the regex, so reject basenames
        # that merely *contain* something that looks like a valid name.
        match = pattern.fullmatch(os.path.basename(fname))
        if match is None:
            continue
        n, m_val, k = map(int, match.groups())
        ratio = m_val // n  # int(m/n)

        # atleast_1d lets a file holding a single scalar count as one trial
        # instead of failing on iteration over a 0-d array.
        arr = np.atleast_1d(np.load(fname)) * 100

        rows.extend(
            {"n": n, "r": ratio, "k": k, "trial": trial, "value": val}
            for trial, val in enumerate(arr)
        )

    if not rows:
        # Nothing to aggregate: grouping a column-less frame would raise
        # KeyError, so hand back empty frames with the documented columns.
        empty_df = pd.DataFrame(columns=row_columns)
        empty_summary = pd.DataFrame(columns=group_keys + ["mean", "std"])
        return empty_df, empty_summary

    df = pd.DataFrame(rows, columns=row_columns)

    # Compute summary statistics
    summary = (
        df.groupby(group_keys)["value"]
        .agg(["mean", "std"])
        .reset_index()
    )

    summary[["mean", "std"]] = summary[["mean", "std"]].round(2)

    return df, summary


In [None]:
# Load every CSA result file, then keep only the summary rows with n == 800.
df, summary = load_and_summarize("/Users/jzlu/Dropbox/data_hdqi/CSA")

summary_800 = summary.loc[summary["n"] == 800]

# Lift pandas' display limits for this one print so no row or column of the
# filtered table is elided.
with pd.option_context(
    "display.max_rows", None,
    "display.max_columns", None,
):
    print(summary_800)

hi
      n   r  k   mean   std
36  800   3  3  77.92  0.29
37  800   3  4  76.87  0.32
38  800   3  5  75.68  0.30
39  800   3  6  74.60  0.51
40  800   6  3  70.10  0.26
41  800   6  4  69.35  0.29
42  800   6  5  68.56  0.28
43  800   6  6  67.46  0.21
44  800  10  3  65.44  0.31
45  800  10  4  64.81  0.18
46  800  10  5  64.31  0.27
47  800  10  6  63.60  0.32
