# Process Clifford annealing output

In [50]:
import os
import re
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['text.usetex'] = True

def load_and_summarize(path):
    """Load all ``CSA_<n>_<m>_<k>.npy`` files in *path* and summarize them.

    For each matching file the integers ``n``, ``m`` and ``k`` are parsed
    from the file name, the stored array is multiplied by 100, and one row
    is recorded per entry along the array's first axis, keeping only the
    first element of that entry (``val[0]``) as the row's ``value``.

    Parameters
    ----------
    path : str
        Directory that is searched (non-recursively) for the ``.npy`` files.

    Returns
    -------
    df : pandas.DataFrame
        One row per (file, trial) with columns ``n``, ``r``, ``k``,
        ``trial`` and ``value``, where ``r`` is the floor of ``m / n``.
    summary : pandas.DataFrame
        Mean and standard deviation (pandas default, i.e. sample std) of
        ``value`` for each ``(n, r, k)`` group, rounded to two decimals,
        with columns ``n``, ``r``, ``k``, ``mean`` and ``std``.

    Both frames are empty (but carry the columns above) when no file in
    *path* matches the naming scheme.

    Raises
    ------
    ZeroDivisionError
        If a file name encodes ``n == 0``.
    """
    # Regex to extract n, m, k from filename CSA_<n>_<m>_<k>.npy
    pattern = re.compile(r"CSA_(\d+)_(\d+)_(\d+)\.npy")

    raw_columns = ["n", "r", "k", "trial", "value"]
    rows = []

    # glob returns files in arbitrary order; sort so the raw frame's row
    # order is reproducible from run to run.
    for fname in sorted(glob.glob(os.path.join(path, "CSA_*_*_*.npy"))):
        match = pattern.search(os.path.basename(fname))
        if not match:
            # The glob is looser than the regex (e.g. non-numeric fields).
            continue
        n, m_val, k = map(int, match.groups())
        ratio = m_val // n  # int(m/n)

        # NOTE(review): the x100 scaling presumably turns fractions into
        # percentages -- confirm against whatever writes these files.
        arr = np.load(fname) * 100

        rows.extend(
            {"n": n, "r": ratio, "k": k, "trial": trial, "value": val[0]}
            for trial, val in enumerate(arr)
        )

    # Passing the columns explicitly keeps the schema even with zero rows.
    df = pd.DataFrame(rows, columns=raw_columns)

    if df.empty:
        # Without this guard, grouping a column-less frame raises KeyError.
        summary = pd.DataFrame(columns=["n", "r", "k", "mean", "std"])
        return df, summary

    # Compute summary statistics
    summary = (
        df.groupby(["n", "r", "k"])["value"]
        .agg(["mean", "std"])
        .reset_index()
    )

    summary[["mean", "std"]] = summary[["mean", "std"]].round(2)

    return df, summary


In [54]:
# Load every CSA result file and build the per-(n, r, k) summary table.
df, summary = load_and_summarize("/Users/jzlu/Dropbox/data_hdqi/CSA")

# Keep only the n = 1200 rows of the summary.
n1200_summary = summary[summary["n"] == 1200]

# Lift pandas' display limits so no row or column of the table is elided.
with pd.option_context("display.max_rows", None, "display.max_columns", None):
    print(n1200_summary)

       n   r  k   mean   std
0   1200   3  3  78.20  0.31
1   1200   3  4  77.22  0.35
2   1200   3  5  75.84  0.39
3   1200   3  6  74.60  0.32
4   1200   6  3  70.18  0.25
5   1200   6  4  69.44  0.23
6   1200   6  5  68.56  0.25
7   1200   6  6  67.65  0.30
8   1200  10  3  65.59  0.22
9   1200  10  4  64.98  0.20
10  1200  10  5  64.33  0.21
11  1200  10  6  63.72  0.19
