In [3]:
import os
import numpy as np
from sklearn.neural_network import BernoulliRBM
import joblib

# Seed NumPy's global generator so random draws made later in the notebook
# repeat on a fresh kernel run.
np.random.seed(0)

# Input data location and output location for the fitted models.
data_dir, model_dir = "./data", "models"

# Create the model folder up front; an already existing folder is not an error.
os.makedirs(model_dir, exist_ok=True)

# Report both locations, one per line.
print(
    f"Data resides in         : {data_dir}",
    f"Models will be saved to : {model_dir}",
    sep="\n",
)

Data resides in         : ./data
Models will be saved to : models


In [4]:
def z_meas_to_binary(line: str) -> np.ndarray:
    """Encode one text line as a flat float32 vector of 0s and 1s.

    Leading and trailing whitespace is stripped first. Every remaining
    character becomes one entry: 1.0 if the character is exactly ``"Z"``,
    0.0 for anything else.

    Parameters
    ----------
    line : str
        One line of text, typically still carrying its trailing newline.

    Returns
    -------
    np.ndarray
        1-D ``float32`` array with one entry per non-stripped character
        (length 0 for an empty or whitespace-only line).
    """
    symbols = line.strip()
    bits = np.zeros(len(symbols), dtype=np.float32)
    for pos, symbol in enumerate(symbols):
        if symbol == "Z":
            bits[pos] = 1.0
    return bits

In [5]:
# Subset sizes (Ns) to train on, for every chain length.
Ns_list       = [50, 100, 200, 500, 1_000, 2_000, 5_000, 10_000, 20_000, 50_000]
# Chain lengths N; each one selects the master file w_vanilla_{N}_50000.txt.
chain_lengths = [20, 40, 80]

# Hyper-parameters shared by every BernoulliRBM fitted below.
rbm_kwargs = dict(
    n_components = 20,
    learning_rate = 0.01,
    batch_size = 100,
    n_iter = 500,
    verbose = True,
    random_state = 0
)

# Cell-local generator for the subsampling. RandomState(0) produces the same
# draws as the global stream immediately after np.random.seed(0), so a fresh
# top-to-bottom run is unchanged, but re-running this cell on its own now
# reproduces the same subsets instead of continuing from leftover global state.
subset_rng = np.random.RandomState(0)

for N in chain_lengths:
    master_file = os.path.join(data_dir, f"w_vanilla_{N}_50000.txt")
    if not os.path.isfile(master_file):
        print(f"✗ {master_file} missing – skipping whole N={N} block.")
        continue

    with open(master_file) as f:
        # Ignore blank lines (e.g. an empty line at the end of the file): they
        # would decode to zero-length rows and make np.stack fail on ragged input.
        rows = [z_meas_to_binary(line) for line in f if line.strip()]

    if not rows:
        print(f"✗ {master_file} is empty – skipping whole N={N} block.")
        continue

    full_data = np.stack(rows)
    n_available = full_data.shape[0]

    for Ns in Ns_list:
        # Sampling without replacement cannot return more rows than exist;
        # skip this size rather than letting a ValueError abort the whole sweep.
        if Ns > n_available:
            print(f"✗ Ns={Ns} exceeds the {n_available} samples in {master_file} – skipping.")
            continue

        # Random subset of Ns distinct rows from the master data.
        idx = subset_rng.choice(n_available, Ns, replace=False)
        subset = full_data[idx]

        rbm = BernoulliRBM(**rbm_kwargs).fit(subset)

        # One compressed pickle per (N, Ns) combination.
        model_path = os.path.join(model_dir, f"rbm_w_vanilla_{N}_{Ns}.pkl")
        joblib.dump(rbm, model_path, compress=3)
        print(f"✓ RBM (N={N}, Ns={Ns}) saved to {model_path}")

[BernoulliRBM] Iteration 1, pseudo-likelihood = -13.29, time = 0.00s
[BernoulliRBM] Iteration 2, pseudo-likelihood = -12.80, time = 0.00s
[BernoulliRBM] Iteration 3, pseudo-likelihood = -12.35, time = 0.00s
[BernoulliRBM] Iteration 4, pseudo-likelihood = -11.93, time = 0.00s
[BernoulliRBM] Iteration 5, pseudo-likelihood = -11.56, time = 0.00s
[BernoulliRBM] Iteration 6, pseudo-likelihood = -11.21, time = 0.00s
[BernoulliRBM] Iteration 7, pseudo-likelihood = -10.90, time = 0.00s
[BernoulliRBM] Iteration 8, pseudo-likelihood = -10.60, time = 0.00s
[BernoulliRBM] Iteration 9, pseudo-likelihood = -10.32, time = 0.00s
[BernoulliRBM] Iteration 10, pseudo-likelihood = -10.05, time = 0.00s
[BernoulliRBM] Iteration 11, pseudo-likelihood = -9.79, time = 0.00s
[BernoulliRBM] Iteration 12, pseudo-likelihood = -9.56, time = 0.00s
[BernoulliRBM] Iteration 13, pseudo-likelihood = -9.35, time = 0.00s
[BernoulliRBM] Iteration 14, pseudo-likelihood = -9.13, time = 0.00s
[BernoulliRBM] Iteration 15, pseu