This is a helper notebook that combines batched SBC run files into a single file.

In [4]:
import torch
import numpy as np
import pickle
import yaml
import os

In [None]:
def view_checkpointed_sbc_ranks(sequential_sbc_ID, results_path):
    # Iterate through all checkpoints
    checkpoint_ID = 0
    print("THE FOLLOWING FILES EXIST:")
    while os.path.exists(results_path + f"sequential_sbc_ranks{sequential_sbc_ID}_checkpoint{checkpoint_ID}" + ".npy"):
        print(f"sequential_sbc_ranks{sequential_sbc_ID}_checkpoint{checkpoint_ID}" + ".npy")
        if os.path.exists(results_path + f"sequential_sbc_ranks{sequential_sbc_ID}_checkpoint{checkpoint_ID}" + ".yaml"):
            print(f"sequential_sbc_ranks{sequential_sbc_ID}_checkpoint{checkpoint_ID}" + ".yaml")
        if os.path.exists(results_path + f"sequential_sbc_ranks{sequential_sbc_ID}_checkpoint{checkpoint_ID}_simulations" + ".npz"):
            print(f"sequential_sbc_ranks{sequential_sbc_ID}_checkpoint{checkpoint_ID}_simulations" + ".npz")
        checkpoint_ID += 1

def retrieve_and_combine_checkpointed_sbc_ranks(sequential_sbc_ID, results_path):
    ranks = []
    config = {}
    sbc_round_ID = 0
    simulations = {}
    N_iter = 0
    sbc_time = 0

    # Iterate through all checkpoints
    checkpoint_ID = 0
    while os.path.exists(results_path + f"sequential_sbc_ranks{sequential_sbc_ID}_checkpoint{checkpoint_ID}" + ".npy"):
        sequential_ranks_name = f"sequential_sbc_ranks{sequential_sbc_ID}_checkpoint{checkpoint_ID}"
        path_to_sequential_ranks = results_path + sequential_ranks_name + ".npy"
        path_to_sequential_ranks_config = results_path + sequential_ranks_name + ".yaml"
        path_to_sequential_ranks_simulations = results_path + sequential_ranks_name + "_simulations" + ".npz"
        
        # Load sbc ranks
        sequential_ranks = np.load(path_to_sequential_ranks)

        # Load config
        with open(path_to_sequential_ranks_config, "r") as f:
            sequential_ranks_config = yaml.safe_load(f)

        # Load samples
        sequential_ranks_simulations = np.load(path_to_sequential_ranks_simulations)

        # Append checkpoint's ranks to collection of ranks
        ranks += list(sequential_ranks)

        # Append checkpoint's samples to collection of samples
        i = 0
        while f"posterior_samples_round_{i}" in sequential_ranks_simulations:
            simulations[f"posterior_samples_round_{sbc_round_ID}"] = sequential_ranks_simulations[f"posterior_samples_round_{i}"]
            simulations[f"prior_sample_round_{sbc_round_ID}"] = sequential_ranks_simulations[f"prior_sample_round_{i}"]
            simulations[f"data_sample_round_{sbc_round_ID}"] = sequential_ranks_simulations[f"data_sample_round_{i}"]
            i += 1
            sbc_round_ID += 1

        # Increment N_iter and sbc_time
        N_iter += sequential_ranks_config["N_iter"]
        sbc_time += sequential_ranks_config["checkpoint_sbc_time"]

        if checkpoint_ID == 0:
            config["num_sequential_rounds"] = sequential_ranks_config["num_sequential_rounds"]
            config["num_simulations_per_round"] = sequential_ranks_config["num_simulations_per_round"]
            config["sigma"] = sequential_ranks_config["sigma"]
            config["N_samp"] = sequential_ranks_config["N_samp"]

        checkpoint_ID += 1
    config["N_iter"] = N_iter
    config["sbc_time"] = sbc_time
    ranks = np.array(ranks)
    sbc_dict = {"ranks": ranks, "config": config, "simulations": simulations}
    return sbc_dict


def save_combined_checkpointed_sbc_ranks(sequential_sbc_ID, results_path, sbc_dict):
    if os.path.exists(results_path + f"sequential_sbc_ranks{sequential_sbc_ID}.npy"):
        raise AssertionError("A folder with this name already exists. Please delete it if you want to overwrite it.")

    ranks_sequential = sbc_dict["ranks"]
    samples_dict = sbc_dict["simulations"]
    config = sbc_dict["config"]

    sequential_sbc_save_path = results_path + f"sequential_sbc_ranks{sequential_sbc_ID}.npy"
    config_save_path = results_path + f"sequential_sbc_ranks{sequential_sbc_ID}.yaml"
    simulations_save_path = results_path + f"sequential_sbc_ranks{sequential_sbc_ID}_simulations.npz"

    print(f"\n Saving ranks to {sequential_sbc_save_path}:")
    np.save(sequential_sbc_save_path, ranks_sequential)
    print("\n Ranks saved successfully.")

    print(f"\n Saving config file to {config_save_path}:")
    with open(config_save_path, "w") as f:
        yaml.safe_dump(config, f)
    print("\n Config file saved successfully.")

    # Save simulations:
    print(f"\n Saving simulations to {simulations_save_path}:")
    np.savez(simulations_save_path, **samples_dict)
    print("\n Simulations saved successfully.")

In [17]:
# Define results path
results_path = "/Users/Lieve/Documents/Masters Project/SBC-SBI/results/toy_examples/norm_norm_diffuse_1d/npe_c/"

In [18]:
# Define the sequential_sbc_ID to READ FROM (this should be checkpointed)
sequential_sbc_ID_read = 1

In [19]:
# Inspect the files
view_checkpointed_sbc_ranks(sequential_sbc_ID_read, results_path)

THE FOLLOWING FILES EXIST:
sequential_sbc_ranks1_checkpoint0.npy
sequential_sbc_ranks1_checkpoint0.yaml
sequential_sbc_ranks1_checkpoint0_simulations.npz
sequential_sbc_ranks1_checkpoint1.npy
sequential_sbc_ranks1_checkpoint1.yaml
sequential_sbc_ranks1_checkpoint1_simulations.npz
sequential_sbc_ranks1_checkpoint2.npy
sequential_sbc_ranks1_checkpoint2.yaml
sequential_sbc_ranks1_checkpoint2_simulations.npz
sequential_sbc_ranks1_checkpoint3.npy
sequential_sbc_ranks1_checkpoint3.yaml
sequential_sbc_ranks1_checkpoint3_simulations.npz
sequential_sbc_ranks1_checkpoint4.npy
sequential_sbc_ranks1_checkpoint4.yaml
sequential_sbc_ranks1_checkpoint4_simulations.npz
sequential_sbc_ranks1_checkpoint5.npy
sequential_sbc_ranks1_checkpoint5.yaml
sequential_sbc_ranks1_checkpoint5_simulations.npz
sequential_sbc_ranks1_checkpoint6.npy
sequential_sbc_ranks1_checkpoint6.yaml
sequential_sbc_ranks1_checkpoint6_simulations.npz
sequential_sbc_ranks1_checkpoint7.npy
sequential_sbc_ranks1_checkpoint7.yaml
sequent

In [None]:
sbc_dict = retrieve_and_combine_checkpointed_sbc_ranks(sequential_sbc_ID=sequential_sbc_ID_read,
                                                       results_path=results_path)

In [27]:
# Inspect ranks to double check they have been combined correctly:
sequential_ranks = sbc_dict["ranks"]
#sequential_ranks

In [22]:
# Inspect config to double check they have been combined correctly:
sequential_ranks_config = sbc_dict["config"]
#sequential_ranks_config

{'num_sequential_rounds': 4,
 'num_simulations_per_round': 5000,
 'sigma': 150.0,
 'N_samp': 10000,
 'N_iter': 900,
 'sbc_time': 249688.01264716126}

In [29]:
# Inspect simulations to double check they have been combined correctly:
sequential_ranks_simulations = sbc_dict["simulations"]
#sequential_ranks_simulations

In [31]:
# ONCE HAPPY:
sequential_sbc_ID_write = 1

# Write the combined files to a single file
save_combined_checkpointed_sbc_ranks(sequential_sbc_ID=sequential_sbc_ID_read,
                                     results_path=results_path,
                                     sbc_dict=sbc_dict)


 Saving ranks to /Users/Lieve/Documents/Masters Project/SBC-SBI/results/toy_examples/norm_norm_diffuse_1d/npe_c/sequential_sbc_ranks1.npy:

 Ranks saved.

 Saving config file to /Users/Lieve/Documents/Masters Project/SBC-SBI/results/toy_examples/norm_norm_diffuse_1d/npe_c/sequential_sbc_ranks1.yaml:

 Config file saved successfully.

 Saving simulations to /Users/Lieve/Documents/Masters Project/SBC-SBI/results/toy_examples/norm_norm_diffuse_1d/npe_c/sequential_sbc_ranks1_simulations.npz:

 Simulations saved successfully.


In [35]:
path_all = "/Users/Lieve/Documents/Masters Project/SBC-SBI/results/toy_examples/norm_norm_diffuse_1d/npe_c/"


In [37]:
# Load sbc ranks
ranks0 = np.load(path_all + "sequential_sbc_ranks0.npy")
ranks1 = np.load(path_all + "sequential_sbc_ranks1.npy")

# Load config
with open(path_all + "sequential_sbc_ranks0.yaml", "r") as f:
    config0 = yaml.safe_load(f)

# Load config
with open(path_all + "sequential_sbc_ranks1.yaml", "r") as f:
    config1 = yaml.safe_load(f) 

# Load samples
samples0 = np.load(path_all + "sequential_sbc_ranks0_simulations.npz")
samples1 = np.load(path_all + "sequential_sbc_ranks1_simulations.npz")



In [49]:
ranks = np.array(list(ranks0) + list(ranks1))

In [52]:
config0

{'N_iter': 900,
 'N_samp': 10000,
 'num_sequential_rounds': 4,
 'num_simulations_per_round': 5000,
 'sbc_time': 249688.01264716126,
 'sigma': 150.0}

In [53]:
config1

{'N_iter': 900,
 'N_samp': 10000,
 'num_sequential_rounds': 4,
 'num_simulations_per_round': 5000,
 'sigma': 150.0,
 'total_sbc_time': 241124.8188946047}

In [61]:
config = config0
config["N_iter"] = 1800
config["total_sbc_time"] = 249688.01264716126 + 241124.8188946047
config["sbc_times"] = [np.nan for i in range(1800)]


In [62]:
config

{'N_iter': 1800,
 'N_samp': 10000,
 'num_sequential_rounds': 4,
 'num_simulations_per_round': 5000,
 'sigma': 150.0,
 'total_sbc_time': 490812.83154176595,
 'sbc_times': [nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan

In [63]:
config.keys()

dict_keys(['N_iter', 'N_samp', 'num_sequential_rounds', 'num_simulations_per_round', 'sigma', 'total_sbc_time', 'sbc_times'])

In [72]:
dict(samples0).keys()

dict_keys(['posterior_samples_round_0', 'prior_sample_round_0', 'data_sample_round_0', 'posterior_samples_round_1', 'prior_sample_round_1', 'data_sample_round_1', 'posterior_samples_round_2', 'prior_sample_round_2', 'data_sample_round_2', 'posterior_samples_round_3', 'prior_sample_round_3', 'data_sample_round_3', 'posterior_samples_round_4', 'prior_sample_round_4', 'data_sample_round_4', 'posterior_samples_round_5', 'prior_sample_round_5', 'data_sample_round_5', 'posterior_samples_round_6', 'prior_sample_round_6', 'data_sample_round_6', 'posterior_samples_round_7', 'prior_sample_round_7', 'data_sample_round_7', 'posterior_samples_round_8', 'prior_sample_round_8', 'data_sample_round_8', 'posterior_samples_round_9', 'prior_sample_round_9', 'data_sample_round_9', 'posterior_samples_round_10', 'prior_sample_round_10', 'data_sample_round_10', 'posterior_samples_round_11', 'prior_sample_round_11', 'data_sample_round_11', 'posterior_samples_round_12', 'prior_sample_round_12', 'data_sample_rou

In [76]:
samples = dict(samples0)

k=900
for j in range(10):
    for i in range(90):
        samples[f"prior_sample_round_{k}"] = samples1[f"prior_sample_round_{i}_checkpoint{j}"]
        samples[f"data_sample_round_{k}"] = samples1[f"data_sample_round_{i}_checkpoint{j}"]
        samples[f"posterior_samples_round_{k}"] = samples1[f"posterior_samples_round_{i}_checkpoint{j}"]
        k += 1

In [86]:
sequential_sbc_save_path = path_all + "sequential_sbc_ranks3.npy"
config_save_path = path_all + "sequential_sbc_ranks3.yaml"
simulations_save_path = path_all + "sequential_sbc_ranks3_simulations.npz"


print(f"\n Saving ranks to {sequential_sbc_save_path}:")
np.save(sequential_sbc_save_path, ranks)
print("\n Ranks saved successfully.")

print(f"\n Saving config file to {config_save_path}:")
with open(config_save_path, "w") as f:
    yaml.safe_dump(config, f)
print("\n Config file saved successfully.")

# Save simulations:
print(f"\n Saving simulations to {simulations_save_path}:")
np.savez(simulations_save_path, **samples)
print("\n Simulations saved successfully.")


 Saving ranks to /Users/Lieve/Documents/Masters Project/SBC-SBI/results/toy_examples/norm_norm_diffuse_1d/npe_c/sequential_sbc_ranks3.npy:

 Ranks saved successfully.

 Saving config file to /Users/Lieve/Documents/Masters Project/SBC-SBI/results/toy_examples/norm_norm_diffuse_1d/npe_c/sequential_sbc_ranks3.yaml:

 Config file saved successfully.

 Saving simulations to /Users/Lieve/Documents/Masters Project/SBC-SBI/results/toy_examples/norm_norm_diffuse_1d/npe_c/sequential_sbc_ranks3_simulations.npz:

 Simulations saved successfully.
