In [1]:
import re  # used by parse_file and the de-duplication cell; previously only reachable via the star import

import matplotlib.pyplot as plt
import numpy as np
import mpmath as mp
mp.mp.dps = 50 #higher precision
from scipy.optimize import curve_fit
import scipy.integrate as integrate
from plotDensity import *

In [2]:
def log_sum_exp(to_sum):
    """Return log(sum(exp(v) for v in to_sum)) without overflowing.

    The largest value is factored out before exponentiating, so every
    exponent passed to ``mp.exp`` is <= 0.

    Parameters
    ----------
    to_sum : iterable of numbers
        Log-domain terms. Any iterable is accepted, including generators.

    Returns
    -------
    number
        The log of the summed exponentials. An empty input, or an input
        whose largest term is -inf, gives -inf (the log of an empty/zero sum).
    """
    # Materialise first: the values are scanned twice (max, then the sum), and a
    # one-shot iterator would be exhausted by max() and contribute nothing.
    values = list(to_sum)
    if not values:
        return -mp.inf

    maxval = max(values)
    if maxval == -mp.inf:
        # Every term is exp(-inf) == 0; subtracting maxval would give -inf - -inf == nan.
        return maxval

    exp_sum = 0
    for value in values:
        exp_sum += mp.exp(value - maxval)
    return maxval + mp.log(exp_sum)


def free_energy(E_list, log_g_list,  T):
    """Return -T * log( sum_i g(E_i) * exp(-E_i / T) ).

    Parameters
    ----------
    E_list : sequence of numbers
        Energies, indexed in step with ``log_g_list``.
    log_g_list : sequence of numbers
        Logarithm of the density of states for each energy.
    T : number
        Temperature; must be non-zero because the energies are divided by it.

    Returns
    -------
    number
        ``-T`` times the log of the Boltzmann-weighted sum.
    """
    # Work with log(g(E) * exp(-E/T)) = log_g - E/T so nothing is exponentiated
    # before the maximum is factored out by log_sum_exp (avoids overflow).
    log_terms = [log_g - E_list[i] / T for i, log_g in enumerate(log_g_list)]
    return -T * log_sum_exp(log_terms)

def get_free_energies(rescaled_results,temperatures):
    """Evaluate ``free_energy(E, log_g, T) / (-T)`` on a grid of temperatures.

    Note that the stored quantity is the free energy divided by ``-T``,
    not the free energy itself.

    Parameters
    ----------
    rescaled_results : nested sequence
        Outer level: one entry per seed. Inner level: one entry per error
        class, where element 0 holds the energies and element 1 the log g values.
    temperatures : sequence of numbers
        Temperatures to evaluate at (each must be non-zero).

    Returns
    -------
    list
        ``out[seed][error_class][k]`` is the value at ``temperatures[k]``.
    """
    return [
        [
            [
                free_energy(spectrum[0], spectrum[1], T) / (-T)
                for T in temperatures
            ]
            for spectrum in per_seed
        ]
        for per_seed in rescaled_results
    ]


def process_results(batch_results,X,Y):
    """Stitch per-walker log g pieces into one normalised curve per error class.

    For every seed and every error class the walker results are passed through
    the renormalisation/averaging helpers, rescaled for concatenation, merged
    into a single strictly increasing energy axis, and finally shifted so that
    ``log_sum_exp`` of the merged log g values equals ``log(2) * X * Y``.

    Parameters
    ----------
    batch_results : nested sequence
        ``batch_results[seed][error_class]`` is the raw walker data handed to
        ``get_renormalized_log_g_values_as_dict_list``.
    X, Y : numbers
        Enter through the lower energy bound ``-1 - 2*X*Y`` and the
        normalisation target ``log(2) * X * Y``.
        NOTE(review): presumably the lattice dimensions; confirm against the caller.

    Returns
    -------
    list
        ``out[seed][error_class] == [energies, log_g_values]``.
    """
    all_seeds = []
    for seed_results in batch_results:
        per_seed = []
        for error_result in seed_results:
            walkers = get_renormalized_log_g_values_as_dict_list(error_result)
            walkers = average_matching_keys(walkers)

            # One array of energies and one of log g values per walker; each
            # walker entry is a mapping energy -> log g.
            energies_per_walker = []
            log_g_per_walker = []
            for walker in walkers:
                log_g_per_walker.append(np.array(list(walker.values())))
                energies_per_walker.append(np.array(list(walker.keys())))

            slopes = get_derivative_wrt_e(walkers)
            stitch_energies = find_lowest_inverse_temp_deviation(slopes)
            # Return value is ignored, so any rescaling must happen in place on the two lists.
            rescale_results_for_concatenation(energies_per_walker, log_g_per_walker, stitch_energies)

            # Start just below -2*X*Y so an energy of exactly -2*X*Y is accepted.
            highest_seen = -1 - 2 * X * Y
            stitched_x = []
            stitched_y = []
            for walker_index in range(len(energies_per_walker)):
                walker_x = energies_per_walker[walker_index]
                walker_y = log_g_per_walker[walker_index]
                for j in range(len(walker_x)):
                    energy = walker_x[j]
                    # Keep only energies beyond everything already kept, so
                    # overlapping walker windows are not double counted.
                    if energy > highest_seen:
                        highest_seen = energy
                        stitched_x.append(energy)
                        stitched_y.append(walker_y[j])

            offset = log_sum_exp(stitched_y)
            log_total = mp.log(2) * X * Y
            stitched_y = [value + log_total - offset for value in stitched_y]
            per_seed.append([stitched_x, stitched_y])
        all_seeds.append(per_seed)
    return all_seeds


In [4]:
def parse_file(filename):
    """Parse a results file into a list of per-run records.

    The file content is cut at every ``'}\\n{'`` boundary. A block is kept only
    if it contains a ``"histogram_seed"``, a ``"run_seed"`` and a ``"results"``
    list; the list items are read as ``"key": value`` pairs.

    Parameters
    ----------
    filename : str
        Path of the file to read.

    Returns
    -------
    list of dict or None
        One dict per accepted block with keys ``"histogram_seed"`` and
        ``"run_seed"`` (digit strings) and ``"results"`` (mapping str -> float).
        ``None`` is returned when the file does not exist.
    """
    try:
        with open(filename, 'r') as file:
            content = file.read()
    except FileNotFoundError:
        return

    parsed = []
    # Consecutive records are separated by a closing brace, a newline and an opening brace.
    for block in content.split('}\n{'):
        histogram_seed_match = re.search(r'"histogram_seed": "(\d+)"', block)
        run_seed_match = re.search(r'"run_seed": "(\d+)"', block)
        results_match = re.search(r'"results": \[([^]]*)\]', block)

        if not (histogram_seed_match and run_seed_match and results_match):
            continue  # incomplete block: silently skipped

        results = {}
        for item in results_match.group(1).split(','):
            parts = item.split(':')
            if len(parts) != 2:
                continue  # not a single "key": value pair
            results[parts[0].strip().strip('"')] = float(parts[1].strip())

        parsed.append({
            "histogram_seed": histogram_seed_match.group(1),
            "run_seed": run_seed_match.group(1),
            "results": results
        })

    return parsed

def process_data(data, batch_results, p, size, error):
    """Append one flat record per parsed entry to ``batch_results``.

    Parameters
    ----------
    data : iterable of dict
        Entries with keys ``"histogram_seed"``, ``"run_seed"`` and ``"results"``
        (a mapping whose keys convert with ``int`` and values with ``float``).
    batch_results : list
        Accumulator; extended in place.
    p, size, error
        Stored unchanged under ``'prob'``, ``'size'`` and ``'error'``.

    Returns
    -------
    None
    """
    for entry in data:
        histogram_seed = entry["histogram_seed"]
        run_seed = entry["run_seed"]
        results = entry["results"]

        # Convert key and value together, item by item, in mapping order.
        pairs = [(int(key), float(value)) for key, value in results.items()]
        E_list = [energy for energy, _ in pairs]
        log_g_list = [log_g for _, log_g in pairs]

        record = {
            'prob': p,
            'size': size,
            'error': error,
            'histogram_seed': histogram_seed,
            'run_seed': run_seed,
            'E': E_list,
            'log_g': log_g_list
        }
        batch_results.append(record)
        # offset = log_sum_exp(log_g_list)
        # print('deviation from sqrt(2)xy: ', mp.log(2)*size*size - offset, ' log sum exp:', offset)


In [15]:
# --- Run configuration: these values select which results file(s) are loaded ---
boundary_type = "periodic"
probabilities = [0.1]
sizes = [4]
intervals = 10
iterations = 1000
overlap = 0.25
walkers = 8
alpha = 0.8
beta = 1e-6
exchange = 50

# Accumulator filled by process_data; reset on every run of this cell.
batch_results = []

for p in probabilities:
    for size in sizes:
        for error in ["I"]:
            filename = f"../results/periodic_head_old_stitched_handling/prob_{p:.6f}/X_{size}_Y_{size}/error_class_{error}/StitchedHistogram__intervals_{intervals}_iterations_{iterations}_overlap_{overlap:.6f}_walkers_{walkers}_alpha_{alpha:.6f}_beta_{beta:.10f}exchange_offset{exchange}.txt"
            data = parse_file(filename)
            if not data:
                # Missing file (None) or no parsable block (empty list): nothing to add.
                continue
            process_data(data, batch_results, p, size, error)
print(len(batch_results))

3999


In [19]:
# Show the path currently held in `filename` (the last one built by the loading loop).
filename

'../results/periodic_head_old_stitched_handling/prob_0.100000/X_4_Y_4/error_class_I/StitchedHistogram__intervals_10_iterations_1000_overlap_0.250000_walkers_8_alpha_0.800000_beta_0.0000010000exchange_offset50.txt'

In [21]:
# Read one results file by hand and split it into raw text blocks, using the same
# separator as parse_file, so the blocks can be filtered before parsing.
filename = '../results/periodic_head_old_stitched_handling/prob_0.100000/X_4_Y_4/error_class_I/StitchedHistogram__intervals_10_iterations_1000_overlap_0.250000_walkers_8_alpha_0.800000_beta_0.0000010000exchange_offset50.txt'
data = []  # reset here; not used again within this cell

with open(filename, 'r') as file:
    content = file.read()
    # print("file content printout (for debugging):")
    # print(content)

# Split content into individual blocks
blocks = content.split('}\n{')  # separator: closing brace, newline, opening brace

In [30]:
# Keep only the first block seen for each histogram seed.
# A set replaces the former fixed-size flag array, which raised IndexError for
# seeds above 2000 and made seed 0 share a slot with seed 2000 (index -1).
seen_histogram_seeds = set()

without_duplicates = []

for block in blocks:
    histogram_seed_match = re.search(r'"histogram_seed": "(\d+)"', block)
    if histogram_seed_match is None:
        # No seed in this block (e.g. trailing fragment); parse_file skips
        # such blocks as well, so drop it instead of crashing on .group().
        continue
    hist_seed = int(histogram_seed_match.group(1))
    if hist_seed in seen_histogram_seeds:
        continue
    seen_histogram_seeds.add(hist_seed)
    without_duplicates.append(block)

In [33]:
# Specify the file path
file_path = "output.txt"

# Open the file in write mode
with open(file_path, "w") as file:
    # Write each string in the list to the file
    for entry in without_duplicates:
        file.write(entry)
        file.write("}\n")
        file.write("{")

print(f"Data written to {file_path}")

Data written to output.txt


In [9]:
from collections import defaultdict

# only keep results which come with all different error types:
#Changing structure to match postprocessing and plotting

# Group the flat records by (probability, size, histogram seed).
grouped_results = defaultdict(list)
for result in batch_results:
    key = (result['prob'], result['size'], result['histogram_seed'])
    grouped_results[key].append(result)

# filtered_results[(prob, size)] collects, per complete group, a list of [E, log_g] pairs.
filtered_results = defaultdict(list)
for key, results in grouped_results.items():
    newkey = (key[0],key[1])
    errors = set(result['error'] for result in results)
    if errors == {'I', 'X', 'Y', 'Z'}:
        #To be removed once normalization is properly handled in c
        for result in results:
            log_g_list = result['log_g']
            offset = log_sum_exp(log_g_list)
            # Normalise with this record's own size. The loop variable `size`
            # left over from the loading cell only holds the last size loaded,
            # which is wrong as soon as several sizes are present.
            log_total = mp.log(2) * result['size'] * result['size']
            rescaled_log_g_list = [res + log_total - offset for res in log_g_list]
            #print(log_total - offset)
            result['log_g'] = rescaled_log_g_list
        filtered_results[newkey].append([[result['E'], result['log_g']] for result in results])
    else:
        print(f"has issue with an error class prob: {key[0]} size: {key[1]} interaction seed: {key[2]} available errors: {errors}")