In [1]:
from plotDensity import *
import mpmath as mp
import json

In [2]:
def log_sum_exp(to_sum):
    maxval = max(to_sum)
    exp_sum = 0
    for value in to_sum:
        exp_sum += mp.exp(value-maxval)
    res = maxval + mp.log(exp_sum)
    return res

In [3]:
def read_results_file(path):

    with open(path, 'r') as file:
        content = file.read()

    content = content.strip().rstrip(',')

    corrected_json = f'[{content}]'

    try:
        data = json.loads(corrected_json)
    except json.JSONDecodeError as e:
        print(f"Failed to parse JSON: {e}")

    return data

In [4]:
def cut_overlapping_histogram_parts(interval_data, stitching_keys):
    for i in range(len(stitching_keys)):
        stitching_energy_of_interval_i = stitching_keys[i]

        # Modify the i-th interval
        current_interval = interval_data[i]
        # Keep only keys <= stitching_energy_of_interval_i
        current_interval = {k: v for k, v in current_interval.items() if k <= stitching_energy_of_interval_i}

        # Modify the (i+1)-th interval if following interval is still in bounds
        if i + 1 < len(interval_data):
            next_interval = interval_data[i + 1]
            # Keep only keys > stitching_energy_of_interval_i
            next_interval = {k: v for k, v in next_interval.items() if k > stitching_energy_of_interval_i}

        # Update the intervals in the original list
        interval_data[i] = current_interval
        if i + 1 < len(interval_data):
            interval_data[i + 1] = next_interval

def process_data(data, batch_results, p, size, error):
    for entry in data:
        histogram_seed = entry["histogram_seed"]
        run_seed = entry["run_seed"]
        results = entry["results"]

        E_list = []
        log_g_list = []

        # Process the results
        for key, value in results.items():
            E_list.append(int(key))
            log_g_list.append(float(value))

        batch_results.append({
                'prob': p,
                'size': size,
                'error': error,
                'histogram_seed': histogram_seed,
                'run_seed': run_seed,
                'E': E_list,
                'log_g': log_g_list
            })
        # offset =

In [5]:
X = 4
Y = 4

num_walker = 8

In [6]:
boundary_type = "periodic"
batch_results = []
probabilities = [0.1]
sizes = [4]
intervals = 5
iterations = 1000
overlap = 0.25
walkers = 8
alpha = 0.8
beta = 1e-6
exchange = 50
error = "X"
p = 0.1
batch_results = []

In [8]:
diffs = {}

for seed in range(1000, 2000):
    results_new = read_data_from_file(f"../results/periodic/prob_0.100000/X_4_Y_4/seed_{seed}/error_class_X/intervals_10_iterations_1000_overlap_0.250000_walkers_8_seed_run_42_alpha_0.800000_beta_0.0000010000exchange_offset50.txt")
    results_old = read_data_from_file(f"../results/periodic_1/prob_0.100000/X_4_Y_4/seed_{seed}/error_class_X/intervals_10_iterations_1000_overlap_0.250000_walkers_8_seed_run_42_alpha_0.800000_beta_0.0000010000exchange_offset50.txt")

    results_new = results_new[0::num_walker]
    results_old = results_old[0::num_walker]

    for i in range(len(results_new)):
        diff = [results_new[i][key] - results_old[i][key] for key in results_new[i]]
        if np.max(np.abs(diff)) > 1e-05:
            diffs[seed] = (i, diff)

print(diffs)

{}


### Check averages

In [7]:
max_all = []

for seed in range(1000, 1150):
    before_run_1 = read_data_from_file(f"../results_before_run_13/periodic/prob_0.100000/X_4_Y_4/seed_{seed}/error_class_X/intervals_10_iterations_1000_overlap_0.250000_walkers_8_seed_run_42_alpha_0.800000_beta_0.0000010000exchange_offset50.txt")
    before_run_2 = read_data_from_file(f"../results_before_run_33/periodic/prob_0.100000/X_4_Y_4/seed_{seed}/error_class_X/intervals_10_iterations_1000_overlap_0.250000_walkers_8_seed_run_42_alpha_0.800000_beta_0.0000010000exchange_offset50.txt")

    diffs = []
    for i in range(len(before_run_1)):
        diff = [before_run_1[i][key] - before_run_2[i][key] for key in before_run_1[i]]
        diffs.append(diff)

    max = []

    for d in diffs:
        max.append(np.max(np.abs(d)))

    max_all.append(np.max(max))

print(np.max(max_all))

0.0


In [8]:
max_all = []

for seed in range(1000, 1150):
    after_run_1 = read_data_from_file(f"../results_after_run_13/periodic/prob_0.100000/X_4_Y_4/seed_{seed}/error_class_X/intervals_10_iterations_1000_overlap_0.250000_walkers_8_seed_run_42_alpha_0.800000_beta_0.0000010000exchange_offset50.txt")
    after_run_2 = read_data_from_file(f"../results_after_run_33/periodic/prob_0.100000/X_4_Y_4/seed_{seed}/error_class_X/intervals_10_iterations_1000_overlap_0.250000_walkers_8_seed_run_42_alpha_0.800000_beta_0.0000010000exchange_offset50.txt")

    diffs = []
    for i in range(len(after_run_1)):
        diff = [after_run_1[i][key] - after_run_2[i][key] for key in after_run_1[i]]
        diffs.append(diff)

    max = []

    for d in diffs:
        max.append(np.max(np.abs(d)))

    max_all.append((seed, np.max(max)))

for m in max_all:
    if m[1] != 0:
        print(m[0])

1140


In [9]:
def average_dicts(dicts):
    from collections import defaultdict

    sums = defaultdict(float)

    # Dictionary to count occurrences of each key
    counts = defaultdict(int)

    # Loop through each dictionary
    for d in dicts:
        for key, value in d.items():
            sums[key] += value
            counts[key] += 1

    # Calculate the averages
    averages = {key: sums[key] / counts[key] for key in sums}

    return averages

In [21]:
seed = 1140

before_seed = read_data_from_file(f"../results_before_run_33/periodic/prob_0.100000/X_4_Y_4/seed_{seed}/error_class_X/intervals_10_iterations_1000_overlap_0.250000_walkers_8_seed_run_42_alpha_0.800000_beta_0.0000010000exchange_offset50.txt")

In [22]:
averages = []
for i in range(8, len(before_seed) + 8, 8):
    eight_walker = before_seed[(i-8):i]
    averages.append(average_dicts(eight_walker))

In [23]:
averages

[{-22: 489.0625, -18: 500.03125, -14: 504.0625, -10: 506.84375},
 {-18: 622.75, -14: 625.078125, -10: 627.171875},
 {-14: 623.0, -10: 625.125, -6: 626.875},
 {-10: 624.453125, -6: 625.15625, -2: 625.390625},
 {-10: 467.390625, -6: 468.65625, -2: 469.28125, 2: 469.671875},
 {-6: 624.75, -2: 625.140625, 2: 625.109375},
 {-2: 625.03125, 2: 625.34375, 6: 624.625},
 {2: 626.15625, 6: 625.015625, 10: 623.828125},
 {2: 470.484375, 6: 469.984375, 10: 468.4375, 14: 466.09375},
 {6: 556.65625, 10: 553.59375, 14: 550.65625, 18: 548.125, 22: 540.96875}]

In [24]:
seed = 1140

patient_zero = read_data_from_file(f"../results_after_run_13/periodic/prob_0.100000/X_4_Y_4/seed_{seed}/error_class_X/intervals_10_iterations_1000_overlap_0.250000_walkers_8_seed_run_42_alpha_0.800000_beta_0.0000010000exchange_offset50.txt")

In [25]:
diffs = []

for walker in range(len(patient_zero)):
    interval_id = int(walker/8)
    diff = [patient_zero[walker][key] - averages[interval_id][key] for key in patient_zero[walker]]

    if np.max(np.abs(diff)!=0):
        diffs.append((walker, diff))

print(diffs)

[(5, [0.0, 0.0, -1.0625, -2.59375]), (6, [-6.1875, 3.09375, 1.1875, 1.90625]), (7, [8.3125, -1.40625, -2.5625, -4.34375])]


In [18]:
averages

[{-22: 489.0625, -18: 500.03125, -14: 504.0625, -10: 506.84375},
 {-18: 622.75, -14: 625.078125, -10: 627.171875},
 {-14: 623.0, -10: 625.125, -6: 626.875},
 {-10: 624.453125, -6: 625.15625, -2: 625.390625},
 {-10: 467.390625, -6: 468.65625, -2: 469.28125, 2: 469.671875},
 {-6: 624.75, -2: 625.140625, 2: 625.109375},
 {-2: 625.03125, 2: 625.34375, 6: 624.625},
 {2: 626.15625, 6: 625.015625, 10: 623.828125},
 {2: 470.484375, 6: 469.984375, 10: 468.4375, 14: 466.09375},
 {6: 556.65625, 10: 553.59375, 14: 550.65625, 18: 548.125, 22: 540.96875}]

In [19]:
patient_zero

[{-22: 489.0625, -18: 500.03125, -14: 504.0625, -10: 506.84375},
 {-22: 489.0625, -18: 500.03125, -14: 504.0625, -10: 506.84375},
 {-22: 489.0625, -18: 500.03125, -14: 504.0625, -10: 506.84375},
 {-22: 489.0625, -18: 500.03125, -14: 504.0625, -10: 506.84375},
 {-22: 489.0625, -18: 500.03125, -14: 504.0625, -10: 506.84375},
 {-22: 489.0625, -18: 500.03125, -14: 503.0, -10: 504.25},
 {-22: 482.875, -18: 503.125, -14: 505.25, -10: 508.75},
 {-22: 497.375, -18: 498.625, -14: 501.5, -10: 502.5},
 {-18: 622.75, -14: 625.078125, -10: 627.171875},
 {-18: 622.75, -14: 625.078125, -10: 627.171875},
 {-18: 622.75, -14: 625.078125, -10: 627.171875},
 {-18: 622.75, -14: 625.078125, -10: 627.171875},
 {-18: 622.75, -14: 625.078125, -10: 627.171875},
 {-18: 622.75, -14: 625.078125, -10: 627.171875},
 {-18: 622.75, -14: 625.078125, -10: 627.171875},
 {-18: 622.75, -14: 625.078125, -10: 627.171875},
 {-14: 623.0, -10: 625.125, -6: 626.875},
 {-14: 623.0, -10: 625.125, -6: 626.875},
 {-14: 623.0, -10: 6

In [14]:
first_run = []

with open('../output.txt', 'r') as file:
    for line in file:
        averages = line.strip().split(' ')[1:]
        first_run.append(list(map(float, averages)))

second_run = []

with open('../output_1.txt', 'r') as file:
    for line in file:
        averages = line.strip().split(' ')[1:]
        second_run.append(list(map(float, averages)))

diff_averages = np.abs(np.array(first_run) - np.array(second_run))

non_zero_indices = np.nonzero(diff_averages)

print(non_zero_indices)

(array([1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4]), array([20222, 20226, 20230, 20222, 20226, 20230, 20222, 20226, 20230,
       20087, 20091, 20095, 20099, 20222, 20226, 20230]))


(array([1, 1, 1, 1, 1, 1, 1]), array([20087, 20091, 20095, 20099, 20222, 20226, 20230]))


In [9]:
first_run_array = np.array(first_run)
second_run_array = np.array(second_run)

for i in range(len(non_zero_indices[0])):
    print(first_run_array[non_zero_indices[0][i], non_zero_indices[1][i]], second_run_array[non_zero_indices[0][i], non_zero_indices[1][i]])

360.625 364.219
374.375 373.75
379.25 377.484
385.75 385.844
488.125 483.0
501.75 503.594
510.125 511.906


In [16]:
first_run_array[4, 20087]

525.609

In [11]:
print(np.arraysecond_run[1,20222])

TypeError: list indices must be integers or slices, not tuple

### Check inits

In [15]:
with open('../script.log', 'r') as file:
    content = file.read()

In [65]:
lines = content.split('\n')

end_runs = []

for l in range(len(lines)):
    if "Total" in lines[l]:
        end_runs.append(l)

first_run = lines[:end_runs[0]-3]
second_run = lines[end_runs[0]+1:end_runs[1]-3]

for l in range(len(first_run)):
    # Use regular expressions to extract the energy and the interval
    first_match = re.search(r"energy: (-?\d+) for interval \[(-?\d+), (-?\d+)\]", first_run[l])
    first_energy = int(first_match.group(1))
    first_interval = (int(first_match.group(2)), int(first_match.group(3)))

    second_match = re.search(r"energy: (-?\d+) for interval \[(-?\d+), (-?\d+)\]", second_run[l])
    second_energy = int(second_match.group(1))
    second_interval = (int(second_match.group(2)), int(second_match.group(3)))

    if (second_interval == first_interval):
        diff = second_energy - first_energy
        if diff!=0:
            print("HELLO")
    else:
        print("ALARM")


### Check Consistency old Code

In [36]:
diffs = {}

diff_between_walker = []
for seed in range(1000, 1150):
    results_new = read_data_from_file(f"../results/periodic/prob_0.100000/X_4_Y_4/seed_{seed}/error_class_X/intervals_10_iterations_1000_overlap_0.250000_walkers_8_seed_run_42_alpha_0.800000_beta_0.0000010000exchange_offset50.txt")

    offset = 0

    for i in range(10):
        first_walker = results_new[offset]
        for j in range(1, 8):
            second_walker = results_new[offset +1]

            diff = [first_walker[key] - second_walker[key] for key in first_walker]

            if np.max(np.abs(diff) != 0):
                diff_between_walker.append((seed, i, j, diff))
        offset += 8

In [37]:
diff_between_walker

[]

In [25]:
diffs = {}

for seed in range(1000, 1150):
    results_new = read_data_from_file(f"../results/periodic_1/prob_0.100000/X_4_Y_4/seed_{seed}/error_class_X/intervals_10_iterations_1000_overlap_0.250000_walkers_8_seed_run_42_alpha_0.800000_beta_0.0000010000exchange_offset50.txt")
    results_old = read_data_from_file(f"../results/periodic/prob_0.100000/X_4_Y_4/seed_{seed}/error_class_X/intervals_10_iterations_1000_overlap_0.250000_walkers_8_seed_run_42_alpha_0.800000_beta_0.0000010000exchange_offset50.txt")

    results_new = results_new[0::num_walker]
    results_old = results_old[0::num_walker]

    for i in range(len(results_new)):
        diff = [results_new[i][key] - results_old[i][key] for key in results_new[i]]
        if np.max(np.abs(diff)) > 1e-05:
            diffs[seed] = (i, diff)

In [26]:
diffs

{1115: (0, [10.875, 4.6875, -8.75, -8.71875])}

### Check Consistency new Code

In [7]:
def get_seed_and_dicts(filename):
    with open(filename, 'r') as file:
        content = file.read()

    blocks = content.split("},\n")[:-1]

    all_results = {}
    for block in blocks:
        histogram_seed_match = int(re.search(r'"histogram_seed": "(\d+)"', block).group(1))
        run_seed_match = int(re.search(r'"run_seed": "(\d+)"', block).group(1))
        results_match = re.search(r'"results": \{([^}]*)\}', block).group(1)
        energy_blocks = results_match.split(',')

        energies = []
        log_g = []

        for e in energy_blocks:
            match = re.search(r'"(-?\d+)": (\d+\.\d{10})', e)
            energies.append(float(match.group(1)))
            log_g.append(float(match.group(2)))

        last_index = 0

        dict_list = []

        for i in range(1,len(energies)):
            if energies[i] < energies[i - 1]:
                dict_list.append(dict(zip(energies[last_index:i], log_g[last_index:i])))
                last_index = i
            if i==(len(energies)-1):
                dict_list.append(dict(zip(energies[last_index:], log_g[last_index:])))

        all_results[histogram_seed_match] = dict_list

    return all_results

In [8]:
file_new = f"../results/periodic_just_now/prob_{p:.6f}/X_{sizes[0]}_Y_{sizes[0]}/error_class_{error}/StitchedHistogram_intervals_{intervals}_iterations_{iterations}_overlap_{overlap:.6f}_walkers_{walkers}_alpha_{alpha:.6f}_beta_{beta:.10f}_exchange_offset{exchange}.txt"
file_old = f"../results/periodic/prob_{p:.6f}/X_{sizes[0]}_Y_{sizes[0]}/error_class_{error}/StitchedHistogram_intervals_{intervals}_iterations_{iterations}_overlap_{overlap:.6f}_walkers_{walkers}_alpha_{alpha:.6f}_beta_{beta:.10f}_exchange_offset{exchange}.txt"

results_new = get_seed_and_dicts(file_new)
results_old = get_seed_and_dicts(file_old)

In [9]:
diffs = []

for i in range(1000, 1200):
    entry_new = results_new[i]
    entry_old = results_old[i]

    for j in range(len(entry_new)):
        result = [entry_new[j][key] - entry_old[j][key] for key in entry_new[j]]
        diffs.append(result)


In [10]:
diffs_final = []
for d in diffs:
    if np.max(np.abs(d)) > 0:
        diffs_final.append(np.max(np.abs(d)))

In [11]:
np.max(diffs_final)

0.08608913420000075

### Difference Checks

In [16]:
filename = f"../results/periodic_error/prob_{p:.6f}/X_{sizes[0]}_Y_{sizes[0]}/error_class_{error}/StitchedHistogram_intervals_{intervals}_iterations_{iterations}_overlap_{overlap:.6f}_walkers_{walkers}_alpha_{alpha:.6f}_beta_{beta:.10f}_exchange_offset{exchange}.txt"

with open(filename, 'r') as file:
    content = file.read()


blocks = content.split("},\n")

## Loop over new code and get values
wrong_seeds = []

for block in blocks:
  histogram_seed_match = int(re.search(r'"histogram_seed": "(\d+)"', block).group(1))
  run_seed_match = int(re.search(r'"run_seed": "(\d+)"', block).group(1))
  results_match = re.search(r'"results": \{([^}]*)\}', block).group(1)
  energy_blocks = results_match.split(',')

  energies = []
  log_g = []

  for e in energy_blocks:
      match = re.search(r'"(-?\d+)": (\d+\.\d{10})', e)
      energies.append(float(match.group(1)))
      log_g.append(float(match.group(2)))

  last_index = 0

  dict_list = []

  for i in range(1,len(energies)):
    if energies[i] < energies[i - 1]:
      dict_list.append(dict(zip(energies[last_index:i], log_g[last_index:i])))
      last_index = i
    if i==(len(energies)-1):
      dict_list.append(dict(zip(energies[last_index:], log_g[last_index:])))

  ## Read in old values
  old_result_handling_out = read_data_from_file(f"../results/periodic/prob_0.100000/X_4_Y_4/seed_{histogram_seed_match}/error_class_X/intervals_10_iterations_1000_overlap_0.250000_walkers_8_seed_run_42_alpha_0.800000_beta_0.0000010000exchange_offset50.txt")

  """averages over walker results per intervals"""
  walker_results = average_matching_keys(old_result_handling_out)

  """normalize the walker results by min value for log results"""
  walker_results = get_renormalized_log_g_values_as_dict_list(old_result_handling_out)

  walker_results = walker_results[0::num_walker]

  for i in range(len(walker_results)):
      if walker_results[i].keys() == dict_list[i].keys():
          diff = np.array([walker_results[i][key] for key in sorted(walker_results[i].keys())]) - np.array([dict_list[i][key] for key in sorted(dict_list[i].keys())])
          if np.max(diff) > 1e-5:
            wrong_seeds.append((histogram_seed_match, i, diff))

AttributeError: 'NoneType' object has no attribute 'group'

In [18]:
wrong_seeds[0]

(1101, 0, array([ 0.        ,  0.000175  , -0.00144148]))

In [19]:
diffs = []

seed = 1101

old_result_handling_out = read_data_from_file(f"../results/periodic/prob_0.100000/X_4_Y_4/seed_{seed}/error_class_X/intervals_10_iterations_1000_overlap_0.250000_walkers_8_seed_run_42_alpha_0.800000_beta_0.0000010000exchange_offset50.txt")

"""averages over walker results per intervals"""
walker_results = average_matching_keys(old_result_handling_out)

"""normalize the walker results by min value for log results"""
walker_results = get_renormalized_log_g_values_as_dict_list(old_result_handling_out)

walker_results = walker_results[0::num_walker]

print(walker_results)

[{-18: 0.0, -14: 2.3060755728999993, -10: 3.7558109760000207}, {-14: 0.0, -10: 1.4316029548000415, -6: 2.420887946999983}, {-14: 0.0, -10: 1.4884452820000433, -6: 2.547107696600051}, {-10: 0.0, -6: 0.9729471207000415, -2: 1.4316787720000548}, {-10: 0.0, -6: 1.0570590495999568, -2: 1.530307769799947}, {-6: 0.0, -2: 0.47184920309996414, 2: 0.49166107179996743}, {-6: 0.0, -2: 0.4978125095000223, 2: 0.5081222057000332}, {-2: 0.5148189067999738, 2: 0.5105583667999554, 6: 0.0}, {-2: 0.46153950689995327, 2: 0.4701387881999608, 6: 0.0}, {2: 5.26059389119996, 6: 4.79037141799995, 10: 3.776184320499965, 14: 2.30865025519995, 18: 0.0}]


In [49]:
for i in range(len(walker_results)):
    if walker_results[i].keys() == dict_list[i].keys():
        diff = np.array([walker_results[i][key] for key in sorted(walker_results[i].keys())]) - np.array([dict_list[i][key] for key in sorted(dict_list[i].keys())])
        if np.max(diff) > 1e-9:
            print(np.max(diff))

In [40]:
walker_results[i]

{8: 4.853178024300007, 12: 3.6080551147999813, 16: 1.8163652420000176, 20: 0.0}

In [43]:
dict_list[i][8] - walker_results[i][8]

-7.802430000669602e-05

In [38]:
np.array(walker_results[i].values())

array(dict_values([0.0, 1.8498768807000943, 3.64570975310005]),
      dtype=object)

In [29]:
diffs = []

for seed in range(1000, 1200):
    old_result_handling_out = read_data_from_file(f"../results/old_result/prob_0.100000/X_4_Y_4/seed_{seed}/error_class_X/intervals_10_iterations_1000_overlap_0.250000_walkers_8_seed_run_42_alpha_0.800000_beta_0.0000010000exchange_offset50.txt")

    """averages over walker results per intervals"""
    walker_results = average_matching_keys(old_result_handling_out)

    """normalize the walker results by min value for log results"""
    walker_results = get_renormalized_log_g_values_as_dict_list(old_result_handling_out)

    walker_results = walker_results[0::num_walker]
    print(walker_results)
    break
    # results_x = []
    # results_y = []
    # for result in walker_results:
    #     results_y.append(np.array(list(result.values())))
    #     results_x.append(np.array(list(result.keys())))

    # derivatives_wrt_e = get_derivative_wrt_e(walker_results)
    # minimum_deviation_energies = find_lowest_inverse_temp_deviation(derivatives_wrt_e)
    # rescale_results_for_concatenation(results_x, results_y, minimum_deviation_energies)

    # """Store concatenate interval results"""
    # concatenated_keys = np.concatenate(results_x)
    # concatenated_values = np.concatenate(results_y)
    # list_of_concat_rescale_dicts = []
    # for keys, values in zip(results_x, results_y):
    #     # Combine keys and values into a dictionary
    #     dict_from_arrays = {k: v for k, v in zip(keys, values)}
    #     list_of_concat_rescale_dicts.append(dict_from_arrays)

    # cut_overlapping_histogram_parts(list_of_concat_rescale_dicts, minimum_deviation_energies)

    # # Flatten the list of dictionaries into a single list of values
    # cut_values = [value for d in list_of_concat_rescale_dicts for value in d.values()]

    # # Convert the list of values into a NumPy array
    # cut_values_array = np.array(cut_values)

    # offset = log_sum_exp(cut_values_array)
    # rescaled_y = [res + mp.log(2)*16 - offset for res in cut_values_array]

    # diffs.append(log_sum_exp(rescaled_y) - np.log(2)*16)

NameError: name 'num' is not defined

In [19]:
np.max(diffs)

mpf('1.7763568394002505e-15')

In [36]:
"""normalize the walker results by min value for log results"""
walker_results = get_renormalized_log_g_values_as_dict_list(walker_results)

"""averages over walker results per intervals"""
walker_results = average_matching_keys(walker_results)

results_x = []
results_y = []
for result in walker_results:
    results_y.append(np.array(list(result.values())))
    results_x.append(np.array(list(result.keys())))

derivatives_wrt_e = get_derivative_wrt_e(walker_results)
minimum_deviation_energies = find_lowest_inverse_temp_deviation(derivatives_wrt_e)
rescale_results_for_concatenation(results_x, results_y, minimum_deviation_energies)

"""Store concatenate interval results"""
concatenated_keys = np.concatenate(results_x)
concatenated_values = np.concatenate(results_y)
list_of_concat_rescale_dicts = []
for keys, values in zip(results_x, results_y):
    # Combine keys and values into a dictionary
    dict_from_arrays = {k: v for k, v in zip(keys, values)}
    list_of_concat_rescale_dicts.append(dict_from_arrays)

cut_overlapping_histogram_parts(list_of_concat_rescale_dicts, minimum_deviation_energies)

# # Flatten the list of dictionaries into a single list of values
# cut_values = [value for d in list_of_concat_rescale_dicts for value in d.values()]

# # Convert the list of values into a NumPy array
# cut_values_array = np.array(cut_values)

Accuracy is set to e-10

In [3]:
X, Y = (4,4)

"""Incoming data to get handled by new result handling function"""
interval_data = [
  627.1095947266,
  628.8754638672,
  630.9088623047,
  632.5491699219,
  633.5110107422,
  404.7702880859,
  406.3506591797,
  407.3104492188,
  407.8216796875,
  407.8045410156,
  404.0828857422,
  405.1276611328,
  405.6312011719,
  405.6431152344,
  405.1499023438,
  401.4141113281,
  401.9133789062,
  401.9149902344,
  401.4306152344,
  400.5290527344,
  402.1429199219,
  402.1175537109,
  401.6156250000,
  400.6624267578,
  399.0767089844,
  613.2126708984,
  612.2446777344,
  610.6416259766,
  608.6352050781,
  606.9015625000
]

"""data after applying min rescaling in new result handling function"""
min_rescaled_data = [
  0.0000000000,
  1.7658691406,
  3.7992675781,
  5.4395751953,
  6.4014160156,
  0.0000000000,
  1.5803710938,
  2.5401611328,
  3.0513916016,
  3.0342529297,
  0.0000000000,
  1.0447753906,
  1.5483154297,
  1.5602294922,
  1.0670166016,
  0.8850585938,
  1.3843261719,
  1.3859375000,
  0.9015625000,
  0.0000000000,
  3.0662109375,
  3.0408447266,
  2.5389160156,
  1.5857177734,
  0.0000000000,
  6.3111083984,
  5.3431152344,
  3.7400634766,
  1.7336425781,
  0.0000000000
]

"""data after rescaling for stitching intervals"""
stitched_data = [
  0.0000000000,
  1.7658691406,
  3.7992675781,
  5.4395751953,
  6.4014160156,
  3.8592041016,
  5.4395751953,
  6.3993652344,
  6.9105957031,
  6.8934570313,
  5.3545898438,
  6.3993652344,
  6.9029052734,
  6.9148193359,
  6.4216064453,
  6.3993652344,
  6.8986328125,
  6.9002441406,
  6.4158691406,
  5.5143066406,
  6.9256103516,
  6.9002441406,
  6.3983154297,
  5.4451171875,
  3.8593994141,
  6.3983154297,
  5.4303222656,
  3.8272705078,
  1.8208496094,
  0.0872070313
]

cut_data = {
  "-22": 0.0000000000,
  "-18": 1.7658691406,
  "-14": 3.7992675781,
  "-10": 5.4395751953,
  "-6": 6.3993652344,
  "-2": 6.8986328125,
  "2": 6.9002441406,
  "6": 6.3983154297,
  "10": 5.4303222656,
  "14": 3.8272705078,
  "18": 1.8208496094,
  "22": 0.0872070312
}

normalized_data = {
  "-22": 2.8611055117,
  "-18": 4.6269746524,
  "-14": 6.6603730899,
  "-10": 8.3006807071,
  "-6": 9.2604707461,
  "-2": 9.7597383242,
  "2": 9.7613496524,
  "6": 9.2594209414,
  "10": 8.2914277774,
  "14": 6.6883760196,
  "18": 4.6819551211,
  "22": 2.9483125430
}


In [20]:
old_result_handling_out = read_data_from_file("../results/periodic/prob_0.100000/X_4_Y_4/seed_1001/error_class_X/intervals_10_iterations_1000_overlap_0.250000_walkers_8_seed_run_42_alpha_0.800000_beta_0.0000010000exchange_offset50.txt")

"""Reducing to single walker resutl per interval"""
walker_results = average_matching_keys(old_result_handling_out)

In [21]:
old_result_handling_out_concatenated = [list(d.values()) for d in walker_results]

DEVIATION INPUT RESULTS

In [22]:
#max(abs((np.concatenate(old_result_handling_out_concatenated)) - np.array(interval_data)))

In [23]:
"""minimum rescaling of results per interval"""
old_result_handling_min_rescaled =  get_renormalized_log_g_values(walker_results)

DEVIATION MIN RESCALE

In [24]:
#max(abs(np.concatenate(old_result_handling_min_rescaled[1])- np.array(min_rescaled_data)))

In [25]:
walker_results = old_result_handling_out

"""normalize the walker results by min value for log results"""
walker_results = get_renormalized_log_g_values_as_dict_list(walker_results)

"""averages over walker results per intervals"""
walker_results = average_matching_keys(walker_results)

results_x = []
results_y = []
for result in walker_results:
    results_y.append(np.array(list(result.values())))
    results_x.append(np.array(list(result.keys())))

derivatives_wrt_e = get_derivative_wrt_e(walker_results)
minimum_deviation_energies = find_lowest_inverse_temp_deviation(derivatives_wrt_e)
rescale_results_for_concatenation(results_x, results_y, minimum_deviation_energies)

In [26]:
"""Store concatenate interval results"""
concatenated_keys = np.concatenate(results_x)
concatenated_values = np.concatenate(results_y)
list_of_concat_rescale_dicts = []
for keys, values in zip(results_x, results_y):
    # Combine keys and values into a dictionary
    dict_from_arrays = {k: v for k, v in zip(keys, values)}
    list_of_concat_rescale_dicts.append(dict_from_arrays)

DEVIATION CONCATENATE

In [27]:
#max(abs(concatenated_values - np.array(stitched_data)))

In [28]:
"""normalize the walker results by min value for log results"""
walker_results = get_renormalized_log_g_values_as_dict_list(walker_results)

"""averages over walker results per intervals"""
walker_results = average_matching_keys(walker_results)

results_x = []
results_y = []
for result in walker_results:
    results_y.append(np.array(list(result.values())))
    results_x.append(np.array(list(result.keys())))

derivatives_wrt_e = get_derivative_wrt_e(walker_results)
minimum_deviation_energies = find_lowest_inverse_temp_deviation(derivatives_wrt_e)
rescale_results_for_concatenation(results_x, results_y, minimum_deviation_energies)

"""Store concatenate interval results"""
concatenated_keys = np.concatenate(results_x)
concatenated_values = np.concatenate(results_y)
list_of_concat_rescale_dicts = []
for keys, values in zip(results_x, results_y):
    # Combine keys and values into a dictionary
    dict_from_arrays = {k: v for k, v in zip(keys, values)}
    list_of_concat_rescale_dicts.append(dict_from_arrays)

"""Cut overlapping parts"""
x_max = -1 -2*X*Y
unique_x = []
unique_y = []

sorted_dict_list = [dict(sorted(d.items())) for d in list_of_concat_rescale_dicts]

for dictionary in sorted_dict_list:
    for key, value in dictionary.items():
        if key > x_max: #avoid double counting
            x_max = key
            unique_x.append(key)
            unique_y.append(value)

offset = log_sum_exp(cut_data_input)
rescaled_y = [res + mp.log(2)*16 - offset for res in cut_data_input]
print(rescaled_y)


DEVIATION CUT OVERLAP

In [29]:
#np.array(unique_y) - np.array(list(cut_data.values()))

THIS IMPLIES PROBLEM WITH NAIVE CUT OF OVERLAP IN PYTHON

ASSUMING NOW INPUT FOR LOG SUM EXP RESCALING WHICH GOES INTO C CODE

In [30]:
cut_data_input = np.array(list(cut_data.values()))

In [31]:
offset = log_sum_exp(cut_data_input)
rescaled_y = [res + mp.log(2)*16 - offset for res in cut_data_input]
print(rescaled_y)

[mpf('2.8611055117440856'), mpf('4.6269746523440851'), mpf('6.6603730898440858'), mpf('8.3006807070440853'), mpf('9.2604707461440849'), mpf('9.7597383242440845'), mpf('9.7613496523440855'), mpf('9.2594209414440858'), mpf('8.2914277773440848'), mpf('6.6883760195440853'), mpf('4.6819551211440853'), mpf('2.9483125429440857')]


DEVIATION LOG SUM EXP RESCALING FOR SAME INPUT

In [36]:
abs(np.array(rescaled_y) - np.array(list(normalized_data.values())))

mpf('5.5915272412221384e-11')

ALTERNATIVE WAY OF CUTTING THE OVERLAP ANALOGOUSLY TO C CODE

In [37]:
def cut_overlapping_histogram_parts(interval_data, stitching_keys):
    for i in range(len(stitching_keys)):
        stitching_energy_of_interval_i = stitching_keys[i]

        # Modify the i-th interval
        current_interval = interval_data[i]
        # Keep only keys <= stitching_energy_of_interval_i
        current_interval = {k: v for k, v in current_interval.items() if k <= stitching_energy_of_interval_i}

        # Modify the (i+1)-th interval if following interval is still in bounds
        if i + 1 < len(interval_data):
            next_interval = interval_data[i + 1]
            # Keep only keys > stitching_energy_of_interval_i
            next_interval = {k: v for k, v in next_interval.items() if k > stitching_energy_of_interval_i}

        # Update the intervals in the original list
        interval_data[i] = current_interval
        if i + 1 < len(interval_data):
            interval_data[i + 1] = next_interval

In [38]:
cut_overlapping_histogram_parts(list_of_concat_rescale_dicts, minimum_deviation_energies)

In [39]:
# Flatten the list of dictionaries into a single list of values
cut_values = [value for d in list_of_concat_rescale_dicts for value in d.values()]

# Convert the list of values into a NumPy array
cut_values_array = np.array(cut_values)


DEVIATION OF CUT RESULTS NEW IMPLEMENTATION

In [40]:
max(abs(cut_values_array-np.array(list(cut_data.values()))))

2.0010038070950031e-10