In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# Set to True to force a full refresh of the data
full_refresh = True
eta = 1

In [3]:
repo_root = os.popen('git rev-parse --show-toplevel').read().strip()

In [4]:
data_folder = os.path.join(repo_root, 'multi-parameter-estimation', 'data')

# Get list of data directories
data_dirs = os.listdir(data_folder)
data_dirs = [d for d in data_dirs if os.path.isdir(os.path.join(data_folder, d))]

# skip old-data
if 'old-data' in data_dirs:
    data_dirs.remove('old-data')

data_dirs.sort()
data_dirs

['2025-05-28--14h-06m-26s',
 '2025-05-28--14h-08m-47s',
 '2025-05-28--14h-11m-10s',
 '2025-05-28--14h-13m-33s',
 '2025-05-28--14h-15m-56s',
 '2025-05-28--14h-18m-20s',
 '2025-05-28--14h-20m-43s',
 '2025-05-28--14h-23m-06s',
 '2025-05-28--14h-25m-29s',
 '2025-05-28--14h-27m-45s']

In [5]:
new_data_dirs = data_dirs.copy()

if not full_refresh:
    for d in data_dirs:
        if os.path.exists(os.path.join(data_folder, d, "estimators.csv")):
            new_data_dirs.remove(d)

new_data_dirs

['2025-05-28--14h-06m-26s',
 '2025-05-28--14h-08m-47s',
 '2025-05-28--14h-11m-10s',
 '2025-05-28--14h-13m-33s',
 '2025-05-28--14h-15m-56s',
 '2025-05-28--14h-18m-20s',
 '2025-05-28--14h-20m-43s',
 '2025-05-28--14h-23m-06s',
 '2025-05-28--14h-25m-29s',
 '2025-05-28--14h-27m-45s']

In [6]:
def load_chunks(data_dir, n):
    if not os.path.exists(os.path.join(data_folder, data_dir, f"chunked_coincidences_n={n}.csv")):
        print(f"Skipping {data_dir} n={n} as file does not exist.")
        return pd.DataFrame()
    coincidences = pd.read_csv(os.path.join(data_folder, data_dir, f"chunked_coincidences_n={n}.csv"))
    coincidences["data_dir"] = data_dir
    return coincidences

chunks_df = pd.concat([pd.concat([load_chunks(d, n) for d in new_data_dirs]) for n in [40,80,120,160,200]], ignore_index=True)
chunks_df

Skipping 2025-05-28--14h-27m-45s n=40 as file does not exist.


  chunks_df = pd.concat([pd.concat([load_chunks(d, n) for d in new_data_dirs]) for n in [40,80,120,160,200]], ignore_index=True)
  chunks_df = pd.concat([pd.concat([load_chunks(d, n) for d in new_data_dirs]) for n in [40,80,120,160,200]], ignore_index=True)
  chunks_df = pd.concat([pd.concat([load_chunks(d, n) for d in new_data_dirs]) for n in [40,80,120,160,200]], ignore_index=True)
  chunks_df = pd.concat([pd.concat([load_chunks(d, n) for d in new_data_dirs]) for n in [40,80,120,160,200]], ignore_index=True)


Unnamed: 0,data_dir,C,DB_H,DB_V,SB,N
0,2025-05-28--14h-06m-26s,2.0,38.0,0.0,0.0,40.0
1,2025-05-28--14h-06m-26s,1.0,39.0,0.0,0.0,40.0
2,2025-05-28--14h-06m-26s,0.0,40.0,0.0,0.0,40.0
3,2025-05-28--14h-06m-26s,0.0,40.0,0.0,0.0,40.0
4,2025-05-28--14h-06m-26s,1.0,39.0,0.0,0.0,40.0
...,...,...,...,...,...,...
281,2025-05-28--14h-20m-43s,3.0,15.0,103.0,79.0,200.0
282,2025-05-28--14h-23m-06s,5.0,4.0,150.0,41.0,200.0
283,2025-05-28--14h-23m-06s,2.5,2.0,166.0,29.5,200.0
284,2025-05-28--14h-25m-29s,1.0,0.0,188.0,11.0,200.0


In [7]:
# load the scaled_coincidences_all data
def load_all_data(data_dir):
    file_path = os.path.join(data_folder, data_dir, "scaled_coincidences_all.csv")
    if not os.path.exists(file_path):
        print(f"Skipping {data_dir} as file does not exist.")
        return pd.DataFrame()
    scaled_coincidences = pd.read_csv(file_path)
    scaled_coincidences["data_dir"] = data_dir
    return scaled_coincidences

all_data_df = pd.concat([load_all_data(d) for d in new_data_dirs], ignore_index=True)
all_data_df

Unnamed: 0,data_dir,repetition,C,DB_H,DB_V,SB,N
0,2025-05-28--14h-06m-26s,123474,43.5,2255.0,0.0,1.0,2299.5
1,2025-05-28--14h-08m-47s,124198,36.0,2228.0,0.0,93.0,2357.0
2,2025-05-28--14h-11m-10s,124750,38.0,1742.0,28.0,386.5,2194.5
3,2025-05-28--14h-13m-33s,124750,38.0,1332.0,129.0,680.0,2179.0
4,2025-05-28--14h-15m-56s,123923,43.0,837.0,388.0,935.5,2203.5
5,2025-05-28--14h-18m-20s,124389,37.5,417.0,801.0,924.5,2180.0
6,2025-05-28--14h-20m-43s,123898,34.5,180.0,1221.0,717.0,2152.5
7,2025-05-28--14h-23m-06s,124305,31.0,35.0,1692.0,384.0,2142.0
8,2025-05-28--14h-25m-29s,124129,33.0,1.0,2030.0,116.5,2180.5
9,2025-05-28--14h-27m-45s,122433,27.5,0.0,2159.0,2.5,2189.0


In [8]:
# combine the dataframes
chunks_df = pd.concat([chunks_df, all_data_df], ignore_index=True)
chunks_df

Unnamed: 0,data_dir,C,DB_H,DB_V,SB,N,repetition
0,2025-05-28--14h-06m-26s,2.0,38.0,0.0,0.0,40.0,
1,2025-05-28--14h-06m-26s,1.0,39.0,0.0,0.0,40.0,
2,2025-05-28--14h-06m-26s,0.0,40.0,0.0,0.0,40.0,
3,2025-05-28--14h-06m-26s,0.0,40.0,0.0,0.0,40.0,
4,2025-05-28--14h-06m-26s,1.0,39.0,0.0,0.0,40.0,
...,...,...,...,...,...,...,...
291,2025-05-28--14h-18m-20s,37.5,417.0,801.0,924.5,2180.0,124389.0
292,2025-05-28--14h-20m-43s,34.5,180.0,1221.0,717.0,2152.5,123898.0
293,2025-05-28--14h-23m-06s,31.0,35.0,1692.0,384.0,2142.0,124305.0
294,2025-05-28--14h-25m-29s,33.0,1.0,2030.0,116.5,2180.5,124129.0


In [9]:
# Ensure the 1/eta factor doesn't exceed 1
chunks_df["theta_estimate"] = np.arccos(
    np.clip((1 / eta) * (chunks_df["DB_H"] - chunks_df["DB_V"]) / chunks_df["N"], -1, 1)
)

chunks_df

Unnamed: 0,data_dir,C,DB_H,DB_V,SB,N,repetition,theta_estimate
0,2025-05-28--14h-06m-26s,2.0,38.0,0.0,0.0,40.0,,0.317560
1,2025-05-28--14h-06m-26s,1.0,39.0,0.0,0.0,40.0,,0.224075
2,2025-05-28--14h-06m-26s,0.0,40.0,0.0,0.0,40.0,,0.000000
3,2025-05-28--14h-06m-26s,0.0,40.0,0.0,0.0,40.0,,0.000000
4,2025-05-28--14h-06m-26s,1.0,39.0,0.0,0.0,40.0,,0.224075
...,...,...,...,...,...,...,...,...
291,2025-05-28--14h-18m-20s,37.5,417.0,801.0,924.5,2180.0,124389.0,1.747867
292,2025-05-28--14h-20m-43s,34.5,180.0,1221.0,717.0,2152.5,123898.0,2.075586
293,2025-05-28--14h-23m-06s,31.0,35.0,1692.0,384.0,2142.0,124305.0,2.455261
294,2025-05-28--14h-25m-29s,33.0,1.0,2030.0,116.5,2180.5,124129.0,2.766628


In [10]:
chunks_df["delta_phi_estimate"] = np.arctan(
    np.sqrt(
        (eta / (2 - eta)) * chunks_df["C"] / chunks_df["SB"]
    )
)

chunks_df

Unnamed: 0,data_dir,C,DB_H,DB_V,SB,N,repetition,theta_estimate,delta_phi_estimate
0,2025-05-28--14h-06m-26s,2.0,38.0,0.0,0.0,40.0,,0.317560,1.570796
1,2025-05-28--14h-06m-26s,1.0,39.0,0.0,0.0,40.0,,0.224075,1.570796
2,2025-05-28--14h-06m-26s,0.0,40.0,0.0,0.0,40.0,,0.000000,
3,2025-05-28--14h-06m-26s,0.0,40.0,0.0,0.0,40.0,,0.000000,
4,2025-05-28--14h-06m-26s,1.0,39.0,0.0,0.0,40.0,,0.224075,1.570796
...,...,...,...,...,...,...,...,...,...
291,2025-05-28--14h-18m-20s,37.5,417.0,801.0,924.5,2180.0,124389.0,1.747867,0.198743
292,2025-05-28--14h-20m-43s,34.5,180.0,1221.0,717.0,2152.5,123898.0,2.075586,0.215936
293,2025-05-28--14h-23m-06s,31.0,35.0,1692.0,384.0,2142.0,124305.0,2.455261,0.276833
294,2025-05-28--14h-25m-29s,33.0,1.0,2030.0,116.5,2180.5,124129.0,2.766628,0.489093


In [11]:
# save the dataframes to csv files
# save the dataframes to csv files based on the data_dir
for data_dir in new_data_dirs:
    chunks_subset = chunks_df[chunks_df["data_dir"] == data_dir]
    if not chunks_subset.empty:
        output_file = os.path.join(data_folder, data_dir, "estimators.csv")
        chunks_subset.to_csv(output_file, index=False)
        print(f"Saved estimators for {data_dir} to {output_file}")
    else:
        print(f"No data to save for {data_dir}")

Saved estimators for 2025-05-28--14h-06m-26s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-28--14h-06m-26s/estimators.csv
Saved estimators for 2025-05-28--14h-08m-47s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-28--14h-08m-47s/estimators.csv
Saved estimators for 2025-05-28--14h-11m-10s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-28--14h-11m-10s/estimators.csv
Saved estimators for 2025-05-28--14h-13m-33s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-28--14h-13m-33s/estimators.csv
Saved estimators for 2025-05-28--14h-15m-56s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-es