In [12]:
import pandas as pd
import numpy as np
import os

In [13]:
# Parameter that enters both estimator formulas further down
# (as 1 / eta for theta and as eta / (2 - eta) for delta_phi).
eta = 1

# True  -> every data directory is (re)processed.
# False -> directories that already contain an estimators.csv are skipped.
full_refresh = True

In [14]:
# Ask git for the top-level directory of the current repository.
# The pipe is opened in a `with` block so it is closed as soon as the output
# has been read instead of lingering until garbage collection.
# If the command prints nothing on stdout, repo_root ends up as an empty string.
with os.popen('git rev-parse --show-toplevel') as git_pipe:
    repo_root = git_pipe.read().strip()

In [15]:
data_folder = os.path.join(repo_root, 'multi-parameter-estimation', 'data')

# Sorted names of the sub-directories of data_folder; plain files are ignored
# and the 'old-data' directory is deliberately left out.
data_dirs = sorted(
    entry
    for entry in os.listdir(data_folder)
    if entry != 'old-data' and os.path.isdir(os.path.join(data_folder, entry))
)
data_dirs

['2025-06-03--11h-02m-55s',
 '2025-06-03--11h-04m-33s',
 '2025-06-03--11h-05m-58s',
 '2025-06-03--11h-07m-21s',
 '2025-06-03--11h-08m-43s',
 '2025-06-03--11h-10m-06s',
 '2025-06-03--11h-11m-30s',
 '2025-06-03--11h-12m-54s',
 '2025-06-03--11h-14m-20s',
 '2025-06-03--11h-15m-44s',
 '2025-06-03--11h-17m-10s',
 '2025-06-03--11h-18m-35s',
 '2025-06-03--11h-20m-00s',
 '2025-06-03--11h-21m-24s',
 '2025-06-03--11h-22m-50s',
 '2025-06-03--11h-24m-16s',
 '2025-06-03--11h-25m-44s',
 '2025-06-03--11h-27m-10s',
 '2025-06-03--11h-28m-52s',
 '2025-06-03--11h-30m-17s',
 '2025-06-03--11h-31m-43s',
 '2025-06-03--11h-33m-09s',
 '2025-06-03--11h-34m-37s',
 '2025-06-03--11h-36m-03s',
 '2025-06-03--11h-37m-30s',
 '2025-06-03--11h-38m-56s',
 '2025-06-03--11h-40m-23s',
 '2025-06-03--11h-41m-53s',
 '2025-06-03--11h-43m-21s',
 '2025-06-03--11h-44m-49s',
 '2025-06-03--11h-46m-15s',
 '2025-06-03--11h-47m-43s',
 '2025-06-03--11h-49m-09s',
 '2025-06-03--11h-50m-33s',
 '2025-06-03--11h-51m-57s',
 '2025-06-03--11h-53

In [16]:
# Directories to process in this run: all of them on a full refresh,
# otherwise only those that do not have an estimators.csv yet.
if full_refresh:
    new_data_dirs = list(data_dirs)
else:
    new_data_dirs = [
        d for d in data_dirs
        if not os.path.exists(os.path.join(data_folder, d, "estimators.csv"))
    ]

new_data_dirs

['2025-06-03--11h-02m-55s',
 '2025-06-03--11h-04m-33s',
 '2025-06-03--11h-05m-58s',
 '2025-06-03--11h-07m-21s',
 '2025-06-03--11h-08m-43s',
 '2025-06-03--11h-10m-06s',
 '2025-06-03--11h-11m-30s',
 '2025-06-03--11h-12m-54s',
 '2025-06-03--11h-14m-20s',
 '2025-06-03--11h-15m-44s',
 '2025-06-03--11h-17m-10s',
 '2025-06-03--11h-18m-35s',
 '2025-06-03--11h-20m-00s',
 '2025-06-03--11h-21m-24s',
 '2025-06-03--11h-22m-50s',
 '2025-06-03--11h-24m-16s',
 '2025-06-03--11h-25m-44s',
 '2025-06-03--11h-27m-10s',
 '2025-06-03--11h-28m-52s',
 '2025-06-03--11h-30m-17s',
 '2025-06-03--11h-31m-43s',
 '2025-06-03--11h-33m-09s',
 '2025-06-03--11h-34m-37s',
 '2025-06-03--11h-36m-03s',
 '2025-06-03--11h-37m-30s',
 '2025-06-03--11h-38m-56s',
 '2025-06-03--11h-40m-23s',
 '2025-06-03--11h-41m-53s',
 '2025-06-03--11h-43m-21s',
 '2025-06-03--11h-44m-49s',
 '2025-06-03--11h-46m-15s',
 '2025-06-03--11h-47m-43s',
 '2025-06-03--11h-49m-09s',
 '2025-06-03--11h-50m-33s',
 '2025-06-03--11h-51m-57s',
 '2025-06-03--11h-53

In [17]:
def load_chunks(data_dir, n):
    """Read ``chunked_coincidences_n={n}.csv`` from one data directory.

    Parameters
    ----------
    data_dir : str
        Name of a sub-directory of the module-level ``data_folder``.
    n : int
        Value substituted into the file name.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with a ``data_dir`` column set to ``data_dir`` on
        every row. If the file does not exist, a notice is printed and an
        empty DataFrame is returned.
    """
    csv_path = os.path.join(data_folder, data_dir, f"chunked_coincidences_n={n}.csv")
    if os.path.exists(csv_path):
        return pd.read_csv(csv_path).assign(data_dir=data_dir)
    print(f"Skipping {data_dir} n={n} as file does not exist.")
    return pd.DataFrame()

# For each n, stack the frames of every directory; then stack the per-n
# results into one frame with a fresh consecutive index.
frames_per_n = []
for n_value in [40, 80, 120, 160, 200]:
    frames_per_dir = [load_chunks(run_dir, n_value) for run_dir in new_data_dirs]
    frames_per_n.append(pd.concat(frames_per_dir))
chunks_df = pd.concat(frames_per_n, ignore_index=True)
chunks_df

Unnamed: 0,data_dir,C,DB_H,DB_V,SB,N
0,2025-06-03--11h-02m-55s,1.0,39.0,0.0,0.0,40.0
1,2025-06-03--11h-02m-55s,3.0,37.0,0.0,0.0,40.0
2,2025-06-03--11h-02m-55s,1.0,39.0,0.0,0.0,40.0
3,2025-06-03--11h-02m-55s,1.0,39.0,0.0,0.0,40.0
4,2025-06-03--11h-02m-55s,3.0,37.0,0.0,0.0,40.0
...,...,...,...,...,...,...
1251061,2025-06-03--13h-04m-03s,100.0,55.0,44.0,1.0,200.0
1251062,2025-06-03--13h-04m-03s,102.0,52.0,43.0,3.0,200.0
1251063,2025-06-03--13h-04m-03s,112.0,46.0,41.0,1.0,200.0
1251064,2025-06-03--13h-04m-03s,107.0,57.0,32.0,4.0,200.0


In [18]:
# load the scaled_coincidences_all data
def load_all_data(data_dir):
    """Read ``scaled_coincidences_all.csv`` from one data directory.

    Parameters
    ----------
    data_dir : str
        Name of a sub-directory of the module-level ``data_folder``.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with a ``data_dir`` column set to ``data_dir`` on
        every row. If the file does not exist, a notice is printed and an
        empty DataFrame is returned.
    """
    csv_path = os.path.join(data_folder, data_dir, "scaled_coincidences_all.csv")
    if os.path.exists(csv_path):
        return pd.read_csv(csv_path).assign(data_dir=data_dir)
    print(f"Skipping {data_dir} as file does not exist.")
    return pd.DataFrame()

# One frame per directory, stacked into a single frame with a fresh index.
frames_all_data = [load_all_data(run_dir) for run_dir in new_data_dirs]
all_data_df = pd.concat(frames_all_data, ignore_index=True)
all_data_df

Unnamed: 0,data_dir,repetition,C,DB_H,DB_V,SB,N
0,2025-06-03--11h-02m-55s,2757013896,9361.5,256702.0,0.0,396.0,266459.5
1,2025-06-03--11h-04m-33s,2694403936,9521.5,251725.0,6.0,2110.0,263362.5
2,2025-06-03--11h-05m-58s,2820568278,9528.5,247789.0,70.0,7544.0,264931.5
3,2025-06-03--11h-07m-21s,2948697615,9564.5,240004.0,318.0,16991.5,266878.0
4,2025-06-03--11h-08m-43s,3251525761,9697.0,229094.0,991.0,28572.5,268354.5
...,...,...,...,...,...,...,...
80,2025-06-03--12h-58m-28s,4392422128,110869.5,66434.0,58686.0,12542.5,248532.0
81,2025-06-03--12h-59m-52s,4581877128,115463.5,66986.0,59707.0,9214.0,251370.5
82,2025-06-03--13h-01m-14s,4487881170,117562.0,66477.0,59657.0,6512.5,250208.5
83,2025-06-03--13h-02m-38s,4444029226,118635.5,66345.0,59579.0,4891.5,249451.0


In [19]:
# Append the rows of all_data_df underneath the chunked rows and renumber the
# index; columns that exist in only one of the two frames are filled with NaN.
# NOTE: chunks_df is rebound to the combined frame, so running this cell again
# without re-running the cell that builds chunks_df appends the rows once more.
frames_to_combine = [chunks_df, all_data_df]
chunks_df = pd.concat(frames_to_combine, ignore_index=True)
chunks_df

Unnamed: 0,data_dir,C,DB_H,DB_V,SB,N,repetition
0,2025-06-03--11h-02m-55s,1.0,39.0,0.0,0.0,40.0,
1,2025-06-03--11h-02m-55s,3.0,37.0,0.0,0.0,40.0,
2,2025-06-03--11h-02m-55s,1.0,39.0,0.0,0.0,40.0,
3,2025-06-03--11h-02m-55s,1.0,39.0,0.0,0.0,40.0,
4,2025-06-03--11h-02m-55s,3.0,37.0,0.0,0.0,40.0,
...,...,...,...,...,...,...,...
1251146,2025-06-03--12h-58m-28s,110869.5,66434.0,58686.0,12542.5,248532.0,4.392422e+09
1251147,2025-06-03--12h-59m-52s,115463.5,66986.0,59707.0,9214.0,251370.5,4.581877e+09
1251148,2025-06-03--13h-01m-14s,117562.0,66477.0,59657.0,6512.5,250208.5,4.487881e+09
1251149,2025-06-03--13h-02m-38s,118635.5,66345.0,59579.0,4891.5,249451.0,4.444029e+09


In [20]:
# theta = arccos( (1 / eta) * (DB_H - DB_V) / N ).
# arccos is only defined on [-1, 1], so the scaled ratio is clipped to that
# interval before the angle is taken.
scaled_db_ratio = (1 / eta) * (chunks_df["DB_H"] - chunks_df["DB_V"]) / chunks_df["N"]
chunks_df["theta_estimate"] = np.arccos(np.clip(scaled_db_ratio, -1, 1))

chunks_df

Unnamed: 0,data_dir,C,DB_H,DB_V,SB,N,repetition,theta_estimate
0,2025-06-03--11h-02m-55s,1.0,39.0,0.0,0.0,40.0,,0.224075
1,2025-06-03--11h-02m-55s,3.0,37.0,0.0,0.0,40.0,,0.389761
2,2025-06-03--11h-02m-55s,1.0,39.0,0.0,0.0,40.0,,0.224075
3,2025-06-03--11h-02m-55s,1.0,39.0,0.0,0.0,40.0,,0.224075
4,2025-06-03--11h-02m-55s,3.0,37.0,0.0,0.0,40.0,,0.389761
...,...,...,...,...,...,...,...,...
1251146,2025-06-03--12h-58m-28s,110869.5,66434.0,58686.0,12542.5,248532.0,4.392422e+09,1.539616
1251147,2025-06-03--12h-59m-52s,115463.5,66986.0,59707.0,9214.0,251370.5,4.581877e+09,1.541835
1251148,2025-06-03--13h-01m-14s,117562.0,66477.0,59657.0,6512.5,250208.5,4.487881e+09,1.543536
1251149,2025-06-03--13h-02m-38s,118635.5,66345.0,59579.0,4891.5,249451.0,4.444029e+09,1.543669


In [21]:
# delta_phi = arctan( sqrt( eta / (2 - eta) * C / SB ) ), evaluated per row.
# Rows with SB == 0 and C > 0 give an infinite ratio and therefore pi / 2.
scaled_c_over_sb = (eta / (2 - eta)) * chunks_df["C"] / chunks_df["SB"]
chunks_df["delta_phi_estimate"] = np.arctan(np.sqrt(scaled_c_over_sb))

chunks_df

Unnamed: 0,data_dir,C,DB_H,DB_V,SB,N,repetition,theta_estimate,delta_phi_estimate
0,2025-06-03--11h-02m-55s,1.0,39.0,0.0,0.0,40.0,,0.224075,1.570796
1,2025-06-03--11h-02m-55s,3.0,37.0,0.0,0.0,40.0,,0.389761,1.570796
2,2025-06-03--11h-02m-55s,1.0,39.0,0.0,0.0,40.0,,0.224075,1.570796
3,2025-06-03--11h-02m-55s,1.0,39.0,0.0,0.0,40.0,,0.224075,1.570796
4,2025-06-03--11h-02m-55s,3.0,37.0,0.0,0.0,40.0,,0.389761,1.570796
...,...,...,...,...,...,...,...,...,...
1251146,2025-06-03--12h-58m-28s,110869.5,66434.0,58686.0,12542.5,248532.0,4.392422e+09,1.539616,1.246337
1251147,2025-06-03--12h-59m-52s,115463.5,66986.0,59707.0,9214.0,251370.5,4.581877e+09,1.541835,1.295481
1251148,2025-06-03--13h-01m-14s,117562.0,66477.0,59657.0,6512.5,250208.5,4.487881e+09,1.543536,1.339639
1251149,2025-06-03--13h-02m-38s,118635.5,66345.0,59579.0,4891.5,249451.0,4.444029e+09,1.543669,1.370465


In [22]:
# Write one estimators.csv per data directory, holding that directory's rows.
# The frame is split by data_dir in a single groupby pass instead of being
# re-filtered with a boolean mask once for every directory; sort=False keeps
# the groups in order of first appearance and rows keep their original order.
rows_by_dir = {
    dir_name: dir_rows
    for dir_name, dir_rows in chunks_df.groupby("data_dir", sort=False)
}

for data_dir in new_data_dirs:
    # Directories with no rows in chunks_df have no entry in the mapping.
    chunks_subset = rows_by_dir.get(data_dir)
    if chunks_subset is not None and not chunks_subset.empty:
        output_file = os.path.join(data_folder, data_dir, "estimators.csv")
        chunks_subset.to_csv(output_file, index=False)
        print(f"Saved estimators for {data_dir} to {output_file}")
    else:
        print(f"No data to save for {data_dir}")

Saved estimators for 2025-06-03--11h-02m-55s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--11h-02m-55s/estimators.csv
Saved estimators for 2025-06-03--11h-04m-33s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--11h-04m-33s/estimators.csv
Saved estimators for 2025-06-03--11h-05m-58s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--11h-05m-58s/estimators.csv
Saved estimators for 2025-06-03--11h-07m-21s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--11h-07m-21s/estimators.csv
Saved estimators for 2025-06-03--11h-08m-43s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-es