In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# Set to True to reprocess every data directory, including the ones that
# already contain an estimators.csv; False skips those directories.
full_refresh: bool = True
# Scale factor used by the estimator formulas further down, where it enters as
# 1/eta (theta estimate) and eta/(2 - eta) (delta phi estimate).
# NOTE(review): presumably an efficiency in (0, 1] -- confirm.
eta: float = 1

In [3]:
# Ask git for the top-level directory of the repository; the data path below
# is built from it.
with os.popen('git rev-parse --show-toplevel') as git_pipe:
    # The context manager closes the pipe even if reading fails.
    repo_root = git_pipe.read().strip()

# git writes nothing to stdout when it fails (git missing, or the notebook is
# run outside a work tree). Stop here instead of continuing with an empty
# root, which would silently turn the data path into a relative one.
if not repo_root:
    raise RuntimeError(
        "Could not determine the repository root: "
        "'git rev-parse --show-toplevel' returned no output."
    )

In [4]:
data_folder = os.path.join(repo_root, 'multi-parameter-estimation', 'data')

# Sorted names of the sub-directories of data_folder (plain files are
# ignored), skipping old-data.
data_dirs = sorted(
    entry
    for entry in os.listdir(data_folder)
    if entry != 'old-data' and os.path.isdir(os.path.join(data_folder, entry))
)
data_dirs

['2025-05-30--14h-02m-13s',
 '2025-05-30--14h-03m-06s',
 '2025-05-30--14h-04m-07s',
 '2025-05-30--14h-05m-08s',
 '2025-05-30--14h-19m-25s',
 '2025-05-30--14h-22m-01s',
 '2025-05-30--14h-24m-36s',
 '2025-05-30--14h-27m-02s',
 '2025-05-30--14h-43m-17s',
 '2025-05-30--14h-48m-08s',
 '2025-05-30--14h-52m-48s',
 '2025-05-30--14h-57m-52s']

In [5]:
# With a full refresh every directory is reprocessed; otherwise keep only the
# directories that do not contain an estimators.csv yet.
new_data_dirs = [
    run_dir
    for run_dir in data_dirs
    if full_refresh
    or not os.path.exists(os.path.join(data_folder, run_dir, "estimators.csv"))
]

new_data_dirs

['2025-05-30--14h-02m-13s',
 '2025-05-30--14h-03m-06s',
 '2025-05-30--14h-04m-07s',
 '2025-05-30--14h-05m-08s',
 '2025-05-30--14h-19m-25s',
 '2025-05-30--14h-22m-01s',
 '2025-05-30--14h-24m-36s',
 '2025-05-30--14h-27m-02s',
 '2025-05-30--14h-43m-17s',
 '2025-05-30--14h-48m-08s',
 '2025-05-30--14h-52m-48s',
 '2025-05-30--14h-57m-52s']

In [6]:
def load_chunks(data_dir, n):
    """Read the chunked-coincidence CSV of one data directory and chunk size.

    Parameters
    ----------
    data_dir : str
        Name of a sub-directory of ``data_folder``.
    n : int
        Value substituted into the file name ``chunked_coincidences_n={n}.csv``.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with a ``data_dir`` column set to ``data_dir``. When
        the file does not exist, a notice is printed and an empty DataFrame
        is returned instead.
    """
    chunk_file = os.path.join(data_folder, data_dir, f"chunked_coincidences_n={n}.csv")
    if os.path.exists(chunk_file):
        coincidences = pd.read_csv(chunk_file)
        # Tag every row with the directory it was loaded from.
        coincidences["data_dir"] = data_dir
        return coincidences
    print(f"Skipping {data_dir} n={n} as file does not exist.")
    return pd.DataFrame()

# Chunk sizes n for which a chunked_coincidences_n={n}.csv file is looked up.
chunk_sizes = [40, 80, 120, 160, 200]

# Load every (chunk size, directory) combination, chunk size outermost so the
# row order matches a per-size concatenation. The empty placeholder frames
# that load_chunks returns for missing files are dropped.
chunk_frames = [
    frame
    for frame in (load_chunks(d, n) for n in chunk_sizes for d in new_data_dirs)
    if not frame.empty
]

if chunk_frames:
    chunks_df = pd.concat(chunk_frames, ignore_index=True)
else:
    # pd.concat raises ValueError on an empty list, which is what happens when
    # new_data_dirs is empty (full_refresh = False and every directory already
    # has an estimators.csv). Fall back to an empty frame that still carries
    # the columns the estimator cells read, so they do not hit a KeyError.
    chunks_df = pd.DataFrame(
        columns=["data_dir", "C", "DB_H", "DB_V", "SB", "N"], dtype=float
    )
chunks_df

Unnamed: 0,data_dir,C,DB_H,DB_V,SB,N
0,2025-05-30--14h-02m-13s,2.0,37.0,0.0,0.0,39.0
1,2025-05-30--14h-02m-13s,0.0,39.0,0.0,0.0,39.0
2,2025-05-30--14h-02m-13s,0.5,40.0,0.0,0.0,40.5
3,2025-05-30--14h-02m-13s,1.0,38.0,0.0,0.0,39.0
4,2025-05-30--14h-02m-13s,0.5,40.0,0.0,0.0,40.5
...,...,...,...,...,...,...
3634,2025-05-30--14h-57m-52s,3.5,187.0,0.0,10.0,200.5
3635,2025-05-30--14h-57m-52s,2.5,186.0,0.0,11.0,199.5
3636,2025-05-30--14h-57m-52s,5.5,181.0,0.0,12.5,199.0
3637,2025-05-30--14h-57m-52s,3.0,187.0,0.0,11.0,201.0


In [7]:
# load the scaled_coincidences_all data
def load_all_data(data_dir):
    """Read ``scaled_coincidences_all.csv`` from one data directory.

    Parameters
    ----------
    data_dir : str
        Name of a sub-directory of ``data_folder``.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with a ``data_dir`` column set to ``data_dir``. When
        the file does not exist, a notice is printed and an empty DataFrame
        is returned instead.
    """
    file_path = os.path.join(data_folder, data_dir, "scaled_coincidences_all.csv")
    if os.path.exists(file_path):
        # Tag every row with the directory it was loaded from.
        return pd.read_csv(file_path).assign(data_dir=data_dir)
    print(f"Skipping {data_dir} as file does not exist.")
    return pd.DataFrame()

# Load scaled_coincidences_all.csv for every directory still to be processed,
# dropping the empty placeholder frames that load_all_data returns for
# directories without that file.
all_data_frames = [
    frame
    for frame in (load_all_data(d) for d in new_data_dirs)
    if not frame.empty
]

if all_data_frames:
    all_data_df = pd.concat(all_data_frames, ignore_index=True)
else:
    # pd.concat raises ValueError on an empty list, which is what happens when
    # new_data_dirs is empty (full_refresh = False and every directory already
    # has an estimators.csv). Fall back to an empty frame that still carries
    # the columns the later cells read, so they do not hit a KeyError.
    all_data_df = pd.DataFrame(
        columns=["data_dir", "repetition", "C", "DB_H", "DB_V", "SB", "N"],
        dtype=float,
    )
all_data_df

Unnamed: 0,data_dir,repetition,C,DB_H,DB_V,SB,N
0,2025-05-30--14h-02m-13s,112643,22.0,1178.0,0.0,5.0,1205.0
1,2025-05-30--14h-03m-06s,118363,24.0,1232.0,0.0,22.0,1278.0
2,2025-05-30--14h-04m-07s,116434,24.5,1168.0,0.0,36.0,1228.5
3,2025-05-30--14h-05m-08s,119053,20.5,1109.0,1.0,81.0,1211.5
4,2025-05-30--14h-19m-25s,1834177,77.5,4796.0,0.0,25.5,4899.0
5,2025-05-30--14h-22m-01s,1828534,94.0,4540.0,0.0,79.0,4713.0
6,2025-05-30--14h-24m-36s,1841429,89.5,4692.0,0.0,170.0,4951.5
7,2025-05-30--14h-27m-02s,1856848,90.0,4449.0,12.0,283.5,4834.5
8,2025-05-30--14h-43m-17s,7405780,184.5,9816.0,0.0,48.5,10049.0
9,2025-05-30--14h-48m-08s,7426966,185.0,9866.0,0.0,157.0,10208.0


In [8]:
# Append the rows loaded from scaled_coincidences_all.csv to the chunk rows
# and renumber the index.
# NOTE: this rebinds chunks_df, so running this cell a second time without
# re-running the chunk-loading cell appends all_data_df once more.
frames_to_combine = [chunks_df, all_data_df]
chunks_df = pd.concat(frames_to_combine, ignore_index=True)
chunks_df

Unnamed: 0,data_dir,C,DB_H,DB_V,SB,N,repetition
0,2025-05-30--14h-02m-13s,2.0,37.0,0.0,0.0,39.0,
1,2025-05-30--14h-02m-13s,0.0,39.0,0.0,0.0,39.0,
2,2025-05-30--14h-02m-13s,0.5,40.0,0.0,0.0,40.5,
3,2025-05-30--14h-02m-13s,1.0,38.0,0.0,0.0,39.0,
4,2025-05-30--14h-02m-13s,0.5,40.0,0.0,0.0,40.5,
...,...,...,...,...,...,...,...
3646,2025-05-30--14h-27m-02s,90.0,4449.0,12.0,283.5,4834.5,1856848.0
3647,2025-05-30--14h-43m-17s,184.5,9816.0,0.0,48.5,10049.0,7405780.0
3648,2025-05-30--14h-48m-08s,185.0,9866.0,0.0,157.0,10208.0,7426966.0
3649,2025-05-30--14h-52m-48s,179.0,9306.0,3.0,328.0,9816.0,7400733.0


In [9]:
# theta estimate = arccos((1/eta) * (DB_H - DB_V) / N).
# The argument is clipped to [-1, 1] so that a ratio pushed outside that range
# (for example by the 1/eta scaling) yields 0 or pi instead of NaN.
scaled_difference = (1 / eta) * (chunks_df["DB_H"] - chunks_df["DB_V"])
cos_theta = (scaled_difference / chunks_df["N"]).clip(-1, 1)
chunks_df["theta_estimate"] = np.arccos(cos_theta)

chunks_df

Unnamed: 0,data_dir,C,DB_H,DB_V,SB,N,repetition,theta_estimate
0,2025-05-30--14h-02m-13s,2.0,37.0,0.0,0.0,39.0,,0.321641
1,2025-05-30--14h-02m-13s,0.0,39.0,0.0,0.0,39.0,,0.000000
2,2025-05-30--14h-02m-13s,0.5,40.0,0.0,0.0,40.5,,0.157297
3,2025-05-30--14h-02m-13s,1.0,38.0,0.0,0.0,39.0,,0.226942
4,2025-05-30--14h-02m-13s,0.5,40.0,0.0,0.0,40.5,,0.157297
...,...,...,...,...,...,...,...,...
3646,2025-05-30--14h-27m-02s,90.0,4449.0,12.0,283.5,4834.5,1856848.0,0.408347
3647,2025-05-30--14h-43m-17s,184.5,9816.0,0.0,48.5,10049.0,7405780.0,0.215762
3648,2025-05-30--14h-48m-08s,185.0,9866.0,0.0,157.0,10208.0,7426966.0,0.259584
3649,2025-05-30--14h-52m-48s,179.0,9306.0,3.0,328.0,9816.0,7400733.0,0.324725


In [10]:
# delta phi estimate = arctan(sqrt((eta / (2 - eta)) * C / SB)).
# Rows with SB == 0 are not special-cased: C > 0 gives arctan(inf) = pi/2,
# and C == 0 gives NaN (0/0).
eta_ratio = eta / (2 - eta)
tan_squared = eta_ratio * chunks_df["C"] / chunks_df["SB"]
chunks_df["delta_phi_estimate"] = np.arctan(np.sqrt(tan_squared))

chunks_df

Unnamed: 0,data_dir,C,DB_H,DB_V,SB,N,repetition,theta_estimate,delta_phi_estimate
0,2025-05-30--14h-02m-13s,2.0,37.0,0.0,0.0,39.0,,0.321641,1.570796
1,2025-05-30--14h-02m-13s,0.0,39.0,0.0,0.0,39.0,,0.000000,
2,2025-05-30--14h-02m-13s,0.5,40.0,0.0,0.0,40.5,,0.157297,1.570796
3,2025-05-30--14h-02m-13s,1.0,38.0,0.0,0.0,39.0,,0.226942,1.570796
4,2025-05-30--14h-02m-13s,0.5,40.0,0.0,0.0,40.5,,0.157297,1.570796
...,...,...,...,...,...,...,...,...,...
3646,2025-05-30--14h-27m-02s,90.0,4449.0,12.0,283.5,4834.5,1856848.0,0.408347,0.513100
3647,2025-05-30--14h-43m-17s,184.5,9816.0,0.0,48.5,10049.0,7405780.0,0.215762,1.097032
3648,2025-05-30--14h-48m-08s,185.0,9866.0,0.0,157.0,10208.0,7426966.0,0.259584,0.826380
3649,2025-05-30--14h-52m-48s,179.0,9306.0,3.0,328.0,9816.0,7400733.0,0.324725,0.636253


In [11]:
# Write one estimators.csv per processed data directory, containing only the
# rows of chunks_df that belong to that directory.
for data_dir in new_data_dirs:
    chunks_subset = chunks_df.loc[chunks_df["data_dir"] == data_dir]
    if chunks_subset.empty:
        print(f"No data to save for {data_dir}")
        continue
    output_file = os.path.join(data_folder, data_dir, "estimators.csv")
    # index=False: the row index is not written to the file.
    chunks_subset.to_csv(output_file, index=False)
    print(f"Saved estimators for {data_dir} to {output_file}")

Saved estimators for 2025-05-30--14h-02m-13s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-30--14h-02m-13s/estimators.csv
Saved estimators for 2025-05-30--14h-03m-06s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-30--14h-03m-06s/estimators.csv
Saved estimators for 2025-05-30--14h-04m-07s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-30--14h-04m-07s/estimators.csv
Saved estimators for 2025-05-30--14h-05m-08s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-30--14h-05m-08s/estimators.csv
Saved estimators for 2025-05-30--14h-19m-25s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-es