In [11]:
import pandas as pd
import numpy as np
import os

In [12]:
# Set to True to force a full refresh of the data.
# When False, directories that already contain an estimators.csv are skipped.
full_refresh = True
# Scale factor used by both estimators in this notebook: 1/eta multiplies the
# arccos argument of theta_estimate and eta/(2 - eta) multiplies the C/SB ratio
# of delta_phi_estimate.
# NOTE(review): presumably an efficiency in (0, 1] — confirm against the analysis notes.
eta = 1

In [13]:
# Resolve the repository root through git so the data folder can be located
# regardless of which sub-directory the notebook is run from.
with os.popen('git rev-parse --show-toplevel') as git_pipe:
    repo_root = git_pipe.read().strip()

# git writes nothing to stdout when it is not installed or when the working
# directory is outside a repository. Fail here instead of letting an empty
# root silently turn the data folder into a relative path.
if not repo_root:
    raise RuntimeError(
        "Could not determine the repository root: 'git rev-parse --show-toplevel' "
        "produced no output. Run this notebook from inside the git repository."
    )

In [14]:
data_folder = os.path.join(repo_root, 'multi-parameter-estimation', 'data')

# Every sub-directory of the data folder, in sorted order, leaving out the
# 'old-data' directory.
data_dirs = sorted(
    entry
    for entry in os.listdir(data_folder)
    if entry != 'old-data' and os.path.isdir(os.path.join(data_folder, entry))
)
data_dirs

['2025-06-03--16h-33m-47s',
 '2025-06-03--16h-35m-37s',
 '2025-06-03--16h-37m-20s',
 '2025-06-03--16h-39m-05s',
 '2025-06-03--16h-40m-50s',
 '2025-06-03--16h-42m-36s',
 '2025-06-03--16h-44m-21s',
 '2025-06-03--16h-46m-06s',
 '2025-06-03--16h-47m-52s',
 '2025-06-03--16h-49m-39s',
 '2025-06-03--16h-51m-26s',
 '2025-06-03--16h-53m-14s',
 '2025-06-03--16h-54m-59s',
 '2025-06-03--16h-56m-47s',
 '2025-06-03--16h-58m-36s',
 '2025-06-03--17h-00m-23s',
 '2025-06-03--17h-02m-09s',
 '2025-06-03--17h-03m-56s',
 '2025-06-03--17h-05m-43s',
 '2025-06-03--17h-07m-28s',
 '2025-06-03--17h-09m-15s',
 '2025-06-03--17h-11m-02s',
 '2025-06-03--17h-12m-49s',
 '2025-06-03--17h-14m-36s',
 '2025-06-03--17h-16m-22s',
 '2025-06-03--17h-18m-08s',
 '2025-06-03--17h-19m-55s',
 '2025-06-03--17h-21m-56s',
 '2025-06-03--17h-23m-44s',
 '2025-06-03--17h-25m-29s',
 '2025-06-03--17h-27m-14s',
 '2025-06-03--17h-29m-01s',
 '2025-06-03--17h-30m-48s',
 '2025-06-03--17h-32m-37s',
 '2025-06-03--17h-34m-23s',
 '2025-06-03--17h-36

In [15]:
# A full refresh reprocesses every directory; otherwise only the directories
# that do not yet contain an estimators.csv are kept.
if full_refresh:
    new_data_dirs = list(data_dirs)
else:
    new_data_dirs = [
        run_dir
        for run_dir in data_dirs
        if not os.path.exists(os.path.join(data_folder, run_dir, "estimators.csv"))
    ]

new_data_dirs

['2025-06-03--16h-33m-47s',
 '2025-06-03--16h-35m-37s',
 '2025-06-03--16h-37m-20s',
 '2025-06-03--16h-39m-05s',
 '2025-06-03--16h-40m-50s',
 '2025-06-03--16h-42m-36s',
 '2025-06-03--16h-44m-21s',
 '2025-06-03--16h-46m-06s',
 '2025-06-03--16h-47m-52s',
 '2025-06-03--16h-49m-39s',
 '2025-06-03--16h-51m-26s',
 '2025-06-03--16h-53m-14s',
 '2025-06-03--16h-54m-59s',
 '2025-06-03--16h-56m-47s',
 '2025-06-03--16h-58m-36s',
 '2025-06-03--17h-00m-23s',
 '2025-06-03--17h-02m-09s',
 '2025-06-03--17h-03m-56s',
 '2025-06-03--17h-05m-43s',
 '2025-06-03--17h-07m-28s',
 '2025-06-03--17h-09m-15s',
 '2025-06-03--17h-11m-02s',
 '2025-06-03--17h-12m-49s',
 '2025-06-03--17h-14m-36s',
 '2025-06-03--17h-16m-22s',
 '2025-06-03--17h-18m-08s',
 '2025-06-03--17h-19m-55s',
 '2025-06-03--17h-21m-56s',
 '2025-06-03--17h-23m-44s',
 '2025-06-03--17h-25m-29s',
 '2025-06-03--17h-27m-14s',
 '2025-06-03--17h-29m-01s',
 '2025-06-03--17h-30m-48s',
 '2025-06-03--17h-32m-37s',
 '2025-06-03--17h-34m-23s',
 '2025-06-03--17h-36

In [16]:
def load_chunks(data_dir, n, base_dir=None):
    """Load ``chunked_coincidences_n={n}.csv`` from one data directory.

    Parameters
    ----------
    data_dir : str
        Name of the directory (inside the base folder) to read from. It is
        also written into the ``data_dir`` column of the returned frame.
    n : int
        Value substituted into the file name; presumably the chunk size.
    base_dir : str, optional
        Folder that contains ``data_dir``. When omitted, the notebook-level
        ``data_folder`` is used.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with the ``data_dir`` column set to ``data_dir``.
        If the file does not exist, a notice is printed and an empty
        DataFrame is returned.
    """
    root = data_folder if base_dir is None else base_dir
    # Build the path once so the existence check and the read always agree.
    file_path = os.path.join(root, data_dir, f"chunked_coincidences_n={n}.csv")
    if not os.path.exists(file_path):
        print(f"Skipping {data_dir} n={n} as file does not exist.")
        return pd.DataFrame()
    coincidences = pd.read_csv(file_path)
    coincidences["data_dir"] = data_dir
    return coincidences

# Values of n for which a chunked_coincidences_n={n}.csv file is looked up in
# every directory.
chunk_sizes = [40, 80, 120, 160, 200]

# Rows are ordered by n first and by directory second.
chunks_df = pd.concat(
    [load_chunks(run_dir, size) for size in chunk_sizes for run_dir in new_data_dirs],
    ignore_index=True,
)
chunks_df

Unnamed: 0,data_dir,C,DB_H,DB_V,SB,N
0,2025-06-03--16h-33m-47s,1.0,39.0,0.0,0.0,40.0
1,2025-06-03--16h-33m-47s,1.0,39.0,0.0,0.0,40.0
2,2025-06-03--16h-33m-47s,1.0,39.0,0.0,0.0,40.0
3,2025-06-03--16h-33m-47s,2.0,38.0,0.0,0.0,40.0
4,2025-06-03--16h-33m-47s,3.0,37.0,0.0,0.0,40.0
...,...,...,...,...,...,...
3813678,2025-06-03--20h-24m-15s,103.0,50.0,44.0,3.0,200.0
3813679,2025-06-03--20h-24m-15s,97.0,54.0,46.0,3.0,200.0
3813680,2025-06-03--20h-24m-15s,108.0,46.0,45.0,1.0,200.0
3813681,2025-06-03--20h-24m-15s,84.0,71.0,42.0,3.0,200.0


In [17]:
# load the scaled_coincidences_all data
def load_all_data(data_dir, base_dir=None):
    """Load ``scaled_coincidences_all.csv`` from one data directory.

    Parameters
    ----------
    data_dir : str
        Name of the directory (inside the base folder) to read from. It is
        also written into the ``data_dir`` column of the returned frame.
    base_dir : str, optional
        Folder that contains ``data_dir``. When omitted, the notebook-level
        ``data_folder`` is used.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with the ``data_dir`` column set to ``data_dir``.
        If the file does not exist, a notice is printed and an empty
        DataFrame is returned.
    """
    root = data_folder if base_dir is None else base_dir
    file_path = os.path.join(root, data_dir, "scaled_coincidences_all.csv")
    if not os.path.exists(file_path):
        print(f"Skipping {data_dir} as file does not exist.")
        return pd.DataFrame()
    scaled_coincidences = pd.read_csv(file_path)
    scaled_coincidences["data_dir"] = data_dir
    return scaled_coincidences

all_data_df = pd.concat([load_all_data(d) for d in new_data_dirs], ignore_index=True)
all_data_df

Unnamed: 0,data_dir,repetition,C,DB_H,DB_V,SB,N
0,2025-06-03--16h-33m-47s,10967471460,19511.5,512251.0,1.0,693.0,532456.5
1,2025-06-03--16h-35m-37s,11102213566,19008.0,496874.0,9.0,4082.0,519973.0
2,2025-06-03--16h-37m-20s,11236878828,19073.5,498360.0,115.0,15425.0,532973.5
3,2025-06-03--16h-39m-05s,11738414031,19252.5,476531.0,629.0,33322.0,529734.5
4,2025-06-03--16h-40m-50s,12458548026,19089.5,450679.0,1950.0,57243.5,528962.0
...,...,...,...,...,...,...,...
125,2025-06-03--20h-17m-09s,17806958686,220745.0,132324.0,116617.0,25163.5,494849.5
126,2025-06-03--20h-18m-55s,17816773296,228258.5,132508.0,117772.0,18235.0,496773.5
127,2025-06-03--20h-20m-41s,17210041101,231742.0,131256.0,116839.0,12815.0,492652.0
128,2025-06-03--20h-22m-29s,17262770955,234844.5,131045.0,117144.0,9387.5,492421.0


In [18]:
# Combine the chunked rows with the rows loaded from scaled_coincidences_all.csv
# into a single frame.
# NOTE: this rebinds chunks_df to the combined result, so re-running this cell
# without first rebuilding chunks_df appends all_data_df a second time.
chunks_df = pd.concat([chunks_df, all_data_df], ignore_index=True)
chunks_df

Unnamed: 0,data_dir,C,DB_H,DB_V,SB,N,repetition
0,2025-06-03--16h-33m-47s,1.0,39.0,0.0,0.0,40.0,
1,2025-06-03--16h-33m-47s,1.0,39.0,0.0,0.0,40.0,
2,2025-06-03--16h-33m-47s,1.0,39.0,0.0,0.0,40.0,
3,2025-06-03--16h-33m-47s,2.0,38.0,0.0,0.0,40.0,
4,2025-06-03--16h-33m-47s,3.0,37.0,0.0,0.0,40.0,
...,...,...,...,...,...,...,...
3813808,2025-06-03--20h-17m-09s,220745.0,132324.0,116617.0,25163.5,494849.5,1.780696e+10
3813809,2025-06-03--20h-18m-55s,228258.5,132508.0,117772.0,18235.0,496773.5,1.781677e+10
3813810,2025-06-03--20h-20m-41s,231742.0,131256.0,116839.0,12815.0,492652.0,1.721004e+10
3813811,2025-06-03--20h-22m-29s,234844.5,131045.0,117144.0,9387.5,492421.0,1.726277e+10


In [19]:
# theta is estimated as arccos((1/eta) * (DB_H - DB_V) / N).
# The argument is clamped to [-1, 1] because the 1/eta scaling can push it
# outside the domain of arccos.
db_difference = chunks_df["DB_H"] - chunks_df["DB_V"]
cos_theta = ((1 / eta) * db_difference / chunks_df["N"]).clip(-1, 1)
chunks_df["theta_estimate"] = np.arccos(cos_theta)

chunks_df

Unnamed: 0,data_dir,C,DB_H,DB_V,SB,N,repetition,theta_estimate
0,2025-06-03--16h-33m-47s,1.0,39.0,0.0,0.0,40.0,,0.224075
1,2025-06-03--16h-33m-47s,1.0,39.0,0.0,0.0,40.0,,0.224075
2,2025-06-03--16h-33m-47s,1.0,39.0,0.0,0.0,40.0,,0.224075
3,2025-06-03--16h-33m-47s,2.0,38.0,0.0,0.0,40.0,,0.317560
4,2025-06-03--16h-33m-47s,3.0,37.0,0.0,0.0,40.0,,0.389761
...,...,...,...,...,...,...,...,...
3813808,2025-06-03--20h-17m-09s,220745.0,132324.0,116617.0,25163.5,494849.5,1.780696e+10,1.539050
3813809,2025-06-03--20h-18m-55s,228258.5,132508.0,117772.0,18235.0,496773.5,1.781677e+10,1.541129
3813810,2025-06-03--20h-20m-41s,231742.0,131256.0,116839.0,12815.0,492652.0,1.721004e+10,1.541528
3813811,2025-06-03--20h-22m-29s,234844.5,131045.0,117144.0,9387.5,492421.0,1.726277e+10,1.542563


In [20]:
# delta_phi is estimated as arctan(sqrt(eta / (2 - eta) * C / SB)).
# Rows with SB == 0 and C > 0 divide to +inf and therefore come out as pi/2;
# rows with SB == 0 and C == 0 come out as NaN (assuming float columns).
tan_squared = (eta / (2 - eta)) * chunks_df["C"] / chunks_df["SB"]
chunks_df["delta_phi_estimate"] = np.arctan(np.sqrt(tan_squared))

chunks_df

Unnamed: 0,data_dir,C,DB_H,DB_V,SB,N,repetition,theta_estimate,delta_phi_estimate
0,2025-06-03--16h-33m-47s,1.0,39.0,0.0,0.0,40.0,,0.224075,1.570796
1,2025-06-03--16h-33m-47s,1.0,39.0,0.0,0.0,40.0,,0.224075,1.570796
2,2025-06-03--16h-33m-47s,1.0,39.0,0.0,0.0,40.0,,0.224075,1.570796
3,2025-06-03--16h-33m-47s,2.0,38.0,0.0,0.0,40.0,,0.317560,1.570796
4,2025-06-03--16h-33m-47s,3.0,37.0,0.0,0.0,40.0,,0.389761,1.570796
...,...,...,...,...,...,...,...,...,...
3813808,2025-06-03--20h-17m-09s,220745.0,132324.0,116617.0,25163.5,494849.5,1.780696e+10,1.539050,1.245184
3813809,2025-06-03--20h-18m-55s,228258.5,132508.0,117772.0,18235.0,496773.5,1.781677e+10,1.541129,1.295338
3813810,2025-06-03--20h-20m-41s,231742.0,131256.0,116839.0,12815.0,492652.0,1.721004e+10,1.541528,1.339836
3813811,2025-06-03--20h-22m-29s,234844.5,131045.0,117144.0,9387.5,492421.0,1.726277e+10,1.542563,1.373465


In [21]:
# Save one estimators.csv per data directory, inside that directory.
# Grouping once by data_dir avoids re-scanning every row of chunks_df for each
# directory, which a per-directory boolean mask would do.
rows_by_dir = chunks_df.groupby("data_dir", sort=False)
dirs_with_rows = rows_by_dir.indices  # dict: data_dir -> positions of its rows

for data_dir in new_data_dirs:
    if data_dir not in dirs_with_rows:
        print(f"No data to save for {data_dir}")
        continue
    output_file = os.path.join(data_folder, data_dir, "estimators.csv")
    # get_group returns the directory's rows in their original order.
    rows_by_dir.get_group(data_dir).to_csv(output_file, index=False)
    print(f"Saved estimators for {data_dir} to {output_file}")

Saved estimators for 2025-06-03--16h-33m-47s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-33m-47s/estimators.csv
Saved estimators for 2025-06-03--16h-35m-37s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-35m-37s/estimators.csv
Saved estimators for 2025-06-03--16h-37m-20s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-37m-20s/estimators.csv
Saved estimators for 2025-06-03--16h-39m-05s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-39m-05s/estimators.csv
Saved estimators for 2025-06-03--16h-40m-50s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-es