In [82]:
import os
import subprocess

import numpy as np
import pandas as pd

In [83]:
# Set to True to force a full refresh of the data
# (when False, directories that already contain an estimators.csv are skipped).
full_refresh = True
# Scale factor used by both estimators in this notebook: theta_estimate uses
# 1/eta and delta_phi_estimate uses eta/(2 - eta).
# NOTE(review): presumably a measured efficiency/visibility-type correction —
# confirm its physical meaning and where the value 0.985 comes from.
eta = 0.985

In [84]:
# Locate the repository root so data paths do not depend on the notebook's
# working directory. check=True makes this cell raise CalledProcessError when
# the notebook is not inside a git repository (and FileNotFoundError when git
# is not installed) instead of silently producing an empty string, which would
# turn the data folder into a path relative to the current directory.
repo_root = subprocess.run(
    ["git", "rev-parse", "--show-toplevel"],
    capture_output=True,
    text=True,
    check=True,
).stdout.strip()

In [85]:
data_folder = os.path.join(repo_root, 'multi-parameter-estimation', 'data')

# Sorted names of every sub-directory of the data folder, leaving out 'old-data'.
data_dirs = sorted(
    entry
    for entry in os.listdir(data_folder)
    if entry != 'old-data' and os.path.isdir(os.path.join(data_folder, entry))
)
data_dirs

['2025-05-23--17h-24m-53s',
 '2025-05-23--17h-27m-00s',
 '2025-05-23--17h-28m-51s',
 '2025-05-23--17h-30m-41s',
 '2025-05-23--17h-32m-32s',
 '2025-05-23--17h-34m-23s',
 '2025-05-23--17h-36m-14s',
 '2025-05-23--17h-38m-05s',
 '2025-05-23--17h-39m-56s',
 '2025-05-23--17h-41m-47s',
 '2025-05-23--17h-43m-38s',
 '2025-05-23--17h-45m-29s',
 '2025-05-23--17h-47m-20s',
 '2025-05-23--17h-49m-11s',
 '2025-05-23--17h-51m-02s',
 '2025-05-23--17h-52m-53s',
 '2025-05-23--17h-54m-44s',
 '2025-05-23--17h-56m-35s',
 '2025-05-23--17h-58m-27s',
 '2025-05-23--18h-00m-17s',
 '2025-05-23--18h-02m-09s',
 '2025-05-23--18h-04m-00s',
 '2025-05-23--18h-05m-51s',
 '2025-05-23--18h-07m-42s',
 '2025-05-23--18h-09m-34s',
 '2025-05-23--18h-11m-25s',
 '2025-05-23--18h-13m-17s',
 '2025-05-23--18h-15m-08s',
 '2025-05-23--18h-16m-59s',
 '2025-05-23--18h-18m-47s',
 '2025-05-23--18h-20m-26s',
 '2025-05-23--18h-22m-05s',
 '2025-05-23--18h-23m-43s',
 '2025-05-23--18h-25m-22s',
 '2025-05-23--18h-27m-01s',
 '2025-05-23--18h-28

In [86]:
# Directories still to process: all of them on a full refresh, otherwise only
# those that do not yet contain an estimators.csv.
new_data_dirs = [
    run_dir
    for run_dir in data_dirs
    if full_refresh
    or not os.path.exists(os.path.join(data_folder, run_dir, "estimators.csv"))
]

new_data_dirs

['2025-05-23--17h-24m-53s',
 '2025-05-23--17h-27m-00s',
 '2025-05-23--17h-28m-51s',
 '2025-05-23--17h-30m-41s',
 '2025-05-23--17h-32m-32s',
 '2025-05-23--17h-34m-23s',
 '2025-05-23--17h-36m-14s',
 '2025-05-23--17h-38m-05s',
 '2025-05-23--17h-39m-56s',
 '2025-05-23--17h-41m-47s',
 '2025-05-23--17h-43m-38s',
 '2025-05-23--17h-45m-29s',
 '2025-05-23--17h-47m-20s',
 '2025-05-23--17h-49m-11s',
 '2025-05-23--17h-51m-02s',
 '2025-05-23--17h-52m-53s',
 '2025-05-23--17h-54m-44s',
 '2025-05-23--17h-56m-35s',
 '2025-05-23--17h-58m-27s',
 '2025-05-23--18h-00m-17s',
 '2025-05-23--18h-02m-09s',
 '2025-05-23--18h-04m-00s',
 '2025-05-23--18h-05m-51s',
 '2025-05-23--18h-07m-42s',
 '2025-05-23--18h-09m-34s',
 '2025-05-23--18h-11m-25s',
 '2025-05-23--18h-13m-17s',
 '2025-05-23--18h-15m-08s',
 '2025-05-23--18h-16m-59s',
 '2025-05-23--18h-18m-47s',
 '2025-05-23--18h-20m-26s',
 '2025-05-23--18h-22m-05s',
 '2025-05-23--18h-23m-43s',
 '2025-05-23--18h-25m-22s',
 '2025-05-23--18h-27m-01s',
 '2025-05-23--18h-28

In [87]:
def load_chunks(data_dir, n, base_dir=None):
    """Load one run's ``chunked_coincidences_n={n}.csv`` file.

    Parameters
    ----------
    data_dir : str
        Name of the run directory inside the data folder.
    n : int
        Value substituted into the file name ``chunked_coincidences_n={n}.csv``.
    base_dir : str, optional
        Folder that contains the run directories. Defaults to the
        notebook-level ``data_folder``.

    Returns
    -------
    pandas.DataFrame
        Contents of the CSV with a ``data_dir`` column set to ``data_dir``,
        or an empty DataFrame (after printing a notice) when the file does
        not exist.
    """
    if base_dir is None:
        base_dir = data_folder
    # Build the path once so the existence check and the read cannot drift apart.
    csv_path = os.path.join(base_dir, data_dir, f"chunked_coincidences_n={n}.csv")
    if not os.path.exists(csv_path):
        print(f"Skipping {data_dir} n={n} as file does not exist.")
        return pd.DataFrame()
    coincidences = pd.read_csv(csv_path)
    coincidences["data_dir"] = data_dir
    return coincidences

# Values of n whose chunked_coincidences_n={n}.csv files are loaded for each run.
chunk_sizes = [40, 80, 120, 160, 200]

# Load every (n, run) combination in the original order: all runs for the
# first n, then all runs for the next n, and so on. Empty frames (returned for
# missing files) carry no rows and are dropped before concatenating.
loaded_chunks = [load_chunks(d, n) for n in chunk_sizes for d in new_data_dirs]
loaded_chunks = [frame for frame in loaded_chunks if not frame.empty]

if loaded_chunks:
    chunks_df = pd.concat(loaded_chunks, ignore_index=True)
else:
    # pd.concat raises ValueError on an empty list, which used to crash this
    # cell whenever new_data_dirs was empty (e.g. full_refresh=False with every
    # run already processed). Fall back to an empty frame that still has the
    # columns the following cells read, with numeric dtypes for the counts.
    chunks_df = pd.DataFrame(
        {
            "data_dir": pd.Series(dtype="object"),
            "C": pd.Series(dtype="float64"),
            "DB_H": pd.Series(dtype="float64"),
            "DB_V": pd.Series(dtype="float64"),
            "SB": pd.Series(dtype="float64"),
            "N": pd.Series(dtype="float64"),
        }
    )
chunks_df

Unnamed: 0,data_dir,C,DB_H,DB_V,SB,N
0,2025-05-23--17h-24m-53s,2.00,38.0,0.0,0.00,40.0
1,2025-05-23--17h-24m-53s,0.75,39.0,0.0,0.25,40.0
2,2025-05-23--17h-24m-53s,1.00,39.0,0.0,0.00,40.0
3,2025-05-23--17h-24m-53s,0.75,39.0,0.0,0.25,40.0
4,2025-05-23--17h-24m-53s,1.00,39.0,0.0,0.00,40.0
...,...,...,...,...,...,...
11470,2025-05-23--19h-08m-07s,3.75,0.0,196.0,0.25,200.0
11471,2025-05-23--19h-08m-07s,3.50,0.0,196.0,0.50,200.0
11472,2025-05-23--19h-08m-07s,2.75,0.0,197.0,0.25,200.0
11473,2025-05-23--19h-08m-07s,4.25,0.0,195.0,0.75,200.0


In [88]:
# arccos only accepts arguments in [-1, 1]; scaling by 1/eta can push the
# normalised count difference outside that range, so clip before inverting.
count_difference = chunks_df["DB_H"] - chunks_df["DB_V"]
cos_theta = (1 / eta) * count_difference / chunks_df["N"]
chunks_df["theta_estimate"] = np.arccos(np.clip(cos_theta, -1, 1))

chunks_df

Unnamed: 0,data_dir,C,DB_H,DB_V,SB,N,theta_estimate
0,2025-05-23--17h-24m-53s,2.00,38.0,0.0,0.00,40.0,0.267378
1,2025-05-23--17h-24m-53s,0.75,39.0,0.0,0.25,40.0,0.142615
2,2025-05-23--17h-24m-53s,1.00,39.0,0.0,0.00,40.0,0.142615
3,2025-05-23--17h-24m-53s,0.75,39.0,0.0,0.25,40.0,0.142615
4,2025-05-23--17h-24m-53s,1.00,39.0,0.0,0.00,40.0,0.142615
...,...,...,...,...,...,...,...
11470,2025-05-23--19h-08m-07s,3.75,0.0,196.0,0.25,200.0,3.040791
11471,2025-05-23--19h-08m-07s,3.50,0.0,196.0,0.50,200.0,3.040791
11472,2025-05-23--19h-08m-07s,2.75,0.0,197.0,0.25,200.0,3.141593
11473,2025-05-23--19h-08m-07s,4.25,0.0,195.0,0.75,200.0,2.998978


In [89]:
# delta_phi = arctan(sqrt(eta / (2 - eta) * C / SB)).
# Rows with SB == 0 produce an infinite ratio (estimate pi/2) or, when C is
# also 0, NaN.
scaled_ratio = (eta / (2 - eta)) * chunks_df["C"] / chunks_df["SB"]
chunks_df["delta_phi_estimate"] = np.arctan(np.sqrt(scaled_ratio))

chunks_df

Unnamed: 0,data_dir,C,DB_H,DB_V,SB,N,theta_estimate,delta_phi_estimate
0,2025-05-23--17h-24m-53s,2.00,38.0,0.0,0.00,40.0,0.267378,1.570796
1,2025-05-23--17h-24m-53s,0.75,39.0,0.0,0.25,40.0,0.142615,1.040678
2,2025-05-23--17h-24m-53s,1.00,39.0,0.0,0.00,40.0,0.142615,1.570796
3,2025-05-23--17h-24m-53s,0.75,39.0,0.0,0.25,40.0,0.142615,1.040678
4,2025-05-23--17h-24m-53s,1.00,39.0,0.0,0.00,40.0,0.142615,1.570796
...,...,...,...,...,...,...,...,...
11470,2025-05-23--19h-08m-07s,3.75,0.0,196.0,0.25,200.0,3.040791,1.314461
11471,2025-05-23--19h-08m-07s,3.50,0.0,196.0,0.50,200.0,3.040791,1.204440
11472,2025-05-23--19h-08m-07s,2.75,0.0,197.0,0.25,200.0,3.141593,1.273781
11473,2025-05-23--19h-08m-07s,4.25,0.0,195.0,0.75,200.0,2.998978,1.167712


In [90]:
# Write each run's rows to an estimators.csv inside that run's own directory.
for data_dir in new_data_dirs:
    rows_for_dir = chunks_df.loc[chunks_df["data_dir"] == data_dir]
    if rows_for_dir.empty:
        print(f"No data to save for {data_dir}")
        continue
    output_file = os.path.join(data_folder, data_dir, "estimators.csv")
    rows_for_dir.to_csv(output_file, index=False)
    print(f"Saved estimators for {data_dir} to {output_file}")

Saved estimators for 2025-05-23--17h-24m-53s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-23--17h-24m-53s/estimators.csv
Saved estimators for 2025-05-23--17h-27m-00s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-23--17h-27m-00s/estimators.csv
Saved estimators for 2025-05-23--17h-28m-51s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-23--17h-28m-51s/estimators.csv
Saved estimators for 2025-05-23--17h-30m-41s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-23--17h-30m-41s/estimators.csv
Saved estimators for 2025-05-23--17h-32m-32s to /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-es