In [7]:
import pandas as pd
import numpy as np
import os

In [8]:
# Set to True to force a full refresh of the data
full_refresh = True

In [9]:
repo_root = os.popen('git rev-parse --show-toplevel').read().strip()

In [10]:
data_folder = os.path.join(repo_root, 'multi-parameter-estimation', 'data')

# Get list of data directories
data_dirs = os.listdir(data_folder)
data_dirs = [d for d in data_dirs if os.path.isdir(os.path.join(data_folder, d))]

# skip old-data
if 'old-data' in data_dirs:
    data_dirs.remove('old-data')

data_dirs.sort()
data_dirs

['2025-05-30--14h-02m-13s',
 '2025-05-30--14h-03m-06s',
 '2025-05-30--14h-04m-07s',
 '2025-05-30--14h-05m-08s',
 '2025-05-30--14h-19m-25s',
 '2025-05-30--14h-22m-01s',
 '2025-05-30--14h-24m-36s',
 '2025-05-30--14h-27m-02s',
 '2025-05-30--14h-43m-17s']

In [11]:
new_data_dirs = data_dirs.copy()

if not full_refresh:
    for d in data_dirs:
        if os.path.exists(os.path.join(data_folder, d, "scaled_coincidences.csv")):
            new_data_dirs.remove(d)

new_data_dirs

['2025-05-30--14h-02m-13s',
 '2025-05-30--14h-03m-06s',
 '2025-05-30--14h-04m-07s',
 '2025-05-30--14h-05m-08s',
 '2025-05-30--14h-19m-25s',
 '2025-05-30--14h-22m-01s',
 '2025-05-30--14h-24m-36s',
 '2025-05-30--14h-27m-02s',
 '2025-05-30--14h-43m-17s']

In [12]:
def load_coincidences(data_dir):
    coincidences = pd.read_csv(os.path.join(data_folder, data_dir, "labelled_coincidences.csv"))
    coincidences["data_dir"] = data_dir
    return coincidences

coincidences_df = pd.concat([load_coincidences(d) for d in new_data_dirs], ignore_index=True)
coincidences_df

Unnamed: 0,detector_a_name,detector_b_name,tomography_setting_t,tomography_setting_r,repetition,coincidences,timestamp,data_dir,estimation_label
0,9,12,H,H,0,0,2025-05-30--14h-02m-16s-929063,2025-05-30--14h-02m-13s,DB_H
1,9,11,H,H,0,0,2025-05-30--14h-02m-16s-929063,2025-05-30--14h-02m-13s,SB
2,9,10,H,H,0,0,2025-05-30--14h-02m-16s-929063,2025-05-30--14h-02m-13s,SB
3,11,12,H,H,0,0,2025-05-30--14h-02m-16s-929063,2025-05-30--14h-02m-13s,SB
4,10,12,H,H,0,0,2025-05-30--14h-02m-16s-929063,2025-05-30--14h-02m-13s,SB
...,...,...,...,...,...,...,...,...,...
1448603,2,12,V,H,3999,0,2025-05-30--14h-48m-07s-973974,2025-05-30--14h-43m-17s,C
1448604,2,11,V,H,3999,0,2025-05-30--14h-48m-07s-973974,2025-05-30--14h-43m-17s,C
1448605,2,10,V,H,3999,0,2025-05-30--14h-48m-07s-973974,2025-05-30--14h-43m-17s,C
1448606,2,4,V,H,3999,0,2025-05-30--14h-48m-07s-973974,2025-05-30--14h-43m-17s,SB


Double bunched events are only resolved half of the time. Therefore, we now throw away half of all non-double-bunched events to recover the expected statistics.

In [13]:
coincidences_df["scaled_coincidences"] = coincidences_df["coincidences"].astype(float)

coincidences_df.loc[coincidences_df["estimation_label"] == "SB", "scaled_coincidences"] = coincidences_df.loc[coincidences_df["estimation_label"] == "SB", "coincidences"] * 0.5
coincidences_df.loc[coincidences_df["estimation_label"] == "C", "scaled_coincidences"] = coincidences_df.loc[coincidences_df["estimation_label"] == "C", "coincidences"] * 0.5

coincidences_df

Unnamed: 0,detector_a_name,detector_b_name,tomography_setting_t,tomography_setting_r,repetition,coincidences,timestamp,data_dir,estimation_label,scaled_coincidences
0,9,12,H,H,0,0,2025-05-30--14h-02m-16s-929063,2025-05-30--14h-02m-13s,DB_H,0.0
1,9,11,H,H,0,0,2025-05-30--14h-02m-16s-929063,2025-05-30--14h-02m-13s,SB,0.0
2,9,10,H,H,0,0,2025-05-30--14h-02m-16s-929063,2025-05-30--14h-02m-13s,SB,0.0
3,11,12,H,H,0,0,2025-05-30--14h-02m-16s-929063,2025-05-30--14h-02m-13s,SB,0.0
4,10,12,H,H,0,0,2025-05-30--14h-02m-16s-929063,2025-05-30--14h-02m-13s,SB,0.0
...,...,...,...,...,...,...,...,...,...,...
1448603,2,12,V,H,3999,0,2025-05-30--14h-48m-07s-973974,2025-05-30--14h-43m-17s,C,0.0
1448604,2,11,V,H,3999,0,2025-05-30--14h-48m-07s-973974,2025-05-30--14h-43m-17s,C,0.0
1448605,2,10,V,H,3999,0,2025-05-30--14h-48m-07s-973974,2025-05-30--14h-43m-17s,C,0.0
1448606,2,4,V,H,3999,0,2025-05-30--14h-48m-07s-973974,2025-05-30--14h-43m-17s,SB,0.0


In [14]:
# Sum up by estimation label
df_sum = (
    coincidences_df.groupby(["data_dir", "estimation_label", "repetition"]) # Sum over the different tomography settings (HH, HV, VH, VV)
    .agg({"scaled_coincidences": "sum"})
    .sort_values(by=["data_dir", "repetition", "estimation_label"])
    .reset_index()
)
df_sum

Unnamed: 0,data_dir,estimation_label,repetition,scaled_coincidences
0,2025-05-30--14h-02m-13s,C,0,0.0
1,2025-05-30--14h-02m-13s,DB_H,0,1.0
2,2025-05-30--14h-02m-13s,DB_V,0,0.0
3,2025-05-30--14h-02m-13s,SB,0,0.0
4,2025-05-30--14h-02m-13s,C,1,0.0
...,...,...,...,...
51731,2025-05-30--14h-43m-17s,SB,3998,0.0
51732,2025-05-30--14h-43m-17s,C,3999,0.5
51733,2025-05-30--14h-43m-17s,DB_H,3999,1.0
51734,2025-05-30--14h-43m-17s,DB_V,3999,0.0


In [15]:
df_pivoted_sum = df_sum.pivot(index=['data_dir', 'repetition'], columns='estimation_label', values='scaled_coincidences')
df_pivoted_sum = df_pivoted_sum.reset_index()  # optional, to flatten the index
df_pivoted_sum.columns.name = None  # remove the name of the columns
df_pivoted_sum

Unnamed: 0,data_dir,repetition,C,DB_H,DB_V,SB
0,2025-05-30--14h-02m-13s,0,0.0,1.0,0.0,0.0
1,2025-05-30--14h-02m-13s,1,0.0,4.0,0.0,0.0
2,2025-05-30--14h-02m-13s,2,0.0,2.0,0.0,0.0
3,2025-05-30--14h-02m-13s,3,0.0,2.0,0.0,0.0
4,2025-05-30--14h-02m-13s,5,0.0,4.0,0.0,0.0
...,...,...,...,...,...,...
12929,2025-05-30--14h-43m-17s,3995,0.0,5.0,0.0,0.0
12930,2025-05-30--14h-43m-17s,3996,0.0,1.0,0.0,0.0
12931,2025-05-30--14h-43m-17s,3997,0.0,1.0,0.0,0.0
12932,2025-05-30--14h-43m-17s,3998,0.0,4.0,0.0,0.0


In [16]:
df_pivoted_sum["N"] = df_pivoted_sum["SB"] + df_pivoted_sum["C"] + df_pivoted_sum["DB_H"] + df_pivoted_sum["DB_V"]
df_pivoted_sum.head(10)

Unnamed: 0,data_dir,repetition,C,DB_H,DB_V,SB,N
0,2025-05-30--14h-02m-13s,0,0.0,1.0,0.0,0.0,1.0
1,2025-05-30--14h-02m-13s,1,0.0,4.0,0.0,0.0,4.0
2,2025-05-30--14h-02m-13s,2,0.0,2.0,0.0,0.0,2.0
3,2025-05-30--14h-02m-13s,3,0.0,2.0,0.0,0.0,2.0
4,2025-05-30--14h-02m-13s,5,0.0,4.0,0.0,0.0,4.0
5,2025-05-30--14h-02m-13s,6,0.5,1.0,0.0,0.0,1.5
6,2025-05-30--14h-02m-13s,7,0.0,2.0,0.0,0.0,2.0
7,2025-05-30--14h-02m-13s,8,0.0,1.0,0.0,0.0,1.0
8,2025-05-30--14h-02m-13s,9,0.0,2.0,0.0,0.0,2.0
9,2025-05-30--14h-02m-13s,10,0.5,1.0,0.0,0.0,1.5


In [17]:
# save the dataframes to csv files based on the data_dir
for data_dir in new_data_dirs:
    df_subset = df_pivoted_sum[df_pivoted_sum["data_dir"] == data_dir]
    if not df_subset.empty:
        output_file = os.path.join(data_folder, data_dir, "scaled_coincidences.csv")
        df_subset.to_csv(output_file, index=False)
        print(f"Saved {output_file}")

Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-30--14h-02m-13s/scaled_coincidences.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-30--14h-03m-06s/scaled_coincidences.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-30--14h-04m-07s/scaled_coincidences.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-30--14h-05m-08s/scaled_coincidences.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-30--14h-19m-25s/scaled_coincidences.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-par

In [18]:
# sum all data
df_pivoted_sum_all = df_pivoted_sum.groupby("data_dir").sum().reset_index()
df_pivoted_sum_all

Unnamed: 0,data_dir,repetition,C,DB_H,DB_V,SB,N
0,2025-05-30--14h-02m-13s,112643,22.0,1178.0,0.0,5.0,1205.0
1,2025-05-30--14h-03m-06s,118363,24.0,1232.0,0.0,22.0,1278.0
2,2025-05-30--14h-04m-07s,116434,24.5,1168.0,0.0,36.0,1228.5
3,2025-05-30--14h-05m-08s,119053,20.5,1109.0,1.0,81.0,1211.5
4,2025-05-30--14h-19m-25s,1834177,77.5,4796.0,0.0,25.5,4899.0
5,2025-05-30--14h-22m-01s,1828534,94.0,4540.0,0.0,79.0,4713.0
6,2025-05-30--14h-24m-36s,1841429,89.5,4692.0,0.0,170.0,4951.5
7,2025-05-30--14h-27m-02s,1856848,90.0,4449.0,12.0,283.5,4834.5
8,2025-05-30--14h-43m-17s,7405780,184.5,9816.0,0.0,48.5,10049.0


In [19]:
# save the dataframes to csv files based on the data_dir
for data_dir in new_data_dirs:
    df_subset = df_pivoted_sum_all[df_pivoted_sum_all["data_dir"] == data_dir]
    if not df_subset.empty:
        output_file = os.path.join(data_folder, data_dir, "scaled_coincidences_all.csv")
        df_subset.to_csv(output_file, index=False)
        print(f"Saved {output_file}")

Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-30--14h-02m-13s/scaled_coincidences_all.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-30--14h-03m-06s/scaled_coincidences_all.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-30--14h-04m-07s/scaled_coincidences_all.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-30--14h-05m-08s/scaled_coincidences_all.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-30--14h-19m-25s/scaled_coincidences_all.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQ