In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# When True, every data directory is reprocessed. When False, directories that
# already contain a scaled_coincidences.csv are skipped.
full_refresh = True

In [3]:
# Ask git for the top-level directory of the repository so that the data paths
# built from it do not depend on where the notebook was started.
# The pipe is used as a context manager so it is closed deterministically.
with os.popen('git rev-parse --show-toplevel') as git_pipe:
    repo_root = git_pipe.read().strip()

# An empty answer means git produced no path on stdout (for example because the
# notebook is not inside a git work tree, or git is not installed). Continuing
# would silently turn every later path into one relative to the current
# working directory, so stop with a clear message instead.
if not repo_root:
    raise RuntimeError(
        "Could not determine the repository root: "
        "'git rev-parse --show-toplevel' returned no output. "
        "Run this notebook from inside the git repository."
    )

In [4]:
data_folder = os.path.join(repo_root, 'multi-parameter-estimation', 'data')

# Names of all sub-directories of the data folder, in sorted order.
# Plain files are ignored and the 'old-data' directory is left out.
data_dirs = sorted(
    entry
    for entry in os.listdir(data_folder)
    if entry != 'old-data' and os.path.isdir(os.path.join(data_folder, entry))
)
data_dirs

['2025-05-28--11h-37m-31s',
 '2025-05-28--11h-38m-37s',
 '2025-05-28--11h-39m-30s',
 '2025-05-28--11h-40m-23s',
 '2025-05-28--11h-41m-17s',
 '2025-05-28--11h-42m-11s']

In [5]:
# Directories that still need processing. On a full refresh that is all of
# them; otherwise only those without an existing scaled_coincidences.csv.
new_data_dirs = [
    run_name
    for run_name in data_dirs
    if full_refresh
    or not os.path.exists(os.path.join(data_folder, run_name, "scaled_coincidences.csv"))
]

new_data_dirs

['2025-05-28--11h-37m-31s',
 '2025-05-28--11h-38m-37s',
 '2025-05-28--11h-39m-30s',
 '2025-05-28--11h-40m-23s',
 '2025-05-28--11h-41m-17s',
 '2025-05-28--11h-42m-11s']

In [6]:
def load_coincidences(data_dir):
    """Read the labelled coincidences of one data directory.

    Parameters
    ----------
    data_dir : str
        Name of a sub-directory of the notebook-level ``data_folder``.

    Returns
    -------
    pandas.DataFrame
        The contents of ``labelled_coincidences.csv`` from that directory,
        with the ``data_dir`` column set to ``data_dir`` on every row.
    """
    csv_path = os.path.join(data_folder, data_dir, "labelled_coincidences.csv")
    labelled = pd.read_csv(csv_path)
    # Tag each row with its origin so rows stay traceable after concatenation.
    labelled["data_dir"] = data_dir
    return labelled

# pd.concat raises an unhelpful "No objects to concatenate" when given an empty
# list. That happens when no data directories were found, or when
# full_refresh is False and every directory already has a
# scaled_coincidences.csv. Fail with an actionable message instead.
if not new_data_dirs:
    raise ValueError(
        "No data directories to process: none were found, or all of them "
        "already contain scaled_coincidences.csv. "
        "Set full_refresh = True to reprocess existing directories."
    )

# One frame holding the labelled coincidences of all selected directories,
# re-indexed from 0 so row labels are unique across directories.
coincidences_df = pd.concat(
    [load_coincidences(d) for d in new_data_dirs],
    ignore_index=True,
)
coincidences_df

Unnamed: 0,detector_a_name,detector_b_name,tomography_setting_a,tomography_setting_b,repetition,coincidences,timestamp,data_dir,estimation_label
0,9,12,H,H,0,0,2025-05-28--11h-37m-58s-274541,2025-05-28--11h-37m-31s,DB_H
1,9,11,H,H,0,0,2025-05-28--11h-37m-58s-274541,2025-05-28--11h-37m-31s,SB
2,9,10,H,H,0,0,2025-05-28--11h-37m-58s-274541,2025-05-28--11h-37m-31s,SB
3,11,12,H,H,0,0,2025-05-28--11h-37m-58s-274541,2025-05-28--11h-37m-31s,SB
4,10,12,H,H,0,0,2025-05-28--11h-37m-58s-274541,2025-05-28--11h-37m-31s,SB
...,...,...,...,...,...,...,...,...,...
83659,2,12,V,V,124,0,2025-05-28--11h-43m-04s-130129,2025-05-28--11h-42m-11s,C
83660,2,11,V,V,124,0,2025-05-28--11h-43m-04s-130129,2025-05-28--11h-42m-11s,C
83661,2,10,V,V,124,0,2025-05-28--11h-43m-04s-130129,2025-05-28--11h-42m-11s,C
83662,2,4,V,V,124,0,2025-05-28--11h-43m-04s-130129,2025-05-28--11h-42m-11s,SB


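Double-bunched events are only resolved half of the time. To recover the expected statistics, we therefore scale the counts of all non-double-bunched events (labels `SB` and `C`) by a factor of 0.5, which on average is equivalent to throwing away half of them.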

In [7]:
# Start from the raw counts as floats; rows whose label is not handled below
# keep this value unchanged.
coincidences_df["scaled_coincidences"] = coincidences_df["coincidences"].astype(float)

# Rows labelled "SB" or "C" get half of their raw count.
for halved_label in ("SB", "C"):
    label_rows = coincidences_df["estimation_label"] == halved_label
    coincidences_df.loc[label_rows, "scaled_coincidences"] = (
        coincidences_df.loc[label_rows, "coincidences"] * 0.5
    )

coincidences_df

Unnamed: 0,detector_a_name,detector_b_name,tomography_setting_a,tomography_setting_b,repetition,coincidences,timestamp,data_dir,estimation_label,scaled_coincidences
0,9,12,H,H,0,0,2025-05-28--11h-37m-58s-274541,2025-05-28--11h-37m-31s,DB_H,0.0
1,9,11,H,H,0,0,2025-05-28--11h-37m-58s-274541,2025-05-28--11h-37m-31s,SB,0.0
2,9,10,H,H,0,0,2025-05-28--11h-37m-58s-274541,2025-05-28--11h-37m-31s,SB,0.0
3,11,12,H,H,0,0,2025-05-28--11h-37m-58s-274541,2025-05-28--11h-37m-31s,SB,0.0
4,10,12,H,H,0,0,2025-05-28--11h-37m-58s-274541,2025-05-28--11h-37m-31s,SB,0.0
...,...,...,...,...,...,...,...,...,...,...
83659,2,12,V,V,124,0,2025-05-28--11h-43m-04s-130129,2025-05-28--11h-42m-11s,C,0.0
83660,2,11,V,V,124,0,2025-05-28--11h-43m-04s-130129,2025-05-28--11h-42m-11s,C,0.0
83661,2,10,V,V,124,0,2025-05-28--11h-43m-04s-130129,2025-05-28--11h-42m-11s,C,0.0
83662,2,4,V,V,124,0,2025-05-28--11h-43m-04s-130129,2025-05-28--11h-42m-11s,SB,0.0


In [8]:
# Total scaled coincidences per (data_dir, estimation_label, repetition).
# All rows sharing those three keys are added up, i.e. the sum runs over the
# tomography settings (HH, HV, VH, VV) and the detector pairs.
group_keys = ["data_dir", "estimation_label", "repetition"]
label_totals = coincidences_df.groupby(group_keys)["scaled_coincidences"].sum()

# Flatten the group keys back into columns, then order the rows by directory,
# repetition and label with a fresh 0..n-1 index.
df_sum = (
    label_totals.reset_index()
    .sort_values(by=["data_dir", "repetition", "estimation_label"])
    .reset_index(drop=True)
)
df_sum

Unnamed: 0,data_dir,estimation_label,repetition,scaled_coincidences
0,2025-05-28--11h-37m-31s,C,0,0.0
1,2025-05-28--11h-37m-31s,DB_H,0,1.0
2,2025-05-28--11h-37m-31s,DB_V,0,0.0
3,2025-05-28--11h-37m-31s,SB,0,1.5
4,2025-05-28--11h-37m-31s,C,1,0.0
...,...,...,...,...
2983,2025-05-28--11h-42m-11s,SB,123,1.0
2984,2025-05-28--11h-42m-11s,C,124,0.0
2985,2025-05-28--11h-42m-11s,DB_H,124,0.0
2986,2025-05-28--11h-42m-11s,DB_V,124,0.0


In [9]:
# Reshape to one row per (data_dir, repetition) with one column per
# estimation label, then flatten the index and drop the leftover
# "estimation_label" name from the column axis.
df_pivoted_sum = (
    df_sum.pivot(
        index=['data_dir', 'repetition'],
        columns='estimation_label',
        values='scaled_coincidences',
    )
    .reset_index()
    .rename_axis(None, axis="columns")
)
df_pivoted_sum

Unnamed: 0,data_dir,repetition,C,DB_H,DB_V,SB
0,2025-05-28--11h-37m-31s,0,0.0,1.0,0.0,1.5
1,2025-05-28--11h-37m-31s,1,0.0,3.0,0.0,1.0
2,2025-05-28--11h-37m-31s,2,0.0,1.0,0.0,1.0
3,2025-05-28--11h-37m-31s,3,0.0,4.0,0.0,1.5
4,2025-05-28--11h-37m-31s,4,0.5,1.0,0.0,0.5
...,...,...,...,...,...,...
742,2025-05-28--11h-42m-11s,120,0.5,0.0,1.0,2.0
743,2025-05-28--11h-42m-11s,121,0.0,0.0,7.0,3.5
744,2025-05-28--11h-42m-11s,122,0.0,0.0,2.0,1.0
745,2025-05-28--11h-42m-11s,123,0.0,0.0,1.0,1.0


In [10]:
# N is the total over the four label columns for each row, accumulated in the
# order SB, C, DB_H, DB_V.
label_total = df_pivoted_sum["SB"]
for label_column in ("C", "DB_H", "DB_V"):
    label_total = label_total + df_pivoted_sum[label_column]

df_pivoted_sum["N"] = label_total
df_pivoted_sum.head(10)

Unnamed: 0,data_dir,repetition,C,DB_H,DB_V,SB,N
0,2025-05-28--11h-37m-31s,0,0.0,1.0,0.0,1.5,2.5
1,2025-05-28--11h-37m-31s,1,0.0,3.0,0.0,1.0,4.0
2,2025-05-28--11h-37m-31s,2,0.0,1.0,0.0,1.0,2.0
3,2025-05-28--11h-37m-31s,3,0.0,4.0,0.0,1.5,5.5
4,2025-05-28--11h-37m-31s,4,0.5,1.0,0.0,0.5,2.0
5,2025-05-28--11h-37m-31s,5,0.0,4.0,0.0,0.5,4.5
6,2025-05-28--11h-37m-31s,6,0.0,5.0,0.0,1.5,6.5
7,2025-05-28--11h-37m-31s,7,0.0,2.0,0.0,1.5,3.5
8,2025-05-28--11h-37m-31s,8,0.5,1.0,0.0,1.5,3.0
9,2025-05-28--11h-37m-31s,9,1.0,3.0,0.0,1.0,5.0


In [11]:
# Write the per-repetition table of each processed directory into that
# directory as scaled_coincidences.csv. Directories with no rows are skipped.
for data_dir in new_data_dirs:
    run_rows = df_pivoted_sum.loc[df_pivoted_sum["data_dir"] == data_dir]
    if run_rows.empty:
        continue
    destination = os.path.join(data_folder, data_dir, "scaled_coincidences.csv")
    run_rows.to_csv(destination, index=False)
    print(f"Saved {destination}")

Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-28--11h-37m-31s/scaled_coincidences.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-28--11h-38m-37s/scaled_coincidences.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-28--11h-39m-30s/scaled_coincidences.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-28--11h-40m-23s/scaled_coincidences.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-28--11h-41m-17s/scaled_coincidences.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-par

In [12]:
# Collapse the repetitions: one row per data directory holding the column-wise
# sum of every other column.
# NOTE(review): this also sums the `repetition` column, so its value in the
# result is the sum of the repetition indices, not a count - confirm that no
# consumer of the saved file relies on it.
per_directory_totals = df_pivoted_sum.groupby("data_dir").sum()
df_pivoted_sum_all = per_directory_totals.reset_index()
df_pivoted_sum_all

Unnamed: 0,data_dir,repetition,C,DB_H,DB_V,SB,N
0,2025-05-28--11h-37m-31s,7750,10.5,288.0,0.0,142.0,440.5
1,2025-05-28--11h-38m-37s,7651,7.0,292.0,41.0,155.5,495.5
2,2025-05-28--11h-39m-30s,7750,7.5,264.0,137.0,207.5,616.0
3,2025-05-28--11h-40m-23s,7750,10.0,142.0,217.0,190.5,559.5
4,2025-05-28--11h-41m-17s,7643,10.0,50.0,301.0,162.0,523.0
5,2025-05-28--11h-42m-11s,7653,6.5,0.0,298.0,164.0,468.5


In [13]:
# Write the summed row of each processed directory into that directory as
# scaled_coincidences_all.csv. Directories with no row are skipped.
for data_dir in new_data_dirs:
    run_total = df_pivoted_sum_all.loc[df_pivoted_sum_all["data_dir"] == data_dir]
    if run_total.empty:
        continue
    destination = os.path.join(data_folder, data_dir, "scaled_coincidences_all.csv")
    run_total.to_csv(destination, index=False)
    print(f"Saved {destination}")

Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-28--11h-37m-31s/scaled_coincidences_all.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-28--11h-38m-37s/scaled_coincidences_all.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-28--11h-39m-30s/scaled_coincidences_all.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-28--11h-40m-23s/scaled_coincidences_all.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-05-28--11h-41m-17s/scaled_coincidences_all.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQ