In [1]:
import pandas as pd
import numpy as np
import os

# Load the data

In [2]:
# Set to True to force a full refresh of the data.
# When False, data directories that already contain "scaled_coincidences.csv"
# are left out of `new_data_dirs` (see the filter cell below) and not reprocessed.
full_refresh = True

In [3]:
# Ask git for the top-level directory of the current checkout; the command's
# stdout is read in full and stripped of surrounding whitespace.
git_toplevel_cmd = 'git rev-parse --show-toplevel'
repo_root = os.popen(git_toplevel_cmd).read().strip()

In [4]:
data_folder = os.path.join(repo_root, 'multi-parameter-estimation', 'data')

# Sorted names of the sub-directories of data_folder, leaving out 'old-data'
# and anything that is not a directory.
data_dirs = sorted(
    entry
    for entry in os.listdir(data_folder)
    if entry != 'old-data' and os.path.isdir(os.path.join(data_folder, entry))
)
data_dirs

['2025-06-03--16h-33m-47s',
 '2025-06-03--16h-35m-37s',
 '2025-06-03--16h-37m-20s',
 '2025-06-03--16h-39m-05s',
 '2025-06-03--16h-40m-50s',
 '2025-06-03--16h-42m-36s',
 '2025-06-03--16h-44m-21s',
 '2025-06-03--16h-46m-06s',
 '2025-06-03--16h-47m-52s',
 '2025-06-03--16h-49m-39s',
 '2025-06-03--16h-51m-26s',
 '2025-06-03--16h-53m-14s',
 '2025-06-03--16h-54m-59s',
 '2025-06-03--16h-56m-47s',
 '2025-06-03--16h-58m-36s',
 '2025-06-03--17h-00m-23s',
 '2025-06-03--17h-02m-09s',
 '2025-06-03--17h-03m-56s',
 '2025-06-03--17h-05m-43s',
 '2025-06-03--17h-07m-28s',
 '2025-06-03--17h-09m-15s',
 '2025-06-03--17h-11m-02s',
 '2025-06-03--17h-12m-49s',
 '2025-06-03--17h-14m-36s',
 '2025-06-03--17h-16m-22s',
 '2025-06-03--17h-18m-08s',
 '2025-06-03--17h-19m-55s',
 '2025-06-03--17h-21m-56s',
 '2025-06-03--17h-23m-44s',
 '2025-06-03--17h-25m-29s',
 '2025-06-03--17h-27m-14s',
 '2025-06-03--17h-29m-01s',
 '2025-06-03--17h-30m-48s',
 '2025-06-03--17h-32m-37s',
 '2025-06-03--17h-34m-23s',
 '2025-06-03--17h-36

In [5]:
# With full_refresh every directory is processed; otherwise directories that
# already contain "scaled_coincidences.csv" are skipped.
# NOTE(review): none of the cells visible in this notebook write
# "scaled_coincidences.csv" (they write "*chunked_coincidences_n=*.csv") —
# confirm this is the intended marker file for "already processed".
if full_refresh:
    new_data_dirs = data_dirs.copy()
else:
    new_data_dirs = [
        d
        for d in data_dirs
        if not os.path.exists(os.path.join(data_folder, d, "scaled_coincidences.csv"))
    ]

new_data_dirs

['2025-06-03--16h-33m-47s',
 '2025-06-03--16h-35m-37s',
 '2025-06-03--16h-37m-20s',
 '2025-06-03--16h-39m-05s',
 '2025-06-03--16h-40m-50s',
 '2025-06-03--16h-42m-36s',
 '2025-06-03--16h-44m-21s',
 '2025-06-03--16h-46m-06s',
 '2025-06-03--16h-47m-52s',
 '2025-06-03--16h-49m-39s',
 '2025-06-03--16h-51m-26s',
 '2025-06-03--16h-53m-14s',
 '2025-06-03--16h-54m-59s',
 '2025-06-03--16h-56m-47s',
 '2025-06-03--16h-58m-36s',
 '2025-06-03--17h-00m-23s',
 '2025-06-03--17h-02m-09s',
 '2025-06-03--17h-03m-56s',
 '2025-06-03--17h-05m-43s',
 '2025-06-03--17h-07m-28s',
 '2025-06-03--17h-09m-15s',
 '2025-06-03--17h-11m-02s',
 '2025-06-03--17h-12m-49s',
 '2025-06-03--17h-14m-36s',
 '2025-06-03--17h-16m-22s',
 '2025-06-03--17h-18m-08s',
 '2025-06-03--17h-19m-55s',
 '2025-06-03--17h-21m-56s',
 '2025-06-03--17h-23m-44s',
 '2025-06-03--17h-25m-29s',
 '2025-06-03--17h-27m-14s',
 '2025-06-03--17h-29m-01s',
 '2025-06-03--17h-30m-48s',
 '2025-06-03--17h-32m-37s',
 '2025-06-03--17h-34m-23s',
 '2025-06-03--17h-36

In [6]:
def load_coincidences(data_dir):
    """Read one data directory's labelled coincidences and tag the rows with it.

    Parameters
    ----------
    data_dir : str
        Name of a sub-directory of ``data_folder``; the file
        ``labelled_coincidences.csv`` inside it is read.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with a ``data_dir`` column holding ``data_dir`` on
        every row.
    """
    csv_path = os.path.join(data_folder, data_dir, "labelled_coincidences.csv")
    return pd.read_csv(csv_path).assign(data_dir=data_dir)

# Load every selected data directory and stack the frames under a fresh 0..N-1 index.
per_dir_frames = [load_coincidences(d) for d in new_data_dirs]
coincidences_df = pd.concat(per_dir_frames, ignore_index=True)
coincidences_df

Unnamed: 0,detector_a_name,detector_b_name,detector_a_time_tag,detector_b_time_tag,timestamp,tomography_setting_t,tomography_setting_r,data_dir,estimation_label,corrected_estimation_label
0,9,12,2448329932684244480,2448329932684244480,2025-06-03--16h-34m-21s,H,H,2025-06-03--16h-33m-47s,DB_H,HH
1,9,12,2448329932685031936,2448329932685031936,2025-06-03--16h-34m-21s,H,H,2025-06-03--16h-33m-47s,DB_H,HH
2,1,4,2448329932685511680,2448329932685511680,2025-06-03--16h-34m-21s,H,H,2025-06-03--16h-33m-47s,DB_H,HH
3,9,12,2448329932686406656,2448329932686406656,2025-06-03--16h-34m-21s,H,H,2025-06-03--16h-33m-47s,DB_H,HH
4,1,4,2448329932686564352,2448329932686564352,2025-06-03--16h-34m-21s,H,H,2025-06-03--16h-33m-47s,DB_H,HH
...,...,...,...,...,...,...,...,...,...,...
87544528,4,12,2448418947688921600,2448418947688921600,2025-06-03--20h-26m-01s,V,H,2025-06-03--20h-24m-15s,C,C'
87544529,1,9,2448418947689439744,2448418947689439744,2025-06-03--20h-26m-00s,V,H,2025-06-03--20h-24m-15s,C,C'
87544530,1,4,2448418947689449472,2448418947689449472,2025-06-03--20h-26m-01s,V,H,2025-06-03--20h-24m-15s,DB_H,HH
87544531,4,9,2448418947690494464,2448418947690494464,2025-06-03--20h-26m-01s,V,H,2025-06-03--20h-24m-15s,C,C'


# Interleave the different tomography settings

In [7]:
# First detector-A time tag (in row order) for each data directory and
# tomography setting, returned as a flat table with the keys as columns.
first_tags_in_tomo_settings = (
    coincidences_df
    .groupby(["data_dir", "tomography_setting_t", "tomography_setting_r"])
    ["detector_a_time_tag"]
    .first()
    .reset_index()
)
first_tags_in_tomo_settings

Unnamed: 0,data_dir,tomography_setting_t,tomography_setting_r,detector_a_time_tag
0,2025-06-03--16h-33m-47s,H,H,2448329932684244480
1,2025-06-03--16h-33m-47s,H,V,2448330092348034560
2,2025-06-03--16h-33m-47s,V,H,2448330412860623872
3,2025-06-03--16h-33m-47s,V,V,2448330251626140672
4,2025-06-03--16h-35m-37s,H,H,2448330590268917248
...,...,...,...,...
515,2025-06-03--20h-22m-29s,V,V,2448418035688581120
516,2025-06-03--20h-24m-15s,H,H,2448418383724170240
517,2025-06-03--20h-24m-15s,H,V,2448418550620131328
518,2025-06-03--20h-24m-15s,V,H,2448418883794719232


In [8]:
# Time of each coincidence relative to the first coincidence (in row order) of
# the same data directory and tomography setting.
# `transform("first")` returns one value per row, already aligned with
# coincidences_df, so there is no need to merge the whole frame against the
# per-setting table and rely on index alignment to copy one column back.
first_tag_in_tomo_setting = (
    coincidences_df
    .groupby(["data_dir", "tomography_setting_t", "tomography_setting_r"])
    ["detector_a_time_tag"]
    .transform("first")
)
coincidences_df["relative_time_tag"] = (
    coincidences_df["detector_a_time_tag"] - first_tag_in_tomo_setting
)

# Ordering by the relative time interleaves the tomography settings within
# each data directory; the index is rebuilt so it matches the new row order.
coincidences_df = (
    coincidences_df
    .sort_values(by=["data_dir", "relative_time_tag"])
    .reset_index(drop=True)
)
coincidences_df

Unnamed: 0,detector_a_name,detector_b_name,detector_a_time_tag,detector_b_time_tag,timestamp,tomography_setting_t,tomography_setting_r,data_dir,estimation_label,corrected_estimation_label,relative_time_tag
0,9,12,2448329932684244480,2448329932684244480,2025-06-03--16h-34m-21s,H,H,2025-06-03--16h-33m-47s,DB_H,HH,0
1,2,7,2448330092348034560,2448330092348034560,2025-06-03--16h-34m-46s,H,V,2025-06-03--16h-33m-47s,DB_H,HH,0
2,10,11,2448330251626140672,2448330251626140672,2025-06-03--16h-35m-11s,V,V,2025-06-03--16h-33m-47s,DB_H,HH,0
3,10,11,2448330412860623872,2448330412860623872,2025-06-03--16h-35m-36s,V,H,2025-06-03--16h-33m-47s,DB_H,HH,0
4,9,12,2448330092348101120,2448330092348101120,2025-06-03--16h-34m-46s,H,V,2025-06-03--16h-33m-47s,DB_H,HH,66560
...,...,...,...,...,...,...,...,...,...,...,...
87544528,9,12,2448418614516683776,2448418614516683776,2025-06-03--20h-25m-07s,H,V,2025-06-03--20h-24m-15s,DB_H,HH,63896552448
87544529,10,11,2448418614517085184,2448418614517085184,2025-06-03--20h-25m-08s,H,V,2025-06-03--20h-24m-15s,DB_V,VV,63896953856
87544530,1,4,2448418447621335552,2448418447621335552,2025-06-03--20h-24m-42s,H,H,2025-06-03--20h-24m-15s,DB_H,HH,63897165312
87544531,2,7,2448418614517296640,2448418614517296640,2025-06-03--20h-25m-09s,H,V,2025-06-03--20h-24m-15s,DB_H,HH,63897165312


# Remove half of the non-double-bunched events

In [9]:
# count the number of non-double-bunched events
non_double_bunched_count = coincidences_df[coincidences_df["estimation_label"].isin(["C", "SB"])].count()["estimation_label"]
number_to_remove = int(non_double_bunched_count * 0.5)

mask = np.zeros(len(coincidences_df), dtype=bool)
mask[coincidences_df["estimation_label"].isin(["C", "SB"])] = True
np.random.seed(42)  # For reproducibility
indices_to_remove = np.random.choice(np.where(mask)[0], size=number_to_remove, replace=False)
coincidences_df = coincidences_df.drop(indices_to_remove).reset_index(drop=True)
coincidences_df

Unnamed: 0,detector_a_name,detector_b_name,detector_a_time_tag,detector_b_time_tag,timestamp,tomography_setting_t,tomography_setting_r,data_dir,estimation_label,corrected_estimation_label,relative_time_tag
0,9,12,2448329932684244480,2448329932684244480,2025-06-03--16h-34m-21s,H,H,2025-06-03--16h-33m-47s,DB_H,HH,0
1,2,7,2448330092348034560,2448330092348034560,2025-06-03--16h-34m-46s,H,V,2025-06-03--16h-33m-47s,DB_H,HH,0
2,10,11,2448330251626140672,2448330251626140672,2025-06-03--16h-35m-11s,V,V,2025-06-03--16h-33m-47s,DB_H,HH,0
3,10,11,2448330412860623872,2448330412860623872,2025-06-03--16h-35m-36s,V,H,2025-06-03--16h-33m-47s,DB_H,HH,0
4,9,12,2448330092348101120,2448330092348101120,2025-06-03--16h-34m-46s,H,V,2025-06-03--16h-33m-47s,DB_H,HH,66560
...,...,...,...,...,...,...,...,...,...,...,...
66820094,9,12,2448418614516683776,2448418614516683776,2025-06-03--20h-25m-07s,H,V,2025-06-03--20h-24m-15s,DB_H,HH,63896552448
66820095,10,11,2448418614517085184,2448418614517085184,2025-06-03--20h-25m-08s,H,V,2025-06-03--20h-24m-15s,DB_V,VV,63896953856
66820096,1,4,2448418447621335552,2448418447621335552,2025-06-03--20h-24m-42s,H,H,2025-06-03--20h-24m-15s,DB_H,HH,63897165312
66820097,2,7,2448418614517296640,2448418614517296640,2025-06-03--20h-25m-09s,H,V,2025-06-03--20h-24m-15s,DB_H,HH,63897165312


# Chunk into groups of 40

In [10]:
# group every 40 coincidences together
coincidences_df["group"] = (coincidences_df.index // 40) + 1
coincidences_df = coincidences_df.sort_values(by=["data_dir", "group", "relative_time_tag"])
coincidences_df = coincidences_df.reset_index(drop=True)
coincidences_df

Unnamed: 0,detector_a_name,detector_b_name,detector_a_time_tag,detector_b_time_tag,timestamp,tomography_setting_t,tomography_setting_r,data_dir,estimation_label,corrected_estimation_label,relative_time_tag,group
0,9,12,2448329932684244480,2448329932684244480,2025-06-03--16h-34m-21s,H,H,2025-06-03--16h-33m-47s,DB_H,HH,0,1
1,2,7,2448330092348034560,2448330092348034560,2025-06-03--16h-34m-46s,H,V,2025-06-03--16h-33m-47s,DB_H,HH,0,1
2,10,11,2448330251626140672,2448330251626140672,2025-06-03--16h-35m-11s,V,V,2025-06-03--16h-33m-47s,DB_H,HH,0,1
3,10,11,2448330412860623872,2448330412860623872,2025-06-03--16h-35m-36s,V,H,2025-06-03--16h-33m-47s,DB_H,HH,0,1
4,9,12,2448330092348101120,2448330092348101120,2025-06-03--16h-34m-46s,H,V,2025-06-03--16h-33m-47s,DB_H,HH,66560,1
...,...,...,...,...,...,...,...,...,...,...,...,...
66820094,9,12,2448418614516683776,2448418614516683776,2025-06-03--20h-25m-07s,H,V,2025-06-03--20h-24m-15s,DB_H,HH,63896552448,1670503
66820095,10,11,2448418614517085184,2448418614517085184,2025-06-03--20h-25m-08s,H,V,2025-06-03--20h-24m-15s,DB_V,VV,63896953856,1670503
66820096,1,4,2448418447621335552,2448418447621335552,2025-06-03--20h-24m-42s,H,H,2025-06-03--20h-24m-15s,DB_H,HH,63897165312,1670503
66820097,2,7,2448418614517296640,2448418614517296640,2025-06-03--20h-25m-09s,H,V,2025-06-03--20h-24m-15s,DB_H,HH,63897165312,1670503


In [11]:
# Sum by estimation_label within each group
grouped_coincidences = coincidences_df.groupby(
    ["data_dir", "group", "estimation_label"]
).agg(
    {
        "relative_time_tag": "min",
        "detector_a_time_tag": "count"
    }
).reset_index()

# pivot the estimation_label to columns
grouped_coincidences = grouped_coincidences.pivot_table(
    index=["data_dir", "group",],
    columns="estimation_label",
    values="detector_a_time_tag",
    fill_value=0
).reset_index()
grouped_coincidences.columns.name = None 
# ensure the columns all exist
for label in ["C", "SB", "DB_V"]:
    if label not in grouped_coincidences.columns:
        grouped_coincidences[label] = 0
grouped_coincidences["N"] = grouped_coincidences["C"] + grouped_coincidences["SB"] + grouped_coincidences["DB_V"] + grouped_coincidences["DB_H"]
grouped_coincidences = grouped_coincidences[grouped_coincidences["N"] == 40].drop(columns=["group"])
grouped_coincidences

Unnamed: 0,data_dir,C,DB_H,DB_V,SB,N
0,2025-06-03--16h-33m-47s,1.0,39.0,0.0,0.0,40.0
1,2025-06-03--16h-33m-47s,1.0,39.0,0.0,0.0,40.0
2,2025-06-03--16h-33m-47s,1.0,39.0,0.0,0.0,40.0
3,2025-06-03--16h-33m-47s,2.0,38.0,0.0,0.0,40.0
4,2025-06-03--16h-33m-47s,3.0,37.0,0.0,0.0,40.0
...,...,...,...,...,...,...
1670621,2025-06-03--20h-24m-15s,19.0,7.0,12.0,2.0,40.0
1670622,2025-06-03--20h-24m-15s,22.0,7.0,10.0,1.0,40.0
1670623,2025-06-03--20h-24m-15s,19.0,13.0,8.0,0.0,40.0
1670624,2025-06-03--20h-24m-15s,15.0,14.0,10.0,1.0,40.0


In [12]:
from concurrent.futures import ThreadPoolExecutor

# One frame of n=40 chunk counts per data directory, keyed by directory name
grouped = dict(tuple(grouped_coincidences.groupby("data_dir")))

def save_group(data_dir):
    """Write the n=40 chunk counts of one data directory to CSV.

    Returns the path of the file written, or None when `grouped` holds no
    rows for `data_dir` (nothing is written in that case).
    """
    df_subset = grouped.get(data_dir)
    if df_subset is None:
        return None
    output_file = os.path.join(data_folder, data_dir, "chunked_coincidences_n=40.csv")
    df_subset.to_csv(output_file, index=False)
    return output_file

with ThreadPoolExecutor(max_workers=8) as executor:
    # Iterating over the results re-raises any exception from a worker; an
    # unconsumed executor.map() would discard failures silently. Printing
    # here, in the main thread, also keeps the log lines from interleaving.
    for data_dir, output_file in zip(new_data_dirs, executor.map(save_group, new_data_dirs)):
        if output_file is None:
            print(f"No complete chunks for {data_dir}; nothing saved")
        else:
            print(f"Saved {output_file}")


Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-33m-47s/chunked_coincidences_n=40.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-40m-50s/chunked_coincidences_n=40.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-37m-20s/chunked_coincidences_n=40.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-42m-36s/chunked_coincidences_n=40.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-35m-37s/chunked_coincidences_n=40.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/R

## Using the corrected labels

In [13]:
# Sum by estimation_label within each group
grouped_corrected_coincidences = coincidences_df.groupby(
    ["data_dir", "group", "corrected_estimation_label"]
).agg(
    {
        "relative_time_tag": "min",
        "detector_a_time_tag": "count"
    }
).reset_index()

# pivot the estimation_label to columns
grouped_corrected_coincidences = grouped_corrected_coincidences.pivot_table(
    index=["data_dir", "group",],
    columns="corrected_estimation_label",
    values="detector_a_time_tag",
    fill_value=0
).reset_index()
grouped_corrected_coincidences.columns.name = None 
# ensure the columns all exist
for label in ["C'", "SB", "HH", "VV"]:
    if label not in grouped_corrected_coincidences.columns:
        grouped_corrected_coincidences[label] = 0
grouped_corrected_coincidences["N"] = grouped_corrected_coincidences["C'"] + grouped_corrected_coincidences["SB"] + grouped_corrected_coincidences["HH"] + grouped_corrected_coincidences["VV"]
grouped_corrected_coincidences = grouped_corrected_coincidences[grouped_corrected_coincidences["N"] == 40].drop(columns=["group"])
grouped_corrected_coincidences

Unnamed: 0,data_dir,C',HH,SB,VV,N
0,2025-06-03--16h-33m-47s,0.0,40.0,0.0,0.0,40.0
1,2025-06-03--16h-33m-47s,0.0,40.0,0.0,0.0,40.0
2,2025-06-03--16h-33m-47s,0.0,40.0,0.0,0.0,40.0
3,2025-06-03--16h-33m-47s,0.0,40.0,0.0,0.0,40.0
4,2025-06-03--16h-33m-47s,0.0,40.0,0.0,0.0,40.0
...,...,...,...,...,...,...
1670621,2025-06-03--20h-24m-15s,18.0,8.0,2.0,12.0,40.0
1670622,2025-06-03--20h-24m-15s,19.0,8.0,1.0,12.0,40.0
1670623,2025-06-03--20h-24m-15s,19.0,13.0,0.0,8.0,40.0
1670624,2025-06-03--20h-24m-15s,13.0,16.0,1.0,10.0,40.0


In [14]:
# One frame of corrected n=40 chunk counts per data directory, keyed by directory name
grouped = dict(tuple(grouped_corrected_coincidences.groupby("data_dir")))

def save_group(data_dir):
    """Write the corrected n=40 chunk counts of one data directory to CSV.

    Returns the path of the file written, or None when `grouped` holds no
    rows for `data_dir` (nothing is written in that case).
    """
    df_subset = grouped.get(data_dir)
    if df_subset is None:
        return None
    output_file = os.path.join(data_folder, data_dir, "corrected_chunked_coincidences_n=40.csv")
    df_subset.to_csv(output_file, index=False)
    return output_file

with ThreadPoolExecutor(max_workers=8) as executor:
    # Iterating over the results re-raises any exception from a worker; an
    # unconsumed executor.map() would discard failures silently. Printing
    # here, in the main thread, also keeps the log lines from interleaving.
    for data_dir, output_file in zip(new_data_dirs, executor.map(save_group, new_data_dirs)):
        if output_file is None:
            print(f"No complete chunks for {data_dir}; nothing saved")
        else:
            print(f"Saved {output_file}")


Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-33m-47s/corrected_chunked_coincidences_n=40.csvSaved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-35m-37s/corrected_chunked_coincidences_n=40.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-42m-36s/corrected_chunked_coincidences_n=40.csv

Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-39m-05s/corrected_chunked_coincidences_n=40.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-37m-20s/corrected_chunked_coincidences_n=40.csv
Saved

# Create higher-n estimates (n = 80, 120, 160, 200)

In [15]:
def _combine_chunks(chunk_counts, chunks_per_group):
    """Sum runs of n=40 chunks into chunks of 40 * chunks_per_group events.

    Rows are assigned to a run by ``index // chunks_per_group`` and summed per
    (data_dir, run). Only runs whose total N equals 40 * chunks_per_group are
    returned. `chunk_counts` itself is not modified, so re-running the n=40
    save cell afterwards does not write helper columns to disk.

    NOTE(review): grouped_coincidences keeps the index it had before its
    N == 40 filter (no reset_index in view), so the index may have gaps; runs
    that hit a gap or a data_dir boundary come up short and are discarded by
    the N filter here.
    """
    target_n = 40 * chunks_per_group
    group_col = f"group_{target_n}"
    summed = (
        chunk_counts
        .assign(**{group_col: chunk_counts.index // chunks_per_group + 1})
        .groupby(["data_dir", group_col])
        .agg({"C": "sum", "SB": "sum", "DB_V": "sum", "DB_H": "sum", "N": "sum"})
        .reset_index()
    )
    return summed[summed["N"] == target_n].drop(columns=[group_col])


# sum every 2, 3, 4 and 5 consecutive n=40 chunks
grouped_coincidences_80 = _combine_chunks(grouped_coincidences, 2)
grouped_coincidences_120 = _combine_chunks(grouped_coincidences, 3)
grouped_coincidences_160 = _combine_chunks(grouped_coincidences, 4)
grouped_coincidences_200 = _combine_chunks(grouped_coincidences, 5)

grouped_coincidences_200

Unnamed: 0,data_dir,C,SB,DB_V,DB_H,N
0,2025-06-03--16h-33m-47s,8.0,0.0,0.0,192.0,200.0
1,2025-06-03--16h-33m-47s,8.0,0.0,0.0,192.0,200.0
2,2025-06-03--16h-33m-47s,5.0,0.0,0.0,195.0,200.0
3,2025-06-03--16h-33m-47s,10.0,0.0,0.0,190.0,200.0
4,2025-06-03--16h-33m-47s,10.0,1.0,0.0,189.0,200.0
...,...,...,...,...,...,...
334177,2025-06-03--20h-24m-15s,103.0,3.0,44.0,50.0,200.0
334178,2025-06-03--20h-24m-15s,97.0,3.0,46.0,54.0,200.0
334179,2025-06-03--20h-24m-15s,108.0,1.0,45.0,46.0,200.0
334180,2025-06-03--20h-24m-15s,84.0,3.0,42.0,71.0,200.0


In [20]:
def save_grouped_data(df, n):
    """Write the chunk counts in `df` to one CSV per data directory.

    Parameters
    ----------
    df : pandas.DataFrame
        Chunk counts with at least the columns ``data_dir`` and ``N``.
    n : int
        Chunk size; only rows with ``N == n`` are written, to
        ``<data_folder>/<data_dir>/chunked_coincidences_n=<n>.csv``.

    Directories listed in ``new_data_dirs`` that have no matching rows are
    reported and skipped. Any error raised while writing a file is propagated
    to the caller.
    """
    df = df[df["N"] == n]
    grouped = dict(tuple(df.groupby("data_dir")))

    def save_group(data_dir):
        # Returns the path written, or None when there is nothing to write.
        df_subset = grouped.get(data_dir)
        if df_subset is None:
            return None
        output_file = os.path.join(data_folder, data_dir, f"chunked_coincidences_n={n}.csv")
        df_subset.to_csv(output_file, index=False)
        return output_file

    with ThreadPoolExecutor(max_workers=8) as executor:
        # Iterating over the results re-raises any exception from a worker; an
        # unconsumed executor.map() would discard failures silently. Printing
        # here, in the main thread, also keeps the log lines from interleaving.
        for data_dir, output_file in zip(new_data_dirs, executor.map(save_group, new_data_dirs)):
            if output_file is None:
                print(f"No n={n} chunks for {data_dir}; nothing saved")
            else:
                print(f"Saved {output_file}")


# Write the n = 80, 120, 160 and 200 chunk tables, in that order
for _chunk_counts, _chunk_size in (
    (grouped_coincidences_80, 80),
    (grouped_coincidences_120, 120),
    (grouped_coincidences_160, 160),
    (grouped_coincidences_200, 200),
):
    save_grouped_data(_chunk_counts, _chunk_size)

Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-35m-37s/chunked_coincidences_n=80.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-33m-47s/chunked_coincidences_n=80.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-44m-21s/chunked_coincidences_n=80.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-37m-20s/chunked_coincidences_n=80.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-40m-50s/chunked_coincidences_n=80.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/R

## Using corrected labels

In [18]:
def _combine_corrected_chunks(chunk_counts, chunks_per_group):
    """Sum runs of corrected n=40 chunks into chunks of 40 * chunks_per_group events.

    Rows are assigned to a run by ``index // chunks_per_group`` and summed per
    (data_dir, run). Only runs whose total N equals 40 * chunks_per_group are
    returned. `chunk_counts` itself is not modified, so re-running the n=40
    save cell afterwards does not write helper columns to disk.

    NOTE(review): grouped_corrected_coincidences keeps the index it had before
    its N == 40 filter (no reset_index in view), so the index may have gaps;
    runs that hit a gap or a data_dir boundary come up short and are discarded
    by the N filter here.
    """
    target_n = 40 * chunks_per_group
    group_col = f"group_{target_n}"
    summed = (
        chunk_counts
        .assign(**{group_col: chunk_counts.index // chunks_per_group + 1})
        .groupby(["data_dir", group_col])
        .agg({"C'": "sum", "SB": "sum", "VV": "sum", "HH": "sum", "N": "sum"})
        .reset_index()
    )
    return summed[summed["N"] == target_n].drop(columns=[group_col])


# sum every 2, 3, 4 and 5 consecutive n=40 chunks
grouped_corrected_coincidences_80 = _combine_corrected_chunks(grouped_corrected_coincidences, 2)
grouped_corrected_coincidences_120 = _combine_corrected_chunks(grouped_corrected_coincidences, 3)
grouped_corrected_coincidences_160 = _combine_corrected_chunks(grouped_corrected_coincidences, 4)
grouped_corrected_coincidences_200 = _combine_corrected_chunks(grouped_corrected_coincidences, 5)

grouped_corrected_coincidences_200

Unnamed: 0,data_dir,C',SB,VV,HH,N
0,2025-06-03--16h-33m-47s,0.0,0.0,0.0,200.0,200.0
1,2025-06-03--16h-33m-47s,0.0,0.0,0.0,200.0,200.0
2,2025-06-03--16h-33m-47s,1.0,0.0,0.0,199.0,200.0
3,2025-06-03--16h-33m-47s,0.0,0.0,0.0,200.0,200.0
4,2025-06-03--16h-33m-47s,1.0,1.0,0.0,198.0,200.0
...,...,...,...,...,...,...
334177,2025-06-03--20h-24m-15s,99.0,3.0,46.0,52.0,200.0
334178,2025-06-03--20h-24m-15s,96.0,3.0,46.0,55.0,200.0
334179,2025-06-03--20h-24m-15s,106.0,1.0,46.0,47.0,200.0
334180,2025-06-03--20h-24m-15s,78.0,3.0,44.0,75.0,200.0


In [21]:
def save_grouped_data(df, n):
    """Write the corrected chunk counts in `df` to one CSV per data directory.

    NOTE(review): this redefines `save_grouped_data` from the earlier cell,
    which writes the uncorrected "chunked_coincidences_n=<n>.csv" files; after
    this cell runs, the name refers to this version.

    Parameters
    ----------
    df : pandas.DataFrame
        Chunk counts with at least the columns ``data_dir`` and ``N``.
    n : int
        Chunk size; only rows with ``N == n`` are written, to
        ``<data_folder>/<data_dir>/corrected_chunked_coincidences_n=<n>.csv``.

    Directories listed in ``new_data_dirs`` that have no matching rows are
    reported and skipped. Any error raised while writing a file is propagated
    to the caller.
    """
    df = df[df["N"] == n]
    grouped = dict(tuple(df.groupby("data_dir")))

    def save_group(data_dir):
        # Returns the path written, or None when there is nothing to write.
        df_subset = grouped.get(data_dir)
        if df_subset is None:
            return None
        output_file = os.path.join(data_folder, data_dir, f"corrected_chunked_coincidences_n={n}.csv")
        df_subset.to_csv(output_file, index=False)
        return output_file

    with ThreadPoolExecutor(max_workers=8) as executor:
        # Iterating over the results re-raises any exception from a worker; an
        # unconsumed executor.map() would discard failures silently. Printing
        # here, in the main thread, also keeps the log lines from interleaving.
        for data_dir, output_file in zip(new_data_dirs, executor.map(save_group, new_data_dirs)):
            if output_file is None:
                print(f"No n={n} chunks for {data_dir}; nothing saved")
            else:
                print(f"Saved {output_file}")


# Write the corrected n = 80, 120, 160 and 200 chunk tables, in that order
for _chunk_counts, _chunk_size in (
    (grouped_corrected_coincidences_80, 80),
    (grouped_corrected_coincidences_120, 120),
    (grouped_corrected_coincidences_160, 160),
    (grouped_corrected_coincidences_200, 200),
):
    save_grouped_data(_chunk_counts, _chunk_size)

Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-39m-05s/corrected_chunked_coincidences_n=80.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-37m-20s/corrected_chunked_coincidences_n=80.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-35m-37s/corrected_chunked_coincidences_n=80.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-33m-47s/corrected_chunked_coincidences_n=80.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-03--16h-42m-36s/corrected_chunked_coincidences_n=80.csv
Saved