In [15]:
import pandas as pd
import numpy as np
import os

In [16]:
# Detector lookup table for the white and blue output ports of the in-fibre
# beam splitters. Keys are the detector numbers that appear in the
# detector_a_name / detector_b_name columns; "arm" is a two-letter code made of
# "T"/"R" (presumably transmitted/reflected -- confirm against the setup notes).
DETECTORS = {
    detector: {"arm": arm, "color": color}
    for detector, arm, color in [
        (9, "TT", "white"),
        (12, "TT", "blue"),
        (11, "TR", "white"),
        (10, "TR", "blue"),
        (1, "RT", "white"),
        (4, "RT", "blue"),
        (7, "RR", "white"),
        (2, "RR", "blue"),
    ]
}


In [17]:
# Set to True to force a full refresh of the data: every data directory is
# reprocessed. When False, directories that are detected as already processed
# (see where new_data_dirs is built) are skipped.
full_refresh = True

In [18]:
# Ask git for the repository root. The context manager closes the pipe that
# os.popen opens, which the bare .read() call never did.
with os.popen('git rev-parse --show-toplevel') as git_pipe:
    repo_root = git_pipe.read().strip()

# git writes nothing to stdout when it fails (e.g. outside a work tree or when
# git is not installed). Stop here instead of silently building data paths
# relative to the current working directory.
if not repo_root:
    raise RuntimeError(
        "Could not determine the repository root: "
        "'git rev-parse --show-toplevel' produced no output"
    )

In [19]:
data_folder = os.path.join(repo_root, 'multi-parameter-estimation', 'data')

# Sorted names of the sub-directories of the data folder. Plain files are
# ignored and the 'old-data' directory is skipped.
data_dirs = sorted(
    entry
    for entry in os.listdir(data_folder)
    if entry != 'old-data' and os.path.isdir(os.path.join(data_folder, entry))
)
data_dirs

['2025-07-03--15h-44m-51s',
 '2025-07-03--16h-11m-36s',
 '2025-07-03--16h-16m-56s',
 '2025-07-03--16h-22m-15s',
 '2025-07-03--16h-27m-19s',
 '2025-07-03--16h-36m-39s',
 '2025-07-03--16h-42m-17s',
 '2025-07-07--11h-59m-57s',
 '2025-07-07--12h-05m-34s',
 '2025-07-07--12h-12m-21s',
 '2025-07-07--12h-19m-57s',
 '2025-07-07--14h-31m-04s',
 '2025-07-07--14h-37m-06s',
 '2025-07-07--14h-45m-36s',
 '2025-07-07--14h-51m-58s',
 '2025-07-07--14h-57m-39s',
 '2025-07-08--10h-30m-07s',
 '2025-07-08--10h-35m-04s',
 '2025-07-08--10h-40m-12s',
 '2025-07-08--10h-45m-36s',
 '2025-07-08--10h-56m-10s',
 '2025-07-08--11h-01m-30s',
 '2025-07-08--11h-06m-45s']

In [20]:
# File this notebook writes into each data directory (see the save step at the
# end). Its presence marks a directory as already processed. The previous check
# looked for "scaled_coincidences.csv", which this notebook does not write.
processed_marker = "labelled_coincidences.csv"

if full_refresh:
    # Reprocess everything; copy so later edits cannot alias data_dirs.
    new_data_dirs = data_dirs.copy()
else:
    # Keep only the directories that do not have the output file yet.
    new_data_dirs = [
        d
        for d in data_dirs
        if not os.path.exists(os.path.join(data_folder, d, processed_marker))
    ]

new_data_dirs

['2025-07-03--15h-44m-51s',
 '2025-07-03--16h-11m-36s',
 '2025-07-03--16h-16m-56s',
 '2025-07-03--16h-22m-15s',
 '2025-07-03--16h-27m-19s',
 '2025-07-03--16h-36m-39s',
 '2025-07-03--16h-42m-17s',
 '2025-07-07--11h-59m-57s',
 '2025-07-07--12h-05m-34s',
 '2025-07-07--12h-12m-21s',
 '2025-07-07--12h-19m-57s',
 '2025-07-07--14h-31m-04s',
 '2025-07-07--14h-37m-06s',
 '2025-07-07--14h-45m-36s',
 '2025-07-07--14h-51m-58s',
 '2025-07-07--14h-57m-39s',
 '2025-07-08--10h-30m-07s',
 '2025-07-08--10h-35m-04s',
 '2025-07-08--10h-40m-12s',
 '2025-07-08--10h-45m-36s',
 '2025-07-08--10h-56m-10s',
 '2025-07-08--11h-01m-30s',
 '2025-07-08--11h-06m-45s']

In [21]:
def load_coincidences(data_dir):
    """Read ``coincidences.csv`` from one data directory.

    Parameters
    ----------
    data_dir : str
        Name of a sub-directory of ``data_folder``.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with a ``data_dir`` column set to ``data_dir`` on
        every row, so rows stay traceable after concatenation.
    """
    csv_path = os.path.join(data_folder, data_dir, "coincidences.csv")
    frame = pd.read_csv(csv_path)
    frame["data_dir"] = data_dir
    return frame

# Load every selected data directory and stack the frames into one table with
# a fresh 0..n-1 index.
coincidence_frames = [load_coincidences(d) for d in new_data_dirs]
coincidences_df = pd.concat(coincidence_frames, ignore_index=True)
coincidences_df

Unnamed: 0,detector_a_name,detector_b_name,detector_a_time_tag,detector_b_time_tag,timestamp,tomography_setting_t,tomography_setting_r,data_dir
0,9,12,552668721563163,552668721563158,2025-07-03--15h-45m-02s,H,H,2025-07-03--15h-44m-51s
1,1,4,552668723130306,552668723130306,2025-07-03--15h-45m-02s,H,H,2025-07-03--15h-44m-51s
2,9,12,552668723321214,552668723321211,2025-07-03--15h-45m-02s,H,H,2025-07-03--15h-44m-51s
3,9,12,552668723471264,552668723471260,2025-07-03--15h-45m-02s,H,H,2025-07-03--15h-44m-51s
4,1,4,552668723517406,552668723517406,2025-07-03--15h-45m-02s,H,H,2025-07-03--15h-44m-51s
...,...,...,...,...,...,...,...,...
2171136,10,11,-4242167837918237184,-4242167837918237184,2025-07-08--11h-08m-01s,V,H,2025-07-08--11h-06m-45s
2171137,1,12,-4242167837917572096,-4242167837917572096,2025-07-08--11h-08m-01s,V,H,2025-07-08--11h-06m-45s
2171138,10,11,-4242167837917491712,-4242167837917491712,2025-07-08--11h-08m-01s,V,H,2025-07-08--11h-06m-45s
2171139,1,4,-4242167837917381120,-4242167837917381120,2025-07-08--11h-08m-01s,V,H,2025-07-08--11h-06m-45s


In [22]:
# Drop any label columns that are already present so the merge further down
# does not produce suffixed duplicates (estimation_label_x / _y, ...). Both
# columns added by that merge are removed, which keeps this cell and the merge
# cell safe to re-run; errors="ignore" makes the drop a no-op for columns that
# do not exist.
coincidences_df = coincidences_df.drop(
    columns=["estimation_label", "corrected_estimation_label"],
    errors="ignore",
)
coincidences_df

Unnamed: 0,detector_a_name,detector_b_name,detector_a_time_tag,detector_b_time_tag,timestamp,tomography_setting_t,tomography_setting_r,data_dir
0,9,12,552668721563163,552668721563158,2025-07-03--15h-45m-02s,H,H,2025-07-03--15h-44m-51s
1,1,4,552668723130306,552668723130306,2025-07-03--15h-45m-02s,H,H,2025-07-03--15h-44m-51s
2,9,12,552668723321214,552668723321211,2025-07-03--15h-45m-02s,H,H,2025-07-03--15h-44m-51s
3,9,12,552668723471264,552668723471260,2025-07-03--15h-45m-02s,H,H,2025-07-03--15h-44m-51s
4,1,4,552668723517406,552668723517406,2025-07-03--15h-45m-02s,H,H,2025-07-03--15h-44m-51s
...,...,...,...,...,...,...,...,...
2171136,10,11,-4242167837918237184,-4242167837918237184,2025-07-08--11h-08m-01s,V,H,2025-07-08--11h-06m-45s
2171137,1,12,-4242167837917572096,-4242167837917572096,2025-07-08--11h-08m-01s,V,H,2025-07-08--11h-06m-45s
2171138,10,11,-4242167837917491712,-4242167837917491712,2025-07-08--11h-08m-01s,V,H,2025-07-08--11h-06m-45s
2171139,1,4,-4242167837917381120,-4242167837917381120,2025-07-08--11h-08m-01s,V,H,2025-07-08--11h-06m-45s


In [23]:
def get_effective_detector_label(arm, tomography_setting_a, tomography_setting_b):
    """Return the arm code with its detector letter swapped when required.

    The first character of ``arm`` decides which tomography setting applies:
    ``"T"`` uses ``tomography_setting_a`` and ``"R"`` uses
    ``tomography_setting_b``. When the applicable setting is ``"V"`` the second
    character is swapped (``"R"`` becomes ``"T"``, anything else becomes
    ``"R"``); otherwise ``arm`` is returned unchanged.

    Parameters
    ----------
    arm : str
        Two-letter arm code such as ``"TT"`` or ``"RT"``.
    tomography_setting_a, tomography_setting_b : str
        Tomography settings; only the value ``"V"`` triggers a swap.

    Returns
    -------
    str
        The effective arm code.
    """
    parts = list(arm)
    path = parts[0]
    needs_flip = (
        (path == "T" and tomography_setting_a == "V")
        or (path == "R" and tomography_setting_b == "V")
    )
    if needs_flip:
        parts[1] = "R" if parts[1] != "R" else "T"
    return "".join(parts)


def get_estimation_label(arm_a, arm_b, tomography_setting_a, tomography_setting_b):
    """Classify a detector pair as ``"DB_H"``, ``"DB_V"``, ``"C"`` or ``"SB"``.

    Both arm codes are first converted to their effective form with
    ``get_effective_detector_label`` (a "V" tomography setting swaps the
    detector letter in the corresponding arm). The effective codes are then
    compared:

    * identical codes -> double bunched: ``"DB_H"`` if the detector letter is
      ``"T"``, ``"DB_V"`` if it is ``"R"``;
    * different first letters -> ``"C"`` (coincidence);
    * same first letter, different second letter -> ``"SB"`` (single bunched).

    Raises
    ------
    ValueError
        If the effective codes do not fit any of the cases above.
    """
    eff_a = get_effective_detector_label(arm_a, tomography_setting_a, tomography_setting_b)
    eff_b = get_effective_detector_label(arm_b, tomography_setting_a, tomography_setting_b)

    if eff_a != eff_b:
        # Different first letters: coincidence.
        if eff_a[0] != eff_b[0]:
            return "C"
        # Same first letter, different detector letter: single bunched.
        if eff_a[1] != eff_b[1]:
            return "SB"
        raise ValueError(f"Unknown arm combination: {eff_a}, {eff_b}")

    # Identical effective codes: double bunched, named after the detector letter.
    double_bunched = {"T": "DB_H", "R": "DB_V"}
    detector = eff_a[1]
    if detector not in double_bunched:
        raise ValueError(f"Unknown arm: {eff_a}")
    return double_bunched[detector]

In [24]:
# Based on the imperfect indistinguishability
def get_corrected_estimation_label(arm_a, arm_b, tomography_setting_a, tomography_setting_b):
    """Classify a detector pair as ``"HH"``, ``"VV"``, ``"SB"`` or ``"C'"``.

    This is the labelling used to account for imperfect indistinguishability.
    Both arm codes are first converted to their effective form with
    ``get_effective_detector_label``. The result depends on the two detector
    letters (second character) and on whether the first letters agree:

    * both detector letters ``"T"`` -> ``"HH"``;
    * both detector letters ``"R"`` -> ``"VV"``;
    * detector letters differ, same first letter -> ``"SB"``;
    * detector letters differ, different first letter -> ``"C'"``.

    Raises
    ------
    ValueError
        If the effective codes do not fit any of the cases above.
    """
    eff_a = get_effective_detector_label(arm_a, tomography_setting_a, tomography_setting_b)
    eff_b = get_effective_detector_label(arm_b, tomography_setting_a, tomography_setting_b)

    detector_letters = (eff_a[1], eff_b[1])
    if detector_letters == ("T", "T"):
        return "HH"
    if detector_letters == ("R", "R"):
        return "VV"
    if eff_a[1] != eff_b[1]:
        # The detector letters differ: the first letters decide between SB and C'.
        return "SB" if eff_a[0] == eff_b[0] else "C'"
    # Equal detector letters that are neither "T" nor "R".
    raise ValueError(f"Unknown arm combination: {eff_a}, {eff_b}")

In [25]:
# Lookup table with one row per (detector pair, tomography setting pair).
# Only pairs with detector_a_name < detector_b_name are generated.
detector_pairs = [(a, b) for a in DETECTORS for b in DETECTORS if a < b]
tomography_settings = [(t, r) for t in ["H", "V"] for r in ["H", "V"]]

labels = pd.DataFrame(
    [
        (
            a,
            b,
            t,
            r,
            get_estimation_label(DETECTORS[a]["arm"], DETECTORS[b]["arm"], t, r),
            get_corrected_estimation_label(DETECTORS[a]["arm"], DETECTORS[b]["arm"], t, r),
        )
        for a, b in detector_pairs
        for t, r in tomography_settings
    ],
    columns=[
        "detector_a_name",
        "detector_b_name",
        "tomography_setting_t",
        "tomography_setting_r",
        "estimation_label",
        "corrected_estimation_label",
    ],
)

labels

Unnamed: 0,detector_a_name,detector_b_name,tomography_setting_t,tomography_setting_r,estimation_label,corrected_estimation_label
0,9,12,H,H,DB_H,HH
1,9,12,H,V,DB_H,HH
2,9,12,V,H,DB_V,VV
3,9,12,V,V,DB_V,VV
4,9,11,H,H,SB,SB
...,...,...,...,...,...,...
107,2,4,V,V,SB,SB
108,2,7,H,H,DB_V,VV
109,2,7,H,V,DB_H,HH
110,2,7,V,H,DB_V,VV


In [26]:
# Attach both label columns to every coincidence row. The join is a left join
# on the detector pair and the two tomography settings, so rows without a
# matching entry in `labels` are kept and get missing labels.
label_keys = [
    "detector_a_name",
    "detector_b_name",
    "tomography_setting_t",
    "tomography_setting_r",
]
coincidences_df = coincidences_df.merge(labels, how="left", on=label_keys)
coincidences_df

Unnamed: 0,detector_a_name,detector_b_name,detector_a_time_tag,detector_b_time_tag,timestamp,tomography_setting_t,tomography_setting_r,data_dir,estimation_label,corrected_estimation_label
0,9,12,552668721563163,552668721563158,2025-07-03--15h-45m-02s,H,H,2025-07-03--15h-44m-51s,DB_H,HH
1,1,4,552668723130306,552668723130306,2025-07-03--15h-45m-02s,H,H,2025-07-03--15h-44m-51s,DB_H,HH
2,9,12,552668723321214,552668723321211,2025-07-03--15h-45m-02s,H,H,2025-07-03--15h-44m-51s,DB_H,HH
3,9,12,552668723471264,552668723471260,2025-07-03--15h-45m-02s,H,H,2025-07-03--15h-44m-51s,DB_H,HH
4,1,4,552668723517406,552668723517406,2025-07-03--15h-45m-02s,H,H,2025-07-03--15h-44m-51s,DB_H,HH
...,...,...,...,...,...,...,...,...,...,...
2171136,10,11,-4242167837918237184,-4242167837918237184,2025-07-08--11h-08m-01s,V,H,2025-07-08--11h-06m-45s,DB_H,HH
2171137,1,12,-4242167837917572096,-4242167837917572096,2025-07-08--11h-08m-01s,V,H,2025-07-08--11h-06m-45s,C,C'
2171138,10,11,-4242167837917491712,-4242167837917491712,2025-07-08--11h-08m-01s,V,H,2025-07-08--11h-06m-45s,DB_H,HH
2171139,1,4,-4242167837917381120,-4242167837917381120,2025-07-08--11h-08m-01s,V,H,2025-07-08--11h-06m-45s,DB_H,HH


In [27]:
# Map each data directory name to the sub-frame holding its rows. iter() is
# needed because dict() would otherwise mistake the GroupBy object's `keys`
# attribute for the mapping protocol.
grouped = dict(iter(coincidences_df.groupby("data_dir")))

In [28]:
from concurrent.futures import ProcessPoolExecutor

def save_scaled_coincidences(args):
    """Write one data directory's labelled rows to ``labelled_coincidences.csv``.

    Parameters
    ----------
    args : tuple
        ``(data_dir, df_subset)``. Packed into one argument so the function can
        be used with ``executor.map``. ``df_subset`` may be ``None`` or empty,
        in which case nothing is written.
    """
    data_dir, df_subset = args
    if df_subset is None or df_subset.empty:
        return
    output_file = os.path.join(data_folder, data_dir, "labelled_coincidences.csv")
    df_subset.to_csv(output_file, index=False)
    print(f"Saved {output_file}")

with ProcessPoolExecutor(max_workers=8) as executor:
    # grouped.get: a directory whose coincidences.csv has no rows never shows
    # up in the groupby result, so indexing with grouped[k] would raise
    # KeyError before any file is written.
    save_jobs = [(k, grouped.get(k)) for k in new_data_dirs]
    # executor.map only re-raises a worker's exception when its result is
    # retrieved. Consuming the iterator makes a failed write stop the notebook
    # instead of passing silently.
    save_results = list(executor.map(save_scaled_coincidences, save_jobs))


Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-07-03--15h-44m-51s/labelled_coincidences.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-07-03--16h-11m-36s/labelled_coincidences.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-07-03--16h-36m-39s/labelled_coincidences.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-07-03--16h-16m-56s/labelled_coincidences.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-07-03--16h-27m-19s/labelled_coincidences.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects