In [1]:
import os
import subprocess

import numpy as np
import pandas as pd

In [2]:
# White and blue output ports of the in-fibre beam splitters.
# Maps each detector number to a two-letter "arm" label and a port "color".
# The arm's first letter picks which tomography setting applies to it and the
# second letter is the one that gets swapped (T <-> R) when that setting is "V"
# (see get_effective_detector_label).
# Insertion order matters: it fixes the row order of the generated labels table.
DETECTORS = {
    detector_number: {"arm": arm, "color": color}
    for detector_number, arm, color in (
        (9, "TT", "white"),
        (12, "TT", "blue"),
        (11, "TR", "white"),
        (10, "TR", "blue"),
        (1, "RT", "white"),
        (4, "RT", "blue"),
        (7, "RR", "white"),
        (2, "RR", "blue"),
    )
}


In [3]:
# Set to True to force a full refresh of the data, i.e. reprocess every data
# directory. When False, directories that already contain a
# "scaled_coincidences.csv" file are skipped.
full_refresh = True

In [4]:
# Absolute path of the enclosing git repository; all data paths hang off it.
# check_output raises (CalledProcessError / FileNotFoundError) when git fails or
# is missing, instead of silently yielding an empty string that would make the
# data paths below resolve relative to the current working directory.
repo_root = subprocess.check_output(
    ["git", "rev-parse", "--show-toplevel"], text=True
).strip()

In [5]:
data_folder = os.path.join(repo_root, 'multi-parameter-estimation', 'data')

# Every sub-directory of the data folder is one run, except the 'old-data'
# archive, which is skipped. Sorted by name for a stable processing order.
data_dirs = sorted(
    entry
    for entry in os.listdir(data_folder)
    if entry != 'old-data' and os.path.isdir(os.path.join(data_folder, entry))
)
data_dirs

['2025-06-20--17h-16m-34s',
 '2025-06-20--17h-21m-29s',
 '2025-06-20--17h-26m-32s',
 '2025-06-20--17h-31m-35s',
 '2025-06-20--17h-36m-38s',
 '2025-06-20--17h-41m-43s',
 '2025-06-20--17h-46m-48s',
 '2025-06-20--17h-51m-53s',
 '2025-06-20--17h-56m-58s',
 '2025-06-20--18h-02m-03s',
 '2025-06-20--18h-07m-08s',
 '2025-06-20--18h-12m-13s',
 '2025-06-20--18h-17m-18s',
 '2025-06-20--18h-22m-24s',
 '2025-06-20--18h-27m-28s',
 '2025-06-20--18h-32m-32s',
 '2025-06-20--18h-37m-37s',
 '2025-06-20--18h-42m-41s',
 '2025-06-20--18h-47m-44s',
 '2025-06-20--18h-52m-47s',
 '2025-06-20--18h-57m-50s',
 '2025-06-20--19h-02m-55s',
 '2025-06-20--19h-08m-00s',
 '2025-06-20--19h-13m-05s',
 '2025-06-20--19h-18m-11s',
 '2025-06-20--19h-23m-16s',
 '2025-06-20--19h-28m-37s',
 '2025-06-20--19h-33m-42s',
 '2025-06-20--19h-38m-47s',
 '2025-06-20--19h-43m-53s',
 '2025-06-20--19h-48m-57s',
 '2025-06-20--19h-54m-03s',
 '2025-06-20--19h-59m-08s',
 '2025-06-20--20h-04m-13s',
 '2025-06-20--20h-09m-20s',
 '2025-06-20--20h-14

In [6]:
# Runs to process in this pass: everything on a full refresh, otherwise only
# the runs that do not yet have a "scaled_coincidences.csv" file.
# NOTE(review): the save step of this notebook writes "labelled_coincidences.csv",
# not "scaled_coincidences.csv" -- confirm this is the intended "already done" marker.
new_data_dirs = [
    run_name
    for run_name in data_dirs
    if full_refresh
    or not os.path.exists(os.path.join(data_folder, run_name, "scaled_coincidences.csv"))
]

new_data_dirs

['2025-06-20--17h-16m-34s',
 '2025-06-20--17h-21m-29s',
 '2025-06-20--17h-26m-32s',
 '2025-06-20--17h-31m-35s',
 '2025-06-20--17h-36m-38s',
 '2025-06-20--17h-41m-43s',
 '2025-06-20--17h-46m-48s',
 '2025-06-20--17h-51m-53s',
 '2025-06-20--17h-56m-58s',
 '2025-06-20--18h-02m-03s',
 '2025-06-20--18h-07m-08s',
 '2025-06-20--18h-12m-13s',
 '2025-06-20--18h-17m-18s',
 '2025-06-20--18h-22m-24s',
 '2025-06-20--18h-27m-28s',
 '2025-06-20--18h-32m-32s',
 '2025-06-20--18h-37m-37s',
 '2025-06-20--18h-42m-41s',
 '2025-06-20--18h-47m-44s',
 '2025-06-20--18h-52m-47s',
 '2025-06-20--18h-57m-50s',
 '2025-06-20--19h-02m-55s',
 '2025-06-20--19h-08m-00s',
 '2025-06-20--19h-13m-05s',
 '2025-06-20--19h-18m-11s',
 '2025-06-20--19h-23m-16s',
 '2025-06-20--19h-28m-37s',
 '2025-06-20--19h-33m-42s',
 '2025-06-20--19h-38m-47s',
 '2025-06-20--19h-43m-53s',
 '2025-06-20--19h-48m-57s',
 '2025-06-20--19h-54m-03s',
 '2025-06-20--19h-59m-08s',
 '2025-06-20--20h-04m-13s',
 '2025-06-20--20h-09m-20s',
 '2025-06-20--20h-14

In [7]:
def load_coincidences(data_dir):
    """Read one run's coincidences.csv and tag every row with its run name.

    Parameters
    ----------
    data_dir : str
        Name of the run directory inside ``data_folder``.

    Returns
    -------
    pandas.DataFrame
        Contents of ``<data_folder>/<data_dir>/coincidences.csv`` with a
        ``data_dir`` column set to ``data_dir`` on every row.
    """
    csv_path = os.path.join(data_folder, data_dir, "coincidences.csv")
    return pd.read_csv(csv_path).assign(data_dir=data_dir)

# pd.concat fails with an opaque "No objects to concatenate" ValueError when
# there is nothing to process (e.g. full_refresh is False and every run was
# skipped), so fail early with a message that says what to do about it.
if not new_data_dirs:
    raise ValueError(
        "No data directories to process: set full_refresh = True or add new "
        f"runs under {data_folder}"
    )

# Stack all selected runs into one frame with a fresh 0..N-1 index.
coincidences_df = pd.concat([load_coincidences(d) for d in new_data_dirs], ignore_index=True)
coincidences_df

Unnamed: 0,detector_a_name,detector_b_name,detector_a_time_tag,detector_b_time_tag,timestamp,tomography_setting_t,tomography_setting_r,data_dir
0,1,4,2555412465261692416,2555412465261692416,2025-06-20--17h-17m-43s,H,H,2025-06-20--17h-16m-34s
1,1,4,2555412465265911296,2555412465265911296,2025-06-20--17h-17m-43s,H,H,2025-06-20--17h-16m-34s
2,9,12,2555412465267243520,2555412465267243520,2025-06-20--17h-17m-42s,H,H,2025-06-20--17h-16m-34s
3,9,12,2555412465268338688,2555412465268338688,2025-06-20--17h-17m-42s,H,H,2025-06-20--17h-16m-34s
4,9,12,2555412465269690880,2555412465269690880,2025-06-20--17h-17m-42s,H,H,2025-06-20--17h-16m-34s
...,...,...,...,...,...,...,...,...
56614731,1,7,2557399189369489920,2557399189369489920,2025-06-21--03h-52m-59s,V,H,2025-06-21--03h-47m-55s
56614732,4,9,2557399189370192896,2557399189370192896,2025-06-21--03h-52m-59s,V,H,2025-06-21--03h-47m-55s
56614733,9,12,2557399189372173824,2557399189372173824,2025-06-21--03h-52m-59s,V,H,2025-06-21--03h-47m-55s
56614734,4,9,2557399189376884224,2557399189376884224,2025-06-21--03h-52m-59s,V,H,2025-06-21--03h-47m-55s


In [8]:
# Drop label columns that may already be present in the input so the labels
# computed below are the only ones. Both columns added by the later merge are
# cleared; leaving either behind would make that merge emit *_x / *_y
# suffixed duplicates.
stale_label_columns = [
    column
    for column in ("estimation_label", "corrected_estimation_label")
    if column in coincidences_df.columns
]
if stale_label_columns:
    coincidences_df = coincidences_df.drop(columns=stale_label_columns)
coincidences_df

Unnamed: 0,detector_a_name,detector_b_name,detector_a_time_tag,detector_b_time_tag,timestamp,tomography_setting_t,tomography_setting_r,data_dir
0,1,4,2555412465261692416,2555412465261692416,2025-06-20--17h-17m-43s,H,H,2025-06-20--17h-16m-34s
1,1,4,2555412465265911296,2555412465265911296,2025-06-20--17h-17m-43s,H,H,2025-06-20--17h-16m-34s
2,9,12,2555412465267243520,2555412465267243520,2025-06-20--17h-17m-42s,H,H,2025-06-20--17h-16m-34s
3,9,12,2555412465268338688,2555412465268338688,2025-06-20--17h-17m-42s,H,H,2025-06-20--17h-16m-34s
4,9,12,2555412465269690880,2555412465269690880,2025-06-20--17h-17m-42s,H,H,2025-06-20--17h-16m-34s
...,...,...,...,...,...,...,...,...
56614731,1,7,2557399189369489920,2557399189369489920,2025-06-21--03h-52m-59s,V,H,2025-06-21--03h-47m-55s
56614732,4,9,2557399189370192896,2557399189370192896,2025-06-21--03h-52m-59s,V,H,2025-06-21--03h-47m-55s
56614733,9,12,2557399189372173824,2557399189372173824,2025-06-21--03h-52m-59s,V,H,2025-06-21--03h-47m-55s
56614734,4,9,2557399189376884224,2557399189376884224,2025-06-21--03h-52m-59s,V,H,2025-06-21--03h-47m-55s


In [9]:
def get_effective_detector_label(arm, tomography_setting_a, tomography_setting_b):
    """Return the arm label a detector effectively has for the given settings.

    Parameters
    ----------
    arm : str
        Two-letter arm label such as ``"TT"`` or ``"RT"``.
    tomography_setting_a : str
        Setting applied to arms whose first letter is ``"T"``.
    tomography_setting_b : str
        Setting applied to arms whose first letter is ``"R"``.

    Returns
    -------
    str
        ``arm`` unchanged, or with its second letter swapped when the setting
        that governs this arm is ``"V"`` (``"R"`` becomes ``"T"``, anything
        else becomes ``"R"``).
    """
    letters = list(arm)
    # The first letter decides which of the two settings governs this arm.
    governing_setting = {
        "T": tomography_setting_a,
        "R": tomography_setting_b,
    }.get(letters[0])
    if governing_setting == "V":
        # A "V" setting swaps the two outputs of that arm.
        letters[1] = "T" if letters[1] == "R" else "R"
    return "".join(letters)


def get_estimation_label(arm_a, arm_b, tomography_setting_a, tomography_setting_b):
    """Classify a detector pair as ``"DB_H"``, ``"DB_V"``, ``"C"`` or ``"SB"``.

    Both arm labels are first converted to their effective labels with
    ``get_effective_detector_label`` (a "V" setting swaps the outputs of the
    arm it governs), then compared:

    * identical labels -> double bunched: ``"DB_H"`` when the second letter is
      ``"T"``, ``"DB_V"`` when it is ``"R"``;
    * different first letters -> ``"C"`` (coincidence);
    * same first letter, different second letters -> ``"SB"`` (single bunched).

    Raises
    ------
    ValueError
        If the effective labels do not fit any of the cases above.
    """
    first, second = (
        get_effective_detector_label(arm, tomography_setting_a, tomography_setting_b)
        for arm in (arm_a, arm_b)
    )

    # Double bunched: both detectors sit on the same effective arm.
    if first == second:
        bunched_label = {"T": "DB_H", "R": "DB_V"}.get(first[1])
        if bunched_label is None:
            raise ValueError(f"Unknown arm: {first}")
        return bunched_label

    # Coincidence: the detectors differ in the first letter of their arm.
    if first[0] != second[0]:
        return "C"

    # Single bunched: same first letter, different second letter.
    if first[1] != second[1]:
        return "SB"

    raise ValueError(f"Unknown arm combination: {first}, {second}")

In [10]:
# Based on the imperfect indistinguishability
def get_corrected_estimation_label(arm_a, arm_b, tomography_setting_a, tomography_setting_b):
    """Classify a detector pair as ``"HH"``, ``"VV"``, ``"SB"`` or ``"C'"``.

    Both arm labels are first converted to their effective labels with
    ``get_effective_detector_label`` (a "V" setting swaps the outputs of the
    arm it governs), then compared:

    * both second letters ``"T"`` -> ``"HH"``;
    * both second letters ``"R"`` -> ``"VV"``;
    * second letters differ, first letters equal -> ``"SB"``;
    * second letters differ, first letters differ -> ``"C'"``.

    Raises
    ------
    ValueError
        If the second letters match but are neither ``"T"`` nor ``"R"``.
    """
    first, second = (
        get_effective_detector_label(arm, tomography_setting_a, tomography_setting_b)
        for arm in (arm_a, arm_b)
    )

    if first[1] != second[1]:
        # Outputs differ: the first letter tells SB and C' apart.
        return "SB" if first[0] == second[0] else "C'"

    # Outputs agree: HH for T/T, VV for R/R.
    if first[1] == "T":
        return "HH"
    if first[1] == "R":
        return "VV"

    # That should be all the cases
    raise ValueError(f"Unknown arm combination: {first}, {second}")

In [11]:
# Lookup table with one row per (detector pair, T setting, R setting).
# Only pairs with detector_a_name < detector_b_name are generated, so the
# coincidence data must list each pair in that order to find a match.
labels = pd.DataFrame(
    [
        (
            detector_a,
            detector_b,
            setting_t,
            setting_r,
            get_estimation_label(info_a["arm"], info_b["arm"], setting_t, setting_r),
            get_corrected_estimation_label(info_a["arm"], info_b["arm"], setting_t, setting_r),
        )
        for detector_a, info_a in DETECTORS.items()
        for detector_b, info_b in DETECTORS.items()
        if detector_a < detector_b
        for setting_t in ("H", "V")
        for setting_r in ("H", "V")
    ],
    columns=[
        "detector_a_name",
        "detector_b_name",
        "tomography_setting_t",
        "tomography_setting_r",
        "estimation_label",
        "corrected_estimation_label",
    ],
)

labels

Unnamed: 0,detector_a_name,detector_b_name,tomography_setting_t,tomography_setting_r,estimation_label,corrected_estimation_label
0,9,12,H,H,DB_H,HH
1,9,12,H,V,DB_H,HH
2,9,12,V,H,DB_V,VV
3,9,12,V,V,DB_V,VV
4,9,11,H,H,SB,SB
...,...,...,...,...,...,...
107,2,4,V,V,SB,SB
108,2,7,H,H,DB_V,VV
109,2,7,H,V,DB_H,HH
110,2,7,V,H,DB_V,VV


In [12]:
# Join the labels with the coincidences dataframe.
merge_keys = ["detector_a_name", "detector_b_name", "tomography_setting_t", "tomography_setting_r"]
label_columns = [column for column in labels.columns if column not in merge_keys]

# If this cell is re-run (or the input already carried labels), remove the old
# label columns first; otherwise the merge would emit *_x / *_y duplicates.
existing_label_columns = [column for column in label_columns if column in coincidences_df.columns]
if existing_label_columns:
    coincidences_df = coincidences_df.drop(columns=existing_label_columns)

# validate="many_to_one" makes pandas check that each key combination occurs
# only once in `labels`, so the join can never duplicate coincidence rows.
coincidences_df = coincidences_df.merge(labels, on=merge_keys, how="left", validate="many_to_one")

# A left join leaves NaN labels on rows with no match in `labels` (for example
# a pair listed with detector_a_name > detector_b_name); report them instead of
# letting them pass silently into the saved files.
unlabelled_count = int(coincidences_df["estimation_label"].isna().sum())
if unlabelled_count:
    print(f"Warning: {unlabelled_count} coincidences have no matching label")

coincidences_df

Unnamed: 0,detector_a_name,detector_b_name,detector_a_time_tag,detector_b_time_tag,timestamp,tomography_setting_t,tomography_setting_r,data_dir,estimation_label,corrected_estimation_label
0,1,4,2555412465261692416,2555412465261692416,2025-06-20--17h-17m-43s,H,H,2025-06-20--17h-16m-34s,DB_H,HH
1,1,4,2555412465265911296,2555412465265911296,2025-06-20--17h-17m-43s,H,H,2025-06-20--17h-16m-34s,DB_H,HH
2,9,12,2555412465267243520,2555412465267243520,2025-06-20--17h-17m-42s,H,H,2025-06-20--17h-16m-34s,DB_H,HH
3,9,12,2555412465268338688,2555412465268338688,2025-06-20--17h-17m-42s,H,H,2025-06-20--17h-16m-34s,DB_H,HH
4,9,12,2555412465269690880,2555412465269690880,2025-06-20--17h-17m-42s,H,H,2025-06-20--17h-16m-34s,DB_H,HH
...,...,...,...,...,...,...,...,...,...,...
56614731,1,7,2557399189369489920,2557399189369489920,2025-06-21--03h-52m-59s,V,H,2025-06-21--03h-47m-55s,SB,SB
56614732,4,9,2557399189370192896,2557399189370192896,2025-06-21--03h-52m-59s,V,H,2025-06-21--03h-47m-55s,C,C'
56614733,9,12,2557399189372173824,2557399189372173824,2025-06-21--03h-52m-59s,V,H,2025-06-21--03h-47m-55s,DB_V,VV
56614734,4,9,2557399189376884224,2557399189376884224,2025-06-21--03h-52m-59s,V,H,2025-06-21--03h-47m-55s,C,C'


In [13]:
# Split the labelled coincidences back into one frame per run, keyed by the
# run's directory name.
grouped = dict(list(coincidences_df.groupby("data_dir")))

In [14]:
from concurrent.futures import ProcessPoolExecutor

def save_scaled_coincidences(args):
    """Write one run's rows to ``labelled_coincidences.csv`` in its directory.

    Despite the function name, the file written is
    ``<data_folder>/<data_dir>/labelled_coincidences.csv``.

    Parameters
    ----------
    args : tuple
        ``(data_dir, df_subset)`` packed into a single argument so the
        function can be used directly with ``Executor.map``. Nothing is
        written when ``df_subset`` is empty.
    """
    data_dir, df_subset = args
    if df_subset.empty:
        return

    output_file = os.path.join(data_folder, data_dir, "labelled_coincidences.csv")
    df_subset.to_csv(output_file, index=False)
    print(f"Saved {output_file}")

# A run whose coincidences file had no rows never appears in `grouped`, so
# skip it here rather than failing with a KeyError.
save_jobs = [(k, grouped[k]) for k in new_data_dirs if k in grouped]

with ProcessPoolExecutor(max_workers=8) as executor:
    # executor.map only re-raises a worker's exception when its result is
    # retrieved; consuming the iterator makes a failed write stop the cell
    # instead of being silently discarded.
    list(executor.map(save_scaled_coincidences, save_jobs))


Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-20--17h-21m-29s/labelled_coincidences.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-20--17h-16m-34s/labelled_coincidences.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-20--17h-26m-32s/labelled_coincidences.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-20--17h-31m-35s/labelled_coincidences.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects/multi-parameter-estimation/multi-parameter-estimation/data/2025-06-20--17h-41m-43s/labelled_coincidences.csv
Saved /home/jh115/Heriot-Watt University Team Dropbox/RES_EPS_EMQL/projects