In [169]:
import pandas as pd
import numpy as np
import os

In [170]:
# Correction factor used by the theta and delta_phi estimators further down
# (it enters as 1/eta and as eta/(2 - eta) respectively).
# NOTE(review): presumably an experimentally determined efficiency/visibility — confirm its origin.
eta = 0.985

In [171]:
# Ask git for the top-level directory of the repository that contains the
# current working directory. The pipe is opened in a `with` block so it is
# closed deterministically instead of being left to the garbage collector.
# Note: when git is unavailable or the working directory is not inside a
# repository, nothing is written to stdout and repo_root is the empty string.
with os.popen('git rev-parse --show-toplevel') as git_pipe:
    repo_root = git_pipe.read().strip()

In [172]:
data_folder = os.path.join(repo_root, 'multi-parameter-estimation', 'data')
# Set to True to force a full refresh of the data
full_refresh = True

# Collect the run directories: keep sub-directories only, leave out the
# 'old-data' archive folder, and return them in sorted order.
data_dirs = sorted(
    entry
    for entry in os.listdir(data_folder)
    if entry != 'old-data' and os.path.isdir(os.path.join(data_folder, entry))
)
data_dirs

['2025-05-23--16h-02m-52s',
 '2025-05-23--16h-02m-56s',
 '2025-05-23--16h-03m-00s',
 '2025-05-23--16h-03m-03s',
 '2025-05-23--16h-03m-07s',
 '2025-05-23--16h-03m-11s',
 '2025-05-23--16h-03m-15s',
 '2025-05-23--16h-03m-18s',
 '2025-05-23--16h-03m-22s',
 '2025-05-23--16h-03m-26s',
 '2025-05-23--16h-03m-30s',
 '2025-05-23--16h-03m-34s',
 '2025-05-23--16h-03m-37s',
 '2025-05-23--16h-03m-41s',
 '2025-05-23--16h-03m-45s',
 '2025-05-23--16h-03m-49s',
 '2025-05-23--16h-03m-53s',
 '2025-05-23--16h-03m-56s',
 '2025-05-23--16h-04m-00s',
 '2025-05-23--16h-04m-04s',
 '2025-05-23--16h-04m-08s',
 '2025-05-23--16h-04m-12s',
 '2025-05-23--16h-04m-15s',
 '2025-05-23--16h-04m-19s',
 '2025-05-23--16h-04m-23s',
 '2025-05-23--16h-04m-27s',
 '2025-05-23--16h-04m-31s',
 '2025-05-23--16h-04m-34s',
 '2025-05-23--16h-04m-38s',
 '2025-05-23--16h-04m-42s',
 '2025-05-23--16h-04m-46s',
 '2025-05-23--16h-04m-49s',
 '2025-05-23--16h-04m-53s',
 '2025-05-23--16h-04m-57s',
 '2025-05-23--16h-05m-01s',
 '2025-05-23--16h-05

In [173]:
# Select the runs to (re)process. With full_refresh every run is kept;
# otherwise runs that already have theta_delta_phi_estimates.csv are skipped.
new_data_dirs = [
    run_dir
    for run_dir in data_dirs
    if full_refresh
    or not os.path.exists(os.path.join(data_folder, run_dir, "theta_delta_phi_estimates.csv"))
]

new_data_dirs

['2025-05-23--16h-02m-52s',
 '2025-05-23--16h-02m-56s',
 '2025-05-23--16h-03m-00s',
 '2025-05-23--16h-03m-03s',
 '2025-05-23--16h-03m-07s',
 '2025-05-23--16h-03m-11s',
 '2025-05-23--16h-03m-15s',
 '2025-05-23--16h-03m-18s',
 '2025-05-23--16h-03m-22s',
 '2025-05-23--16h-03m-26s',
 '2025-05-23--16h-03m-30s',
 '2025-05-23--16h-03m-34s',
 '2025-05-23--16h-03m-37s',
 '2025-05-23--16h-03m-41s',
 '2025-05-23--16h-03m-45s',
 '2025-05-23--16h-03m-49s',
 '2025-05-23--16h-03m-53s',
 '2025-05-23--16h-03m-56s',
 '2025-05-23--16h-04m-00s',
 '2025-05-23--16h-04m-04s',
 '2025-05-23--16h-04m-08s',
 '2025-05-23--16h-04m-12s',
 '2025-05-23--16h-04m-15s',
 '2025-05-23--16h-04m-19s',
 '2025-05-23--16h-04m-23s',
 '2025-05-23--16h-04m-27s',
 '2025-05-23--16h-04m-31s',
 '2025-05-23--16h-04m-34s',
 '2025-05-23--16h-04m-38s',
 '2025-05-23--16h-04m-42s',
 '2025-05-23--16h-04m-46s',
 '2025-05-23--16h-04m-49s',
 '2025-05-23--16h-04m-53s',
 '2025-05-23--16h-04m-57s',
 '2025-05-23--16h-05m-01s',
 '2025-05-23--16h-05

In [174]:
def load_coincidences(data_dir, base_folder=None):
    """Load the coincidence table of a single measurement run.

    Parameters
    ----------
    data_dir : str
        Name of the run directory; it must contain a ``coincidences.csv`` file.
    base_folder : str or os.PathLike, optional
        Folder that contains the run directories. When omitted, the
        notebook-level ``data_folder`` is used.

    Returns
    -------
    pandas.DataFrame
        Contents of ``coincidences.csv`` with an added ``data_dir`` column
        holding ``data_dir`` in every row, so rows stay attributable to their
        run after several tables are concatenated.
    """
    if base_folder is None:
        # Fall back to the notebook-level setting, looked up at call time.
        base_folder = data_folder
    coincidences = pd.read_csv(os.path.join(base_folder, data_dir, "coincidences.csv"))
    coincidences["data_dir"] = data_dir
    return coincidences

# Load every selected run and stack the tables into a single frame.
# pd.concat raises an opaque "No objects to concatenate" for an empty list,
# which happens e.g. when full_refresh is False and every run already has its
# estimates file; fail early with an actionable message instead.
if not new_data_dirs:
    raise ValueError(
        "No data directories left to process; set full_refresh = True to recompute existing estimates."
    )

coincidences_df = pd.concat([load_coincidences(d) for d in new_data_dirs], ignore_index=True)
coincidences_df

Unnamed: 0,detector_a_name,detector_b_name,arm_a,arm_b,color_a,color_b,delay_a,delay_b,estimation_label,coincidences,data_dir
0,9,12,TT,TT,white,blue,1.016000e-08,1.172000e-08,DB_H,1676,2025-05-23--16h-02m-52s
1,9,11,TT,TR,white,white,1.016000e-08,1.172000e-08,SB,1503,2025-05-23--16h-02m-52s
2,9,10,TT,TR,white,blue,1.016000e-08,1.016000e-08,SB,1565,2025-05-23--16h-02m-52s
3,11,12,TR,TT,white,blue,1.172000e-08,1.172000e-08,SB,1876,2025-05-23--16h-02m-52s
4,10,12,TR,TT,blue,blue,1.016000e-08,1.172000e-08,SB,2013,2025-05-23--16h-02m-52s
...,...,...,...,...,...,...,...,...,...,...,...
1675,2,12,RR,TT,blue,blue,3.120000e-09,1.172000e-08,C,0,2025-05-23--16h-06m-51s
1676,2,11,RR,TR,blue,white,3.120000e-09,1.172000e-08,C,0,2025-05-23--16h-06m-51s
1677,2,10,RR,TR,blue,blue,3.120000e-09,1.016000e-08,C,45,2025-05-23--16h-06m-51s
1678,2,4,RR,RT,blue,blue,3.120000e-09,3.910000e-09,SB,0,2025-05-23--16h-06m-51s


Double-bunched events (`DB_H`, `DB_V`) are only resolved half of the time. To recover the expected relative statistics, we therefore weight all non-double-bunched events (`SB` and `C`) by a factor of 0.5, which is equivalent to discarding half of them.

In [175]:
# SB and C counts are halved; every other label keeps its raw count (as float).
is_halved = coincidences_df["estimation_label"].isin(["SB", "C"])

coincidences_df["scaled_coincidences"] = coincidences_df["coincidences"].astype(float)
coincidences_df.loc[is_halved, "scaled_coincidences"] = (
    coincidences_df.loc[is_halved, "coincidences"] * 0.5
)

coincidences_df

Unnamed: 0,detector_a_name,detector_b_name,arm_a,arm_b,color_a,color_b,delay_a,delay_b,estimation_label,coincidences,data_dir,scaled_coincidences
0,9,12,TT,TT,white,blue,1.016000e-08,1.172000e-08,DB_H,1676,2025-05-23--16h-02m-52s,1676.0
1,9,11,TT,TR,white,white,1.016000e-08,1.172000e-08,SB,1503,2025-05-23--16h-02m-52s,751.5
2,9,10,TT,TR,white,blue,1.016000e-08,1.016000e-08,SB,1565,2025-05-23--16h-02m-52s,782.5
3,11,12,TR,TT,white,blue,1.172000e-08,1.172000e-08,SB,1876,2025-05-23--16h-02m-52s,938.0
4,10,12,TR,TT,blue,blue,1.016000e-08,1.172000e-08,SB,2013,2025-05-23--16h-02m-52s,1006.5
...,...,...,...,...,...,...,...,...,...,...,...,...
1675,2,12,RR,TT,blue,blue,3.120000e-09,1.172000e-08,C,0,2025-05-23--16h-06m-51s,0.0
1676,2,11,RR,TR,blue,white,3.120000e-09,1.172000e-08,C,0,2025-05-23--16h-06m-51s,0.0
1677,2,10,RR,TR,blue,blue,3.120000e-09,1.016000e-08,C,45,2025-05-23--16h-06m-51s,22.5
1678,2,4,RR,RT,blue,blue,3.120000e-09,3.910000e-09,SB,0,2025-05-23--16h-06m-51s,0.0


In [176]:
# Total scaled coincidences per run and estimation label, as a flat table
# ordered by run and then by label.
label_totals = coincidences_df.groupby(["data_dir", "estimation_label"])["scaled_coincidences"].sum()
df_sum = (
    label_totals.to_frame()
    .sort_values(by=["data_dir", "estimation_label"])
    .reset_index()
)
df_sum

Unnamed: 0,data_dir,estimation_label,scaled_coincidences
0,2025-05-23--16h-02m-52s,C,222.0
1,2025-05-23--16h-02m-52s,DB_H,4259.0
2,2025-05-23--16h-02m-52s,DB_V,2962.0
3,2025-05-23--16h-02m-52s,SB,5775.5
4,2025-05-23--16h-02m-56s,C,222.5
...,...,...,...
235,2025-05-23--16h-06m-47s,SB,45.0
236,2025-05-23--16h-06m-51s,C,6636.0
237,2025-05-23--16h-06m-51s,DB_H,3874.0
238,2025-05-23--16h-06m-51s,DB_V,2950.0


In [177]:
# Reshape to one row per run with one column per estimation label.
df_pivoted_sum = (
    df_sum
    .pivot(index='data_dir', columns='estimation_label', values='scaled_coincidences')
    .rename_axis(columns=None)  # drop the 'estimation_label' name from the column axis
    .reset_index()              # turn data_dir back into a regular column
)
df_pivoted_sum

Unnamed: 0,data_dir,C,DB_H,DB_V,SB
0,2025-05-23--16h-02m-52s,222.0,4259.0,2962.0,5775.5
1,2025-05-23--16h-02m-56s,222.5,4095.0,3024.0,5785.0
2,2025-05-23--16h-03m-00s,217.5,4084.0,2929.0,5746.0
3,2025-05-23--16h-03m-03s,231.5,4053.0,2881.0,5814.0
4,2025-05-23--16h-03m-07s,247.5,4099.0,3006.0,5726.0
5,2025-05-23--16h-03m-11s,262.0,3998.0,2897.0,5661.5
6,2025-05-23--16h-03m-15s,287.0,4185.0,2980.0,5611.5
7,2025-05-23--16h-03m-18s,304.5,4152.0,3018.0,5625.5
8,2025-05-23--16h-03m-22s,392.5,4152.0,2849.0,5568.0
9,2025-05-23--16h-03m-26s,453.0,4109.0,2896.0,5462.0


In [178]:
# N is the total scaled count per run, summed over all four estimation labels.
df_pivoted_sum["N"] = (
    df_pivoted_sum["SB"]
    .add(df_pivoted_sum["C"])
    .add(df_pivoted_sum["DB_H"])
    .add(df_pivoted_sum["DB_V"])
)
df_pivoted_sum

Unnamed: 0,data_dir,C,DB_H,DB_V,SB,N
0,2025-05-23--16h-02m-52s,222.0,4259.0,2962.0,5775.5,13218.5
1,2025-05-23--16h-02m-56s,222.5,4095.0,3024.0,5785.0,13126.5
2,2025-05-23--16h-03m-00s,217.5,4084.0,2929.0,5746.0,12976.5
3,2025-05-23--16h-03m-03s,231.5,4053.0,2881.0,5814.0,12979.5
4,2025-05-23--16h-03m-07s,247.5,4099.0,3006.0,5726.0,13078.5
5,2025-05-23--16h-03m-11s,262.0,3998.0,2897.0,5661.5,12818.5
6,2025-05-23--16h-03m-15s,287.0,4185.0,2980.0,5611.5,13063.5
7,2025-05-23--16h-03m-18s,304.5,4152.0,3018.0,5625.5,13100.0
8,2025-05-23--16h-03m-22s,392.5,4152.0,2849.0,5568.0,12961.5
9,2025-05-23--16h-03m-26s,453.0,4109.0,2896.0,5462.0,12920.0


In [179]:
# Estimate theta (radians) from the normalised DB_H / DB_V imbalance, corrected by 1/eta.
# Counting noise combined with the 1/eta factor can push the ratio slightly outside
# [-1, 1], where arccos returns NaN. Clip to the valid domain so such rows map to the
# boundary values 0 or pi; rows already inside the domain are unaffected, and NaN
# inputs (e.g. missing counts) stay NaN.
theta_cos = (1 / eta) * (df_pivoted_sum["DB_H"] - df_pivoted_sum["DB_V"]) / df_pivoted_sum["N"]
df_pivoted_sum["theta_estimate"] = np.arccos(theta_cos.clip(lower=-1.0, upper=1.0))

# Estimate delta_phi (radians) from the C / SB ratio weighted by eta / (2 - eta).
tan_squared = (eta / (2 - eta)) * df_pivoted_sum["C"] / df_pivoted_sum["SB"]
df_pivoted_sum["delta_phi_estimate"] = np.arctan(np.sqrt(tan_squared))
df_pivoted_sum

Unnamed: 0,data_dir,C,DB_H,DB_V,SB,N,theta_estimate,delta_phi_estimate
0,2025-05-23--16h-02m-52s,222.0,4259.0,2962.0,5775.5,13218.5,1.471017,0.190788
1,2025-05-23--16h-02m-56s,222.5,4095.0,3024.0,5785.0,13126.5,1.487868,0.190845
2,2025-05-23--16h-03m-00s,217.5,4084.0,2929.0,5746.0,12976.5,1.48031,0.189364
3,2025-05-23--16h-03m-03s,231.5,4053.0,2881.0,5814.0,12979.5,1.478996,0.194098
4,2025-05-23--16h-03m-07s,247.5,4099.0,3006.0,5726.0,13078.5,1.485849,0.202014
5,2025-05-23--16h-03m-11s,262.0,3998.0,2897.0,5661.5,12818.5,1.483486,0.208829
6,2025-05-23--16h-03m-15s,287.0,4185.0,2980.0,5611.5,13063.5,1.477012,0.219205
7,2025-05-23--16h-03m-18s,304.5,4152.0,3018.0,5625.5,13100.0,1.4828,0.2253
8,2025-05-23--16h-03m-22s,392.5,4152.0,2849.0,5568.0,12961.5,1.468559,0.25582
9,2025-05-23--16h-03m-26s,453.0,4109.0,2896.0,5462.0,12920.0,1.475336,0.276436


In [180]:
# Add a degree-valued companion column for each radian-valued estimate.
for angle_column in ("theta_estimate", "delta_phi_estimate"):
    df_pivoted_sum[f"{angle_column}_degrees"] = np.degrees(df_pivoted_sum[angle_column])
df_pivoted_sum

Unnamed: 0,data_dir,C,DB_H,DB_V,SB,N,theta_estimate,delta_phi_estimate,theta_estimate_degrees,delta_phi_estimate_degrees
0,2025-05-23--16h-02m-52s,222.0,4259.0,2962.0,5775.5,13218.5,1.471017,0.190788,84.283041,10.931373
1,2025-05-23--16h-02m-56s,222.5,4095.0,3024.0,5785.0,13126.5,1.487868,0.190845,85.248565,10.934607
2,2025-05-23--16h-03m-00s,217.5,4084.0,2929.0,5746.0,12976.5,1.48031,0.189364,84.815539,10.849747
3,2025-05-23--16h-03m-03s,231.5,4053.0,2881.0,5814.0,12979.5,1.478996,0.194098,84.740237,11.120988
4,2025-05-23--16h-03m-07s,247.5,4099.0,3006.0,5726.0,13078.5,1.485849,0.202014,85.132891,11.574571
5,2025-05-23--16h-03m-11s,262.0,3998.0,2897.0,5661.5,12818.5,1.483486,0.208829,84.997485,11.965047
6,2025-05-23--16h-03m-15s,287.0,4185.0,2980.0,5611.5,13063.5,1.477012,0.219205,84.626581,12.559549
7,2025-05-23--16h-03m-18s,304.5,4152.0,3018.0,5625.5,13100.0,1.4828,0.2253,84.958163,12.908751
8,2025-05-23--16h-03m-22s,392.5,4152.0,2849.0,5568.0,12961.5,1.468559,0.25582,84.142229,14.657389
9,2025-05-23--16h-03m-26s,453.0,4109.0,2896.0,5462.0,12920.0,1.475336,0.276436,84.53054,15.838605


In [181]:
# Persist the per-run results inside each run's own folder.
for run in df_pivoted_sum.itertuples(index=False):
    run_folder = os.path.join(data_folder, run.data_dir)

    # Scaled coincidence totals, one row per estimation label.
    scaled_counts = pd.DataFrame({
        "estimation_label": ["C", "SB", "DB_H", "DB_V"],
        "scaled_coincidences": [run.C, run.SB, run.DB_H, run.DB_V],
    })
    scaled_counts.to_csv(os.path.join(run_folder, "scaled_coincidences.csv"), index=False)

    # Angle estimates; the values written come from the *_degrees columns.
    angle_estimates = pd.DataFrame({
        "theta_estimate": [run.theta_estimate_degrees],
        "delta_phi_estimate": [run.delta_phi_estimate_degrees],
    })
    angle_estimates.to_csv(os.path.join(run_folder, "theta_delta_phi_estimates.csv"), index=False)