In [2]:
import numpy as np
import pandas as pd
from tkinter import Tk, filedialog
from scipy.io import loadmat

# Keys are tried in this order; the "*_Blinking_Corrected" entries take priority.
_LOCALIZATION_KEYS = ('Final_Loc_Blinking_Corrected', 'LocalizationsFinal')
_FRAME_KEYS = ('Final_Frame_Blinking_Corrected', 'Frame_Information')


def _first_available(container, keys, missing_message):
    """Return container[key] for the first key of `keys` present, else raise KeyError."""
    for key in keys:
        if key in container:
            return container[key]
    raise KeyError(missing_message)


def load_data_file(prompt):
    """Ask the user for a .npz or .mat file and load its localization/frame data.

    Parameters
    ----------
    prompt : str
        Title shown on the file-selection dialog.

    Returns
    -------
    tuple
        ``(locs, frames, file_path)``. A single 2-D localization array and a
        single numeric 1-D/2-D frame array are each wrapped in a one-element
        list so callers can always iterate over per-group arrays. Containers
        that already hold one array per group are returned unchanged.

    Raises
    ------
    ValueError
        If the dialog is cancelled or the extension is neither .npz nor .mat.
    KeyError
        If the file has none of the expected localization or frame keys.
    """
    # Use an explicit hidden root so no empty Tk window is left on screen and
    # the root is destroyed even if the dialog raises.
    root = Tk()
    root.withdraw()
    try:
        file_path = filedialog.askopenfilename(
            filetypes=[("Data files", "*.npz *.mat")], title=prompt, parent=root
        )
    finally:
        root.destroy()

    if not file_path:
        raise ValueError("❌ No file selected.")

    # Compare extensions case-insensitively (e.g. "DATA.NPZ", "cells.MAT").
    lowered_path = file_path.lower()
    if lowered_path.endswith(".npz"):
        # NOTE(security): allow_pickle=True can execute arbitrary code while
        # unpickling; only open files from a trusted source. It is kept because
        # per-group (object-dtype) arrays cannot be loaded without it.
        # The context manager closes the archive's file handle; the arrays are
        # fully read into memory when indexed, so they stay valid afterwards.
        with np.load(file_path, allow_pickle=True) as data:
            locs = _first_available(
                data, _LOCALIZATION_KEYS, "❌ No localization data found in .npz file."
            )
            frames = _first_available(
                data, _FRAME_KEYS, "❌ No frame data found in .npz file."
            )
    elif lowered_path.endswith(".mat"):
        data = loadmat(file_path, simplify_cells=True)
        locs = _first_available(
            data, _LOCALIZATION_KEYS, "❌ No localization data found in .mat file."
        )
        frames = _first_available(
            data, _FRAME_KEYS, "❌ No frame data found in .mat file."
        )
    else:
        raise ValueError("❌ Unsupported file type.")

    # Standardize to list of arrays
    if isinstance(locs, np.ndarray) and locs.ndim == 2:
        locs = [locs]
    # An object-dtype array is already a container of per-group frame arrays;
    # wrapping it would pair the whole container with the first localization
    # array when the two sequences are zipped together.
    if (
        isinstance(frames, np.ndarray)
        and frames.dtype != object
        and frames.ndim in (1, 2)
    ):
        frames = [frames]

    return locs, frames, file_path

def flatten_localizations(locs, frames):
    """Flatten paired localization and frame arrays into one sorted DataFrame.

    Parameters
    ----------
    locs : sequence of array-like
        One 2-D array per group; only the first two columns are kept, as
        'X' and 'Y'.
    frames : sequence of array-like
        Frame values paired positionally with `locs`. Arrays with more than
        one dimension (e.g. ``(n, 1)`` or ``(1, n)``) are flattened first.

    Returns
    -------
    pandas.DataFrame
        Columns 'X', 'Y', 'Frame', sorted by those columns with a fresh
        0..n-1 index. An empty frame with the same columns is returned when
        there is nothing to concatenate.
    """
    columns = ['X', 'Y', 'Frame']
    flat = []
    for loc_arr, frame_arr in zip(locs, frames):
        if len(loc_arr) == 0:
            continue
        # Accept plain nested lists as well as ndarrays for the 2-D slice below.
        loc_arr = np.asarray(loc_arr)
        # A DataFrame column must be 1-D; column/row vectors are flattened.
        if isinstance(frame_arr, np.ndarray) and frame_arr.ndim > 1:
            frame_arr = frame_arr.reshape(-1)
        temp = pd.DataFrame(loc_arr[:, :2], columns=['X', 'Y'])
        temp['Frame'] = frame_arr
        flat.append(temp)

    if not flat:
        # pd.concat raises on an empty list; return an empty, typed frame instead.
        return pd.DataFrame(columns=columns, dtype=float)

    return (
        pd.concat(flat, ignore_index=True)
        .sort_values(columns)
        .reset_index(drop=True)
    )

def compare_localization_sets(tolerance=1e-5):
    """Interactively compare a recombined localization file with its original.

    Opens two file dialogs (recombined file first, then the original),
    flattens both data sets to sorted X/Y/Frame tables and prints whether
    they agree element-wise.

    Parameters
    ----------
    tolerance : float, optional
        Every absolute difference must be strictly below this value for the
        check to pass. Defaults to 1e-5.

    Returns
    -------
    None
        The verdict is printed: a shape mismatch, a PASS line, or a warning
        followed by summary statistics of the differences.
    """
    print("📂 Select recombined file (after merging)...")
    final_locs, final_frames, _ = load_data_file("Select recombined .npz or .mat file")

    print("📂 Select original file (before splitting)...")
    orig_locs, orig_frames, _ = load_data_file("Select original .npz or .mat file")

    df_final = flatten_localizations(final_locs, final_frames)
    df_orig = flatten_localizations(orig_locs, orig_frames)

    if df_final.shape != df_orig.shape:
        print("❌ Shape mismatch!")
        print(f"Original shape: {df_orig.shape}, Final shape: {df_final.shape}")
        return

    diffs = df_final - df_orig
    max_diff = diffs.abs().max()

    # A value that is NaN on only one side produces a NaN difference, which
    # max() skips by default; detect that case explicitly so it cannot pass.
    nan_mismatch = bool(
        (df_final.isna().to_numpy() != df_orig.isna().to_numpy()).any()
    )

    if not nan_mismatch and (max_diff < tolerance).all():
        print("✅ PASS: No significant difference in localizations or frames.")
    else:
        print("⚠️ WARNING: Differences detected!")
        print(diffs.describe())
        print("Max difference:")
        print(max_diff)


In [8]:
# Run the interactive check: prompts for the recombined file first, then the original file.
compare_localization_sets()

📂 Select recombined file (after merging)...
📂 Select original file (before splitting)...
✅ PASS: No significant difference in localizations or frames.


In [5]:
# Step-by-step version of the comparison that keeps the intermediate tables
# (df_final, df_orig, diffs) in the notebook namespace for inspection.
print("📂 Select recombined file (after merging)...")
final_locs, final_frames, final_path = load_data_file("Select recombined .npz or .mat file")

print("📂 Select original file (before splitting)...")
orig_locs, orig_frames, orig_path = load_data_file("Select original .npz or .mat file")

df_final = flatten_localizations(final_locs, final_frames)
df_orig = flatten_localizations(orig_locs, orig_frames)

if df_final.shape != df_orig.shape:
    print("❌ Shape mismatch!")
    print(f"Original shape: {df_orig.shape}, Final shape: {df_final.shape}")
else:
    # Only compare element-wise when the shapes agree. Subtracting tables of
    # different lengths aligns on the index and fills the unmatched rows with
    # NaN, which max() skips, so a mismatch could otherwise still print PASS.
    diffs = df_final - df_orig
    tolerance = 1e-5
    max_diff = diffs.abs().max()

    if (max_diff < tolerance).all():
        print("✅ PASS: No significant difference in localizations or frames.")
    else:
        print("⚠️ WARNING: Differences detected!")
        print(diffs.describe())
        print("Max difference:")
        print(max_diff)


📂 Select recombined file (after merging)...
📂 Select original file (before splitting)...
✅ PASS: No significant difference in localizations or frames.
