In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import os
from ipywidgets import Text, Button, VBox, Output, FloatRangeSlider
from IPython.display import display

# === USER INPUT SECTION ===
# Folders and files are sorted lexicographically, so the order of the spectra
# (and which sample counts as "first") follows the folder/file names.
sample_folders = sorted(glob.glob("./samples/sample*/"))  # Adjust if needed
output_base_folder = "./output_aligned/"
align_to = "first"  # or "mean"

# Fail early with a readable message instead of a bare IndexError further down,
# and reject typos in `align_to`, which would otherwise silently fall back to
# the "mean" behaviour.
if align_to not in ("first", "mean"):
    raise ValueError(f"align_to must be 'first' or 'mean', got {align_to!r}")
if not sample_folders:
    raise FileNotFoundError(
        "No sample folders matched './samples/sample*/' - adjust the glob pattern."
    )

# Collect file names from each sample
sample_files = [sorted(glob.glob(os.path.join(folder, "*.xy"))) for folder in sample_folders]
num_files_per_sample = len(sample_files[0])
assert all(len(files) == num_files_per_sample for files in sample_files), "Mismatch in file count!"
# With zero files the name prompt below can never be satisfied (an empty text
# field still splits into one name), so stop here.
if num_files_per_sample == 0:
    raise FileNotFoundError("No .xy files found in the sample folders.")

# === Step 1: Input core level names ===
# Both prompt lines are emitted by a single call; `sep` supplies the line break.
print(
    f"🧪 Detected {num_files_per_sample} files per sample.",
    "➡️ Please enter a comma-separated list of core level names (in same order as files in folders).",
    sep="\n",
)

# Widgets of the name-entry form: a message area, the confirm button and the
# text field the names are typed into.
output_area = Output()
confirm_button = Button(button_style='success', description="Confirm")
text_input = Text(layout=dict(width='90%'), description="Core levels:")

def on_confirm_clicked(b):
    """Validate the typed core-level names and start the per-level interface.

    The comma-separated text in ``text_input`` is split and stripped. The
    names are accepted only if there are exactly ``num_files_per_sample`` of
    them, none is empty and none is repeated; each name becomes the output
    file name ``<name>.xy``, so an empty or repeated name would produce a
    file called ``.xy`` or make two core levels overwrite each other.

    Parameters
    ----------
    b
        Argument supplied by the button's click machinery; not used.

    Returns
    -------
    None
        Messages are printed inside ``output_area``; on success
        ``run_interface`` is called with the list of names.
    """
    with output_area:
        # Drop messages/widgets left over from a previous click.
        output_area.clear_output()
        core_level_names = [name.strip() for name in text_input.value.split(",")]
        if len(core_level_names) != num_files_per_sample:
            print(f"❌ Expected {num_files_per_sample} names, got {len(core_level_names)}.")
            return
        if not all(core_level_names):
            print("❌ Core level names must not be empty.")
            return
        duplicates = sorted({name for name in core_level_names if core_level_names.count(name) > 1})
        if duplicates:
            print(f"❌ Duplicate core level names: {duplicates}. Each name must be unique.")
            return
        print("✅ Core levels set:", core_level_names)
        run_interface(core_level_names)

# Show the name-entry form (text field, button, message area stacked
# vertically) and hook the validation callback to the button.
display(VBox(children=[text_input, confirm_button, output_area]))
confirm_button.on_click(on_confirm_clicked)

# === MAIN PROCESSING ===
def _load_xy_spectrum(path):
    """Read a whitespace-separated text file and return its first two columns as (x, y)."""
    # ndmin=2 keeps a single-row file two-dimensional so the column slices work.
    data = np.loadtxt(path, ndmin=2)
    if data.shape[0] == 0 or data.shape[1] < 2:
        raise ValueError(f"Expected at least one row with two columns in '{path}'")
    return data[:, 0], data[:, 1]


def _normalize_to_max(y, source):
    """Return ``y`` divided by its maximum; ``source`` is only used in the error message."""
    y_max = np.max(y)
    # A zero, negative or non-finite maximum would yield inf/NaN or a flipped
    # spectrum, so refuse it instead of writing garbage to disk.
    if not np.isfinite(y_max) or y_max <= 0:
        raise ValueError(f"Cannot normalize '{source}': maximum y value is {y_max}")
    return y / y_max


def _peak_x_in_interval(x, y, x_min, x_max):
    """Return the x value at which ``y`` is largest within ``[x_min, x_max]``."""
    mask = (x >= x_min) & (x <= x_max)
    if not np.any(mask):
        raise ValueError(f"No data in interval [{x_min}, {x_max}]")
    return x[mask][np.argmax(y[mask])]


def _update_shifts_csv(shifts_csv_path, shifts_record):
    """Merge ``shifts_record`` into the CSV, keeping one row per (sample, core_level)."""
    df_new = pd.DataFrame(shifts_record)
    if os.path.exists(shifts_csv_path):
        # Read the key columns as text so names that look numeric still compare
        # equal to the freshly created (string) records.
        df_existing = pd.read_csv(
            shifts_csv_path, dtype={"sample": str, "core_level": str, "file": str}
        )
        df_combined = pd.concat([df_existing, df_new], ignore_index=True)
        # Re-running an alignment replaces the earlier rows instead of
        # appending duplicates; the new rows come last, so keep="last".
        df_combined = df_combined.drop_duplicates(subset=["sample", "core_level"], keep="last")
    else:
        df_combined = df_new
    df_combined = df_combined.sort_values(by=["core_level", "sample"])
    df_combined.to_csv(shifts_csv_path, index=False)


def process_core_level(core_idx, core_name):
    """Plot one core level for every sample and show the align/normalize controls.

    The spectrum at position ``core_idx`` of every entry in the module-level
    ``sample_files`` is loaded and plotted. A range slider and a button are
    then displayed; clicking the button

    * divides every spectrum by its maximum y value,
    * finds, per spectrum, the x position of the highest point inside the
      slider interval,
    * shifts every spectrum along x so that this position matches the
      reference (the first spectrum's peak if ``align_to == "first"``,
      otherwise the mean of all peak positions),
    * writes each shifted spectrum to
      ``<output_base_folder>/<sample>/<core_name>.xy``,
    * plots the normalized and the aligned spectra, and
    * records the applied shifts in ``<output_base_folder>/shifts.csv``,
      replacing rows written earlier for the same sample and core level.

    Parameters
    ----------
    core_idx : int
        Index of the file inside each sample's file list.
    core_name : str
        Label used in plot titles, the output file name and ``shifts.csv``.

    Raises
    ------
    ValueError
        If a file does not contain at least one row with two columns.
        Inside the button callback: if a spectrum has no points in the
        selected interval, or its maximum is not a positive finite number.
    """
    spectra_raw = [_load_xy_spectrum(sample[core_idx]) for sample in sample_files]
    # Folder names (without the trailing separator) identify the samples.
    sample_names = [os.path.basename(os.path.normpath(folder)) for folder in sample_folders]

    # Plot raw spectra
    print(f"\n🔎 Core Level: {core_name}")
    plt.figure(figsize=(8, 5))
    for x, y in spectra_raw:
        plt.plot(x, y)
    plt.title(f"Core Level: {core_name} — Raw Spectra")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.grid(True)
    plt.show()

    # Setup slider for peak selection: it spans the union of all x ranges and
    # starts with 10 % trimmed off each end.
    x_all = np.concatenate([x for x, _ in spectra_raw])
    x_min_slider = np.min(x_all)
    x_max_slider = np.max(x_all)
    margin = 0.1 * (x_max_slider - x_min_slider)

    interval_slider = FloatRangeSlider(
        value=[x_min_slider + margin, x_max_slider - margin],
        min=x_min_slider,
        max=x_max_slider,
        step=0.1,
        description='Peak range:',
        continuous_update=False,
        layout={"width": "80%"}
    )
    out = Output()
    run_button = Button(description="Align and Normalize", button_style='success')

    def on_click_run(b):
        """Normalize, align, save and plot using the current slider interval."""
        with out:
            out.clear_output()
            x_min, x_max = interval_slider.value

            # Normalize and extract peak positions. Everything is validated
            # here, before any file is written.
            spectra_norm = []
            peak_positions = []
            for i, (x, y) in enumerate(spectra_raw):
                y_norm = _normalize_to_max(y, sample_files[i][core_idx])
                spectra_norm.append((x, y_norm))
                peak_positions.append(_peak_x_in_interval(x, y_norm, x_min, x_max))

            ref_peak = peak_positions[0] if align_to == "first" else np.mean(peak_positions)

            spectra_aligned = []
            shifts_record = []

            for i, ((x, y), peak_x, sample_name) in enumerate(zip(spectra_norm, peak_positions, sample_names)):
                shift_amount = ref_peak - peak_x
                x_shifted = x + shift_amount
                spectra_aligned.append((x_shifted, y))

                shifts_record.append({
                    "sample": sample_name,
                    "core_level": core_name,
                    "file": os.path.basename(sample_files[i][core_idx]),
                    "x_shift_applied": shift_amount
                })

                # Save individual aligned file
                output_folder = os.path.join(output_base_folder, sample_name)
                os.makedirs(output_folder, exist_ok=True)
                output_path = os.path.join(output_folder, f"{core_name}.xy")
                np.savetxt(output_path, np.column_stack((x_shifted, y)), fmt="%.6f")

            # Plot raw normalized
            plt.figure(figsize=(7, 5))
            for (x, y), name in zip(spectra_norm, sample_names):
                plt.plot(x, y, label=name)
            plt.title(f"{core_name} – Normalized Raw Spectra")
            plt.xlabel("x")
            plt.ylabel("Normalized y")
            plt.legend()
            plt.grid(True)
            plt.show()

            # Plot aligned normalized
            plt.figure(figsize=(7, 5))
            for (x, y), name, shift in zip(spectra_aligned, sample_names, shifts_record):
                label = f"{name} (s: {shift['x_shift_applied']:.2f})"
                plt.plot(x, y, label=label)
            plt.title(f"{core_name} – Aligned + Normalized Spectra")
            plt.xlabel("x (shifted)")
            plt.ylabel("Normalized y")
            plt.legend()
            plt.grid(True)
            plt.show()

            # Save shifts.csv
            os.makedirs(output_base_folder, exist_ok=True)
            shifts_csv_path = os.path.join(output_base_folder, "shifts.csv")
            _update_shifts_csv(shifts_csv_path, shifts_record)
            print(f"📄 Saved x-shifts to {shifts_csv_path}")
            print(f"💾 Aligned and normalized data saved in '{output_base_folder}'")

    run_button.on_click(on_click_run)
    display(VBox([interval_slider, run_button, out]))

# === LOOP OVER ALL CORE LEVELS ===
def run_interface(core_level_names):
    """Print a banner for each core level and hand it to ``process_core_level``.

    Parameters
    ----------
    core_level_names : iterable of str
        Names in file order; the position of a name is passed on as the
        file index.
    """
    for position, level_name in enumerate(core_level_names):
        # One call prints the blank line, the rule, the title and the rule.
        print(
            "\n==============================",
            f" Core Level: {level_name}",
            "==============================",
            sep="\n",
        )
        process_core_level(position, level_name)



🧪 Detected 12 files per sample.
➡️ Please enter a comma-separated list of core level names (in same order as files in folders).


VBox(children=(Text(value='', description='Core levels:', layout=Layout(width='90%')), Button(button_style='su…

Debug: count the `.xy` files found in each sample folder

In [5]:
# Report how many .xy files each sample folder contains.
for sample_dir in sample_folders:
    xy_pattern = os.path.join(sample_dir, "*.xy")
    xy_count = len(glob.glob(xy_pattern))
    print(f"{sample_dir}: {xy_count} .xy files")

./samples/sample1/: 12 .xy files
./samples/sample2/: 12 .xy files
./samples/sample3/: 12 .xy files
./samples/sample4/: 12 .xy files
./samples/sample5/: 12 .xy files
./samples/sample6/: 12 .xy files
