In [None]:
import os
import re

import numpy as np
import pandas as pd
import torch
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GroupKFold
from tsai.all import Learner, TSClassification, TSDataLoaders, TSDatasets, accuracy
from tsai.models.InceptionTimePlus import InceptionTimePlus

In [None]:
# -----------------------------------------------
# 1. Load a folder AND extract group ID from filename
# -----------------------------------------------
def load_folder(folder_path, label):
    """Read every usable CSV in ``folder_path`` as one labelled signal.

    Files are visited in sorted filename order. Only names ending in
    ``.csv`` that contain ``sample_<digits>`` are used; everything else is
    skipped silently. Column names are stripped of surrounding whitespace
    before the ``Signal/nA`` column is read.

    Parameters
    ----------
    folder_path : path-like
        Directory to scan (not recursive).
    label : any
        Class label recorded once for every file that is loaded.

    Returns
    -------
    tuple of three lists, aligned by position
        ``(signals, labels, group_ids)`` where each signal is an array of
        shape ``(1, L)``, each label is ``label``, and each group id is the
        integer that follows ``sample_`` in the filename.
    """
    # matches "_sample_7.csv" as well as "_sample_7_1.csv" -> group 7
    sample_pattern = re.compile(r"sample_(\d+)")

    signals, labels, group_ids = [], [], []

    csv_names = (name for name in sorted(os.listdir(folder_path)) if name.endswith(".csv"))
    for csv_name in csv_names:
        match = sample_pattern.search(csv_name)
        if match is None:
            # no sample number in the name -> cannot be assigned to a group
            continue

        frame = pd.read_csv(os.path.join(folder_path, csv_name))
        frame.columns = [col.strip() for col in frame.columns]

        trace = frame["Signal/nA"].values
        signals.append(trace.reshape(1, -1))  # (1, L)
        labels.append(label)
        group_ids.append(int(match.group(1)))

    return signals, labels, group_ids


# ------------------------------------------------
# 2. Load all folders
# ------------------------------------------------
# Class label -> folder holding the CSV recordings of that class.
DATA_PATHS = {
    0: r"C:\Dev\MasterThesis\data\Si_wafer_clear_SRRs",
    1: r"C:\Dev\MasterThesis\data\SRRs_cleaned_with_ethanol",
    2: r"C:\Dev\MasterThesis\data\SRRs_with_10ppb_Acetamiprid",
    3: r"C:\Dev\MasterThesis\data\SRRs_with_100ppb_Acetamiprid",
    4: r"C:\Dev\MasterThesis\data\SRRs_with_1000ppb_Acetamiprid",
}

signals, y, groups = [], [], []

for label, path in DATA_PATHS.items():
    Xi, yi, gi = load_folder(path, label)
    signals += Xi
    y += yi
    groups += gi

if not signals:
    raise ValueError("No usable CSV files were found in any of the DATA_PATHS folders.")

# Fill the object array element by element: np.array(list, dtype=object)
# tries to broadcast (1, L) arrays of different L and raises ValueError.
X = np.empty(len(signals), dtype=object)
for i, ts in enumerate(signals):
    X[i] = ts
y = np.array(y, dtype=int)
# NOTE(review): group ids are the bare sample numbers, so equal numbers found
# in different class folders end up in the same group - confirm this is intended.
groups = np.array(groups, dtype=int)

print("Total samples:", len(X))


# ------------------------------------------------
# 3. Pad all signals to equal length
# ------------------------------------------------
# Shorter signals are right-padded with zeros up to the longest one.
max_len = max(ts.shape[1] for ts in X)
X_pad = np.zeros((len(X), 1, max_len))

for i, ts in enumerate(X):
    X_pad[i, 0, : ts.shape[1]] = ts[0]

# tsai format: (N, channels, seq_len) - X_pad already has that layout,
# so only the dtype is changed here.
X_tsai = X_pad.astype(np.float32)
c_in = 1
c_out = len(np.unique(y))


# ------------------------------------------------
# 4. Grouped 10-fold cross-validation
# ------------------------------------------------
def _per_class_auc(targets, probs, n_classes):
    """Return the one-vs-rest ROC AUC for each class index in ``range(n_classes)``.

    ``targets`` holds integer class indices, ``probs`` has one column per
    class. A class gets ``nan`` when the targets contain no example of it or
    nothing but examples of it, because ``roc_auc_score`` raises ValueError
    in that situation.
    """
    aucs = []
    for cls in range(n_classes):
        y_true = (targets == cls).astype(int)
        if y_true.min() == y_true.max():
            print(f"AUC undefined for class {cls} in this fold (single-class targets); storing nan")
            aucs.append(float("nan"))
            continue
        aucs.append(roc_auc_score(y_true, probs[:, cls]))
    return aucs


def grouped_inception_cv(X, y, groups, epochs=40, n_splits=10, bs=32, lr=1e-3):
    """Grouped k-fold cross-validation of InceptionTimePlus on univariate series.

    Parameters
    ----------
    X : np.ndarray, 3-D
        Univariate signals, either ``(N, 1, L)`` or ``(N, L, 1)``; the latter
        is flipped so the channel axis comes second. The orientation check
        only looks at whether axis 1 has size 1, so it is meant for
        single-channel data.
    y : array-like of length N
        Class labels.
    groups : array-like of length N
        Group id per sample; samples of one group never straddle the
        train/validation boundary (``GroupKFold``).
    epochs : int
        Epochs passed to ``fit_one_cycle`` in every fold.
    n_splits : int
        Number of folds. ``GroupKFold`` needs at least this many groups.
    bs : int
        Batch size for the data loaders.
    lr : float
        Maximum learning rate passed to ``fit_one_cycle``.

    Returns
    -------
    fold_results : list of float
        Validation accuracy per fold.
    auc_per_fold : list of list of float
        Per fold, the one-vs-rest ROC AUC of every class; ``nan`` where the
        validation fold did not allow an AUC to be computed (aggregate with
        ``np.nanmean`` if such entries should be ignored).

    Raises
    ------
    ValueError
        If ``X`` is not 3-D, or from ``GroupKFold`` when ``n_splits`` exceeds
        the number of distinct groups.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if X.ndim != 3:
        raise ValueError(f"X must be 3-D, got shape {X.shape}")

    # ---- FIX SHAPE ----
    if X.shape[1] != 1:  # channel axis is last -> move it to position 1
        X = np.ascontiguousarray(np.swapaxes(X, 1, 2))

    print("Corrected X shape:", X.shape)

    # Model dimensions come from the data that was passed in, not from globals.
    n_vars = X.shape[1]
    n_classes = len(np.unique(y))

    gkf = GroupKFold(n_splits=n_splits)
    fold_results = []
    auc_per_fold = []

    for fold, (train_idx, valid_idx) in enumerate(gkf.split(X, y, groups)):
        print("\n============================")
        print(f"   FOLD {fold + 1} / {n_splits}")
        print("============================")

        # The indices produced by GroupKFold address rows of the FULL arrays,
        # so the full arrays are handed over together with the split.
        splits = (train_idx.tolist(), valid_idx.tolist())
        dsets = TSDatasets(X, y, tfms=[None, TSClassification()], splits=splits)
        dls = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=bs, num_workers=0)

        # Fresh model per fold so no weights carry over between folds.
        model = InceptionTimePlus(c_in=n_vars, c_out=n_classes)
        learn = Learner(dls, model, metrics=accuracy)

        learn.fit_one_cycle(epochs, lr)

        # ---- predictions for ROC AUC ----
        preds, targets = learn.get_preds(ds_idx=1)  # validation only
        preds = preds.numpy()
        targets = targets.numpy().astype(int)

        # AUC per class
        aucs = _per_class_auc(targets, preds, n_classes)

        print("AUCs:", aucs)
        auc_per_fold.append(aucs)

        # accuracy
        acc = accuracy(torch.tensor(preds), torch.tensor(targets)).item()
        fold_results.append(acc)

    return fold_results, auc_per_fold


# ------------------------------------------------
# 5. RUN THE FULL PIPELINE
# ------------------------------------------------
# Train and evaluate on every fold, then summarise.
accs, aucs = grouped_inception_cv(X_tsai, y, groups, epochs=40)

mean_accuracy = np.mean(accs)
# rows = folds, columns = classes -> average over the folds
mean_auc_per_class = np.asarray(aucs).mean(axis=0)

print("\nFinal accuracy per fold:")
print(accs)

print("\nMean accuracy:", mean_accuracy)

print("\nMean AUC per class:")
print(mean_auc_per_class)

In [None]:
from collections import Counter

import numpy as np

# Sanity check on the grouping: number of distinct group ids and how many
# signals each one contributes.
n_unique_groups = np.unique(groups).size
signals_per_group = Counter(groups)

print("Unique groups:", n_unique_groups)
print("Group counts:", signals_per_group)