---
Cell 0 — Pipeline Switches (0/1)
---
---

In [1]:
# Pipeline mode:
# 0 = CNN on time-series (your current Torch training)
# 1 = Feature-based ML (Logistic Regression with optional feature selection + postprocess)
PIPELINE_MODE = 1

# Section switches (0/1)
ENABLE_FEATURE_EXTRACTION = 1
ENABLE_FEATURE_SELECTION = 0
ENABLE_POSTPROCESSING = 1

# Recommended presets:
# - CNN pipeline: PIPELINE_MODE=0, ENABLE_FEATURE_EXTRACTION=0, ENABLE_FEATURE_SELECTION=0
# - Feature pipeline: PIPELINE_MODE=1, ENABLE_FEATURE_EXTRACTION=1, ENABLE_FEATURE_SELECTION=1

# Feature extraction settings (used when ENABLE_FEATURE_EXTRACTION=1)
FS = 128.0  # sampling rate (Hz). Change if your dataset uses a different FS.

BANDS = {
    "delta": (0.5, 4.0),
    "theta": (4.0, 8.0),
    "alpha": (8.0, 13.0),
    "beta":  (13.0, 30.0),
    "gamma": (30.0, 45.0),
}

# Feature selection settings (used when ENABLE_FEATURE_SELECTION=1)
FEATURE_SELECTION_METHOD = "f_classif"  # "f_classif" or "mutual_info"
FEATURE_K = 200  # number of selected features (top-K)

# Post-processing settings (used when ENABLE_POSTPROCESSING=1)
SMOOTH_WINDOW = 5  # odd integer recommended (e.g., 3,5,7)


---
Cell 1 — Preprocessing
---
---

In [2]:
!pip install -q torch scipy scikit-learn
print("✓ Installation completed")

# Copy dataset
!cp "/kaggle/input/dataset-drowsiness/dataset (1).mat" /kaggle/working/dataset.mat

# Check
!ls -lh /kaggle/working/
print("\n✓ Dataset copied")

import os
os.environ['TORCH_COMPILE_DISABLE'] = '1'
os.environ['TORCHDYNAMO_DISABLE'] = '1'

import torch
torch._dynamo.config.suppress_errors = True

import torch.nn as nn
import torch.optim as optim
import scipy.io as sio
import numpy as np
import scipy.stats as stats
from sklearn.metrics import accuracy_score


# Electrode indices
fp1, fp2, f7 = 0, 1, 2
f3, fz, f4 = 3, 4, 5
f8, ft7, fc3 = 6, 7, 8
fcz, fc4, ft8 = 9, 10, 11
t3, c3, cz = 12, 13, 14
c4, t4, tp7 = 15, 16, 17
cp3, cpz, cp4 = 18, 19, 20
tp8a1, t5, p3 = 21, 22, 23
pz, p4, t6a2 = 24, 25, 26
o1, oz, o2 = 27, 28, 29

# Subject boundaries (original sample indices)
startsub = np.zeros(11)
finalsub = np.zeros(11)
startsub[0] = 0
finalsub[0] = 188
startsub[1] = finalsub[0]
finalsub[1] = 320
startsub[2] = finalsub[1]
finalsub[2] = 470
startsub[3] = finalsub[2]
finalsub[3] = 618
startsub[4] = finalsub[3]
finalsub[4] = 842
startsub[5] = finalsub[4]
finalsub[5] = 1008
startsub[6] = finalsub[5]
finalsub[6] = 1110
startsub[7] = finalsub[6]
finalsub[7] = 1374
startsub[8] = finalsub[7]
finalsub[8] = 1688
startsub[9] = finalsub[8]
finalsub[9] = 1796
startsub[10] = finalsub[9]
finalsub[10] = 2022


def bipolar(xdata):
    xdatabipolar = np.zeros((2022, 32, 384))
    # vertical
    # 1
    xdatabipolar[:, 0, :] = xdata[:, fp1, :] - xdata[:, f7, :]
    xdatabipolar[:, 1, :] = xdata[:, f7, :] - xdata[:, t3, :]
    xdatabipolar[:, 2, :] = xdata[:, t3, :] - xdata[:, t5, :]
    xdatabipolar[:, 3, :] = xdata[:, t5, :] - xdata[:, o1, :]
    xdatabipolar[:, 4, :] = xdata[:, o1, :] - xdata[:, p3, :]
    xdatabipolar[:, 5, :] = xdata[:, p3, :] - xdata[:, c3, :]
    xdatabipolar[:, 6, :] = xdata[:, c3, :] - xdata[:, f3, :]
    xdatabipolar[:, 7, :] = xdata[:, f3, :] - xdata[:, fp1, :]
    # 2
    xdatabipolar[:, 8, :] = xdata[:, pz, :] - xdata[:, cz, :]
    xdatabipolar[:, 9, :] = xdata[:, cz, :] - xdata[:, fz, :]
    # 3
    xdatabipolar[:, 10, :] = xdata[:, fp2, :] - xdata[:, f8, :]
    xdatabipolar[:, 11, :] = xdata[:, f8, :] - xdata[:, t4, :]
    xdatabipolar[:, 12, :] = xdata[:, t4, :] - xdata[:, t6a2, :]
    xdatabipolar[:, 13, :] = xdata[:, t6a2, :] - xdata[:, o2, :]
    xdatabipolar[:, 14, :] = xdata[:, o2, :] - xdata[:, p4, :]
    xdatabipolar[:, 15, :] = xdata[:, p4, :] - xdata[:, c4, :]
    xdatabipolar[:, 16, :] = xdata[:, c4, :] - xdata[:, f4, :]
    xdatabipolar[:, 17, :] = xdata[:, f4, :] - xdata[:, fp2, :]
    # horizontal
    # 4
    xdatabipolar[:, 18, :] = xdata[:, fp1, :] - xdata[:, fp2, :]
    xdatabipolar[:, 19, :] = xdata[:, f8, :] - xdata[:, f4, :]
    xdatabipolar[:, 20, :] = xdata[:, f4, :] - xdata[:, fz, :]
    xdatabipolar[:, 21, :] = xdata[:, fz, :] - xdata[:, f3, :]
    xdatabipolar[:, 22, :] = xdata[:, f3, :] - xdata[:, f7, :]
    # 5
    xdatabipolar[:, 23, :] = xdata[:, t3, :] - xdata[:, c3, :]
    xdatabipolar[:, 24, :] = xdata[:, c3, :] - xdata[:, cz, :]
    xdatabipolar[:, 25, :] = xdata[:, cz, :] - xdata[:, c4, :]
    xdatabipolar[:, 26, :] = xdata[:, c4, :] - xdata[:, t4, :]
    # 6
    xdatabipolar[:, 27, :] = xdata[:, t5, :] - xdata[:, p3, :]
    xdatabipolar[:, 28, :] = xdata[:, p3, :] - xdata[:, pz, :]
    xdatabipolar[:, 29, :] = xdata[:, pz, :] - xdata[:, p4, :]
    xdatabipolar[:, 30, :] = xdata[:, p4, :] - xdata[:, t6a2, :]
    xdatabipolar[:, 31, :] = xdata[:, o2, :] - xdata[:, o1, :]
    return xdatabipolar


def zscoresubjective(xdatabipolar):
    startsub = np.zeros(11)
    finalsub = np.zeros(11)
    startsub[0] = 0
    finalsub[0] = 188 * 3 - 2
    startsub[1] = finalsub[0]
    finalsub[1] = 320 * 3 - 2
    startsub[2] = finalsub[1]
    finalsub[2] = 470 * 3 - 2
    startsub[3] = finalsub[2]
    finalsub[3] = 618 * 3 - 2
    startsub[4] = finalsub[3]
    finalsub[4] = 842 * 3 - 2
    startsub[5] = finalsub[4]
    finalsub[5] = 1008 * 3 - 2
    startsub[6] = finalsub[5]
    finalsub[6] = 1110 * 3 - 2
    startsub[7] = finalsub[6]
    finalsub[7] = 1374 * 3 - 2
    startsub[8] = finalsub[7]
    finalsub[8] = 1688 * 3 - 2
    startsub[9] = finalsub[8]
    finalsub[9] = 1796 * 3 - 2
    startsub[10] = finalsub[9]
    finalsub[10] = 2022 * 3 - 2

    for i in range(0, 11):
        xdatabipolar[int(startsub[i]):int(finalsub[i])] = stats.zscore(
            xdatabipolar[int(startsub[i]):int(finalsub[i])]
        )
    return xdatabipolar


def hamposhanisub(xdata, label, ncha):
    n = len(xdata)
    k = 3 * n - 2
    newdata = np.zeros((k, ncha, 384))
    newlabel = np.zeros((k))
    leni1 = 0
    for iindex in range(11):
        s = startsub[iindex]
        f = finalsub[iindex]
        leni2 = leni1 + int(f - s)
        for i in range(leni1, leni2 - 1):
            newdata[(3 * i), :, :] = xdata[i, :, :]

            newdata[(3 * i) + 1, :, :256] = xdata[i, :, 128:]
            newdata[(3 * i) + 1, :, 256:] = xdata[i + 1, :, :128]

            newdata[(3 * i) + 2, :, :128] = xdata[i, :, 256:]
            newdata[(3 * i) + 2, :, 128:] = xdata[i + 1, :, :256]

        for i in range(leni1, leni2 - 1):
            newlabel[(3 * i)] = label[i]
            newlabel[(3 * i) + 1] = label[i]
            newlabel[(3 * i) + 2] = label[i + 1]
        leni1 = leni2

    startsub[0] = 0
    finalsub[0] = 188 * 3 - 2
    startsub[1] = finalsub[0]
    finalsub[1] = 320 * 3 - 2
    startsub[2] = finalsub[1]
    finalsub[2] = 470 * 3 - 2
    startsub[3] = finalsub[2]
    finalsub[3] = 618 * 3 - 2
    startsub[4] = finalsub[3]
    finalsub[4] = 842 * 3 - 2
    startsub[5] = finalsub[4]
    finalsub[5] = 1008 * 3 - 2
    startsub[6] = finalsub[5]
    finalsub[6] = 1110 * 3 - 2
    startsub[7] = finalsub[6]
    finalsub[7] = 1374 * 3 - 2
    startsub[8] = finalsub[7]
    finalsub[8] = 1688 * 3 - 2
    startsub[9] = finalsub[8]
    finalsub[9] = 1796 * 3 - 2
    startsub[10] = finalsub[9]
    finalsub[10] = 2022 * 3 - 2

    return newdata, newlabel, finalsub, startsub


APPLY_BIPOLAR = True
APPLY_OVERLAP = True
APPLY_ZSCORE = True

N_RUNS = 2
N_EPOCHS = 11
BATCH_SIZE = 50
LEARNING_RATE = 0.001
DATASET_PATH = 'dataset.mat'

print("=" * 70)
print("Settings:")
print(f"  Bipolar: {APPLY_BIPOLAR}")
print(f"  Overlap: {APPLY_OVERLAP}")
print(f"  Z-score: {APPLY_ZSCORE}")
print(f"  Runs: {N_RUNS}")
print(f"  Epochs: {N_EPOCHS}")
print("=" * 70)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"\nUsing device: {device}")

print("\n[1] Loading dataset...")
data = sio.loadmat(DATASET_PATH)
original_data = np.array(data['EEGsample'])
original_labels = np.array(data['substate']).flatten().astype(int)
print(f"✓ Shape: {original_data.shape}")

def create_subject_index():
    subIdx = np.zeros(2022, dtype=int)
    boundaries = [0, 188, 320, 470, 618, 842, 1008, 1110, 1374, 1688, 1796, 2022]
    for i in range(11):
        subIdx[boundaries[i]:boundaries[i + 1]] = i + 1
    return subIdx

print("\n[2] Preprocessing...")
processed_data = original_data.copy()
processed_labels = original_labels.copy()
subIdx = create_subject_index()
n_channels = 30

if APPLY_BIPOLAR:
    print("  -> Applying bipolar montage...")
    processed_data = bipolar(processed_data)
    n_channels = 32
    print(f"    Channels: {original_data.shape[1]} -> {n_channels}")

if APPLY_OVERLAP:
    print("  -> Applying overlap augmentation...")
    before = processed_data.shape[0]
    processed_data, processed_labels, finalsub_aug, startsub_aug = hamposhanisub(
        processed_data, processed_labels, n_channels
    )
    print(f"    Samples: {before} -> {processed_data.shape[0]}")

    new_subIdx = np.zeros(processed_data.shape[0], dtype=int)
    idx = 0
    for subj in range(11):
        n_augmented = int(finalsub_aug[subj] - (startsub_aug[subj] if subj == 0 else finalsub_aug[subj - 1]))
        new_subIdx[idx:idx + n_augmented] = subj + 1
        idx += n_augmented
    subIdx = new_subIdx

if APPLY_ZSCORE:
    print("  -> Applying subject-wise z-score normalization...")
    processed_data = zscoresubjective(processed_data)
    print(f"    Mean: {np.mean(processed_data):.4f}")

print(f"✓ Final shape: {processed_data.shape}")


✓ Installation completed
total 173M
-rw-r--r-- 1 root root 173M Jan  3 07:11 dataset.mat
---------- 1 root root  31K Jan  3 07:10 __notebook__.ipynb

✓ Dataset copied
Settings:
  Bipolar: True
  Overlap: True
  Z-score: True
  Runs: 2
  Epochs: 11

Using device: cpu

[1] Loading dataset...
✓ Shape: (2022, 30, 384)

[2] Preprocessing...
  -> Applying bipolar montage...
    Channels: 30 -> 32
  -> Applying overlap augmentation...
    Samples: 2022 -> 6064
  -> Applying subject-wise z-score normalization...
    Mean: 0.0000
✓ Final shape: (6064, 32, 384)


---
Cell 2 — Feature Extraction
---
---

In [3]:
def _bandpower_from_psd(freqs, psd, fmin, fmax):
    mask = (freqs >= fmin) & (freqs < fmax)
    if not np.any(mask):
        return 0.0
    return np.trapz(psd[mask], freqs[mask])

def _hjorth_params(x):
    x = np.asarray(x)
    dx = np.diff(x)
    ddx = np.diff(dx)
    var_x = np.var(x) + 1e-12
    var_dx = np.var(dx) + 1e-12
    var_ddx = np.var(ddx) + 1e-12
    activity = var_x
    mobility = np.sqrt(var_dx / var_x)
    complexity = np.sqrt(var_ddx / var_dx) / (mobility + 1e-12)
    return activity, mobility, complexity

def extract_eeg_features(X, fs, bands):
    # X: (N, C, T)
    N, C, T = X.shape
    freqs = np.fft.rfftfreq(T, d=1.0 / fs)
    feats = []

    for i in range(N):
        sample_feats = []
        for ch in range(C):
            x = X[i, ch, :]
            x = x - np.mean(x)

            fft = np.fft.rfft(x)
            psd = (np.abs(fft) ** 2) / (T + 1e-12)

            band_powers = {}
            total_power = _bandpower_from_psd(freqs, psd, 0.5, 45.0) + 1e-12
            for name, (fmin, fmax) in bands.items():
                bp = _bandpower_from_psd(freqs, psd, fmin, fmax)
                band_powers[name] = bp

            # Relative bandpowers
            rel = [band_powers[k] / total_power for k in ["delta", "theta", "alpha", "beta", "gamma"]]

            # Ratios often useful for drowsiness
            theta_alpha = band_powers["theta"] / (band_powers["alpha"] + 1e-12)
            theta_beta = band_powers["theta"] / (band_powers["beta"] + 1e-12)
            (act, mob, comp) = _hjorth_params(x)

            # Spectral entropy (0..1)
            p = psd / (np.sum(psd) + 1e-12)
            spec_entropy = -np.sum(p * np.log(p + 1e-12)) / (np.log(len(p)) + 1e-12)

            sample_feats.extend(rel)
            sample_feats.extend([theta_alpha, theta_beta])
            sample_feats.extend([act, mob, comp])
            sample_feats.append(spec_entropy)

        feats.append(sample_feats)

    return np.asarray(feats, dtype=np.float64)

feature_X = None
feature_y = None
feature_groups = None

if ENABLE_FEATURE_EXTRACTION == 1:
    print("[Feature Extraction] Computing EEG features...")
    feature_X = extract_eeg_features(processed_data, FS, BANDS)
    feature_y = processed_labels.astype(int).copy()
    feature_groups = subIdx.astype(int).copy()
    print(f"[Feature Extraction] feature_X shape: {feature_X.shape}")
else:
    print("[Feature Extraction] Disabled")


[Feature Extraction] Computing EEG features...


  return np.trapz(psd[mask], freqs[mask])


[Feature Extraction] feature_X shape: (6064, 352)


---
Cell 3 — Feature Selection
---
---

In [4]:
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif, mutual_info_classif

def fit_transform_selector(X_train, y_train, X_test, method="f_classif", k=200):
    scaler = StandardScaler()
    X_train_s = scaler.fit_transform(X_train)
    X_test_s = scaler.transform(X_test)

    if k is None or k <= 0 or k >= X_train_s.shape[1]:
        return X_train_s, X_test_s, scaler, None

    if method == "mutual_info":
        selector = SelectKBest(mutual_info_classif, k=k)
    else:
        selector = SelectKBest(f_classif, k=k)

    X_train_sel = selector.fit_transform(X_train_s, y_train)
    X_test_sel = selector.transform(X_test_s)
    return X_train_sel, X_test_sel, scaler, selector

print("[Feature Selection] Ready (applied inside training loop if enabled)")


[Feature Selection] Ready (applied inside training loop if enabled)


---
Cell 4 — Model Training
---
---

In [5]:
from sklearn.linear_model import LogisticRegression

class InterpretableCNN(nn.Module):
    def __init__(self, n_channels=30):
        super().__init__()
        self.temp_conv = nn.Conv2d(1, 40, (1, 64), bias=False)
        self.spat_conv = nn.Conv2d(40, 40, (n_channels, 1), bias=False)
        self.bn = nn.BatchNorm2d(40, track_running_stats=False)
        self.pool = nn.AvgPool2d((1, 8))
        self.fc = nn.Linear(40 * ((384 - 64 + 1) // 8), 2)
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        x = self.temp_conv(x)
        x = self.spat_conv(x)
        x = self.bn(x)
        x = torch.square(x)
        x = self.pool(x)
        x = torch.log(torch.clamp(x, min=1e-6))
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return self.log_softmax(x)


print(f"[Training] PIPELINE_MODE={PIPELINE_MODE}")
all_results = np.zeros((N_RUNS, 11))

for run in range(N_RUNS):
    print(f"\n{'=' * 70}")
    print(f"Run {run + 1}/{N_RUNS}")
    print(f"{'=' * 70}")

    torch.manual_seed(run)
    np.random.seed(run)

    for test_subj in range(1, 12):
        train_mask = (subIdx != test_subj)
        test_mask = (subIdx == test_subj)

        if PIPELINE_MODE == 0:
            # CNN on time-series
            x_train = processed_data[train_mask].reshape(-1, 1, n_channels, 384)
            y_train = processed_labels[train_mask].astype(np.longlong)
            x_test = processed_data[test_mask].reshape(-1, 1, n_channels, 384)
            y_test = processed_labels[test_mask].astype(np.longlong)

            train_dataset = torch.utils.data.TensorDataset(
                torch.from_numpy(x_train), torch.from_numpy(y_train)
            )
            train_loader = torch.utils.data.DataLoader(
                train_dataset, batch_size=BATCH_SIZE, shuffle=True
            )

            model = InterpretableCNN(n_channels=n_channels).double().to(device)
            optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
            criterion = nn.NLLLoss()

            model.train()
            for epoch in range(N_EPOCHS):
                for batch_x, batch_y in train_loader:
                    batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                    optimizer.zero_grad()
                    output = model(batch_x)
                    loss = criterion(output, batch_y)
                    loss.backward()
                    optimizer.step()

            model.eval()
            with torch.no_grad():
                x_test_t = torch.DoubleTensor(x_test).to(device)
                pred = model(x_test_t).cpu().numpy().argmax(axis=1)
                acc = accuracy_score(y_test, pred)

            all_results[run, test_subj - 1] = acc
            print(f"  Subject {test_subj:2d}: {acc:.4f}")

            del model
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

        else:
            # Feature-based ML
            if feature_X is None or feature_y is None or feature_groups is None:
                raise RuntimeError("Feature extraction is required for PIPELINE_MODE=1. Set ENABLE_FEATURE_EXTRACTION=1.")

            X_train = feature_X[train_mask]
            y_train = feature_y[train_mask]
            X_test = feature_X[test_mask]
            y_test = feature_y[test_mask]

            if ENABLE_FEATURE_SELECTION == 1:
                X_train, X_test, scaler, selector = fit_transform_selector(
                    X_train, y_train, X_test, method=FEATURE_SELECTION_METHOD, k=FEATURE_K
                )
            else:
                X_train, X_test, scaler, selector = fit_transform_selector(
                    X_train, y_train, X_test, method=FEATURE_SELECTION_METHOD, k=0
                )

            clf = LogisticRegression(
                max_iter=2000,
                class_weight="balanced",
                random_state=run,
                n_jobs=None
            )
            clf.fit(X_train, y_train)
            pred = clf.predict(X_test)
            acc = accuracy_score(y_test, pred)

            all_results[run, test_subj - 1] = acc
            print(f"  Subject {test_subj:2d}: {acc:.4f}")

    print(f"Run mean: {np.mean(all_results[run]):.4f}")


[Training] PIPELINE_MODE=1

Run 1/2
  Subject  1: 0.7171
  Subject  2: 0.9015
  Subject  3: 0.7289
  Subject  4: 0.6757
  Subject  5: 0.8140
  Subject  6: 0.7892
  Subject  7: 0.6307
  Subject  8: 0.6503
  Subject  9: 0.8493
  Subject 10: 0.8827
  Subject 11: 0.7301
Run mean: 0.7608

Run 2/2
  Subject  1: 0.7171
  Subject  2: 0.9015
  Subject  3: 0.7289
  Subject  4: 0.6757
  Subject  5: 0.8140
  Subject  6: 0.7892
  Subject  7: 0.6307
  Subject  8: 0.6503
  Subject  9: 0.8493
  Subject 10: 0.8827
  Subject 11: 0.7301
Run mean: 0.7608


---
Cell 5 — Post-processing
---
---

In [6]:
def smooth_predictions(pred, window=5):
    pred = np.asarray(pred).astype(int)
    if window is None or window <= 1:
        return pred
    if window % 2 == 0:
        window += 1

    pad = window // 2
    padded = np.pad(pred, (pad, pad), mode="edge")
    out = pred.copy()

    for i in range(len(pred)):
        w = padded[i:i + window]
        out[i] = 1 if np.mean(w) >= 0.5 else 0
    return out

print("[Post-processing] Ready (apply in evaluation if enabled)")


[Post-processing] Ready (apply in evaluation if enabled)


---
Cell 6 — Evaluation
---
---

In [7]:
# Optional: apply post-processing to stored results would require keeping per-subject predictions.
# This cell provides final reporting for the current pipeline outputs (accuracy per subject).

print("\n" + "=" * 70)
print("Final Results")
print("=" * 70)

print("\nAccuracy per Subject:")
for i in range(11):
    mean = np.mean(all_results[:, i])
    std = np.std(all_results[:, i])
    print(f"  Subject {i + 1:2d}: {mean:.4f} ± {std:.4f}")

print(f"\nOverall mean: {np.mean(all_results):.4f}")
print(f"Overall std:  {np.std(all_results):.4f}")

np.save("results_final.npy", all_results)
print("\n✓ Results saved")



Final Results

Accuracy per Subject:
  Subject  1: 0.7171 ± 0.0000
  Subject  2: 0.9015 ± 0.0000
  Subject  3: 0.7289 ± 0.0000
  Subject  4: 0.6757 ± 0.0000
  Subject  5: 0.8140 ± 0.0000
  Subject  6: 0.7892 ± 0.0000
  Subject  7: 0.6307 ± 0.0000
  Subject  8: 0.6503 ± 0.0000
  Subject  9: 0.8493 ± 0.0000
  Subject 10: 0.8827 ± 0.0000
  Subject 11: 0.7301 ± 0.0000

Overall mean: 0.7608
Overall std:  0.0886

✓ Results saved
