In [1]:
!pip install hmmlearn

Collecting hmmlearn
  Downloading hmmlearn-0.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Downloading hmmlearn-0.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (165 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m165.9/165.9 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: hmmlearn
Successfully installed hmmlearn-0.3.3


In [2]:
# ============================
# CELL 1 — Import & Config
# ============================

import os
from glob import glob
import numpy as np
import mne
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.decomposition import PCA
from hmmlearn import hmm
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from tqdm import tqdm

# Config
BASE_PATH = "/kaggle/input/eeg-oddball/EEG"
TMIN, TMAX = -0.2, 0.8
FMIN, FMAX = 0.1, 20
P300_WINDOW = (0.25, 0.35)
RANDOM_STATE = 42
BATCH_SIZE = 64
EPOCHS = 30
LEARNING_RATE = 1e-3
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Device:", DEVICE)

# List subjects
subject_dirs = sorted(glob(os.path.join(BASE_PATH, "sub*")))
print("Found subjects:", len(subject_dirs))


Device: cuda
Found subjects: 42


In [3]:
# ============================
# CELL 2 — Process one subject
# ============================

def process_subject_folder(sub_folder, event_id, tmin=TMIN, tmax=TMAX,
                           fmin=FMIN, fmax=FMAX):

    vhdr_files = glob(os.path.join(sub_folder, "*.vhdr"))
    if len(vhdr_files) == 0:
        raise FileNotFoundError(f"No .vhdr found in {sub_folder}")

    vhdr = vhdr_files[0]
    raw = mne.io.read_raw_brainvision(vhdr, preload=True, verbose=False)

    raw.filter(fmin, fmax, fir_design='firwin', verbose=False)
    raw.pick_types(meg=False, eeg=True, eog=True, stim=False, verbose=False)

    events, event_dict = mne.events_from_annotations(raw, verbose=False)

    epochs = mne.Epochs(
        raw, events, event_id=event_id, tmin=tmin, tmax=tmax,
        baseline=(None, 0), preload=True, verbose=False
    )

    if len(epochs) == 0:
        print(f"Warning: no epochs for {sub_folder}")
        return None, None, None, None, None

    X = epochs.get_data()
    y = epochs.events[:, -1]
    return X, y, epochs.times, raw.info["sfreq"], event_dict


In [4]:
# ============================
# CELL 3 — Extract event_dict
# ============================

# Load 1 subject to get event_ids
tmp_sub = subject_dirs[0]
tmp_vhdr = glob(os.path.join(tmp_sub, "*.vhdr"))[0]
tmp_raw = mne.io.read_raw_brainvision(tmp_vhdr, preload=False, verbose=False)
tmp_events, tmp_dict = mne.events_from_annotations(tmp_raw, verbose=False)

print("Event_dict:", tmp_dict)

EVENT_ID = {
    "standard": tmp_dict["Stimulus/S  6"],
    "oddball":  tmp_dict["Stimulus/S  7"]
}
print("Using EVENT_ID:", EVENT_ID)


Event_dict: {'New Segment/': 99999, 'Stimulus/S  1': 1, 'Stimulus/S  5': 5, 'Stimulus/S  6': 6, 'Stimulus/S  7': 7, 'Stimulus/S 10': 10, 'Stimulus/S 12': 12, 'Time 0/': 10001}
Using EVENT_ID: {'standard': 6, 'oddball': 7}


In [5]:
# ============================
# CELL 4 — Load all subjects
# ============================

all_X = []
all_y = []
all_sub_idx = []

for i, sub in enumerate(subject_dirs):
    print(f"Processing {sub} ({i+1}/{len(subject_dirs)})")

    X, y, times, sfreq, evdict = process_subject_folder(sub, EVENT_ID)
    if X is None:
        continue

    all_X.append(X)
    all_y.append(y)
    all_sub_idx.append(np.full(len(y), i))

X_all = np.vstack(all_X)
y_all = np.hstack(all_y)
sub_idx_all = np.hstack(all_sub_idx)

print("Total trials:", X_all.shape)
print("Label distribution:", np.unique(y_all, return_counts=True))


Processing /kaggle/input/eeg-oddball/EEG/sub 01 (1/42)
Processing /kaggle/input/eeg-oddball/EEG/sub 02 (2/42)
Processing /kaggle/input/eeg-oddball/EEG/sub 03 (3/42)
Processing /kaggle/input/eeg-oddball/EEG/sub 04 (4/42)
Processing /kaggle/input/eeg-oddball/EEG/sub 05 (5/42)
Processing /kaggle/input/eeg-oddball/EEG/sub 06 (6/42)
Processing /kaggle/input/eeg-oddball/EEG/sub 07 (7/42)
Processing /kaggle/input/eeg-oddball/EEG/sub 08 (8/42)
Processing /kaggle/input/eeg-oddball/EEG/sub 09 (9/42)
Processing /kaggle/input/eeg-oddball/EEG/sub 10 (10/42)
Processing /kaggle/input/eeg-oddball/EEG/sub 11 (11/42)
Processing /kaggle/input/eeg-oddball/EEG/sub 12 (12/42)
Processing /kaggle/input/eeg-oddball/EEG/sub 13 (13/42)
Processing /kaggle/input/eeg-oddball/EEG/sub 14 (14/42)
Processing /kaggle/input/eeg-oddball/EEG/sub 15 (15/42)
Processing /kaggle/input/eeg-oddball/EEG/sub 16 (16/42)
Processing /kaggle/input/eeg-oddball/EEG/sub 17 (17/42)
Processing /kaggle/input/eeg-oddball/EEG/sub 18 (18/42)
P

In [6]:
# ============================
# CELL 5 — Feature Extraction
# ============================

# Convert labels to 0/1
label_map = {
    list(EVENT_ID.values())[0]: 0,
    list(EVENT_ID.values())[1]: 1
}
y_bin = np.array([label_map[v] for v in y_all])

# P300 ERP mean amplitude
t = times
mask = (t >= P300_WINDOW[0]) & (t <= P300_WINDOW[1])

def extract_erp_features(X):
    seg_mean = X[:, :, mask].mean(axis=2)
    return seg_mean.reshape(seg_mean.shape[0], -1)

X_feat = extract_erp_features(X_all)
print("ERP feature shape:", X_feat.shape)


ERP feature shape: (6847, 127)


In [7]:
# ============================
# CELL 5b — HMM Feature Extraction
# ============================

N_STATES = 4      # số hidden states
N_PCA = 5         # số chiều PCA cho HMM

print("\n=== Extracting HMM features ===")

# ---- Step 1: PCA across each trial (flatten ch × time) ----
X_flat = X_all.reshape(X_all.shape[0], -1)
pca = PCA(n_components=N_PCA)
X_pca = pca.fit_transform(X_flat)

# reshape lại thành chuỗi time series cho HMM
X_seq = X_pca.reshape(X_all.shape[0], -1, 1)  # shape: (trials, time', 1)

# ---- Step 2: Train 1 global HMM for all data ----
X_concat = np.vstack([x for x in X_seq])    # concatenate into (T_total, 1)
lengths = [len(x) for x in X_seq]           # sequence lengths

hmm_model = hmm.GaussianHMM(
    n_components=N_STATES,
    covariance_type="diag",
    n_iter=100,
    random_state=RANDOM_STATE
)

print("Training HMM...")
hmm_model.fit(X_concat, lengths)

# ---- Step 3: Extract HMM features for each trial ----
def extract_hmm_features(model, X_seq):
    feats = []
    for seq in X_seq:
        ll = model.score(seq)  # log-likelihood
        states = model.predict(seq)
        # state fraction
        frac = np.bincount(states, minlength=model.n_components) / len(states)
        # transition prob flatten
        trans = model.transmat_.flatten()
        f = np.concatenate([[ll], frac, trans])
        feats.append(f)
    return np.array(feats)

X_hmm = extract_hmm_features(hmm_model, X_seq)
print("HMM feature shape:", X_hmm.shape)

# ---- Step 4: Combine ERP + HMM features ----
X_feat_full = np.concatenate([X_feat, X_hmm], axis=1)
print("Final feature shape:", X_feat_full.shape)



=== Extracting HMM features ===
Training HMM...


Model is not converging.  Current: 168607.20383680574 is not greater than 168618.2039171142. Delta is -11.000080308469478


HMM feature shape: (6847, 21)
Final feature shape: (6847, 148)


In [8]:
# ============================
# CELL 6 — Train ML models
# ============================

X_train, X_test, y_train, y_test = train_test_split(
    X_feat, y_bin, test_size=0.2, random_state=RANDOM_STATE, stratify=y_bin
)

scaler = StandardScaler().fit(X_train)
X_train_s = scaler.transform(X_train)
X_test_s = scaler.transform(X_test)

# Logistic Regression
print("\n--- Logistic Regression ---")
clf_lr = LogisticRegression(max_iter=1000)
clf_lr.fit(X_train_s, y_train)
pred_lr = clf_lr.predict(X_test_s)
print("Accuracy:", accuracy_score(y_test, pred_lr))
print(classification_report(y_test, pred_lr))

# Random Forest
print("\n--- Random Forest ---")
clf_rf = RandomForestClassifier(n_estimators=200, random_state=RANDOM_STATE)
clf_rf.fit(X_train_s, y_train)
pred_rf = clf_rf.predict(X_test_s)
print("Accuracy:", accuracy_score(y_test, pred_rf))
print(classification_report(y_test, pred_rf))

# SVM RBF
print("\n--- SVM (RBF Kernel) ---")
clf_svm = SVC(kernel="rbf", C=1.0, gamma="scale")
clf_svm.fit(X_train_s, y_train)
pred_svm = clf_svm.predict(X_test_s)
print("Accuracy:", accuracy_score(y_test, pred_svm))
print(classification_report(y_test, pred_svm))


# Gradient Boosting
print("\n--- Gradient Boosting ---")
clf_gb = GradientBoostingClassifier()
clf_gb.fit(X_train, y_train)  # Tree-based models không cần scale
pred_gb = clf_gb.predict(X_test)
print("Accuracy:", accuracy_score(y_test, pred_gb))
print(classification_report(y_test, pred_gb))


# XGBoost
print("\n--- XGBoost ---")
try:
    clf_xgb = XGBClassifier(
        n_estimators=300,
        learning_rate=0.05,
        max_depth=5,
        subsample=0.8,
        colsample_bytree=0.8,
        objective="binary:logistic",
        eval_metric="logloss"
    )
    clf_xgb.fit(X_train, y_train)
    pred_xgb = clf_xgb.predict(X_test)
    print("Accuracy:", accuracy_score(y_test, pred_xgb))
    print(classification_report(y_test, pred_xgb))
except:
    print("⚠️ XGBoost không khả dụng trên môi trường này.")


# LDA
print("\n--- LDA (Linear Discriminant Analysis) ---")
clf_lda = LinearDiscriminantAnalysis()
clf_lda.fit(X_train_s, y_train)
pred_lda = clf_lda.predict(X_test_s)
print("Accuracy:", accuracy_score(y_test, pred_lda))
print(classification_report(y_test, pred_lda))



--- Logistic Regression ---
Accuracy: 0.5328467153284672
              precision    recall  f1-score   support

           0       0.53      0.50      0.52       685
           1       0.53      0.56      0.55       685

    accuracy                           0.53      1370
   macro avg       0.53      0.53      0.53      1370
weighted avg       0.53      0.53      0.53      1370


--- Random Forest ---
Accuracy: 0.5211678832116788
              precision    recall  f1-score   support

           0       0.52      0.55      0.53       685
           1       0.52      0.49      0.51       685

    accuracy                           0.52      1370
   macro avg       0.52      0.52      0.52      1370
weighted avg       0.52      0.52      0.52      1370


--- SVM (RBF Kernel) ---
Accuracy: 0.5401459854014599
              precision    recall  f1-score   support

           0       0.54      0.54      0.54       685
           1       0.54      0.54      0.54       685

    accuracy     

In [9]:
# ============================
# CELL 6b — Train ML models with HMM
# ============================

X_train, X_test, y_train, y_test = train_test_split(
    X_feat_full, y_bin, test_size=0.2, random_state=RANDOM_STATE, stratify=y_bin
)


scaler = StandardScaler().fit(X_train)
X_train_s = scaler.transform(X_train)
X_test_s = scaler.transform(X_test)

# Logistic Regression
print("\n--- Logistic Regression ---")
clf_lr = LogisticRegression(max_iter=1000)
clf_lr.fit(X_train_s, y_train)
pred_lr = clf_lr.predict(X_test_s)
print("Accuracy:", accuracy_score(y_test, pred_lr))
print(classification_report(y_test, pred_lr))

# Random Forest
print("\n--- Random Forest ---")
clf_rf = RandomForestClassifier(n_estimators=200, random_state=RANDOM_STATE)
clf_rf.fit(X_train_s, y_train)
pred_rf = clf_rf.predict(X_test_s)
print("Accuracy:", accuracy_score(y_test, pred_rf))
print(classification_report(y_test, pred_rf))

# SVM RBF
print("\n--- SVM (RBF Kernel) ---")
clf_svm = SVC(kernel="rbf", C=1.0, gamma="scale")
clf_svm.fit(X_train_s, y_train)
pred_svm = clf_svm.predict(X_test_s)
print("Accuracy:", accuracy_score(y_test, pred_svm))
print(classification_report(y_test, pred_svm))


# Gradient Boosting
print("\n--- Gradient Boosting ---")
clf_gb = GradientBoostingClassifier()
clf_gb.fit(X_train, y_train)  # Tree-based models không cần scale
pred_gb = clf_gb.predict(X_test)
print("Accuracy:", accuracy_score(y_test, pred_gb))
print(classification_report(y_test, pred_gb))


# XGBoost
print("\n--- XGBoost ---")
try:
    clf_xgb = XGBClassifier(
        n_estimators=300,
        learning_rate=0.05,
        max_depth=5,
        subsample=0.8,
        colsample_bytree=0.8,
        objective="binary:logistic",
        eval_metric="logloss"
    )
    clf_xgb.fit(X_train, y_train)
    pred_xgb = clf_xgb.predict(X_test)
    print("Accuracy:", accuracy_score(y_test, pred_xgb))
    print(classification_report(y_test, pred_xgb))
except:
    print("⚠️ XGBoost không khả dụng trên môi trường này.")


# LDA
print("\n--- LDA (Linear Discriminant Analysis) ---")
clf_lda = LinearDiscriminantAnalysis()
clf_lda.fit(X_train_s, y_train)
pred_lda = clf_lda.predict(X_test_s)
print("Accuracy:", accuracy_score(y_test, pred_lda))
print(classification_report(y_test, pred_lda))



--- Logistic Regression ---
Accuracy: 0.5357664233576642
              precision    recall  f1-score   support

           0       0.54      0.53      0.53       685
           1       0.54      0.54      0.54       685

    accuracy                           0.54      1370
   macro avg       0.54      0.54      0.54      1370
weighted avg       0.54      0.54      0.54      1370


--- Random Forest ---
Accuracy: 0.5401459854014599
              precision    recall  f1-score   support

           0       0.54      0.54      0.54       685
           1       0.54      0.54      0.54       685

    accuracy                           0.54      1370
   macro avg       0.54      0.54      0.54      1370
weighted avg       0.54      0.54      0.54      1370


--- SVM (RBF Kernel) ---
Accuracy: 0.5467153284671533
              precision    recall  f1-score   support

           0       0.55      0.56      0.55       685
           1       0.55      0.53      0.54       685

    accuracy     

In [10]:
# ============================
# CELL 7 — Prepare DL inputs
# ============================

X_dl = X_all.astype(np.float32)
N = X_dl.shape[0]

# Same split as ML
idx = np.arange(N)
idx_train, idx_test = train_test_split(idx, test_size=0.2,
                                       random_state=RANDOM_STATE,
                                       stratify=y_bin)

ch_mean = X_dl[idx_train].mean(axis=(0,2))
ch_std = X_dl[idx_train].std(axis=(0,2)) + 1e-9

for c in range(X_dl.shape[1]):
    X_dl[:, c, :] = (X_dl[:, c, :] - ch_mean[c]) / ch_std[c]

X_dl = X_dl[:, np.newaxis, :, :]

class EEGDataset(Dataset):
    def __init__(self, X, y, indices):
        self.X = torch.from_numpy(X[indices])
        self.y = torch.from_numpy(y[indices].astype(np.int64))
    def __len__(self):
        return len(self.y)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_dataset = EEGDataset(X_dl, y_bin, idx_train)
test_dataset = EEGDataset(X_dl, y_bin, idx_test)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)


In [11]:
# ============================
# CELL 8 — EEGNet model
# ============================

class SimpleEEGNet(nn.Module):
    def __init__(self, in_ch, n_times, n_classes=2, F1=8, D=2):
        super().__init__()
        self.temp_conv = nn.Conv2d(1, F1, kernel_size=(1, 64),
                                   padding=(0,32), bias=False)
        self.bn1 = nn.BatchNorm2d(F1)

        self.depthwise = nn.Conv2d(F1, F1*D, kernel_size=(in_ch,1),
                                   groups=F1, bias=False)
        self.bn2 = nn.BatchNorm2d(F1*D)

        self.pool = nn.AvgPool2d((1,4))
        self.drop = nn.Dropout(0.5)

        out_time = n_times // 4
        self.classify = nn.Sequential(
            nn.Flatten(),
            nn.Linear(F1*D * out_time, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, n_classes)
        )

    def forward(self, x):
        x = torch.relu(self.bn1(self.temp_conv(x)))
        x = torch.relu(self.bn2(self.depthwise(x)))
        x = self.pool(x)
        x = self.drop(x)
        return self.classify(x)

In [12]:
# ============================
# CELL 8b — ShallowConvNet model
# ============================
class ShallowConvNet(nn.Module):
    def __init__(self, in_ch, n_times, n_classes=2):
        super().__init__()

        # Temporal convolution
        self.conv_time = nn.Conv2d(
            1, 40, kernel_size=(1, 25), stride=1, padding=(0, 12), bias=False
        )
        self.bn_time = nn.BatchNorm2d(40)

        # Spatial convolution (depthwise)
        self.conv_spat = nn.Conv2d(
            40, 40, kernel_size=(in_ch, 1), groups=40, bias=False
        )
        self.bn_spat = nn.BatchNorm2d(40)

        # Mean Pooling with big kernel (the original paper uses 75)
        self.pool = nn.AvgPool2d(kernel_size=(1, 75), stride=(1, 15))

        # Compute output size dynamically
        dummy = torch.zeros(1, 1, in_ch, n_times)
        out = self._forward_features(dummy)
        self.flatten_dim = out.numel()

        self.classifier = nn.Linear(self.flatten_dim, n_classes)

    def _forward_features(self, x):
        x = self.conv_time(x)
        x = self.bn_time(x)

        x = self.conv_spat(x)
        x = self.bn_spat(x)

        x = x ** 2                         # Square activation
        x = self.pool(x)
        x = torch.log(torch.clamp(x, min=1e-6))  # Log activation
        return x

    def forward(self, x):
        x = self._forward_features(x)
        x = torch.flatten(x, 1)
        return self.classifier(x)

In [13]:
# ============================
# CELL 8c — DeepConvNet model
# ============================
class DeepConvNet(nn.Module):
    def __init__(self, in_ch, n_times, n_classes=2, dropout=0.5):
        super().__init__()

        self.block1 = nn.Sequential(
            nn.Conv2d(1, 25, kernel_size=(1, 5), stride=1, padding=(0, 2), bias=False),
            nn.Conv2d(25, 25, kernel_size=(in_ch, 1), groups=25, bias=False),
            nn.BatchNorm2d(25),
            nn.ELU(),
            nn.MaxPool2d(kernel_size=(1, 2), stride=(1, 2)),
            nn.Dropout(dropout),
        )

        self.block2 = nn.Sequential(
            nn.Conv2d(25, 50, kernel_size=(1, 5), padding=(0, 2), bias=False),
            nn.BatchNorm2d(50),
            nn.ELU(),
            nn.MaxPool2d((1, 2)),
            nn.Dropout(dropout),
        )

        self.block3 = nn.Sequential(
            nn.Conv2d(50, 100, kernel_size=(1, 5), padding=(0, 2), bias=False),
            nn.BatchNorm2d(100),
            nn.ELU(),
            nn.MaxPool2d((1, 2)),
            nn.Dropout(dropout),
        )

        self.block4 = nn.Sequential(
            nn.Conv2d(100, 200, kernel_size=(1, 5), padding=(0, 2), bias=False),
            nn.BatchNorm2d(200),
            nn.ELU(),
            nn.MaxPool2d((1, 2)),
            nn.Dropout(dropout),
        )

        # Compute feature dimensions
        dummy = torch.zeros(1, 1, in_ch, n_times)
        out = self._forward_features(dummy)
        self.flatten_dim = out.numel()

        self.classifier = nn.Linear(self.flatten_dim, n_classes)

    def _forward_features(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        return x

    def forward(self, x):
        x = self._forward_features(x)
        x = torch.flatten(x, 1)
        return self.classifier(x)

In [15]:
# ============================
# CELL 9 — Train DL model
# ============================
n_channels = X_dl.shape[2]
n_times = X_dl.shape[3]
models = {
    "EEGNet": SimpleEEGNet(n_channels, n_times).to(DEVICE),
    "ShallowConvNet": ShallowConvNet(n_channels, n_times).to(DEVICE),
    "DeepConvNet": DeepConvNet(n_channels, n_times).to(DEVICE),
}

In [17]:
# ============================
# CELL 9 — Train DL model
# ============================
def train_one_model(model, name, train_loader, test_loader, epochs=EPOCHS, lr=LEARNING_RATE):

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    best_acc = 0
    best_path = f"best_{name}.pth"

    for epoch in range(epochs):
        model.train()
        total_loss = 0

        for xb, yb in train_loader:
            xb = xb.to(DEVICE)
            yb = yb.to(DEVICE)

            optimizer.zero_grad()
            out = model(xb)
            loss = criterion(out, yb)
            loss.backward()
            optimizer.step()

            total_loss += loss.item() * xb.size(0)

        # ===== Evaluation =====
        model.eval()
        preds, labels = [], []

        with torch.no_grad():
            for xb, yb in test_loader:
                xb = xb.to(DEVICE)
                out = model(xb)
                p = out.argmax(1).cpu().numpy()
                preds.append(p)
                labels.append(yb.numpy())

        preds = np.concatenate(preds)
        labels = np.concatenate(labels)
        acc = (preds == labels).mean()

        print(f"[{name}] Epoch {epoch+1}/{epochs}  Loss={total_loss/len(train_dataset):.4f}  Acc={acc:.4f}")

        if acc > best_acc:
            best_acc = acc
            torch.save(model.state_dict(), best_path)

    print(f"Best accuracy for {name}: {best_acc:.4f}\n")
    return best_acc


In [18]:
# ============================
# CELL 9 — Train DL model
# ============================
results = {}

for name, model in models.items():
    print("========================================")
    print(f"Training model: {name}")
    print("========================================")
    acc = train_one_model(model, name, train_loader, test_loader,
                          epochs=EPOCHS, lr=LEARNING_RATE)
    results[name] = acc

print("====== FINAL RESULTS ======")
for k, v in results.items():
    print(f"{k}: {v:.4f}")

Training model: EEGNet
[EEGNet] Epoch 1/30  Loss=0.6917  Acc=0.6000
[EEGNet] Epoch 2/30  Loss=0.6511  Acc=0.6336
[EEGNet] Epoch 3/30  Loss=0.6269  Acc=0.6285
[EEGNet] Epoch 4/30  Loss=0.6021  Acc=0.6679
[EEGNet] Epoch 5/30  Loss=0.5879  Acc=0.6796
[EEGNet] Epoch 6/30  Loss=0.5641  Acc=0.6985
[EEGNet] Epoch 7/30  Loss=0.5555  Acc=0.6898
[EEGNet] Epoch 8/30  Loss=0.5369  Acc=0.6898
[EEGNet] Epoch 9/30  Loss=0.5267  Acc=0.6964
[EEGNet] Epoch 10/30  Loss=0.5086  Acc=0.7117
[EEGNet] Epoch 11/30  Loss=0.5065  Acc=0.7146
[EEGNet] Epoch 12/30  Loss=0.5011  Acc=0.7095
[EEGNet] Epoch 13/30  Loss=0.4798  Acc=0.7073
[EEGNet] Epoch 14/30  Loss=0.4791  Acc=0.7073
[EEGNet] Epoch 15/30  Loss=0.4689  Acc=0.7036
[EEGNet] Epoch 16/30  Loss=0.4674  Acc=0.7058
[EEGNet] Epoch 17/30  Loss=0.4613  Acc=0.7153
[EEGNet] Epoch 18/30  Loss=0.4452  Acc=0.7241
[EEGNet] Epoch 19/30  Loss=0.4390  Acc=0.7182
[EEGNet] Epoch 20/30  Loss=0.4405  Acc=0.7153
[EEGNet] Epoch 21/30  Loss=0.4286  Acc=0.7263
[EEGNet] Epoch 22/30

In [19]:
# ============================
# CELL 10 — Evaluate + Save
# ============================
def evaluate_model(model, checkpoint_path, test_loader, device):
    # Load lại trọng số của model
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    model.eval()

    preds, labels = [], []

    with torch.no_grad():
        for xb, yb in test_loader:
            xb = xb.to(device)
            out = model(xb)
            p = out.argmax(1).cpu().numpy()

            preds.append(p)
            labels.append(yb.numpy())

    preds = np.concatenate(preds)
    labels = np.concatenate(labels)

    acc = accuracy_score(labels, preds)
    print("Final Accuracy:", acc)
    print(classification_report(labels, preds))
    print(confusion_matrix(labels, preds))

    return acc

In [21]:
# ============================
# CELL 10 — Evaluate + Save
# ============================

model_paths = {
    "EEGNet": "best_EEGNet.pth",
    "Shallow": "best_ShallowConvNet.pth",
    "DeepConvNet": "best_DeepConvNet.pth"
}

for name, path in model_paths.items():
    print("\n===== Evaluating", name, "=====")

    # tạo model tương ứng
    if name == "EEGNet":
        model = SimpleEEGNet(n_channels, n_times).to(DEVICE)
    elif name == "Shallow":
        model = ShallowConvNet(n_channels, n_times).to(DEVICE)
    elif name == "DeepConvNet":
        model = DeepConvNet(n_channels, n_times).to(DEVICE)

    evaluate_model(model, path, test_loader, DEVICE)

import joblib
joblib.dump(clf_lr, "logreg.pkl")
joblib.dump(clf_rf, "rf.pkl")
joblib.dump(clf_svm,"svm.pkl")
joblib.dump(clf_gb,"gb.pkl")
joblib.dump(clf_xgb,"xgb.pkl")
joblib.dump(clf_lda,"lda.pkl")
joblib.dump(scaler, "scaler.pkl")

print("Saved: logreg.pkl, rf.pkl, svm.pkl, gb.pkl, xgb.pkl, lda.pkl, best_EEGNet.pth, best_ShallowConvNet.pth, best_DeepConvNet.pth")


===== Evaluating EEGNet =====
Final Accuracy: 0.7299270072992701
              precision    recall  f1-score   support

           0       0.72      0.76      0.74       685
           1       0.74      0.70      0.72       685

    accuracy                           0.73      1370
   macro avg       0.73      0.73      0.73      1370
weighted avg       0.73      0.73      0.73      1370

[[519 166]
 [204 481]]

===== Evaluating Shallow =====
Final Accuracy: 0.6693430656934306
              precision    recall  f1-score   support

           0       0.64      0.79      0.70       685
           1       0.72      0.55      0.63       685

    accuracy                           0.67      1370
   macro avg       0.68      0.67      0.66      1370
weighted avg       0.68      0.67      0.66      1370

[[539 146]
 [307 378]]

===== Evaluating DeepConvNet =====
Final Accuracy: 0.6985401459854015
              precision    recall  f1-score   support

           0       0.67      0.79      0.