In [None]:
# !pip install torch pandas scikit-learn numpy 
# !pip install kagglehub

In [None]:
# import kagglehub

# # Download latest version
# path = kagglehub.dataset_download("mansibmursalin/ninapro-db1-full-dataset")

# print("Path to dataset files:", path)

## Training time

In [101]:
import pandas as pd
path = "/Users/guanyulu/Documents/GitHub/BioMechDesignTeam_EMG/data_collection/data/Ninapro_DB1.csv"
data = pd.read_csv(path)
data = data.iloc[:, 1:]

In [102]:
print(data["exercise"].unique())
df = data[data["exercise"] == 3]
df = df[df['subject'] == 1]
df = df.drop(columns=["exercise", "restimulus", "repetition", "rerepetition"])
df.shape, df.columns

[1 2 3]


((227493, 34),
 Index(['emg_0', 'emg_1', 'emg_2', 'emg_3', 'emg_4', 'emg_5', 'emg_6', 'emg_7',
        'emg_8', 'emg_9', 'glove_0', 'glove_1', 'glove_2', 'glove_3', 'glove_4',
        'glove_5', 'glove_6', 'glove_7', 'glove_8', 'glove_9', 'glove_10',
        'glove_11', 'glove_12', 'glove_13', 'glove_14', 'glove_15', 'glove_16',
        'glove_17', 'glove_18', 'glove_19', 'glove_20', 'glove_21', 'stimulus',
        'subject'],
       dtype='object'))

In [103]:
n_classes = len(df['stimulus'].unique())
n_classes

24

In [104]:
import numpy as np

def make_windows(X, y, window_size=200, overlap=50, label_mode="center"):
    stride = window_size - overlap
    Xw, yw = [], []
    T = len(X)

    for start in range(0, T - window_size + 1, stride):
        end = start + window_size
        x_win = X[start:end]              
        y_win = y[start:end]              

        if label_mode == "center":
            label = y_win[window_size // 2]
        elif label_mode == "mode":
            label = np.bincount(y_win).argmax()

        Xw.append(x_win.T)      
        yw.append(label)

    return np.stack(Xw), np.array(yw)


import numpy as np

from scipy.signal import butter, filtfilt

def prep(X, fs=100.0, hp=0.5):
    """
    NinaPro DB1 (fs=100): gentle cleanup only.
    X: (T, C)
    """
    X = np.asarray(X, dtype=np.float64)
    X = X - np.mean(X, axis=0, keepdims=True)  # remove DC

    nyq = fs / 2.0
    # gentle high-pass to reduce drift/motion artifact
    w = hp / nyq
    b, a = butter(2, w, btype="highpass")
    return filtfilt(b, a, X, axis=0)

# def prep(
#     X: np.ndarray,
#     fs: float = 100.0,
#     band: tuple[float, float] | None = None,
#     line_freq: float | None = 60.0,   # set to 60 for NA, 50 for many other places; or None to skip
#     notch_q: float = 30.0,
#     demean: bool = True,
# ):
#     """
#     X: (T, C) array (time x channels)
#     fs: sampling rate in Hz
#     band: (low, high) bandpass in Hz. If None, picks a sensible default based on fs.
#     line_freq: 50 or 60 for notch; set None to disable.
#     """
#     try:
#         from scipy.signal import butter, filtfilt, iirnotch
#     except ImportError as e:
#         raise ImportError("Need scipy for filtering: pip install scipy") from e

#     X = np.asarray(X, dtype=np.float64)

#     # 1) remove DC per channel
#     if demean:
#         X = X - np.mean(X, axis=0, keepdims=True)

#     nyq = fs / 2.0

#     # 2) choose bandpass defaults that respect Nyquist
#     if band is None:
#         if fs <= 120:        # NinaPro DB1-like low fs
#             low, high = 1.0, min(45.0, nyq - 1.0)
#         else:                # typical sEMG hardware
#             low, high = 20.0, min(450.0, nyq - 1.0)
#     else:
#         low, high = band

#     # clamp to valid range
#     low = max(0.1, float(low))
#     high = min(float(high), nyq - 0.5)
#     if not (low < high):
#         # if fs is extremely low, just do a gentle high-pass
#         low, high = 0.5, nyq - 0.5

#     # 3) bandpass
#     b, a = butter(4, [low / nyq, high / nyq], btype="bandpass")
#     Xf = filtfilt(b, a, X, axis=0)

#     # 4) notch (only if it's actually representable)
#     if line_freq is not None and (line_freq < nyq - 1.0):
#         w0 = line_freq / nyq
#         bn, an = iirnotch(w0=w0, Q=notch_q)
#         Xf = filtfilt(bn, an, Xf, axis=0)

#     return Xf
    


y = df["stimulus"].to_numpy()
X = df.drop(columns=["stimulus"]).to_numpy()

X = prep(X, fs = 100)

Xw, yw = make_windows(X, y, window_size=200, overlap=50, label_mode="center")
print(Xw.shape, yw.shape, np.unique(yw).shape)

(1516, 33, 200) (1516,) (24,)


In [105]:
from sklearn.model_selection import train_test_split

X_, X_test, y_, y_test = train_test_split(Xw, yw, test_size=0.2, random_state=42, stratify=yw)
X_train, X_val, y_train, y_val = train_test_split(X_, y_, test_size=0.25, random_state=42, stratify=y_)
print(X_train.shape, X_val.shape, X_test.shape)
print(y_train.shape, y_val.shape, y_test.shape)

(909, 33, 200) (303, 33, 200) (304, 33, 200)
(909,) (303,) (304,)


In [106]:
import numpy as np

mean = X_train.mean(axis=(0, 2), keepdims=True)
std = X_train.std(axis=(0, 2), keepdims=True)

std[std < 1e-8] = 1.0  # Prevent division by zero

def standardize(X, mean, std):
    return (X - mean[None, :, None]) / std[None, :, None]

X_train = standardize(X_train, mean, std)
X_val   = standardize(X_val,   mean, std)
X_test  = standardize(X_test,  mean, std)

X_train = np.squeeze(X_train)   
X_val   = np.squeeze(X_val)     
X_test  = np.squeeze(X_test)

print(X_train.shape, X_val.shape, X_test.shape)

(909, 33, 200) (303, 33, 200) (304, 33, 200)


In [107]:
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)

print("X_val shape:", X_val.shape)
print("y_val shape:", y_val.shape)

X_train shape: (909, 33, 200)
y_train shape: (909,)
X_val shape: (303, 33, 200)
y_val shape: (303,)


In [108]:
import torch 
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch import nn
from torch import optim
import torch.nn.functional as F

class EMGDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.y)  
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]
    
train_set = EMGDataset(X_train, y_train)
val_set = EMGDataset(X_val, y_val)
test_set = EMGDataset(X_test, y_test)


In [109]:
class EMGCNN(nn.Module):
    def __init__(self, input_channels, num_classes):
        super(EMGCNN, self).__init__()
        self.conv1 = nn.Conv1d(input_channels, 64, kernel_size=3, padding=1)
        self.maxpool1 = nn.MaxPool1d(kernel_size=3, stride=2)
        self.conv2 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool1d(kernel_size=3, stride=2)
        self.conv3 = nn.Conv1d(128, 256, kernel_size=3, padding=1)
        self.maxpool3 = nn.MaxPool1d(kernel_size=3, stride=2)
        self.bn = nn.BatchNorm1d(256)
        # self.bn = nn.LayerNorm(256)
        self.relu = nn.ReLU()
        self.fc = nn.Linear(256, num_classes)

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.maxpool1(x)
        x = self.relu(self.conv2(x))
        x = self.maxpool2(x)
        x = self.conv3(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.maxpool3(x)
        # feat = x.mean(dim=2)
        x = self.fc(x.mean(dim=2))
        return x
    

    

device = "mps" if torch.backends.mps.is_available() else "cpu"

model = EMGCNN(input_channels=33, num_classes=n_classes).to(device)
print(model)

EMGCNN(
  (conv1): Conv1d(33, 64, kernel_size=(3,), stride=(1,), padding=(1,))
  (maxpool1): MaxPool1d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=(1,))
  (maxpool2): MaxPool1d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv1d(128, 256, kernel_size=(3,), stride=(1,), padding=(1,))
  (maxpool3): MaxPool1d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (bn): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
  (fc): Linear(in_features=256, out_features=24, bias=True)
)


In [110]:
from tqdm import tqdm

train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
val_loader = DataLoader(val_set, batch_size=32, shuffle=False)
test_loader = DataLoader(test_set, batch_size=32, shuffle=False)

epochs = 30
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005)
for epoch in range(epochs):
  model.train()
  train_loss = 0

  loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} [Train]", leave=True)
  for x, y in loop:
    x, y = x.to(device), y.to(device)

    optimizer.zero_grad()
    yhat = model(x)
    loss = criterion(yhat, y)
    loss.backward()
    optimizer.step()

    train_loss += loss.item()
    loop.set_postfix(train_loss=loss.item())

  train_loss /= len(train_loader)

  model.eval()
  val_loss = 0
  correct = 0
  total = 0
  with torch.no_grad():
    loop = tqdm(val_loader, desc=f"Epoch {epoch+1}/{epochs} [Val]", leave=True)
    for x, y in loop:
      x, y = x.to(device), y.to(device)

      yhat = model(x)
      loss = criterion(yhat, y)
      val_loss += loss.item()

      predicted = yhat.argmax(dim=1)
      total += y.size(0)
      correct += (predicted == y).sum().item()

      loop.set_postfix(val_loss=loss.item())

  val_loss /= len(val_loader)
  val_accuracy = correct / total
  print(f"Epoch {epoch+1}/{epochs} => Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.4f}")

torch.save(model.state_dict(), "best_model.pt")

Epoch 1/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 97.67it/s, train_loss=1.72]
Epoch 1/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 132.28it/s, val_loss=1.95]


Epoch 1/30 => Train Loss: 2.2518, Val Loss: 2.1308, Val Acc: 0.5545


Epoch 2/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 134.87it/s, train_loss=1.54]
Epoch 2/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 244.34it/s, val_loss=1.32]


Epoch 2/30 => Train Loss: 1.5915, Val Loss: 1.4644, Val Acc: 0.6304


Epoch 3/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 127.19it/s, train_loss=0.745]
Epoch 3/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 229.85it/s, val_loss=1.11]


Epoch 3/30 => Train Loss: 1.2888, Val Loss: 1.2253, Val Acc: 0.6634


Epoch 4/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 130.46it/s, train_loss=1.37] 
Epoch 4/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 222.92it/s, val_loss=0.874]


Epoch 4/30 => Train Loss: 1.0796, Val Loss: 1.0373, Val Acc: 0.7096


Epoch 5/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 132.48it/s, train_loss=0.908]
Epoch 5/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 164.54it/s, val_loss=0.689]


Epoch 5/30 => Train Loss: 0.8505, Val Loss: 0.8636, Val Acc: 0.7657


Epoch 6/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 130.58it/s, train_loss=0.201]
Epoch 6/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 236.60it/s, val_loss=0.537]


Epoch 6/30 => Train Loss: 0.6642, Val Loss: 0.7426, Val Acc: 0.8185


Epoch 7/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 127.19it/s, train_loss=0.352]
Epoch 7/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 205.39it/s, val_loss=0.407]


Epoch 7/30 => Train Loss: 0.5593, Val Loss: 0.6233, Val Acc: 0.8482


Epoch 8/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 123.53it/s, train_loss=0.618]
Epoch 8/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 225.51it/s, val_loss=0.373]


Epoch 8/30 => Train Loss: 0.4478, Val Loss: 0.5776, Val Acc: 0.8548


Epoch 9/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 126.70it/s, train_loss=0.389]
Epoch 9/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 258.43it/s, val_loss=0.312]


Epoch 9/30 => Train Loss: 0.3738, Val Loss: 0.5313, Val Acc: 0.8713


Epoch 10/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 134.01it/s, train_loss=0.327]
Epoch 10/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 304.19it/s, val_loss=0.256]


Epoch 10/30 => Train Loss: 0.3243, Val Loss: 0.5054, Val Acc: 0.8680


Epoch 11/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 139.80it/s, train_loss=0.448] 
Epoch 11/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 210.37it/s, val_loss=0.269]


Epoch 11/30 => Train Loss: 0.2767, Val Loss: 0.4937, Val Acc: 0.8680


Epoch 12/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 138.81it/s, train_loss=0.433]
Epoch 12/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 269.01it/s, val_loss=0.163]


Epoch 12/30 => Train Loss: 0.2401, Val Loss: 0.4470, Val Acc: 0.8911


Epoch 13/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 119.56it/s, train_loss=0.201] 
Epoch 13/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 197.24it/s, val_loss=0.182]


Epoch 13/30 => Train Loss: 0.2172, Val Loss: 0.4094, Val Acc: 0.9010


Epoch 14/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 115.95it/s, train_loss=0.0924]
Epoch 14/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 233.93it/s, val_loss=0.237]


Epoch 14/30 => Train Loss: 0.1755, Val Loss: 0.4212, Val Acc: 0.8845


Epoch 15/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 126.60it/s, train_loss=0.0924]
Epoch 15/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 222.48it/s, val_loss=0.171]


Epoch 15/30 => Train Loss: 0.1683, Val Loss: 0.4106, Val Acc: 0.8977


Epoch 16/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 130.14it/s, train_loss=0.249] 
Epoch 16/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 226.12it/s, val_loss=0.165]


Epoch 16/30 => Train Loss: 0.1363, Val Loss: 0.4400, Val Acc: 0.8977


Epoch 17/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 157.78it/s, train_loss=0.22]  
Epoch 17/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 239.64it/s, val_loss=0.154]


Epoch 17/30 => Train Loss: 0.1264, Val Loss: 0.3980, Val Acc: 0.8944


Epoch 18/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 129.21it/s, train_loss=0.452] 
Epoch 18/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 220.60it/s, val_loss=0.204]


Epoch 18/30 => Train Loss: 0.1311, Val Loss: 0.4032, Val Acc: 0.8779


Epoch 19/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 143.57it/s, train_loss=0.152] 
Epoch 19/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 235.68it/s, val_loss=0.128]


Epoch 19/30 => Train Loss: 0.1037, Val Loss: 0.3697, Val Acc: 0.9043


Epoch 20/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 130.31it/s, train_loss=0.19]  
Epoch 20/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 232.90it/s, val_loss=0.14]


Epoch 20/30 => Train Loss: 0.0960, Val Loss: 0.3616, Val Acc: 0.8944


Epoch 21/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 124.16it/s, train_loss=0.123] 
Epoch 21/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 200.09it/s, val_loss=0.146]


Epoch 21/30 => Train Loss: 0.0907, Val Loss: 0.3745, Val Acc: 0.9043


Epoch 22/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 129.51it/s, train_loss=0.0724]
Epoch 22/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 221.76it/s, val_loss=0.163]


Epoch 22/30 => Train Loss: 0.0765, Val Loss: 0.3730, Val Acc: 0.9043


Epoch 23/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 135.89it/s, train_loss=0.033] 
Epoch 23/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 226.54it/s, val_loss=0.148]


Epoch 23/30 => Train Loss: 0.0650, Val Loss: 0.3600, Val Acc: 0.8944


Epoch 24/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 116.39it/s, train_loss=0.0527]
Epoch 24/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 237.84it/s, val_loss=0.201]


Epoch 24/30 => Train Loss: 0.0603, Val Loss: 0.3942, Val Acc: 0.8845


Epoch 25/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 155.14it/s, train_loss=0.0451]
Epoch 25/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 248.79it/s, val_loss=0.0985]


Epoch 25/30 => Train Loss: 0.0467, Val Loss: 0.3657, Val Acc: 0.8977


Epoch 26/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 137.56it/s, train_loss=0.0245]
Epoch 26/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 239.30it/s, val_loss=0.118]


Epoch 26/30 => Train Loss: 0.0396, Val Loss: 0.3621, Val Acc: 0.8944


Epoch 27/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 122.29it/s, train_loss=0.071] 
Epoch 27/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 248.05it/s, val_loss=0.103]


Epoch 27/30 => Train Loss: 0.0361, Val Loss: 0.3692, Val Acc: 0.9010


Epoch 28/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 135.24it/s, train_loss=0.0358]
Epoch 28/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 231.61it/s, val_loss=0.0902]


Epoch 28/30 => Train Loss: 0.0439, Val Loss: 0.3792, Val Acc: 0.9010


Epoch 29/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 140.22it/s, train_loss=0.0373]
Epoch 29/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 230.84it/s, val_loss=0.179]


Epoch 29/30 => Train Loss: 0.0429, Val Loss: 0.3877, Val Acc: 0.8812


Epoch 30/30 [Train]: 100%|██████████| 29/29 [00:00<00:00, 134.59it/s, train_loss=0.0465]
Epoch 30/30 [Val]: 100%|██████████| 10/10 [00:00<00:00, 234.19it/s, val_loss=0.116]


Epoch 30/30 => Train Loss: 0.0384, Val Loss: 0.3807, Val Acc: 0.8977


In [111]:
# to load: 
model.load_state_dict(torch.load("best_model.pt", map_location=device))
model.eval()        

EMGCNN(
  (conv1): Conv1d(33, 64, kernel_size=(3,), stride=(1,), padding=(1,))
  (maxpool1): MaxPool1d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=(1,))
  (maxpool2): MaxPool1d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv1d(128, 256, kernel_size=(3,), stride=(1,), padding=(1,))
  (maxpool3): MaxPool1d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (bn): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
  (fc): Linear(in_features=256, out_features=24, bias=True)
)

In [112]:
from sklearn.metrics import accuracy_score, confusion_matrix

test_loader = DataLoader(test_set, batch_size=32, shuffle=False)
criterion = nn.CrossEntropyLoss()

model.eval()

test_loss = 0.0
correct = 0
total = 0

all_preds = []
all_labels = []

with torch.no_grad():
    for x, y in test_loader:
        x, y = x.to(device), y.to(device)

        yhat = model(x)
        loss = criterion(yhat, y)
        test_loss += loss.item()

        preds = yhat.argmax(dim=1)

        correct += (preds == y).sum().item()
        total += y.size(0)

        all_preds.append(preds.cpu())
        all_labels.append(y.cpu())

test_loss /= len(test_loader)
test_acc = correct / total

all_preds = torch.cat(all_preds)
all_labels = torch.cat(all_labels)

print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")

Test Loss: 0.3937
Test Accuracy: 0.8980


## Try Using on subject 2:

In [113]:
print(data["exercise"].unique())
df2 = data[data["exercise"] == 3]
df2 = df2[df2['subject'] == 2]
df2 = df2.drop(columns=["exercise", "restimulus", "repetition", "rerepetition"])
df2.shape, df2.columns

[1 2 3]


((229084, 34),
 Index(['emg_0', 'emg_1', 'emg_2', 'emg_3', 'emg_4', 'emg_5', 'emg_6', 'emg_7',
        'emg_8', 'emg_9', 'glove_0', 'glove_1', 'glove_2', 'glove_3', 'glove_4',
        'glove_5', 'glove_6', 'glove_7', 'glove_8', 'glove_9', 'glove_10',
        'glove_11', 'glove_12', 'glove_13', 'glove_14', 'glove_15', 'glove_16',
        'glove_17', 'glove_18', 'glove_19', 'glove_20', 'glove_21', 'stimulus',
        'subject'],
       dtype='object'))

In [114]:
y_2 = df2["stimulus"].to_numpy()
X_2 = df2.drop(columns=["stimulus"]).to_numpy()

X_2 = prep(X_2, fs = 100)

Xw_2, yw_2 = make_windows(X_2, y_2, window_size=200, overlap=50, label_mode="center")
print(Xw_2.shape, yw_2.shape, np.unique(yw_2).shape)

(1526, 33, 200) (1526,) (24,)


In [115]:
mean_2 = Xw_2.mean(axis=(0, 2), keepdims=True)
std_2 = Xw_2.std(axis=(0, 2), keepdims=True)

std_2[std_2 < 1e-8] = 1.0  # Prevent division by zero

Xw_2 = standardize(Xw_2, mean_2, std_2)
Xw_2 = np.squeeze(Xw_2)

sub_2_dataset = EMGDataset(Xw_2, yw_2)

In [116]:
from sklearn.metrics import accuracy_score, confusion_matrix

test_loader = DataLoader(sub_2_dataset, batch_size=32, shuffle=False)
criterion = nn.CrossEntropyLoss()

model.eval()

test_loss = 0.0
correct = 0
total = 0

all_preds = []
all_labels = []

with torch.no_grad():
    for x, y in test_loader:
        x, y = x.to(device), y.to(device)

        yhat = model(x)
        loss = criterion(yhat, y)
        test_loss += loss.item()

        preds = yhat.argmax(dim=1)

        correct += (preds == y).sum().item()
        total += y.size(0)

        all_preds.append(preds.cpu())
        all_labels.append(y.cpu())

test_loss /= len(test_loader)
test_acc = correct / total

all_preds = torch.cat(all_preds)
all_labels = torch.cat(all_labels)

print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")

Test Loss: 2.2716
Test Accuracy: 0.4626


## see that test accuracy is significantly lower, meaning emg for gesture classification cannot be generalized across different individual. 

In [117]:
class EMGFeatures(nn.Module):
    def __init__(self, input_channels):
        super().__init__()
        self.conv1 = nn.Conv1d(input_channels, 64, kernel_size=3, padding=1)
        self.maxpool1 = nn.MaxPool1d(kernel_size=3, stride=2)
        self.conv2 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool1d(kernel_size=3, stride=2)
        self.conv3 = nn.Conv1d(128, 256, kernel_size=3, padding=1)
        self.maxpool3 = nn.MaxPool1d(kernel_size=3, stride=2)
        self.bn = nn.BatchNorm1d(256)
        self.relu = nn.ReLU()
        # self.fc = nn.Linear(256, num_classes)

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.maxpool1(x)
        x = self.relu(self.conv2(x))
        x = self.maxpool2(x)
        x = self.conv3(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.maxpool3(x)
        feat = x.mean(dim=2)
        # x = self.fc(x.mean(dim=2))
        return feat
    
class EMGClassifier(nn.Module):
    def __init__(self, feature_dim, n_class):
        super().__init__()
        self.fc = nn.Linear(feature_dim, n_class)

    def forward(self, x):
        x = self.fc(x)
        return x
    

def coral_loss(source_feat, target_feat):
    # source_feat, target_feat: (B, D)
    xm = source_feat - source_feat.mean(0, keepdim=True)
    xc = (xm.t() @ xm) / (source_feat.size(0) - 1)

    ym = target_feat - target_feat.mean(0, keepdim=True)
    yc = (ym.t() @ ym) / (target_feat.size(0) - 1)

    return ((xc - yc) ** 2).mean()

    

device = "mps" if torch.backends.mps.is_available() else "cpu"

model = EMGCNN(input_channels=33, num_classes=n_classes).to(device)
print(model)

EMGCNN(
  (conv1): Conv1d(33, 64, kernel_size=(3,), stride=(1,), padding=(1,))
  (maxpool1): MaxPool1d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=(1,))
  (maxpool2): MaxPool1d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv1d(128, 256, kernel_size=(3,), stride=(1,), padding=(1,))
  (maxpool3): MaxPool1d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (bn): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
  (fc): Linear(in_features=256, out_features=24, bias=True)
)


In [136]:
sub_2_loader = DataLoader(sub_2_dataset, batch_size=32, shuffle=True, drop_last=True)
train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
# val_loader = DataLoader(val_set, batch_size=32, shuffle=False)
# test_loader = DataLoader(test_set, batch_size=32, shuffle=False)

device = "mps" if torch.backends.mps.is_available() else "cpu"

feat_net = EMGFeatures(input_channels=33).to(device)
clf_net  = EMGClassifier(feature_dim=256, n_class=n_classes).to(device)

opt = torch.optim.Adam(list(feat_net.parameters()) + list(clf_net.parameters()), lr=5e-4)
ce  = nn.CrossEntropyLoss()

lambda_coral = 10  # try 0.01, 0.1, 1.0

feat_net.train(); clf_net.train()

for epoch in range(10):
    src_iter = iter(train_loader)
    tgt_iter = iter(sub_2_loader)

    steps = min(len(train_loader), len(sub_2_loader))
    total_cls, total_adapt = 0.0, 0.0

    for _ in range(steps):
        xs, ys = next(src_iter)
        xt, _  = next(tgt_iter)

        xs, ys = xs.to(device), ys.to(device)
        xt     = xt.to(device)

        fs = feat_net(xs)
        ft = feat_net(xt)

        logits_s = clf_net(fs)
        loss_cls = ce(logits_s, ys)
        loss_adapt = coral_loss(fs, ft)

        loss = loss_cls + lambda_coral * loss_adapt

        opt.zero_grad()
        loss.backward()
        opt.step()

        total_cls += loss_cls.item()
        total_adapt += loss_adapt.item()

    print(f"Epoch {epoch+1}: cls={total_cls/steps:.4f}, coral={total_adapt/steps:.4f}")

Epoch 1: cls=2.2176, coral=0.0039
Epoch 2: cls=1.6509, coral=0.0022
Epoch 3: cls=1.3332, coral=0.0034
Epoch 4: cls=1.1070, coral=0.0035
Epoch 5: cls=0.8990, coral=0.0047
Epoch 6: cls=0.7160, coral=0.0048
Epoch 7: cls=0.5884, coral=0.0044
Epoch 8: cls=0.5033, coral=0.0045
Epoch 9: cls=0.4266, coral=0.0052
Epoch 10: cls=0.3852, coral=0.0046


In [137]:
feat_net.eval()
clf_net.eval()

sub_2_loader_eval = DataLoader(sub_2_dataset, batch_size=32, shuffle=False)

correct = 0
total = 0

with torch.no_grad():
    for x, y in sub_2_loader_eval:   # IMPORTANT: use a NON-shuffled loader
        x = x.to(device)
        y = y.to(device)

        feat = feat_net(x)
        logits = clf_net(feat)

        preds = torch.argmax(logits, dim=1)

        correct += (preds == y).sum().item()
        total += y.size(0)

acc = correct / total
print(f"Subject 2 Accuracy: {acc:.4f}")

Subject 2 Accuracy: 0.5518
