In [None]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

class EMGDataset(Dataset):
    """Sliding-window dataset built from tab-separated EMG recordings.

    Every sub-folder of ``data_directory`` is scanned and each file inside it
    is read with ``np.loadtxt`` (tab-delimited, first line skipped). All
    columns except the first and the last are standardised per file and cut
    into windows of ``sequence_length`` rows with 50% overlap. The last
    column is used as the label column.

    Args:
        data_directory: Root folder containing one sub-folder per group of
            recordings.
        test_split: Fraction of windows that ``split_data`` puts in the test
            set.
        random_seed: Seed forwarded to ``train_test_split`` in ``split_data``.
        sequence_length: Number of rows per window.
    """

    def __init__(self, data_directory, test_split=0.2, random_seed=100, sequence_length=500):
        self.data_directory = data_directory
        self.test_split = test_split
        self.random_seed = random_seed
        self.sequence_length = sequence_length
        self.sequences = []
        self.labels = []

        # Load and preprocess data
        self.load_data()

    def load_data(self):
        """Read every recording and fill ``self.sequences`` / ``self.labels``."""
        scaler = StandardScaler()  # Re-fitted on every file by fit_transform below
        # Half a window gives 50% overlap for any sequence_length; never let
        # the step drop to 0, which range() would reject.
        step_size = max(1, self.sequence_length // 2)

        # Sorted listings make the window order (and therefore the seeded
        # split in split_data) independent of the filesystem's listing order.
        for folder_name in sorted(os.listdir(self.data_directory)):
            folder_path = os.path.join(self.data_directory, folder_name)
            if not os.path.isdir(folder_path):
                continue
            for file_name in sorted(os.listdir(folder_path)):
                file_path = os.path.join(folder_path, file_name)
                if not os.path.isfile(file_path):
                    continue
                # ndmin=2 keeps a single-row file two-dimensional so the
                # column slicing below stays valid.
                data = np.loadtxt(file_path, delimiter='\t', skiprows=1, ndmin=2)

                # A recording shorter than one window yields no segments.
                if len(data) < self.sequence_length:
                    continue

                # Normalize EMG data except for the first and the label column
                data[:, 1:-1] = scaler.fit_transform(data[:, 1:-1])

                # Segment data into overlapping uniform sequences
                for start in range(0, len(data) - self.sequence_length + 1, step_size):
                    segment = data[start:start + self.sequence_length]
                    self.sequences.append(segment[:, 1:-1])  # Exclude the first column if it's a timestamp
                    # NOTE(review): the whole window takes the label of its
                    # first row, even if the label changes inside the window.
                    self.labels.append(segment[0, -1])

        self.labels = np.array(self.labels, dtype=int)

    def __len__(self):
        """Return the number of windows."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(sequence, label)`` as a float32 tensor and a long tensor."""
        sequence = torch.tensor(self.sequences[idx], dtype=torch.float32)
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        return sequence, label

    def split_data(self):
        """Split the windows into train and test lists of ``(sequence, label)``.

        Uses ``train_test_split`` on the window indices with ``test_split`` and
        ``random_seed``. Both returned lists hold fully materialised tensors.

        NOTE(review): consecutive windows overlap, so windows sharing rows can
        land on both sides of the split.
        """
        indices = list(range(len(self)))
        train_indices, test_indices = train_test_split(indices, test_size=self.test_split, random_state=self.random_seed)
        train_data = [self[i] for i in train_indices]
        test_data = [self[i] for i in test_indices]
        return train_data, test_data

# Example usage: build the dataset from ./EMG_Data and report the split sizes
if __name__ == "__main__":
    dataset = EMGDataset('./EMG_Data')
    train_data, test_data = dataset.split_data()
    for summary_line in (
        f"Number of training samples: {len(train_data)}",
        f"Number of testing samples: {len(test_data)}",
    ):
        print(summary_line)

Number of training samples: 13475
Number of testing samples: 3369


LSTM model

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
from emg_dataset import *  # Make sure this import is correct


class LSTMClassifier(nn.Module):
    """Stacked batch-first LSTM followed by a linear classification head.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        output_dim: Number of output logits.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Return logits computed from the LSTM output at the last time step."""
        # Explicit zero initial states, shaped (num_layers, batch, hidden).
        state_shape = (self.lstm.num_layers, x.size(0), self.lstm.hidden_size)
        initial_hidden = torch.zeros(state_shape, device=x.device)
        initial_cell = torch.zeros(state_shape, device=x.device)

        lstm_out, _ = self.lstm(x, (initial_hidden, initial_cell))
        last_step = lstm_out[:, -1, :]
        return self.fc(last_step)

def compute_class_weights(dataset, num_classes=None):
    """Compute inverse-frequency class weights for ``nn.CrossEntropyLoss``.

    The weight of class ``c`` is ``total_samples / (count_c * n_slots)``,
    where ``n_slots`` is the length of the count vector.

    Args:
        dataset: Iterable of ``(sequence, label)`` pairs; labels may be
            0-dim tensors or plain integers.
        num_classes: Optional minimum length of the returned vector, for the
            case where the highest class indices have no samples in
            ``dataset``. Defaults to ``max(label) + 1``.

    Returns:
        1-D float32 tensor indexed by class id. Classes without samples get
        weight 0 instead of ``inf``.

    Raises:
        ValueError: If ``dataset`` yields no samples.
    """
    labels = torch.tensor([int(label) for _, label in dataset], dtype=torch.long)
    if labels.numel() == 0:
        raise ValueError("cannot compute class weights from an empty dataset")

    class_counts = torch.bincount(labels, minlength=num_classes or 0)
    total_samples = labels.numel()

    # Only divide where the count is non-zero; a zero count would otherwise
    # produce an infinite weight.
    class_weights = torch.zeros(len(class_counts), dtype=torch.float32)
    present = class_counts > 0
    class_weights[present] = total_samples / (class_counts[present].float() * len(class_counts))
    return class_weights

def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Per epoch this prints the mean per-batch training loss and the training
    accuracy, then the loss and accuracy that ``evaluate_model`` reports for
    ``val_loader``.
    """
    for epoch in range(num_epochs):
        model.train()
        running_loss, n_correct, n_seen = 0, 0, 0

        for batch_inputs, batch_targets in train_loader:
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)

            optimizer.zero_grad()
            logits = model(batch_inputs)
            batch_loss = criterion(logits, batch_targets)
            batch_loss.backward()
            optimizer.step()

            # Accumulate running statistics for the epoch summary.
            running_loss += batch_loss.item()
            predicted = logits.max(dim=1)[1]
            n_seen += batch_targets.size(0)
            n_correct += (predicted == batch_targets).sum().item()

        train_loss = running_loss / len(train_loader)
        train_accuracy = 100 * n_correct / n_seen
        print(f'Epoch {epoch + 1}: Training Loss = {train_loss:.4f}, Training Accuracy = {train_accuracy:.2f}%')

        val_loss, val_accuracy = evaluate_model(model, val_loader, device, is_testing=False)
        print(f'Epoch {epoch + 1}: Validation Loss = {val_loss:.4f}, Validation Accuracy = {val_accuracy:.2f}%')

def evaluate_model(model, loader, device, is_testing=False):
    """Evaluate ``model`` on ``loader`` with an unweighted cross-entropy loss.

    Args:
        model: Module mapping a batch of sequences to class logits.
        loader: Iterable of ``(sequences, labels)`` batches.
        device: Device the batches are moved to.
        is_testing: When true, labels and predictions are collected and a
            classification report and confusion matrix are printed.

    Returns:
        ``(average_loss, accuracy)``: the mean of the per-batch losses and the
        accuracy in percent. Both are ``nan`` when the loader yields no
        samples, instead of raising ``ZeroDivisionError``.
    """
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    num_batches = 0
    criterion = nn.CrossEntropyLoss()
    all_labels = []
    all_predictions = []

    with torch.no_grad():
        for sequences, labels in loader:
            sequences, labels = sequences.to(device), labels.to(device)
            outputs = model(sequences)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1
            if is_testing:
                all_labels.extend(labels.cpu().numpy())
                all_predictions.extend(predicted.cpu().numpy())

    # A split can be empty, e.g. when int(0.1 * len(dataset)) rounds to 0.
    if total == 0:
        return float("nan"), float("nan")

    average_loss = total_loss / num_batches
    accuracy = 100 * correct / total

    if is_testing:
        print("Classification Report:")
        print(classification_report(all_labels, all_predictions))
        print("Confusion Matrix:")
        print(confusion_matrix(all_labels, all_predictions))

    return average_loss, accuracy


def main():
    """Train and evaluate the LSTM classifier on the EMG dataset.

    Builds the dataset from ``./EMG_Data/``, splits it 80/10/10 into
    train/validation/test subsets, trains with a class-weighted cross-entropy
    loss for 150 epochs and prints a test-set report.
    """
    if torch.cuda.is_available():
        # GPU 4 is the preferred card; clamp to the last visible GPU so the
        # script does not crash on hosts with fewer devices.
        torch.cuda.set_device(min(4, torch.cuda.device_count() - 1))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    dataset = EMGDataset('./EMG_Data/')
    total_count = len(dataset)
    train_count = int(0.8 * total_count)
    val_count = int(0.1 * total_count)
    test_count = total_count - train_count - val_count

    # Seed the split so the three subsets are the same on every run.
    # NOTE(review): if the dataset's windows overlap, a random split puts
    # windows sharing rows into different subsets - confirm this is intended.
    split_generator = torch.Generator().manual_seed(getattr(dataset, "random_seed", 100))
    train_dataset, val_dataset, test_dataset = random_split(
        dataset, [train_count, val_count, test_count], generator=split_generator
    )

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    model = LSTMClassifier(input_dim=8, hidden_dim=64, output_dim=8, num_layers=2)
    model.to(device)

    # Weights come from the training subset only.
    class_weights = compute_class_weights(train_dataset)
    criterion = nn.CrossEntropyLoss(weight=class_weights.to(device))

    optimizer = optim.Adam(model.parameters(), lr=0.001)

    train_model(model, train_loader, val_loader, criterion, optimizer, 150, device)
    print("Evaluating on test set:")
    evaluate_model(model, test_loader, device, is_testing=True)

if __name__ == "__main__":
    main()

Using device: cuda
Epoch 1: Training Loss = 1.9911, Training Accuracy = 19.75%
Epoch 1: Validation Loss = 2.1383, Validation Accuracy = 15.80%
Epoch 2: Training Loss = 1.7820, Training Accuracy = 16.76%
Epoch 2: Validation Loss = 1.7267, Validation Accuracy = 23.99%
Epoch 3: Training Loss = 1.4874, Training Accuracy = 23.35%
Epoch 3: Validation Loss = 1.5449, Validation Accuracy = 27.73%
Epoch 4: Training Loss = 1.4738, Training Accuracy = 25.30%
Epoch 4: Validation Loss = 1.5133, Validation Accuracy = 29.45%
Epoch 5: Training Loss = 1.3015, Training Accuracy = 29.24%
Epoch 5: Validation Loss = 1.5197, Validation Accuracy = 30.46%
Epoch 6: Training Loss = 1.1947, Training Accuracy = 30.64%
Epoch 6: Validation Loss = 1.4179, Validation Accuracy = 32.07%
Epoch 7: Training Loss = 1.1230, Training Accuracy = 31.50%
Epoch 7: Validation Loss = 1.3990, Validation Accuracy = 33.55%
Epoch 8: Training Loss = 1.1228, Training Accuracy = 33.40%
Epoch 8: Validation Loss = 1.5341, Validation Accurac

1D CNN model

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from emg_dataset import EMGDataset  # Ensure this import reflects your project structure
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR
from torch.optim import Adam
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

class Simple1DCNN(nn.Module):
    """Two-block 1D CNN (conv -> ReLU -> max-pool -> dropout) with a linear head.

    Args:
        num_channels: Number of input features per time step.
        num_classes: Number of output logits.
        sequence_length: Number of time steps per input window. The
            classifier head is built from it in ``__init__`` so that it is
            part of ``model.parameters()`` before an optimizer is created.
            Pass ``None`` to defer creation to the first forward pass; in
            that case run one forward pass before building the optimizer,
            otherwise the head is never updated.
    """

    def __init__(self, num_channels, num_classes, sequence_length=500):
        super(Simple1DCNN, self).__init__()
        self.num_classes = num_classes
        # Kept for backward compatibility; forward() follows the input's device.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.conv1 = nn.Conv1d(num_channels, 16, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool1d(2)
        self.dropout1 = nn.Dropout(0.25)
        self.conv2 = nn.Conv1d(16, 32, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool1d(2)
        self.dropout2 = nn.Dropout(0.25)
        self.flatten = nn.Flatten()
        if sequence_length is None:
            self.fc = None  # Created on the first forward pass
        else:
            # The convolutions keep the length (kernel 3, padding 1); each
            # max-pool halves it, rounding down.
            pooled_length = (sequence_length // 2) // 2
            self.fc = nn.Linear(32 * pooled_length, num_classes)

    def forward(self, x):
        """Map a ``(batch, time, channels)`` input to ``(batch, num_classes)`` logits."""
        x = x.float().permute(0, 2, 1)
        x = self.dropout1(self.pool1(F.relu(self.conv1(x))))
        x = self.dropout2(self.pool2(F.relu(self.conv2(x))))
        x = self.flatten(x)
        if self.fc is None:
            # Place the head on the same device as the activations.
            self.fc = nn.Linear(x.shape[1], self.num_classes).to(x.device)
        elif x.shape[1] != self.fc.in_features:
            raise ValueError(
                f"input yields {x.shape[1]} flattened features but the classifier expects "
                f"{self.fc.in_features}; construct Simple1DCNN with a matching sequence_length"
            )
        x = self.fc(x)
        return x

def compute_class_weights(dataset, num_classes=None):
    """Compute inverse-frequency class weights from a dataset's labels.

    Each class ``c`` receives ``total_samples / (count_c * n_slots)``, where
    ``n_slots`` is the length of the count vector.

    Args:
        dataset: Iterable of ``(sequence, label)`` pairs; labels may be
            0-dim tensors or plain integers.
        num_classes: Optional minimum length of the returned vector. Defaults
            to ``max(label) + 1``.

    Returns:
        1-D float32 tensor indexed by class id. A class with no samples gets
        weight 0 rather than ``inf``.

    Raises:
        ValueError: If ``dataset`` yields no samples.
    """
    label_values = [int(label) for _, label in dataset]
    if not label_values:
        raise ValueError("cannot compute class weights from an empty dataset")

    labels = torch.tensor(label_values, dtype=torch.long)
    class_counts = labels.bincount(minlength=num_classes or 0)
    total_samples = len(labels)

    # Substitute 1 for empty counts to keep the division finite, then zero
    # those entries explicitly.
    safe_counts = class_counts.clamp(min=1)
    class_weights = total_samples / (safe_counts * len(class_counts))
    class_weights[class_counts == 0] = 0.0
    return class_weights.float()

def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device, scheduler, lambda_l1=0.01):
    """Train ``model`` with an L1 penalty on all parameters.

    The per-batch loss is ``criterion(outputs, labels)`` plus ``lambda_l1``
    times the sum of absolute values of every model parameter. After each
    epoch the validation metrics from ``evaluate_model`` are printed together
    with the training metrics, and ``scheduler`` is stepped once.
    """
    for epoch in range(num_epochs):
        model.train()
        train_loss, train_correct, train_total = 0, 0, 0

        # Training phase
        for batch_inputs, batch_targets in train_loader:
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)

            optimizer.zero_grad()
            logits = model(batch_inputs)
            l1_norm = sum(p.abs().sum() for p in model.parameters())
            batch_loss = criterion(logits, batch_targets) + lambda_l1 * l1_norm
            batch_loss.backward()
            optimizer.step()

            train_loss += batch_loss.item()
            predicted = logits.max(dim=1)[1]
            train_total += batch_targets.size(0)
            train_correct += (predicted == batch_targets).sum().item()

        # Validation phase
        val_loss, val_accuracy = evaluate_model(model, val_loader, device, is_testing=False)
        print(f'Epoch {epoch + 1}: Train Loss = {train_loss / len(train_loader):.4f}, '
              f'Train Accuracy = {100 * train_correct / train_total:.2f}%, '
              f'Val Loss = {val_loss:.4f}, Val Accuracy = {val_accuracy:.2f}%')

        # One learning-rate scheduler step per epoch.
        scheduler.step()



def evaluate_model(model, loader, device, is_testing=False):
    """Evaluate ``model`` on ``loader`` with an unweighted cross-entropy loss.

    Args:
        model: Module mapping a batch of sequences to class logits.
        loader: Iterable of ``(sequences, labels)`` batches.
        device: Device the batches are moved to.
        is_testing: When true, a classification report and confusion matrix
            are printed.

    Returns:
        ``(average_loss, accuracy)``: the mean of the per-batch losses and the
        accuracy in percent. Both are ``nan`` when the loader yields no
        samples, instead of raising ``ZeroDivisionError``.
    """
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    num_batches = 0
    criterion = nn.CrossEntropyLoss()
    all_labels = []
    all_predictions = []

    with torch.no_grad():
        for sequences, labels in loader:
            sequences, labels = sequences.to(device), labels.to(device)
            outputs = model(sequences)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1
            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

    # A split can be empty, e.g. when int(0.1 * len(dataset)) rounds to 0.
    if total == 0:
        return float("nan"), float("nan")

    if is_testing:
        print("Classification Report:")
        print(classification_report(all_labels, all_predictions))
        print("Confusion Matrix:")
        print(confusion_matrix(all_labels, all_predictions))

    return total_loss / num_batches, 100 * correct / total


def main():
    """Train and evaluate the 1D CNN on the EMG dataset.

    Builds the dataset from ``./EMG_Data``, splits it 80/10/10 into
    train/validation/test subsets, trains for 100 epochs with Adam, a StepLR
    schedule and an L1 penalty, then prints a test-set report.
    """
    if torch.cuda.is_available():
        # GPU 5 is the preferred card; clamp to the last visible GPU so the
        # script does not crash on hosts with fewer devices.
        torch.cuda.set_device(min(5, torch.cuda.device_count() - 1))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    dataset = EMGDataset('./EMG_Data')
    total_count = len(dataset)
    train_count = int(0.8 * total_count)
    val_count = int(0.1 * total_count)
    test_count = total_count - train_count - val_count

    # Seed the split so the three subsets are the same on every run.
    # NOTE(review): if the dataset's windows overlap, a random split puts
    # windows sharing rows into different subsets - confirm this is intended.
    split_generator = torch.Generator().manual_seed(getattr(dataset, "random_seed", 100))
    train_dataset, val_dataset, test_dataset = random_split(
        dataset, [train_count, val_count, test_count], generator=split_generator
    )

    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

    num_channels = 8
    num_classes = 8

    model = Simple1DCNN(num_channels, num_classes).to(device)

    # Run one forward pass before creating the optimizer: a classifier head
    # that is created lazily inside forward() would otherwise be missing from
    # model.parameters() and never be updated.
    if total_count > 0:
        model.eval()
        with torch.no_grad():
            sample_sequence, _ = dataset[0]
            model(sample_sequence.unsqueeze(0).to(device))

    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=0.001)
    scheduler = StepLR(optimizer, step_size=30, gamma=0.1)

    train_model(model, train_loader, val_loader, criterion, optimizer, 100, device, scheduler, lambda_l1=0.0001)
    print("Evaluating on test set:")
    evaluate_model(model, test_loader, device, is_testing=True)

if __name__ == "__main__":
    main()


Using device: cuda
Epoch 1: Train Loss = 1.6745, Train Accuracy = 60.79%, Val Loss = 1.4133, Val Accuracy = 64.55%
Epoch 2: Train Loss = 1.4286, Train Accuracy = 64.10%, Val Loss = 1.3024, Val Accuracy = 64.55%
Epoch 3: Train Loss = 1.3859, Train Accuracy = 63.64%, Val Loss = 1.2640, Val Accuracy = 64.43%
Epoch 4: Train Loss = 1.3582, Train Accuracy = 63.21%, Val Loss = 1.2394, Val Accuracy = 64.43%
Epoch 5: Train Loss = 1.3364, Train Accuracy = 63.00%, Val Loss = 1.2155, Val Accuracy = 64.49%
Epoch 6: Train Loss = 1.3202, Train Accuracy = 62.96%, Val Loss = 1.1963, Val Accuracy = 64.49%
Epoch 7: Train Loss = 1.3068, Train Accuracy = 62.75%, Val Loss = 1.1793, Val Accuracy = 64.49%
Epoch 8: Train Loss = 1.2868, Train Accuracy = 62.69%, Val Loss = 1.1630, Val Accuracy = 64.61%
Epoch 9: Train Loss = 1.2715, Train Accuracy = 62.43%, Val Loss = 1.1452, Val Accuracy = 64.61%
Epoch 10: Train Loss = 1.2607, Train Accuracy = 62.71%, Val Loss = 1.1298, Val Accuracy = 64.61%
Epoch 11: Train Loss

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Vision transformer model

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import classification_report, confusion_matrix
from emg_dataset import EMGDataset  # Ensure your dataset class is properly implemented
import math
import numpy as np
from sklearn.utils.class_weight import compute_class_weight

class PatchEmbedding(nn.Module):
    """Cut a sequence into non-overlapping patches and project each linearly.

    Args:
        num_features: Features per time step.
        patch_size: Time steps per patch.
        emb_size: Output embedding size of each patch.
        seq_len: Sequence length used to derive ``num_patches``.
    """

    def __init__(self, num_features, patch_size, emb_size, seq_len):
        super().__init__()
        self.num_patches = seq_len // patch_size
        self.patch_size = patch_size
        flattened_patch_dim = num_features * patch_size
        self.projection = nn.Linear(flattened_patch_dim, emb_size)

    def forward(self, x):
        """Map ``(batch, time, features)`` to ``(batch, patches, emb_size)``."""
        batch_size = x.size(0)
        # Window size equals step, so patches along the time axis do not
        # overlap; unfold appends the within-patch axis as the last dimension.
        patches = x.unfold(1, self.patch_size, self.patch_size)
        patch_count = patches.size(1)
        flat_patches = patches.contiguous().view(batch_size, patch_count, -1)
        return self.projection(flat_patches)

class PositionalEncoding(nn.Module):
    """Learned positional embedding added to a batch-first token sequence.

    Args:
        emb_size: Embedding size of each token.
        max_len: Number of positions in the learned table.
    """

    def __init__(self, emb_size, max_len=500):
        super().__init__()
        initial_table = torch.randn(max_len, emb_size)
        self.pos_embedding = nn.Parameter(initial_table)

    def forward(self, x):
        """Add the first ``seq_len`` position rows to ``x`` of shape (batch, seq_len, emb_size)."""
        seq_len = x.size(1)
        position_rows = self.pos_embedding[:seq_len, :]
        return x + position_rows

class TransformerEncoder(nn.Module):
    """Pre-norm transformer encoder layer for batch-first input.

    Args:
        emb_size: Token embedding size.
        num_heads: Number of attention heads.
        ff_dim: Hidden size of the feed-forward sub-layer.
        dropout: Dropout probability used inside the attention and on the
            feed-forward output.
    """

    def __init__(self, emb_size, num_heads, ff_dim, dropout=0.1):
        super(TransformerEncoder, self).__init__()
        self.attention = nn.MultiheadAttention(emb_size, num_heads, dropout=dropout)
        self.norm1 = nn.LayerNorm(emb_size)
        self.ff = nn.Sequential(
            nn.Linear(emb_size, ff_dim),
            nn.ReLU(),
            nn.Linear(ff_dim, emb_size)
        )
        self.norm2 = nn.LayerNorm(emb_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Apply self-attention and feed-forward residual blocks to ``(batch, seq, emb)``."""
        # nn.MultiheadAttention is built with its default (seq, batch, emb)
        # layout, so the batch-first input is transposed around the call.
        # Without this, attention would run across the samples of a batch
        # instead of across the tokens of each sample.
        normed = self.norm1(x).transpose(0, 1)
        attended = self.attention(normed, normed, normed, need_weights=False)[0]
        x = x + attended.transpose(0, 1)
        x = x + self.dropout(self.ff(self.norm2(x)))
        return x

class VisionTransformer(nn.Module):
    """ViT-style classifier over patches of a multichannel sequence.

    The input is patch-embedded, a learned class token is prepended,
    positional embeddings are added, ``depth`` encoder layers are applied and
    the final class-token state is mapped to ``num_classes`` logits.
    """

    def __init__(self, num_features, num_patches, patch_size, emb_size, depth, num_heads, ff_dim, num_classes, max_len=500):
        super().__init__()
        covered_length = num_patches * patch_size
        self.patch_embedding = PatchEmbedding(num_features, patch_size, emb_size, covered_length)
        self.pos_embedding = PositionalEncoding(emb_size, max_len)
        self.cls_token = nn.Parameter(torch.randn(1, 1, emb_size))

        encoder_layers = []
        for _ in range(depth):
            encoder_layers.append(TransformerEncoder(emb_size, num_heads, ff_dim))
        self.encoders = nn.ModuleList(encoder_layers)

        self.classifier = nn.Linear(emb_size, num_classes)

    def forward(self, x):
        """Return class logits for a batch of sequences."""
        batch_size = x.size(0)
        tokens = self.patch_embedding(x)

        # Prepend one copy of the learned class token per sample.
        cls_tokens = self.cls_token.expand(batch_size, -1, -1)
        tokens = self.pos_embedding(torch.cat((cls_tokens, tokens), dim=1))

        for layer in self.encoders:
            tokens = layer(tokens)

        # Classify from the class token's final state (position 0).
        return self.classifier(tokens[:, 0])


def compute_class_weights(dataset, num_classes=None):
    """Compute "balanced" class weights, indexed by class id.

    Uses the same formula as scikit-learn's ``class_weight='balanced'``:
    ``n_samples / (n_present_classes * count_c)`` for every class that occurs
    in ``dataset``. The result is always indexed by class id: a class with no
    samples keeps its slot with weight 0, so the vector stays aligned with the
    logits expected by ``nn.CrossEntropyLoss(weight=...)``.

    Args:
        dataset: Iterable of ``(sequence, label)`` pairs; labels may be
            0-dim tensors or plain integers.
        num_classes: Optional minimum length of the returned vector. Defaults
            to ``max(label) + 1``.

    Returns:
        1-D float32 tensor of class weights.

    Raises:
        ValueError: If ``dataset`` yields no samples.
    """
    all_labels = np.array([int(label) for _, label in dataset], dtype=np.int64)
    if all_labels.size == 0:
        raise ValueError("cannot compute class weights from an empty dataset")

    counts = np.bincount(all_labels, minlength=num_classes or 0)
    present = counts > 0

    class_weights = np.zeros(len(counts), dtype=np.float64)
    class_weights[present] = all_labels.size / (present.sum() * counts[present])

    # Convert class weights to tensor
    return torch.tensor(class_weights, dtype=torch.float)


def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device):
    """Run ``num_epochs`` epochs of training, validating after each epoch.

    Prints the mean per-batch training loss and training accuracy, followed
    by the validation loss and accuracy returned by ``evaluate_model``.
    """
    for epoch in range(num_epochs):
        model.train()  # Enable training-mode behaviour
        loss_sum = 0
        hits = 0
        seen = 0

        for inputs, targets in train_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()
            logits = model(inputs)
            step_loss = criterion(logits, targets)
            step_loss.backward()
            optimizer.step()

            loss_sum += step_loss.item()
            predicted = logits.max(dim=1)[1]
            seen += targets.size(0)
            hits += (predicted == targets).sum().item()

        train_loss = loss_sum / len(train_loader)
        train_accuracy = 100 * hits / seen
        print(f'Epoch {epoch + 1}: Training Loss = {train_loss:.4f}, Training Accuracy = {train_accuracy:.2f}%')

        val_loss, val_accuracy = evaluate_model(model, val_loader, device, is_validation=True)
        print(f'Epoch {epoch + 1}: Validation Loss = {val_loss:.4f}, Validation Accuracy = {val_accuracy:.2f}%')



def evaluate_model(model, loader, device, is_validation=False):
    """Evaluate ``model`` on ``loader`` with an unweighted cross-entropy loss.

    Args:
        model: Module mapping a batch of sequences to class logits.
        loader: Iterable of ``(sequences, labels)`` batches.
        device: Device the batches are moved to.
        is_validation: When false, a classification report and confusion
            matrix are printed.

    Returns:
        ``(average_loss, accuracy)``: the mean of the per-batch losses and the
        accuracy in percent. Both are ``nan`` when the loader yields no
        samples, instead of raising ``ZeroDivisionError``.
    """
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    num_batches = 0
    criterion = nn.CrossEntropyLoss()
    all_labels = []
    all_predictions = []

    with torch.no_grad():
        for sequences, labels in loader:
            sequences, labels = sequences.to(device), labels.to(device)
            outputs = model(sequences)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1
            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

    # A split can be empty, e.g. when int(0.1 * len(dataset)) rounds to 0.
    if total == 0:
        return float("nan"), float("nan")

    average_loss = total_loss / num_batches
    accuracy = 100 * correct / total

    if not is_validation:
        print("Classification Report:")
        print(classification_report(all_labels, all_predictions))
        print("Confusion Matrix:")
        print(confusion_matrix(all_labels, all_predictions))

    return average_loss, accuracy

def main():
    """Train and evaluate the Vision Transformer on the EMG dataset.

    Builds the dataset from ``./EMG_Data/``, splits it 80/10/10 into
    train/validation/test subsets, trains with a class-weighted cross-entropy
    loss for 150 epochs and prints a test-set report.
    """
    if torch.cuda.is_available():
        # GPU 1 is the preferred card; fall back to the last visible GPU so
        # the script does not crash on single-GPU hosts.
        gpu_index = min(1, torch.cuda.device_count() - 1)
        device = torch.device(f"cuda:{gpu_index}")
    else:
        device = torch.device("cpu")
    print(f"Using device: {device}")

    dataset = EMGDataset('./EMG_Data/')
    total_count = len(dataset)
    train_count = int(0.8 * total_count)
    val_count = int(0.1 * total_count)
    test_count = total_count - train_count - val_count

    # Seed the split so the three subsets are the same on every run.
    # NOTE(review): if the dataset's windows overlap, a random split puts
    # windows sharing rows into different subsets - confirm this is intended.
    split_generator = torch.Generator().manual_seed(getattr(dataset, "random_seed", 100))
    train_dataset, val_dataset, test_dataset = random_split(
        dataset, [train_count, val_count, test_count], generator=split_generator
    )

    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

    # num_patches * patch_size = 500 time steps per input window.
    model = VisionTransformer(
        num_features=8,
        num_patches=10,
        patch_size=50,
        emb_size=256,
        depth=6,
        num_heads=8,
        ff_dim=512,
        num_classes=8,
        max_len=500
    ).to(device)

    # Weights come from the training subset only.
    class_weights = compute_class_weights(train_dataset)
    criterion = nn.CrossEntropyLoss(weight=class_weights.to(device))

    optimizer = optim.Adam(model.parameters(), lr=0.0001)

    train_model(model, train_loader, val_loader, criterion, optimizer, 150, device)

    print("Evaluating on test set:")
    evaluate_model(model, test_loader, device, is_validation=False)

if __name__ == "__main__":
    main()

Using device: cuda:1
Unique labels in the dataset: [0 1 2 3 4 5 6 7]
Epoch 1: Training Loss = 2.1621, Training Accuracy = 14.83%
Epoch 1: Validation Loss = 2.0138, Validation Accuracy = 6.06%
Epoch 2: Training Loss = 2.0897, Training Accuracy = 18.31%
Epoch 2: Validation Loss = 1.8133, Validation Accuracy = 5.88%
Epoch 3: Training Loss = 2.0692, Training Accuracy = 22.20%
Epoch 3: Validation Loss = 1.7987, Validation Accuracy = 63.84%
Epoch 4: Training Loss = 2.0739, Training Accuracy = 20.71%
Epoch 4: Validation Loss = 1.8488, Validation Accuracy = 5.88%
Epoch 5: Training Loss = 2.0610, Training Accuracy = 20.10%
Epoch 5: Validation Loss = 1.9400, Validation Accuracy = 5.88%
Epoch 6: Training Loss = 2.0592, Training Accuracy = 23.32%
Epoch 6: Validation Loss = 1.9295, Validation Accuracy = 6.29%
Epoch 7: Training Loss = 2.0538, Training Accuracy = 21.92%
Epoch 7: Validation Loss = 1.9321, Validation Accuracy = 6.29%
Epoch 8: Training Loss = 2.0559, Training Accuracy = 21.70%
Epoch 8: 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Swin Transformer

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import classification_report, confusion_matrix
from emg_dataset import *
import math

# Swin Transformer Block adapted for 1D
def init_weights(m):
    """Xavier-initialise the weight of ``nn.Linear`` modules and zero their bias.

    Intended for ``model.apply``; modules that are not ``nn.Linear`` instances
    are left untouched.
    """
    if not isinstance(m, nn.Linear):
        return
    torch.nn.init.xavier_uniform_(m.weight)
    if m.bias is None:
        return
    nn.init.constant_(m.bias, 0)

class SwinTransformerBlock(nn.Module):
    """1D windowed self-attention block with an optional cyclic shift.

    The sequence of each sample is cut into consecutive windows of
    ``window_size`` tokens and self-attention is computed inside every window
    independently, so tokens never attend to other samples of the batch. With
    ``shift_size > 0`` the sequence is rolled before windowing and rolled back
    afterwards, which lets stacked blocks exchange information across window
    borders. The shift is cyclic and unmasked, so the first and last tokens of
    a sequence can share a window.

    Args:
        dim: Token embedding size.
        num_heads: Number of attention heads.
        window_size: Number of tokens per attention window.
        shift_size: Cyclic shift applied before windowing; values <= 0
            disable the shift.
    """

    def __init__(self, dim, num_heads, window_size=7, shift_size=0):
        super().__init__()
        self.norm1 = nn.LayerNorm(dim)
        self.attn = nn.MultiheadAttention(embed_dim=dim, num_heads=num_heads)
        self.norm2 = nn.LayerNorm(dim)
        self.mlp = nn.Sequential(
            nn.Linear(dim, 2 * dim),
            nn.GELU(),
            nn.Linear(2 * dim, dim),
        )
        self.window_size = window_size
        self.shift_size = shift_size if shift_size > 0 else 0

    def forward(self, x):
        """Transform ``(batch, length, dim)`` tokens; the output has the same shape."""
        B, L, C = x.shape

        # Shift the sequence so window borders move between stacked blocks.
        if self.shift_size > 0:
            x = torch.roll(x, shifts=-self.shift_size, dims=1)

        x = self.norm1(x)

        # Pad the time axis up to a whole number of windows.
        window = max(1, min(self.window_size, L))
        pad = (-L) % window
        if pad:
            x = nn.functional.pad(x, (0, 0, 0, pad))
        num_windows = (L + pad) // window

        # (B, L + pad, C) -> (window, B * num_windows, C): every window is a
        # separate "batch" entry in the (seq, batch, emb) layout that
        # nn.MultiheadAttention expects by default.
        windows = x.reshape(B * num_windows, window, C).transpose(0, 1)

        key_padding_mask = None
        if pad:
            # Padded positions must not be attended to. pad < window, so every
            # window keeps at least one real token.
            padded = torch.zeros(B, L + pad, dtype=torch.bool, device=x.device)
            padded[:, L:] = True
            key_padding_mask = padded.reshape(B * num_windows, window)

        attended, _ = self.attn(
            windows, windows, windows,
            key_padding_mask=key_padding_mask,
            need_weights=False,
        )
        x = attended.transpose(0, 1).reshape(B, L + pad, C)[:, :L]

        # Undo the cyclic shift so output positions line up with the input.
        if self.shift_size > 0:
            x = torch.roll(x, shifts=self.shift_size, dims=1)

        x = self.norm2(x)
        x = self.mlp(x) + x  # Apply MLP and add residual

        return x

class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding for batch-first sequences.

    Args:
        d_model: Token embedding size.
        max_len: Number of positions in the precomputed table.
    """

    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # An odd d_model has one cosine column fewer than sine columns.
        pe[:, 1::2] = torch.cos(position * div_term)[:, : d_model // 2]
        # Stored as (max_len, 1, d_model) so the buffer shape is unchanged.
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Add position encodings to ``x`` of shape ``(batch, seq_len, d_model)``."""
        # Select by sequence length (dim 1) and move the position axis into
        # place, so each time step gets its own encoding, broadcast over the
        # batch. Slicing by x.size(0) would index by batch size instead.
        return x + self.pe[:x.size(1)].transpose(0, 1)

# Full model including Swin Transformer blocks
class SwinTransformerTimeSeries(nn.Module):
    """Time-series classifier built from a stack of ``SwinTransformerBlock`` layers.

    Each time step is embedded into 128 dimensions, positional encodings are
    added, ``depth`` blocks are applied (blocks at even indices receive
    ``shift_size``, the others 0), the tokens are averaged over the time axis
    and mapped to ``num_classes`` logits.
    """

    def __init__(self, num_features, num_classes, num_heads, window_size, shift_size, depth=2):
        super().__init__()
        embed_dim = 128
        self.embedding = nn.Linear(num_features, embed_dim)  # Per-time-step embedding
        self.pos_encoder = PositionalEncoding(embed_dim)

        block_list = []
        for block_index in range(depth):
            block_shift = shift_size if block_index % 2 == 0 else 0
            block_list.append(SwinTransformerBlock(embed_dim, num_heads, window_size, block_shift))
        self.blocks = nn.ModuleList(block_list)

        self.classifier = nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        """Return class logits for a batch of sequences."""
        tokens = self.pos_encoder(self.embedding(x))
        for layer in self.blocks:
            tokens = layer(tokens)
        pooled = tokens.mean(dim=1)  # Global average pooling over time
        return self.classifier(pooled)


# Training function
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device):
    """Optimise ``model`` on ``train_loader`` and report metrics every epoch.

    For each epoch the mean per-batch training loss and the training accuracy
    are printed, followed by the validation loss and accuracy obtained from
    ``evaluate_model``.
    """
    for epoch in range(num_epochs):
        model.train()  # Enable training-mode behaviour
        epoch_loss = 0
        num_correct = 0
        num_samples = 0

        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)

            optimizer.zero_grad()
            scores = model(batch_x)
            objective = criterion(scores, batch_y)
            objective.backward()
            optimizer.step()

            epoch_loss += objective.item()
            predicted = scores.max(dim=1)[1]
            num_samples += batch_y.size(0)
            num_correct += (predicted == batch_y).sum().item()

        train_loss = epoch_loss / len(train_loader)
        train_accuracy = 100 * num_correct / num_samples
        print(f'Epoch {epoch + 1}: Training Loss = {train_loss:.4f}, Training Accuracy = {train_accuracy:.2f}%')

        val_loss, val_accuracy = evaluate_model(model, val_loader, device, is_validation=True)
        print(f'Epoch {epoch + 1}: Validation Loss = {val_loss:.4f}, Validation Accuracy = {val_accuracy:.2f}%')


# Evaluation function
def evaluate_model(model, loader, device, is_validation=False):
    """Evaluate ``model`` on ``loader`` with an unweighted cross-entropy loss.

    Args:
        model: Module mapping a batch of sequences to class logits.
        loader: Iterable of ``(sequences, labels)`` batches.
        device: Device the batches are moved to.
        is_validation: When false, a classification report and confusion
            matrix are printed.

    Returns:
        ``(average_loss, accuracy)``: the mean of the per-batch losses and the
        accuracy in percent. Both are ``nan`` when the loader yields no
        samples, instead of raising ``ZeroDivisionError``.
    """
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    num_batches = 0
    criterion = nn.CrossEntropyLoss()
    all_labels = []
    all_predictions = []

    with torch.no_grad():
        for sequences, labels in loader:
            sequences, labels = sequences.to(device), labels.to(device)
            outputs = model(sequences)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1
            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

    # A split can be empty, e.g. when int(0.1 * len(dataset)) rounds to 0.
    if total == 0:
        return float("nan"), float("nan")

    average_loss = total_loss / num_batches
    accuracy = 100 * correct / total

    if not is_validation:
        print("Classification Report:")
        print(classification_report(all_labels, all_predictions))
        print("Confusion Matrix:")
        print(confusion_matrix(all_labels, all_predictions))

    return average_loss, accuracy

# Main function setup and execution
def main():
    """Train and evaluate the Swin-style time-series model on the EMG dataset.

    Builds the dataset from ``./EMG_Data/``, splits it 80/10/10 into
    train/validation/test subsets, trains with an unweighted cross-entropy
    loss for 200 epochs and prints a test-set report.
    """
    if torch.cuda.is_available():
        # GPU 2 is the preferred card; fall back to the last visible GPU so
        # the script does not crash on hosts with fewer devices.
        gpu_index = min(2, torch.cuda.device_count() - 1)
        device = torch.device(f"cuda:{gpu_index}")
    else:
        device = torch.device("cpu")
    print(f"Using device: {device}")

    dataset = EMGDataset('./EMG_Data/')
    total_count = len(dataset)
    train_count = int(0.8 * total_count)
    val_count = int(0.1 * total_count)
    test_count = total_count - train_count - val_count

    # Seed the split so the three subsets are the same on every run.
    # NOTE(review): if the dataset's windows overlap, a random split puts
    # windows sharing rows into different subsets - confirm this is intended.
    split_generator = torch.Generator().manual_seed(getattr(dataset, "random_seed", 100))
    train_dataset, val_dataset, test_dataset = random_split(
        dataset, [train_count, val_count, test_count], generator=split_generator
    )

    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

    model = SwinTransformerTimeSeries(num_features=8, num_classes=8, num_heads=4, window_size=50, shift_size=25, depth=2)
    model.apply(init_weights)  # Initialize weights
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

    train_model(model, train_loader, val_loader, criterion, optimizer, 200, device)
    print("Evaluating on test set:")
    evaluate_model(model, test_loader, device, is_validation=False)

if __name__ == "__main__":
    main()

Using device: cuda:2
Epoch 1: Training Loss = 1.3425, Training Accuracy = 64.06%
Epoch 1: Validation Loss = 1.3578, Validation Accuracy = 63.78%
Epoch 2: Training Loss = 1.3178, Training Accuracy = 64.19%
Epoch 2: Validation Loss = 1.3457, Validation Accuracy = 63.78%
Epoch 3: Training Loss = 1.3140, Training Accuracy = 64.19%
Epoch 3: Validation Loss = 1.3111, Validation Accuracy = 63.78%
Epoch 4: Training Loss = 1.2534, Training Accuracy = 63.70%
Epoch 4: Validation Loss = 1.2231, Validation Accuracy = 63.06%
Epoch 5: Training Loss = 1.1167, Training Accuracy = 62.82%
Epoch 5: Validation Loss = 1.0748, Validation Accuracy = 62.23%
Epoch 6: Training Loss = 0.9572, Training Accuracy = 63.40%
Epoch 6: Validation Loss = 0.9187, Validation Accuracy = 64.19%
Epoch 7: Training Loss = 0.8922, Training Accuracy = 63.76%
Epoch 7: Validation Loss = 0.9031, Validation Accuracy = 64.07%
Epoch 8: Training Loss = 0.8653, Training Accuracy = 64.19%
Epoch 8: Validation Loss = 0.8308, Validation Accur