In [1]:
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import os
import linecache

In [None]:
# Data Preprocessing

# CURRENT_PATH = os.getcwd()

# def read_and_save(feature_path, files, label_path):

#     for file in files:
#         # Label

#         filename = file.split('/')[-1] + '.pkl'
        
#         # Data

#         data = pd.read_csv(file,
#                     sep = ' ', skiprows= 5 , header = None, comment='#')
#         data.columns = pd.Index(['Index', 'Sensor', 'Position', 'Value'])
#         data = data.drop(columns= ['Index'])
#         pd.to_pickle(data, os.path.join(os.getcwd(), feature_path, filename))

#         alcoholic = 1 if 'a' in filename else 0
#         stimulus = ''.join(linecache.getline(file, 4).split()[1:3])
#         with open(os.path.join(os.getcwd(), label_path), 'a') as file:
#             file.write(stimulus + ' ' + str(alcoholic) + ' ' + filename+ '\n')

        
        
# def process_directory(path, feature_path, label_path):
#     print(f"Processing directory: {path}")

#     subdirs = [os.path.join(path, name) for name in os.listdir(path)]
#     if os.path.isfile(subdirs[0]):
#         read_and_save(feature_path, subdirs, label_path)
#     else:
#         for dir_ in subdirs:
#             process_directory(dir_, feature_path, label_path)


# process_directory(os.path.join(CURRENT_PATH, 'data/EEG/SMNI_CMI_TEST/'), 'data/eeg/test/', 'data/eeg/test_label.txt')
# process_directory(os.path.join(CURRENT_PATH, 'data/EEG/SMNI_CMI_TRAIN/'), 'data/eeg/train/', 'data/eeg/train_label.txt')

In [2]:
# %load './data_scripts/eeg_data.py'



class EegDataset(Dataset):
    def __init__(self,tgt_transform = None, transform = None, train = True, channel = 'S1obj'):
        self.channel = channel
        self.train = train
        self.label_path = 'data/eeg/train_label.txt' if train else 'data/eeg/test_label.txt'
        self.labels = pd.read_csv(self.label_path, sep=' ', header= None)
        self.labels = self.labels[self.labels[0] == self.channel]
        self.feature_dir = 'data/eeg/train/' if train else 'data/eeg/test/'
        self.transform = transform
        self.tgt_transform = tgt_transform

    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, index):
        
        feature_path = os.path.join(self.feature_dir, self.labels.iloc[index, 2])
        label = self.labels.iloc[index, 1]
        feature_matrix = pd.read_pickle(feature_path)
        if self.transform:
            feature_matrix = self.transform(feature_matrix)
        if self.tgt_transform:
            label = self.tgt_transform(label)
        
        return feature_matrix, label
        
        
def transform(x):
    assert(x.shape == (256 * 64, 3))
    assert(list(x.columns) == ['Sensor', 'Position', 'Value'])
    value = x['Value']
    value = torch.from_numpy(value.to_numpy()).reshape(256, 64).to(torch.float32)
    return value   

In [4]:
# model and Nesterov optimizer
import math 
import torch
import torch.nn as nn

class TraceLayer(nn.Module):
    def __init__(self, input_shape):
        super().__init__()
        self.input_shape = input_shape
        self.weight = nn.Parameter(torch.empty(self.input_shape))
        self.bias = nn.Parameter(torch.empty(1))
        self.reset_parameters()
        self.sigmoid = nn.Sigmoid()
    def reset_parameters(self) -> None:

        nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
            nn.init.uniform_(self.bias, -bound, bound)
    def forward(self, x):
        x = torch.matmul(x, torch.transpose(self.weight, 1, 0))
        x = torch.diagonal(x, dim1=1, dim2= 2)
        x = x.sum(dim = 1) + self.bias
        x = self.sigmoid(x.reshape(-1, 1))
        return x
    
    
class FlattenTraceLayer(nn.Module):
    def __init__(self):
        super(FlattenTraceLayer, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_sigmoid = nn.Sequential(
            nn.Linear(256 * 64, 1),
            nn.Sigmoid()
        )
    def forward(self, x):
        x = self.flatten(x)
        sigmoid = self.linear_sigmoid(x)
        return sigmoid


class VGGNet(nn.Module):
    def __init__(self):
        super(VGGNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(20)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(20, 20, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(20)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv3 = nn.Conv2d(20, 20, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(20)
        self.relu3 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv4 = nn.Conv2d(20, 40, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(40)
        self.relu4 = nn.ReLU(inplace=True)
        self.conv5 = nn.Conv2d(40, 40, kernel_size=3, padding=1)
        self.bn5 = nn.BatchNorm2d(40)
        self.relu5 = nn.ReLU(inplace=True)
        self.conv6 = nn.Conv2d(40, 40, kernel_size=3, padding=1)
        self.bn6 = nn.BatchNorm2d(40)
        self.relu6 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv7 = nn.Conv2d(40, 60, kernel_size=3)
        self.bn7 = nn.BatchNorm2d(60)
        self.relu7 = nn.ReLU(inplace=True)
        self.conv8 = nn.Conv2d(60, 40, kernel_size=1)
        self.bn8 = nn.BatchNorm2d(40)
        self.relu8 = nn.ReLU(inplace=True)
        self.conv9 = nn.Conv2d(40, 20, kernel_size=1)
        self.bn9 = nn.BatchNorm2d(20)
        self.relu9 = nn.ReLU(inplace=True)
        self.avgpool = nn.AvgPool2d(kernel_size=5)
        self.fc = nn.Linear(480, 1)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x, verbose = False):
        x = self.relu1(self.bn1(self.conv1(x)))
        x = self.relu2(self.bn2(self.conv2(x)))
        x = self.relu3(self.bn3(self.conv3(x)))
        x = self.pool1(x)
        x = self.relu4(self.bn4(self.conv4(x)))
        x = self.relu5(self.bn5(self.conv5(x)))
        x = self.relu6(self.bn6(self.conv6(x)))
        x = self.pool2(x)
        x = self.relu7(self.bn7(self.conv7(x)))
        x = self.relu8(self.bn8(self.conv8(x)))
        x = self.relu9(self.bn9(self.conv9(x)))
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        y = self.fc(x)
        y = self.sigmoid(y)
        return y

    
        



In [10]:
batch = 10
weight = 0.1
num_epochs = 30
train_set = EegDataset(tgt_transform=lambda x: torch.tensor(x, dtype = torch.float32), 
                       transform=lambda x: transform(x))
train_loader = DataLoader(train_set, batch_size = batch, shuffle= True)
val_set = EegDataset(tgt_transform=lambda x: torch.tensor(x, dtype = torch.float32),
                        transform=lambda x: transform(x), train= False)
val_loader = DataLoader(val_set, batch_size = batch, shuffle= True)

model = TraceLayer((256, 64))   # Uncomment this for trace model

criterion = nn.BCELoss()
# optimizer = torch.optim.SGD(model.parameters(), lr = 0.001, nesterov= True, momentum= 0.9)
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)


for epoch in range(num_epochs):
    train_loss = 0.0
    train_accuracy = 0.0
    val_loss = 0.0
    val_accuracy = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()

        # forward pass
        outputs = model(inputs)
        prediction =  torch.where(outputs.squeeze() > 0.5, torch.tensor(1.0), torch.tensor(0.0)).to(torch.int)

        U, S, Vh = torch.linalg.svd( list(model.parameters())[0].data)    # Uncomment this for trace model
        loss = criterion(outputs, labels.reshape(-1,1)) + S.sum() ** 2  # Uncomment this for trace model
        # backward pass and optimization
        loss.backward()
        optimizer.step()
        
        # print statistics
        train_loss += loss.item() * batch 
        correct = torch.sum(prediction == labels.to(torch.int))
        total = len(prediction)
        train_accuracy += (correct.item() / total) * batch

    for inputs, labels in val_loader:

        with torch.no_grad():
            outputs = model(inputs)
            prediction =  torch.where(outputs.squeeze() > 0.5, torch.tensor(1.0),
                                       torch.tensor(0.0)).to(torch.int)
            loss = criterion(outputs, labels.reshape(-1,1))

            # print statistics
            val_loss += loss.item() * batch 
            correct = torch.sum(prediction == labels.to(torch.int))
            total = len(prediction)
            val_accuracy += (correct.item() / total) * batch
    train_accuracy /= len(train_loader.dataset)
    train_loss /= len(train_loader.dataset)
    val_accuracy /= len(val_loader.dataset)
    val_loss /= len(val_loader.dataset)
    print(f"Epoch {epoch + 1}/{num_epochs}: Train Loss = {train_loss:.4f} Train Accuracy = {train_accuracy: .4f} Val Loss = {val_loss:.4f} Val Accuracy = {val_accuracy: .4f}")

Epoch 1/30: Train Loss = 5135.5203 Train Accuracy =  0.4850 Val Loss = 35.5535 Val Accuracy =  0.5300
Epoch 2/30: Train Loss = 5148.8650 Train Accuracy =  0.6250 Val Loss = 31.4250 Val Accuracy =  0.5850
Epoch 3/30: Train Loss = 5158.5819 Train Accuracy =  0.6650 Val Loss = 29.7738 Val Accuracy =  0.5950
Epoch 4/30: Train Loss = 5164.6556 Train Accuracy =  0.6700 Val Loss = 28.6363 Val Accuracy =  0.5900
Epoch 5/30: Train Loss = 5170.6369 Train Accuracy =  0.6650 Val Loss = 36.8718 Val Accuracy =  0.5150
Epoch 6/30: Train Loss = 5176.9952 Train Accuracy =  0.6900 Val Loss = 37.4960 Val Accuracy =  0.5150
Epoch 7/30: Train Loss = 5186.0902 Train Accuracy =  0.7250 Val Loss = 37.7811 Val Accuracy =  0.5250
Epoch 8/30: Train Loss = 5204.3752 Train Accuracy =  0.7100 Val Loss = 38.3312 Val Accuracy =  0.5300
Epoch 9/30: Train Loss = 5221.9926 Train Accuracy =  0.6750 Val Loss = 36.5810 Val Accuracy =  0.5750
Epoch 10/30: Train Loss = 5231.9367 Train Accuracy =  0.6800 Val Loss = 29.1493 Va

In [11]:

model = FlattenTraceLayer()
# optimizer = torch.optim.SGD(model.parameters(), lr = 0.001)
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)

for epoch in range(num_epochs):
    train_loss = 0.0
    train_accuracy = 0.0
    val_loss = 0.0
    val_accuracy = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()

        # forward pass
        outputs = model(inputs)
        prediction =  torch.where(outputs.squeeze() > 0.5, torch.tensor(1.0), torch.tensor(0.0)).to(torch.int)

        loss = criterion(outputs, labels.reshape(-1,1))                     # Comment this for trace model
        # backward pass and optimization
        loss.backward()
        optimizer.step()
        
        # print statistics
        train_loss += loss.item() * batch 
        correct = torch.sum(prediction == labels.to(torch.int))
        total = len(prediction)
        train_accuracy += (correct.item() / total) * batch

    for inputs, labels in val_loader:

        with torch.no_grad():
            outputs = model(inputs)
            prediction =  torch.where(outputs.squeeze() > 0.5, torch.tensor(1.0),
                                       torch.tensor(0.0)).to(torch.int)
            loss = criterion(outputs, labels.reshape(-1,1))

            # print statistics
            val_loss += loss.item() * batch 
            correct = torch.sum(prediction == labels.to(torch.int))
            total = len(prediction)
            val_accuracy += (correct.item() / total) * batch
    train_accuracy /= len(train_loader.dataset)
    train_loss /= len(train_loader.dataset)
    val_accuracy /= len(val_loader.dataset)
    val_loss /= len(val_loader.dataset)
    print(f"Epoch {epoch + 1}/{num_epochs}: Train Loss = {train_loss:.4f} Train Accuracy = {train_accuracy: .4f} Val Loss = {val_loss:.4f} Val Accuracy = {val_accuracy: .4f}")

Epoch 1/30: Train Loss = 26.0896 Train Accuracy =  0.5300 Val Loss = 33.4827 Val Accuracy =  0.5100
Epoch 2/30: Train Loss = 32.1434 Train Accuracy =  0.6350 Val Loss = 33.3760 Val Accuracy =  0.5500
Epoch 3/30: Train Loss = 31.9775 Train Accuracy =  0.6350 Val Loss = 30.0786 Val Accuracy =  0.5550
Epoch 4/30: Train Loss = 26.2775 Train Accuracy =  0.6850 Val Loss = 31.5785 Val Accuracy =  0.5450
Epoch 5/30: Train Loss = 25.7455 Train Accuracy =  0.7100 Val Loss = 29.1537 Val Accuracy =  0.6050
Epoch 6/30: Train Loss = 20.8307 Train Accuracy =  0.7650 Val Loss = 22.7410 Val Accuracy =  0.6500
Epoch 7/30: Train Loss = 17.7236 Train Accuracy =  0.7600 Val Loss = 22.8627 Val Accuracy =  0.6600
Epoch 8/30: Train Loss = 22.0359 Train Accuracy =  0.7050 Val Loss = 22.5453 Val Accuracy =  0.6600
Epoch 9/30: Train Loss = 18.3948 Train Accuracy =  0.7600 Val Loss = 20.8363 Val Accuracy =  0.6550
Epoch 10/30: Train Loss = 21.4667 Train Accuracy =  0.7400 Val Loss = 35.0589 Val Accuracy =  0.5500

In [8]:
model = VGGNet()
num_epochs = 20
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)

for epoch in range(num_epochs):
    train_loss = 0.0
    train_accuracy = 0.0
    val_loss = 0.0
    val_accuracy = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()

        # forward pass
        outputs = model(torch.unsqueeze(inputs, dim=1))
        prediction =  torch.where(outputs.squeeze() > 0.5, torch.tensor(1.0), torch.tensor(0.0)).to(torch.int)

        loss = criterion(outputs, labels.reshape(-1,1))                     # Comment this for trace model
        # backward pass and optimization
        loss.backward()
        optimizer.step()
        
        # print statistics
        train_loss += loss.item() * batch 
        correct = torch.sum(prediction == labels.to(torch.int))
        total = len(prediction)
        train_accuracy += (correct.item() / total) * batch

    for inputs, labels in val_loader:

        with torch.no_grad():
            outputs = model(torch.unsqueeze(inputs, dim=1))
            prediction =  torch.where(outputs.squeeze() > 0.5, torch.tensor(1.0),
                                       torch.tensor(0.0)).to(torch.int)
            loss = criterion(outputs, labels.reshape(-1,1))

            # print statistics
            val_loss += loss.item() * batch 
            correct = torch.sum(prediction == labels.to(torch.int))
            total = len(prediction)
            val_accuracy += (correct.item() / total) * batch
    train_accuracy /= len(train_loader.dataset)
    train_loss /= len(train_loader.dataset)
    val_accuracy /= len(val_loader.dataset)
    val_loss /= len(val_loader.dataset)
    print(f"Epoch {epoch + 1}/{num_epochs}: Train Loss = {train_loss:.4f} Train Accuracy = {train_accuracy: .4f} Val Loss = {val_loss:.4f} Val Accuracy = {val_accuracy: .4f}")

Epoch 1/30: Train Loss = 0.6503 Train Accuracy =  0.6000 Val Loss = 0.6192 Val Accuracy =  0.7000
Epoch 2/30: Train Loss = 0.5824 Train Accuracy =  0.6750 Val Loss = 0.5925 Val Accuracy =  0.6850
Epoch 3/30: Train Loss = 0.4923 Train Accuracy =  0.8000 Val Loss = 0.5555 Val Accuracy =  0.7150
Epoch 4/30: Train Loss = 0.4249 Train Accuracy =  0.8250 Val Loss = 0.4769 Val Accuracy =  0.7500
Epoch 5/30: Train Loss = 0.3161 Train Accuracy =  0.8650 Val Loss = 0.3999 Val Accuracy =  0.7900
Epoch 6/30: Train Loss = 0.2929 Train Accuracy =  0.8650 Val Loss = 0.4555 Val Accuracy =  0.7850
Epoch 7/30: Train Loss = 0.2741 Train Accuracy =  0.8850 Val Loss = 0.3632 Val Accuracy =  0.8550
Epoch 8/30: Train Loss = 0.2001 Train Accuracy =  0.9400 Val Loss = 0.3686 Val Accuracy =  0.8300
Epoch 9/30: Train Loss = 0.2355 Train Accuracy =  0.9150 Val Loss = 0.5253 Val Accuracy =  0.7550
Epoch 10/30: Train Loss = 0.2082 Train Accuracy =  0.9150 Val Loss = 0.4382 Val Accuracy =  0.7850
Epoch 11/30: Train 