In [1]:
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

In [6]:
# Data Preprocessing

import os
import linecache

CURRENT_PATH = os.getcwd()

def read_and_save(feature_path, files, label_path):

    for file in files:
        # Label

        filename = file.split('/')[-1] + '.pkl'
        
        # Data

        data = pd.read_csv(file,
                    sep = ' ', skiprows= 5 , header = None, comment='#')
        data.columns = pd.Index(['Index', 'Sensor', 'Position', 'Value'])
        data = data.drop(columns= ['Index'])
        pd.to_pickle(data, os.path.join(os.getcwd(), feature_path, filename))

        alcoholic = 1 if 'a' in filename else 0
        stimulus = ''.join(linecache.getline(file, 4).split()[1:3])
        with open(os.path.join(os.getcwd(), label_path), 'a') as file:
            file.write(stimulus + ' ' + str(alcoholic) + ' ' + filename+ '\n')

        
        
def process_directory(path, feature_path, label_path):
    print(f"Processing directory: {path}")

    subdirs = [os.path.join(path, name) for name in os.listdir(path)]
    if os.path.isfile(subdirs[0]):
        read_and_save(feature_path, subdirs, label_path)
    else:
        for dir_ in subdirs:
            process_directory(dir_, feature_path, label_path)


process_directory(os.path.join(CURRENT_PATH, 'data/EEG/SMNI_CMI_TEST/'), 'data/eeg/test/', 'data/eeg/test_label.txt')
process_directory(os.path.join(CURRENT_PATH, 'data/EEG/SMNI_CMI_TRAIN/'), 'data/eeg/train/', 'data/eeg/train_label.txt')

Processing directory: /home/hieunguyen/Desktop/MlSupervise/Experiment/experiment/data/EEG/SMNI_CMI_TEST/
Processing directory: /home/hieunguyen/Desktop/MlSupervise/Experiment/experiment/data/EEG/SMNI_CMI_TEST/co2a0000370
Processing directory: /home/hieunguyen/Desktop/MlSupervise/Experiment/experiment/data/EEG/SMNI_CMI_TEST/co2a0000365
Processing directory: /home/hieunguyen/Desktop/MlSupervise/Experiment/experiment/data/EEG/SMNI_CMI_TEST/co2a0000368
Processing directory: /home/hieunguyen/Desktop/MlSupervise/Experiment/experiment/data/EEG/SMNI_CMI_TEST/co2c0000337
Processing directory: /home/hieunguyen/Desktop/MlSupervise/Experiment/experiment/data/EEG/SMNI_CMI_TEST/co2c0000341
Processing directory: /home/hieunguyen/Desktop/MlSupervise/Experiment/experiment/data/EEG/SMNI_CMI_TEST/co2a0000371
Processing directory: /home/hieunguyen/Desktop/MlSupervise/Experiment/experiment/data/EEG/SMNI_CMI_TEST/co2c0000346
Processing directory: /home/hieunguyen/Desktop/MlSupervise/Experiment/experiment/da

In [3]:
# %load './data_scripts/eeg_data.py'


import os

class EegDataset(Dataset):
    def __init__(self,tgt_transform = None, transform = None, train = True, channel = 'S1obj'):
        self.channel = channel
        self.train = train
        self.label_path = 'data/eeg/train_label.txt' if train else 'data/eeg/test_label.txt'
        self.labels = pd.read_csv(self.label_path, sep=' ', header= None)
        self.labels = self.labels[self.labels[0] == self.channel]
        self.feature_dir = 'data/eeg/train/' if train else 'data/eeg/test/'
        self.transform = transform
        self.tgt_transform = tgt_transform

    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, index):
        
        feature_path = os.path.join(self.feature_dir, self.labels.iloc[index, 2])
        label = self.labels.iloc[index, 1]
        feature_matrix = pd.read_pickle(feature_path)
        if self.transform:
            feature_matrix = self.transform(feature_matrix)
        if self.tgt_transform:
            label = self.tgt_transform(label)
        
        return feature_matrix, label
        
        
def transform(x):
    assert(x.shape == (256 * 64, 3))
    assert(list(x.columns) == ['Sensor', 'Position', 'Value'])
    value = x['Value']
    value = torch.from_numpy(value.to_numpy()).reshape(256, 64)
    return value   

In [4]:
from torch.utils.data import DataLoader

train_set = EegDataset(tgt_transform=lambda x: torch.tensor(x), transform=lambda x: transform(x))
train_loader = DataLoader(train_set, batch_size = 5, shuffle= True)

In [None]:
# Nesterov method

class TraceLayer(nn)