In [1]:
import os
import numpy as np
import pandas as pd

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
# from torchvision import transforms, utils
from dataprep import *

In [12]:
# Run the preprocessing step on the raw recordings folder and save the result
# (the log below shows EDF files being read, band-pass filtered and epoched).
# NOTE(review): `prep` presumably comes from `from dataprep import *` — confirm.
# NOTE(review): the input folder is an absolute, machine-specific Windows path; this
# cell will not run on another machine without editing it.
# NOTE(review): the `Dir` column in the output starts with `dataper_channel\`, which
# looks like `save_dir` and `mode` joined without a path separator — confirm in dataprep.
prep("C:\\Users\\TheSy\\Desktop\\tests", save = True, mode = "per_channel", save_dir="data")

Data directory created :D
Extracting EDF parameters from C:\Users\TheSy\Desktop\tests\aaaaaaaa\s001_2015_12_30\01_tcp_ar\aaaaaaaa_s001_t000.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 323839  =      0.000 ...  1264.996 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.1 - 30 Hz

IIR filter parameters
---------------------
Butterworth bandpass zero-phase (two-pass forward and reverse) non-causal filter:
- Filter order 16 (effective, after forward-backward)
- Cutoffs at 0.10, 30.00 Hz: -6.02, -6.02 dB

Not setting metadata
33 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 33 events and 4001 original time points ...
0 bad epochs dropped
Dropped 1 epoch: 32


Unnamed: 0,Patient,Session,N_Win,Dir
0,aaaaaaaa,s001,1,dataper_channel\aaaaaaaa\aaaaaaaa_s001_w1_ch1.pt
1,aaaaaaaa,s001,1,dataper_channel\aaaaaaaa\aaaaaaaa_s001_w1_ch2.pt
2,aaaaaaaa,s001,1,dataper_channel\aaaaaaaa\aaaaaaaa_s001_w1_ch3.pt
3,aaaaaaaa,s001,1,dataper_channel\aaaaaaaa\aaaaaaaa_s001_w1_ch4.pt
4,aaaaaaaa,s001,1,dataper_channel\aaaaaaaa\aaaaaaaa_s001_w1_ch5.pt
...,...,...,...,...
603,aaaaaaaa,s001,32,dataper_channel\aaaaaaaa\aaaaaaaa_s001_w32_ch1...
604,aaaaaaaa,s001,32,dataper_channel\aaaaaaaa\aaaaaaaa_s001_w32_ch1...
605,aaaaaaaa,s001,32,dataper_channel\aaaaaaaa\aaaaaaaa_s001_w32_ch1...
606,aaaaaaaa,s001,32,dataper_channel\aaaaaaaa\aaaaaaaa_s001_w32_ch1...


In [2]:
class CustomEEGDataset(Dataset):
    '''
    Dataset that serves EEG tensors listed in an index CSV.
    Each CSV row points to one tensor file saved with torch.save; the path is
    taken from the "Dir" column (or, if that column is missing, from the fourth
    column) and is resolved relative to root_dir.
    Input:  -csv_file = Name of the index CSV inside root_dir
            -root_dir = Folder holding the CSV and the tensor files it references
            -transform = Optional callable applied to the stacked tensor
            -multi = If True, every loaded tensor gets a new leading axis before stacking
    '''
    def __init__(self, csv_file , root_dir , transform = None, multi = False, ):
        # The CSV may or may not carry the "Unnamed: 0" column pandas writes when
        # the index is saved. errors="ignore" covers both cases without a bare
        # except that would also hide unrelated failures.
        index_df = pd.read_csv(os.path.join(root_dir, csv_file))
        self.loc_df = index_df.drop(columns="Unnamed: 0", errors="ignore")

        # Prefer the named path column; fall back to the historical position 3.
        columns = list(self.loc_df.columns)
        self._path_col = columns.index("Dir") if "Dir" in columns else 3

        self.transform = transform
        self.root_dir = root_dir
        self.multi = multi

    def __len__(self,):
        return len(self.loc_df)

    def __getitem__(self, idx):
        '''
        Load the tensor(s) at idx and stack them vertically.
        Input:  -idx = int (Python or NumPy), list of ints, or tensor of indices
        Output: torch.vstack of the loaded tensors, after transform if one was given
        '''
        if torch.is_tensor(idx):
            idx = idx.tolist()
        # NumPy integers (e.g. from samplers or np.random) are valid single indices too.
        if isinstance(idx, (int, np.integer)):
            idx = [int(idx)]

        batch = []
        for i in idx:
            eeg_file = os.path.join(self.root_dir,
                                    self.loc_df.iloc[i, self._path_col])
            # NOTE: torch.load unpickles the file; only load files you trust.
            eeg = torch.load(eeg_file)

            if self.multi:
                eeg = eeg.unsqueeze(0)
            batch.append(eeg)
        bat = torch.vstack(batch)

        if self.transform is not None:
            bat = self.transform(bat)

        return bat



In [4]:
# Folder passed as the root directory to CustomEEGDataset / DFSpliter further down;
# the index CSV is looked up inside it.
# NOTE(review): absolute, machine-specific Windows path — consider a configurable relative path.
root_path = "C:\\Users\\TheSy\\Desktop\\FinalEL7006"

In [5]:
# dataset = CustomEEGDataset("prep_channels.csv",root_path, multi=False)

In [16]:
class DFSpliter():
    '''
    Splits an index CSV into train / validation DataFrames by patient, so all
    rows of one patient land in the same split.
    Input:  -train_size = Fraction of the unique patients assigned to train
            -val_size = Stored but not used; validation gets the remaining patients
            -save = If True, write train_feats.csv and val_feats.csv to the
                    current working directory
            -seed = Seed for the patient shuffle
    '''
    def __init__(self, train_size= 0.8, val_size = 0.2, save = False, seed = 69) -> None:
        self.train_size = train_size
        self.val_size = val_size
        self.save = save
        self.seed = seed

    def __call__(self, csv_file, root_path):
        '''
        Input:  -csv_file = Name of the index CSV (needs a "Patient" column)
                -root_path = Folder containing csv_file
        Output: (train_df, val_df), both with a fresh 0..n-1 index
        '''
        # errors="ignore" handles CSVs saved with or without the pandas index
        # column, without a bare except that would also mask real read errors.
        loc_df = pd.read_csv(os.path.join(root_path, csv_file)).drop(
            columns="Unnamed: 0", errors="ignore")

        patients = np.array(loc_df["Patient"].unique())
        # A private RandomState yields the same permutation as
        # np.random.seed(seed) + np.random.shuffle, but leaves the global NumPy
        # RNG (used by the augmentations) untouched.
        np.random.RandomState(self.seed).shuffle(patients)
        end_idx = round(len(patients) * self.train_size)

        train_df = self._rows_for(loc_df, patients[:end_idx])
        val_df = self._rows_for(loc_df, patients[end_idx:])

        if self.save:
            train_df.to_csv("train_feats.csv", encoding= "utf-8", index = False)
            val_df.to_csv("val_feats.csv", encoding="utf-8", index=False)
            # Only report the files when they were actually written.
            print("CSVs creados")
        return train_df, val_df

    @staticmethod
    def _rows_for(loc_df, patients):
        '''Return the rows of loc_df for the given patients, grouped in that patient order.'''
        if len(patients) == 0:
            # Keep the column layout even when a split receives no patients.
            return loc_df.iloc[0:0].reset_index(drop=True)
        # Single concat instead of growing a DataFrame inside the loop.
        parts = [loc_df[loc_df["Patient"] == patient] for patient in patients]
        return pd.concat(parts).reset_index(drop=True)
        


In [3]:
def Masking(channel: np.ndarray, window: int = 150):
    '''
    Set a random contiguous run of samples to zero.
    The input is not modified; a masked copy is returned.
    Input:  -channel = Numpy array
            -window = Number of consecutive samples to set to zero
    Output: Numpy array masked
    Raises: ValueError if window is longer than the channel
    '''
    channel_size = len(channel)
    if window > channel_size:
        raise ValueError(
            f"window ({window}) cannot be longer than the channel ({channel_size})")
    # randint's upper bound is exclusive, so +1 makes the last valid start
    # (channel_size - window) reachable; this also allows window == channel_size.
    first = np.random.randint(0, channel_size - window + 1)
    masked = channel.copy()
    masked[first:first+window] = 0

    return masked

def DCVoltage(channel : np.array, max_magnitude: float = 0.5):
    '''
    Shift the whole channel by one random constant (DC offset).
    The offset is a single uniform draw in [-max_magnitude, max_magnitude).
    Input:  -channel = Numpy array
            -max_magnitude = Largest absolute offset that can be added
    Output: Numpy array with the offset added to every sample
    '''
    # One draw in [0, 1), rescaled to [-1, 1) and then to the requested magnitude.
    unit_draw = np.random.random(1)
    offset = max_magnitude * (2 * unit_draw - 1)
    return channel + offset

def GaussianNoise(channel: np.ndarray, std: float = 0.2):
    '''
    Add zero-mean Gaussian noise, drawn independently for every sample.
    Input:  -channel = Numpy array (any shape)
            -std = Standard deviation of the noise
    Output: Channel with additive gaussian noise added
    '''
    # Use the full shape rather than len(): for a 1-D channel the draw is the
    # same, and multi-dimensional inputs get one independent value per element
    # instead of a mis-broadcast vector of length len(channel).
    noise = np.random.normal(loc = 0, scale= std, size= np.shape(channel))
    noisy_channel = channel + noise
    return noisy_channel

def Time_Shift(channel: np.array, min_shift: int = 0, max_shift: int = 50 ):
    '''
    Shift the channel by a random number of samples, filling the gap by reflection.
    The shift n is drawn from [min_shift, max_shift) and the direction is random.
    Input:  -channel = Numpy array
            -min_shift = Smallest shift in samples (inclusive)
            -max_shift = Upper bound of the shift in samples (exclusive)
    Output: Shifted channel, same length as the input
    '''
    shift = np.random.randint(min_shift, max_shift)
    # Reflect-pad both ends by `shift` samples, then cut a window of the original length.
    padded = np.pad(channel, pad_width=shift, mode="reflect")
    # 0 -> window starts at the beginning of the padded array (signal delayed by `shift`);
    # 2 -> window starts 2*shift later (signal advanced by `shift`).
    side = np.random.choice((0, 2))
    start = shift * side
    return padded[start:len(channel) + start]
def Amplitude(channel :np.array, max_amplitude: float = 1.5):
    '''
    Scale the channel by a single random gain.
    The gain is drawn uniformly from [1 - max_amplitude, 1 + max_amplitude).
    Input:  -channel = Numpy array
            -max_amplitude = Largest deviation of the gain from 1
    Output: Scaled channel
    '''
    # One draw in [0, 1), rescaled to [-1, 1), then turned into a gain around 1.
    unit_draw = np.random.random(1)
    gain = 1 + (2 * unit_draw - 1) * max_amplitude
    return channel * gain

def Permutation(channel: np.ndarray, win_samples: int = 500):
    '''
    Cut the channel into consecutive segments of win_samples samples and
    return them in random order.
    If the length is not a multiple of win_samples, the leftover samples at the
    end stay in place, so the output always has the same length as the input.
    A channel shorter than win_samples is returned as an unchanged copy.
    Input:  -channel = Numpy array
            -win_samples = Number of samples per segment (N_seg = len(channel)// win_samples)
    Output: Permuted sequence, same length as channel
    '''
    n_seqs = len(channel) // win_samples
    if n_seqs == 0:
        # No full segment to permute.
        return np.array(channel)

    random_idx = np.random.choice(np.arange(0, n_seqs, 1), n_seqs, replace=False)
    segments = [channel[win_samples*i: win_samples*(i+1)] for i in random_idx]
    # Re-attach the samples that do not fill a whole segment (empty when the
    # length is a multiple of win_samples) so the output length is preserved.
    segments.append(channel[win_samples * n_seqs:])
    permutated = np.concatenate(segments)
    return permutated
def Temporal_Invertion(channel: np.array):
    '''
    Flip the channel along its first axis (last sample first).
    The result is a reversed slice of the input, not a copy.
    Input:  -channel = Numpy array
    Output: Reversed array
    '''
    return channel[::-1]

def Negation(channel: np.array):
    '''
    Flip the sign of every sample (multiply the channel by -1).
    Input: -channel = Numpy array
    Output: Array with every value negated
    '''
    return -1 * channel


In [4]:
# Pool of augmentation callables defined above. It is handed to
# Augmentations(..., augmentations=AUGMENTATIONS), which picks entries by random
# index, so the order here only decides which index maps to which function.
AUGMENTATIONS = [Negation,
                 Time_Shift,
                 Amplitude,
                 DCVoltage,
                 GaussianNoise,
                 Temporal_Invertion,
                 Permutation,
                 Masking]


In [5]:
class Augmentations(nn.Module):
    '''
    Builds two independently augmented views of every row in a batch.
    For each row, n_aug distinct augmentations are drawn at random (twice, once
    per view) and applied one after another.
    Input:  -n_aug = Number of distinct augmentations chained per view
            -multi = Stored but not used by this class
            -augmentations = Sequence of callables taking and returning a Numpy array
    '''
    def __init__(self, n_aug, multi = False, augmentations = None) -> None:
        # nn.Module must be initialised, otherwise repr(), .to(), hooks etc. fail.
        super().__init__()
        self.n_aug = n_aug
        self.multi = multi
        self.augmentations = augmentations

    def forward(self, batch):
        '''
        Input:  -batch = Tensor; it is iterated along its first axis
        Output: (xbar_batch, xhat_batch), the two augmented views stacked with torch.vstack
        Raises: ValueError if no augmentations were configured
        '''
        if self.augmentations is None or len(self.augmentations) == 0:
            raise ValueError("Augmentations needs a non-empty `augmentations` sequence")

        # detach()/cpu() let grad-tracking or accelerator tensors be converted too.
        rows = batch.detach().cpu().numpy()
        candidates = np.arange(0, len(self.augmentations), 1)

        xbar_batch = []
        xhat_batch = []
        for channel in rows:
            rbar_idxs = np.random.choice(candidates, size=self.n_aug, replace= False)
            rhat_idxs = np.random.choice(candidates, size=self.n_aug, replace= False)

            xbar_batch.append(self._as_tensor(self._chain(channel, rbar_idxs), batch))
            xhat_batch.append(self._as_tensor(self._chain(channel, rhat_idxs), batch))

        # NOTE(review): vstack merges the first axis of 2-D rows into the batch
        # axis; confirm whether torch.stack is wanted for multi-channel rows.
        return torch.vstack(xbar_batch), torch.vstack(xhat_batch)

    def _chain(self, channel, idxs):
        '''Apply the augmentations selected by idxs to channel, in order.'''
        for i in idxs:
            channel = self.augmentations[i](channel)
        return channel

    @staticmethod
    def _as_tensor(array, like):
        '''Convert an augmented array to a tensor matching the input's device (and float dtype).'''
        # copy(): reversed views have negative strides, which from_numpy rejects,
        # and the result must not share memory with the input batch.
        tensor = torch.from_numpy(array.copy())
        if like.is_floating_point():
            # Some augmentations promote to float64; cast back so the output
            # dtype does not depend on which augmentations were drawn.
            tensor = tensor.to(like.dtype)
        return tensor.to(like.device)

        

In [10]:
# loc_df = pd.DataFrame(columns= ["Patient", "Session","N_Win", "Dir"], )

In [7]:
# NOTE(review): same assignment as the earlier `root_path` cell; keep a single
# definition in a configuration cell near the top so a fresh run has one source of truth.
root_path = "C:\\Users\\TheSy\\Desktop\\FinalEL7006"

In [12]:
# The line below appends a row to the empty DataFrame; change the first element of the list
# to add different patients.
# loc_df.loc[len(loc_df)] = ["aaal","session_id",1,"LSTMData-0.001.pt"]


In [13]:
# loc_df

In [17]:
# Patient-wise split with the DFSpliter defaults (train_size=0.8, seed=69).
# save=True also writes train_feats.csv and val_feats.csv to the current working directory.
spliter = DFSpliter(save=True)
train ,val = spliter("prep_channels.csv", root_path)

CSVs creados


In [18]:
# Display both splits (train first, then validation) as a tuple.
train,val

(        Patient Session  N_Win  \
 0      aaaaaaaa    s001      1   
 1      aaaaaaaa    s001      1   
 2      aaaaaaaa    s001      1   
 3      aaaaaaaa    s001      1   
 4      aaaaaaaa    s001      1   
 ...         ...     ...    ...   
 12839  aaaaaaab    s003     27   
 12840  aaaaaaab    s003     27   
 12841  aaaaaaab    s003     27   
 12842  aaaaaaab    s003     27   
 12843  aaaaaaab    s003     27   
 
                                                      Dir  
 0       dataper_channel\aaaaaaaa\aaaaaaaa_s001_w1_ch1.pt  
 1       dataper_channel\aaaaaaaa\aaaaaaaa_s001_w1_ch2.pt  
 2       dataper_channel\aaaaaaaa\aaaaaaaa_s001_w1_ch3.pt  
 3       dataper_channel\aaaaaaaa\aaaaaaaa_s001_w1_ch4.pt  
 4       dataper_channel\aaaaaaaa\aaaaaaaa_s001_w1_ch5.pt  
 ...                                                  ...  
 12839  dataper_channel\aaaaaaab\aaaaaaab_s003_w27_ch1...  
 12840  dataper_channel\aaaaaaab\aaaaaaab_s003_w27_ch1...  
 12841  dataper_channel\aaaaaaab\aaaaa

In [10]:
# Dataset over the full index CSV (not the train/val files written by the splitter).
dataset = CustomEEGDataset("prep_channels.csv",root_path, multi=False)

In [11]:
# Load the first item as a sanity check.
# NOTE(review): the displayed tensor has two rows whose visible values are identical —
# confirm whether the saved per-channel file is meant to hold duplicated data.
dataset[0]

tensor([[ 0.4817,  0.7087,  0.6662,  ..., -0.7281, -0.5517, -0.8499],
        [ 0.4817,  0.7087,  0.6662,  ..., -0.7281, -0.5517, -0.8499]])

In [12]:
# Shuffled loader over the dataset, 4 items per batch, loading in the main process.
# NOTE(review): CustomEEGDataset.__getitem__ already returns a vstack-ed tensor per index,
# so default collation presumably adds another leading batch axis — confirm the intended shape.
data = DataLoader(dataset=dataset,batch_size=4,shuffle=True,num_workers=0, )

In [20]:
# Augmentation pipeline: every call draws 3 distinct augmentations from AUGMENTATIONS
# for each of the two views it produces per row.
augment = Augmentations(3,augmentations=AUGMENTATIONS)