# Creación de dataset con datos de electroencefalografía

In [80]:
import pandas as pd
import sys
import os
import torch
import torchaudio.transforms as T
import numpy as np
import mne
import librosa
import utilit_espectrograms as ue

In [81]:
# Input root: ue.get_subfolders(DATA_DIR) is used below to list the patient ids.
DATA_DIR = '/media/martin/Disco2/Rns_Data/RNS_ESPM_datatransfer/Data'

# Output root for the exported epoch files, and its metadata subfolder
# (the trailing slash matters: paths are built by string concatenation).
OUTDATA_DIR = '/media/martin/Disco2/Rns_Data/PITT_PI_EEG/'
OUTMETADATA_DIR = OUTDATA_DIR + 'METADATA/'

In [82]:
# Create the output folders if they do not exist yet.
# exist_ok=True makes the call idempotent and avoids the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(OUTDATA_DIR, exist_ok=True)
os.makedirs(OUTMETADATA_DIR, exist_ok=True)

In [83]:
# List of patient ids (one subfolder of DATA_DIR per patient)

RNSIDS = ue.get_subfolders(DATA_DIR)

# Show the ids, a blank line, and how many there are.
print(RNSIDS, '', len(RNSIDS), sep='\n')

['PIT-RNS1603', 'PIT-RNS6989', 'PIT-RNS3016', 'PIT-RNS1529', 'PIT-RNS7168', 'PIT-RNS4098', 'PIT-RNS1836', 'PIT-RNS9183', 'PIT-RNS1440', 'PIT-RNS1713', 'PIT-RNS9536', 'PIT-RNS2543', 'PIT-RNS1438', 'PIT-RNS1534', 'PIT-RNS9793', 'PIT-RNS1703', 'PIT-RNS1556', 'PIT-RNS1597', 'PIT-RNS6992', 'PIT-RNS8326', 'PIT-RNS2227', 'PIT-RNS8163', 'PIT-RNS4998', 'PIT-RNS8973', 'PIT-RNS6806', 'PIT-RNS1090', 'PIT-RNS2368', 'PIT-RNS6762', 'PIT-RNS2938', 'PIT-RNS7525', 'PIT-RNS0427', 'PIT-RNS8076']

32


In [89]:
# Empty metadata table; the export loop adds one row per saved epoch.
metadata_columns = ['rns_id', 'data', 'label', 'time']
df = pd.DataFrame(columns=metadata_columns)

In [90]:
# Indices used by the exploratory cells below:
#   s      -> position in RNSIDS (which patient)
#   nepoch -> position in data_files / annot_files (which recording)
#   nfile  -> position along the first axis of X (which epoch)
s, nepoch, nfile = 0, 1, 0

In [91]:
# Data and annotation file lists for the patient selected by `s`.
data_files, annot_files = (
    ue.get_data_files(DATA_DIR, RNSIDS[s], Verbose=False),
    ue.get_annot_files(DATA_DIR, RNSIDS[s], Verbose=False),
)

In [107]:
# Load the events and the zero-padded epochs of the recording selected by `nepoch`.
events = ue.get_events(annot_files[nepoch])
X, labels, times = ue.get_epochs_zeropad_all(data_files[nepoch], events)

# X is 3-D; presumably (epochs, channels, samples) — TODO confirm against ue.
nt, nc, ns = np.shape(X)

hosp_id, subject_id, PE_id = ue.get_patient_PE(data_files[nepoch], RNSIDS[s])

# Take the epoch selected by `nfile` and standardise it over all its values
# (a single mean and std for the whole epoch, not per row).
file = X[nfile]
signal = torch.from_numpy(file)
signal = signal.sub(signal.mean()).div(signal.std())

signal.shape

Extracting EDF parameters from /media/martin/Disco2/Rns_Data/RNS_ESPM_datatransfer/Data/PIT-RNS1603/PIT-RNS1603_PE20150909-1.EDF...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


torch.Size([4, 22500])

In [108]:
# Per-sample label vector for the selected epoch: 1 at event samples, 0 elsewhere.
label_time = np.zeros((signal.shape[1],))

# Sampling frequency, read from the EDF header
raw    = mne.io.read_raw_edf(data_files[nepoch])
sf     = raw.info['sfreq']

# Convert the non-zero event times to sample indices.
# `times` is deliberately NOT overwritten: rebinding it to `times * sf` made the
# cell non-idempotent (every re-run multiplied the times by sf once more).
event_samples = times[times != 0] * sf
indice = [int(float(x)) for x in event_samples]
for i in indice:
    label_time[i] = 1

Extracting EDF parameters from /media/martin/Disco2/Rns_Data/RNS_ESPM_datatransfer/Data/PIT-RNS1603/PIT-RNS1603_PE20150909-1.EDF...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


In [None]:
# Export every epoch of every recording of every patient.
#
# For each epoch this saves a dict {'iEEG': standardised tensor, 'label': per-sample
# 0/1 vector} with np.save, and collects one metadata row. The metadata CSV is
# rewritten after each patient so partial progress survives an interruption.
#
# Fixes with respect to the previous version of this cell:
#   * the label vector length used signal.shape[2], but the epoch tensor is 2-D;
#   * the event index relied on an undefined time axis `t`; it is now computed
#     from the event time and the sampling frequency of the EDF file;
#   * metadata rows are collected in a list instead of growing the DataFrame
#     with pd.concat on every epoch (quadratic cost).
METADATA_COLUMNS = ['rns_id', 'data', 'label', 'time']
records = []

for s in range(len(RNSIDS)):

    print('Running subject ' + RNSIDS[s] + ' [s]: ' + str(s))
    data_files  = ue.get_data_files(DATA_DIR, RNSIDS[s], Verbose=False)
    annot_files = ue.get_annot_files(DATA_DIR, RNSIDS[s], Verbose=False)

    # NOTE(review): assumes annot_files[i] is the annotation of data_files[i] — confirm in ue.
    for nepoch in range(len(data_files)):

        events           = ue.get_events(annot_files[nepoch])
        X, labels, times = ue.get_epochs_zeropad_all(data_files[nepoch], events)

        nt, nc, ns = np.shape(X)

        hosp_id, subject_id, PE_id = ue.get_patient_PE(data_files[nepoch], RNSIDS[s])

        # Sampling frequency of this recording, used to turn event times into sample indices.
        sf = mne.io.read_raw_edf(data_files[nepoch], verbose=False).info['sfreq']

        for nfile in range(nt):
            signal = torch.from_numpy(X[nfile, :, :])

            # Normalize the waveform (one mean/std for the whole epoch).
            # A constant epoch has zero std; only centre it in that case instead of
            # dividing by zero and filling the epoch with NaN/inf.
            std      = signal.std()
            centered = signal - signal.mean()
            signal   = centered / std if std > 0 else centered

            # Per-sample label: a single 1 at the event sample for labelled epochs.
            label_time = np.zeros((signal.shape[1],))

            if labels[nfile] != 0:
                idx_t = int(float(times[nfile]) * sf)
                # Keep the index inside the epoch (an event at or past the end maps to the last sample).
                idx_t = min(max(idx_t, 0), signal.shape[1] - 1)
                label_time[idx_t] = 1

            data = {'iEEG': signal, 'label': label_time}

            file_name = hosp_id + '_' + subject_id + '_' + PE_id + '_E' + str(nfile)

            records.append({
                'rns_id': hosp_id + '-' + subject_id,
                'data'  : file_name,
                'label' : labels[nfile],
                'time'  : times[nfile],
            })

            # np.save appends '.npy' and pickles the dict; read it back with
            # np.load(path, allow_pickle=True).item().
            np.save(os.path.join(OUTDATA_DIR, file_name), data)

    # Checkpoint: rebuild the metadata table from all rows so far and rewrite the CSV.
    df = pd.DataFrame(records, columns=METADATA_COLUMNS)
    df.to_csv(os.path.join(OUTMETADATA_DIR, 'allfiles_metadata.csv'), index=False)