# Creación de Espectrograma y guardado local de los mismos con preprocesamiento 

In [1]:
import pandas as pd
import sys
import os

sys.path.insert(1, '/home/martin/Documentos/env_thalamus/01 Thalamus-PI/iESPnet_SRC_main/utilities')
import IO # customized functions for navigating throught the folders and files
import Epochs

import torch
import torchaudio.transforms as T
import numpy as np
import mne
mne.set_log_level(verbose='warning') #to avoid info at terminal
import matplotlib.pyplot as plt
import librosa

from scipy import fft as sp_fft
from itertools import permutations
from utilit import get_data_files, get_annot_files, get_epochs_zeropad_all, get_events, get_patient_PE, get_spectrogram

In [2]:
DATA_DIR = '/media/martin/Disco2/Rns_Data/RNS_ESPM_datatransfer/Data'
OUTDATA_DIR = '/media/martin/Disco2/Rns_Data/PITT_PI_v2/'
OUTMETADATA_DIR = '/media/martin/Disco2/Rns_Data/PITT_PI_v2/METADATA_v2/'

In [3]:
# crear las carpetas en caso de que no existan
if not os.path.exists(OUTDATA_DIR):
    os.makedirs(OUTDATA_DIR)

if not os.path.exists(OUTMETADATA_DIR):
    os.makedirs(OUTMETADATA_DIR)

In [4]:
# Definición de variables para crear el espectrograma 

ECOG_SAMPLE_RATE = 250
ECOG_CHANNELS    = 4
TT               = 1000 # window length
SPEC_WIN_LEN     = int(ECOG_SAMPLE_RATE * TT / 1000 ) # win size
overlap          = 500 
SPEC_HOP_LEN     = int(ECOG_SAMPLE_RATE * (TT - overlap) / 1000) # Length of hop between windows.
SPEC_NFFT        = 500  # to see changes in 0.5 reso
top_db           = 40.0

In [5]:
RNSIDS = IO.get_subfolders(DATA_DIR)

In [6]:
df = pd.DataFrame(columns=['rns_id', 'data', 'label', 'time'])

In [7]:
for s in range(len(RNSIDS)):
    
    print('Running subject ' + RNSIDS[s] + ' [s]: ' + str(s))
    data_files = get_data_files(DATA_DIR, RNSIDS[s], Verbose=False)
    annot_files = get_annot_files(DATA_DIR, RNSIDS[s], Verbose=False)

    for nepoch in range(len(data_files)):

        events = get_events(annot_files[nepoch])
        X, labels, times = get_epochs_zeropad_all(data_files[nepoch], events)

        # creo que no hace falta
        if len(X) == 0:
            continue
        # hasta aca

        [nt, nc, ns] = np.shape(X)
        

        hosp_id, subject_id, PE_id = get_patient_PE(data_files[nepoch],RNSIDS[s])

        for nfile in range(nt):
            file = X[nfile, :, :]
            signal = torch.from_numpy(file)

            # normalize the waveform
            signal = (signal - signal.mean()) / signal.std()

            # Calcular los cuartiles y el rango intercuartil
            Q1 = signal.quantile(0.25)
            Q3 = signal.quantile(0.75)
            IQR = Q3 - Q1

            # Definir los límites para los valores atípicos
            lim_inf = Q1 - 1.5 * IQR
            lim_sup = Q3 + 1.5 * IQR

            outliers = (signal < lim_inf) | (signal > lim_sup)
            signal[outliers] = 0

            spec, t, f = get_spectrogram(signal, ECOG_SAMPLE_RATE, SPEC_NFFT, SPEC_WIN_LEN, SPEC_HOP_LEN)

            # spec to DB
            spec = librosa.power_to_db(spec, top_db=top_db)

            # save up to 60 Hz
            idx_60 = np.where(f<= 60)[0][-1]
            spec = spec[:, :idx_60,:]

            # label time
            label_time = np.zeros((spec.shape[2],))
                
            if labels[nfile] !=0:
                idx_t = np.where(t<=times[nfile])[0][-1]
                label_time[idx_t]=1

            data = {'spectrogram': spec, 'label': label_time}

            file_name = hosp_id + '_' + subject_id + '_' + PE_id + '_E' + str(nfile)

            df_aux = {'rns_id': hosp_id + '-' + subject_id, 
                    'data'  : file_name,
                    'label' : labels[nfile],
                    'time'  : times[nfile]}
            
            df = pd.concat([df, pd.DataFrame([df_aux])], ignore_index=True)

            np.save(OUTDATA_DIR + file_name, data)

    df.to_csv(OUTMETADATA_DIR + 'allfiles_metadata.csv', index=False)

Running subject PIT-RNS1603 [s]: 0


  df = pd.concat([df, pd.DataFrame([df_aux])], ignore_index=True)


Running subject PIT-RNS6989 [s]: 1
Running subject PIT-RNS3016 [s]: 2
Running subject PIT-RNS1529 [s]: 3
Running subject PIT-RNS7168 [s]: 4
Running subject PIT-RNS4098 [s]: 5
Running subject PIT-RNS1836 [s]: 6
Running subject PIT-RNS9183 [s]: 7
Running subject PIT-RNS1440 [s]: 8
Running subject PIT-RNS1713 [s]: 9
Running subject PIT-RNS9536 [s]: 10
Running subject PIT-RNS2543 [s]: 11
Running subject PIT-RNS1438 [s]: 12
Running subject PIT-RNS1534 [s]: 13
Running subject PIT-RNS9793 [s]: 14
Running subject PIT-RNS1703 [s]: 15
Running subject PIT-RNS1556 [s]: 16
Running subject PIT-RNS1597 [s]: 17
Running subject PIT-RNS6992 [s]: 18
Running subject PIT-RNS8326 [s]: 19
Running subject PIT-RNS2227 [s]: 20
Running subject PIT-RNS8163 [s]: 21
Running subject PIT-RNS4998 [s]: 22
Running subject PIT-RNS8973 [s]: 23
Running subject PIT-RNS6806 [s]: 24
Running subject PIT-RNS1090 [s]: 25
Running subject PIT-RNS2368 [s]: 26
Running subject PIT-RNS6762 [s]: 27
Running subject PIT-RNS2938 [s]: 28
R

In [81]:
#df = pd.DataFrame(signal.T, columns=['Channel 1', 'Channel 2', 'Channel 3', 'Channel 4'])
#df.describe()

Unnamed: 0,Channel 1,Channel 2,Channel 3,Channel 4
count,22500.0,22500.0,22500.0,22500.0
mean,-0.060367,0.031505,-0.000932,0.029794
std,1.096207,0.640362,1.307625,0.820361
min,-15.585439,-4.0303,-15.585439,-2.417189
25%,-0.277349,-0.310269,-0.211507,-0.244428
50%,-0.013984,0.051858,0.084778,-0.046904
75%,0.18354,0.413985,0.315223,0.18354
max,4.989951,2.422143,4.36446,16.545091
