# Librerías

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import signal


# Declaración de constantes

In [2]:
# Band limits; presumably in Hz and meant as the f_min/f_max arguments of
# filtro_pasa_banda -- they are not referenced by any cell visible here
# (TODO confirm).
F_MIN = 5
F_MAX = 40
# Sampling frequency handed to signal.periodogram by the PSD plot helpers.
F_MUESTREO = 128
# Root folder that contains one sub-folder per entry of CARPETAS.
RUTA_DATOS_PROCESADOS = '../DatosProcesados/Frecuencia'
# Sub-folders iterated by the processing and feature-extraction loops.
CARPETAS = ['Rojo', 'Azul', 'Verde', 'Morado', 'Baseline']

# Declaración de funciones

In [None]:
def plot_datos(datos):
    """Draw the two columns of ``datos`` as line plots on a new figure.

    Column 0 is labelled 'EEG.O1' and column 1 'EEG.O2'; each column is
    plotted against its sample index. Nothing is returned.
    """
    plt.figure()
    for columna, etiqueta in enumerate(('EEG.O1', 'EEG.O2')):
        plt.plot(datos[:, columna], label=etiqueta)
    plt.legend()
    plt.title('Raw Data')
    plt.xlabel('Tiempo')
    plt.ylabel('uV')
    
def plot_psd_semilog(datos):
    """Plot the periodogram of both columns of ``datos`` on a log y-axis.

    The periodograms are computed with ``F_MUESTREO`` as sampling frequency
    and drawn on the current figure (no new figure is opened here). Both
    curves share the frequency axis returned for column 0.
    """
    # Compute both spectra before drawing anything.
    espectros = [signal.periodogram(datos[:, canal], F_MUESTREO) for canal in (0, 1)]
    frecuencias = espectros[0][0]
    for (_, psd), etiqueta in zip(espectros, ('EEG.O1', 'EEG.O2')):
        plt.semilogy(frecuencias, psd, label=etiqueta)
    plt.ylim([1e-7, 1e2])
    plt.xlabel('Frecuencia [Hz]')
    plt.ylabel('PSD [V**2/Hz]')
    plt.legend()

    
def plot_psd(datos):
    """Plot the periodogram of both columns of ``datos`` on a new figure.

    After plotting, prints four bare numbers in this order: mean PSD of
    column 0, mean PSD of column 1, maximum PSD of column 0, maximum PSD of
    column 1. Both curves share the frequency axis returned for column 0.
    """
    plt.figure()
    # Compute both spectra before drawing anything.
    espectros = [signal.periodogram(datos[:, canal], F_MUESTREO) for canal in (0, 1)]
    frecuencias = espectros[0][0]
    densidades = [psd for _, psd in espectros]
    for psd, etiqueta in zip(densidades, ('EEG.O1', 'EEG.O2')):
        plt.plot(frecuencias, psd, label=etiqueta)
    plt.xlabel('Frecuencia [Hz]')
    plt.ylabel('PSD')
    plt.legend()
    for psd in densidades:
        print(psd.mean())
    for psd in densidades:
        print(max(psd))

def filtro_pasa_banda(datos, f_min, f_max, f_muestreo, orden=5, axis=-1):
    """Band-pass filter ``datos`` with a Butterworth filter.

    Parameters
    ----------
    datos : array_like
        Signal(s) to filter.
    f_min, f_max : float
        Lower and upper band edges, in the same unit as ``f_muestreo``.
    f_muestreo : float
        Sampling frequency; the band edges are normalised by half of it.
    orden : int, optional
        Order passed to ``signal.butter`` (default 5).
    axis : int, optional
        Axis of ``datos`` along which the filter runs. Defaults to -1, which
        is what ``signal.lfilter`` uses when no axis is given. Pass
        ``axis=0`` for arrays laid out as (samples, channels), otherwise the
        filter would run across the channels instead of along time.

    Returns
    -------
    numpy.ndarray
        Filtered data with the same shape as ``datos``.

    Raises
    ------
    ValueError
        Propagated from ``signal.butter`` when the normalised band edges are
        not valid critical frequencies.
    """
    nyq = 0.5 * f_muestreo
    banda = [f_min / nyq, f_max / nyq]
    b, a = signal.butter(orden, banda, btype='band')
    # The coefficients are no longer printed on every call (debug leftover).
    return signal.lfilter(b, a, datos, axis=axis)



## Creación de archivos: transformada de Hilbert

In [None]:
from scipy import fftpack

# For every class folder, read each CSV found directly in it, apply
# fftpack.hilbert to the EEG.O1 and EEG.O2 columns and write the result,
# together with the untouched MarkerValueInt column, to
# <carpeta>/Hilbert/hilbert<indice>.csv.
# NOTE(review): scipy documents a sign-convention difference between
# fftpack.hilbert and signal.hilbert -- confirm this is the intended one.
for carpeta in CARPETAS:
    carpeta_origen = f'{RUTA_DATOS_PROCESADOS}/{carpeta}'
    carpeta_destino = f'{carpeta_origen}/Hilbert'
    # to_csv does not create missing folders; make sure the target exists.
    os.makedirs(carpeta_destino, exist_ok=True)
    archivos = [e for e in os.listdir(carpeta_origen) if e.endswith('.csv')]

    for indice, archivo in enumerate(archivos):
        ruta_origen = f'{carpeta_origen}/{archivo}'
        df = pd.read_csv(ruta_origen)
        etiqueta = df[['MarkerValueInt']]
        datos = df[['EEG.O1', 'EEG.O2']].to_numpy()
        o1 = pd.DataFrame(data=fftpack.hilbert(datos[:, 0]), columns=["EEG.O1"])
        o2 = pd.DataFrame(data=fftpack.hilbert(datos[:, 1]), columns=["EEG.O2"])
        # to_csv returns None when given a path, so its result is not kept
        # (the original rebound the loop variable `archivo` to None).
        pd.concat([o1, o2, etiqueta], axis=1).to_csv(
            f'{carpeta_destino}/hilbert{indice}.csv', index=False
        )

In [None]:
# Leftover interactive check: shows the kernel's working directory, which the
# relative '../' paths used throughout this notebook are resolved against.
pwd

In [None]:
# Shell escape: deletes every file matching hilbert* located directly inside
# Baseline/. NOTE(review): the Hilbert cell in this notebook writes its output
# under <carpeta>/Hilbert/, which this pattern does not reach -- presumably a
# cleanup after an earlier run; confirm before re-running.
!rm ../DatosProcesados/Frecuencia/Baseline/hilbert*


In [3]:
# Deciles 1..9 of each band-pass-filtered recording, one row per file.
# features1 collects the deciles of channel EEG.O1 and features2 those of
# EEG.O2. The previous version read position [0] of the quantile result for
# BOTH dictionaries, so features2 was an exact copy of features1; the columns
# are now selected by name, like the histogram cell does.
# NOTE(review): assumes EEG.O1 was the first column of these CSVs, so that
# features1 keeps its previous values -- confirm.
# Rows follow os.listdir order; the merge cell pairs them positionally with
# the existing feature files, so the order is deliberately left untouched.
DECILES = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
features1 = {f'Decil {k}': [] for k in range(1, 10)}
features2 = {f'Decil {k}': [] for k in range(1, 10)}
for carpeta in CARPETAS:
    carpeta_filtrada = f'{RUTA_DATOS_PROCESADOS}/{carpeta}/FiltroPasoBanda'
    archivos = [e for e in os.listdir(carpeta_filtrada) if e.endswith('.csv')]

    for archivo in archivos:
        ruta_origen = f'{carpeta_filtrada}/{archivo}'
        df = pd.read_csv(ruta_origen)

        for features, canal in ((features1, 'EEG.O1'), (features2, 'EEG.O2')):
            # One quantile call per channel returns all nine deciles in order.
            deciles = df[canal].quantile(list(DECILES))
            for k, valor in enumerate(deciles, start=1):
                features[f'Decil {k}'].append(valor)
        


In [4]:
# Append the decile columns to the per-channel feature tables read from
# Features_EEG.O*.csv and store the result as Occipital*.csv.
# The join is positional (axis=1 on the default row index).
ruta_origen = '../Features/FiltroPasoBanda/'
O1, O2 = pd.DataFrame(features1), pd.DataFrame(features2)
df1 = pd.read_csv(f'{ruta_origen}Features_EEG.O1.csv')
df2 = pd.read_csv(f'{ruta_origen}Features_EEG.O2.csv')
for base, nuevas, destino in ((df1, O1, 'Occipital1.csv'), (df2, O2, 'Occipital2.csv')):
    pd.concat([base, nuevas], axis=1).to_csv(ruta_origen + destino, index=False)

In [5]:
# Histogram features AH0..AH9: the ten density values that np.histogram
# returns with its default bin count, one row per band-pass-filtered file.
# features1 holds channel EEG.O1 and features2 channel EEG.O2.
claves = [f'AH{k}' for k in range(10)]
features1 = {clave: [] for clave in claves}
features2 = {clave: [] for clave in claves}
for carpeta in CARPETAS:
    carpeta_filtrada = RUTA_DATOS_PROCESADOS + '/' + carpeta + '/FiltroPasoBanda'
    archivos = [nombre for nombre in os.listdir(carpeta_filtrada) if nombre.endswith('.csv')]

    for archivo in archivos:
        ruta_origen = f'{carpeta_filtrada}/{archivo}'
        df = pd.read_csv(ruta_origen)
        # Only the densities are kept; the bin edges are discarded.
        densidad_o1 = np.histogram(df['EEG.O1'], density=True)[0]
        densidad_o2 = np.histogram(df['EEG.O2'], density=True)[0]

        for posicion, clave in enumerate(claves):
            features1[clave].append(densidad_o1[posicion])
            features2[clave].append(densidad_o2[posicion])

In [7]:
# Append the histogram columns to Occipital*.csv.
# The files are read and overwritten in place, so running this cell twice
# used to append a second copy of the columns. Columns left by a previous
# run are now dropped first, which makes the cell safe to re-run.
ruta_origen = '../Features/FiltroPasoBanda/'
O1 = pd.DataFrame(features1)
O2 = pd.DataFrame(features2)
df1 = pd.read_csv(ruta_origen + 'Occipital1.csv')
df2 = pd.read_csv(ruta_origen + 'Occipital2.csv')
# errors='ignore': on the first run none of these columns exist yet.
df1 = df1.drop(columns=O1.columns, errors='ignore')
df2 = df2.drop(columns=O2.columns, errors='ignore')
pd.concat([df1, O1], axis=1).to_csv(f'{ruta_origen}Occipital1.csv', index=False)
pd.concat([df2, O2], axis=1).to_csv(f'{ruta_origen}Occipital2.csv', index=False)

Split DF en train y test

In [8]:
# Shuffle both feature tables and split them into train / test files.
# A fixed seed makes the split reproducible. Because the same seed is used
# for both tables, they receive the same permutation as long as they have
# the same number of rows, so row i of Occipital1 and row i of Occipital2
# still come from the same source row. The original shuffled them
# independently and without a seed.
SEMILLA = 42
# Number of rows written to train (the original sliced .loc[0:1253], inclusive).
N_TRAIN = 1254

df1 = pd.read_csv(ruta_origen + 'Occipital1.csv')
df2 = pd.read_csv(ruta_origen + 'Occipital2.csv')

df1 = df1.sample(frac=1, random_state=SEMILLA).reset_index(drop=True)
df2 = df2.sample(frac=1, random_state=SEMILLA).reset_index(drop=True)

# to_csv does not create missing folders.
for subcarpeta in ('train', 'test'):
    os.makedirs(f'{ruta_origen}{subcarpeta}', exist_ok=True)

df1.iloc[:N_TRAIN].to_csv(f'{ruta_origen}train/Occipital1.csv', index=False)
df1.iloc[N_TRAIN:].to_csv(f'{ruta_origen}test/Occipital1.csv', index=False)
df2.iloc[:N_TRAIN].to_csv(f'{ruta_origen}train/Occipital2.csv', index=False)
df2.iloc[N_TRAIN:].to_csv(f'{ruta_origen}test/Occipital2.csv', index=False)

Normalizar

In [9]:
from sklearn import preprocessing

# Min-max scale every column of both training tables.
# Each table gets its own scaler: the original refitted a single instance on
# the second table, which discarded the scaling fitted on Occipital1 and left
# no object able to apply it to other data.
df1 = pd.read_csv(ruta_origen + 'train/Occipital1.csv')
df2 = pd.read_csv(ruta_origen + 'train/Occipital2.csv')
x1 = df1.values
x2 = df2.values
scaler_o1 = preprocessing.MinMaxScaler()
scaler_o2 = preprocessing.MinMaxScaler()
x1_scaled = scaler_o1.fit_transform(x1)
x2_scaled = scaler_o2.fit_transform(x2)
# Backward-compatible alias: the old single scaler ended up fitted on x2.
min_max_scaler = scaler_o2
train1_norm = pd.DataFrame(x1_scaled, columns=list(df1))
train2_norm = pd.DataFrame(x2_scaled, columns=list(df2))

Guardar valores máximos y mínimos

In [10]:
# Per-column maximum and minimum of the (un-normalised) training tables df1
# and df2, saved as two-column CSVs ("Max", "Min").
# The files are written with index=False, so the feature names are not
# stored: rows follow the column order of df1 / df2.
max1 = df1.max(axis=0).to_frame(name="Max")
min1 = df1.min(axis=0).to_frame(name="Min")
max2 = df2.max(axis=0).to_frame(name="Max")
min2 = df2.min(axis=0).to_frame(name="Min")

for maximos, minimos, destino in ((max1, min1, 'Valores_O1.csv'), (max2, min2, 'Valores_O2.csv')):
    pd.concat([maximos, minimos], axis=1).to_csv(f'{ruta_origen}{destino}', index=False)

Varianza

In [11]:
from sklearn.feature_selection import VarianceThreshold

# Variance filter on the normalised Occipital1 training table.
# The values were min-max scaled before this cell, so a 0.1 threshold is
# strict; the output recorded in this notebook keeps 1 of 41 columns.
# train1_norm_new is not used by the later cells visible in this notebook.
selector = VarianceThreshold(threshold=0.1)
train1_norm_new = selector.fit_transform(train1_norm)

n_antes = train1_norm.shape[1]
n_despues = train1_norm_new.shape[1]
print('Number of features before variance thresholding: {}'.format(n_antes))
print('Number of features after variance thresholding: {}'.format(n_despues))

Number of features before variance thresholding: 41
Number of features after variance thresholding: 1


Correlación

In [12]:
# Correlation filter for Occipital1: a column is dropped when its absolute
# correlation with any column placed before it exceeds 0.8.
# The reduced table then overwrites train/Occipital1.csv.
correlation_matrix = train1_norm.corr()
# Strict lower triangle = every (column, earlier column) pair exactly once.
pares_fuertes = np.tril(correlation_matrix.abs().to_numpy(), k=-1) > 0.8
correlated_features = set(correlation_matrix.columns[pares_fuertes.any(axis=1)])

print(len(correlated_features))
print(correlated_features)
train1_norm.drop(labels=correlated_features, axis=1, inplace=True)
train1_norm.to_csv(f'{ruta_origen}train/Occipital1.csv', index = False)


25
{'AH2', 'Varianza', 'Decil 3', 'Decil 8', 'AH3', 'Decil 4', 'valorMediaDiferenciaAbs', 'AH9', 'FHist4', 'valorAbsMediana', 'Decil 9', 'valorAbsPromWII', 'FHist7', 'Decil 6', 'VarianzaFrecCentral', 'valorAbsPromWI', 'FHist9', 'Decil 7', 'AH7', 'AH8', 'FHist10', 'FHist8', 'AH1', 'Decil 2', 'PSD_max'}


In [13]:
# Correlation filter for Occipital2: a column is dropped when its absolute
# correlation with any column placed before it exceeds 0.8.
# The reduced table then overwrites train/Occipital2.csv.
correlation_matrix = train2_norm.corr()
# Strict lower triangle = every (column, earlier column) pair exactly once.
pares_fuertes = np.tril(correlation_matrix.abs().to_numpy(), k=-1) > 0.8
correlated_features = set(correlation_matrix.columns[pares_fuertes.any(axis=1)])

print(len(correlated_features))
print(correlated_features)
train2_norm.drop(labels=correlated_features, axis=1, inplace=True)
train2_norm.to_csv(f'{ruta_origen}train/Occipital2.csv', index = False)

23
{'AH2', 'Decil 3', 'Decil 8', 'AH3', 'Decil 4', 'valorMediaDiferenciaAbs', 'FHist4', 'valorAbsMediana', 'Decil 9', 'valorAbsPromWII', 'FHist7', 'Decil 6', 'VarianzaFrecCentral', 'valorAbsPromWI', 'FHist9', 'Decil 7', 'AH7', 'AH8', 'FHist10', 'FHist8', 'AH1', 'Decil 2', 'PSD_max'}
