In [1]:
import glob
import random
from random import sample

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy as sp
from scipy import signal

In [2]:
# Download the zipped CSV recordings from Google Drive by file id
# (gdown stores the archive as /content/CSVs.zip).
!gdown "1pRE91UrrutDNPdSCzjsf64DgiKl25k8d"
# Extract every CSV straight into /content.
# NOTE(review): the loading cell globs "./*.csv", so this presumably relies on
# the notebook's working directory being /content — confirm.
!unzip "/content/CSVs.zip" -d "/content"
# Remove the archive once its contents are extracted.
!rm -rf '/content/CSVs.zip'

Downloading...
From: https://drive.google.com/uc?id=1pRE91UrrutDNPdSCzjsf64DgiKl25k8d
To: /content/CSVs.zip
100% 953M/953M [00:05<00:00, 162MB/s]
Archive:  /content/CSVs.zip
  inflating: /content/Subject33_1.csv  
  inflating: /content/Subject33_2.csv  
  inflating: /content/Subject34_1.csv  
  inflating: /content/Subject34_2.csv  
  inflating: /content/Subject35_1.csv  
  inflating: /content/Subject35_2.csv  
  inflating: /content/Subject00_1.csv  
  inflating: /content/Subject00_2.csv  
  inflating: /content/Subject01_1.csv  
  inflating: /content/Subject01_2.csv  
  inflating: /content/Subject02_1.csv  
  inflating: /content/Subject02_2.csv  
  inflating: /content/Subject03_1.csv  
  inflating: /content/Subject03_2.csv  
  inflating: /content/Subject04_1.csv  
  inflating: /content/Subject04_2.csv  
  inflating: /content/Subject05_1.csv  
  inflating: /content/Subject05_2.csv  
  inflating: /content/Subject06_1.csv  
  inflating: /content/Subject06_2.csv  
  inflating: /content/Subj

In [3]:
def filtragem(signal, ord=5,f_pa=.05, f_pb=50, Fs=500):
  #Inputs
  #- ord: Ordem do filtro Butterworth
  #- f_pa: Frequencia passa-alta do filtro
  #- f_pb: Frequência passa-baixa do filtro
  #- Fs: Frequência de amostragem do sinal
  #- signal: Sinal a ser filtrado
  #Output
  #- signal_f: Sinal filtrado

  f_pb_n = f_pb/Fs
  f_pa_n = f_pa/Fs

  sos = sp.signal.butter(ord,[f_pa_n,f_pb_n],btype='band',analog=False,output='sos')
  signal_f = sp.signal.sosfiltfilt(sos,signal)
  return signal_f

In [9]:
def load_recordings(paths, group_b):
  """Read the recording CSVs and stack them into resting / active frames.

  File names are expected to look like ``SubjectNN_S.csv``: the character
  before ``.csv`` is the session digit and the two characters before the
  underscore are the subject code.

  Inputs
  - paths: CSV paths, read in the given order
  - group_b: subject codes (two-character strings) that get Group 0;
    every other subject gets Group 1

  Output
  - (resting, active): DataFrames with added 'State' (session digit minus
    one) and 'Group' columns. Sessions with State 1 go to `active`, all
    others to `resting`. Each file keeps its own row index, as before.
  """
  resting_frames = []
  active_frames = []
  for path in paths:
    df = pd.read_csv(path)
    state = int(path[-5]) - 1
    df['State'] = state
    df['Group'] = 0 if path[-8:-6] in group_b else 1
    if state == 1:
      active_frames.append(df)
    else:
      resting_frames.append(df)

  # One concat per state instead of re-copying the growing frame on every file.
  # pd.concat rejects an empty list, so fall back to an empty frame.
  resting_all = pd.concat(resting_frames) if resting_frames else pd.DataFrame()
  active_all = pd.concat(active_frames) if active_frames else pd.DataFrame()
  return resting_all, active_all


# glob returns files in arbitrary order; sorting makes the row order (and so
# the position of each subject's block in the stacked frames) reproducible.
files = sorted(glob.glob("./*.csv"))

all_pacients = pd.DataFrame()
GroupB = ['00','04','06','09','10','14','19','21','22','30']

resting, active = load_recordings(files, GroupB)

In [10]:
# Report the (rows, columns) of the two stacked frames, one per line.
print(f'Active shape: {active.shape}', f'Resting shape: {resting.shape}', sep='\n')

Active shape: (1116000, 23)
Resting shape: (3222000, 23)


In [11]:
# Every column except the trailing 'State' and 'Group' labels is a signal channel.
features = active.columns[:-2]


def filter_recordings(frame, columns):
  """Band-pass filter `columns` of `frame` in place, one recording at a time.

  The frames were built by concatenating one CSV per recording without
  resetting the row index, so a row label that does not increase marks the
  first row of a new recording. Filtering each recording separately keeps
  the filter's edge transients from leaking across the boundary between two
  different recordings. If the index shows no restart, the whole column is
  filtered as a single signal (the previous behaviour).

  Inputs
  - frame: DataFrame whose signal columns are overwritten
  - columns: names of the columns to filter
  """
  n_rows = len(frame)
  if n_rows == 0:
    return

  try:
    row_labels = np.asarray(frame.index, dtype=float)
    starts = (np.flatnonzero(np.diff(row_labels) <= 0) + 1).tolist()
  except (TypeError, ValueError):
    # Non-numeric index: no boundary information, treat as one recording.
    starts = []
  edges = [0, *starts, n_rows]

  for column in columns:
    raw = frame[column].to_numpy(dtype=float)
    filtered = np.empty_like(raw)
    for start, stop in zip(edges[:-1], edges[1:]):
      filtered[start:stop] = filtragem(raw[start:stop])
    frame[column] = filtered


filter_recordings(active, features)
filter_recordings(resting, features)

In [12]:
SEED = 42
N_PATIENTS = 36
N_TEST = 3
N_VAL = 6

# Kept so NumPy-based randomness stays seeded exactly as before.
np.random.seed(SEED)

# The split is drawn with the standard-library sampler, which np.random.seed
# does not affect; a dedicated seeded generator makes the train/validation/test
# assignment identical on every run (and on every re-run of this cell).
split_rng = random.Random(SEED)

pacs = list(range(N_PATIENTS))
pac_test = split_rng.sample(pacs, N_TEST)
# From here on `pacs` holds only the patients not reserved for testing.
pacs = [pac for pac in pacs if pac not in pac_test]
pac_val = split_rng.sample(pacs, N_VAL)
pac_train = [pac for pac in pacs if pac not in pac_val]

In [13]:
# np.asarray accepts the DataFrame and an already-converted array alike, so
# re-running this cell no longer fails on a missing .to_numpy().
resting = np.asarray(resting, dtype=float)

# The three id lists partition the patient indices, so together they give the
# number of recordings stacked in `resting`.
n_pacs = len(pac_train) + len(pac_val) + len(pac_test)
a, leftover = divmod(resting.shape[0], n_pacs)
if leftover:
    raise ValueError(
        f'{resting.shape[0]} resting rows cannot be split evenly across '
        f'{n_pacs} patients ({leftover} rows left over)'
    )
n_cols = resting.shape[1]

# NOTE(review): this slicing assumes every recording contributes exactly `a`
# rows. If recording lengths differ, a block straddles two patients and the
# train/validation/test split leaks — TODO confirm against the raw files.
rest_blocks = resting.reshape(n_pacs, a, n_cols)   # (patient, sample, column)
rest = rest_blocks.transpose(1, 2, 0)              # (sample, column, patient)

# Stack the blocks of the selected patients, in list order, back into 2-D.
rest_train = rest_blocks[pac_train].reshape(len(pac_train) * a, n_cols)
rest_val = rest_blocks[pac_val].reshape(len(pac_val) * a, n_cols)
rest_test = rest_blocks[pac_test].reshape(len(pac_test) * a, n_cols)


In [14]:
# np.asarray accepts the DataFrame and an already-converted array alike, so
# re-running this cell no longer fails on a missing .to_numpy().
active = np.asarray(active, dtype=float)

# The three id lists partition the patient indices, so together they give the
# number of recordings stacked in `active`.
n_pacs = len(pac_train) + len(pac_val) + len(pac_test)
a, leftover = divmod(active.shape[0], n_pacs)
if leftover:
    raise ValueError(
        f'{active.shape[0]} active rows cannot be split evenly across '
        f'{n_pacs} patients ({leftover} rows left over)'
    )
n_cols = active.shape[1]

# NOTE(review): this slicing assumes every recording contributes exactly `a`
# rows — TODO confirm against the raw files.
act_blocks = active.reshape(n_pacs, a, n_cols)     # (patient, sample, column)
act = act_blocks.transpose(1, 2, 0)                # (sample, column, patient)

# Stack the blocks of the selected patients, in list order, back into 2-D.
active_train = act_blocks[pac_train].reshape(len(pac_train) * a, n_cols)
active_val = act_blocks[pac_val].reshape(len(pac_val) * a, n_cols)
active_test = act_blocks[pac_test].reshape(len(pac_test) * a, n_cols)

In [17]:
# Shapes of the per-patient 3-D stacks, then of each split's 2-D arrays.
print(f'Active shape: {act.shape}\nResting shape: {rest.shape}')
for split_name, active_part, rest_part in (
    ('Train', active_train, rest_train),
    ('Validation', active_val, rest_val),
    ('Test', active_test, rest_test),
):
    print(f'\n{split_name}\nActive shape: {active_part.shape}\nResting shape: {rest_part.shape}')

Active shape: (31000, 23, 36)
Resting shape: (89500, 23, 36)

Train
Active shape: (837000, 23)
Resting shape: (2416500, 23)

Validation
Active shape: (186000, 23)
Resting shape: (537000, 23)

Test
Active shape: (93000, 23)
Resting shape: (268500, 23)


In [19]:
# For every split, stack the resting rows first and the active rows after them.
train, val, test = (
    np.concatenate(pair, axis=0)
    for pair in (
        (rest_train, active_train),
        (rest_val, active_val),
        (rest_test, active_test),
    )
)

In [20]:
# One "<split> shape: (rows, cols)" line per final array.
print('\n'.join(
    f'{label} shape: {part.shape}'
    for label, part in (('Train', train), ('Validation', val), ('Test', test))
))

Train shape: (3253500, 23)
Validation shape: (723000, 23)
Test shape: (361500, 23)


In [21]:
columns = [str(feature) for feature in features]
if columns:
    # Drop the first two characters of the first name — presumably a "# "
    # comment prefix left in the CSV header; confirm against the raw files.
    columns[0] = columns[0][2:]
columns += ['State', 'Group']
# Join the names directly rather than editing the list's repr, which would
# also strip brackets, quotes and commas that belong to a column name.
columns = '; '.join(columns)

# Export is currently disabled. Note that np.savetxt writes the header with a
# leading "# " unless comments='' is passed.
# np.savetxt('Train.csv',train,delimiter=';',header=columns)
# np.savetxt('Validation.csv',val,delimiter=';',header=columns)
# np.savetxt('Test.csv',test,delimiter=';',header=columns)

In [22]:
# Show the "; "-separated header line built in the previous cell.
print(columns)

EEG Fp1; EEG Fp2; EEG F3; EEG F4; EEG F7; EEG F8; EEG T3; EEG T4; EEG C3; EEG C4; EEG T5; EEG T6; EEG P3; EEG P4; EEG O1; EEG O2; EEG Fz; EEG Cz; EEG Pz; EEG A2-A1; ECG ECG; State; Group
