In [1]:
import matplotlib.pyplot as plt
from matplotlib import rcParams
import numpy as np
from scipy import signal
import os
import pandas as pd
from scipy.signal import stft
from math import prod

In [2]:
def butter_bandpass(data, lowcut, highcut, fs=200, order=4):
    nyq = fs * 0.5
    low = lowcut / nyq
    high = highcut / nyq
    b, a = signal.butter(order, [low, high], btype='bandpass')
    return signal.filtfilt(b, a, data)


def butter_lowpass(data, lowcut, fs=200, order=4):
    nyq = fs * 0.5
    low = lowcut / nyq
    b, a = signal.butter(order, low, btype='lowpass')
    return signal.filtfilt(b, a, data)


def butter_highpass(data, highcut, fs=200, order=4):
    nyq = fs * 0.5
    high = highcut / nyq
    b, a = signal.butter(order, high, btype='highpass')
    return signal.filtfilt(b, a, data)


def butter_notch(data, cutoff, var=1, fs=200, order=4):
    nyq = fs * 0.5
    low = (cutoff - var) / nyq
    high = (cutoff + var) / nyq
    b, a = signal.iirfilter(order, [low, high], btype='bandstop', ftype="butter")
    return signal.filtfilt(b, a, data)

In [3]:
def filtros(data):
    data_filtered = butter_notch(data, 60)
    data_filtered = butter_highpass(data_filtered, 5)
    data_filtered = butter_lowpass(data_filtered, 50)
    return data_filtered

In [4]:
dir = './datasets/topicos_cc'
arquivos = os.listdir(dir)
arq_numpy = [f for f in arquivos if f.endswith(".npy") and f.startswith('p2')]
participantes = {}
for i in arq_numpy:
    nome = i.split('_')
    trial = np.load(dir+'/'+i)
    for m in range(0,8):
        if participantes.get(f'participante_{nome[0]}',0) == 0:
            participantes[f'participante_{nome[0]}'] = {}
        if participantes[f'participante_{nome[0]}'].get(f'trial_{nome[1]}',0) == 0:
            participantes[f'participante_{nome[0]}'][f'trial_{nome[1]}'] = {}
        dados = trial[m, :, :].swapaxes(0,1)
        participantes[f'participante_{nome[0]}'][f'trial_{nome[1]}'][f'movimento_{m+1}'] = filtros(dados)

v = list()
for i in arq_numpy:
    trial = np.load(dir+'/'+i)
    v.append(trial[:, :, :].swapaxes(1,2))    

arr = np.vstack((v[0], v[1], v[2]))

print(arr.shape)

(24, 4, 1600)


In [5]:
step = 58
segment = 128
data = arr
# .get_data()
print('', data.shape)

n_win = int((data.shape[-1] - segment) / step) + 1
ids = np.arange(n_win) * step

# Janelas do dado no dominio do tempo
chunks_time = np.array([data[:,:,k:(k + segment)] for k in ids]).transpose(1, 2, 0, 3)

# Janelas do dado no domínio da frequência
_, _, chunks_freq = stft(data, fs=200, nperseg=128, noverlap=64)
chunks_freq = np.swapaxes(chunks_freq, 2, 3)

print('Formato (shape) dos dados depois da divisão de janelas')
print(f'Dominio do tempo: {chunks_time.shape} - (classes, ensaios, canais, janelas, linhas)')
print(f'Dominio da frequência:  {chunks_freq.shape} - (classes, ensaios, canais, janelas, linhas)')

 (24, 4, 1600)
Formato (shape) dos dados depois da divisão de janelas
Dominio do tempo: (24, 4, 26, 128) - (classes, ensaios, canais, janelas, linhas)
Dominio da frequência:  (24, 4, 26, 65) - (classes, ensaios, canais, janelas, linhas)


In [6]:
# funções auxiliares
def PSD(w):
    ''' definição da função PSD para o sinal no domínio da frequência '''
    return np.abs(w) ** 2


# funções de extração de características

def var(x):
    return np.sum(x ** 2, axis=-1) / (np.prod(x.shape[:-1]) - 1)

def rms(x):
    return np.sqrt(np.sum(np.abs(x) ** 2, axis=-1) / (np.prod(x.shape[:-1])))

def wamp(x):
    limiar = np.abs(np.diff(x))
    return np.sum(limiar > 0.0001, axis=-1)

def wl(x):
    return np.sum(np.abs(np.diff(x)), axis=-1)

# def zc(x):
#     trs = 0.0001
 
#     f = [1 if i*j <= 0 else 0 for i,j in zip(x[:,:,:,:-1], x[:,:,:,1:])]
    
#     return np.sum(f)


def getzc(data, th):
    t = len(data)
    soma = 0
    for i in range(t-1):
        res = (data[i]*data[i+1])
        res2 = np.abs(data[i]-data[i+1])
        if (res<0 and res2 > th):
            soma +=1
    return soma

def zc(data):
    f=[]
    x,y,z = data.shape[:3]
    for i in range(x):
        l = []
        for j in range(y):
            li = []
            for k in range(z):
                li.append(getzc(data[i][j][k], 0.0001))
            l.append(li.copy())
        f.append(l.copy())

    return np.array(f)

def fmd(w):
    return np.sum(PSD(w), axis=-1) / 2

def mmdf(w):
    return np.sum(np.abs(w), axis=-1) / 2


def fmn(w):
    sample_rate = 200
    f = (w * sample_rate)/(2*len(w))
    return np.sum(np.abs(f*PSD(w)), axis=-1)/np.sum(PSD(w), axis=-1)

def mmnf(w):
    sample_rate = 200
    f = (w * sample_rate)/(2*len(w))
    return np.sum(np.abs(f*np.abs(w)), axis=-1)/np.sum(np.abs(w), axis=-1)

from math import e

def logDec(data):
    N = np.prod(data.shape)
    return e ** (np.sum(np.log10(np.abs(data)), axis=-1))/N

In [7]:
final_data = list()
final_data.append(var(chunks_time))
final_data.append(rms(chunks_time))
final_data.append(fmd(chunks_freq))
final_data.append(mmdf(chunks_freq))
final_data.append(logDec(chunks_time))
final_data.append(wamp(chunks_time))
final_data.append(wl(chunks_time))
final_data.append(zc(chunks_time))
final_data.append(fmn(chunks_freq))
final_data.append(mmnf(chunks_freq))


final = np.array(final_data)
final.shape

(10, 24, 4, 26)

### Visualização

In [8]:
data = final.transpose(0, 1, 3, 2)
sh = data.shape

X = data.reshape(sh[0], int(sh[1]/3), 3 * sh[2], sh[3])

In [9]:
from sklearn.decomposition import PCA
pca = PCA(n_components=2)

features = list()
for f in X:
    classes = list()
    for c in f:
        C_pca = pca.fit_transform(c)
        classes.append(C_pca)
    features.append(classes)

X_pca = np.array(features)

  explained_variance_ = (S ** 2) / (n_samples - 1)
  explained_variance_ratio_ = explained_variance_ / total_var
  explained_variance_ = (S ** 2) / (n_samples - 1)
  explained_variance_ratio_ = explained_variance_ / total_var
  explained_variance_ = (S ** 2) / (n_samples - 1)
  explained_variance_ratio_ = explained_variance_ / total_var
  explained_variance_ = (S ** 2) / (n_samples - 1)
  explained_variance_ratio_ = explained_variance_ / total_var
  explained_variance_ = (S ** 2) / (n_samples - 1)
  explained_variance_ratio_ = explained_variance_ / total_var
  explained_variance_ = (S ** 2) / (n_samples - 1)
  explained_variance_ratio_ = explained_variance_ / total_var
  explained_variance_ = (S ** 2) / (n_samples - 1)
  explained_variance_ratio_ = explained_variance_ / total_var
  explained_variance_ratio_ = explained_variance_ / total_var


In [10]:
def plot_features(features, features_names, classes_names, ch_1, ch_2):
    
    movs = np.arange(len(classes_names))
    markers = ["o", "v", "^", "P", "*", "x", "X", "2", "3", "1"]
    for f, feature in enumerate(features):
        
        for mov, marker in zip(movs, markers):
            # argumentos: classes, amostras, canal
            plt.scatter(feature[mov, :, ch_1],
                        feature[mov, :, ch_2], marker=marker)

        plt.legend((classes_names), scatterpoints=1, loc='best',
                   ncol=3, fontsize=8)
        
        plt.title(features_names[f])
        plt.xlabel('CH{}'.format(ch_1))
        plt.ylabel('CH{}'.format(ch_2))
        plt.show()

In [11]:
import matplotlib.pyplot as plt
from matplotlib import rcParams

# plt.rcParams["figure.figsize"] = (12, 12)

# features_name = ('var', 'rms', 'wamp', 'wl', 'zc','logD','fmd', 'mmdf', 'fmn', 'mmnf')
# classes = [str(item) for item in list(range(8))]
# plot_features(X_pca, features_name, classes, 0, 1)

In [12]:
X = X.transpose(1, 2, 3, 0)
print('classes', 'amostras', 'canais', 'características')
print(X.shape)
X = X.reshape(X.shape[0]*X.shape[1], X.shape[2]*X.shape[3])
X.shape

classes amostras canais características
(8, 78, 4, 10)


(624, 40)

In [13]:
y = np.array(list(range(1, 9)) * int(X.shape[0] / 8))

In [14]:
# criação dos rótulos

# 1,1,1,1,1,1,1,1,1,1,...,2,2,2,2,2,2,2,2,2,2,...,3,...
y = [[str(i)] * int(X.shape[0] / 8) for i in range(8)]
y = np.array(y).flatten()
print('Shape dos rótulos:', y.shape)

Shape dos rótulos: (624,)


In [15]:
y

array(['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1',
       '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1',
       '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1',
       '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1',
       '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1',
       '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1',
       '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2',
       '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2

In [16]:
print(X.shape,y.shape)

(624, 40) (624,)


In [17]:
X[0:]

array([[2.00949951e+02, 1.41728417e+01, 4.45978656e+02, ...,
        3.00000000e+01, 1.48287183e+01, 7.25559629e+00],
       [9.96639577e+03, 9.98118371e+01, 1.83139893e+03, ...,
        4.90000000e+01, 1.89385710e+01, 1.17578499e+01],
       [3.18856012e+04, 1.78529624e+02, 2.62640603e+04, ...,
        6.00000000e+01, 2.80380287e+01, 2.28285347e+01],
       ...,
       [7.43306745e+01, 8.61979667e+00, 1.06199347e+03, ...,
        2.50000000e+01, 5.25549716e+01, 3.68268766e+01],
       [9.88532191e+01, 9.94050373e+00, 1.25075870e+03, ...,
        4.80000000e+01, 1.43532105e+03, 8.30160975e+02],
       [1.16572835e+02, 1.07947270e+01, 4.66791736e+02, ...,
        3.00000000e+01, 7.86723203e+02, 4.07772575e+02]])

### Normalização

In [18]:
X

array([[2.00949951e+02, 1.41728417e+01, 4.45978656e+02, ...,
        3.00000000e+01, 1.48287183e+01, 7.25559629e+00],
       [9.96639577e+03, 9.98118371e+01, 1.83139893e+03, ...,
        4.90000000e+01, 1.89385710e+01, 1.17578499e+01],
       [3.18856012e+04, 1.78529624e+02, 2.62640603e+04, ...,
        6.00000000e+01, 2.80380287e+01, 2.28285347e+01],
       ...,
       [7.43306745e+01, 8.61979667e+00, 1.06199347e+03, ...,
        2.50000000e+01, 5.25549716e+01, 3.68268766e+01],
       [9.88532191e+01, 9.94050373e+00, 1.25075870e+03, ...,
        4.80000000e+01, 1.43532105e+03, 8.30160975e+02],
       [1.16572835e+02, 1.07947270e+01, 4.66791736e+02, ...,
        3.00000000e+01, 7.86723203e+02, 4.07772575e+02]])

In [19]:
np.isnan(X)

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [20]:
X.shape

(624, 40)

In [21]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30,shuffle=True)
scaler = StandardScaler()
scaler.fit(X_train)
# scaler.fit(X)
X_train = scaler.transform(X_train)
# X = scaler.transform(X)
X_test = scaler.transform(X_test)

  temp **= 2
  new_unnormalized_variance -= correction ** 2 / new_sample_count
  new_unnormalized_variance -= correction ** 2 / new_sample_count
  upper_bound = n_samples * eps * var + (n_samples * mean * eps) ** 2


In [22]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# dividindo as porções de dados em treino e teste (70 e 30% respectivamente)
# com embaralhamento sempre ativo (shuffle=True)
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True)

In [23]:

np.isnan(X_train).sum()

1308

In [24]:
X_train = np.nan_to_num(X_train)
X_test = np.nan_to_num(X_test)

In [25]:
X_train.shape

(436, 40)

In [26]:
# modelo de classificador com os parâmetros padrões
clf = SVC(gamma='scale')

# criando o modelo de classificação com os dados de treino
clf.fit(X_train, y_train)

  x = um.multiply(x, x, out=x)


SVC()

In [27]:
# aplicando o classificador nos dados de teste
res = clf.predict(X_test)

# obtendo e ajustando os resultados 
tot_hit = sum([1 for i in range(len(res)) if res[i] == y_test[i]])
print('Acurácia: {:.2f}%'.format(tot_hit / X_test.shape[0] * 100))

Acurácia: 11.70%


### Feature selection

In [28]:
data = final.transpose(1, 3, 2, 0)
X = data.reshape(data.shape[0]*data.shape[1], data.shape[2]*data.shape[3])
X.shape

(624, 40)

In [None]:
y = np.array(list(range(1, 9)) * int(X.shape[0] / 8))

In [None]:
y.shape

(624,)

In [30]:
data_t = final.transpose(1, 3, 2, 0)
X_t = data.reshape(24*26, 10, 4)

data_t = X_t.transpose(2, 0, 1)
data_t.shape

(4, 624, 10)

In [None]:
from sklearn.feature_selection import VarianceThreshold

canais = list()

for c in data_t:
    sel = VarianceThreshold(0.1)
    vt = sel.fit_transform(c)
    canais.append(vt)

cn = np.array(canais)

In [None]:
cn.shape

(4, 624, 8)

In [31]:
from sklearn.feature_selection import SelectKBest

canais = list()

for c in data_t:
    print(c.shape)
    sel = SelectKBest(k=5)
    vt = sel.fit_transform(c,y)
    canais.append(vt)

cbest = np.array(canais)

# X_new = SelectKBest(k=5)
# X_new.fit_transform(X,y)
cbest.shape

(624, 10)
(624, 10)
(624, 10)
(624, 10)


  X = X ** 2
  X = X ** 2
  X = X ** 2
  X = X ** 2
  X = X ** 2
  X = X ** 2
  X = X ** 2
  square_of_sums_alldata = sum(sums_args) ** 2
  square_of_sums_args = [s ** 2 for s in sums_args]
  sstot = ss_alldata - square_of_sums_alldata / float(n_samples)
  ssbn -= square_of_sums_alldata / float(n_samples)
  X = X ** 2
  X = X ** 2
  X = X ** 2
  X = X ** 2
  X = X ** 2
  square_of_sums_alldata = sum(sums_args) ** 2
  square_of_sums_args = [s ** 2 for s in sums_args]
  sstot = ss_alldata - square_of_sums_alldata / float(n_samples)
  ssbn -= square_of_sums_alldata / float(n_samples)


(4, 624, 5)

In [32]:
X = cbest.transpose(1, 2, 0)
print('classes', 'amostras', 'canais', 'características')
# print(X.shape)
X = X.reshape(X.shape[0], X.shape[2]*X.shape[1])
X.shape

classes amostras canais características


(624, 20)

In [33]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# dividindo as porções de dados em treino e teste (70 e 30% respectivamente)
# com embaralhamento sempre ativo (shuffle=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True)

In [34]:
# modelo de classificador com os parâmetros padrões
clf = SVC(gamma='scale')

# criando o modelo de classificação com os dados de treino
clf.fit(X_train, y_train)

SVC()

In [35]:
# aplicando o classificador nos dados de teste
res = clf.predict(X_test)

# obtendo e ajustando os resultados 
tot_hit = sum([1 for i in range(len(res)) if res[i] == y_test[i]])
print('Acurácia: {:.2f}%'.format(tot_hit / X_test.shape[0] * 100))

Acurácia: 18.62%


In [None]:
from sklearn.feature_selection import RFE
from sklearn.svm import SVC

estimator = SVC(kernel='linear')

selector = RFE(estimator, n_features_to_select=5, step=1)

selector = selector.fit_transform(X,y)

In [None]:
for i in combination(range(9),3):
    X.take(i, axis=-1)

In [None]:
# logdetector, wl, iemg