## Gerando dados sintéticos a partir da adição de ruído Gaussiano

In [1]:
import numpy as np
import torch
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset

X_ = torch.load('../data/processed/X_.pt')
y_ = torch.load('../data/processed/y_.pt')

noise = np.random.normal(0,1,(5184, 22, 1125))
fake_X_ = X_ + noise

fake_X_ = torch.as_tensor(fake_X_).float()

real_set = TensorDataset(X_, y_)
fake_set = TensorDataset(fake_X_, y_)

print(X_.shape)
print(y_.shape)

  from .autonotebook import tqdm as notebook_tqdm


torch.Size([5184, 22, 1125])
torch.Size([5184])


## Processamento dos dados sintéticos para classificação

In [2]:
from torch.utils.data import random_split

fake_fulltrainset, fake_evalset = random_split(fake_set, [2592, 2592])
fake_trainset, fake_testset = random_split(fake_fulltrainset, [1728, 864])
real_fulltrainset, real_evalset = random_split(real_set, [2592, 2592])
real_trainset, real_testset = random_split(real_fulltrainset, [1728, 864])

## Definição do modelo do classificador

In [3]:
from braindecode.util import set_random_seeds
from braindecode.models import EEGNetv4

cuda = torch.cuda.is_available()
device = 'cuda' if cuda else 'cpu'
if cuda:
    torch.backends.cudnn.benchmark = False

seed = 20200220
set_random_seeds(seed=seed, cuda=cuda)

n_classes = 4
n_chans = 22
input_window_samples = 1125
F1, D = 4, 2
kernel_length = 64

model = EEGNetv4(
    n_chans,
    n_classes,
    input_window_samples=input_window_samples,
    final_conv_length='auto',
    F1=8,
    D=2,
    F2=F1*D,
    kernel_length=kernel_length,
    drop_prob=0.5
)
model.to(device);

## Treinamento do classificador com dados reais

In [5]:
from skorch.helper import predefined_split
from skorch.callbacks import LRScheduler
from braindecode import EEGClassifier

batch_size = 32
n_epochs = 50

real_clf = EEGClassifier(
    model,
    criterion=torch.nn.NLLLoss,
    optimizer=torch.optim.Adam,
    train_split=predefined_split(real_testset),
    batch_size=batch_size,
    callbacks=[
        "accuracy", ("lr_scheduler", LRScheduler('CosineAnnealingLR', T_max=n_epochs - 1)),
    ],
    device=device,
)
real_clf.fit(real_trainset, y=None, epochs=n_epochs);

  epoch    train_accuracy    train_loss    valid_accuracy    valid_loss      lr     dur
-------  ----------------  ------------  ----------------  ------------  ------  ------
      1            [36m0.3455[0m        [32m1.4067[0m            [35m0.3299[0m        [31m1.3758[0m  0.0100  1.9494
      2            [36m0.4317[0m        [32m1.3271[0m            [35m0.3657[0m        [31m1.2899[0m  0.0100  0.6581
      3            [36m0.4479[0m        [32m1.2716[0m            [35m0.4028[0m        [31m1.2814[0m  0.0100  0.6625
      4            [36m0.4653[0m        [32m1.2141[0m            [35m0.4630[0m        1.3483  0.0099  0.6587
      5            0.4444        [32m1.2133[0m            0.4271        1.3368  0.0098  0.6585
      6            [36m0.5457[0m        [32m1.2074[0m            [35m0.5000[0m        [31m1.1507[0m  0.0097  0.6604
      7            [36m0.5608[0m        [32m1.1547[0m            0.4919        1.1842  0.0096  0.6567
      8   

In [6]:
print(f"Mean Accuracy: {np.mean(real_clf.predict(real_evalset) == [y for X,y in real_evalset])*100:.2f}%")

Mean Accuracy: 61.30%


In [7]:
print(f"Mean Accuracy: {np.mean(real_clf.predict(fake_evalset) == [y for X,y in fake_evalset])*100:.2f}%")

Mean Accuracy: 63.50%


## Treinamento do classificador com dados sintéticos

In [8]:
fake_clf = EEGClassifier(
    model,
    criterion=torch.nn.NLLLoss,
    optimizer=torch.optim.Adam,
    train_split=predefined_split(fake_testset),
    batch_size=batch_size,
    callbacks=[
        "accuracy", ("lr_scheduler", LRScheduler('CosineAnnealingLR', T_max=n_epochs - 1)),
    ],
    device=device,
)
fake_clf.fit(fake_trainset, y=None, epochs=n_epochs);

  epoch    train_accuracy    train_loss    valid_accuracy    valid_loss      lr     dur
-------  ----------------  ------------  ----------------  ------------  ------  ------
      1            [36m0.6088[0m        [32m1.1171[0m            [35m0.5498[0m        [31m1.0168[0m  0.0100  0.6651
      2            0.5579        [32m1.0631[0m            0.5139        1.0566  0.0100  0.6727
      3            [36m0.6788[0m        [32m1.0559[0m            [35m0.6204[0m        [31m0.9143[0m  0.0100  0.6578
      4            0.5735        [32m1.0319[0m            0.4907        1.1378  0.0099  0.6382
      5            0.6615        1.0583            0.5694        0.9547  0.0098  0.6388
      6            0.6574        [32m1.0257[0m            0.5787        0.9585  0.0097  0.6397
      7            0.6447        [32m1.0019[0m            0.5775        0.9847  0.0096  0.6379
      8            0.6620        1.0128            0.5579        0.9851  0.0095  0.6427
      9     

In [9]:
print(f"Mean Accuracy: {np.mean(fake_clf.predict(real_evalset) == [y for X,y in real_evalset])*100:.2f}%")

Mean Accuracy: 65.35%


In [10]:
print(f"Mean Accuracy: {np.mean(fake_clf.predict(fake_evalset) == [y for X,y in fake_evalset])*100:.2f}%")

Mean Accuracy: 60.65%


## Distância euclidiana entre os dados reais e sintéticos

In [11]:
size=(22, 1125)
real = X_.view(-1, *size)
fake = fake_X_.view(-1, *size)

In [12]:
torch.norm(real-fake, 2)

tensor(11171.8955)