# Install libraries 


In [1]:
import sys, os, json
import mne, sklearn, wandb
import numpy as np
import pandas as pd

from scipy.interpolate import interp1d
from nilearn import datasets, image, masking, plotting
from nilearn.input_data import NiftiLabelsMasker


# animation part
from IPython.display import HTML
import matplotlib
import matplotlib.pyplot as plt
# from celluloid import Camera   # it is convinient method to animate
from matplotlib import animation, rc
from matplotlib.animation import FuncAnimation


## torch libraries 
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, TensorDataset
from torch.utils.data import DataLoader, Subset

from pytorch_model_summary import summary


  warn("Fetchers from the nilearn.datasets module will be "


In [2]:
%load_ext autoreload
%autoreload 2
sys.path.insert(1, os.path.realpath(os.path.pardir))

from utils import get_datasets
from utils import preproc
from utils import torch_dataset
from utils import train_utils
from utils import inference
from utils.models_arch import autoencoder_v3
from utils.models_arch import vq_autoencoder

# Set all hyperparameters
- Cuda and GPU.
- Parameters of dataset. 
- random seed( if necessary). 


In [3]:
print(torch.cuda.is_available(), torch.cuda.device_count())
torch.cuda.set_device(1)

True 4


In [4]:
config = dict(  
                dataset_name = 'CWL', # NODDI
                fps = 250,
                new_fps=250, 
                n_channels = 30, # 64 
                # n_roi = 30,
                test_sec = 60, # in seconds.
                freqs = np.linspace(2, 100, 16), 
                
                
                bold_delay = 0,
                to_many = True,
                random_subsample = True,
                sample_per_epoch = 25600, 
                WINDOW_SIZE = 2048,
                    
                optimizer='adam',
                lr=0.0001,
                weight_decay=0, 
                batch_size=32, 

                loss_function = 'mae', 
                model_type = 'VQ_1D_CNN_AE_EEG'
                )



hp_autoencoder = dict(n_electrodes=config['n_channels'],
                      n_freqs = len(config['freqs']),
                      n_channels_out = config['n_channels']*len(config['freqs']),
                      channels = [128, 64, 64, 64], 
                      kernel_sizes=[7, 5, 3],
                      strides=[4, 4, 4],
                      codebook_size=256)

config = {**hp_autoencoder, **config}

params_train = {'batch_size': config['batch_size'],
                'shuffle': True,
                'num_workers': 0}

params_val = {'batch_size': config['batch_size'],
              'shuffle': False}

# Upload preprocessed dataset from np files. 
It should accelerate speed of experiments.

In [5]:
with open("labels_roi.json", 'r') as f:
    labels_roi = json.load(f)


if config['dataset_name']=='CWL':
    dataset_path = '../data/dataset_cwl_250_hz.npz'
    
elif config['dataset_name']=='NODDI':
    dataset_path = '../data/dataset_NODDI_250_hz.npz'
else:
    print('no such dataset')


# download data
data = np.load(dataset_path)
eeg_train_cliped = np.log(np.clip(data['x_train'], 0, np.max(data['x_train'])) + 0.0000001)
eeg_test_cliped = np.log(np.clip(data['x_test'], 0, np.max(data['x_test'])) + 0.0000001)


train_dataset_prep = (eeg_train_cliped, eeg_train_cliped.reshape([-1, eeg_train_cliped.shape[-1]]))
test_dataset_prep = (eeg_test_cliped, eeg_test_cliped.reshape([-1, eeg_test_cliped.shape[-1]]))





# apply time dealy corrected
train_dataset_prep = preproc.bold_time_delay_align(train_dataset_prep, 
                                                   config['new_fps'],
                                                   config['bold_delay'])
test_dataset_prep = preproc.bold_time_delay_align(test_dataset_prep, 
                                                  config['new_fps'],
                                                  config['bold_delay'])


print('Size of train dataset:', train_dataset_prep[0].shape, train_dataset_prep[1].shape)
print('Size of test dataset:', test_dataset_prep[0].shape, test_dataset_prep[1].shape)

# torch dataset creation 
torch_dataset_train = torch_dataset.CreateDataset_eeg_fmri(train_dataset_prep, 
                                                            random_sample=config['random_subsample'], 
                                                            sample_per_epoch=config['sample_per_epoch'], 
                                                            to_many=config['to_many'], 
                                                            window_size = config['WINDOW_SIZE'])

torch_dataset_test = torch_dataset.CreateDataset_eeg_fmri(test_dataset_prep, 
                                                            random_sample=False, 
                                                            sample_per_epoch=None, 
                                                            to_many=config['to_many'], 
                                                            window_size = config['WINDOW_SIZE'])


# init dataloaders for training
train_loader = torch.utils.data.DataLoader(torch_dataset_train, **params_train)
val_loader = torch.utils.data.DataLoader(torch_dataset_test, **params_val)

Size of train dataset: (30, 16, 54225) (480, 54225)
Size of test dataset: (30, 16, 15000) (480, 15000)


# Init Model, Loss, optimizers

In [6]:
model = vq_autoencoder.VQ_AutoEncoder1D(**hp_autoencoder)

loss_func = train_utils.make_mae_loss()
train_step = train_utils.train_step

optimizer = optim.Adam(model.parameters(), 
                       lr=config['lr'], 
                       weight_decay=config['weight_decay'])


print(summary(model, torch.zeros(4, config['n_channels'], 
                                 len(config['freqs']),
                                 config['WINDOW_SIZE']), show_input=False))


-------------------------------------------------------------------------------------------
          Layer (type)                        Output Shape         Param #     Tr. Param #
              Conv1d-1                      [4, 128, 2048]          61,568          61,568
             Block1D-2                        [4, 64, 512]          25,664          25,664
             Block1D-3                        [4, 64, 128]          21,568          21,568
             Block1D-4                         [4, 64, 32]          21,568          21,568
   VectorQuantizer1D-5     [], [4, 64, 32], [], [128, 256]          16,384          16,384
   UpsampleConvBlock-6                        [4, 64, 128]          21,568          21,568
   UpsampleConvBlock-7                        [4, 64, 512]          21,568          21,568
   UpsampleConvBlock-8                      [4, 128, 2048]          75,904          75,904
              Conv1d-9                      [4, 480, 2048]          61,920          61,92



# Model training

In [7]:
### Inference functions 

def eeg_inference(y_pred, y_true, n_freq=16):
    """
    Input shape is [n_channels*n_freq, time]
    n_freq - 16 by default
    """
    n_ch_freq, time = y_pred.shape
    n_ch = n_ch_freq//n_freq
    y_pred = y_pred.reshape(-1, n_freq, time)
    y_true = y_true.reshape(-1, n_freq, time)
    


    fig, ax = plt.subplots(n_freq, 2, figsize = (8,17),sharex=True, sharey=True, 
                          )
    fig.subplots_adjust(hspace=0.1, wspace=0)

    for f in range(n_freq):
        ax[f, 0].imshow(y_pred[:,f, ::4], aspect = 'auto')
        ax[f, 1].imshow(y_true[:,f, ::4], aspect = 'auto')
        

    return fig

def eeg_inference_function(model, dataset, labels, device='cuda',to_many=True):
    y_hats, y_true = inference.make_inference_seq_2_seq(model, dataset, device=device)
    fig = eeg_inference(y_true, y_hats)
    corrs = inference.calculate_corrs(y_prediction=y_hats, y_test=y_true)
    
    
    fig_bars = plt.figure(figsize = (8, 8), dpi= 125)
    plt.bar(np.arange(len(corrs)), corrs)
    plt.ylim(-1, 1)
    
    return fig, fig_bars, corrs
                                         


In [8]:
def train_step(x_batch, y_batch, model, optimizer, loss_function):
    
    optimizer.zero_grad()
    
    y_hat, codebook_loss, perp = model(x_batch)
    rec_losses = loss_function(y_hat, y_batch)
    
    sum_loss = 0.25*codebook_loss + rec_losses[0]
    sum_loss.backward()
    optimizer.step()
    
    
    losses = [sum_loss , codebook_loss, perp]
    return losses

In [9]:
test_dataset_prep[1].shape

(480, 15000)

In [None]:
n_runs = 3

for i in range(n_runs):
    
    model = vq_autoencoder.VQ_AutoEncoder1D(**hp_autoencoder)

    loss_func = train_utils.make_mse_loss()
    train_step = train_step

    optimizer = optim.Adam(model.parameters(), 
                           lr=config['lr'], 
                           weight_decay=config['weight_decay'])


    parameters = {
        'EPOCHS': 700,
        'model': model, 
        'train_loader': train_loader, 
        'val_loader': val_loader, 
        'loss_function': loss_func,
        'train_step': train_step,
        'optimizer': optimizer, 
        'device': 'cuda', 
        'raw_test_data': test_dataset_prep,
        'show_info': 1, 
        'num_losses': 5,
        'labels': labels_roi,
        'inference_function': eeg_inference_function, 
        'to_many': config['to_many']
    }



    path_to_save_wandb = 'common/koval_alvi/Checkpoints/wandb_brain'
    
    
    with wandb.init(project="eeg_fmri", config=config, save_code=True):
        
        wandb.define_metric("val/corr_mean", summary="max")

        if i == 0: 
            exp_name = wandb.run.name
        
        wandb.run.name = exp_name +'_run_' + str(i)
        
        print(config)
        print(parameters['model'])
        print(summary(model, torch.zeros(4, config['n_channels'], 
                                         len(config['freqs']),
                                         config['WINDOW_SIZE']), show_input=False))
        
        model = train_utils.wanb_train_regression(**parameters)
        

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkoval_alvi[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.10 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


{'n_electrodes': 30, 'n_freqs': 16, 'n_channels_out': 480, 'channels': [128, 64, 64, 64], 'kernel_sizes': [7, 5, 3], 'strides': [4, 4, 4], 'codebook_size': 256, 'dataset_name': 'CWL', 'fps': 250, 'new_fps': 250, 'n_channels': 30, 'test_sec': 60, 'freqs': array([  2.        ,   8.53333333,  15.06666667,  21.6       ,
        28.13333333,  34.66666667,  41.2       ,  47.73333333,
        54.26666667,  60.8       ,  67.33333333,  73.86666667,
        80.4       ,  86.93333333,  93.46666667, 100.        ]), 'bold_delay': 0, 'to_many': True, 'random_subsample': True, 'sample_per_epoch': 25600, 'WINDOW_SIZE': 2048, 'optimizer': 'adam', 'lr': 0.0001, 'weight_decay': 0, 'batch_size': 32, 'loss_function': 'mae', 'model_type': 'VQ_1D_CNN_AE_EEG'}
VQ_AutoEncoder1D(
  (spatial_reduce): Conv1d(480, 128, kernel_size=(1,), stride=(1,))
  (downsample_blocks): ModuleList(
    (0): Block1D(
      (downsample): AvgPool1d(kernel_size=(4,), stride=(4,), padding=(0,))
      (conv1d): Conv1d(128, 64, kernel_



................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
Epoch 1 train loss_0 : 7.13 val loss_0 : 4.03 train loss_1 : 1.24 val loss_1 : 0.763 train loss_2 : 14.5 val loss_2 : 0.0 train loss_3 : 0.0 val loss_3 : 0.0 train loss_4 : 0.0 val loss_4 : 0.0 
....

# 