# Import libraries


In [1]:
import sys, os, json
import mne, sklearn, wandb
import numpy as np
import pandas as pd

from scipy.interpolate import interp1d
from nilearn import datasets, image, masking, plotting
from nilearn.input_data import NiftiLabelsMasker


# animation part
from IPython.display import HTML
import matplotlib
import matplotlib.pyplot as plt
# from celluloid import Camera   # a convenient way to build animations
from matplotlib import animation, rc
from matplotlib.animation import FuncAnimation


## torch libraries 
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, TensorDataset
from torch.utils.data import DataLoader, Subset

from pytorch_model_summary import summary




In [2]:
# Enable the IPython autoreload extension (mode 2) so edits to the project
# modules imported below are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Insert the parent of the current working directory into the import path at
# index 1 so the `utils` package imported below resolves
# (presumably the repository root — confirm the notebook's working directory).
sys.path.insert(1, os.path.realpath(os.path.pardir))

from utils import get_datasets
from utils import preproc
from utils import torch_dataset
from utils import train_utils
from utils import inference
from utils.models_arch import autoencoder_new_Artur

# Set all hyperparameters
- CUDA / GPU device selection.
- Dataset parameters.
- Random seed (if necessary).


In [3]:
# Optional reproducibility switches. They are intentionally left disabled;
# uncomment the whole group to fix the Python / NumPy / torch seeds and to
# force deterministic cuDNN kernels.
# import random

# torch.manual_seed(0)
# random.seed(0)  # python operation seed
# np.random.seed(0)

# torch.backends.cudnn.benchmark = False
# torch.backends.cudnn.deterministic = True

# Report whether CUDA is usable and how many devices are visible, then make
# device 0 the current CUDA device for the rest of the notebook.
cuda_ready = torch.cuda.is_available()
n_gpus = torch.cuda.device_count()
print(cuda_ready, n_gpus)
torch.cuda.set_device(0)

True 4


In [4]:
# Experiment configuration. The same dict is later merged with the model
# hyperparameters and passed to wandb.init(config=...).
# NOTE(review): 'loss_function' is labelled 'corr' while mse_weight=1 and
# corr_weight=0, and 'preproc_type' is 'dB_log' — confirm these labels
# describe what the notebook actually runs.
config = {
    # dataset selection and sampling rates
    'dataset_name': 'CWL_raw',  # CWL
    'patients': 'trio1',
    'fps': 1000,
    'new_fps': 100,
    'crop_start': 5,
    'freqs': [-1],

    # input / output sizes
    'n_channels': 30,  # 63
    'n_roi': 8,

    # windowing and sampling of training examples
    'bold_delay': 6,
    'to_many': True,
    'random_subsample': True,
    'sample_per_epoch': 512,
    'WINDOW_SIZE': 2048,

    # optimisation
    'optimizer': 'adamW',
    'lr': 3e-4,
    'weight_decay': 3e-4,
    'batch_size': 16,

    # loss weights
    'mse_weight': 1,
    'corr_weight': 0,

    # bookkeeping labels
    'preproc_type': 'dB_log',
    'loss_function': 'corr',
    'model_type': 'Best_AE_Artur_Multi_Head',
}

# Hyperparameters handed to the autoencoder constructor; the first three are
# derived from the experiment configuration above.
hp_autoencoder = {
    'n_electrodes': config['n_channels'],
    'n_freqs': len(config['freqs']),
    'n_channels_out': config['n_roi'],

    'channels': [128, 128, 128, 128],
    'kernel_sizes': [5, 5, 3],
    'strides': [8, 8, 4],
    'dilation': [1, 1, 1],
    'decoder_reduce': 4,
    'hidden_channels': 16,
}

# Single flat dict for logging: model hyperparameters first, then the
# experiment settings (which would win on a key clash).
config = dict(hp_autoencoder, **config)

# Keyword arguments for the training and validation DataLoaders.
params_train = dict(batch_size=config['batch_size'],
                    shuffle=True,
                    num_workers=0)

params_val = dict(batch_size=config['batch_size'],
                  shuffle=False)

# Load the preprocessed dataset from `.npz` files
Loading the cached, preprocessed arrays speeds up experiments.

In [5]:
# ROI label list stored with the interim data; used later as the column
# names of the fMRI table (presumably 17 entries, per the file name).
with open("../data/interim/labels_roi_17.json") as f:
    labels_roi_17 = json.load(f)

# ROI names selected from that table downstream: four per hemisphere.
labels_roi = [
    'Left Pallidum',
    'Left Caudate',
    'Left Putamen',
    'Left Accumbens',
] + [
    'Right Pallidum',
    'Right Caudate',
    'Right Putamen',
    'Right Accumbens',
]

In [6]:

# Resolve the path of the preprocessed .npz recording for the configured
# dataset. An unknown dataset name is a hard error: merely printing a message
# would let the cell continue to np.load with `dataset_path` undefined (or,
# worse, with a stale value left over from an earlier run of this cell).
# NOTE(review): the sampling rates in the file names (1000 / 250) are
# hard-coded rather than taken from config['fps'] — confirm they stay in sync.
if config['dataset_name'] == 'CWL_raw':
    dataset_path = f"../data/interim/CWL/{config['patients']}_1000_filtered_data.npz"
elif config['dataset_name'] == 'NODDI_raw':
    dataset_path = '../data/interim/NODDI/32_250_filtered_data.npz'
else:
    raise ValueError(
        f"no such dataset: {config['dataset_name']!r} "
        "(expected 'CWL_raw' or 'NODDI_raw')"
    )



# --- load the cached recording ---------------------------------------------
data = np.load(dataset_path)
eeg = data['eeg']
fmri = data['fmri']

# --- keep only the ROIs named in labels_roi ----------------------------------
# Label the fMRI rows with the full ROI list, select by name, then return to
# the original (roi, sample) orientation.
df = pd.DataFrame(fmri.T, columns=labels_roi_17)
df_filter = df[labels_roi]
fmri = df_filter.to_numpy().T

# --- crop the start of the recording -----------------------------------------
# Drops the first crop_start * fps samples along the last axis of both arrays.
train_crop = config['crop_start'] * config['fps']
eeg = eeg[..., train_crop:]
fmri = fmri[..., train_crop:]

# --- normalise ---------------------------------------------------------------
# EEG is scaled by a single global standard deviation; fMRI goes through the
# project helper, which also returns the statistics it used.
# NOTE(review): both normalisations run before the train/test split, so the
# held-out segment contributes to the statistics — confirm this is intended.
eeg = eeg / np.std(eeg)
fmri, fmri_means_stds = preproc.normalize_data(fmri)

# --- train / test split ------------------------------------------------------
# The last 60 * fps samples are held out as the test segment.
test_time = int(60 * config['fps'])
train_span = slice(None, -test_time)
test_span = slice(-test_time, None)
train_dataset_prep = (eeg[..., train_span], fmri[..., train_span])
test_dataset_prep = (eeg[..., test_span], fmri[..., test_span])

# --- downsample from fps to new_fps ------------------------------------------
ds_factor = config['fps'] / config['new_fps']
train_dataset_prep = preproc.downsample_dataset(train_dataset_prep, factor=ds_factor)
test_dataset_prep = preproc.downsample_dataset(test_dataset_prep, factor=ds_factor)

# --- apply the BOLD time-delay correction to each split ----------------------
train_dataset_prep = preproc.bold_time_delay_align(
    train_dataset_prep,
    config['new_fps'],
    config['bold_delay'],
)
test_dataset_prep = preproc.bold_time_delay_align(
    test_dataset_prep,
    config['new_fps'],
    config['bold_delay'],
)

# Report the (EEG, fMRI) array shapes of both splits.
print('Size of train dataset:', train_dataset_prep[0].shape, train_dataset_prep[1].shape)
print('Size of test dataset:', test_dataset_prep[0].shape, test_dataset_prep[1].shape)

# Wrap the preprocessed (EEG, fMRI) pairs in the project's torch Dataset.
# Training uses the random sub-sampling settings from the config; the test
# dataset is built with random sampling switched off.
torch_dataset_train = torch_dataset.CreateDataset_eeg_fmri(
    train_dataset_prep,
    window_size=config['WINDOW_SIZE'],
    to_many=config['to_many'],
    random_sample=config['random_subsample'],
    sample_per_epoch=config['sample_per_epoch'],
)

torch_dataset_test = torch_dataset.CreateDataset_eeg_fmri(
    test_dataset_prep,
    window_size=config['WINDOW_SIZE'],
    to_many=config['to_many'],
    random_sample=False,
    sample_per_epoch=None,
)
print(len(torch_dataset_test))
# The validation dataset has no stride option, so keep only every 100th item.
torch_dataset_test = Subset(torch_dataset_test,
                            np.arange(0, len(torch_dataset_test), 100))

# Data loaders used by the training routine.
train_loader = DataLoader(torch_dataset_train, **params_train)
val_loader = DataLoader(torch_dataset_test, **params_val)




Size of train dataset: (30, 20980) (8, 20980)
Size of test dataset: (30, 5400) (8, 5400)
3351


# Init Model, Loss, optimizers

In [7]:
# Instantiate the multi-head autoencoder from its hyperparameters and print a
# layer summary for an all-zero batch of 4 inputs shaped
# (n_channels, WINDOW_SIZE).
model = autoencoder_new_Artur.AutoEncoder1D_Artur_MultiHead(hp_autoencoder)

dummy_batch = torch.zeros(4, config['n_channels'], config['WINDOW_SIZE'])
print(summary(model, dummy_batch, show_input=False))


-----------------------------------------------------------------------------
            Layer (type)        Output Shape         Param #     Tr. Param #
   AutoEncoder1D_Artur-1        [4, 1, 2048]         245,057         245,057
   AutoEncoder1D_Artur-2        [4, 1, 2048]         245,057         245,057
   AutoEncoder1D_Artur-3        [4, 1, 2048]         245,057         245,057
   AutoEncoder1D_Artur-4        [4, 1, 2048]         245,057         245,057
   AutoEncoder1D_Artur-5        [4, 1, 2048]         245,057         245,057
   AutoEncoder1D_Artur-6        [4, 1, 2048]         245,057         245,057
   AutoEncoder1D_Artur-7        [4, 1, 2048]         245,057         245,057
   AutoEncoder1D_Artur-8        [4, 1, 2048]         245,057         245,057
Total params: 1,960,456
Trainable params: 1,960,456
Non-trainable params: 0
-----------------------------------------------------------------------------


# Model training

In [None]:
# Train the same architecture n_runs times. Every iteration builds a fresh
# model, loss and optimizer, and logs to its own W&B run; all runs share the
# auto-generated name of the first run plus a '_run_<index>' suffix.
n_runs = 3

for run_idx in range(n_runs):

    model = autoencoder_new_Artur.AutoEncoder1D_Artur_MultiHead(hp_autoencoder)

    loss_func = train_utils.make_complex_loss_function(
        mse_weight=config['mse_weight'],
        corr_weight=config['corr_weight'],
        manifold_weight=0,
        bound=1,
    )
    train_step = train_utils.train_step

    optimizer = optim.AdamW(
        model.parameters(),
        lr=config['lr'],
        weight_decay=config['weight_decay'],
    )

    # Everything the project's training routine needs, passed as keywords.
    parameters = dict(
        EPOCHS=1500,
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        loss_function=loss_func,
        train_step=train_step,
        optimizer=optimizer,
        device='cuda',
        raw_test_data=test_dataset_prep,
        show_info=20,
        num_losses=5,
        labels=labels_roi,
        inference_function=inference.model_inference_function,
        to_many=config['to_many'],
    )

    # NOTE(review): assigned but not used anywhere in this cell — confirm
    # whether it was meant to be passed to wandb or the training routine.
    path_to_save_wandb = 'common/koval_alvi/Checkpoints/wandb_brain'

    with wandb.init(project="eeg_fmri", config=config, save_code=True):

        # Report the maximum of the validation correlation in the run summary.
        wandb.define_metric("val/corr_mean", summary="max")

        # The first run's generated name becomes the shared experiment name.
        if run_idx == 0:
            exp_name = wandb.run.name

        wandb.run.name = exp_name + '_run_' + str(run_idx)

        print(config)
        print(model)
        print(summary(model,
                      torch.zeros(4, config['n_channels'], config['WINDOW_SIZE']),
                      show_input=False))

        model = train_utils.wanb_train_regression(**parameters)
        

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkoval_alvi[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.13.1 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


{'n_electrodes': 30, 'n_freqs': 1, 'n_channels_out': 8, 'channels': [128, 128, 128, 128], 'kernel_sizes': [5, 5, 3], 'strides': [8, 8, 4], 'dilation': [1, 1, 1], 'decoder_reduce': 4, 'hidden_channels': 16, 'dataset_name': 'CWL_raw', 'patients': 'trio2', 'fps': 1000, 'new_fps': 100, 'crop_start': 5, 'freqs': [-1], 'n_channels': 30, 'n_roi': 8, 'bold_delay': 6, 'to_many': True, 'random_subsample': True, 'sample_per_epoch': 512, 'WINDOW_SIZE': 2048, 'optimizer': 'adamW', 'lr': 0.0003, 'weight_decay': 0.0003, 'batch_size': 16, 'mse_weight': 1, 'corr_weight': 0, 'preproc_type': 'dB_log', 'loss_function': 'corr', 'model_type': 'Best_AE_Artur_Multi_Head'}
AutoEncoder1D_Artur_MultiHead(
  (models): ModuleList(
    (0): AutoEncoder1D_Artur(
      (artur_block): ArturBlock(
        (unmixing_layer): Conv1d(30, 16, kernel_size=(1,), stride=(1,))
        (unmixed_channels_batchnorm): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
        (band_pass): Conv1d(16, 16

VBox(children=(Label(value=' 2.93MB of 2.93MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train/loss_0,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/loss_1,▁▇▇█████████████████████████████████████
train/loss_2,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/loss_3,█▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val/corr_mean,▁▃▄▃▅▆▆▆▇▇▇▇███▇██
val/loss_0,▄█▄▂▄▅▄▃▂▂▁▂▃▂▂▂▂▂▄▂▃▃▂▂▂▁▂▃▃▂▅▂▃▃▄▁▂▄▂▁
val/loss_1,▁▂▂▄▄▃▄▅▅▆▆▇▆▆▆▇▇▅▅▆▅▅▇▇▇▇▆▆▇█▆▆▆▇▅▇█▆▇█
val/loss_2,▄█▄▂▄▅▄▃▂▂▁▂▃▂▂▂▂▂▄▂▃▃▂▂▂▁▂▃▃▂▅▂▃▃▄▁▂▄▂▁
val/loss_3,█▂▂▂▁▁▁▁▂▁▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
train/loss_0,0.03609
train/loss_1,0.9682
train/loss_2,0.03609
train/loss_3,2.7029
val/loss_0,2.01057
val/loss_1,0.13255
val/loss_2,2.01057
val/loss_3,8.02878


[34m[1mwandb[0m: wandb version 0.13.1 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


{'n_electrodes': 30, 'n_freqs': 1, 'n_channels_out': 8, 'channels': [128, 128, 128, 128], 'kernel_sizes': [5, 5, 3], 'strides': [8, 8, 4], 'dilation': [1, 1, 1], 'decoder_reduce': 4, 'hidden_channels': 16, 'dataset_name': 'CWL_raw', 'patients': 'trio2', 'fps': 1000, 'new_fps': 100, 'crop_start': 5, 'freqs': [-1], 'n_channels': 30, 'n_roi': 8, 'bold_delay': 6, 'to_many': True, 'random_subsample': True, 'sample_per_epoch': 512, 'WINDOW_SIZE': 2048, 'optimizer': 'adamW', 'lr': 0.0003, 'weight_decay': 0.0003, 'batch_size': 16, 'mse_weight': 1, 'corr_weight': 0, 'preproc_type': 'dB_log', 'loss_function': 'corr', 'model_type': 'Best_AE_Artur_Multi_Head'}
AutoEncoder1D_Artur_MultiHead(
  (models): ModuleList(
    (0): AutoEncoder1D_Artur(
      (artur_block): ArturBlock(
        (unmixing_layer): Conv1d(30, 16, kernel_size=(1,), stride=(1,))
        (unmixed_channels_batchnorm): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
        (band_pass): Conv1d(16, 16

VBox(children=(Label(value=' 1.47MB of 1.47MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train/loss_0,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/loss_1,▁▇▇█████████████████████████████████████
train/loss_2,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/loss_3,█▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val/corr_mean,▁▂▂▄▇██
val/loss_0,▁▆▆█▅▄▃▃▄▅▆▅▆▄▆▅▆▄▄▃▄▃▆▅▆▃▁▂▃▂▆▆▃▁▂▄▆▄▅▆
val/loss_1,█▄▃▁▄▅▄▇▄▂▄▂▄▅▄▅▃▆▃▅▅▃▃▄▄▄▇▄▅▇▃▄▅▇▆▆▃▄▄▃
val/loss_2,▁▆▆█▅▄▃▃▄▅▆▅▆▄▆▅▆▄▄▃▄▃▆▅▆▃▁▂▃▂▆▆▃▁▂▄▆▄▅▆
val/loss_3,█▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▁▂▂▁▂▂▁▁▂▁▁▂▂▁▂▁▁▁▁▁▁▁▁▁

0,1
train/loss_0,0.03646
train/loss_1,0.96895
train/loss_2,0.03646
train/loss_3,2.70657
val/loss_0,2.0985
val/loss_1,0.0511
val/loss_2,2.0985
val/loss_3,8.28913


[34m[1mwandb[0m: wandb version 0.13.1 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


{'n_electrodes': 30, 'n_freqs': 1, 'n_channels_out': 8, 'channels': [128, 128, 128, 128], 'kernel_sizes': [5, 5, 3], 'strides': [8, 8, 4], 'dilation': [1, 1, 1], 'decoder_reduce': 4, 'hidden_channels': 16, 'dataset_name': 'CWL_raw', 'patients': 'trio2', 'fps': 1000, 'new_fps': 100, 'crop_start': 5, 'freqs': [-1], 'n_channels': 30, 'n_roi': 8, 'bold_delay': 6, 'to_many': True, 'random_subsample': True, 'sample_per_epoch': 512, 'WINDOW_SIZE': 2048, 'optimizer': 'adamW', 'lr': 0.0003, 'weight_decay': 0.0003, 'batch_size': 16, 'mse_weight': 1, 'corr_weight': 0, 'preproc_type': 'dB_log', 'loss_function': 'corr', 'model_type': 'Best_AE_Artur_Multi_Head'}
AutoEncoder1D_Artur_MultiHead(
  (models): ModuleList(
    (0): AutoEncoder1D_Artur(
      (artur_block): ArturBlock(
        (unmixing_layer): Conv1d(30, 16, kernel_size=(1,), stride=(1,))
        (unmixed_channels_batchnorm): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
        (band_pass): Conv1d(16, 16

# 