In [9]:

import os
import sys

# Location of the project folder inside the mounted Google Drive.
GOOGLE_DRIVE_PATH_AFTER_MYDRIVE = "Colab Notebooks/prj_neuroread_analysis/neuroread/"
GOOGLE_DRIVE_PATH = os.path.join("/content", "drive", "MyDrive", GOOGLE_DRIVE_PATH_AFTER_MYDRIVE)

# Fail early with an actionable message when the folder is missing, which is
# what happens if this cell runs before Google Drive has been mounted.
if not os.path.isdir(GOOGLE_DRIVE_PATH):
    raise FileNotFoundError(
        f"{GOOGLE_DRIVE_PATH!r} does not exist. Mount Google Drive "
        "(drive.mount('/content/drive')) before running this cell."
    )
print(os.listdir(GOOGLE_DRIVE_PATH))

# Make the project's .py files importable. The membership test keeps sys.path
# free of duplicate entries when the cell is executed more than once.
if GOOGLE_DRIVE_PATH not in sys.path:
    sys.path.append(GOOGLE_DRIVE_PATH)
os.chdir(GOOGLE_DRIVE_PATH)

# Install unavailable packages
import pip
def import_or_install(package):
    """Import ``package``; if the import fails, install it with pip.

    The installation targets the interpreter that runs this kernel.

    Args:
        package: Name used both as the import name and as the pip requirement.

    Raises:
        subprocess.CalledProcessError: If the pip command exits with a
            non-zero status.
    """
    import importlib
    import subprocess
    import sys

    try:
        importlib.import_module(package)
    except ImportError:
        # pip offers no supported in-process API (``pip.main`` is gone from the
        # public package since pip 10); invoking it as a subprocess of the
        # current interpreter is the supported route.
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        # Let the import system notice the freshly installed files.
        importlib.invalidate_caches()

# mne is imported further down in this notebook; make sure it is available first.
import_or_install("mne")


['.git', '.DS_Store', '.gitignore', 'EEG', 'LICENSE', 'train_cl_eeg2speech_rochester_v1.ipynb', 'train_cl_eeg2speech_rochester_v2.ipynb', '.ipynb_checkpoints', 'train_cl_eeg2speech_rochester_v3_test_old.ipynb', 'runs', 'train_cl_eeg2speech_rochester_v3_test.ipynb', 'train_cl_eeg2speech_rochester_v4_gridseaerch.ipynb', 'train_cl_eeg2speech_2.ipynb', 'train_cl_eeg2speech_rochester_subj_2.ipynb', 'README.md', 'train_eeg2speech_rochester.ipynb', 'train_cl_eeg2speech_rochester_v3.ipynb', 'models', 'run_training_gridsearch.py', 'tools', 'train_cl_eeg2speech_rochester_gridsearch.ipynb', 'train_cl_eeg2speech_rochester_gridsearch_test.ipynb', 'train_cl_eeg2speech_rochester_v4_gridsearch.ipynb', 'run_training_gridserch.ipynb']


In [1]:
# Mount Google Drive at /content/drive. The project path used elsewhere in this
# notebook lives under /content/drive/MyDrive, so this cell has to run before
# any cell that touches that path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [11]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:
from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
 print('Not connected to a GPU')
else:
 print(gpu_info)

Your runtime has 89.6 gigabytes of available RAM

Fri Mar  3 23:10:29 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0    51W / 400W |    963MiB / 40960MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-------------------------------------

In [13]:
import os, sys, glob

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import numpy as np

import mne
import time


from torchsummary import summary
from torch.utils.tensorboard import SummaryWriter

# Use the first CUDA device when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

from tools.train import eval_model_cl
from tools.data import unfold_raw, rm_repeated_annotations
from tools.load_data import load_data

from models.eeg_encoder import EEGEncoder
from models.envelope_encoder import EnvelopeEncoder
from models.contrastive_eeg_speech import CLEE
# -------------------------------------

cuda:0


In [14]:






# Hyperparameters, hard-coded for the notebook. Swap in sys.argv[1] / sys.argv[2]
# to drive the run from the command line; the casts turn string arguments into
# the numeric types the optimizer and the DataLoader need.
lr = float(0.001)  # sys.argv[1]
batch_size = int(256)  # sys.argv[2]


# Windowing parameters handed to load_data, expressed as multiples of fs
# (assumed to be the sampling rate in Hz - TODO confirm).
subj_ids = list(range(1, 20))  # subject ids 1..19
fs = 128
window_size = int(5 * fs)
stride_size_train = int(2.5 * fs)
stride_size_val = int(5 * fs)
stride_size_test = int(5 * fs)

n_channs = 129 # 128 for eeg, 1 for env
print('-------------------------------------')
print(f'window_size: {window_size}  stride_size_test: {stride_size_test}')

# Input folders: <outputs_path>/<dataset>/<condition>/after_ica_raw
dataset_name = ['rochester_data', 'natural_speech']
outputs_path = '../outputs/'
data_path = os.path.join(outputs_path, dataset_name[0], dataset_name[1])
after_ica_path = os.path.join(data_path, 'after_ica_raw')
print(f'data_path: {data_path}')


# Call the project's load_data helper with the subject ids, the after-ICA folder,
# the window/stride sizes and the channel count. The result is used below as a
# mapping with the keys 'eegs_train', 'envs_train', 'eegs_val' and 'envs_val'.
X = load_data(subj_ids, after_ica_path, window_size, 
              stride_size_train, stride_size_val, stride_size_test, n_channs)


# Create dataloaders
class MyDataset(Dataset):
    """Paired dataset that yields ``(eeg[i], env[i])`` for index ``i``.

    Args:
        eeg: Sized, indexable collection of EEG samples.
        env: Sized, indexable collection of envelope samples; item ``i`` is
            returned together with ``eeg[i]``.

    Raises:
        ValueError: If ``eeg`` and ``env`` do not have the same length.
    """

    def __init__(self, eeg, env):
        # Without this check a mismatch would either silently ignore trailing
        # items (env longer) or raise IndexError in the middle of an epoch
        # (env shorter).
        if len(eeg) != len(env):
            raise ValueError(
                f"eeg and env must have the same length, got {len(eeg)} and {len(env)}"
            )
        self.eeg = eeg
        self.env = env

    def __getitem__(self, index):
        """Return the ``(eeg, env)`` pair stored at ``index``."""
        return self.eeg[index], self.env[index]

    def __len__(self):
        """Return the number of pairs."""
        return len(self.eeg)
    

# Training and validation loaders use the same batching policy: shuffled
# batches of batch_size items, with a final incomplete batch discarded.
loader_kwargs = dict(batch_size=batch_size, shuffle=True, drop_last=True)

dataset_train = MyDataset(X['eegs_train'], X['envs_train'])
dl_train = DataLoader(dataset_train, **loader_kwargs)
dl_val = DataLoader(MyDataset(X["eegs_val"], X["envs_val"]), **loader_kwargs)



# Build the two encoders (EEG first, envelope second) and wrap them in the
# contrastive model, then move the model to the selected device.
eeg_encoder, env_encoder = EEGEncoder(), EnvelopeEncoder()
model = CLEE(eeg_encoder, env_encoder)
model.to(device)

# Registry of models to train, keyed by a run name built from the hyperparameters.
run_name = f'lr_{lr}_bs_{batch_size}'
models_dict = {run_name: model}

# Bookkeeping lists: losses, update/data ratios, and per-parameter update stats.
lossi, udri, ud = [], [], []



# Number of training epochs; epochs are numbered 1..N_EPOCHS.
N_EPOCHS = 399

for name, model in models_dict.items():

    # Fresh logger, optimizer and learning-rate schedule for each model.
    print(f"+--------------New model: {name}----------------------+")
    writer = SummaryWriter(log_dir=f"runs/{name}_{time.strftime('%Y%m%d_%H%M%S')}")
    model.to(device)
    optimizer = optim.NAdam(model.parameters(), lr=lr)
    # Multiply the learning rate by 0.7 every 150 scheduler steps (one step per epoch).
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=150, gamma=0.7)
    cnt = 0  # global batch counter, used as the x-axis of the per-batch loss
    loss_batches = []  # every batch loss of this model, across all epochs

    try:
        for epoch in range(1, N_EPOCHS + 1):

            print(f"====== Epoch: {epoch}")

            model.train()
            epoch_losses = []  # batch losses of the current epoch only
            for ix_batch, (Xb_eeg, Xb_env) in enumerate(dl_train):

                # send to device
                Xb_eeg = Xb_eeg.to(device)
                Xb_env = Xb_env.to(device)

                # Zero out gradients
                optimizer.zero_grad()

                # forward pass
                eeg_features, env_features, logit_scale = model(Xb_eeg, Xb_env)

                # Scale every feature vector to unit L2 norm along dim 1, so the
                # matrix product below yields scaled cosine similarities.
                eeg_features_n = eeg_features / eeg_features.norm(dim=1, keepdim=True)
                env_features_n = env_features / env_features.norm(dim=1, keepdim=True)

                # Pairwise similarity logits, EEG rows vs. envelope columns.
                logits_per_eeg = logit_scale * eeg_features_n @ env_features_n.t()
                logits_per_env = logits_per_eeg.t()

                # Symmetric cross-entropy: the target for row i is column i.
                # The labels are sized from the logits rather than the global
                # batch_size, so they always match the batch actually received.
                labels = torch.arange(logits_per_eeg.shape[0], device=device)
                loss_eeg = F.cross_entropy(logits_per_eeg, labels)
                loss_env = F.cross_entropy(logits_per_env, labels)
                loss = (loss_eeg + loss_env) / 2

                # backward pass
                loss.backward()
                optimizer.step()

                batch_loss = loss.item()
                loss_batches.append(batch_loss)
                epoch_losses.append(batch_loss)
                cnt += 1

                # Optional diagnostics (disabled):
                #ud = {f"p{ix}":(lr*p.grad.std() / p.data.std()).log10().item() for ix, p in enumerate(model.parameters()) if p.ndim==4 }
                #writer.add_scalars('UpdateOData/ud', ud, cnt)
                writer.add_scalar('Loss/train_batch', batch_loss, cnt)

                # Re-normalize the EEG encoder weights after each optimizer
                # step, outside of autograd tracking.
                with torch.no_grad():
                    model.eeg_encoder.normalize_weights()

            if not epoch_losses:
                raise RuntimeError(
                    "dl_train produced no batches, so there is no epoch loss to report"
                )

            # Mean training loss over the batches of this epoch.
            loss_epoch = sum(epoch_losses) / len(epoch_losses)
            writer.add_scalar('Loss/train_epoch', loss_epoch, epoch)
            # Optional diagnostics (disabled):
            #for pname, p in model.named_parameters():
            #writer.add_histogram(f'Params/{pname}', p, epoch)
            #writer.add_histogram(f'Grads/{pname}', p.grad, epoch)

            loss_val, *_ = eval_model_cl(dl_val, model, device=device)
            writer.add_scalar('Loss/val_epoch', loss_val, epoch)

            # Switch back to training mode after the evaluation call.
            model.train()

            # Update learning rate based on epoch
            scheduler.step()
    finally:
        # Flush pending events and release the event file, even if training
        # is interrupted or fails.
        writer.close()







-------------------------------------
window_size: 640  stride_size_test: 640
data_path: ../outputs/rochester_data/natural_speech
Opening raw data file ../outputs/rochester_data/natural_speech/after_ica_raw/subj_1_after_ica_raw.fif...
    Range : 0 ... 464571 =      0.000 ...  3629.461 secs
Ready.
Reading 0 ... 464571  =      0.000 ...  3629.461 secs...
Initial num of annots: 48  Num of removed annots: 19  Num of retained annots:  29
-------------------------------------
N train: 26  N val: 1  N test: 1
Opening raw data file ../outputs/rochester_data/natural_speech/after_ica_raw/subj_2_after_ica_raw.fif...
    Range : 0 ... 464571 =      0.000 ...  3629.461 secs
Ready.
Reading 0 ... 464571  =      0.000 ...  3629.461 secs...
Initial num of annots: 65  Num of removed annots: 19  Num of retained annots:  46
-------------------------------------
N train: 68  N val: 2  N test: 2
Opening raw data file ../outputs/rochester_data/natural_speech/after_ica_raw/subj_3_after_ica_raw.fif...
    Ran



  return F.conv2d(input, weight, bias, self.stride,


====> Validation loss: 5.6160,  X1 loss: 5.6028   X2 loss: 5.6292
====> Validation loss: 5.3987,  X1 loss: 5.3778   X2 loss: 5.4195
====> Validation loss: 5.3589,  X1 loss: 5.3378   X2 loss: 5.3800
====> Validation loss: 5.2886,  X1 loss: 5.2660   X2 loss: 5.3112
====> Validation loss: 5.1663,  X1 loss: 5.1441   X2 loss: 5.1886
====> Validation loss: 5.1270,  X1 loss: 5.1099   X2 loss: 5.1441
====> Validation loss: 5.0846,  X1 loss: 5.0701   X2 loss: 5.0992
====> Validation loss: 5.1721,  X1 loss: 5.1536   X2 loss: 5.1907
====> Validation loss: 5.0722,  X1 loss: 5.0601   X2 loss: 5.0843
====> Validation loss: 5.0839,  X1 loss: 5.0737   X2 loss: 5.0941
====> Validation loss: 5.1204,  X1 loss: 5.1089   X2 loss: 5.1318
====> Validation loss: 5.1363,  X1 loss: 5.1240   X2 loss: 5.1485
====> Validation loss: 5.0326,  X1 loss: 5.0223   X2 loss: 5.0429
====> Validation loss: 5.1234,  X1 loss: 5.1117   X2 loss: 5.1351
====> Validation loss: 4.9744,  X1 loss: 4.9595   X2 loss: 4.9892
====> Vali