## Make dataloader

In [1]:
import sys
sys.path.append('/srv/user/turishcheva/experanto_video_dev/experanto/')
from experanto.data import Mouse2pVideoDataset
from torch.utils.data import DataLoader
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from collections import OrderedDict

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Name of the mouse / recording session folder to load.
m = 'dynamic29712-5-9-Video-full' # mouse
# TODO: set this to the folder that contains the mouse directories before
# running the notebook. A placeholder string is used (instead of an empty
# right-hand side) so that the cell is valid Python.
path_pre = '/path/to/folder/with/mice/'

In [3]:
import os  # local import: `os` is otherwise only imported near the end of the notebook

# Folder of the selected mouse. `os.path.join` inserts the path separator
# when `path_pre` does not end with one, and produces the same result as plain
# concatenation when it does.
root_folder = os.path.join(path_pre, m)

**Important**

* If the `stim_duration` is below 65, change the `skip` parameter here (to 20, for the default sensorium configuration):
https://github.com/ecker-lab/sensorium_2023/blob/main/sensorium/utility/scores.py#L10
* Also note that original sensorium was released in 36 x 64 resolution, 144 x 256 might require more GPU memory

In [4]:
# Keyword arguments for the training split, collected in one place so they are
# easy to compare with the oracle split. `stim_duration=30` is below 65, so the
# note about the scoring `skip` parameter in the markdown above applies.
train_dataset_kwargs = dict(
    root_folder=root_folder,
    tier='train',
    stim_duration=30,
    sampling_rate=20,
    subsample=True,
    cut=True,
    add_channel=True,
    channel_pos=0,
    rescale=True,
)
train_dataset = Mouse2pVideoDataset(**train_dataset_kwargs)

In [5]:
# Keyword arguments for the 'oracle' tier, which is used as the validation set
# in this notebook. Only `tier` and `subsample` differ from the training split.
oracle_dataset_kwargs = {
    'tier': 'oracle',
    'stim_duration': 30,
    'sampling_rate': 20,
    'subsample': False,  # this would always start taking chunks from the beginning
    'cut': True,
    'add_channel': True,
    'channel_pos': 0,
    'rescale': True,
}
val_dataset = Mouse2pVideoDataset(root_folder, **oracle_dataset_kwargs)

In [6]:
batch_size = 4

# Nested mapping: tier name -> mouse name -> DataLoader.
# Only the training loader is shuffled; the oracle loader keeps dataset order.
data_loaders = OrderedDict(
    [
        (
            'train',
            OrderedDict(
                [(m, DataLoader(train_dataset, batch_size=batch_size, shuffle=True))]
            ),
        ),
        (
            'oracle',
            OrderedDict(
                [(m, DataLoader(val_dataset, batch_size=batch_size, shuffle=False))]
            ),
        ),
    ]
)

In [7]:
# Display the nested tier -> mouse -> DataLoader mapping as a sanity check.
data_loaders

OrderedDict([('train',
              OrderedDict([('dynamic29712-5-9-Video-full',
                            <torch.utils.data.dataloader.DataLoader at 0x7f3fb42f6a60>)])),
             ('oracle',
              OrderedDict([('dynamic29712-5-9-Video-full',
                            <torch.utils.data.dataloader.DataLoader at 0x7f3fb42f6a00>)]))])

In [8]:
# Pull a single batch from the training loader of mouse `m`; the following
# cells inspect its fields.
train_batches = iter(data_loaders['train'][m])
it = next(train_batches)



In [9]:
# Shape of the neural responses in the sampled batch.
it.responses.shape
# torch.Size([batch, neurons, times])

torch.Size([4, 7939, 30])

In [10]:
# Shape of the video stimulus in the sampled batch.
it.screen.shape
# the original dataloader was torch.Size([batch, channels, times, h, w])

torch.Size([4, 1, 30, 144, 256])

## Make and test forward pass for sensorium

In [11]:
# Seed passed to `set_random_seed` below and to `make_video_model` later on.
seed = 42
import sys
# Make the local checkouts of sensorium_2023 and neuralpredictors importable.
# These appends must run before the `sensorium` imports below.
# NOTE(review): absolute, user-specific paths — adjust for your machine.
sys.path.append('/srv/user/turishcheva/sensorium_replicate/sensorium_2023/')
sys.path.append('/srv/user/turishcheva/sensorium_replicate/neuralpredictors/')
import torch
from nnfabrik.utility.nn_helpers import set_random_seed
# Seed the random number generators through the nnfabrik helper
# (which libraries it seeds is not visible here — see nnfabrik).
set_random_seed(seed)

# NOTE(review): `mouse_video_loader` and `get_correlations` are not used in the
# cells of this notebook that are visible here.
from sensorium.datasets.mouse_video_loaders import mouse_video_loader
from sensorium.utility.scores import get_correlations
from nnfabrik.builder import get_trainer
from sensorium.models.make_model import make_video_model

In [12]:
# Configuration of the factorised 3D convolutional core
# (passed to `make_video_model` as `core_dict`).
factorised_3D_core_dict = {
    'input_channels': 1,  # increase if behaviour is used
    'hidden_channels': [32, 64, 128],
    'spatial_input_kernel': (11, 11),
    'temporal_input_kernel': 11,
    'spatial_hidden_kernel': (5, 5),
    'temporal_hidden_kernel': 5,
    'stride': 1,
    'layers': 3,
    'gamma_input_spatial': 10,
    'gamma_input_temporal': 0.01,
    'bias': True,
    'hidden_nonlinearities': 'elu',
    'x_shift': 0,
    'y_shift': 0,
    'batch_norm': True,
    'laplace_padding': None,
    'input_regularizer': 'LaplaceL2norm',
    'padding': False,
    'final_nonlin': True,
    'momentum': 0.7,
}

# No shifter network is configured in this notebook.
shifter_dict = None

# Configuration of the readout (passed to `make_video_model` as `readout_dict`).
# Alternative setting for `grid_mean_predictor`, kept for reference:
#     {
#         'type': 'cortex',
#         'input_dimensions': 2,
#         'hidden_layers': 1,
#         'hidden_features': 30,
#         'final_tanh': True,
#     }
readout_dict = {
    'bias': True,
    'init_mu_range': 0.2,
    'init_sigma': 1.0,
    'gamma_readout': 0.0,
    'gauss_type': 'full',
    'grid_mean_predictor': None,
    'share_features': False,
    'share_grid': False,
    'shared_match_ids': None,
    'gamma_grid_dispersion': 0.0,
}

In [13]:
# Keyword options for the model factory. The readout config is copied so the
# factory receives its own dict instead of the notebook-level `readout_dict`.
video_model_kwargs = dict(
    core_dict=factorised_3D_core_dict,
    core_type='3D_factorised',
    readout_dict=readout_dict.copy(),
    readout_type='gaussian',
    use_gru=False,
    gru_dict=None,
    use_shifter=False,
    shifter_dict=shifter_dict,
    shifter_type='MLP',
    deeplake_ds=False,
)

# Build the model from the dataloaders and the seed.
factorised_3d_model = make_video_model(data_loaders, seed, **video_model_kwargs)



In [15]:
# Display the module tree of the freshly built model.
factorised_3d_model

VideoFiringRateEncoder(
  (core): Factorized3dCore(
    (_input_weight_regularizer): LaplaceL2norm(
      (laplace): Laplace()
    )
    (temporal_regularizer): DepthLaplaceL21d(
      (laplace): Laplace1d()
    )
    (features): Sequential(
      (layer0): Sequential(
        (conv_spatial): Conv3d(1, 32, kernel_size=(1, 11, 11), stride=(1, 1, 1))
        (conv_temporal): Conv3d(32, 32, kernel_size=(11, 1, 1), stride=(1, 1, 1))
        (norm): BatchNorm3d(32, eps=1e-05, momentum=0.7, affine=True, track_running_stats=True)
        (nonlin): ELU(alpha=1.0)
      )
      (layer1): Sequential(
        (conv_spatial_1): Conv3d(32, 64, kernel_size=(1, 5, 5), stride=(1, 1, 1))
        (conv_temporal_1): Conv3d(64, 64, kernel_size=(5, 1, 1), stride=(1, 1, 1))
        (norm): BatchNorm3d(64, eps=1e-05, momentum=0.7, affine=True, track_running_stats=True)
        (nonlin): ELU(alpha=1.0)
      )
      (layer2): Sequential(
        (conv_spatial_2): Conv3d(64, 128, kernel_size=(1, 5, 5), stride=

In [16]:
# Preferred GPU for training. The index is machine-specific, so fall back
# gracefully instead of failing on hosts with fewer GPUs or without CUDA.
PREFERRED_GPU_INDEX = 6

if torch.cuda.is_available():
    gpu_index = PREFERRED_GPU_INDEX
    if gpu_index >= torch.cuda.device_count():
        print(f'GPU {gpu_index} is not available, falling back to GPU 0')
        gpu_index = 0
    device = f'cuda:{gpu_index}'
    # Make the chosen GPU the current CUDA device for subsequent allocations.
    torch.cuda.set_device(device)
else:
    print('CUDA is not available, falling back to CPU')
    device = 'cpu'

In [17]:
# Dotted path of the training function that `get_trainer` resolves.
trainer_fn = "sensorium.training.video_training_loop.standard_trainer"

# Options forwarded to the trainer.
# NOTE(review): the trainer seed (111) differs from the notebook-level
# `seed = 42` used to build the model — confirm this is intended.
trainer_config = dict(
    dataloaders=data_loaders,
    seed=111,
    use_wandb=False,
    verbose=True,
    lr_decay_steps=4,
    lr_init=0.005,
    device=device,
    detach_core=False,
    # TODO: set this to True if you are using deeplake;
    # the first connections to deeplake may take up to 10 minutes
    deeplake_ds=False,
    checkpoint_save_path='./loc/',
)

trainer = get_trainer(trainer_fn=trainer_fn, trainer_config=trainer_config)

In [18]:
# Run the training loop on the model. The call is unpacked into three values:
# validation score, trainer output and state dict.
# The run recorded below was interrupted manually (KeyboardInterrupt) after
# epoch 3, so in that run these three names were never assigned.
validation_score, trainer_output, state_dict = trainer(factorised_3d_model)

optim_step_count = 1


Epoch 1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 90/90 [00:36<00:00,  2.47it/s]


Epoch 1, Batch 89, Train loss -3303629.555260817, Validation loss -1465832.0479486731
EPOCH=1  validation_correlation=0.005241501607030255


Epoch 2: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 90/90 [00:35<00:00,  2.52it/s]


Epoch 2, Batch 89, Train loss -1566508.2490987657, Validation loss -754750.5198838328
EPOCH=2  validation_correlation=0.012891900318500956


Epoch 3: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 90/90 [00:36<00:00,  2.49it/s]


Epoch 3, Batch 89, Train loss -8935561.672075795, Validation loss -3730460.4403645247
EPOCH=3  validation_correlation=0.020821546704323377


KeyboardInterrupt: 

## [Optional] Script to match tiers for new five sensorium mice

This works only for [these](https://gin.g-node.org/pollytur/sensorium_2023_dataset) five later released mice.  
For [these](https://gin.g-node.org/pollytur/sensorium_2023_data/src/798ba8ad041d8f0f0ce879af396d52c7238c2730) the `trial_idx.npy` file was not released; I am working on this matching and will update the script when it is available.

The current dataloader cannot handle the natural-images partition of sensorium, but that was an OOD partition (live and final tests), so this should be fine for now.

In [None]:
import yaml
import os

In [None]:
# TODO: fill in the three placeholders below before running this cell.
# Placeholder strings are used (instead of empty right-hand sides) so that the
# cell is valid Python.
# NOTE: this overwrites the `m` defined at the top of the notebook.
m = '<mouse-name-in-old-format>'  # mouse name in the older data format (like from gin-g-node)
meta_prepre = '/path/to/old/format/data/'  # path to the older data format

# Per-trial tier labels and trial indices from the older export.
path = f'{meta_prepre}{m}/meta/trials/trial_idx.npy'
tiers_old = np.load(f'{meta_prepre}{m}/meta/trials/tiers.npy')
trial_idxs_old = np.load(path)


# Tier of every non-blank trial before / after the rewrite, kept for inspection.
tiers_before = []
tiers_after = []

yaml_prepre = '/path/to/new/export/'  # path to the new export folder
# Folder with the per-trial yaml metadata of the matching '-Video-full' export.
yaml_pre_path = f'{yaml_prepre}{m.split("-Video-")[0]}-Video-full/screen/meta/'

# Rewrite the `tier` field of every non-blank trial's yaml metadata in place,
# using the tier stored for the same trial index in the older export.
for file in tqdm(os.listdir(yaml_pre_path)):
    meta_file = f'{yaml_pre_path}{file}'
    with open(meta_file, 'r') as src:
        data = yaml.safe_load(src)

    # Blank trials are left untouched.
    if data['modality'] == 'blank':
        continue

    # Positions in the old trial index list that match this trial
    # (the comparison is done on the string form of the index).
    matches = np.where(trial_idxs_old == str(data['trial_idx']))[0]
    assert len(matches) <= 1, f'duplicated trial_idxs, {data["trial_idx"]}'
    tiers_before.append(data['tier'])

    if len(matches):
        data['tier'] = str(tiers_old[matches[0]])
    else:
        # No counterpart in the older export: report it and mark the tier as 'none'.
        print(data["trial_idx"])
        data['tier'] = 'none'
    tiers_after.append(data['tier'])

    # Overwrite the metadata file with the updated tier.
    with open(meta_file, "w") as outfile:
        yaml.safe_dump(data, outfile)
