# Demo notebook containing all the steps from loading data to submission

In [1]:
import numpy as np
import torch
from torchvision import transforms
import hub
from sklearn.model_selection import train_test_split

from cascade.models import stacked_core_full_gauss_readout
from cascade.training import standard_trainer
from cascade.utility import generate_submission_file

Setting database.host to 134.76.19.44
Generating new fontManager, this may take some time...


### 1. Load the train and test datasets and create the corresponding dataloader

In [2]:
dataset_id = 'mouse1'

# get the data from Activeloop
train_dataset_train = hub.load(f"hub://mohammadbashiri/npc-{dataset_id}-train")
train_dataset_val = hub.load(f"hub://mohammadbashiri/npc-{dataset_id}-train")
test_dataset = hub.load(f"hub://mohammadbashiri/npc-{dataset_id}-test")

# split the trainset into train and validation (i.e. modify the index of the corresponding dataset)
n_training_samples = len(train_dataset_train)
train_indices, val_indices = train_test_split(np.arange(n_training_samples), train_size=0.7)
train_samples_mask = np.isin(np.arange(n_training_samples), train_indices)
train_dataset_train.index.values[0].value = tuple(np.where(train_samples_mask)[0].tolist())
train_dataset_val.index.values[0].value = tuple(np.where(~train_samples_mask)[0].tolist())

# create the dataloaders
train_dataloader = train_dataset_train.pytorch(batch_size=16, shuffle=True, transform={'inputs': transforms.ToTensor(), 'targets': None, 'image_ids': None, 'trial_indices': None})
val_dataloader = train_dataset_val.pytorch(batch_size=16, shuffle=False, transform={'inputs': transforms.ToTensor(), 'targets': None, 'image_ids': None, 'trial_indices': None})
test_dataloader = test_dataset.pytorch(batch_size=16, shuffle=False, transform={'inputs': transforms.ToTensor(), 'image_ids': None, 'trial_indices': None})

# Combine the dataloaders into a single object (dict)
dataloaders = {"train": {dataset_id: train_dataloader},
               "validation": {dataset_id: val_dataloader},
               "test": {dataset_id: test_dataloader}}

hub://mohammadbashiri/npc-mouse1-train loaded successfully.
This dataset can be visualized at https://app.activeloop.ai/mohammadbashiri/npc-mouse1-train.
hub://mohammadbashiri/npc-mouse1-train loaded successfully.
This dataset can be visualized at https://app.activeloop.ai/mohammadbashiri/npc-mouse1-train.
hub://mohammadbashiri/npc-mouse1-test loaded successfully.
This dataset can be visualized at https://app.activeloop.ai/mohammadbashiri/npc-mouse1-test.


In [3]:
dataloaders

{'train': {'mouse1': <torch.utils.data.dataloader.DataLoader at 0x7f6a7ceee220>},
 'validation': {'mouse1': <torch.utils.data.dataloader.DataLoader at 0x7f6a7cd71fa0>},
 'test': {'mouse1': <torch.utils.data.dataloader.DataLoader at 0x7f6a7cc9a670>}}

In [4]:
len(train_dataset_train), len(train_dataset_val), len(test_dataset)

(140, 60, 100)

### 2. Initialize model

In [5]:
model_config = {'pad_input': False,
                'stack': -1,
                'layers': 1,
                'input_kern': 5,
                'hidden_dilation': 1,
                'hidden_kern': 3,
                'hidden_channels': 64,
                'depth_separable': True,
                'init_sigma': 0.1,
                'init_mu_range': 0.3,
                'gauss_type': 'full',
                'shifter': True,
                'shift_layers': 3}

model = stacked_core_full_gauss_readout(dataloaders, seed=1, **model_config)



### 3. Train the model

In [6]:
trainer_config = {'max_iter': 2,
                  'verbose': False,
                  'lr_decay_steps': 4,
                  'avg_loss': False,
                  'lr_init': 0.009}

score, output, state_dict = standard_trainer(model, dataloaders, seed=1, **trainer_config)

Epoch 1: 100%|██████████| 9/9 [00:04<00:00,  2.10it/s]


[001|00/05] ---> 0.005140737484911137


Epoch 2: 100%|██████████| 9/9 [00:04<00:00,  2.05it/s]


[002|01/05] -/-> -0.0016359479945072125
Restoring best model! -0.001636 ---> 0.005141


### 4. Prepare the submission file

In [7]:
generate_submission_file(model, dataloaders['test'][dataset_id])

File saved.


In [8]:
import pandas as pd
pd.read_csv("submission_file.csv")

Unnamed: 0,trial_indices,image_ids,prediction,neuron_ids
0,0,0,"[0.8660333156585693, 0.7536219954490662, 0.667...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,..."
1,1,0,"[1.1662379503250122, 0.9683790802955627, 1.193...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,..."
2,2,0,"[0.9244330525398254, 0.9667186141014099, 0.837...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,..."
3,3,0,"[1.0985108613967896, 0.9715718626976013, 1.172...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,..."
4,4,0,"[1.0623140335083008, 0.9847257137298584, 1.053...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,..."
...,...,...,...,...
95,95,9,"[0.9583678245544434, 0.788209080696106, 0.8644...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,..."
96,96,9,"[1.042254090309143, 1.2295119762420654, 0.9393...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,..."
97,97,9,"[1.0805977582931519, 1.0807799100875854, 1.050...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,..."
98,98,9,"[0.9569833874702454, 1.0637223720550537, 0.947...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,..."


---