Let's start by reproducing some results of Quilee's notebook in _CElegansNeuralPrediction.ipynb_

# Fixing train/_main.py
---

It looks like the dataset returned after running `get_dataset()` doesn't have the "worm" keys.

In [2]:
# All imports for train module
import torch
import os
import hydra
import random
import time
import numpy as np
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
from scipy.linalg import solve
from typing import Tuple, Union
from datetime import datetime
from hydra.core.hydra_config import HydraConfig
from omegaconf import DictConfig
from omegaconf import OmegaConf
from utils import DEVICE, LOGS_DIR, NEURONS_302
from tqdm import tqdm
from torch.utils.data.dataloader import default_collate
from torch.utils.data import ConcatDataset, DataLoader
from models._utils import NetworkLSTM
from data._utils import NeuralActivityDataset, pick_worm
from data._main import get_dataset
from models._main import get_model
from scipy.signal import savgol_filter

In [3]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cpu


In [4]:
# Load the training configuration from the repository's conf/train.yaml
# (path is relative to this notebook) and print it back as YAML to inspect it.
config = OmegaConf.load("../../conf/train.yaml")
print("config:", OmegaConf.to_yaml(config))

config: train:
  optimizer: SGD
  learn_rate: 0.01
  epochs: 1
  save_freq: 100
  seq_len: 100
  k_splits: 2
  num_samples: 16
  num_batches: 1
  tau_in: 1
  shuffle: false
  reverse: false



In [5]:
# Build the model from the configuration stored in conf/model.yaml.
model = get_model(OmegaConf.load("../../conf/model.yaml"))

Initialized a new model.

Model: LinearNN(
  (identity): Identity()
  (linear): Linear(in_features=512, out_features=302, bias=True)
  (model): Sequential(
    (0): Linear(in_features=302, out_features=512, bias=True)
    (1): ELU(alpha=1.0)
    (2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
    (3): Linear(in_features=512, out_features=302, bias=True)
  )
)



In [6]:
# Load the dataset(s) selected by the configuration in conf/dataset.yaml.
dataset = get_dataset(OmegaConf.load("../../conf/dataset.yaml"))

Chosen dataset(s): ['Kaplan2020', 'Kato2015', 'Nichols2017', 'Skora2018']
Num. worms: 87



Let's take a look inside it

In [7]:
# Show the top-level keys of the loaded dataset.
dataset.keys()

dict_keys(['worm0', 'worm1', 'worm2', 'worm3', 'worm4', 'worm5', 'worm6', 'worm7', 'worm8', 'worm9', 'worm10', 'worm11', 'worm12', 'worm13', 'worm14', 'worm15', 'worm16', 'worm17', 'worm18', 'worm19', 'worm20', 'worm21', 'worm22', 'worm23', 'worm24', 'worm25', 'worm26', 'worm27', 'worm28', 'worm29', 'worm30', 'worm31', 'worm32', 'worm33', 'worm34', 'worm35', 'worm36', 'worm37', 'worm38', 'worm39', 'worm40', 'worm41', 'worm42', 'worm43', 'worm44', 'worm45', 'worm46', 'worm47', 'worm48', 'worm49', 'worm50', 'worm51', 'worm52', 'worm53', 'worm54', 'worm55', 'worm56', 'worm57', 'worm58', 'worm59', 'worm60', 'worm61', 'worm62', 'worm63', 'worm64', 'worm65', 'worm66', 'worm67', 'worm68', 'worm69', 'worm70', 'worm71', 'worm72', 'worm73', 'worm74', 'worm75', 'worm76', 'worm77', 'worm78', 'worm79', 'worm80', 'worm81', 'worm82', 'worm83', 'worm84', 'worm85', 'worm86'])

In [21]:
# Union of every feature key seen across all worms, deduplicated and sorted
# by np.unique.
features = np.unique(
    np.concatenate([list(dataset[ds].keys()) for ds in dataset])
)

# For each worm, print the name of its source dataset once per feature it
# lacks; no output at all means every worm exposes the same set of features.
for ds in dataset:
    for f in features:
        if f in dataset[ds].keys():
            continue
        print(dataset[ds]['dataset'])

All datasets have the same features: the check above printed nothing, so no worm is missing any feature.

In [22]:
# Display the array of unique feature names.
features

array(['calcium_data', 'dataset', 'dt', 'max_timesteps',
       'named_neuron_to_slot', 'named_neurons_mask', 'neuron_to_slot',
       'neurons_mask', 'num_named_neurons', 'num_neurons',
       'num_unknown_neurons', 'residual_calcium', 'slot_to_named_neuron',
       'slot_to_neuron', 'slot_to_unknown_neuron', 'smooth_calcium_data',
       'smooth_method', 'smooth_residual_calcium', 'time_in_seconds',
       'unknown_neuron_to_slot', 'unknown_neurons_mask', 'worm'],
      dtype='<U23')

# The Datasets
---

The data consist of calcium imaging recordings of *C. elegans* worms, gathered from the published datasets listed below.

|Paper Link             |Database Link     |Files w/ Data          |Num. ID'd| 
|-----------------------|------------------|-----------------------|---------|
|tinyurl.com/Uzel2022   |osf.io/3vkxn/     |`Uzel_WT.mat`          |54/154   |
|tinyurl.com/Kaplan20   |osf.io/9nfhz/     |`Neuron2019_Data_*.mat`|48/103   |
|tinyurl.com/Nguyen17   |tinyurl.com/LeiferIEEE|`heatData*.mat`    |0/156    | 
|tinyurl.com/Skora2018  |osf.io/za3gt/     |`WT_*.mat`             |40/139   |
|tinyurl.com/Nichols2017|osf.io/kbf38/     |`*let.mat`             |35/116   |
|tinyurl.com/Kato2015   |osf.io/2395t/     |`WT_*Stim.mat`         |38/109   |

To load the datasets we can use the functions in `data/_utils.py`:

In [4]:
from data._utils import load_dataset, pick_worm, find_reliable_neurons

In [5]:
# Load a single dataset by name and inspect its structure: the top-level
# (per-worm) keys, the feature keys stored for one worm, and the shape of
# that worm's calcium data.
Nguyen2017 = load_dataset("Nguyen2017")
print(Nguyen2017.keys())
print(Nguyen2017['worm0'].keys())
print(Nguyen2017['worm0']['calcium_data'].shape)

dict_keys(['worm0', 'worm1', 'worm2'])
dict_keys(['dataset', 'smooth_method', 'worm', 'calcium_data', 'smooth_calcium_data', 'residual_calcium', 'smooth_residual_calcium', 'max_timesteps', 'time_in_seconds', 'dt', 'num_neurons', 'num_named_neurons', 'num_unknown_neurons', 'named_neurons_mask', 'unknown_neurons_mask', 'neurons_mask', 'slot_to_named_neuron', 'named_neuron_to_slot', 'slot_to_unknown_neuron', 'unknown_neuron_to_slot', 'slot_to_neuron', 'neuron_to_slot'])
torch.Size([3044, 302])


So we have a hierarchy to access the data of each worm in the dataset:
 - First, the chosen worm
 - Second, that worm's data, with its features and other information (note that the key `dataset` holds the name of the source dataset, not the raw data)

In [6]:
# Check that a specific worm key is present in the loaded dataset.
'worm2' in set(Nguyen2017.keys())

True

In [7]:
# Choose one worm key at random and extract that worm's data with pick_worm.
# NOTE(review): this cell currently ends in KeyError: 'generator' (see the
# output below); the cause is not visible in this notebook -- check
# pick_worm in data/_utils.py.
wormid = np.random.choice(list(Nguyen2017.keys()))
single_worm_dataset = pick_worm(Nguyen2017, wormid)

KeyError: 'generator'