In [2]:
from _main_utils import ROOT_DIR
from preprocess.config import PREPROCESS_CONFIG
import numpy as np
import os
import pandas as pd
import pickle
# Dataset names taken from the preprocessing config; load_dataset() asserts
# that any requested name is a member of this collection.
EXPERIMENT_DATASETS = PREPROCESS_CONFIG["EXPERIMENT_DATASETS"]

In [18]:
def load_dataset(name):
    """Return the unpickled contents of the processed neural dataset `name`.

    The file read is ``<ROOT_DIR>/data/processed/neural/<name>.pickle``.

    Args:
        name: Dataset identifier; must be a member of ``EXPERIMENT_DATASETS``.

    Returns:
        Whatever object was pickled into the dataset file.

    Raises:
        AssertionError: If `name` is not in ``EXPERIMENT_DATASETS`` or the
            expected pickle file is missing.
    """
    is_known = name in EXPERIMENT_DATASETS
    assert is_known, "Unrecognized dataset!"

    filename = f"{name}.pickle"
    pickle_path = os.path.join(ROOT_DIR, "data", "processed", "neural", filename)
    assert os.path.exists(pickle_path), f"The file {pickle_path} does not exist."

    # NOTE: unpickling executes arbitrary code embedded in the file, so this
    # should only ever be pointed at pickles produced by a trusted pipeline.
    with open(pickle_path, "rb") as handle:
        contents = pickle.load(handle)
    return contents

# Pair every dataset name with its position so it can be referred to by index.
ordered_datasets = list(enumerate(EXPERIMENT_DATASETS))
print(f"DATASETS\n{ordered_datasets}")

DATASETS
[(0, 'Kato2015'), (1, 'Nichols2017'), (2, 'Skora2018'), (3, 'Kaplan2020'), (4, 'Nejatbakhsh2020'), (5, 'Yemini2021'), (6, 'Uzel2022'), (7, 'Dag2023'), (8, 'Leifer2023'), (9, 'Lin2023'), (10, 'Flavell2023'), (11, 'Venkatachalam2024')]


In [None]:
# Summarise a dataset: load it and print neuron / timestep statistics across its worms
def helper(dataset_name: str):
    """Print summary statistics for one dataset and return the loaded dataset.

    The dataset is loaded with ``load_dataset`` and treated as a mapping from
    a worm key to that worm's record. Across all worms this prints:

    * the average ``num_labeled_neurons`` and ``num_neurons``,
    * the average length of each worm's ``neuron_to_slot`` entry,
    * the (min, max) of ``num_labeled_neurons``,
    * the (min, max) of ``max_timesteps``.

    Averages are truncated to integers for display.

    Args:
        dataset_name: Name forwarded to ``load_dataset``.

    Returns:
        The loaded dataset, so that ``dataset = helper(name)`` leaves the
        caller holding the data instead of ``None``.
    """
    # load the dataset
    print(f"Loading {dataset_name}...")
    dataset = load_dataset(dataset_name)
    print(len(dataset), dataset.keys(), end="\n\n")

    # number of ID'd neurons versus number of neurons measured
    num_worms = len(dataset)
    neurons_stats = dict(min=float("inf"), max=float("-inf"))
    timesteps_stats = dict(min=float("inf"), max=float("-inf"))
    total_sum, named_sum, neuron_to_slot_sum = 0, 0, 0
    for single_worm_dataset in dataset.values():
        labeled_neurons = single_worm_dataset["num_labeled_neurons"]
        max_timesteps = single_worm_dataset["max_timesteps"]
        neuron_to_slot_sum += len(single_worm_dataset["neuron_to_slot"])
        total_sum += single_worm_dataset["num_neurons"]
        named_sum += labeled_neurons
        neurons_stats["min"] = min(neurons_stats["min"], labeled_neurons)
        neurons_stats["max"] = max(neurons_stats["max"], labeled_neurons)
        timesteps_stats["min"] = min(timesteps_stats["min"], max_timesteps)
        timesteps_stats["max"] = max(timesteps_stats["max"], max_timesteps)

    # Sum first and divide once: adding `value / num_worms` per worm accumulates
    # float rounding error that can land just below the true mean and then be
    # truncated one too low by int(). An empty dataset reports averages of 0.
    total_avg = total_sum / num_worms if num_worms else 0
    named_avg = named_sum / num_worms if num_worms else 0
    neuron_to_slot_avg = neuron_to_slot_sum / num_worms if num_worms else 0

    print(f"Avg num. neuron ID'd/recorded : {int(named_avg)}/{int(total_avg)}")
    print(f"Neuron_to_slot avg {int(neuron_to_slot_avg)}")
    print(f"Range num. ID'd neurons : ({neurons_stats['min']}, {neurons_stats['max']})")
    print(
        f"Range len. calcium data : ({timesteps_stats['min']}, {timesteps_stats['max']})"
    )
    print(f"Avg num. ID'd : {int(named_avg)}/{int(total_avg)}")

    # total_neurons_counted = sum(dataset[worm]["num_neurons"] for worm in dataset)
    # print(f"Total neurons counted in METHOD 1: {total_neurons_counted}")

    return dataset

In [38]:
# Print the summary statistics for a single dataset. The name is validated
# against EXPERIMENT_DATASETS when helper() loads it via load_dataset().
dataset_name = "Flavell2023"
dataset = helper(dataset_name)

Loading Flavell2023...
40 dict_keys(['worm0', 'worm1', 'worm2', 'worm3', 'worm4', 'worm5', 'worm6', 'worm7', 'worm8', 'worm9', 'worm10', 'worm11', 'worm12', 'worm13', 'worm14', 'worm15', 'worm16', 'worm17', 'worm18', 'worm19', 'worm20', 'worm21', 'worm22', 'worm23', 'worm24', 'worm25', 'worm26', 'worm27', 'worm28', 'worm29', 'worm30', 'worm31', 'worm32', 'worm33', 'worm34', 'worm35', 'worm36', 'worm37', 'worm38', 'worm39'])

Avg num. neuron ID'd/recorded : 88/136
Neuron_to_slot avg 136
Range num. ID'd neurons : (64, 115)
Range len. calcium data : (2889, 2916)
Avg num. ID'd : 88/136
Total neurons counted in METHOD 1: 5458
