In [1]:
import pprint

from allensdk.core.brain_observatory_cache import BrainObservatoryCache
import allensdk.brain_observatory.stimulus_info as stim_info
from allensdk.brain_observatory.brain_observatory_exceptions import NoEyeTrackingException

#### How many experiment containers are there in total for excitatory neurons in primary visual cortex?

In [2]:
# Create the Brain Observatory cache object that every query below goes through.
boc = BrainObservatoryCache()

# Cre lines treated as excitatory in this notebook.
cre_lines = [
    # 'Cux2-CreERT2',  # left out: this one seems to have a lot of outliers
    'Emx1-IRES-Cre',
    'Fezf2-CreER',
    'Nr5a1-Cre',
    'Ntsr1-Cre_GN220',
    'Rbp4-Cre_KL100',
    'Rorb-IRES2-Cre',
    'Scnn1a-Tg3-Cre',
    'Slc17a7-IRES2-Cre',
    'Tlx3-Cre_PL56',
]

# Gather the experiment containers targeting primary visual cortex (VISp),
# one Cre line at a time. The two lists stay aligned: cre_lines_ecs[i] is the
# Cre line whose query returned ecs[i].
ecs = []
cre_lines_ecs = []

for cre_line in cre_lines:
    print("Cre Line:", cre_line)

    ecs_cre = boc.get_experiment_containers(
        cre_lines=[cre_line],
        targeted_structures=['VISp'],
    )
    print("  Experiment containers:", len(ecs_cre))

    ecs += ecs_cre
    # One label per container returned for this Cre line.
    cre_lines_ecs += [cre_line] * len(ecs_cre)

# To inspect a single container record, uncomment:
# pprint.pprint(ecs[0])

print('\nTotal experiment containers:', len(ecs))

Cre Line: Emx1-IRES-Cre
  Experiment containers: 10
Cre Line: Fezf2-CreER
  Experiment containers: 4
Cre Line: Nr5a1-Cre
  Experiment containers: 8
Cre Line: Ntsr1-Cre_GN220
  Experiment containers: 6
Cre Line: Rbp4-Cre_KL100
  Experiment containers: 7
Cre Line: Rorb-IRES2-Cre
  Experiment containers: 8
Cre Line: Scnn1a-Tg3-Cre
  Experiment containers: 9
Cre Line: Slc17a7-IRES2-Cre
  Experiment containers: 60
Cre Line: Tlx3-Cre_PL56
  Experiment containers: 6

Total experiment containers: 118


-----

#### Among these, how many have natural movie one and (theoretically) have eye tracking?

In [5]:
# Number of experiments a container must return from the query below in order
# to be kept (the notebook expects all three of a container's experiments to
# include natural movie one).
SESSIONS_PER_CONTAINER = 3

# Flat list of the experiments of every container that passes the filter.
exps = []

for container in ecs:
    ec_id = container['id']

    # Experiments of this container that showed natural movie one and are
    # flagged as having eye tracking.
    exps_container = boc.get_ophys_experiments(
        experiment_container_ids=[ec_id],
        stimuli=[stim_info.NATURAL_MOVIE_ONE],
        require_eye_tracking=True,
    )

    # Keep a container only if every one of its experiments qualifies.
    if len(exps_container) != SESSIONS_PER_CONTAINER:
        # print('Skipping, number of matching experiments is:', len(exps_container))
        continue

    exps.extend(exps_container)

print('Total Number of experiments:', len(exps))

# Experiments are only ever appended in complete groups, so this integer
# division is exact and yields the number of containers that were kept.
n_containers = len(exps) // SESSIONS_PER_CONTAINER

print('Setting number of containers:', n_containers)

# With a single surviving container the records are short enough to show.
if n_containers == 1:
    print("Experiment with natural movie one:")
    pprint.pprint(exps)

Total Number of experiments: 144
Setting number of containers: 48


-----

#### But NOT all of the datasets ACTUALLY contain pupil size data!

Haven't figured out what is going on. 

But it might be helpful to first count how many of them actually have pupil size data.

Side note: we always have running speed data.

In [15]:
# An experiment for which pupil size data is available.
exp_1 = exps[69]
exp_1_id = exp_1['id']
data_set_1 = boc.get_ophys_experiment_data(exp_1_id)

# Displayed as the cell output.
data_set_1.get_pupil_size()

(array([  18.39164,   18.42489,   18.45813, ..., 3804.28383, 3804.31708,
        3804.35033]),
 array([2291.0261, 2332.0881, 2552.4097, ...,       nan,       nan,
              nan], dtype=float32))

In [18]:
# A second experiment for which pupil size data is available.
exp_2 = exps[66]
exp_2_id = exp_2['id']
data_set_2 = boc.get_ophys_experiment_data(exp_2_id)

# Displayed as the cell output.
data_set_2.get_pupil_size()

(array([   5.4208 ,    5.45405,    5.4873 , ..., 3520.69708, 3520.73033,
        3520.76358]),
 array([7791.2734, 7374.492 , 7691.526 , ...,       nan,       nan,
              nan], dtype=float32))

In [20]:
# An experiment that has no pupil data, even though it was selected with
# require_eye_tracking=True.
exp_3 = exps[26]
data_set_3 = boc.get_ophys_experiment_data(exp_3['id'])

# The failure is the point of this cell, so report it instead of letting it
# abort a Restart & Run All.
try:
    pupil_size_3 = data_set_3.get_pupil_size()
except NoEyeTrackingException as err:
    pupil_size_3 = None
    print(f'{type(err).__name__}: {err}')

# Shows the pupil data if the call succeeded; shows nothing when it is None.
pupil_size_3

2022-07-22 04:08:07,314 allensdk.api.api.retrieve_file_over_http INFO     Downloading URL: http://api.brain-map.org/api/v2/well_known_file_download/539607110


NoEyeTrackingException: No pupil tracking for this experiment.

--------
#### Questions on best way to deal with multiple datasets

1. What is the size of the smallest set of datasets that we want to work with? And what is the estimated upper end?


2. Shall we each save/load everything locally, split the storage between us (each of us saving some part of it), or save it on Google Drive?


3. Do we have a good estimate of the time cost here? It seems not too bad (about 5 minutes to fetch one experiment, and I believe less than that for the initial processing).