based on notebooks in  
https://github.com/dandi/example-notebooks/tree/master/tutorials/neurodatarehack_2024 

In [1]:
import json
import numpy as np
from dandi.dandiapi import DandiAPIClient
from tqdm.notebook import tqdm
from isodate import parse_duration, Duration
from datetime import datetime
from warnings import simplefilter
simplefilter("ignore")  # Suppress namespace warnings from reading older NWB files

from nwbinspector.tools import get_s3_urls_and_dandi_paths
from pynwb import NWBHDF5IO
import remfile
import h5py

from collections import Counter

import lindi, pynwb

from dandi.dandiapi import DandiAPIClient
import pandas as pd
import pickle

In [2]:
client = DandiAPIClient()
dandisets = list(client.get_dandisets())

# nwb dandisets

In [3]:
nwb_dandisets = []

for dandiset in tqdm(dandisets):
    raw_metadata = dandiset.get_raw_metadata()

    if any(
        data_standard['identifier'] == "RRID:SCR_015242"  # this is the RRID for NWB
        for data_standard in raw_metadata['assetsSummary'].get('dataStandard', [])
    ):
        nwb_dandisets.append(dandiset)
print(f"There are currently {len(nwb_dandisets)} NWB datasets on DANDI!")

  0%|          | 0/698 [00:00<?, ?it/s]

There are currently 420 NWB datasets on DANDI!


# dandiset behavior types 


In [4]:
raw_metadata = dandisets[2].get_raw_metadata()

In [5]:
raw_metadata

{'id': 'DANDI:000005/0.220126.1853',
 'doi': '10.48324/dandi.000005/0.220126.1853',
 'url': 'https://dandiarchive.org/dandiset/000005/0.220126.1853',
 'name': 'Electrophysiology data from thalamic and cortical neurons during somatosensation',
 'about': [{'name': 'dorsal plus ventral thalamus',
   'schemaKey': 'Anatomy',
   'identifier': 'UBERON:0001897'}],
 'access': [{'status': 'dandi:OpenAccess',
   'schemaKey': 'AccessRequirements',
   'contactPoint': {'schemaKey': 'ContactPoint'}}],
 'license': ['spdx:CC-BY-4.0'],
 'version': '0.220126.1853',
 '@context': 'https://raw.githubusercontent.com/dandi/schema/master/releases/0.6.0/context.json',
 'citation': 'Yu, Jianing; Gutnisky, Diego A; Hires, S Andrew; Svoboda, Karel (2022) Electrophysiology data from thalamic and cortical neurons during somatosensation (Version 0.220126.1853) [Data set]. DANDI archive. https://doi.org/10.48324/dandi.000005/0.220126.1853',
 'keywords': [],
 'protocol': [],
 'schemaKey': 'Dandiset',
 'identifier': 'DA

In [6]:

def contains_behavior(data):
    if isinstance(data, dict):
        return any(contains_behavior(v) for v in data.values())
    elif isinstance(data, list):
        return any(contains_behavior(v) for v in data)
    elif isinstance(data, str):
        return 'behavior' in data.lower()
    return False

def find_behavior_keys(data, parent_key=""):
    keys_with_behavior = []

    if isinstance(data, dict):
        for key, value in data.items():
            full_key = f"{parent_key}.{key}" if parent_key else key
            keys_with_behavior.extend(find_behavior_keys(value, full_key))
    elif isinstance(data, list):
        for index, item in enumerate(data):
            full_key = f"{parent_key}[{index}]"
            # keys_with_behavior.extend(find_behavior_keys(item, full_key))
            keys_with_behavior.extend(find_behavior_keys(item, parent_key))
    elif isinstance(data, str):
        if 'behavior' in data.lower():
            keys_with_behavior.append(parent_key)

    return keys_with_behavior


In [7]:
# example
print(find_behavior_keys(raw_metadata))

# query
brbe_nwb_dandisets = []

# contains 'behavior'
for dset in tqdm(nwb_dandisets):
    raw_metadata = dset.get_raw_metadata()

    if contains_behavior(raw_metadata):
        behavior_keys = find_behavior_keys(raw_metadata)
        brbe_nwb_dandisets.append(dset)

print(len(brbe_nwb_dandisets))

[]


  0%|          | 0/420 [00:00<?, ?it/s]

178


In [8]:
# contains a behavior key

behavior_keys_list = []
behavior_keys_dandisets = []
for dset in tqdm(nwb_dandisets):
    raw_metadata = dset.get_raw_metadata()

    behavior_keys = find_behavior_keys(raw_metadata)
    behavior_keys_list.extend(behavior_keys)
    
    if behavior_keys: 
        approaches = raw_metadata['assetsSummary'].get('approach', [])
        if (
            any('electrophysiological' in a.get('name', '') for a in approaches)
        ):
            exist_ephys = True
        else: 
            exist_ephys = False
        if any('assetssummary' in x.lower() for x in behavior_keys):
            exist_asset = True 
        else:
            exist_asset = False  
        behavior_keys_dandisets.append({
            "dandiset_id": dset.identifier,
            "dandiset": dset,
            "behavior_keys": behavior_keys,
            "asset": exist_asset,
            "ephys": exist_ephys
        })

behavior_keys_dandisets = pd.DataFrame(behavior_keys_dandisets)
# count occurrences of each key
behavior_keys_counter = Counter(behavior_keys_list)
behavior_keys_counter


  0%|          | 0/420 [00:00<?, ?it/s]

Counter({'assetsSummary.approach.name': 122,
         'assetsSummary.measurementTechnique.name': 122,
         'description': 96,
         'assetsSummary.variableMeasured': 91,
         'name': 29,
         'citation': 29,
         'relatedResource.name': 19,
         'keywords': 15,
         'relatedResource.url': 11,
         'studyTarget': 8,
         'contributor.name': 4,
         'about.name': 4,
         'relatedResource.repository': 3,
         'contributor.affiliation.name': 2,
         'acknowledgement': 2,
         'relatedResource.identifier': 1,
         'wasGeneratedBy.description': 1})

In [9]:
behavior_keys_dandisets

Unnamed: 0,dandiset_id,dandiset,behavior_keys,asset,ephys
0,000003,DANDI:000003/0.250624.0409,"[name, citation, description, assetsSummary.ap...",True,True
1,000004,DANDI:000004/0.220126.1852,"[contributor.affiliation.name, contributor.aff...",False,True
2,000006,DANDI:000006/0.220126.1855,"[assetsSummary.approach.name, assetsSummary.va...",True,True
3,000009,DANDI:000009/0.220126.1903,"[assetsSummary.approach.name, assetsSummary.va...",True,True
4,000010,DANDI:000010/0.220126.1905,"[assetsSummary.approach.name, assetsSummary.va...",True,True
...,...,...,...,...,...
173,001515,DANDI:001515/draft,[description],False,False
174,001530,DANDI:001530/draft,"[assetsSummary.approach.name, assetsSummary.va...",True,True
175,001533,DANDI:001533/draft,"[assetsSummary.approach.name, assetsSummary.me...",True,True
176,001539,DANDI:001539/0.250804.1538,"[description, assetsSummary.approach.name, ass...",True,True


In [10]:
behavior_assets_dandisets = behavior_keys_dandisets[(behavior_keys_dandisets['asset']==True) & 
                                                    (behavior_keys_dandisets['ephys']==True)
                                                    ]
behavior_assets_dandisets

Unnamed: 0,dandiset_id,dandiset,behavior_keys,asset,ephys
0,000003,DANDI:000003/0.250624.0409,"[name, citation, description, assetsSummary.ap...",True,True
2,000006,DANDI:000006/0.220126.1855,"[assetsSummary.approach.name, assetsSummary.va...",True,True
3,000009,DANDI:000009/0.220126.1903,"[assetsSummary.approach.name, assetsSummary.va...",True,True
4,000010,DANDI:000010/0.220126.1905,"[assetsSummary.approach.name, assetsSummary.va...",True,True
5,000011,DANDI:000011/0.220126.1907,"[assetsSummary.approach.name, assetsSummary.va...",True,True
...,...,...,...,...,...
164,001347,DANDI:001347/0.250528.0702,"[assetsSummary.approach.name, assetsSummary.va...",True,True
167,001371,DANDI:001371/draft,"[description, assetsSummary.approach.name, ass...",True,True
174,001530,DANDI:001530/draft,"[assetsSummary.approach.name, assetsSummary.va...",True,True
175,001533,DANDI:001533/draft,"[assetsSummary.approach.name, assetsSummary.me...",True,True


In [11]:
beh_asset_nwb_dandisets = []

for dset in tqdm(nwb_dandisets):
    raw_metadata = dset.get_raw_metadata()

    approaches = raw_metadata['assetsSummary'].get('approach', [])
    measurement_techniques = raw_metadata['assetsSummary'].get('measurementTechnique', [])
    variables_measured = raw_metadata['assetsSummary'].get('variableMeasured', [])

    if (
        any('behavior' in a.get('name', '').lower() for a in approaches) or
        any('behavior' in m.get('name', '').lower() for m in measurement_techniques) or
        any('behavior' in str(v).lower() for v in variables_measured)
    ):
        beh_asset_nwb_dandisets.append(dset)

print(len(beh_asset_nwb_dandisets))



  0%|          | 0/420 [00:00<?, ?it/s]

122


# read nwb via lindi 

In [12]:
dandiset_id = behavior_assets_dandisets.iloc[1]['dandiset'].get_raw_metadata()['id'].split('/')[0].split(':')[-1]
s3_urls = get_s3_urls_and_dandi_paths(dandiset_id=dandiset_id)
# print(s3_urls)
# print(list(s3_urls.values()))

filepath = list(s3_urls.values())[0]

with DandiAPIClient() as client:
    asset = client.get_dandiset(dandiset_id).get_asset_by_path(filepath)
    s3_url = asset.get_content_url(follow_redirects=1, strip_query=True)
f = lindi.LindiH5pyFile.from_hdf5_file(asset.download_url)
nwb = pynwb.NWBHDF5IO(file=f, mode='r').read()

In [13]:
nwb.objects.values()

dict_values([root pynwb.file.NWBFile at 0x4768339952
Fields:
  acquisition: {
    lick_times <class 'pynwb.behavior.BehavioralEvents'>
  }
  devices: {
    H-129 <class 'pynwb.device.Device'>
  }
  electrode_groups: {
    H-129: 64 <class 'pynwb.ecephys.ElectrodeGroup'>
  }
  electrodes: electrodes <class 'pynwb.ecephys.ElectrodesTable'>
  experiment_description: Extracellular electrophysiology recordings performed on mouse anterior lateral motor cortex (ALM) in delay response task. Neural activity from two neuron populations, pyramidal track upper and lower, were characterized, in relation to movement execution.
  experimenter: ['Mike Economo']
  file_create_date: [datetime.datetime(2019, 10, 7, 17, 40, 36, 567483, tzinfo=tzoffset(None, -18000))]
  identifier: anm369962_2017-03-09_0
  institution: Janelia Research Campus
  intervals: {
    trials <class 'pynwb.epoch.TimeIntervals'>
  }
  keywords: <LindiH5pyDataset: /general/keywords>
  related_publications: ['doi:10.1038/s41586-018-0

In [14]:
nwb.experiment_description

np.str_('Extracellular electrophysiology recordings performed on mouse anterior lateral motor cortex (ALM) in delay response task. Neural activity from two neuron populations, pyramidal track upper and lower, were characterized, in relation to movement execution.')

In [15]:
[x for x in nwb.objects.values() if isinstance(x, pynwb.behavior.TimeSeries)]

[lick_right_times pynwb.base.TimeSeries at 0x4762607184
 Fields:
   comments: no comments
   conversion: 1.0
   data: <LindiH5pyDataset: /acquisition/lick_times/lick_right_times/data>
   description: no description
   interval: 1
   offset: 0.0
   resolution: 0.0
   timestamps: <LindiH5pyDataset: /acquisition/lick_times/lick_right_times/timestamps>
   timestamps_unit: seconds
   unit: a.u.,
 lick_left_times pynwb.base.TimeSeries at 0x4763729072
 Fields:
   comments: no comments
   conversion: 1.0
   data: <LindiH5pyDataset: /acquisition/lick_times/lick_left_times/data>
   description: no description
   interval: 1
   offset: 0.0
   resolution: 0.0
   timestamps: <LindiH5pyDataset: /acquisition/lick_times/lick_left_times/timestamps>
   timestamps_unit: seconds
   unit: a.u.]

In [16]:

[x for x in nwb.objects.values() if isinstance(x, pynwb.behavior.SpatialSeries)]

[]

In [17]:
time_array = [x for x in nwb.objects.values() if isinstance(x, pynwb.behavior.TimeSeries)][0].timestamps[:]

# differences between consecutive elements
time_differences = np.diff(time_array)

# Calculate the average difference
average_time_resolution = np.mean(time_differences)

# Print the result
print("Time differences:", time_differences)
print("Average time resolution:", average_time_resolution)

Time differences: [0.144829 0.16667  0.128495 ... 0.140833 0.127666 0.183499]
Average time resolution: 1.1241294529801327


# behavior asset temporal resolution

dandiset name  
get filepaths (for now [0])  
get file, file.download_url (api dandi path)  
stream nwb file  
check if pynwb.behavior class is empty, which  
calculate time resolution
save

In [18]:
import pynwb.behavior
import inspect

def get_non_empty_behavior_classes(nwb):
    """
    Function to find non-empty pynwb.behavior classes in an NWB file.

    Parameters:
        nwb: The NWB file object.

    Returns:
        A dictionary where keys are class names and values are counts of non-empty instances.
    """
    # Get all classes in pynwb.behavior
    behavior_classes = [
        cls for name, cls in inspect.getmembers(pynwb.behavior, inspect.isclass)
        if cls.__module__ == 'pynwb.behavior'
    ]

    # Check which classes are non-empty
    non_empty_classes = {}

    for behavior_class in behavior_classes:
        non_empty_objects = [
            x for x in nwb.objects.values() if isinstance(x, behavior_class)
        ]
        if non_empty_objects:  # If the list is not empty
            non_empty_classes[behavior_class.__name__] = {
                'count': len(non_empty_objects),
                'objects': non_empty_objects,
                }

    return non_empty_classes

In [19]:
behavior_assets_dandisets['filepaths'] = ''

for i, dset in tqdm(behavior_assets_dandisets.iterrows(), total=len(behavior_assets_dandisets)):
    # if i >2:
    #     break
    dandiset_id = dset['dandiset'].get_raw_metadata()['id'].split('/')[0].split(':')[-1]
    with DandiAPIClient() as client:
        c=client.get_dandiset(dandiset_id)
    files = [asset for asset in c.get_assets()]
    behavior_assets_dandisets.at[i, 'filepaths'] = files
    # print(files[0].path)

behavior_assets_dandisets = behavior_assets_dandisets.reset_index()
behavior_assets_dandisets

  0%|          | 0/64 [00:00<?, ?it/s]

Unnamed: 0,index,dandiset_id,dandiset,behavior_keys,asset,ephys,filepaths
0,0,000003,DANDI:000003/0.250624.0409,"[name, citation, description, assetsSummary.ap...",True,True,[DANDI:assets/5e9e92e1-f044-4aa0-ab47-1cfcb889...
1,2,000006,DANDI:000006/0.220126.1855,"[assetsSummary.approach.name, assetsSummary.va...",True,True,[DANDI:assets/a5ad932b-b893-4522-b989-8f406d78...
2,3,000009,DANDI:000009/0.220126.1903,"[assetsSummary.approach.name, assetsSummary.va...",True,True,[DANDI:assets/250ea757-e6a9-4520-99b5-f2efd5e3...
3,4,000010,DANDI:000010/0.220126.1905,"[assetsSummary.approach.name, assetsSummary.va...",True,True,[DANDI:assets/6b3b38b9-0736-46a4-a348-b00af509...
4,5,000011,DANDI:000011/0.220126.1907,"[assetsSummary.approach.name, assetsSummary.va...",True,True,[DANDI:assets/88dd3ee7-a37a-44b1-bb64-89855040...
...,...,...,...,...,...,...,...
59,164,001347,DANDI:001347/0.250528.0702,"[assetsSummary.approach.name, assetsSummary.va...",True,True,[DANDI:assets/973b5ace-4161-49e8-bbf2-b2fc8b9e...
60,167,001371,DANDI:001371/draft,"[description, assetsSummary.approach.name, ass...",True,True,[DANDI:assets/54c844f5-7549-4c74-a06c-7732000a...
61,174,001530,DANDI:001530/draft,"[assetsSummary.approach.name, assetsSummary.va...",True,True,[DANDI:assets/1a63f51e-2b39-4ef3-9e07-39275e9e...
62,175,001533,DANDI:001533/draft,"[assetsSummary.approach.name, assetsSummary.me...",True,True,[DANDI:assets/28db1016-1f79-42e4-9e14-46536756...


In [20]:
files[0].get_content_url(follow_redirects=1, strip_query=True)

'https://dandiarchive.s3.amazonaws.com/blobs/390/a27/390a27ba-13ed-42fb-8709-8fa6bbcca456'

In [123]:
def get_time_info_from_behavior_class(behavior_class, nwb=None):
    # ignore pupil tracking for now

    time_info = {}
    behavior_keys = behavior_class.keys()

    spatial_key_list = ['SpatialSeries', 'Eyetracking', 'CompassDirection']
    if behavior_keys:
        for key in behavior_keys:
            try:
                if key == 'BehavioralEvents':
                    try:
                        time_series = behavior_class[key]['objects'][0].time_series
                        print(len(time_series))
                        if len(time_series) == 0:
                            print("BehavioralEvents is empty")
                            time_info[key] = 'empty'
                        else:
                            time_stamps = list(time_series.values())[0].timestamps[:100]
                            diffs = np.diff(time_stamps)
                            mean_diff = np.mean(diffs)
                            std_diff = np.std(diffs)

                            time_info[key] = {
                                "mean_event_diff": np.round(mean_diff, 6),
                                "std_event_diff": np.round(std_diff, 6)
                            }
                    except Exception as e:
                        time_info[key] = e
                
                elif key == 'BehavioralTimeSeries':
                    obj_time_infos = {}
                    for i in range(len(behavior_class[key]['objects'])):
                        obj = behavior_class[key]['objects'][i]
                        obj_name = list(obj.time_series.keys())[0]
                        try:
                            # all = [x for x in nwb.objects.values() if isinstance(x, pynwb.behavior.TimeSeries)]
                            # time_stamps = all[0].timestamps[:1000]
                            list_dicts = list(obj.time_series.values())
                            # print(len(list_dicts))
                            for j in range(len(list_dicts)):
                                ts = list_dicts[j]
                                # print(ts)
                                starting_time = ts.starting_time
                                # print(starting_time)
                                starting_time_unit = ts.starting_time_unit
                                rate = ts.rate

                                time_stamps = ts.timestamps
                                if time_stamps is not None:
                                    diffs = np.diff(time_stamps[:500])
                                    mean_diff = np.mean(diffs)
                                    std_diff = np.std(diffs)
                                else:
                                    diffs, mean_diff, std_diff = None, None, None

                                obj_time_infos[obj_name] = {
                                    'name': obj_name,
                                    'rate': rate,
                                    'starting_time': starting_time,
                                    'starting_time_unit': starting_time_unit,
                                    "mean_diff": mean_diff,
                                    "std_diff": std_diff,
                                }
                        except Exception as e:
                            obj_time_infos[obj.name] = e
                    time_info[key] = obj_time_infos
                    
                elif key in spatial_key_list:
                    obj_time_infos = {}
                    for i in range(len(behavior_class[key]['objects'])):
                        obj = behavior_class[key]['objects'][i]
                        try:
                            # all = [x for x in nwb.objects.values() if isinstance(x, pynwb.behavior.TimeSeries)]
                            # return all 
                            starting_time = obj.starting_time
                            starting_time_unit = obj.starting_time_unit
                            rate = obj.rate
                            
                            time_stamps = obj.timestamps
                            if time_stamps is not None:
                                diffs = np.diff(time_stamps[:500])
                                mean_diff = np.mean(diffs)
                                std_diff = np.std(diffs)
                            else:
                                diffs, mean_diff, std_diff = None, None, None

                            obj_time_infos[obj.name] = {
                                'name': obj.name,
                                'rate': rate,
                                'starting_time': starting_time,
                                'starting_time_unit': starting_time_unit,
                                "mean_diff": mean_diff,
                                "std_diff": std_diff
                            }
                            
                            if rate is  None:
                                rate = 'no rate, check timestamps'
                              
                            
                            
                        except Exception as e:
                            obj_time_infos[obj.name] = e
                    time_info[key] = obj_time_infos
                elif key == 'Position':
                    obj_time_infos = {}
                    for i in range(len(behavior_class[key]['objects'])):
                        objs = list(behavior_class[key]['objects'][i].spatial_series.values())
                        print(f"position length {len(objs)}")
                        for j in range(len(objs)):
                            obj = objs[j]
                            # print(obj)
                            try:
                                # all = [x for x in nwb.objects.values() if isinstance(x, pynwb.behavior.TimeSeries)]
                                # return all 
                                starting_time = obj.starting_time
                                starting_time_unit = obj.starting_time_unit
                                rate = obj.rate
                                
                                time_stamps = obj.timestamps
                                if time_stamps is not None:
                                    diffs = np.diff(time_stamps[:500])
                                    mean_diff = np.mean(diffs)
                                    std_diff = np.std(diffs)
                                else:
                                    diffs, mean_diff, std_diff = None, None, None

                                obj_time_infos[obj.name] = {
                                    'name': obj.name,
                                    'rate': rate,
                                    'starting_time': starting_time,
                                    'starting_time_unit': starting_time_unit,
                                    "mean_diff": mean_diff,
                                    "std_diff": std_diff
                                }
                                
                                if rate is  None:
                                    rate = 'no rate, check timestamps'
                            except Exception as e:
                                obj_time_infos[obj.name] = e
                        time_info[key] = obj_time_infos
            except Exception as e:
                time_info[key] = Exception
                    

    return time_info, behavior_class

In [25]:
behavior_assets_dandisets_time = behavior_assets_dandisets.copy()
behavior_assets_dandisets_time['experiment_description'] = ''
behavior_assets_dandisets_time['behavior_class'] = ''
behavior_assets_dandisets_time['time_info'] = ''


In [26]:
problems = []

for i, dset in tqdm(behavior_assets_dandisets_time.iterrows(), total=len(behavior_assets_dandisets_time)):
    try:

        dandiset_id = dset['dandiset_id']
        first_filepath = dset['filepaths'][0].path

        with DandiAPIClient() as client:
            file=client.get_dandiset(dandiset_id).get_asset_by_path(first_filepath)
            lindi_url = file.download_url
        f = lindi.LindiH5pyFile.from_hdf5_file(lindi_url)
        nwb = pynwb.NWBHDF5IO(file=f, mode='r').read()

        behavior_classes = get_non_empty_behavior_classes(nwb)
        behavior_assets_dandisets_time.at[i, 'experiment_description'] = nwb.experiment_description
        behavior_assets_dandisets_time.at[i, 'behavior_class'] = behavior_classes
        
        time_info = get_time_info_from_behavior_class(behavior_classes)
        behavior_assets_dandisets_time.at[i, 'time_info'] = time_info
    except Exception as e:
        problems.append((i, dandiset_id, dset['dandiset'], e))

# remove probelem index
for index, _, _, _ in problems:
    behavior_assets_dandisets_time = behavior_assets_dandisets_time.drop(index)

behavior_assets_dandisets_time = behavior_assets_dandisets_time.reset_index(drop=True)
        
behavior_assets_dandisets_time.to_csv('./behavior_assets_dandisets_time.csv')
behavior_assets_dandisets_time_info = behavior_assets_dandisets_time[['dandiset_id', 'dandiset', 'behavior_class', 'time_info']]
behavior_assets_dandisets_time_info.to_csv('./behavior_assets_dandisets_time_info.csv')

  0%|          | 0/64 [00:00<?, ?it/s]

2
3
0
BehavioralEvents is empty
1
2
5
2
2
14
1
14
1
36
7
1
1


In [27]:
problems

[(34,
  '000409',
  <dandi.dandiapi.RemoteDandiset at 0x11bf56f20>,
  OSError('Unable to synchronously open file (file signature not found)')),
 (41,
  '000568',
  <dandi.dandiapi.RemoteDandiset at 0x11c014fa0>,
  OSError('Unable to synchronously open file (file signature not found)'))]

### remfile

In [None]:
# remfile
s3link = files[0].get_content_url(follow_redirects=1, strip_query=True)
print(s3link)
cache_dirname = './tmp/remfile_cache'
# disk_cache = remfile.DiskCache(cache_dirname)
rem_file = remfile.File(s3link, )
h5py_file = h5py.File(rem_file, 'r')
io = NWBHDF5IO(file=h5py_file)
nwbfile = io.read()

https://dandiarchive.s3.amazonaws.com/blobs/390/a27/390a27ba-13ed-42fb-8709-8fa6bbcca456


## spatial series

In [None]:
data = behavior_assets_dandisets.iloc[11]
data

index                                                                    17
dandiset_id                                                          000050
dandiset                                                 DANDI:000050/draft
behavior_keys             [assetsSummary.approach.name, assetsSummary.va...
asset                                                                  True
ephys                                                                  True
filepaths                 [DANDI:assets/f3de94e9-6af4-4169-b911-1e7028ca...
experiment_description                                                 None
behavior_class            {'BehavioralTimeSeries': {'count': 2, 'objects...
time_info                 {'BehavioralTimeSeries': {'running_speed': {'n...
Name: 11, dtype: object

In [None]:
data['behavior_class']

{'BehavioralTimeSeries': {'count': 2,
  'objects': [RunningBehavior pynwb.behavior.BehavioralTimeSeries at 0x6322120144
   Fields:
     time_series: {
       running_speed <class 'pynwb.base.TimeSeries'>
     },
   EyeBehavior pynwb.behavior.BehavioralTimeSeries at 0x6322856272
   Fields:
     time_series: {
       eye_area <class 'pynwb.base.TimeSeries'>,
       pupil_area <class 'pynwb.base.TimeSeries'>,
       screen_coordinates_spherical <class 'pynwb.base.TimeSeries'>
     }]}}

In [None]:
data['time_info']

{'BehavioralTimeSeries': {'running_speed': {'name': 'running_speed',
   'rate': None,
   'starting_time': None,
   'starting_time_unit': None,
   'mean_diff': np.float64(0.03316837675350701),
   'std_diff': np.float64(3.687483661380553e-06)},
  'eye_area': {'name': 'eye_area',
   'rate': None,
   'starting_time': None,
   'starting_time_unit': None,
   'mean_diff': np.float64(0.03316837675350701),
   'std_diff': np.float64(3.687483661380553e-06)}}}

In [None]:
get_time_info_from_behavior_class(data['behavior_class'])


{'BehavioralTimeSeries': {'running_speed': {'name': 'running_speed',
   'rate': None,
   'starting_time': None,
   'starting_time_unit': None,
   'mean_diff': np.float64(0.03316837675350701),
   'std_diff': np.float64(3.687483661380553e-06)},
  'eye_area': {'name': 'eye_area',
   'rate': None,
   'starting_time': None,
   'starting_time_unit': None,
   'mean_diff': np.float64(0.03316837675350701),
   'std_diff': np.float64(3.687483661380553e-06)}}}

In [None]:
behavior_assets_dandisets.columns

Index(['index', 'dandiset_id', 'dandiset', 'behavior_keys', 'asset', 'ephys',
       'filepaths', 'experiment_description', 'behavior_class', 'time_info'],
      dtype='object')

# time info

In [117]:
df_time_info = behavior_assets_dandisets_time.copy()
df_time_info.head(2)

Unnamed: 0,index,dandiset_id,dandiset,behavior_keys,asset,ephys,filepaths,experiment_description,behavior_class,time_info
0,0,3,DANDI:000003/0.250624.0409,"[name, citation, description, assetsSummary.ap...",True,True,[DANDI:assets/5e9e92e1-f044-4aa0-ab47-1cfcb889...,,"{'SpatialSeries': {'count': 2, 'objects': [pos...",({'SpatialSeries': {'position_sensor1': {'name...
1,2,6,DANDI:000006/0.220126.1855,"[assetsSummary.approach.name, assetsSummary.va...",True,True,[DANDI:assets/a5ad932b-b893-4522-b989-8f406d78...,Extracellular electrophysiology recordings per...,"{'BehavioralEvents': {'count': 1, 'objects': [...",({'BehavioralEvents': {'mean_event_diff': 0.98...


In [119]:
df_time_info_non_empty = df_time_info[df_time_info['behavior_class'].astype(bool)].reset_index()
df_time_info_non_empty.tail(10)

Unnamed: 0,level_0,index,dandiset_id,dandiset,behavior_keys,asset,ephys,filepaths,experiment_description,behavior_class,time_info
43,51,129,953,DANDI:000953/draft,"[description, assetsSummary.approach.name, ass...",True,True,[DANDI:assets/d80bc39d-0052-4468-bb4f-c1137c8c...,Two finger group movement in NHP. Behavior pro...,"{'BehavioralTimeSeries': {'count': 2, 'objects...",({'BehavioralTimeSeries': {'index': {'name': '...
44,52,131,978,DANDI:000978/0.240511.0307,"[description, assetsSummary.approach.name, ass...",True,True,[DANDI:assets/4b7c7959-04b3-4817-8f96-1af6f418...,Microdrive tetrode recordings in behaving rats,"{'Position': {'count': 1, 'objects': [Position...",({'Position': {'Position': 'Position' object h...
45,53,132,987,DANDI:000987/draft,"[assetsSummary.approach.name, assetsSummary.me...",True,True,[DANDI:assets/36de62ce-1d87-4c5f-a5ab-604354aa...,,"{'CompassDirection': {'count': 1, 'objects': [...",({'CompassDirection': {'CompassDirection': 'Co...
46,54,154,1209,DANDI:001209/draft,"[description, assetsSummary.approach.name, ass...",True,True,[DANDI:assets/991a0302-daff-462e-a909-de0a6004...,reach-to-grasp center out,"{'BehavioralTimeSeries': {'count': 1, 'objects...",({'BehavioralTimeSeries': {'APL': {'name': 'AP...
47,55,159,1275,DANDI:001275/draft,"[assetsSummary.approach.name, assetsSummary.me...",True,True,[DANDI:assets/fdec0520-4eb7-455f-8f1f-29dfe675...,,"{'SpatialSeries': {'count': 2, 'objects': [han...",({'SpatialSeries': {'hand_position': {'name': ...
48,56,160,1280,DANDI:001280/0.241218.2300,"[assetsSummary.approach.name, assetsSummary.va...",True,True,[DANDI:assets/9948fceb-bcf0-4be8-98c6-1dc27f05...,Hippocampus content feedback,"{'BehavioralEvents': {'count': 3, 'objects': [...",({'BehavioralEvents': {'mean_event_diff': 0.02...
49,58,167,1371,DANDI:001371/draft,"[description, assetsSummary.approach.name, ass...",True,True,[DANDI:assets/54c844f5-7549-4c74-a06c-7732000a...,Head-fixed mice were trained to perform a memo...,"{'BehavioralEvents': {'count': 2, 'objects': [...",({'BehavioralEvents': {'mean_event_diff': 0.00...
50,59,174,1530,DANDI:001530/draft,"[assetsSummary.approach.name, assetsSummary.va...",True,True,[DANDI:assets/1a63f51e-2b39-4ef3-9e07-39275e9e...,.....,"{'BehavioralEvents': {'count': 1, 'objects': [...",({'BehavioralEvents': {'mean_event_diff': 161....
51,60,175,1533,DANDI:001533/draft,"[assetsSummary.approach.name, assetsSummary.me...",True,True,[DANDI:assets/28db1016-1f79-42e4-9e14-46536756...,IBL aims to understand the neural basis of dec...,"{'CompassDirection': {'count': 1, 'objects': [...",({'CompassDirection': {'CompassDirection': 'Co...
52,61,176,1539,DANDI:001539/0.250804.1538,"[description, assetsSummary.approach.name, ass...",True,True,[DANDI:assets/f1a65eed-37d6-4391-a70c-9f298635...,Microdrive tetrode recordings in behaving rats,"{'Position': {'count': 1, 'objects': [Position...",({'Position': {'Position': 'Position' object h...


## error

In [57]:
def check_time_info_for_errors(time_info):
    for index, value in time_info.items():
        if isinstance(value, Exception) or "Error" in str(value):
            return index
    return None


In [58]:
csv_time_infos = df_time_info_non_empty['time_info']
csv_time_infos[3]

({'BehavioralEvents': 'empty'},
 {'BehavioralEvents': {'count': 1,
   'objects': [BehavioralEvents pynwb.behavior.BehavioralEvents at 0x5012285360
    Fields:]}})

In [59]:
check_time_info_for_errors(csv_time_infos[3][0])

In [60]:
error_indices = []

for i in range(len(csv_time_infos)):
    try:
        error = check_time_info_for_errors(csv_time_infos[i][0])
        if error is not None:
            error_indices.append((i, error))
    except Exception as e:
        error_indices.append((i, 'exception', e))

In [61]:
error_indices

[(10, 'Position'),
 (11, 'Position'),
 (12, 'Position'),
 (13, 'Position'),
 (15, 'Position'),
 (16, 'Position'),
 (17, 'Position'),
 (18, 'Position'),
 (19, 'Position'),
 (20, 'CompassDirection'),
 (22, 'CompassDirection'),
 (26, 'Position'),
 (29, 'Position'),
 (30, 'Position'),
 (35, 'Position'),
 (37, 'Position'),
 (38, 'Position'),
 (39, 'Position'),
 (41, 'CompassDirection'),
 (44, 'Position'),
 (45, 'CompassDirection'),
 (48, 'Position'),
 (49, 'CompassDirection'),
 (51, 'CompassDirection'),
 (52, 'Position')]

In [106]:
i = -1
behcla = df_time_info_non_empty.iloc[error_indices[i][0]]['behavior_class']
e, obj_ = get_time_info_from_behavior_class(behcla)
print(e)


position length 1
SpatialSeries pynwb.behavior.SpatialSeries at 0x13375872112
Fields:
  comments: Video frames taken at 30fps - row1:xpos row2:ypos row3:velocity
  conversion: 1.0
  data: <LindiH5pyDataset: /processing/behavior/Position/SpatialSeries/data>
  description: no description
  interval: 1
  offset: 0.0
  reference_frame: center well
  resolution: -1.0
  timestamps: <LindiH5pyDataset: /processing/behavior/Position/SpatialSeries/timestamps>
  timestamps_unit: seconds
  unit: centimeters; centimeters/second

{'Position': {'SpatialSeries': {'name': 'SpatialSeries', 'rate': None, 'starting_time': None, 'starting_time_unit': None, 'mean_diff': np.float64(0.03360460921843703), 'std_diff': np.float64(0.004826431037491482)}}, 'SpatialSeries': {'SpatialSeries': {'name': 'SpatialSeries', 'rate': None, 'starting_time': None, 'starting_time_unit': None, 'mean_diff': np.float64(0.03360460921843703), 'std_diff': np.float64(0.004826431037491482)}}}


In [154]:
for i in error_indices:
    print(i[0])
    behcla = df_time_info_non_empty.iloc[i[0]]['behavior_class']
    tinfo = get_time_info_from_behavior_class(behcla)
    df_time_info_non_empty.at[i[0], 'time_info'] = tinfo

10
position length 1
11
position length 9
12
position length 2
13
position length 1
15
position length 3
16
5
position length 5
17
position length 4
18
position length 23
19
position length 1
20
position length 2
22
position length 1
26
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
position length 1
positio

In [156]:
df_time_info_non_empty.to_csv('./df_time_info_non_empty_with_position.csv')

# dataset time info

In [181]:
df = df_time_info_non_empty.copy()
df = df[['dandiset_id', 'dandiset', 'experiment_description', 'time_info']]
df

Unnamed: 0,dandiset_id,dandiset,experiment_description,time_info
0,3,DANDI:000003/0.250624.0409,,({'SpatialSeries': {'position_sensor1': {'name...
1,6,DANDI:000006/0.220126.1855,Extracellular electrophysiology recordings per...,({'BehavioralEvents': {'mean_event_diff': 0.98...
2,10,DANDI:000010/0.220126.1905,Extracellular electrophysiology recordings wit...,({'BehavioralEvents': {'mean_event_diff': 12.0...
3,11,DANDI:000011/0.220126.1907,Extracellular electrophysiology recordings wit...,"({'BehavioralEvents': 'empty'}, {'BehavioralEv..."
4,13,DANDI:000013/0.220126.2143,Intracellular and extracellular electrophysiol...,({'BehavioralTimeSeries': {'amplitude': {'name...
5,17,DANDI:000017/0.240329.1926,Large-scale Neuropixels recordings across brai...,({'BehavioralEvents': {'mean_event_diff': 0.58...
6,39,DANDI:000039/0.230223.1216,,({'BehavioralTimeSeries': {'running_speed': {'...
7,45,DANDI:000045/0.211209.1413,ibl_neuropixel_brainwide_01,({'BehavioralTimeSeries': {'position': {'name'...
8,49,DANDI:000049/0.230223.1424,,({'BehavioralTimeSeries': {'running_speed': {'...
9,50,DANDI:000050/draft,,({'BehavioralTimeSeries': {'running_speed': {'...


In [182]:
behavior_events_only_index = []
for i, row in df.iterrows():
    if len(row['time_info'][0]) == 1:
        print(i, list(row['time_info'][0].keys())[0])
        if 'BehavioralEvents' in list(row['time_info'][0].keys())[0]:
            print(i)
            behavior_events_only_index.append(i)
print(behavior_events_only_index)

0 SpatialSeries
1 BehavioralEvents
1
3 BehavioralEvents
3
4 BehavioralTimeSeries
6 BehavioralTimeSeries
7 BehavioralTimeSeries
8 BehavioralTimeSeries
9 BehavioralTimeSeries
14 BehavioralEvents
14
21 BehavioralTimeSeries
24 BehavioralTimeSeries
25 SpatialSeries
27 BehavioralEvents
27
31 BehavioralTimeSeries
32 BehavioralTimeSeries
34 BehavioralEvents
34
40 SpatialSeries
42 BehavioralTimeSeries
43 BehavioralTimeSeries
46 BehavioralTimeSeries
47 SpatialSeries
[1, 3, 14, 27, 34]


In [183]:
df_dropped = df.drop(index=behavior_events_only_index).reset_index(drop=True)
df_dropped

Unnamed: 0,dandiset_id,dandiset,experiment_description,time_info
0,3,DANDI:000003/0.250624.0409,,({'SpatialSeries': {'position_sensor1': {'name...
1,10,DANDI:000010/0.220126.1905,Extracellular electrophysiology recordings wit...,({'BehavioralEvents': {'mean_event_diff': 12.0...
2,13,DANDI:000013/0.220126.2143,Intracellular and extracellular electrophysiol...,({'BehavioralTimeSeries': {'amplitude': {'name...
3,17,DANDI:000017/0.240329.1926,Large-scale Neuropixels recordings across brai...,({'BehavioralEvents': {'mean_event_diff': 0.58...
4,39,DANDI:000039/0.230223.1216,,({'BehavioralTimeSeries': {'running_speed': {'...
5,45,DANDI:000045/0.211209.1413,ibl_neuropixel_brainwide_01,({'BehavioralTimeSeries': {'position': {'name'...
6,49,DANDI:000049/0.230223.1424,,({'BehavioralTimeSeries': {'running_speed': {'...
7,50,DANDI:000050/draft,,({'BehavioralTimeSeries': {'running_speed': {'...
8,53,DANDI:000053/0.210819.0345,trial contrast: 100,({'Position': {'position': {'name': 'position'...
9,55,DANDI:000055/0.220127.0436,,"({'Position': {'L_Ear': {'name': 'L_Ear', 'rat..."


In [212]:

# Extract fields from the 'time_info' column and return as a DataFrame
def extract_time_info(row_tinfo):
    rows = []
    row_tinfo_keys = list(row_tinfo.keys())
    for i in range(len(row_tinfo_keys)):
        series_keys = list(row_tinfo[row_tinfo_keys[i]].keys())
        for j in range(len(series_keys)):
            info = row_tinfo[row_tinfo_keys[i]][series_keys[j]]
            # Combine row_tinfo key and series key to create the name
            combined_name = f"{row_tinfo_keys[i]}_{series_keys[j]}"
            rows.append({
                "name": combined_name,
                "rate": info.get("rate", None),
                "mean_diff": info.get("mean_diff", None),
                "std_diff": info.get("std_diff", None)
            })
    # Convert the list of rows into a DataFrame
    return pd.DataFrame(rows)

In [226]:
def extract_time_info(row_tinfo):
    rows = []
    row_tinfo_keys = list(row_tinfo.keys())
    for i, series in enumerate(row_tinfo.values()):
        series_keys = list(series.keys())
        for j, info in enumerate(series.values()):
            # Check if info is a dictionary
            if isinstance(info, dict):
                # Combine row_tinfo key and series key to create the name
                combined_name = f"{row_tinfo_keys[i]}_{series_keys[j]}"
                rows.append({
                    "name": combined_name,
                    "rate": info.get("rate", None),
                    "mean_diff": info.get("mean_diff", None),
                    "std_diff": info.get("std_diff", None)
                })
            else:
                # Handle unexpected numeric or other types of values
                print(f"{series_keys}, Unexpected info type: {type(info)} for {info}")
    # Convert the list of rows into a DataFrame
    return pd.DataFrame(rows)

In [227]:
for i, row in df_dropped.iterrows():
    row_tinfo = row['time_info'][0]
    time_row_df = extract_time_info(row_tinfo)
    

['mean_event_diff', 'std_event_diff'], Unexpected info type: <class 'numpy.float64'> for 12.003561
['mean_event_diff', 'std_event_diff'], Unexpected info type: <class 'numpy.float64'> for 5.985609
['mean_event_diff', 'std_event_diff'], Unexpected info type: <class 'numpy.float64'> for 0.582899
['mean_event_diff', 'std_event_diff'], Unexpected info type: <class 'numpy.float64'> for 1.258091
['mean_event_diff', 'std_event_diff'], Unexpected info type: <class 'numpy.float64'> for 0.039423
['mean_event_diff', 'std_event_diff'], Unexpected info type: <class 'numpy.float64'> for 0.057455
['allocentric_frame_tracking'], Unexpected info type: <class 'AttributeError'> for 'CompassDirection' object has no attribute 'starting_time'
['direction'], Unexpected info type: <class 'AttributeError'> for 'CompassDirection' object has no attribute 'starting_time'
['mean_event_diff', 'std_event_diff'], Unexpected info type: <class 'numpy.float64'> for 27.848495
['mean_event_diff', 'std_event_diff'], Unexpe

In [213]:
extract_time_info(row_tinfo)

Unnamed: 0,name,rate,mean_diff,std_diff
0,Position_SpatialSeries,,0.033605,0.004826
1,SpatialSeries_SpatialSeries,,0.033605,0.004826


In [219]:
# Initialize an empty list to collect all rows
all_rows = []

for i, row in df_dropped.iterrows():
    row_tinfo = row['time_info'][0]
    dandiset_id = row['dandiset_id']  # Assuming 'dandiset_id' exists in your DataFrame
    
    # Extract time info as a DataFrame
    time_row_df = extract_time_info(row_tinfo)
    
    # Add dandiset_id to the extracted DataFrame
    time_row_df['dandiset_id'] = dandiset_id
    
    # Determine time resolution based on rate, mean_diff, or std_diff
    time_row_df['time_resolution'] = time_row_df.apply(
        lambda x: f"rate_{x['rate']}" if x['rate'] is not None else (
            f"mean_diff_{x['mean_diff']}" if x['mean_diff'] is not None else (
                f"std_diff_{x['std_diff']}" if x['std_diff'] is not None else "unknown"
            )
        ), axis=1
    )
    
    # Append the DataFrame to the list
    all_rows.append(time_row_df)

# Concatenate all rows into a single DataFrame
final_df = pd.concat(all_rows, ignore_index=True)


Unexpected info type: <class 'numpy.float64'> for 12.003561
Unexpected info type: <class 'numpy.float64'> for 5.985609
Unexpected info type: <class 'numpy.float64'> for 0.582899
Unexpected info type: <class 'numpy.float64'> for 1.258091
Unexpected info type: <class 'numpy.float64'> for 0.039423
Unexpected info type: <class 'numpy.float64'> for 0.057455
Unexpected info type: <class 'AttributeError'> for 'CompassDirection' object has no attribute 'starting_time'
Unexpected info type: <class 'AttributeError'> for 'CompassDirection' object has no attribute 'starting_time'
Unexpected info type: <class 'numpy.float64'> for 27.848495
Unexpected info type: <class 'numpy.float64'> for 22.230676
Unexpected info type: <class 'numpy.float64'> for 5.973597
Unexpected info type: <class 'numpy.float64'> for 2.313472
Unexpected info type: <class 'numpy.float64'> for 15.74564
Unexpected info type: <class 'numpy.float64'> for 81.168183
Unexpected info type: <class 'numpy.float64'> for 6.081036
Unexpecte

In [218]:
final_df

Unnamed: 0,name,rate,mean_diff,std_diff,dandiset_id,time_resolution
0,SpatialSeries_position_sensor1,39.062500,,,000003,rate_39.0625
1,SpatialSeries_position_sensor0,39.062500,,,000003,rate_39.0625
2,BehavioralTimeSeries_lick_trace,,0.000100,6.020662e-15,000010,mean_diff_9.999999999998764e-05
3,BehavioralTimeSeries_amplitude,,0.001000,5.964883e-18,000013,mean_diff_0.001
4,BehavioralTimeSeries_face_motion_energy,0.025246,,,000017,rate_0.02524557875164268
...,...,...,...,...,...,...
250,SpatialSeries_position,,0.009945,1.058886e-03,001371,mean_diff_0.00994475617902472
251,BehavioralTimeSeries_C2Whisker_Angle,,0.010000,1.673763e-06,001530,mean_diff_0.00999995390781563
252,SpatialSeries_WheelPositionSeries,,0.032533,2.465115e-01,001533,mean_diff_0.03253335288580407
253,Position_SpatialSeries,,0.033605,4.826431e-03,001539,mean_diff_0.03360460921843703


In [228]:
final_df.to_csv('./final_df_time_info.csv')