based on notebooks in  
https://github.com/dandi/example-notebooks/tree/master/tutorials/neurodatarehack_2024 

In [10]:
import json
import numpy as np
from dandi.dandiapi import DandiAPIClient
from tqdm.notebook import tqdm
from isodate import parse_duration, Duration
from datetime import datetime
from warnings import simplefilter
simplefilter("ignore")  # Suppress namespace warnings from reading older NWB files

from nwbinspector.tools import get_s3_urls_and_dandi_paths
from pynwb import NWBHDF5IO
import remfile
import h5py

import lindi, pynwb

from dandi.dandiapi import DandiAPIClient



In [4]:
client = DandiAPIClient()
dandisets = list(client.get_dandisets())

# nwb dandisets

In [5]:
nwb_dandisets = []

for dandiset in tqdm(dandisets):
    raw_metadata = dandiset.get_raw_metadata()

    if any(
        data_standard['identifier'] == "RRID:SCR_015242"  # this is the RRID for NWB
        for data_standard in raw_metadata['assetsSummary'].get('dataStandard', [])
    ):
        nwb_dandisets.append(dandiset)
print(f"There are currently {len(nwb_dandisets)} NWB datasets on DANDI!")

  0%|          | 0/697 [00:00<?, ?it/s]

There are currently 420 NWB datasets on DANDI!


# dandisets with ephys

In [92]:
ephys_nwb_dandisets = []

for dset in tqdm(nwb_dandisets):
    raw_metadata = dset.get_raw_metadata()

    approaches = raw_metadata['assetsSummary'].get('approach', [])
    if (
        any('electrophysiological' in a.get('name', '') for a in approaches) 
    ):
        ephys_nwb_dandisets.append(dset)

print(len(ephys_nwb_dandisets))

  0%|          | 0/420 [00:00<?, ?it/s]

219


# dandisets with ephys and behavior  


In [93]:
raw_metadata = dandisets[0].get_raw_metadata()
raw_metadata

{'id': 'DANDI:000003/0.250624.0409',
 'doi': '10.48324/dandi.000003/0.250624.0409',
 'url': 'https://dandiarchive.org/dandiset/000003/0.250624.0409',
 'name': 'Physiological Properties and Behavioral Correlates of Hippocampal Granule Cells and Mossy Cells',
 'about': [{'name': 'hippocampus',
   'schemaKey': 'Anatomy',
   'identifier': 'UBERON:0002421'}],
 'access': [{'status': 'dandi:OpenAccess',
   'schemaKey': 'AccessRequirements',
   'contactPoint': {'email': 'petersen.peter@gmail.com',
    'schemaKey': 'ContactPoint'}}],
 'license': ['spdx:CC-BY-4.0'],
 'version': '0.250624.0409',
 '@context': 'https://raw.githubusercontent.com/dandi/schema/master/releases/0.6.0/context.json',
 'citation': 'Senzai, Yuta; Fernandez-Ruiz, Antonio; Buzsáki, György (2025) Physiological Properties and Behavioral Correlates of Hippocampal Granule Cells and Mossy Cells (Version 0.250624.0409) [Data set]. DANDI Archive. https://doi.org/10.48324/dandi.000003/0.250624.0409',
 'keywords': ['cell types',
  'cu

In [7]:
raw_metadata['assetsSummary']['species'][0].get('name', '')

'House mouse'

In [8]:
brbe_nwb_dandisets = []

for dset in tqdm(nwb_dandisets):
    raw_metadata = dset.get_raw_metadata()

    approaches = raw_metadata['assetsSummary'].get('approach', [])
    species = raw_metadata['assetsSummary'].get('species', [])
    if (
        any('electrophysiological' in a.get('name', '') for a in approaches) and
        any('behavioral' in a.get('name', '') for a in approaches)   
    ):
        brbe_nwb_dandisets.append(dset)

print(len(brbe_nwb_dandisets))

  0%|          | 0/420 [00:00<?, ?it/s]

64


## specify species

In [9]:
brbe_nwb_dandisets = []

for dset in tqdm(nwb_dandisets):
    raw_metadata = dset.get_raw_metadata()

    approaches = raw_metadata['assetsSummary'].get('approach', [])
    species = raw_metadata['assetsSummary'].get('species', [])
    if (
        any('electrophysiological' in a.get('name', '') for a in approaches) and
        any('behavioral' in a.get('name', '') for a in approaches) and
        
        isinstance(species[0], dict) and  
        'monkey' in species[0].get('name', '')  
    ):
        brbe_nwb_dandisets.append(dset)

print(len(brbe_nwb_dandisets))

  0%|          | 0/420 [00:00<?, ?it/s]

10


## number of units

In [99]:
a = brbe_nwb_dandisets[-1]
dandiset_id = a.get_raw_metadata()['id'].split('/')[0].split(':')[-1]

s3_urls = get_s3_urls_and_dandi_paths(dandiset_id=dandiset_id)

num_units_per_asset = dict()
for s3_url in tqdm(s3_urls):

    rem_file = remfile.File(s3_url)
    h5py_file = h5py.File(rem_file, "r")
    io = NWBHDF5IO(file=h5py_file, load_namespaces=True)
    nwbfile = io.read()
    
    if nwbfile.units:
        num_units_per_asset.update({s3_url: len(nwbfile.units)})

print(len(num_units_per_asset))

  0%|          | 0/38 [00:00<?, ?it/s]

# read nwb via lindi 

In [127]:
dandiset_id = brbe_nwb_dandisets[2].get_raw_metadata()['id'].split('/')[0].split(':')[-1]
s3_urls = get_s3_urls_and_dandi_paths(dandiset_id=dandiset_id)
print(s3_urls)
print(list(s3_urls.values()))

{'https://dandiarchive.s3.amazonaws.com/blobs/d4b/a5d/d4ba5d3e-31ac-4fd6-99e2-1ccdc6a0c5cf': 'sub-Reggie/sub-Reggie_ses-20170117T104643_behavior+ecephys.nwb', 'https://dandiarchive.s3.amazonaws.com/blobs/f69/54d/f6954daf-3b6e-4b0f-876f-fd209d974d5b': 'sub-Reggie/sub-Reggie_ses-20170125T100800_behavior+ecephys.nwb', 'https://dandiarchive.s3.amazonaws.com/blobs/fb1/0e9/fb10e9ca-c13c-45f6-a51c-1bad0067e36d': 'sub-Reggie/sub-Reggie_ses-20170124T094957_behavior+ecephys.nwb', 'https://dandiarchive.s3.amazonaws.com/blobs/264/4ee/2644ee67-8ce0-4069-8f68-30ebebf8e848': 'sub-Reggie/sub-Reggie_ses-20170119T123128_behavior+ecephys.nwb', 'https://dandiarchive.s3.amazonaws.com/blobs/111/465/11146555-c5a3-4654-83ee-5f2b84a792d3': 'sub-Reggie/sub-Reggie_ses-20170118T094022_behavior+ecephys.nwb', 'https://dandiarchive.s3.amazonaws.com/blobs/a52/253/a5225398-ec1c-4325-8bbe-83dc73fa8c87': 'sub-Reggie/sub-Reggie_ses-20170115T125333_behavior+ecephys.nwb', 'https://dandiarchive.s3.amazonaws.com/blobs/e00/a6

In [128]:
filepath = list(s3_urls.values())[0]

with DandiAPIClient() as client:
    asset = client.get_dandiset(dandiset_id).get_asset_by_path(filepath)
    s3_url = asset.get_content_url(follow_redirects=1, strip_query=True)
f = lindi.LindiH5pyFile.from_hdf5_file(asset.download_url)
nwb = pynwb.NWBHDF5IO(file=f, mode='r').read()

In [129]:
nwb.objects

{'23764436-c38f-4004-bc89-05b98e3950ea': root pynwb.file.NWBFile at 0x5032973392
 Fields:
   devices: {
     Utah Array(M1) <class 'pynwb.device.Device'>,
     Utah Array(PMd) <class 'pynwb.device.Device'>
   }
   electrode_groups: {
     1 <class 'pynwb.ecephys.ElectrodeGroup'>,
     2 <class 'pynwb.ecephys.ElectrodeGroup'>
   }
   electrodes: electrodes <class 'pynwb.ecephys.ElectrodesTable'>
   experiment_description: center out task for Monkeys
   experimenter: ['Blue Scheffer' 'Nir Even-Chen']
   file_create_date: [datetime.datetime(2021, 8, 2, 3, 47, 31, 488379, tzinfo=tzoffset(None, -14400))]
   identifier: 06f1c03a-c6aa-45a4-86e8-1a71175af294
   institution: Stanford University
   intervals: {
     trials <class 'pynwb.epoch.TimeIntervals'>
   }
   processing: {
     behavior <class 'pynwb.base.ProcessingModule'>,
     ecephys <class 'pynwb.base.ProcessingModule'>
   }
   related_publications: ['10.1371/journal.pcbi.1006808']
   session_description: no description
   session_st

In [130]:
trials = nwb.intervals["trials"]
trials.colnames

('start_time',
 'stop_time',
 'is_successful',
 'task_id',
 'version_id',
 'reach_time',
 'target_hold_time',
 'fail_time',
 'target_pos',
 'target_size',
 'barrier_points',
 'go_cue_time',
 'target_acquire_time',
 'target_held_time',
 'target_shown_time',
 'timeseries')

In [131]:
electrodes = nwb.electrodes
electrodes.colnames

('x',
 'y',
 'z',
 'imp',
 'location',
 'filtering',
 'group',
 'group_name',
 'gain',
 'offset')

In [132]:
nwb.get_linked_resources

<bound method HERDManager.get_linked_resources of root pynwb.file.NWBFile at 0x5032973392
Fields:
  devices: {
    Utah Array(M1) <class 'pynwb.device.Device'>,
    Utah Array(PMd) <class 'pynwb.device.Device'>
  }
  electrode_groups: {
    1 <class 'pynwb.ecephys.ElectrodeGroup'>,
    2 <class 'pynwb.ecephys.ElectrodeGroup'>
  }
  electrodes: electrodes <class 'pynwb.ecephys.ElectrodesTable'>
  experiment_description: center out task for Monkeys
  experimenter: ['Blue Scheffer' 'Nir Even-Chen']
  file_create_date: [datetime.datetime(2021, 8, 2, 3, 47, 31, 488379, tzinfo=tzoffset(None, -14400))]
  identifier: 06f1c03a-c6aa-45a4-86e8-1a71175af294
  institution: Stanford University
  intervals: {
    trials <class 'pynwb.epoch.TimeIntervals'>
  }
  processing: {
    behavior <class 'pynwb.base.ProcessingModule'>,
    ecephys <class 'pynwb.base.ProcessingModule'>
  }
  related_publications: ['10.1371/journal.pcbi.1006808']
  session_description: no description
  session_start_time: 2017-0

In [133]:
print(nwb.processing['behavior']['Position'].spatial_series['Cursor'])
nwb.processing['behavior']['Position'].spatial_series['Cursor'].timestamps[:10]

Cursor pynwb.behavior.SpatialSeries at 0x4997068832
Fields:
  comments: no comments
  conversion: nan
  data: <LindiH5pyDataset: /processing/behavior/Position/Cursor/data>
  description: cursor pos on screen in x,y
  interval: 1
  offset: 0.0
  reference_frame: screen center
  resolution: -1.0
  timestamps: <LindiH5pyDataset: /processing/behavior/Position/Cursor/timestamps>
  timestamps_unit: seconds
  unit: meters



array([0.   , 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008,
       0.009])

In [137]:
ss = [x for x in nwb.objects.values() if isinstance(x, pynwb.behavior.SpatialSeries)]