Based on the notebooks in  
https://github.com/dandi/example-notebooks/tree/master/tutorials/neurodatarehack_2024

In [10]:
import json
import numpy as np
from dandi.dandiapi import DandiAPIClient
from tqdm.notebook import tqdm
from isodate import parse_duration, Duration
from datetime import datetime
from warnings import simplefilter
simplefilter("ignore")  # Suppress namespace warnings from reading older NWB files

from nwbinspector.tools import get_s3_urls_and_dandi_paths
from pynwb import NWBHDF5IO
import remfile
import h5py

import lindi, pynwb

from dandi.dandiapi import DandiAPIClient



In [4]:
# Open a DANDI API session and materialize the complete dandiset listing.
# The session is left open on purpose: the dandiset objects are queried
# again in the cells below.
client = DandiAPIClient()
dandisets = [*client.get_dandisets()]

# NWB dandisets

In [5]:
# Collect every dandiset whose asset summary lists NWB among its data standards.
nwb_dandisets = []

for dandiset in tqdm(dandisets):
    raw_metadata = dandiset.get_raw_metadata()
    data_standards = raw_metadata['assetsSummary'].get('dataStandard', [])

    # Lazy membership test: stops at the first matching identifier.
    standard_ids = (data_standard['identifier'] for data_standard in data_standards)
    if "RRID:SCR_015242" in standard_ids:  # this is the RRID for NWB
        nwb_dandisets.append(dandiset)
print(f"There are currently {len(nwb_dandisets)} NWB datasets on DANDI!")

  0%|          | 0/697 [00:00<?, ?it/s]

There are currently 420 NWB datasets on DANDI!


# Dandisets with electrophysiology and behavior


In [6]:
# Fetch the raw metadata of the first listed dandiset to inspect its structure.
raw_metadata = dandisets[0].get_raw_metadata()

In [7]:
# Show the name of the first species entry in the asset summary
# (falls back to an empty string when that entry has no 'name' key).
raw_metadata['assetsSummary']['species'][0].get('name', '')

'House mouse'

In [8]:
# Select the NWB dandisets whose asset summary lists both an
# electrophysiological and a behavioral approach.
brbe_nwb_dandisets = []

for dset in tqdm(nwb_dandisets):
    raw_metadata = dset.get_raw_metadata()

    # Extract the approach names once so both checks share the same list.
    # (The species lookup was dropped here: this cell never used it.)
    approaches = raw_metadata['assetsSummary'].get('approach', [])
    approach_names = [a.get('name', '') for a in approaches]

    has_ephys = any('electrophysiological' in name for name in approach_names)
    has_behavior = any('behavioral' in name for name in approach_names)
    if has_ephys and has_behavior:
        brbe_nwb_dandisets.append(dset)

print(len(brbe_nwb_dandisets))

  0%|          | 0/420 [00:00<?, ?it/s]

64


## Restrict to a species (monkey)

In [9]:
# Select the NWB dandisets that combine electrophysiological and behavioral
# approaches AND whose first listed species name contains 'monkey'.
brbe_nwb_dandisets = []

for dset in tqdm(nwb_dandisets):
    raw_metadata = dset.get_raw_metadata()

    approaches = raw_metadata['assetsSummary'].get('approach', [])
    species = raw_metadata['assetsSummary'].get('species', [])

    # 'species' defaults to an empty list, so indexing [0] directly would raise
    # IndexError for a dandiset without species metadata; guard it instead.
    first_species = species[0] if species else None

    if (
        any('electrophysiological' in a.get('name', '') for a in approaches) and
        any('behavioral' in a.get('name', '') for a in approaches) and
        isinstance(first_species, dict) and
        'monkey' in first_species.get('name', '')
    ):
        brbe_nwb_dandisets.append(dset)

print(len(brbe_nwb_dandisets))

  0%|          | 0/420 [00:00<?, ?it/s]

10


## Number of units per asset

In [99]:
# Count the rows of the units table in every asset of the last selected dandiset.
selected_dandiset = brbe_nwb_dandisets[-1]
# The raw 'id' is reduced to the bare identifier: keep what precedes the first
# '/', then what follows the last ':'.
dandiset_id = selected_dandiset.get_raw_metadata()['id'].split('/')[0].split(':')[-1]

s3_urls = get_s3_urls_and_dandi_paths(dandiset_id=dandiset_id)

num_units_per_asset = dict()
for s3_url in tqdm(s3_urls):

    rem_file = remfile.File(s3_url)
    # Context managers close the HDF5 handle and the NWB reader after each
    # asset; previously every remote file stayed open for the whole loop.
    with h5py.File(rem_file, "r") as h5py_file:
        with NWBHDF5IO(file=h5py_file, load_namespaces=True) as io:
            nwbfile = io.read()

            # Read the count while the file is still open; assets whose
            # units attribute is falsy are skipped, as before.
            if nwbfile.units:
                num_units_per_asset[s3_url] = len(nwbfile.units)

print(len(num_units_per_asset))

  0%|          | 0/38 [00:00<?, ?it/s]

# Read NWB via lindi

In [82]:
# Resolve the identifier of the second selected dandiset, then list its assets.
raw_id = brbe_nwb_dandisets[1].get_raw_metadata()['id']
id_before_slash = raw_id.split('/')[0]
dandiset_id = id_before_slash.split(':')[-1]

s3_urls = get_s3_urls_and_dandi_paths(dandiset_id=dandiset_id)

# Show the full mapping first, then only its values.
print(s3_urls)
print([dandi_path for dandi_path in s3_urls.values()])

{'https://dandiarchive.s3.amazonaws.com/blobs/2b3/75e/2b375e1a-120d-4fc0-9d80-255712170214': 'sub-Jenkins/sub-Jenkins_ses-20090912_behavior+ecephys.nwb', 'https://dandiarchive.s3.amazonaws.com/blobs/7cb/3ad/7cb3adf9-66fe-4320-8511-8faa45d391f4': 'sub-Jenkins/sub-Jenkins_ses-20090923_behavior+ecephys.nwb', 'https://dandiarchive.s3.amazonaws.com/blobs/265/394/26539417-2aaa-41eb-a146-d9a90b0b3d10': 'sub-Nitschke/sub-Nitschke_ses-20090819_behavior+ecephys.nwb', 'https://dandiarchive.s3.amazonaws.com/blobs/afd/f6a/afdf6aa3-6165-4915-9677-a503f70e557d': 'sub-Nitschke/sub-Nitschke_ses-20090812_behavior+ecephys.nwb', 'https://dandiarchive.s3.amazonaws.com/blobs/2dd/66a/2dd66ad9-e7d9-4b96-9de1-4a90ca416048': 'sub-Jenkins/sub-Jenkins_ses-20090916_behavior+ecephys.nwb', 'https://dandiarchive.s3.amazonaws.com/blobs/2f4/f82/2f4f829f-19e8-4455-87d5-3bbf42f0acd9': 'sub-Nitschke/sub-Nitschke_ses-20090910_behavior+ecephys.nwb', 'https://dandiarchive.s3.amazonaws.com/blobs/756/12c/75612cdf-8d70-4f8a-933

In [83]:
# Load the first asset of the dandiset lazily through lindi.
filepath = list(s3_urls.values())[0]

# Use a dedicated name for this short-lived session so the notebook-level
# `client` is not rebound to a session that is closed when the block exits.
with DandiAPIClient() as asset_client:
    asset = asset_client.get_dandiset(dandiset_id).get_asset_by_path(filepath)
    s3_url = asset.get_content_url(follow_redirects=1, strip_query=True)

# Open the file from the resolved content URL; it was previously computed
# and then ignored in favour of `asset.download_url`.
f = lindi.LindiH5pyFile.from_hdf5_file(s3_url)
# The reader is intentionally left open: `nwb` is inspected in the cells below.
nwb = pynwb.NWBHDF5IO(file=f, mode='r').read()

In [84]:
# Display the `objects` mapping of the loaded NWB file.
nwb.objects

{'3f1cf5e4-f297-46ef-b706-f533ecbcfd37': root pynwb.file.NWBFile at 0x4994178960
 Fields:
   devices: {
     Utah Array(M1) <class 'pynwb.device.Device'>,
     Utah Array(PMd) <class 'pynwb.device.Device'>
   }
   electrode_groups: {
     1 <class 'pynwb.ecephys.ElectrodeGroup'>,
     2 <class 'pynwb.ecephys.ElectrodeGroup'>
   }
   electrodes: electrodes <class 'pynwb.ecephys.ElectrodesTable'>
   experimenter: ['Matthew T. Kaufman' 'Mark M. Churchland']
   file_create_date: [datetime.datetime(2021, 8, 11, 2, 59, 54, 8857, tzinfo=tzoffset(None, -14400))]
   identifier: 94563eca-e111-4c9e-8cad-3f01345163f7
   institution: Stanford University
   intervals: {
     trials <class 'pynwb.epoch.TimeIntervals'>
   }
   processing: {
     behavior <class 'pynwb.base.ProcessingModule'>,
     ecephys <class 'pynwb.base.ProcessingModule'>
   }
   related_publications: ['10.1038/nature11129 10.1152/jn.00892.2011 10.1038/nn.3643 10.1038/nn.4042 10.1146/annurev-neuro-062111-150509 10.7554/eLife.04677

In [86]:
# Grab the "trials" entry from the file's intervals and list its column names.
trials = nwb.intervals["trials"]
trials.colnames

('start_time',
 'stop_time',
 'target_presentation_time',
 'go_cue_time',
 'reaction_time',
 'move_begins_time',
 'move_ends_time',
 'discard_trial',
 'task_success',
 'trial_type',
 'trial_version',
 'proto_trial',
 'maze_condition',
 'correct_reach',
 'maze_num_targets',
 'maze_num_barriers',
 'novel_maze',
 'target_positions',
 'frame_details',
 'hit_target_position',
 'target_size',
 'barrier_info',
 'timeseries')

In [87]:
# Grab the electrodes table and list its column names.
electrodes = nwb.electrodes
electrodes.colnames

('x',
 'y',
 'z',
 'imp',
 'location',
 'filtering',
 'group',
 'group_name',
 'gain',
 'offset')

In [88]:
# NOTE(review): the method is referenced but not called, so this cell only
# displays the bound-method repr — confirm whether a call `()` was intended.
nwb.get_linked_resources

<bound method HERDManager.get_linked_resources of root pynwb.file.NWBFile at 0x4994178960
Fields:
  devices: {
    Utah Array(M1) <class 'pynwb.device.Device'>,
    Utah Array(PMd) <class 'pynwb.device.Device'>
  }
  electrode_groups: {
    1 <class 'pynwb.ecephys.ElectrodeGroup'>,
    2 <class 'pynwb.ecephys.ElectrodeGroup'>
  }
  electrodes: electrodes <class 'pynwb.ecephys.ElectrodesTable'>
  experimenter: ['Matthew T. Kaufman' 'Mark M. Churchland']
  file_create_date: [datetime.datetime(2021, 8, 11, 2, 59, 54, 8857, tzinfo=tzoffset(None, -14400))]
  identifier: 94563eca-e111-4c9e-8cad-3f01345163f7
  institution: Stanford University
  intervals: {
    trials <class 'pynwb.epoch.TimeIntervals'>
  }
  processing: {
    behavior <class 'pynwb.base.ProcessingModule'>,
    ecephys <class 'pynwb.base.ProcessingModule'>
  }
  related_publications: ['10.1038/nature11129 10.1152/jn.00892.2011 10.1038/nn.3643 10.1038/nn.4042 10.1146/annurev-neuro-062111-150509 10.7554/eLife.0467710.1523/ENEUR

In [91]:
# Display the processing modules stored in the loaded NWB file.
nwb.processing

{'behavior': behavior pynwb.base.ProcessingModule at 0x4994173200
 Fields:
   data_interfaces: {
     Position <class 'pynwb.behavior.Position'>
   }
   description: contains monkey movement data,
 'ecephys': ecephys pynwb.base.ProcessingModule at 0x4977550624
 Fields:
   data_interfaces: {
     Processed <class 'pynwb.ecephys.FilteredEphys'>
   }
   description: Intermediate data from extracellular electrophysiology recordings, e.g., LFP.}