## External Resources in NWB with Patch-seq data

To prototype the inclusion of ontologized metadata in NWB, we built this tutorial, which shows how to create files that use the following new extensions and features:

- ndx-stimulation for stimulus waveform metadata
- ndx-genotype for strain, gene, and allele metadata
- ExternalResources for storing links to external databases and ontologies


In [1]:
from datetime import datetime
from dateutil.tz import tzlocal
import inspect
import numpy as np
import pandas as pd
import os

from pynwb import NWBHDF5IO, NWBFile
from pynwb.core import DynamicTableRegion, DynamicTable
from pynwb.device import Device

from pynwb import register_class, load_namespaces
from hdmf.utils import docval, call_docval_func, getargs, get_docval


# Namespace spec for the ndx-stimulation (SFT) extension.
# The default is the original author's local checkout; set the
# NDX_STIMULATION_SPECPATH environment variable to point at your own copy
# instead of editing this cell.
ndx_stimulation_specpath = os.environ.get(
    'NDX_STIMULATION_SPECPATH',
    '/Users/pam.baker/Documents/ndx_stim/ndx-stimulation/spec/ndx-stimulation.namespace.yaml',
)

# Namespace spec for the ndx-genotype extension (override with
# NDX_GENOTYPE_SPECPATH in the same way).
ndx_genotype_specpath = os.environ.get(
    'NDX_GENOTYPE_SPECPATH',
    '/Users/pam.baker/Documents/ndx_stim/ndx-genotype/spec/ndx-genotype.namespace.yaml',
)

# Load both namespaces. The genotype call stays last so that its return value
# remains the value of the cell, as in the original.
load_namespaces(ndx_stimulation_specpath)
load_namespaces(ndx_genotype_specpath)

{'ndx-genotype': {'core': ('AbstractFeatureSeries',
   'AlignedDynamicTable',
   'AnnotationSeries',
   'BehavioralEpochs',
   'BehavioralEvents',
   'BehavioralTimeSeries',
   'CSRMatrix',
   'ClusterWaveforms',
   'Clustering',
   'CompassDirection',
   'Container',
   'CorrectedImageStack',
   'CurrentClampSeries',
   'CurrentClampStimulusSeries',
   'Data',
   'DecompositionSeries',
   'Device',
   'DfOverF',
   'DynamicTable',
   'DynamicTableRegion',
   'ElectricalSeries',
   'ElectrodeGroup',
   'ElementIdentifiers',
   'EventDetection',
   'EventWaveform',
   'ExperimentalConditionsTable',
   'EyeTracking',
   'FeatureExtraction',
   'FilteredEphys',
   'Fluorescence',
   'GrayscaleImage',
   'IZeroClampSeries',
   'Image',
   'ImageMaskSeries',
   'ImageSegmentation',
   'ImageSeries',
   'Images',
   'ImagingPlane',
   'ImagingRetinotopy',
   'IndexSeries',
   'IntervalSeries',
   'IntracellularElectrode',
   'IntracellularElectrodesTable',
   'IntracellularRecordingsTable',


In [2]:
%%HTML
<style type="text/css">
/* Draw a solid black border and use black text on every cell of tables with class "dataframe". */
table.dataframe td, table.dataframe th {
    border: 1px  black solid !important;
  color: black !important;
}
</style>

In [3]:
from pynwb.icephys import VoltageClampStimulusSeries, VoltageClampSeries
from pynwb import NWBHDF5IO
from pynwb.core import DynamicTable, VectorData

#import sys
#sys.path.append("/Users/pam.baker/Documents/ndx_stim/ndx-stimulation/src/pynwb/tests/ndx_icephys_meta/src/pynwb/")

In [None]:
#from ndx_icephys_meta.icephys import ICEphysFile

# Create the in-memory NWB file used by the intracellular-ephys cells that follow.
nwbfile = NWBFile(
    session_description='my first synthetic recording',
    identifier='EXAMPLE_ID',
    session_id='LONELYMTN',
    session_start_time=datetime.now(tz=tzlocal()),
    experimenter='Dr. Bilbo Baggins',
    lab='Bag End Laboratory',
    institution='University of Middle Earth at the Shire',
    experiment_description='I went on an adventure with thirteen dwarves to reclaim vast treasures.',
)

In [8]:
# Register the recording device with the file.
device = nwbfile.create_device(name='Heka ITC-1600')

# Add an intracellular electrode attached to that device.
electrode = nwbfile.create_icephys_electrode(
    name="elec0",
    description='a mock intracellular electrode',
    device=device,
)

# Create an ic-ephys stimulus (five mock samples, sweep 15).
stimulus = VoltageClampStimulusSeries(
    name="ccss",
    data=[1, 2, 3, 4, 5],
    starting_time=123.6,
    rate=10e3,
    electrode=electrode,
    gain=0.02,
    sweep_number=np.uint64(15),
)

# Create an ic-response for the same electrode and sweep.
response = VoltageClampSeries(
    name='vcs',
    data=[0.1, 0.2, 0.3, 0.4, 0.5],
    conversion=1e-12,
    resolution=np.nan,
    starting_time=123.6,
    rate=20e3,
    electrode=electrode,
    gain=0.02,
    capacitance_slow=100e-12,
    resistance_comp_correction=70.0,
    sweep_number=np.uint64(15),
)

In [9]:
# Add an ICEphys recording row (electrode + stimulus + response).
# This replaces separate add_acquisition / add_stimulus calls.
rowindex = nwbfile.add_intracellular_recording(
    electrode=electrode,
    stimulus=stimulus,
    response=response,
    id=10,
)

In [10]:
# Second recording row: same electrode and stimulus, but no response is passed.
rowindex2 = nwbfile.add_intracellular_recording(
    electrode=electrode,
    stimulus=stimulus,
    id=12,
)

### Displaying the tables

In [13]:
# Display the intracellular recordings table populated by the two
# add_intracellular_recording calls (ids 10 and 12).
nwbfile.intracellular_recordings.to_dataframe()

Unnamed: 0_level_0,electrodes,electrodes,stimuli,stimuli,responses,responses
Unnamed: 0_level_1,id,electrode,id,stimulus,id,response
"(intracellular_recordings, id)",Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
10,0,elec0 pynwb.icephys.IntracellularElectrode at ...,0,"(0, 5, ccss pynwb.icephys.VoltageClampStimulus...",0,"(0, 5, vcs pynwb.icephys.VoltageClampSeries at..."
12,1,elec0 pynwb.icephys.IntracellularElectrode at ...,1,"(0, 5, ccss pynwb.icephys.VoltageClampStimulus...",1,"(-1, -1, ccss pynwb.icephys.VoltageClampStimul..."


In [None]:
from pprint import pprint
# Pretty-print every attribute and method name exposed by the nwbfile object.
pprint(dir(nwbfile))

In [12]:
# Group the two recording rows into one simultaneous recording.
# NOTE: this rebinds `rowindex` to the value returned for the new row.
rowindex = nwbfile.add_icephys_simultaneous_recording(
    recordings=[rowindex, rowindex2],
    id=15,
)

### Stimulus Function Table

The stimulus function table is designed to hold the names of stimulus waveforms and their relevant parameters - float parameters for numeric values and function parameters that reference other waveform functions (for parameter values that vary as a function of time).

In [None]:
@register_class('StimulusFunctionTable', 'ndx-stimulation')
class StimulusFunctionTable(DynamicTable):
    """Table of 1D stimulus waveform functions and their parameters.

    Registered as the ``StimulusFunctionTable`` type of the
    ``ndx-stimulation`` namespace. Each row names one waveform function and
    carries its float parameters and its function parameters.
    """

    # Predefined columns. All three are required; the two parameter columns
    # are declared with 'index': True, the function name column is not.
    __columns__ = (
        {
            'name': 'function_name',
            'description': 'The names of the 1D stimulus waveforms.',
            'required': True,
            'index': False,
        },
        {
            'name': 'float_parameters',
            'description': 'The names of the float parameters for the 1D stimulus waveforms.',
            'required': True,
            'index': True,
        },
        {
            'name': 'function_parameters',
            'description': 'The function parameters for the 1D stimulus waveforms.',
            'required': True,
            'index': True,
        },
    )

    @docval(*get_docval(DynamicTable.__init__, 'id', 'columns', 'colnames'))
    def __init__(self, **kwargs):
        """Create the table with a fixed name and description.

        Only ``id``, ``columns`` and ``colnames`` are accepted from the
        caller (they are the arguments copied from ``DynamicTable.__init__``
        by the decorator); ``name`` and ``description`` are always set here.
        """
        kwargs['name'] = 'StimulusFunctionTable'
        kwargs['description'] = 'Table for storing ontologized 1D stimulus waveform metadata'
        # Call the parent constructor directly. hdmf's call_docval_func helper
        # is deprecated, and every key in kwargs is an argument of
        # DynamicTable.__init__, so no filtering is needed.
        super().__init__(**kwargs)
        

We create an example stimulus function table that holds a few waveforms (two square waves, a ramp, and a sine wave) with their associated parameters. We use the `add_row` method inherited from `DynamicTable` to add new entries.

In [None]:
sft = StimulusFunctionTable()

# Parameter lists; each entry is presumably a (name, value, unit) tuple.
float_params_sq1 = [
    ('amplitude', -0.110, 'V'),
    ('duration', 0.500, 's'),
    ('start_time', 0.1, 's'),
]
float_params_sq2 = [
    ('amplitude', 0.090, 'V'),
    ('duration', 1, 's'),
    ('start_time', 0.1, 's'),
]
float_params_ramp = [
    ('amplitude', 0.200, 'V'),
    ('duration', 1, 's'),
    ('start_time', 0.05, 's'),
]
float_params_sin = [
    ('amplitude', 0.090, 'V'),
    ('duration', 1, 's'),
    ('start_time', 0.1, 's'),
]
func_params_sin = [('frequency', 2, 'Hz')]

# One dict per table row, keyed by column name. Only the sine wave carries
# function parameters.
wave1 = dict(function_name='sq', float_parameters=float_params_sq1, function_parameters=[])
wave2 = dict(function_name='sq', float_parameters=float_params_sq2, function_parameters=[])
wave3 = dict(function_name='ramp', float_parameters=float_params_ramp, function_parameters=[])
wave4 = dict(function_name='sin', float_parameters=float_params_sin, function_parameters=func_params_sin)

# Append the rows in order, using add_row from DynamicTable.
for _wave in (wave1, wave2, wave3, wave4):
    sft.add_row(data=_wave)

# Widen the column display so the parameter lists are not truncated.
pd.set_option("display.max_colwidth", 100)

display(sft.to_dataframe())

## Genotypes and Alleles Table

- Genotypes for locus and allele information
- Alleles to store allele metadata


In [4]:
from datetime import timezone

from ndx_genotype import GenotypeNWBFile, GenotypeSubject, GenotypesTable, AllelesTable


# Start a fresh file using GenotypeNWBFile from ndx_genotype.
# NOTE: this rebinds `nwbfile`, replacing the file built in the earlier cells.
nwbfile = GenotypeNWBFile(
    session_description='session_description',
    identifier='identifier',
    session_start_time=datetime.now(tz=timezone.utc),
)

# Subject whose genotype is described by the tables added below.
nwbfile.subject = GenotypeSubject(subject_id='3', genotype='Vip-IRES-Cre/wt')


  warn(_exp_warn_msg(cls))


In [5]:
# Genotypes table, annotated with the genotyping process and the genome
# assembly / annotation release it refers to.
gt = GenotypesTable(
    process='PCR',
    process_url='https://dx.doi.org/10.17504/protocols.io.yjifuke',
    assembly='GRCm38.p6',
    annotation='NCBI Mus musculus Annotation Release 108',
)

In [6]:
# Attach the genotypes table to the subject before filling it.
nwbfile.subject.genotypes_table = gt

# Register both alleles referenced by the genotype row below.
for _symbol in ('Vip-IRES-Cre', 'wt'):
    gt.add_allele(symbol=_symbol)

# One genotype row for the Vip locus; the locus_* values are placeholder
# strings for the external resource / entity the locus maps to.
gt.add_genotype(
    locus='Vip',
    allele1='Vip-IRES-Cre',
    allele2='wt',
    locus_resource_name='locus_resource_name',
    locus_resource_uri='locus_resource_uri',
    locus_entity_id='locus_entity_id',
    locus_entity_uri='locus_entity_uri',
)


In [7]:
# Display the genotypes table (one row for the Vip locus added above).
gt.to_dataframe()

Unnamed: 0_level_0,locus,allele1,allele2
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,Vip,"(s, y, m, b, o, l)","(s, y, m, b, o, l)"


In [None]:
# Display the alleles table held by the genotypes table.
gt.alleles_table.to_dataframe()


In [None]:
# Take the external resources object attached to the genotype file.
er = nwbfile.external_resources

# Display its keys table at this point.
# NOTE(review): any rows shown here presumably come from the locus_* arguments
# passed to add_genotype — confirm.
er.keys.to_dataframe()

## External Resources Table

- Resources: the external source being referenced, e.g. an ontology
- Entities: individual entries in that ontology or set of controlled terms
- Keys: the terms used in an object that are mapped onto entities through the ER tables
- Objects: the things we are ontologizing (e.g. the SFT)


In [None]:
from hdmf.common import ExternalResources
from hdmf import Container, Data
import pandas as pd

# Use the external resources object attached to the NWB file; the
# commented-out line is the alternative of creating a standalone instance.
#er = ExternalResources(name='ExtResTable')
er = nwbfile.external_resources

The *StimulusFunctionTable* is the container object that we will link to in the External Resources tables.

In [None]:
# Identifier of the StimulusFunctionTable instance; it is passed as the
# `container` argument of the add_ref calls that follow.
object_id = sft.object_id
print(object_id)

In [None]:
# add_ref creates an entry across all tables (entities, keys, objects, resources)

# Arguments shared by every reference: the same target object and field, and
# the same ontology resource.
_estim_ref = dict(
    container=object_id,
    field='',
    resource_name='Estim Ontology',
    resource_uri='Estim_Onto_uri',
)

# One reference per term used in the stimulus function table.
er.add_ref(key='sq',
           entity_id='Estim_square_ID',
           entity_uri='Estim_square_uri',
           **_estim_ref)

er.add_ref(key='duration',
           entity_id='Estim_duration_ID',
           entity_uri='Estim_duration_uri',
           **_estim_ref)

er.add_ref(key='amplitude',
           entity_id='Estim_amplitude_ID',
           entity_uri='Estim_amplitude_uri',
           **_estim_ref)

# 'start_time' is mapped onto the ontology's "shift" entity.
er.add_ref(key='start_time',
           entity_id='Estim_shift_ID',
           entity_uri='Estim_shift_uri',
           **_estim_ref)


In [None]:
# Display the resources table of the external resources object.
er.resources.to_dataframe()

In [None]:
# Display the keys table after the add_ref calls.
er.keys.to_dataframe()

In [None]:
# Display the entities table after the add_ref calls.
er.entities.to_dataframe()

In [None]:
# Add a key named 'sq' directly (only the key name is given here), then
# display the keys table again.
# NOTE(review): an 'sq' key was already passed to add_ref earlier, so this
# presumably creates a second row with the same name — confirm that is intended.
k=er.add_key(key_name='sq')
er.keys.to_dataframe()

In [None]:
# Look up the key by name only and print what comes back.
# NOTE(review): 'sq' has been used both in add_ref and in add_key, so a lookup
# by name alone may match more than one row — confirm the expected result.
key_object = er.get_key(key_name='sq')
print(key_object)

### Write

In [None]:
from datetime import datetime
from dateutil.tz import tzlocal

# Fixed session start and file creation timestamps in the local time zone.
start_time = datetime(year=2017, month=4, day=3, hour=11, tzinfo=tzlocal())
create_date = datetime(year=2017, month=4, day=15, hour=12, tzinfo=tzlocal())

# NOTE(review): `nwbfile` is rebound to a plain NWBFile here, so the genotype
# subject/tables and the external resources built on the previous file object
# are not part of what gets written — confirm this is intended.
nwbfile = NWBFile(
    session_description='demo',
    identifier='NWB456',
    session_start_time=start_time,
    file_create_date=create_date,
)

# Store the stimulus function table in the file's acquisition group.
nwbfile.add_acquisition(sft)

In [None]:
# Write the SFT out to file

from pynwb import NWBHDF5IO

# Use the IO object as a context manager so the HDF5 file handle is closed
# even if write() raises; the original only closed it on success.
with NWBHDF5IO('sft_ert_geno.nwb', mode='w') as io:
    io.write(nwbfile)

In [None]:
# Read back the file written in the previous step. The file name must match
# the one passed to the writer ('sft_ert_geno.nwb'); the original cell opened
# a different file name. load_namespaces=True loads the extension namespaces
# stored in the file.
# NOTE(review): the handle is left open because later cells access data
# through sft_nwbfile (presumably read lazily); call io.close() when done.
io = NWBHDF5IO('sft_ert_geno.nwb', mode='r', load_namespaces=True)
sft_nwbfile = io.read()

In [None]:
# print(sft_nwbfile.acquisition)

# Fetch the stimulus function table back from the acquisition group.
# NOTE(review): no name is passed; presumably this returns the only object
# stored in acquisition — confirm, or pass the name explicitly.
sft_in = sft_nwbfile.get_acquisition()

In [None]:
# Display the table read back from disk, for comparison with the one written.
sft_in.to_dataframe()

In [None]:
# Inspect the first element of the `target` of the function_parameters column.
# NOTE(review): presumably this is the underlying data behind the indexed
# column — confirm.
sft_in['function_parameters'].target[0]