# Ingesting OpenNeuro datasets into Blue Brain Nexus
---
Date: Nov 22, 2019
Hack Group Members:
- Jay Hennessy
- Adeel Ansari
- John Griffiths
- Michael Joseph
 
The goal of this group is to crawl OpenNeuro datasets with datalad and ingest project resources into nexus.


The python environment must include pynidm, datalad, pybids and nexus-sdk.
(Note: the script we used from pynidm (BIDSMRI2NIDM.py) had to be tweaked since it threw errors if subject-level jsons were not described for each datatype. It was also not compatible with the most recent version of pybids.)

The steps are:
 - crawl openneuro with datalad
 - produce nidm *.json* output files for each project
 - tweak the nidm *.json* files to be compatible with nexus
 - import the nidm resources into nexus
 
Before we began we created local folders for storing the project datasets, the NIDM outputs, and the Nexus-ready resources:
 /path/to/projects
 /path/to/rdf_files
 /path/to/nexus_resources

## Step 1: Get the OpenFMRI data with Datalad

In [16]:
# Set up environment 
import os
import glob
import json
from pprint import pprint
from subprocess import Popen, PIPE
import nexussdk as nexus
from datalad.coreapi import metadata, extract_metadata, install
from nidm.experiment.tools.BIDSMRI2NIDM import main as bids2nidm

# Folder Configs:
proj_folder = '/external/nexus_temp/nexus_temp2/projects'  # datalad installs the datasets under here
rdf_folder = '/external/nexus_temp/nexus_temp2/rdf_files'  # NIDM json-ld outputs (Step 2)
nexus_res_folder = '/external/nexus_temp/nexus_temp2/nexus_resources'  # Nexus-ready json-ld files (Step 3)
source = '///openfmri'  # dataset location handed to datalad's install()

# Make sure every working folder exists up front: os.chdir() below and the
# file writes in the later steps fail if one of them is missing.
for folder in (proj_folder, rdf_folder, nexus_res_folder):
    os.makedirs(folder, exist_ok=True)

# Nexus Configs:
nexus_deployment = "https://nexus-research.camh.ca/v1" # Change this to your instance of nexus
# Get the token from your instance of nexus
# (follow the tutorial at https://bluebrainnexus.io/docs/tutorial/index.html).
# It is read from the NEXUS_TOKEN environment variable when that is set, so the
# secret does not have to be pasted into the notebook; otherwise it stays empty
# and can be filled in here by hand.
token = os.environ.get('NEXUS_TOKEN', '')
org = "users"
project = "jays_test"

# get the data using datalad
# install() is called from inside proj_folder; the later steps expect the
# datasets to end up under proj_folder + '/openfmri'.
os.chdir(proj_folder)
openfmri = install(source, recursive=True)


In [2]:
# Get a list of project folders: every directory directly under
# <proj_folder>/openfmri is treated as one dataset.
datasets = [p for p in glob.glob(proj_folder + '/openfmri/*') if os.path.isdir(p)]

# datalad includes a scripts folder, so remove it
try:
    datasets.remove(proj_folder + '/openfmri/scripts')
except ValueError:
    # list.remove() raises ValueError when the entry is absent; catching only
    # that keeps real problems (and Ctrl-C) from being swallowed.
    print('no scripts file, FYI')

#pprint(datasets)

## Step 2: Create NIDM Outputs

In [6]:
# run pynidm's BIDSMRI2NIDM on each dataset
# Need to use subprocess since bids2nidm is a command line script
# Also, not all the datasets on OpenNeuro adhere to bids properly so these projects will fail and be skipped
for d in datasets:
    # One json-ld output per dataset, named after the dataset folder.
    output = os.path.join(rdf_folder, os.path.basename(d)+'.json')

    argv = ['/opt/pyNIDM/PyNIDM/nidm/experiment/tools/BIDSMRI2NIDM.py',
        '-d', d,
        '-o', output,
        '--jsonld',
        '--bidsignore'
        ]
    try:
        out = Popen(argv, stdout=PIPE, stderr=PIPE)
        stdout, stderr = out.communicate()
    except Exception as e:
        # Launching the script itself failed (e.g. it is missing or not
        # executable); report it and move on to the next dataset.
        print('{} Exception happened calling bids2nidm: {}'.format(d, e))
        continue

    # Compare by value: "is not 0" tests object identity, which is not a
    # reliable way to compare integers.
    if out.returncode != 0:
        print('{} messed up. return code {}, stderr: {}'.format(d, out.returncode, stderr))
    else:
        print('{} Success, returncode: {}, output: {}'.format(os.path.basename(d), out.returncode, output))
        

ds000001 Success, returncode: 0, output: /external/nexus_temp/nexus_temp2/rdf_files/ds000001.json
ds000002 Success, returncode: 0, output: /external/nexus_temp/nexus_temp2/rdf_files/ds000002.json
ds000003 Success, returncode: 0, output: /external/nexus_temp/nexus_temp2/rdf_files/ds000003.json
ds000005 Success, returncode: 0, output: /external/nexus_temp/nexus_temp2/rdf_files/ds000005.json
ds000006 Success, returncode: 0, output: /external/nexus_temp/nexus_temp2/rdf_files/ds000006.json
/external/nexus_temp/nexus_temp2/projects/openfmri/ds000007 messed up. return code 1, stderr: b'Traceback (most recent call last):\n  File "/opt/pyNIDM/PyNIDM/nidm/experiment/tools/BIDSMRI2NIDM.py", line 612, in <module>\n    main(sys.argv[1:])\n  File "/opt/pyNIDM/PyNIDM/nidm/experiment/tools/BIDSMRI2NIDM.py", line 134, in main\n    project = bidsmri2project(directory,args)\n  File "/opt/pyNIDM/PyNIDM/nidm/experiment/tools/BIDSMRI2NIDM.py", line 234, in bidsmri2project\n    bids_layout = BIDSLayout(direc

KeyboardInterrupt: 

## Step 3: Update NIDM RDF Outputs

In [13]:
def update_resource(nidm_file, nexus_file):
    """Rewrite a NIDM json-ld file into the layout used for the Nexus upload.

    Loads ``nidm_file``, removes the ``records`` and ``@version`` entries
    from its ``@context``, puts the two Nexus context URLs in front of the
    remaining context, moves the ``@graph`` content to ``shapes``, sets
    ``@type`` to ``owl:Ontology`` and writes the result to ``nexus_file``.

    Returns 0 on success, or 1 if anything went wrong (the error is printed
    and ``nexus_file`` is only opened once the document has been converted).
    """
    nexus_contexts = [
        "https://bluebrain.github.io/nexus/contexts/shacl-20170720.json",
        "https://bluebrain.github.io/nexus/contexts/resource.json",
    ]
    try:
        with open(nidm_file, 'r') as src:
            doc = json.load(src)

        # Both entries must be present; a missing one counts as a failure.
        nidm_context = doc["@context"]
        del nidm_context["records"]
        del nidm_context["@version"]

        # The original context is kept as the last element of the new
        # context list, after the two Nexus context URLs.
        graph = doc.pop("@graph")
        doc["@context"] = nexus_contexts + [nidm_context]
        doc["shapes"] = graph
        doc["@type"] = "owl:Ontology"

        with open(nexus_file, 'w') as dst:
            json.dump(doc, dst)
    except Exception as err:
        print('Failed updating resource {}, {}'.format(nidm_file, err))
        return 1
    return 0

In [15]:
# Need to tweak the nidm json-ld output to be compatible with nexus
# Sorted so the files are processed (and later uploaded) in a stable order.
nidm_outputs = sorted(filter(os.path.isfile, glob.glob(rdf_folder+'/*')))

# Each converted file keeps its name and goes into nexus_res_folder. The path
# is built from the configured folder rather than by replacing 'rdf_files'
# inside the input path, which ignored nexus_res_folder and would also rewrite
# any other occurrence of that text in the path.
nexus_outputs = [os.path.join(nexus_res_folder, os.path.basename(p)) for p in nidm_outputs]

# update_resource returns 0 on success and 1 on failure; keep the results in a
# list so they can still be inspected after being summed.
nexus_success = [update_resource(src, dst) for src, dst in zip(nidm_outputs, nexus_outputs)]

#Check if all the json-ld's updated successfully
failed = sum(nexus_success)
if not failed:
    print('Update Success!')
else:
    print('{} of {} files failed to update'.format(failed, len(nexus_success)))

['/external/nexus_temp/nexus_temp2/rdf_files/ds000001.json',
 '/external/nexus_temp/nexus_temp2/rdf_files/ds000002.json',
 '/external/nexus_temp/nexus_temp2/rdf_files/ds000003.json',
 '/external/nexus_temp/nexus_temp2/rdf_files/ds000005.json',
 '/external/nexus_temp/nexus_temp2/rdf_files/ds000006.json',
 '/external/nexus_temp/nexus_temp2/rdf_files/ds000008.json',
 '/external/nexus_temp/nexus_temp2/rdf_files/ds000011.json',
 '/external/nexus_temp/nexus_temp2/rdf_files/ds000017.json']
['/external/nexus_temp/nexus_temp2/nexus_resources/ds000001.json',
 '/external/nexus_temp/nexus_temp2/nexus_resources/ds000002.json',
 '/external/nexus_temp/nexus_temp2/nexus_resources/ds000003.json',
 '/external/nexus_temp/nexus_temp2/nexus_resources/ds000005.json',
 '/external/nexus_temp/nexus_temp2/nexus_resources/ds000006.json',
 '/external/nexus_temp/nexus_temp2/nexus_resources/ds000008.json',
 '/external/nexus_temp/nexus_temp2/nexus_resources/ds000011.json',
 '/external/nexus_temp/nexus_temp2/nexus_re

## Step 4: Upload Nexus Resources

In [17]:
# Initialize Nexus, then loop through the nexus json-lds and upload each resource.
nexus.config.set_environment(nexus_deployment)
nexus.config.set_token(token)
# The result is not used -- presumably this serves as an early check that the
# deployment and token work (TODO confirm).
nexus.permissions.fetch()

for nexus_file in nexus_outputs:
    with open(nexus_file) as resource_fh:
        data = json.loads(resource_fh.read())
    # payload holds the response for the most recent upload only.
    payload = nexus.resources.create(org, project, data)


<class 'dict'>
OrderedDict([('@context',
              'https://bluebrain.github.io/nexus/contexts/resource.json'),
             ('@id',
              'https://nexus-research.camh.ca/v1/resources/users/jays_test/_/7d539f01-ecae-499f-bcdb-f005a524b42a'),
             ('@type', 'http://www.w3.org/2002/07/owl#Ontology'),
             ('_self',
              'https://nexus-research.camh.ca/v1/resources/users/jays_test/_/7d539f01-ecae-499f-bcdb-f005a524b42a'),
             ('_constrainedBy',
              'https://bluebrain.github.io/nexus/schemas/unconstrained.json'),
             ('_project',
              'https://nexus-research.camh.ca/v1/projects/users/jays_test'),
             ('_rev', 1),
             ('_deprecated', False),
             ('_createdAt', '2019-12-16T19:15:43.876937Z'),
             ('_createdBy',
              'https://nexus-research.camh.ca/v1/realms/camh/users/jay_hennessy'),
             ('_updatedAt', '2019-12-16T19:15:43.876937Z'),
             ('_updatedBy',
    

              'https://nexus-research.camh.ca/v1/resources/users/jays_test/_/380c3c52-6e3d-4bdd-91e2-521464e71284/outgoing')])
<class 'dict'>
OrderedDict([('@context',
              'https://bluebrain.github.io/nexus/contexts/resource.json'),
             ('@id',
              'https://nexus-research.camh.ca/v1/resources/users/jays_test/_/57a7fb6a-a649-4a84-b611-f2b67d779304'),
             ('@type', 'http://www.w3.org/2002/07/owl#Ontology'),
             ('_self',
              'https://nexus-research.camh.ca/v1/resources/users/jays_test/_/57a7fb6a-a649-4a84-b611-f2b67d779304'),
             ('_constrainedBy',
              'https://bluebrain.github.io/nexus/schemas/unconstrained.json'),
             ('_project',
              'https://nexus-research.camh.ca/v1/projects/users/jays_test'),
             ('_rev', 1),
             ('_deprecated', False),
             ('_createdAt', '2019-12-16T19:15:46.180524Z'),
             ('_createdBy',
              'https://nexus-research.camh.ca/v1

### All done!

If you have any questions you can reach me at jay.hennessy@camh.ca.