### Analyze OpenNeuro metadata

The code below parses the OpenNeuro metadata (obtained via the GraphQL API) to identify funding sources.

In [1]:
import json
import re
import pandas as pd
import string
from collections import defaultdict
import requests

from parse_metadata import get_bi_project_numbers, get_bi_grant_nums,\
    extract_funding_info, extract_nih_grants, is_nih_grant,\
    extract_nsf_grants, is_nsf_grant, mentions_bi


# Print progress messages from the cells below when True.
verbose = True

# Input files:
#   - funding metadata obtained using get_metadata.py
#   - funded grants obtained on 5/30/21 from the BRAIN Initiative web site
metadata_file = 'funding_metadata.json'
bi_grant_file = 'funded_awards-2021-05-30T12-08-20.csv'

# Activity codes come from the ACT_CODE column; institute codes come from the
# first column of a header-less CSV.
grant_codes = list(pd.read_csv('ActivityCodes.csv')['ACT_CODE'])
institute_codes = list(pd.read_csv('IC_abbrevs.csv', header=None)[0])

# S10 is excluded from the activity codes (raises ValueError if it is absent).
# NOTE(review): the reason for excluding S10 is not recorded here -- confirm.
grant_codes.remove('S10')


Load BRAIN Initiative grant info


In [2]:
# Project numbers are extracted by the parse_metadata helpers, which are
# handed the CSV path (not the DataFrame built below).
bi_project_nums = get_bi_project_numbers(bi_grant_file)
bi_grants = get_bi_grant_nums(bi_project_nums)

# Award table used later for title/investigator lookups; any row with a
# missing value in any column is discarded.
bi_grant_df = pd.read_csv(bi_grant_file)
bi_grant_df = bi_grant_df.dropna()

if verbose:
    print(f'found {len(bi_grants)} BI grants')


found 935 BI grants


Load and parse OpenNeuro metadata

In [3]:
# Read the metadata dump: a JSON mapping whose values may be null.
with open(metadata_file) as metadata_fh:
    funding = json.load(metadata_fh)
if verbose:
    print(f'found metadata for {len(funding)} OpenNeuro datasets')

# Accumulators that are filled in by the cells further down.
grants_cited = defaultdict(list)
nsf_grants_cited = defaultdict(list)
bi_mentions = {}

# Keep only the entries for which extract_funding_info returns something
# non-empty, i.e. don't store entries with no funding/acknowledgments.
grant_info = {}
for dataset_key, dataset_md in funding.items():
    if dataset_md is None:
        continue
    funding_text = extract_funding_info(dataset_md)
    if funding_text is None or len(funding_text) == 0:
        continue
    grant_info[dataset_key] = funding_text
print(f'found {len(grant_info)} datasets with possible funding info')



found metadata for 1326 OpenNeuro datasets
found 1003 datasets with possible funding info


Find grants for each dataset

In [4]:
# Scan every funding string of every dataset for NIH and NSF grant numbers,
# and record strings that mention the BRAIN Initiative.
for k, gi in grant_info.items():
    # Only the part of the key before the first ':' is used as dataset id.
    dsnum = k.split(':')[0]
    for s in gi:
        # Candidates are validated before being stored. Entries are appended
        # one by one so that a dataset only gets a key once a grant is found.
        for grant in extract_nih_grants(s):
            if is_nih_grant(grant):
                grants_cited[dsnum].append(grant)
        for grant in extract_nsf_grants(s):
            if is_nsf_grant(grant):
                nsf_grants_cited[dsnum].append(grant)
        # Keyed by the full key (not dsnum); a later matching string for the
        # same key replaces an earlier one.
        if mentions_bi(s):
            bi_mentions[k] = s

# De-duplicate per dataset. sorted() rather than list(set(...)) because set
# iteration order for strings changes between interpreter runs, which would
# make the lists (and anything derived from them) differ on every restart.
for k, grants in grants_cited.items():
    grants_cited[k] = sorted(set(grants))
for k, grants in nsf_grants_cited.items():
    nsf_grants_cited[k] = sorted(set(grants))


Consolidate all grants


In [5]:

# Containers defaulting to []; they are only created here, not filled.
cited_grants = defaultdict(list)
cited_bi_grants = defaultdict(list)

# Unique NIH grants across all datasets. Sorted so the order (and therefore
# the order of anything printed while iterating over it) is the same after
# every kernel restart; list(set(...)) order varies with string hashing.
all_grants = sorted({
    grant
    for grants in grants_cited.values()
    for grant in grants
})
print(f'{len(all_grants)} unique NIH grants cited in OpenNeuro')


141 unique NIH grants cited in OpenNeuro


In [6]:

# Unique NSF grants across all datasets, in a reproducible (sorted) order
# instead of the run-dependent order produced by list(set(...)).
# The None guard is kept from the original loop for backward compatibility.
all_nsf_grants = sorted({
    grant
    for grants in nsf_grants_cited.values()
    if grants is not None
    for grant in grants
})
print(f'{len(all_nsf_grants)} unique NSF grants cited in OpenNeuro')
   


31 unique NSF grants cited in OpenNeuro


Index datasets by grant (grant number → datasets citing it)

In [7]:

# Invert the dataset -> grants mappings into grant -> datasets lookups.
# Both stay defaultdicts, so looking up an unknown grant yields [].
dataset_grants = defaultdict(list)
dataset_grants_nsf = defaultdict(list)

for cited_by_dataset, datasets_by_grant in (
        (grants_cited, dataset_grants),
        (nsf_grants_cited, dataset_grants_nsf)):
    for dsnum, grant_list in cited_by_dataset.items():
        for grant in grant_list:
            datasets_by_grant[grant].append(dsnum)

Get matches for BRAIN Initiative grants

In [8]:
# Cross-reference: grants cited in OpenNeuro that are also BI grants.
bi_matches = [g for g in all_grants if g in bi_grants]

print(f'{len(bi_matches)} BI grants cited in OpenNeuro')

bi_datasets = []
for grant_num in bi_matches:
    # A grant number may occur inside several project numbers.
    project_match = [p for p in bi_grant_df['Project Number'] if grant_num in p]
    print(project_match)
    if project_match:
        # Title and investigator are reported for the first matching project.
        # Boolean mask instead of a string-interpolated query, so unusual
        # characters in a project number cannot break the expression.
        matching_grant = bi_grant_df[
            bi_grant_df['Project Number'] == project_match[0]]
        print(matching_grant['Title'].values[0])
        print(matching_grant['Investigator'].values[0])
    else:
        # bi_grants is built from the file path, whereas bi_grant_df has had
        # incomplete rows dropped, so a row is not guaranteed to exist here.
        print(f'no complete award row found for {grant_num}')
    print(dataset_grants[grant_num])
    bi_datasets += dataset_grants[grant_num]
    print('')

# Unique datasets, sorted so the order is stable across kernel restarts.
bi_datasets = sorted(set(bi_datasets))

print(f'{len(bi_datasets)} unique datasets associated with BI grants')

6 BI grants cited in OpenNeuro
['1U01NS103780-01']
Causal mapping of emotion networks with concurrent electrical stimulation and fMRI
Adolphs, Ralph  (contact) Howard, Matthew A. Poldrack, Russell A
['ds002799']

['1UH3NS100548-01', '3UH3NS100548-01S2']
Combined  Cortical  and  Subcortical  Recording  and Stimulation  as  a  Circuit-Oriented  Treatment  for  Obsessive-Compulsive  Disorder
Dougherty, Darin D (contact) Eskandar, Emad N
['ds001784']

['1U01MH117023-01']
Imaging and Analysis Techniques to Construct a Cell Census Atlas of the Human Brain
Boas, David A Fischl, Bruce  (contact)
['ds002179']

['1R24MH117179-01']
OpenNeuro: An open archive for analysis and sharing of BRAIN Initiative data
Poldrack, Russell A
['ds002799']

['1R01MH111447-01']
Neurons, Vessels and Voxels:  Multi-modal Imaging of Layer Specific Signals
Kara, Prakash Naselaris, Thomas P Olman, Cheryl A. Ugurbil, Kamil  (contact)
['ds002684', 'ds003043']

['1R03MH111320-01']
Computational Modeling of Deep Brain Stim