# CrowdMRI (aka MRIQC Web API)



<img src="crowdmri-flowchart.png" alt="CrowdMRI flowchart" />

## Querying the database

- Manually (online): https://mriqc.nimh.nih.gov
- Programmatically, with Python (see the code cells below)

In [1]:
import os
import sys
import pandas as pd
from json import load
import urllib.request, json 
from pandas.io.json import json_normalize

In [2]:
def get_iqms(modality, versions=None, software='mriqc', out_file=None, page_size=25):
    """
    Download the IQM records of one modality from the MRIQC Web API.

    Records are fetched page by page, flattened into a data frame and
    checkpointed to ``out_file`` after every page, so an interrupted download
    can be resumed by calling the function again with the same ``out_file``.

    Parameters
    ----------
    modality : str
        Name of the API endpoint to query (e.g. ``'T1w'``).
    versions : list of str, optional
        Values of ``provenance.version`` to fetch; one paginated query is run
        per entry. The entry ``'*'`` disables the version filter.
        Defaults to ``['*']``.
    software : str or None
        Value of ``provenance.software`` to filter on. ``None`` disables the
        software filter.
    out_file : str, optional
        Path of the CSV checkpoint. Defaults to ``'webapi-<modality>.csv'``
        in the current directory.
    page_size : int
        Number of records requested per page (``max_results``).

    Returns
    -------
    pandas.DataFrame
        All records accumulated so far (previously checkpointed ones
        included), one row per record.

    Notes
    -----
    When ``out_file`` already exists, the first query restarts from page
    ``len(existing) // page_size + 1``. A partially stored page is fetched
    again, so the result may contain duplicated rows. The resume offset is
    only applied to the first entry of ``versions``; every following entry
    starts from page 1.
    """
    from urllib.parse import urlencode

    url_root = 'https://mriqc.nimh.nih.gov/api/v1/{modality}?{query}'

    if out_file is None:
        out_file = 'webapi-%s.csv' % modality

    database = pd.DataFrame()
    first_page = 1
    if os.path.isfile(out_file):
        # The checkpoint stores the index as its first column; read it back as
        # the index so it does not accumulate as "Unnamed: 0" data columns.
        database = pd.read_csv(out_file, index_col=0)
        first_page = len(database) // page_size + 1
        print('Found old records. Starting on page ', first_page)

    if versions is None:
        versions = ['*']

    for version in versions:
        # The filter only depends on the version, so build it once per query.
        where = {}
        if software is not None:
            where['provenance.software'] = '%s' % software
        if version != '*':
            where['provenance.version'] = '%s' % version

        # Every version is an independent paginated query: restart the page
        # counter, and consume the resume offset on the first query only.
        page = first_page
        first_page = 1

        while True:
            params = [('max_results', '%d' % page_size), ('page', '%d' % page)]
            if where:
                # json.dumps escapes the values; urlencode percent-encodes the
                # braces and quotes so the URL is always well formed.
                params.append(
                    ('where', json.dumps(where, separators=(',', ':'))))

            page_url = url_root.format(
                modality=modality,
                query=urlencode(params),
            )
            with urllib.request.urlopen(page_url) as response:
                data = json.loads(response.read().decode())

            records = pd.json_normalize(data['_items'])
            if database.empty:
                database = records
            elif not records.empty:
                database = pd.concat(
                    [database, records], ignore_index=True, sort=False)

            # Checkpoint after every page so the download can be resumed.
            database.to_csv(out_file)
            sys.stdout.write('.')
            sys.stdout.flush()

            if 'next' not in data['_links']:
                break
            page += 1

    database.to_csv(out_file)
    return database

In [None]:
# Fetch the T1w records without a software filter (software=None), 1000 per
# page, checkpointing them to a CSV file under the user's home directory.
df_t1w = get_iqms(
    'T1w',
    software=None,
    page_size=1000,
    out_file=os.path.expanduser('~/data/nipreps/.crowdmri-T1w.csv'),
)

# Keep a single row per distinct 'provenance.md5sum' value.
df_t1w_unique = df_t1w.drop_duplicates(subset=['provenance.md5sum'])