    Run `python make_pooled_datasets.py configs/3T_NIST_T1maps.json 3T_NIST_T1maps` first

In [1]:
from pathlib import Path
import pandas as pd
import json

In [2]:
data_folder_name = '3T_human_T1maps'
output_filename = '3T_human_T1maps_database'

In [3]:
configFile = Path('configs/3T_human_T1maps.json')

In [4]:
with open(configFile) as json_file:
    configJson = json.load(json_file)

In [5]:
def create_database(configJson):
    columns = [
    'id',
    'OSF dataset', 'OSF link', 'NIFTI filename', 'Data type',
    'contact',
    'site name', 'MRI vendor', 'MRI version', 'MRI field',
    'sample type',
    'phantom version', 'phantom serial number', 'phantom temperature',
    'age', 'sex',
    'sequence name', 'sequence type', 'matrix size', 'resolution', 'dimension', 'TR', 'echo time', 'TI', 'bandwidth'
    ]
    
    df = pd.DataFrame(columns=columns)
    df = df.set_index('id')
    
    df = parse_dataset_json(df, configJson)
    
    return df

In [6]:
def parse_dataset_json(df, configJson):

    id = 1
    subid = 1

    for dataset_name in configJson:
        db_id = id+subid*0.001
        for key1 in configJson[dataset_name]:
            if key1 == 'datasets':
                for key2 in configJson[dataset_name][key1]:
                    db_id = id+subid*0.001
                    subid = subid+1

                    dataset_series = {
                        'OSF dataset': dataset_name,
                        'OSF link': configJson[dataset_name]['OSF_link'],
                        'NIFTI filename': configJson[dataset_name]['datasets'][key2]['imagePath'],
                        'Data type': configJson[dataset_name]['datasets'][key2]['dataType']
                    }
                    
                    t1File = configJson[dataset_name]['datasets'][key2]['imagePath']
                    t1JsonFile = data_folder_name / Path(t1File[:-7] + '.json')
                    
                    with open(t1JsonFile) as json_file:
                        t1Json = json.load(json_file)

                    dataset_series = parse_t1_json(dataset_series, t1Json)
                    df = df.append(pd.Series(dataset_series, index = df.columns, name = db_id))
        # Increment dataset ID counter
        id = id + 1
    
        # Reset subdataset ID counter
        subid = 1
    return df

In [7]:
def parse_t1_json(databaset_series, t1Json):
    databaset_series.update({
        'contact': t1Json['submitter']['contact'],
    })

    databaset_series.update({
        'site name': t1Json['site']['name'],
        'MRI vendor': t1Json['site']['manufacturer'],
        'MRI version': t1Json['site']['version'],
        'MRI field': t1Json['site']['field'],
    })
    
    if 'temperature' in t1Json['sample']:
        temp = t1Json['sample']['temperature']
    else:
        temp = None
    
    if t1Json['sample']['type'] is 'NIST':
        databaset_series.update({
            'sample type': t1Json['sample']['type'],
            'phantom version': t1Json['sample']['version'],
            'phantom serial number': t1Json['sample']['serial_number'],
            'phantom temperature': temp,
        })
        databaset_series.update({
            'age': None,
            'sex': None,
        })
    else:
        databaset_series.update({
            'sample type': 'Human',
            'age': t1Json['sample']['age'],
            'sex': t1Json['sample']['sex'],
        })
        databaset_series.update({
            'sample type': None,
            'phantom version': None,
            'phantom serial number': None,
            'phantom temperature': None,
        })


    if 'bandwidth' in t1Json['sequence']:
        bandwidth = t1Json['sequence']['bandwidth']
    else:
        bandwidth = None

    databaset_series.update({
        'sequence name': t1Json['sequence']['name'],
        'sequence type': t1Json['sequence']['type'],
        'matrix size': t1Json['sequence']['matrix_size'],
        'resolution': t1Json['sequence']['resolution'],
        'dimension': t1Json['sequence']['dimension'],
        'TR': t1Json['sequence']['repetition_time'],
        'echo time': t1Json['sequence']['echo_time'],
        'TI': t1Json['sequence']['inversion_times'],    
        'bandwidth': bandwidth,        
    })
    return databaset_series

In [8]:
df = create_database(configJson)

In [9]:
df

Unnamed: 0_level_0,OSF dataset,OSF link,NIFTI filename,Data type,contact,site name,MRI vendor,MRI version,MRI field,sample type,...,sex,sequence name,sequence type,matrix size,resolution,dimension,TR,echo time,TI,bandwidth
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1.001,refaatgabr_mcgovern_human,https://osf.io/2f8j5/download/,20200103_refaatgabr_mcgovern_human/20200103_re...,Magnitude,Refaat Gabr,UTHealth McGovern Medical School,Philips,Ingenia R5.4.1,3.0,,...,male,T1CHALLENGE^T1CHALLENGE_WIP_IR_0050_FSL_6_1,TSE inversion recovery,[256 256 1],[1 1 2],2D,2560.0,14.0,"[50, 400, 1100, 2500]",173.8
2.001,mrel_usc_human,https://osf.io/z8dyw/download/,20200128_mrel_usc_human/20200128_mrel_usc_GE3T...,Magnitude,Nam Lee,Keck Medical Center of University of Southern ...,GE,Signa HDxt,3.0,,...,female,"2D Spin Echo, IR",Spin-echo inversion recovery,[256 256 1],[0.75 0.75 2],2D,2550.0,14.0,"[50, 400, 1100, 2500]",244.141
2.002,mrel_usc_human,https://osf.io/z8dyw/download/,20200128_mrel_usc_human/20200128_mrel_usc_GE3T...,Magnitude,Nam Lee,Keck Medical Center of University of Southern ...,GE,Signa HDxt,3.0,,...,female,"2D Spin Echo, IR",Spin-echo inversion recovery,[256 256 1],[0.875 0.875 2],2D,2550.0,14.0,"[50, 400, 1100, 2500]",244.141
2.003,mrel_usc_human,https://osf.io/z8dyw/download/,20200128_mrel_usc_human/20200128_mrel_usc_GE3T...,Magnitude,Nam Lee,Keck Medical Center of University of Southern ...,GE,Signa HDxt,3.0,,...,male,"2D Spin Echo, IR",Spin-echo inversion recovery,[256 256 1],[1 1 2],2D,2550.0,14.0,"[50, 400, 1100, 2500]",244.141
2.004,mrel_usc_human,https://osf.io/z8dyw/download/,20200128_mrel_usc_human/20200206_mrel_usc_GE3T...,Magnitude,Nam Lee,Keck Medical Center of University of Southern ...,GE,Signa HDxt,3.0,,...,female,"2D Spin Echo, IR",Spin-echo inversion recovery,[256 256 1],[1 1 2],2D,2550.0,14.0,"[50, 400, 1100, 2500]",244.141
2.005,mrel_usc_human,https://osf.io/z8dyw/download/,20200128_mrel_usc_human/20200228_mrel_usc_GE3T...,Magnitude,Nam Lee,Keck Medical Center of University of Southern ...,GE,Signa HDxt,3.0,,...,male,"2D Spin Echo, IR",Spin-echo inversion recovery,[256 256 1],[1 1 2],2D,2550.0,14.0,"[50, 400, 1100, 2500]",244.141
2.006,mrel_usc_human,https://osf.io/z8dyw/download/,20200128_mrel_usc_human/20200228_mrel_usc_GE3T...,Magnitude,Nam Lee,Keck Medical Center of University of Southern ...,GE,Signa HDxt,3.0,,...,male,"2D Spin Echo, IR",Spin-echo inversion recovery,[256 256 1],[1 1 2],2D,2550.0,14.0,"[50, 400, 1100, 2500]",244.141
3.001,iveslevesque_muhc_mgh_human,https://osf.io/n9edp/download/,20200203_iveslevesque_muhc_mgh_human/20200203_...,Magnitude,Ives Levesque,McGill University Health Centre - Montreal Gen...,Siemens,syngo MR E11,3.0,,...,male,T1_se_cor_TI,Spin-echo inversion recovery,[256 240 1],[1 1 2],2D,2550.0,14.0,"[50, 400, 1100, 2500]",390
4.001,siyuanhu_casewestern_human,https://osf.io/jrzg6/download/,20200203_siyuanhu_casewestern_human/20200203_s...,Magnitude,Siyuan Hu,"Biomedical Engineering, Case Western Reserve U...",Siemens,Skyra,3.0,,...,male,SE_IR,Spin-echo inversion recovery,[256 256 1],[1 1 2],2D,2550.0,14.0,"[50, 400, 1100, 2500]",
5.001,jorgejovicich_cimec_human,https://osf.io/g5z4e/download/,20200207_jorgejovicich_cimec_human/subject_1/2...,Magnitude,Jorge Jovicich,CIMeC - Universita' di Trento,Siemens,Prisma syngo_MR_E11,3.0,,...,male,_tir2d1_4,Spin-echo inversion recovery,[256 256 10],[1 1 2],2D,2550.0,13.0,"[50, 400, 1100, 2500]",465


In [10]:
df.to_excel(output_filename + ".xlsx")  