# Create databases of RRSG/qMRSG Challenge data

## Imports

In [1]:
from pathlib import Path
import pandas as pd
import json
import nibabel as nib
import numpy as np

from src.database import *
from make_pooled_datasets import *

## Configurations

In [2]:
# Local folder names for the pooled T1 maps and for their ROIs
data_folder_name = '3T_NIST_T1maps'
roi_folder_name = '3T_NIST_T1maps_rois'

# JSON configuration files passed to the dataset/database helpers
configFile = Path('configs/3T_NIST_T1maps.json')
roiConfigFile = Path('configs/3T_NIST_T1maps_rois.json')

# Export location and base filename (the extension is appended when saving)
output_folder = Path('databases/')
output_filename = f'{data_folder_name}_database'

## Download datasets

In [3]:
# Build each pooled dataset only when its folder is not already present,
# so re-running the cell does not repeat the download.
for config_path, folder_name in (
    (configFile, data_folder_name),
    (roiConfigFile, roi_folder_name),
):
    if not Path(folder_name).exists():
        make_pooled_dataset(config_path, folder_name)

https://osf.io/7k96d/download/
100% [............................................................................] 568226 / 568226

https://osf.io/46nd9/download/
100% [............................................................................] 312539 / 312539

https://osf.io/nk6ae/download/
100% [............................................................................] 279647 / 279647

https://osf.io/t5y28/download/
100% [..........................................................................] 1677684 / 1677684

https://osf.io/vw24u/download/
100% [............................................................................] 292703 / 292703

https://osf.io/sjy5g/download/
100% [..........................................................................] 2234880 / 2234880

https://osf.io/nax9z/download/
100% [............................................................................] 384589 / 384589

https://osf.io/bfh4m/download/
100% [.......................................

## Create database

In [4]:
# Assemble the database DataFrame from the pooled data and ROI folders
df = create_database(
    configFile,
    data_folder_name,
    roiConfigFile,
    roi_folder_name,
)

## Save database

In [5]:
# The pandas writers do not create missing parent directories, so make sure
# the output folder exists before exporting (no-op when it already does).
output_folder.mkdir(parents=True, exist_ok=True)

df.to_excel(output_folder / (output_filename + '.xlsx')) # To excel
df.to_pickle(output_folder / (output_filename + '.pkl')) # To pickle (Python object)
df.to_csv(output_folder / (output_filename + '.csv')) # To csv

## Database manipulation demo

### View entire database

In [6]:
# A bare expression on the last line of a cell is rendered by Jupyter as a rich table
df

Unnamed: 0_level_0,OSF dataset,OSF link,NIFTI filename,Data type,contact,site name,MRI vendor,MRI version,MRI field,sample type,...,T1 - NIST sphere 5,T1 - NIST sphere 6,T1 - NIST sphere 7,T1 - NIST sphere 8,T1 - NIST sphere 9,T1 - NIST sphere 10,T1 - NIST sphere 11,T1 - NIST sphere 12,T1 - NIST sphere 13,T1 - NIST sphere 14
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1.001,matthewgrechsollars_ICL_NIST,https://osf.io/7k96d/download/,20200121_matthewgrechsollars_ICL_NIST/20200121...,Magnitude,Matthew Grech-Sollars,Imperial College London,Siemens,Prisma VE11C,3.0,NIST,...,"[486.3, 485.5, 487.2, 489.1, 492.1, 494.1, 488...","[345.5, 346.6, 344.9, 345.4, 343.7, 352.4, 342...","[245.8, 248.4, 245.6, 243.3, 238.6, 236.1, 240...","[175.1, 173.4, 178.8, 180.8, 181.3, 177.8, 176...","[183.8, 181.6, 185.3, 184.6, 118.4, 118.1, 116...","[108.6, 96.3, 81.1, 72.4, 90.5, 100.2, 94.6, 8...","[101.0, 86.5, 2.5, 1.1, 2.6, 56.5, 68.2, 71.5,...","[4.0, 2.8, 4.2, 77.1, 69.6, 1.2, 80.8, 63.0, 2...","[75.6, 80.7, 79.2, 72.4, 106.0, 90.5, 3.7, 3.0...","[57.9, 71.3, 1.2, 2.8, 1.5, 97.1, 4.4, 84.3, 2..."
1.002,matthewgrechsollars_ICL_NIST,https://osf.io/7k96d/download/,20200121_matthewgrechsollars_ICL_NIST/20200121...,Complex,Matthew Grech-Sollars,Imperial College London,Siemens,Prisma VE11C,3.0,NIST,...,"[486.3, 485.5, 487.2, 489.1, 492.3, 494.1, 488...","[345.5, 346.6, 344.9, 345.4, 343.7, 352.4, 342...","[245.8, 248.4, 245.6, 243.3, 238.6, 236.1, 240...","[175.1, 173.4, 178.8, 180.8, 181.3, 177.8, 176...","[127.7, 128.7, 129.9, 128.7, 118.4, 118.1, 116...","[98.7, 96.4, 81.1, 72.4, 90.6, 91.6, 86.8, 81....","[101.0, 86.5, 6.2, 10.0, 3.3, 58.2, 70.5, 73.3...","[3.3, 1.1, 6.7, 76.9, 69.8, 10.0, 80.8, 71.9, ...","[75.5, 81.0, 79.2, 72.3, 106.1, 90.3, 3.3, 1.9...","[89.1, 113.3, 5.9, 4.0, 1.5, 96.7, 3.4, 84.5, ..."
2.001,siyuanhu_casewestern_NIST,https://osf.io/46nd9/download/,20200124_siyuanhu_casewestern_NIST/20200124_si...,Magnitude,Siyuan Hu,"Biomedical Engineering, Case Western Reserve U...",Siemens,Skyra,3.0,NIST,...,"[411.8, 405.8, 404.0, 403.3, 399.1, 397.0, 398...","[243.9, 257.4, 265.5, 260.9, 266.9, 264.5, 268...","[194.7, 99.2, 102.7, 103.9, 97.1, 118.0, 132.9...","[14.1, 9.5, 10.0, 1.4, 3.0, 4.9, 14.0, 14.0, 1...","[3.3, 10.0, 3.0, 10.6, 2.7, 13.2, 13.0, 2.0, 6...","[1.2, 10.7, 1.2, 3.3, 3.3, 3.6, 9.5, 97.4, 2.6...","[72.6, 98.0, 9.5, 97.6, 66.4, 59.3, 70.3, 83.5...","[83.1, 1.3, 71.6, 77.7, 1.0, 96.8, 94.5, 84.6,...","[9.5, 2.7, 115.4, 93.9, 101.7, 114.1, 108.8, 8...","[85.8, 104.3, 1.2, 1.8, 127.8, 1.3, 124.5, 5.3..."
3.001,iveslevesque_muhc_mgh_NIST,https://osf.io/nk6ae/download/,20200203_iveslevesque_muhc_mgh_NIST/20200203_i...,Magnitude,Ives Levesque,McGill University Health Centre - Montreal Gen...,Siemens,syngo MR E11,3.0,NIST,...,"[441.5, 444.1, 447.8, 439.5, 461.8, 466.1, 468...","[343.7, 331.3, 326.6, 350.0, 334.6, 328.2, 346...","[233.2, 239.1, 241.6, 237.5, 237.0, 243.1, 240...","[172.9, 177.2, 183.0, 180.9, 177.0, 175.1, 177...","[182.6, 181.7, 180.5, 127.3, 119.7, 112.3, 117...","[111.7, 99.3, 92.3, 86.1, 113.5, 116.2, 99.2, ...","[47.2, 9.5, 84.5, 95.7, 70.7, 1.0, 75.3, 86.0,...","[3.0, 1.8, 1.2, 5.6, 74.3, 9.5, 46.4, 96.2, 2....","[9.6, 2.0, 2.0, 90.5, 97.2, 107.1, 3.3, 2.7, 1...","[97.6, 1.3, 10.0, 107.2, 132.8, 9.6, 72.2, 153..."
4.001,mrel_usc_NIST,https://osf.io/t5y28/download/,20200204_mrel_usc_NIST/20200204_mrel_usc_GE3T_...,Magnitude,Nam Lee,Keck Medical Center of University of Southern ...,GE,Signa HDxt,3.0,NIST,...,"[475.4, 483.3, 483.7, 481.8, 485.6, 482.1, 479...","[351.5, 344.0, 341.9, 338.0, 338.5, 343.8, 343...","[241.4, 237.1, 239.9, 243.0, 241.4, 241.9, 239...","[167.0, 169.7, 172.5, 171.0, 166.8, 168.0, 173...","[61.5, 112.4, 117.0, 114.3, 117.3, 110.7, 126....","[69.5, 89.5, 90.8, 88.0, 91.1, 66.0, 90.3, 87....","[55.0, 63.8, 62.1, 66.7, 66.5, 1.3, 41.6, 71.0...","[66.3, 10.0, 63.0, 74.6, 86.5, 2.5, 64.4, 74.9...","[1.0, 69.1, 50.9, 2.5, 1.9, 94.2, 6.1, 3.8, 2....","[60.7, 58.7, 47.6, 100.5, 40.0, 2.0, 1.6, 3.2,..."
4.002,mrel_usc_NIST,https://osf.io/t5y28/download/,20200204_mrel_usc_NIST/20200204_mrel_usc_GE3T_...,Magnitude,Nam Lee,Keck Medical Center of University of Southern ...,GE,Signa HDxt,3.0,NIST,...,"[484.2, 480.1, 485.4, 485.4, 481.7, 479.9, 480...","[338.9, 335.0, 341.5, 338.6, 340.6, 341.7, 342...","[238.8, 237.7, 237.4, 236.8, 236.9, 235.7, 241...","[173.5, 174.4, 173.6, 178.5, 171.9, 171.1, 170...","[106.7, 109.9, 114.2, 117.8, 123.1, 123.7, 124...","[3.2, 63.4, 3.0, 4.1, 48.1, 78.1, 1.3, 85.5, 9...","[6.0, 5.0, 71.8, 73.7, 85.8, 89.1, 83.0, 69.1,...","[1.2, 3.0, 3.0, 3.3, 49.5, 2.7, 4.7, 7.0, 6.3,...","[4.2, 1.3, 42.7, 42.6, 1.8, 7.9, 6.7, 104.3, 1...","[10.7, 3.6, 58.9, 121.0, 96.1, 129.9, 89.3, 83..."
4.003,mrel_usc_NIST,https://osf.io/t5y28/download/,20200204_mrel_usc_NIST/20200209_mrel_usc_GE3T_...,Magnitude,Nam Lee,Keck Medical Center of University of Southern ...,GE,Signa HDxt,3.0,NIST,...,"[467.4, 495.4, 486.6, 481.5, 482.8, 480.0, 466...","[324.7, 340.0, 343.0, 347.8, 339.7, 348.7, 351...","[240.6, 246.7, 246.6, 244.3, 248.4, 248.8, 242...","[174.8, 175.9, 178.1, 180.4, 178.3, 178.3, 179...","[141.5, 147.4, 125.8, 129.8, 146.7, 147.1, 124...","[93.4, 95.4, 92.3, 90.2, 81.9, 76.0, 91.6, 80....","[3.1, 2.4, 6.1, 2.0, 2.3, 3.2, 78.3, 2.5, 52.5...","[109.4, 3.4, 8.4, 8.9, 5.3, 42.9, 91.1, 9.5, 4...","[4.1, 70.5, 2.8, 68.4, 52.7, 2.5, 3.1, 70.2, 5...","[112.9, 104.8, 6.0, 2.7, 2.3, 3.1, 2.1, 3.0, 5..."
4.004,mrel_usc_NIST,https://osf.io/t5y28/download/,20200204_mrel_usc_NIST/20200209_mrel_usc_GE3T_...,Magnitude,Nam Lee,Keck Medical Center of University of Southern ...,GE,Signa HDxt,3.0,NIST,...,"[489.8, 482.0, 480.5, 482.9, 483.1, 479.1, 482...","[325.2, 335.3, 338.3, 337.7, 330.3, 337.6, 342...","[240.2, 243.3, 242.8, 241.3, 239.9, 239.3, 236...","[172.3, 168.9, 171.2, 171.5, 171.8, 170.9, 173...","[112.7, 108.0, 124.3, 110.4, 111.2, 108.9, 113...","[90.0, 87.4, 78.8, 72.3, 2.8, 52.4, 80.2, 57.3...","[3.3, 6.2, 3.3, 3.0, 1.0, 6.4, 2.0, 1.9, 49.8,...","[1.0, 1.1, 1.2, 1.0, 2.1, 8.1, 79.8, 1.3, 1.2,...","[53.4, 8.3, 2.1, 2.7, 3.4, 3.7, 1.9, 2.7, 96.3...","[1.0, 2.5, 7.2, 4.0, 2.5, 2.4, 6.5, 4.8, 4.5, ..."
4.005,mrel_usc_NIST,https://osf.io/t5y28/download/,20200204_mrel_usc_NIST/20200209_mrel_usc_GE3T_...,Magnitude,Nam Lee,Keck Medical Center of University of Southern ...,GE,Signa HDxt,3.0,NIST,...,"[486.2, 486.0, 480.2, 478.1, 481.6, 485.8, 487...","[337.0, 335.4, 337.8, 335.6, 327.5, 341.3, 346...","[246.0, 241.5, 241.3, 242.5, 241.1, 240.2, 244...","[176.4, 174.6, 290.1, 178.2, 179.6, 292.5, 176...","[118.0, 118.5, 142.4, 140.5, 121.1, 116.8, 140...","[91.4, 71.4, 84.4, 85.7, 85.3, 81.3, 92.6, 85....","[2.0, 42.0, 63.4, 69.0, 77.8, 74.8, 8.2, 81.8,...","[1.9, 1.0, 1.3, 6.6, 3.2, 2.8, 1.0, 61.5, 3.0,...","[2.7, 2.0, 3.3, 90.7, 1.8, 1.8, 2.7, 1.3, 4.2,...","[118.2, 3.3, 56.4, 65.1, 1.3, 2.0, 1.2, 8.3, 5..."
4.006,mrel_usc_NIST,https://osf.io/t5y28/download/,20200204_mrel_usc_NIST/20200209_mrel_usc_GE3T_...,Magnitude,Nam Lee,Keck Medical Center of University of Southern ...,GE,Signa HDxt,3.0,NIST,...,"[491.1, 487.7, 481.8, 486.0, 487.0, 489.6, 485...","[347.1, 347.5, 342.4, 342.0, 344.7, 347.0, 355...","[244.0, 240.7, 240.0, 242.1, 245.2, 240.3, 242...","[279.7, 295.7, 284.7, 288.5, 176.8, 177.4, 175...","[135.8, 136.6, 130.6, 138.8, 134.7, 137.9, 139...","[84.7, 82.3, 76.1, 77.5, 74.1, 83.4, 3.3, 79.7...","[3.2, 3.3, 3.7, 5.0, 1.1, 2.0, 9.5, 1.2, 70.4,...","[2.0, 1.1, 1.0, 4.0, 1.3, 4.6, 9.5, 1.9, 3.1, ...","[98.9, 1.3, 4.8, 2.0, 2.0, 3.0, 3.0, 3.0, 7.7,...","[1.1, 9.5, 6.3, 7.2, 3.4, 10.0, 10.0, 2.0, 1.0..."


### View the column values for a specific database ID (i.e. row)

In [7]:
# Rows are labelled by the float `id` index (e.g. 1.001); .loc selects by that label
df.loc[1.001]

OSF dataset                                   matthewgrechsollars_ICL_NIST
OSF link                                    https://osf.io/7k96d/download/
NIFTI filename           20200121_matthewgrechsollars_ICL_NIST/20200121...
Data type                                                        Magnitude
contact                                              Matthew Grech-Sollars
site name                                          Imperial College London
MRI vendor                                                         Siemens
MRI version                                                   Prisma VE11C
MRI field                                                              3.0
sample type                                                           NIST
phantom version                                                        130
phantom serial number                                                  102
phantom temperature                                                  21.89
age                      

### Get the value of a cell entry (row/column intersection)

In [8]:
# Select the row label and column name in a single .loc call instead of
# chaining two lookups (df.loc[row][col]).
# NOTE(review): the columns displayed for this database are 'T1 - NIST sphere N';
# confirm that 'T1 - genu (WM)' exists for this dataset, otherwise this raises KeyError.
df.loc[1.001, 'T1 - genu (WM)']