# Create databases of RRSG/qMRSG Challenge data

## Imports

In [1]:
from pathlib import Path
import pandas as pd
import json
import nibabel as nib
import numpy as np

from src.database import *
from make_pooled_datasets import *

## Configurations

In [2]:
# Name of the dataset; also used as the on-disk folder for the pooled data.
data_folder_name = '3T_human_T1maps'

# JSON configuration handed to the dataset and database steps further down.
configFile = Path('configs/3T_human_T1maps.json')

# Directory and base filename (no extension) shared by every exported file.
output_folder = Path('databases/')
output_filename = f'{data_folder_name}_database'

## Download datasets

In [3]:
# Skip the pooling step when the dataset folder is already on disk, so that
# re-running the notebook does not repeat it.
# NOTE(review): presumably make_pooled_dataset downloads into this folder
# (per the section heading) - confirm against its definition.
dataset_already_present = Path(data_folder_name).exists()
if not dataset_already_present:
    make_pooled_dataset(configFile, data_folder_name)

## Create database

In [4]:
# Build the database table from the configuration file and the dataset folder.
# `df` is the object that the save and demo cells below operate on.
# NOTE(review): create_database arrives through a wildcard import (presumably
# src.database) - confirm where it is defined.
df = create_database(configFile, data_folder_name)

## Save database

In [5]:
# Make sure the destination directory exists first: the pandas writers below
# do not create missing parent folders and would fail on a fresh checkout.
output_folder.mkdir(parents=True, exist_ok=True)

# Export the same table in three formats that share one base filename.
# NOTE(review): cells holding arrays are presumably stored as their text
# representation in the Excel/CSV files; only the pickle keeps them as
# Python objects - confirm if those files are meant to be read back.
df.to_excel(output_folder / (output_filename + '.xlsx'))  # To excel
df.to_pickle(output_folder / (output_filename + '.pkl'))  # To pickle (Python object)
df.to_csv(output_folder / (output_filename + '.csv'))  # To csv

## Database manipulation demo

### View entire database

In [6]:
# A bare expression as the last line of a cell makes the notebook render it;
# here that displays the whole database table.
df

Unnamed: 0_level_0,OSF dataset,OSF link,NIFTI filename,Data type,contact,site name,MRI vendor,MRI version,MRI field,sample type,...,T1 - NIST sphere 5,T1 - NIST sphere 6,T1 - NIST sphere 7,T1 - NIST sphere 8,T1 - NIST sphere 9,T1 - NIST sphere 10,T1 - NIST sphere 11,T1 - NIST sphere 12,T1 - NIST sphere 13,T1 - NIST sphere 14
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1.001,refaatgabr_mcgovern_human,https://osf.io/2f8j5/download/,20200103_refaatgabr_mcgovern_human/20200103_re...,Magnitude,Refaat Gabr,UTHealth McGovern Medical School,Philips,Ingenia R5.4.1,3.0,,...,,,,,,,,,,
2.001,mrel_usc_human,https://osf.io/z8dyw/download/,20200128_mrel_usc_human/20200128_mrel_usc_GE3T...,Magnitude,Nam Lee,Keck Medical Center of University of Southern ...,GE,Signa HDxt,3.0,,...,,,,,,,,,,
2.002,mrel_usc_human,https://osf.io/z8dyw/download/,20200128_mrel_usc_human/20200128_mrel_usc_GE3T...,Magnitude,Nam Lee,Keck Medical Center of University of Southern ...,GE,Signa HDxt,3.0,,...,,,,,,,,,,
2.003,mrel_usc_human,https://osf.io/z8dyw/download/,20200128_mrel_usc_human/20200128_mrel_usc_GE3T...,Magnitude,Nam Lee,Keck Medical Center of University of Southern ...,GE,Signa HDxt,3.0,,...,,,,,,,,,,
2.004,mrel_usc_human,https://osf.io/z8dyw/download/,20200128_mrel_usc_human/20200206_mrel_usc_GE3T...,Magnitude,Nam Lee,Keck Medical Center of University of Southern ...,GE,Signa HDxt,3.0,,...,,,,,,,,,,
2.005,mrel_usc_human,https://osf.io/z8dyw/download/,20200128_mrel_usc_human/20200228_mrel_usc_GE3T...,Magnitude,Nam Lee,Keck Medical Center of University of Southern ...,GE,Signa HDxt,3.0,,...,,,,,,,,,,
2.006,mrel_usc_human,https://osf.io/z8dyw/download/,20200128_mrel_usc_human/20200228_mrel_usc_GE3T...,Magnitude,Nam Lee,Keck Medical Center of University of Southern ...,GE,Signa HDxt,3.0,,...,,,,,,,,,,
3.001,iveslevesque_muhc_mgh_human,https://osf.io/n9edp/download/,20200203_iveslevesque_muhc_mgh_human/20200203_...,Magnitude,Ives Levesque,McGill University Health Centre - Montreal Gen...,Siemens,syngo MR E11,3.0,,...,,,,,,,,,,
4.001,siyuanhu_casewestern_human,https://osf.io/jrzg6/download/,20200203_siyuanhu_casewestern_human/20200203_s...,Magnitude,Siyuan Hu,"Biomedical Engineering, Case Western Reserve U...",Siemens,Skyra,3.0,,...,,,,,,,,,,
5.001,jorgejovicich_cimec_human,https://osf.io/g5z4e/download/,20200207_jorgejovicich_cimec_human/subject_1/2...,Magnitude,Jorge Jovicich,CIMeC - Universita' di Trento,Siemens,Prisma syngo_MR_E11,3.0,,...,,,,,,,,,,


### View all column values for a specific database ID (i.e. row)

In [7]:
# Select a single row by its index label (the database "id", a float such as
# 1.001); the result lists every column name next to that row's value.
df.loc[1.001]

OSF dataset                                      refaatgabr_mcgovern_human
OSF link                                    https://osf.io/2f8j5/download/
NIFTI filename           20200103_refaatgabr_mcgovern_human/20200103_re...
Data type                                                        Magnitude
contact                                                        Refaat Gabr
site name                                 UTHealth McGovern Medical School
MRI vendor                                                         Philips
MRI version                                                 Ingenia R5.4.1
MRI field                                                                3
sample type                                                           None
phantom version                                                       None
phantom serial number                                                 None
phantom temperature                                                   None
age                      

### Get the value of a cell entry (row/column intersection)

In [8]:
# Fetch one cell by giving the row label and the column name to a single
# .loc call. This avoids chained indexing, which first builds an intermediate
# row Series and then indexes into it.
df.loc[1.001, 'T1 - genu (WM)']

array([1306.3, 1195.7, 1258.8, 1389.5, 1484.2, 1217.3, 1236.7, 1223.9,
       1314.8, 1347. , 1210.2, 1220.4, 1250.9, 1177.3, 1136.7, 1212.8,
       1193.6, 1268.3, 1199.9, 1099.1, 1225.8, 1282.4, 1344.7, 1279.1,
       1155.5])