# BIDSifer & Dicom Conversion


## Numbering convention preceding scan label _IS NOT CONSISTENT_ across subjects
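## Script reads "heuristics.csv"
### heuristics.csv contains two columns:
##### - init_name -- text searched for in each scan folder name to identify the data type (e.g. T1w, T2w, pd, etc.)
##### - dest_name -- output path template for that data type, relative to the subject folder (`{0}` is replaced with the subject ID)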

In [1]:
import os
import os.path as op
import json
import shutil
import tarfile
import subprocess as sp
from glob import glob
import pydicom as dcm
import pandas as pd
import numpy as np
import shutil 

#### FUNCTIONS ####

def convert_nifti(target_folder, destination): #conversion function, removes superfluous directory tree
    """Convert one scan's DICOMs with dcm2niix and flatten the result.

    dcm2niix writes into a temporary folder created at ``destination``. Every
    output file is then moved one level up and renamed to ``destination`` plus
    the file's own extension(s), e.g. ``<destination>/<anything>.nii`` becomes
    ``<destination>.nii``. Finally the temporary folder is removed.

    Parameters
    ----------
    target_folder : str
        Scan folder; DICOMs are read from ``<target_folder>/resources/DICOM/files/``.
    destination : str
        Output path without extension (a trailing "/" is ignored).

    Raises
    ------
    FileNotFoundError
        If the ``dcm2niix`` executable cannot be found on PATH.
    """
    #a trailing slash would place the renamed files inside the folder that is deleted at the end
    out_stem = destination.rstrip("/")
    os.makedirs(out_stem, exist_ok=True) #temporary folder dcm2niix writes into
    src_dcms = '{0}/resources/DICOM/files/'.format(target_folder)
    #argument list (no shell) so paths containing spaces or shell characters are passed through intact
    status = sp.run(["dcm2niix", "-o", out_stem, src_dcms]).returncode
    if status != 0: #report, but keep going so one bad scan does not stop a batch run
        print("dcm2niix exited with status {0} for {1}".format(status, src_dcms))
    for converted in glob(out_stem + "/*"): #grabs all files dcm2niix produced
        fname = os.path.basename(converted)
        #look for the extension in the file name only; a "." in a parent folder must not count
        dot = fname.find(".")
        if dot == -1:
            #no extension: the target would be the temporary folder itself, so there is nothing sensible to rename to
            print("no extension on {0} -- not kept".format(converted))
            continue
        shutil.move(converted, out_stem + fname[dot:]) #e.g. <destination>.nii, <destination>.json
    shutil.rmtree(out_stem) #remove the now superfluous temporary folder
    
###################
       
df = pd.read_csv('heuristics.csv') #open heuristics file as pandas dataframe


def _scan_number(scan_dir):
    """Return the number that prefixes a scan folder name (e.g. 9 for '.../scans/9-T1w_MPR_vNav')."""
    return int(os.path.basename(scan_dir).split('-')[0])


for tar_file in glob('/home/data/madlab/Pruden_SEA/sourcedata/Pruden_SEA_*-S1.tar.gz'): #iterate through Ss tars
    pain_name = os.path.basename(tar_file).split('_')[-1].split('-')[0] #full Ss label as it appears in the tar name
    sub = pain_name[:4] #Ss ID only = first four characters of that label

    sub_dir = '/home/data/madlab/Pruden_SEA/dset/sub-{0}/'.format(sub)
    if os.path.exists(sub_dir): #Ss already has a dset folder -> not new, skip
        continue
    scratch_dir = '/scratch/madlab/bidsify_sea/SEA-{0}/'.format(sub)
    os.makedirs(scratch_dir, exist_ok=True) #creates subdirectories

    #unpack the tar into scratch (source tar is left untouched);
    #argument list instead of a shell string so unusual characters in paths are passed through intact
    sp.run(['tar', '-xf', tar_file, '-C', scratch_dir])
    #defines current working directory for subject data
    curr_dir = '/scratch/madlab/bidsify_sea/SEA-{0}/Pruden_SEA_{1}-S1/scans/'.format(sub, pain_name)

    #grabs relevant directories (excluding setter, 32ch, and MPRG);
    #all name tests use the scan folder name only, so nothing in the parent path (e.g. the Ss label) can match
    all_dirs = sorted(x for x in glob(curr_dir + "*")
                      if not any(skip in os.path.basename(x) for skip in ('setter', '32ch', 'MPRG')))
    final_dirs = []
    for x in ['T1', 'T2w', 't2', 'pd_tse', 'dMRI_PA', 'dMRI_DistortionMap_AP', 'dMRI_DistortionMap_PA']:
        matches = [d for d in all_dirs if x in os.path.basename(d)] #every folder of the targeted scan type
        if not matches:
            continue
        #when a scan type occurs more than once, keep only the folder with the highest scan number
        best = max(matches, key=_scan_number)
        if best not in final_dirs: #a folder matching two patterns must still be converted only once
            final_dirs.append(best)
    #plain dMRI is selected separately by exact folder-name ending: 'dMRI' is also a substring of
    #the dMRI_PA and dMRI_DistortionMap_* folder names, so a substring test cannot isolate it
    dMRIs = glob(curr_dir + '*-dMRI')
    if dMRIs:
        final_dirs.append(max(dMRIs, key=_scan_number))
    final_dirs = sorted(final_dirs)
    print("FINAL", final_dirs)
    print("")

    for i in ['anat', 'dwi', 'fmap']: #iterate through folder types
        os.makedirs(sub_dir + '{0}/'.format(i), exist_ok=True) #create dir for each
    completed = [] #init_names already used for this Ss
    for curr_scan in final_dirs: #iterate through filtered scans
        scan_label = os.path.basename(curr_scan)
        for row, scan_type in enumerate(df['init_name']): #iterate through scan types defined in heuristics file
            #take the first row whose init_name occurs in the folder name and has not been used yet
            if scan_type in scan_label and scan_type not in completed:
                break #use row where break stops
        else:
            #without this branch the loop would fall through and file the scan under the LAST row's dest_name
            print("no unused heuristics.csv entry matches {0} -- skipped".format(curr_scan))
            continue
        out_path = sub_dir + df['dest_name'][row].format(sub)
        print(curr_scan, out_path)
        #use custom function to run dcm2niix on current dicoms
        convert_nifti(curr_scan, out_path)
        completed.append(scan_type)
        print(sub, curr_scan) #progress marker: Ss and scan that were just converted

FINAL ['/scratch/madlab/bidsify_sea/SEA-1035/Pruden_SEA_1035-S1/scans/9-T1w_MPR_vNav']

/scratch/madlab/bidsify_sea/SEA-1035/Pruden_SEA_1035-S1/scans/9-T1w_MPR_vNav /home/data/madlab/Pruden_SEA/dset/sub-1035/anat/sub-1035_T1w
Chris Rorden's dcm2niiX version v1.0.20190902  GCC4.8.5 (64-bit Linux)
Found 176 DICOM file(s)
Convert 176 DICOM as /home/data/madlab/Pruden_SEA/dset/sub-1035/anat/sub-1035_T1w/files_T1w_MPR_vNav_20220501154331_9 (256x256x176x1)
Conversion required 2.715244 seconds (2.500000 for core code).
1035 /scratch/madlab/bidsify_sea/SEA-1035/Pruden_SEA_1035-S1/scans/9-T1w_MPR_vNav


## ---Brute Force Method (_used when naming convention was compromised_)---

In [None]:
import os
import os.path as op
import json
import shutil
import tarfile
import subprocess as sp
from glob import glob
import pydicom as dcm
import pandas as pd
import numpy as np
import shutil 

### T1 Scan

In [None]:
%%bash 

# Manual T1w conversion for sub-1008: DICOMs of scan 4-T1w_MPR_vNav are written straight into anat/.
# NOTE(review): no output file name is set here, so the result presumably still needs renaming to sub-1008_T1w.* -- confirm.
dcm2niix -o /home/data/madlab/Pruden_SEA/dset/sub-1008/anat/ /scratch/madlab/bidsify_sea/SEA-1008/Pruden_SEA_1008EH-S1/scans/4-T1w_MPR_vNav/resources/DICOM/files/

### T2 Scan

### PD Scan

In [None]:
%%bash 

# Manual PD conversion for sub-1008: DICOMs of scan 6-pd_tse_Cor_T2_PDHR_FCS are written straight into anat/.
dcm2niix -o /home/data/madlab/Pruden_SEA/dset/sub-1008/anat/ /scratch/madlab/bidsify_sea/SEA-1008/Pruden_SEA_1008EH-S1/scans/6-pd_tse_Cor_T2_PDHR_FCS/resources/DICOM/files/

### DWI Scan

In [None]:
%%bash 

# Manual DWI conversion for sub-1005: DICOMs of scan 13-dMRI are written straight into dwi/.
dcm2niix -o /home/data/madlab/Pruden_SEA/dset/sub-1005/dwi/ /scratch/madlab/bidsify_sea/SEA-1005/Pruden_SEA_1005T6-S1/scans/13-dMRI/resources/DICOM/files/

### DWI Scan - PA

In [None]:
%%bash 

# Manual DWI (PA) conversion for sub-1005: DICOMs of scan 14-dMRI_PA are written straight into dwi/.
dcm2niix -o /home/data/madlab/Pruden_SEA/dset/sub-1005/dwi/ /scratch/madlab/bidsify_sea/SEA-1005/Pruden_SEA_1005T6-S1/scans/14-dMRI_PA/resources/DICOM/files/

### Distortion Map - PA

In [None]:
%%bash 

# Manual distortion-map (PA) conversion for sub-1008: DICOMs of scan 8-dMRI_DistortionMap_PA are written straight into fmap/.
dcm2niix -o /home/data/madlab/Pruden_SEA/dset/sub-1008/fmap/ /scratch/madlab/bidsify_sea/SEA-1008/Pruden_SEA_1008EH-S1/scans/8-dMRI_DistortionMap_PA/resources/DICOM/files/

### Distortion Map - AP

In [None]:
%%bash 

# Manual distortion-map (AP) conversion for sub-1008: DICOMs of scan 9-dMRI_DistortionMap_AP are written straight into fmap/.
dcm2niix -o /home/data/madlab/Pruden_SEA/dset/sub-1008/fmap/ /scratch/madlab/bidsify_sea/SEA-1008/Pruden_SEA_1008EH-S1/scans/9-dMRI_DistortionMap_AP/resources/DICOM/files/

# ---Random Code/Notes---

In [None]:
    #an issue was discovered in how the file tree was saved for some tars -- *special considerations*
    #old Ss directories --> long format
    #long_dir = '/scratch/madlab/bidsify_sea/SEA-{0}/home/data/madlab/Pruden_SEA/sourcedata/Pruden_SEA_{0}-S1/scans/'.format(sub)
    #new Ss directories --> short format
    #short_dir = '/scratch/madlab/bidsify_sea/SEA-{0}/Pruden_SEA_{0}-S1/scans/'.format(sub)
    #if os.path.exists(long_dir):
        #curr_dir = long_dir #previously acquired Ss
    #else:
        #curr_dir = short_dir #future Ss
        
#previous method of dealing with protocol inconsistencies
#NOTE(review): fragment only -- it uses all_dirs and df without defining them and is indented as if
#lifted from inside a loop; presumably it is not meant to be run on its own -- confirm
    corr_scan = {} #scan type (init_name) -> scan number of the first folder that matched it
    corr_idx = {} #scan type (init_name) -> index of that folder within all_dirs
    #scan sequence (numbers before scan type) are not consistent across Ss -- *special considerations*
    for idx, curr_scan in enumerate(all_dirs): #dealing with the inconsistent sequence of scans
        i = int(curr_scan.split("/")[-1].split("-")[0]) #get scan number and convert from string to int
        for row, scan_type in enumerate(df['init_name']): #iterate through scan types in heuristics.csv
            #print(i, idx, scan_type)
            if scan_type in curr_scan: #current scan path contains init_name listed in heuristics file
                if not scan_type in corr_scan: #only the first folder encountered for each scan type is recorded
                    corr_idx[scan_type] = idx #position of that folder in all_dirs
                    corr_scan[scan_type] = i #scan number of that folder

    temp = [] 
    for key in corr_scan: #iterate through the scan types that were found
        temp.append(all_dirs[corr_idx[key]]) #isolate and append targeted scan path to temp
    all_dirs = temp #reassign all_dirs to contain only the scans we targeted
    #print(all_dirs)