In [11]:
import os
import pandas as pd
import numpy as np
import difflib as diff

In [12]:
# Root folder of the raw MRI data and its DICOM / BIDS sub-trees.
data_path = "/mnt/data/romy/hypnomed/MRI_raw"
dicom_path = os.path.join(data_path, 'DICOM')
bids_path = os.path.join(data_path, 'BIDS')

# Every entry of BIDS/.heudiconv is treated as one subject folder.
heudiconv_path = os.path.join(bids_path, '.heudiconv')

# Path of the session-001 dicominfo TSV of each subject, in sorted folder order.
bids_info_paths = [
    os.path.join(heudiconv_path, '{}/ses-001/info/dicominfo_ses-001.tsv'.format(sub))
    for sub in sorted(os.listdir(heudiconv_path))
]

### Retrieve DICOM/BIDS info for subjects

In [48]:
def get_dicom_infos(bids_info_paths,print_paths=False):
    """Read the dicominfo TSV of each subject and group its DICOM folders by modality.

    Subjects are labelled ``"Sub-<n>"`` where ``n`` is the 1-based position of
    the TSV in ``bids_info_paths``. A folder belongs to a modality when its
    ``dcm_dir_name`` contains that modality's substring ('rs', '3DT1', '3DT2',
    't2star'). The series index of a folder is the text before the first
    underscore of its ``dcm_dir_name``.

    Parameters
    ----------
    bids_info_paths : iterable of str
        Paths of the tab-separated dicominfo files, one per subject. Each file
        must contain a ``dcm_dir_name`` column.
    print_paths : bool, default False
        When True, print the file being read and the folders matched for each
        modality (sanity check).

    Returns
    -------
    dcm_df : pandas.DataFrame
        One row per subject and one column per modality ('RS', 'T1', 'T2',
        'T2*'); each cell is the list of series indexes (as strings).
    infos_dict_indxs : dict
        Subject label -> DataFrame read from that subject's TSV.
    dcm_dict_paths : dict
        Subject label -> {modality: list of matching ``dcm_dir_name`` values}.
    """
    # Modality label -> substring identifying it in `dcm_dir_name`.
    # The insertion order fixes the column order of the returned DataFrame.
    modality_patterns = {'RS': 'rs',
                         'T1': '3DT1',
                         'T2': '3DT2',
                         'T2*': 't2star'}

    dcm_dict_indxs = dict() #series indexes of each modality, per subject
    infos_dict_indxs = dict() #full TSV content (DataFrame), per subject
    dcm_dict_paths = dict() #DICOM folder names of each modality, per subject

    for sub_indx,bid_tsv in enumerate(bids_info_paths,1):
        sub_label = "Sub-{}".format(sub_indx)

        if print_paths: #print the paths for sanity check
            print(70*'-')
            print('Getting infos for subject n°{}, stored in {}'.format(sub_indx,bid_tsv))
            print(70*'-')

        infos = pd.read_csv(bid_tsv,sep='\t')
        infos_dict_indxs[sub_label] = infos
        dcm_dict_indxs[sub_label] = {}
        dcm_dict_paths[sub_label] = {}

        # A missing folder name is read as NaN (a float) and cannot belong to
        # any modality: skip it instead of failing on the substring test.
        dir_names = [name for name in infos.dcm_dir_name if isinstance(name, str)]

        for modality,pattern in modality_patterns.items():
            matching = [name for name in dir_names if pattern in name]
            if print_paths: #print the names of the files for sanity check
                print("{} files : {}\n".format(modality,matching))

            dcm_dict_indxs[sub_label][modality] = [name.split('_')[0] for name in matching]
            dcm_dict_paths[sub_label][modality] = matching

    dcm_df = pd.DataFrame.from_dict(dcm_dict_indxs, orient='index') #one row per subject
    return dcm_df,infos_dict_indxs,dcm_dict_paths

In [54]:
# Parse the dicominfo TSV of every subject: `dcm_df` holds the per-modality series
# indexes, the two dicts hold the raw tables and the matching DICOM folder names.
dcm_df, infos_dict_indxs, dcm_dict_paths = get_dicom_infos(
    bids_info_paths,
    print_paths=False,
)

In [58]:
# Display the dict mapping each subject label ("Sub-<n>") to the DataFrame read
# from that subject's dicominfo TSV.
infos_dict_indxs

{'Sub-1':     total_files_till_now                                   example_dcm_file  \
 0                    128  1.3.12.2.1107.5.2.43.66012.3000001806040846088...   
 1                    133  1.3.12.2.1107.5.2.43.66012.3000001806040846088...   
 2                    136  1.3.12.2.1107.5.2.43.66012.3000001806040846088...   
 3                    139  1.3.12.2.1107.5.2.43.66012.3000001806040846088...   
 4                    158  1.3.12.2.1107.5.2.43.66012.3000001806040846088...   
 5                    159  1.3.12.2.1107.5.2.43.66012.3000001806040846088...   
 6                    394  1.3.12.2.1107.5.2.43.66012.3000001806040846088...   
 7                    550  1.3.12.2.1107.5.2.43.66012.3000001806040846088...   
 8                    950  1.3.12.2.1107.5.2.43.66012.3000001806040846088...   
 9                   1350  1.3.12.2.1107.5.2.43.66012.3000001806040846088...   
 10                  1558  1.3.12.2.1107.5.2.43.66012.3000001806040846088...   
 11                  1766  1.3.

In [77]:
# Subjects to inspect: None to run all of them, or a list of subject ids
# (1-based positions in `bids_info_paths`, e.g. [1, 2]) to restrict the run.
select_subjects = None

if select_subjects:
    # Keep each selected path together with its subject id, in `bids_info_paths` order.
    selected = [(sub_id, path)
                for sub_id, path in enumerate(bids_info_paths, 1)
                if sub_id in select_subjects]
    bids_info_paths_selected = [path for _, path in selected]
    print('Selected files : ',bids_info_paths_selected)
    dcm_df,infos_dict_indxs,dcm_dict_paths = get_dicom_infos(bids_info_paths=bids_info_paths_selected,print_paths=False)

    # get_dicom_infos labels subjects by their position in the list it receives
    # ("Sub-1", "Sub-2", ...). Map those labels back to the requested subject ids
    # so that, e.g., selecting [3, 5] yields "Sub-3" and "Sub-5".
    relabel = {"Sub-{}".format(position): "Sub-{}".format(sub_id)
               for position, (sub_id, _) in enumerate(selected, 1)}
    dcm_df = dcm_df.rename(index=relabel)
    infos_dict_indxs = {relabel[label]: infos for label, infos in infos_dict_indxs.items()}
    dcm_dict_paths = {relabel[label]: paths for label, paths in dcm_dict_paths.items()}
else:
    # Derive the subject ids from the data instead of hard-coding the subject count.
    select_subjects = list(np.arange(1, len(bids_info_paths) + 1))
    dcm_df,infos_dict_indxs,dcm_dict_paths = get_dicom_infos(bids_info_paths,print_paths=False)

# Print the series ids recorded for each subject.
for sub_label, sub_infos in infos_dict_indxs.items():
    print(sub_label)
    print(sub_infos.series_id)



Sub-1
0                       1-AUTOALIGN
1                       2-AUTOALIGN
2                       3-AUTOALIGN
3                       4-AUTOALIGN
4                5-LOCA_T1_FL2D_SAG
5               7-RUNx_QC_tilt25_AP
6      8-RUNx_rs_3mmiso_TR2.4_44cpe
7     10-RUNx_er_3mmiso_TR2.4_44cpe
8               12-anat_t2star_2iso
9               13-anat_t2star_2iso
10              14-anat_3DT2_0.9iso
11              15-anat_3DT2_0.9iso
12    16-RUNx_rs_3mmiso_TR2.4_44cpe
13    18-RUNx_er_3mmiso_TR2.4_44cpe
14              20-anat_3DT1_0.9iso
15              21-anat_3DT1_0.9iso
16    22-RUNx_rs_3mmiso_TR2.4_44cpe
17    24-RUNx_er_3mmiso_TR2.4_44cpe
18              26-anat_3DT1_0.9iso
19              27-anat_3DT1_0.9iso
Name: series_id, dtype: object
Sub-2
0                       1-AUTOALIGN
1                       2-AUTOALIGN
2                       3-AUTOALIGN
3                       4-AUTOALIGN
4                5-LOCA_T1_FL2D_SAG
5               7-RUNx_QC_tilt25_AP
6      8-RUNx_rs_3mmi

In [21]:
# Show, for each modality, the distinct series-index lists found across subjects,
# with a dashed rule between consecutive modalities.
for position, modality in enumerate(('T1', 'T2', 'T2*', 'RS')):
    if position > 0:
        print(70*'-')
    print(modality, np.unique(dcm_df[modality]))

T1 [list(['12', '13']) list(['13', '14']) list(['19', '20'])
 list(['20', '21']) list(['20', '21', '26', '27']) list(['22', '23'])
 list(['23', '24']) list(['26', '27']) list(['27', '28'])]
----------------------------------------------------------------------
T2 [list(['14', '15']) list(['16', '17']) list(['17', '18'])
 list(['20', '21']) list(['27', '28']) list(['32', '33'])]
----------------------------------------------------------------------
T2* [list(['12', '13']) list(['12', '13', '5001']) list(['14', '15'])
 list(['15', '16']) list(['18', '19']) list(['25', '26'])
 list(['30', '31'])]
----------------------------------------------------------------------
RS [list(['10', '18', '24']) list(['11', '19', '25'])
 list(['14', '22', '28']) list(['15', '21', '29']) list(['8', '14', '22'])
 list(['8', '16', '22']) list(['8', '16', '22', '101', '5003'])
 list(['8', '16', '29']) list(['8', '16', '29', '33'])
 list(['8', '18', '24']) list(['9', '15', '26', '34'])]


In [8]:
#pd.DataFrame.from_dict(infos_dict_indxs, orient='index')

### Get the differences between subjects' data

Using difflib : https://towardsdatascience.com/find-the-difference-in-python-68bbd000e513

In [20]:
# Display the table of series indexes: one row per subject, one column per
# modality (RS, T1, T2, T2*), each cell being a list of index strings.
dcm_df

Unnamed: 0,RS,T1,T2,T2*
Sub-1,"[8, 16, 22]","[20, 21, 26, 27]","[14, 15]","[12, 13]"
Sub-2,"[8, 16, 22]","[20, 21]","[14, 15]","[12, 13]"
Sub-3,"[8, 16, 22]","[20, 21]","[14, 15]","[12, 13]"
Sub-4,"[8, 16, 22]","[20, 21]","[14, 15]","[12, 13]"
Sub-5,"[14, 22, 28]","[26, 27]","[20, 21]","[18, 19]"
Sub-6,"[8, 16, 22]","[20, 21]","[14, 15]","[12, 13]"
Sub-7,"[15, 21, 29]","[19, 20]","[27, 28]","[25, 26]"
Sub-8,"[8, 14, 22]","[12, 13]","[20, 21]","[18, 19]"
Sub-9,"[8, 16, 22]","[20, 21]","[14, 15]","[12, 13]"
Sub-10,"[8, 16, 22]","[20, 21]","[14, 15]","[12, 13]"
