In [220]:
import os
from os import path,makedirs,listdir
import pandas as pd
import numpy as np
import difflib as diff



# --- Configuration -----------------------------------------------------------
# Root folder of the raw MRI data; the other locations are derived from it.
data_path = "/mnt/data/romy/hypnomed/MRI_raw"

# Folder holding one sub-folder per subject with its dicominfo TSV file.
heudiconv_path = os.path.join(data_path, 'BIDS/.heudiconv')
# Folder receiving the CSV files exported by this notebook.
info_output = os.path.join(data_path, 'fmri_infos')

# Flag meant to toggle the CSV export.
save_csv = True

In [249]:
def df_tsv(subject_idx, dtype, heudiconv_dir=None):
    """Return the rows of one subject's dicominfo table whose folder name contains ``dtype``.

    Parameters
    ----------
    subject_idx : str
        Name of the subject folder inside the heudiconv folder (e.g. ``'05'``).
    dtype : str
        Plain, case-sensitive substring searched in the ``dcm_dir_name`` column
        (e.g. ``'t2star'``).
    heudiconv_dir : str, optional
        Folder containing the subject folders. Defaults to the notebook-level
        ``heudiconv_path`` so existing two-argument calls keep working.

    Returns
    -------
    pandas.DataFrame
        The matching rows, indexed by the text that precedes the first ``'_'``
        of their ``dcm_dir_name`` (kept as strings).
    """
    if heudiconv_dir is None:
        heudiconv_dir = heudiconv_path  # notebook-level default

    path_tsv = os.path.join(heudiconv_dir, '{}/ses-001/info/dicominfo_ses-001.tsv'.format(subject_idx))
    infos = pd.read_csv(path_tsv, sep='\t')

    # Missing folder names are read back as NaN (a float): treat them as "no match"
    # instead of letting the substring test raise a TypeError.
    matches = pd.Series(
        [isinstance(name, str) and dtype in name for name in infos['dcm_dir_name']],
        index=infos.index,
        dtype=bool,
    )
    selected = infos.loc[matches]

    # Index the rows by the leading identifier of the folder name
    file_ids = [name.split('_')[0] for name in selected['dcm_dir_name']]
    return selected.set_axis(file_ids, axis='index')



def heudiconv_csv(heudiconv_path, save_csv, output_dir=None):
    """Collect, for every subject folder, the dicominfo rows of each data type.

    Parameters
    ----------
    heudiconv_path : str
        Folder containing one sub-folder per subject; the same folder is used
        both to list the subjects and to read their TSV files.
    save_csv : bool
        When true, write one CSV per subject and data type to
        ``<output_dir>/sub-<subject>/<subject>_<dtype>_diffinfos.csv``.
    output_dir : str, optional
        Root folder of the CSV export. Defaults to the notebook-level
        ``info_output``.

    Returns
    -------
    pandas.DataFrame
        One row per subject (``'sub-<subject>'``) and one column per data type
        (``'rs'``, ``'3DT1'``, ``'3DT2'``, ``'t2star'``); every cell holds the
        DataFrame returned by ``df_tsv`` for that subject and type.
    """
    dtypes = ['rs', '3DT1', '3DT2', 't2star']

    # Only directories are subjects; stray files in the folder are ignored.
    subject_idxs = sorted(
        entry for entry in os.listdir(heudiconv_path)
        if os.path.isdir(os.path.join(heudiconv_path, entry))
    )

    if save_csv and output_dir is None:
        output_dir = info_output  # notebook-level default

    subjects_dict = {}
    for sub in subject_idxs:
        sub_key = 'sub-{}'.format(sub)
        subjects_dict[sub_key] = {}

        if save_csv:
            # to_csv does not create missing folders
            subject_dir = os.path.join(output_dir, sub_key)
            os.makedirs(subject_dir, exist_ok=True)

        for dtype in dtypes:
            # Read each table once and reuse it for both the summary and the export
            table = df_tsv(sub, dtype, heudiconv_dir=heudiconv_path)
            subjects_dict[sub_key][dtype] = table

            if save_csv:
                table.to_csv(os.path.join(subject_dir, '{}_{}_diffinfos.csv'.format(sub, dtype)))  # write csv

    df_heudiconv = pd.DataFrame.from_dict(subjects_dict, orient='index')
    return df_heudiconv

In [250]:
# Build the subject x data-type summary, then save and display it.
os.makedirs(info_output, exist_ok=True)  # to_csv does not create missing folders
df_heudiconv = heudiconv_csv(heudiconv_path,save_csv=save_csv)  # honour the flag set in the configuration cell
# NOTE(review): every cell of df_heudiconv holds a whole DataFrame, so this CSV only
# stores their (truncated) text representation; the per-subject CSV files written by
# heudiconv_csv are the usable export.
df_heudiconv.to_csv(info_output+'/heudiconv_infos.csv')
df_heudiconv

Unnamed: 0,rs,3DT1,3DT2,t2star
sub-01,total_files_till_now ...,total_files_till_now ...,total_files_till_now ...,total_files_till_now ...
sub-02,total_files_till_now ...,total_files_till_now ...,total_files_till_now ...,total_files_till_now ...
sub-03,total_files_till_now ...,total_files_till_now ...,total_files_till_now ...,total_files_till_now ...
sub-04,total_files_till_now ...,total_files_till_now ...,total_files_till_now ...,total_files_till_now ...
sub-05,total_files_till_now ...,total_files_till_now ...,total_files_till_now ...,total_files_till_now ...
sub-06,total_files_till_now ...,total_files_till_now ...,total_files_till_now ...,total_files_till_now ...
sub-07,total_files_till_now ...,total_files_till_now ...,total_files_till_now ...,total_files_till_now ...
sub-08,total_files_till_now ...,total_files_till_now ...,total_files_till_now ...,total_files_till_now ...
sub-09,total_files_till_now ...,total_files_till_now ...,total_files_till_now ...,total_files_till_now ...
sub-10,total_files_till_now ...,total_files_till_now ...,total_files_till_now ...,total_files_till_now ...


In [242]:
# Display the table stored for subject 'sub-05' and data type '3DT1'
# (each cell of df_heudiconv holds the DataFrame returned by df_tsv).
df_heudiconv.loc['sub-05','3DT1']

Unnamed: 0,total_files_till_now,example_dcm_file,series_id,dcm_dir_name,unspecified2,unspecified3,dim1,dim2,dim3,dim4,...,study_description,referring_physician_name,series_description,sequence_name,image_type,accession_number,patient_age,patient_sex,date,series_uid
26,2558,1.3.12.2.1107.5.2.43.66012.3000001808210749349...,26-anat_3DT1_0.9iso,26_anat_3DT1_0.9iso,-,-,256,256,208,1,...,HUMAINS^NEUCOSE,,anat_3DT1_0.9iso,*tfl3d1_16ns,"('ORIGINAL', 'PRIMARY', 'M', 'ND')",BLUAL07960,025Y,M,20180821,1.3.12.2.1107.5.2.43.66012.2018082112242068150...
27,2766,1.3.12.2.1107.5.2.43.66012.3000001808210749349...,27-anat_3DT1_0.9iso,27_anat_3DT1_0.9iso,-,-,256,256,208,1,...,HUMAINS^NEUCOSE,,anat_3DT1_0.9iso,*tfl3d1_16ns,"('ORIGINAL', 'PRIMARY', 'M', 'ND', 'NORM')",BLUAL07960,025Y,M,20180821,1.3.12.2.1107.5.2.43.66012.2018082112170679919...


### Spot the differences for one specific subject and one specific data type (`dtype`)

In [244]:
# Example with a single subject: two equivalent ways to get the same table

subject_idx = '05' #any subject folder name found in heudiconv_path
dtype = 't2star'   #must be one of the columns of df_heudiconv
save = True
show_diff = True


#1st way : read the subject's TSV file directly
df = df_tsv(subject_idx,dtype)

#2nd way : look the table up in the summary built above
df = df_heudiconv.loc['sub-{}'.format(subject_idx),dtype]


if save:
    os.makedirs(info_output, exist_ok=True)  # to_csv does not create missing folders
    df.to_csv(info_output+'/{}_{}_diffinfos.csv'.format(subject_idx,dtype)) #write csv 

if show_diff:
    # For every column, print the value of each selected file so they can be compared
    for key in df.keys():
        print('\n{}'.format(key))
        for file_id, value in df[key].items():
            print('- file {} : {}'.format(file_id, value))




total_files_till_now
- file 18 : 1149
- file 19 : 1549

example_dcm_file
- file 18 : 1.3.12.2.1107.5.2.43.66012.30000018082107493498100000004-18-1-fcw86c.dcm
- file 19 : 1.3.12.2.1107.5.2.43.66012.30000018082107493498100000004-19-1-1kgdene.dcm

series_id
- file 18 : 18-anat_t2star_2iso
- file 19 : 19-anat_t2star_2iso

dcm_dir_name
- file 18 : 18_anat_t2star_2iso
- file 19 : 19_anat_t2star_2iso

unspecified2
- file 18 : -
- file 19 : -

unspecified3
- file 18 : -
- file 19 : -

dim1
- file 18 : 108
- file 19 : 108

dim2
- file 18 : 128
- file 19 : 128

dim3
- file 18 : 400
- file 19 : 400

dim4
- file 18 : 1
- file 19 : 1

TR
- file 18 : 2.4
- file 19 : 2.4

TE
- file 18 : 10.0
- file 19 : 10.0

protocol_name
- file 18 : anat_t2star_2iso
- file 19 : anat_t2star_2iso

is_motion_corrected
- file 18 : False
- file 19 : False

is_derived
- file 18 : False
- file 19 : False

patient_id
- file 18 : NEUCOSE_BLUAL07960
- file 19 : NEUCOSE_BLUAL07960

study_description
- file 18 : HUMAINS^NEUCO

# Multiple files

In [153]:
# Folder layout of the raw MRI data
data_path = "/mnt/data/romy/hypnomed/MRI_raw"
dicom_path = os.path.join(data_path, 'DICOM')
bids_path = os.path.join(data_path, 'BIDS')
heudiconv_path = os.path.join(bids_path, '.heudiconv')

# Collect the path of the dicominfo TSV file of every subject folder, sorted by folder name
bids_info_paths = []
for sub in sorted(os.listdir(heudiconv_path)):
    subject_tsv = '{}/ses-001/info/dicominfo_ses-001.tsv'.format(sub)
    bids_info_paths.append(os.path.join(heudiconv_path, subject_tsv))

In [154]:
def get_dicom_infos(bids_info_paths,print_paths=False):
    """Group the DICOM folder names listed in each dicominfo TSV by modality.

    Parameters
    ----------
    bids_info_paths : iterable of str
        Paths of the tab-separated dicominfo files, one per subject.
    print_paths : bool, default False
        When true, print the file being read and the folder names found for
        each modality (sanity check).

    Returns
    -------
    dcm_df : pandas.DataFrame
        One row per subject and one column per modality (``'RS'``, ``'T1'``,
        ``'T2'``, ``'T2*'``); each cell is the list of leading identifiers
        (text before the first ``'_'``) of the matching folder names.
    infos_dict_indxs : dict
        Maps each subject key to the full table read from its TSV file.
    dcm_dict_paths : dict
        Maps each subject key to a ``{modality: [folder names]}`` dict.

    Notes
    -----
    Subject keys are ``'Sub-<n>'`` where ``n`` is the 1-based position of the
    file in ``bids_info_paths``, not an identifier read from the path.
    """
    # (output key, substring searched in dcm_dir_name), in output column order
    modality_patterns = (
        ('RS', 'rs'),
        ('T1', '3DT1'),
        ('T2', '3DT2'),
        ('T2*', 't2star'),
    )

    dcm_dict_indxs = dict()    # per subject: leading identifiers of the folders of each modality
    infos_dict_indxs = dict()  # per subject: the TSV content as a DataFrame
    dcm_dict_paths = dict()    # per subject: folder names of each modality

    for sub_indx, bid_tsv in enumerate(bids_info_paths, 1):
        if print_paths: # print the paths for sanity check
            print(70*'-')
            print('Getting infos for subject n°{}, stored in {}'.format(sub_indx,bid_tsv))
            print(70*'-')

        sub_key = "Sub-{}".format(sub_indx)
        infos = pd.read_csv(bid_tsv, sep='\t')
        infos_dict_indxs[sub_key] = infos
        dcm_dict_indxs[sub_key] = {}
        dcm_dict_paths[sub_key] = {}

        # Missing folder names are read back as NaN (a float); skip them so the
        # substring tests below cannot raise a TypeError.
        dir_names = [name for name in infos['dcm_dir_name'] if isinstance(name, str)]

        for key, pattern in modality_patterns:
            val = [name for name in dir_names if pattern in name]
            if print_paths: # print the names of the files for sanity check
                print("{} files : {}\n".format(key,val))

            dcm_dict_indxs[sub_key][key] = [name.split('_')[0] for name in val]
            dcm_dict_paths[sub_key][key] = val

    dcm_df = pd.DataFrame.from_dict(dcm_dict_indxs, orient='index') # convert the dictionary into a dataframe
    return dcm_df,infos_dict_indxs,dcm_dict_paths


# Run the extraction on every TSV path collected in bids_info_paths (path printing disabled)
dcm_df,infos_dict_indxs,dcm_dict_paths = get_dicom_infos(bids_info_paths,print_paths=False)