In [190]:
import os
from os import path,makedirs,listdir
import pandas as pd
import numpy as np
import difflib as diff
import pydicom
from pydicom import dcmread




data_path = "/mnt/data/romy/hypnomed/MRI_raw"
heudiconv_path = os.path.join(data_path,'BIDS/.heudiconv')
dicom_path = os.path.join(data_path,'DICOM')
info_output = os.path.join(data_path,'fmri_infos')
info_output_git = '/home/romy.beaute/projects/hypnomed/code/Visualize/infos'

save_csv = True
show_diff = False
subject_idx = '01' #can be any of one from the sub_idxs list
dtype = 't2star'


In [258]:
def sub_indexes_df(dicom_path,save_csv=False):
    id_dict =  {}
    heudi_list = sorted(os.listdir(heudiconv_path))
    dicom_list = sorted(os.listdir(dicom_path))
    dicom_list_idx = [idx.split('-')[1] for idx in dicom_list] #get the id number of each subject

    for idc,idh in zip(dicom_list,heudi_list):
        id_dict[idc] = {}
        #df = df_tsv(subject_idx=idh,dtype='t2star')  
        #id_dict[idc]['neucose_id']= df.patient_id[1]
        #id_dict[idc]['age']= df.patient_age
        dcm_files = [file for file in os.listdir('/mnt/data/romy/hypnomed/MRI_raw/DICOM/{}/ses-001/1_AUTOALIGN/'.format(idc)) if not file.startswith('._')]
        dcm_file = os.path.join((os.path.join(dicom_path,'{}/ses-001/1_AUTOALIGN'.format(idc))),dcm_files[0]) #take the first dcm file corresp to the 1_AUTOALIGN folder of sub idc
        ds = pydicom.dcmread(dcm_file,force=True)
        try:
            id_dict[idc]['sub_id']= ds.PatientName
            id_dict[idc]['neucose_id'] = ds.PatientID
            id_dict[idc]['image_type']= ds.ImageType
            id_dict[idc]['age']= ds.PatientAge #for sanitary check
        except:
            pass

    if save_csv:
        df_subid = pd.DataFrame.from_dict(id_dict,orient='index')
        df_subid.to_csv(info_output_git+'/sub_ids.csv') #write csv 

    map_id_neuco = dict()

    for _,(id,neuco) in enumerate(zip(list(df_subid.sub_id),list(df_subid.neucose_id))):
        map_id_neuco[id] = neuco

    return df_subid,map_id_neuco


df_subid,map_id_neuco = sub_indexes_df(dicom_path,save_csv=False)


In [266]:
def df_tsv(subject_idx,dtype):

    path_tsv = os.path.join(heudiconv_path,'{}/ses-001/info/dicominfo_ses-001.tsv'.format(subject_idx))
    df_tsv = pd.read_csv(path_tsv,sep='\t')

    filename = [type for type in df_tsv.dcm_dir_name if dtype in type]
    df = df_tsv.loc[df_tsv["dcm_dir_name"].isin(filename)]

    return df.set_axis([idx.split('_')[0] for idx in df['dcm_dir_name']],axis='index')  #set the index with the identifiant of the subject



def heudiconv_csv(heudiconv_path,save_csv,map_id_neuco):

    subject_idxs = [sub for sub in (sorted(os.listdir(heudiconv_path)))] #can choose from there the index of the subject we want to get infos 
    dtypes = ['rs','3DT1','3DT2','t2star']

    subjects_dict = {}
    for _,sub in enumerate(subject_idxs):
        subjects_dict['sub-{}'.format(sub)]={}
        for _,dtype in enumerate(dtypes):
            subjects_dict['sub-{}'.format(sub)][dtype]=df_tsv(sub,dtype)
            df = df_tsv(sub,dtype)
            subjects_dict['sub-{}'.format(sub)]['neucose_id'] = df.patient_id[1]
            subjects_dict['sub-{}'.format(sub)]['sub_id'] = [key  for (key, value) in map_id_neuco.items() if value == df.patient_id[1]]
            

            if save_csv:
                df.to_csv(info_output+'/sub-{}/{}_{}_diffinfos.csv'.format(sub,sub,dtype)) #write csv 
            
    df_heudiconv = pd.DataFrame.from_dict(subjects_dict, orient='index')
    return df_heudiconv

In [272]:
df_heudiconv = heudiconv_csv(heudiconv_path,save_csv,map_id_neuco)
df_heudiconv.to_csv(info_output_git+'/heudiconv_infos.csv')
df_heudiconv.head()

Unnamed: 0,rs,neucose_id,sub_id,3DT1,3DT2,t2star
sub-01,total_files_till_now ...,NEUCOSE_ELSES07842,"[(E, H, 0, 7, 0)]",total_files_till_now ...,total_files_till_now ...,total_files_till_now ...
sub-02,total_files_till_now ...,NEUCOSE_LECPA07862,"[(L, P, 0, 8, 7)]",total_files_till_now ...,total_files_till_now ...,total_files_till_now ...
sub-03,total_files_till_now ...,NEUCOSE_HARMA07939,"[(H, M, 0, 7, 6)]",total_files_till_now ...,total_files_till_now ...,total_files_till_now ...
sub-04,total_files_till_now ...,NEUCOSE_BUEAL07925,"[(B, A, 0, 4, 3)]",total_files_till_now ...,total_files_till_now ...,total_files_till_now ...
sub-05,total_files_till_now ...,NEUCOSE_BLUAL07960,"[(B, A, 0, 9, 3)]",total_files_till_now ...,total_files_till_now ...,total_files_till_now ...


In [221]:
#types of infos we can have access to 
keys = [key for key in df_heudiconv.loc['sub-{}'.format(subject_idx),'{}'.format(dtype)].keys()]
print(keys)

#1st way : 
df = df_tsv(subject_idx,dtype)
df.patient_id

['total_files_till_now', 'example_dcm_file', 'series_id', 'dcm_dir_name', 'unspecified2', 'unspecified3', 'dim1', 'dim2', 'dim3', 'dim4', 'TR', 'TE', 'protocol_name', 'is_motion_corrected', 'is_derived', 'patient_id', 'study_description', 'referring_physician_name', 'series_description', 'sequence_name', 'image_type', 'accession_number', 'patient_age', 'patient_sex', 'date', 'series_uid']


12    NEUCOSE_ELSES07842
13    NEUCOSE_ELSES07842
Name: patient_id, dtype: object

### Spot the differences for one specific subject and one specific type 

In [195]:
# Example with only one subject : 2 ways 


#2nd way : 
df = df_heudiconv.loc['sub-{}'.format(subject_idx),dtype if dtype is not None else ':']


if save_csv:
    df.to_csv(info_output+'/{}_{}_diffinfos.csv'.format(subject_idx,dtype)) #write csv 

if show_diff:
    for key in df.keys():
        print('\n{}'.format(key))
        for id in df.index:
            print('- file {} : {}'.format(id,df.loc[id]['{}'.format(key)]))



# Multiple files

In [196]:
data_path = "/mnt/data/romy/hypnomed/MRI_raw"
dicom_path = os.path.join(data_path,'DICOM')
bids_path = os.path.join(data_path,'BIDS')

heudiconv_path = os.path.join(bids_path,'.heudiconv')
bids_info_paths = list()

for _,sub in enumerate(sorted(os.listdir(heudiconv_path))):
    bids_info_paths.append(os.path.join(heudiconv_path,'{}/ses-001/info/dicominfo_ses-001.tsv'.format(sub))) #permet de récupérer les paths des infos pour chaque sujet

In [197]:
def get_dicom_infos(bids_info_paths,print_paths=False):

    #Create 3 dict : 
    dcm_dict_indxs = dict() #store the tsv in df format for each subject
    infos_dict_indxs = dict() #strores the infos of each dicom
    dcm_dict_paths = dict() #stores the paths of the files of interest

    for sub_indx,bid_tsv in enumerate(bids_info_paths,1):
        if print_paths: #print the paths for sanitary check
            print(70*'-')
            print('Getting infos for subject n°{}, stored in {}'.format(sub_indx,bid_tsv))
            print(70*'-')
        infos = pd.read_csv(bid_tsv,sep='\t')
        infos_dict_indxs["Sub-{}".format(sub_indx)]=infos 
        dcm_dict_indxs["Sub-{}".format(sub_indx)]={}
        dcm_dict_paths["Sub-{}".format(sub_indx)]={} 

        t2star_dcm = [type for type in infos.dcm_dir_name if 't2star' in type]
        rs_dcm = [type for type in infos.dcm_dir_name if 'rs' in type]
        t1_dcm = [type for type in infos.dcm_dir_name if '3DT1' in type]
        t2_dcm = [type for type in infos.dcm_dir_name if '3DT2' in type]

        dcm_dict = {'RS':rs_dcm,
                    'T1':t1_dcm,
                    'T2':t2_dcm,
                    'T2*':t2star_dcm}

        
        rs_indxs = list()
        t1_indxs = list()
        t2_indxs = list()
        t2star_indxs = list()
        
        for _,(key,val) in enumerate(zip(dcm_dict.keys(),dcm_dict.values())):
            if print_paths: #print the names of the files for sanitary check
                print("{} files : {}\n".format(key,val))

            if key == 'RS': #get the indx number that corresponds to each RS file :
                rs_indxs.extend([val[i].split('_')[0] for i in range(len(val))])
                dcm_dict_indxs["Sub-{}".format(sub_indx)]['RS'] = rs_indxs
                dcm_dict_paths["Sub-{}".format(sub_indx)]['RS'] = val

            elif key == 'T1':
                t1_indxs.extend([val[i].split('_')[0] for i in range(len(val))])
                dcm_dict_indxs["Sub-{}".format(sub_indx)]['T1'] = t1_indxs
                dcm_dict_paths["Sub-{}".format(sub_indx)]['T1'] = val

            elif key == 'T2':
                t2_indxs.extend([val[i].split('_')[0] for i in range(len(val))])
                dcm_dict_indxs["Sub-{}".format(sub_indx)]['T2'] = t2_indxs
                dcm_dict_paths["Sub-{}".format(sub_indx)]['T2'] = val

            elif key == 'T2*':
                t2star_indxs.extend([val[i].split('_')[0] for i in range(len(val))])
                dcm_dict_indxs["Sub-{}".format(sub_indx)]['T2*'] = t2star_indxs
                dcm_dict_paths["Sub-{}".format(sub_indx)]['T2*'] = val
        
    dcm_df = pd.DataFrame.from_dict(dcm_dict_indxs, orient='index') #convert the dictionnary into dataframes
    return dcm_df,infos_dict_indxs,dcm_dict_paths


dcm_df,infos_dict_indxs,dcm_dict_paths = get_dicom_infos(bids_info_paths,print_paths=False)