In [2]:
import scipy
import numpy as np
import scipy.io as spio


def loadmat(filename):
    '''
    Load a MATLAB ``.mat`` file and return plain Python containers.

    This function should be called instead of calling ``spio.loadmat``
    directly, as it cures the problem of not properly recovering Python
    dictionaries from mat files: every top-level entry that is still a
    MATLAB struct object is converted to nested dictionaries.

    Parameters
    ----------
    filename : str
        Path of the ``.mat`` file; passed unchanged to ``spio.loadmat``.

    Returns
    -------
    dict
        The dictionary produced by ``spio.loadmat`` (called with
        ``struct_as_record=False, squeeze_me=True``). Top-level struct
        objects are replaced by dictionaries; inside those, arrays become
        lists. Top-level values that are not struct objects (arrays
        included) are returned untouched.
    '''
    # Recent SciPy exposes the struct class publicly as
    # ``scipy.io.matlab.mat_struct``; the ``mio5_params`` module is a private,
    # deprecated location and is only used as a fallback for older releases.
    mat_struct = getattr(spio.matlab, 'mat_struct', None)
    if mat_struct is None:
        mat_struct = spio.matlab.mio5_params.mat_struct

    def _check_keys(d):
        '''
        Checks if entries in the dictionary are mat-objects. If yes,
        _todict is called to change them to nested dictionaries.
        '''
        # Only existing keys are reassigned, so iterating while updating is safe.
        for key in d:
            if isinstance(d[key], mat_struct):
                d[key] = _todict(d[key])
        return d

    def _todict(matobj):
        '''
        A recursive function which constructs nested dictionaries from
        matobjects.
        '''
        d = {}
        for strg in matobj._fieldnames:
            elem = matobj.__dict__[strg]
            if isinstance(elem, mat_struct):
                d[strg] = _todict(elem)
            elif isinstance(elem, np.ndarray):
                d[strg] = _tolist(elem)
            else:
                d[strg] = elem
        return d

    def _tolist(ndarray):
        '''
        A recursive function which constructs lists from cellarrays
        (which are loaded as numpy ndarrays), recursing into the elements
        if they contain matobjects.
        '''
        elem_list = []
        for sub_elem in ndarray:
            if isinstance(sub_elem, mat_struct):
                elem_list.append(_todict(sub_elem))
            elif isinstance(sub_elem, np.ndarray):
                elem_list.append(_tolist(sub_elem))
            else:
                elem_list.append(sub_elem)
        return elem_list

    data = spio.loadmat(filename, struct_as_record=False, squeeze_me=True)
    return _check_keys(data)

In [None]:
from pydicom import dcmread

In [None]:
# Switch the working directory so that the relative .mat file names loaded in
# the following cell resolve against this folder.
# NOTE(review): this is an IPython automagic line (not Python) and the path is
# an absolute, machine-specific location.
cd '/home/raghuram/Desktop/radiomics/STUDIES/LGG_study/WORKSPACE/TCGA_DATA'

In [None]:
# Load the four per-sequence .mat files from the current working directory.
# Each result is the dictionary returned by loadmat; the cells below read one
# key out of each of them.
FLAIR_data = loadmat(filename='T2Fpath.mat')
T1W_data = loadmat(filename='T1Wpath.mat')
T1CE_data = loadmat(filename='T1CEpath.mat')
T2W_data = loadmat(filename='T2Wpath.mat')

In [5]:
import os

# Make the external drive the working directory; the next cell reads
# 'dicom_filenames.csv' using a relative name.
dicom_root = '/media/raghuram/My Passport'
os.chdir(dicom_root)

In [None]:
import pandas as pd

# The CSV has no header row: every line becomes one value of the single
# 'filename' column.
df = pd.read_csv(
    'dicom_filenames.csv',
    header=None,
    names=['filename'],
)

In [None]:
# Collect the path entry of every loaded .mat dictionary, in the order
# FLAIR, T1W, T1CE, T2W.
list_of_path = [
    mat_contents[path_key]
    for mat_contents, path_key in (
        (FLAIR_data, 'T2Fpath'),
        (T1W_data, 'T1Wpath'),
        (T1CE_data, 'T1CEpath'),
        (T2W_data, 'T2Wpath'),
    )
]

In [None]:
# Lookup table filled by write_to_dict and read by fetch_sequence_name:
# entry of a '*path' list -> sequence label.
sequence_dict = dict()

In [None]:
# Entries stored under 'T1CEpath'; they are registered in sequence_dict with
# the label 'T1CE' further down.
t1ce_path = T1CE_data['T1CEpath']

In [None]:
# Entries stored under 'T2Fpath'; they are registered in sequence_dict with
# the label 'T2F' further down.
flair_path = FLAIR_data['T2Fpath']

In [None]:
# Entries stored under 'T2Wpath'; they are registered in sequence_dict with
# the label 'T2W' further down.
t2w_path = T2W_data['T2Wpath']

In [None]:
# Entries stored under 'T1Wpath'; they are registered in sequence_dict with
# the label 'T1W' further down.
t1w_path = T1W_data['T1Wpath']

In [None]:
def write_to_dict(sequence_dict, sequence_path, sequence_name):
    '''
    Register every string entry of ``sequence_path`` under ``sequence_name``.

    Parameters
    ----------
    sequence_dict : dict
        Mapping updated in place; each string entry becomes a key whose value
        is ``sequence_name``.
    sequence_path : iterable or str
        Entries to register. Non-string entries are skipped. A single bare
        string is treated as a one-entry collection.
    sequence_name : str
        Label stored for every registered entry.

    Returns
    -------
    None
    '''
    # A lone string would otherwise be iterated character by character and
    # every character would be stored as a key. This notebook's loadmat passes
    # squeeze_me=True, which presumably can hand back a bare string for a
    # one-entry list -- TODO confirm against the .mat files.
    if isinstance(sequence_path, str):
        sequence_path = [sequence_path]

    for instanceid in sequence_path:
        if isinstance(instanceid, str):
            sequence_dict[instanceid] = sequence_name

In [None]:
# Fill sequence_dict from the four path lists, in the order T1CE, T1W, T2W,
# T2F (a later call overwrites an earlier one for a shared entry).
for path_entries, sequence_label in (
    (t1ce_path, 'T1CE'),
    (t1w_path, 'T1W'),
    (t2w_path, 'T2W'),
    (flair_path, 'T2F'),
):
    write_to_dict(sequence_dict, path_entries, sequence_label)

In [None]:
def fetch_magnetic_strength(data):
    '''
    Return ``data.MagneticFieldStrength``, rescaled when it is above 1000.

    Values greater than 1000 are divided by 10000; smaller values are
    returned as read. ``None`` is returned when the attribute cannot be read
    or compared.
    '''
    try:
        strength = data.MagneticFieldStrength
        # NOTE(review): the 1000 / 10000 pair looks like a gauss-to-tesla
        # conversion -- confirm the intended units.
        return strength / 10000 if strength > 1000 else strength
    except Exception:
        return None
    

In [None]:
def fetch_patient_name(filename, component_index=6):
    '''
    Return one ``'/'``-separated component of ``filename``.

    Parameters
    ----------
    filename : str
        Path of a file, using ``'/'`` as separator.
    component_index : int, optional
        Index into ``filename.split('/')`` of the component to return.
        Defaults to 6, the position this notebook has always used.
        NOTE(review): presumably the patient folder in the directory layout
        of the DICOM drive -- confirm against the actual paths.

    Returns
    -------
    str or None
        The selected component, or ``None`` when ``filename`` is not a string
        or does not have that many components.
    '''
    try:
        return filename.split('/')[component_index]
    except (AttributeError, IndexError, TypeError):
        # AttributeError/TypeError: filename is not a str (e.g. None, NaN,
        # bytes) or the index is not an integer; IndexError: path too short.
        return None

In [None]:
def fetch_scanner_model_name(data):
    '''
    Return ``data.ManufacturerModelName``, or ``None`` if reading it fails.
    '''
    try:
        return data.ManufacturerModelName
    except Exception:
        # ``Exception`` instead of a bare ``except`` so that KeyboardInterrupt
        # and SystemExit still stop the long extraction loop.
        return None

In [None]:
def fetch_scanner_model_manufacturer(data):
    '''
    Return ``data.Manufacturer``, or ``None`` if reading it fails.
    '''
    try:
        return data.Manufacturer
    except Exception:
        # ``Exception`` instead of a bare ``except`` so that KeyboardInterrupt
        # and SystemExit still stop the long extraction loop.
        return None

In [None]:
def fetch_sequence_name(seriesinstanceUID):
    '''
    Look up ``seriesinstanceUID`` in the module-level ``sequence_dict``.

    Returns the stored label, ``'NA'`` when the key is not present, or
    ``None`` when the lookup itself raises (for example an unhashable key).
    '''
    try:
        return sequence_dict.get(seriesinstanceUID, 'NA')
    except Exception:
        # ``Exception`` instead of a bare ``except`` so that KeyboardInterrupt
        # and SystemExit still stop the long extraction loop.
        return None

In [None]:
def fetch_seriesinstance_uid(data):
    '''
    Return ``data.SeriesInstanceUID``, or ``None`` if reading it fails.
    '''
    try:
        return data.SeriesInstanceUID
    except Exception:
        # ``Exception`` instead of a bare ``except`` so that KeyboardInterrupt
        # and SystemExit still stop the long extraction loop.
        return None

In [None]:
def fetch_flip_angle(data):
    '''
    Return ``data.FlipAngle``, or ``None`` if reading it fails.
    '''
    try:
        return data.FlipAngle
    except Exception:
        # ``Exception`` instead of a bare ``except`` so that KeyboardInterrupt
        # and SystemExit still stop the long extraction loop.
        return None

In [None]:
def fetch_inversion_time(data):
    '''
    Return ``data.InversionTime``, or ``None`` if reading it fails.
    '''
    try:
        return data.InversionTime
    except Exception:
        # ``Exception`` instead of a bare ``except`` so that KeyboardInterrupt
        # and SystemExit still stop the long extraction loop.
        return None

In [None]:
def fetch_repetition_time(data):
    '''
    Return ``data.RepetitionTime``, or ``None`` if reading it fails.
    '''
    try:
        return data.RepetitionTime
    except Exception:
        # ``Exception`` instead of a bare ``except`` so that KeyboardInterrupt
        # and SystemExit still stop the long extraction loop.
        return None

In [None]:
def fetch_excitation_time(data):
    '''
    Return ``data.EchoTime``, or ``None`` if reading it fails.

    Despite the function name, the attribute read is ``EchoTime``.
    '''
    try:
        return data.EchoTime
    except Exception:
        # ``Exception`` instead of a bare ``except`` so that KeyboardInterrupt
        # and SystemExit still stop the long extraction loop.
        return None

In [None]:
# One list per metadata column. Every list receives exactly one entry per row
# of df, so they can be assigned back to df as columns afterwards.
sequence_name = []
magnetic_strength_list = []
scanner_model_list = []
scanner_manufacturer_list = []
patient_list = []
series_list = []
flip_angle_list = []
repetition_time = []
excitation_time = []

# The order here must match the order of the row_values tuple built below.
_column_lists = (
    scanner_model_list,
    scanner_manufacturer_list,
    patient_list,
    magnetic_strength_list,
    series_list,
    sequence_name,
    flip_angle_list,
    repetition_time,
    excitation_time,
)

for index, row in df.iterrows():
    if index%1000 == 0:
        print('{} entries processed'.format(index+1))
    try:
        # Only header attributes are read below, so the pixel data is skipped.
        data = dcmread(row['filename'], stop_before_pixels=True)
        # Go through the guarded getter: a direct attribute access would raise
        # halfway through the row and leave the lists with unequal lengths.
        series_uid = fetch_seriesinstance_uid(data)
        row_values = (
            fetch_scanner_model_name(data),
            fetch_scanner_model_manufacturer(data),
            fetch_patient_name(row['filename']),
            fetch_magnetic_strength(data),
            series_uid,
            fetch_sequence_name(series_uid),
            fetch_flip_angle(data),
            fetch_repetition_time(data),
            fetch_excitation_time(data),
        )
    except Exception as e:
        print('Error {} at index {}'.format(e ,index))
        # Keep the row as a placeholder so the lists stay aligned with df.
        row_values = (None,) * len(_column_lists)
    for column_list, value in zip(_column_lists, row_values):
        column_list.append(value)
        

In [None]:
# Attach every list collected from the DICOM files to df as a new column.
# The dictionary order is the order in which the columns are added.
new_columns = {
    'seriesinstanceuid': series_list,
    'scanner_model': scanner_model_list,
    'scanner_manufacturer': scanner_manufacturer_list,
    'flip_angle': flip_angle_list,
    'patient_name': patient_list,
    'mag_field_strength': magnetic_strength_list,
    'sequence_name': sequence_name,
    'repetition_time': repetition_time,
    'excitation_time': excitation_time,
}
for column_name, column_values in new_columns.items():
    df[column_name] = column_values

In [None]:
# Keep only the first row for every (series UID, sequence label) pair, then
# display the resulting shape.
dedup_keys = ['seriesinstanceuid', 'sequence_name']
df.drop_duplicates(subset=dedup_keys, inplace=True)
df.shape

In [None]:
# Collapse the vendor spellings to one short name per vendor.
df.replace({'General Electric':'GE', 'GE MEDICAL SYSTEMS':'GE', 'SIEMENS':'Siemens', 'Philips Healthcare': 'Philips', 
           'Philips Medical Systems': 'Philips'}, inplace=True)
# .copy() makes the filtered frame independent, so the column assignment below
# does not write into a slice of the previous frame (SettingWithCopyWarning).
df = df[df['scanner_manufacturer'] != 'Hitachi Medical Corporation'].copy()
# Name of the texture .mat file expected for this patient and sequence.
df['mat_file_name'] = df['patient_name']+'_'+df['sequence_name']+'.mat'
# 'filename' and 'seriesinstanceuid' stay in the CSV; they are dropped later,
# when the per-file feature CSVs are combined.
df.to_csv('mapped_files_to_sequences.csv', index=False)

In [7]:
# Reload the saved file-to-sequence mapping from the external drive.
os.chdir('/media/raghuram/My Passport')
import pandas as pd

mapping_csv = 'mapped_files_to_sequences.csv'
df = pd.read_csv(mapping_csv)

In [9]:
# Rows labelled as T1CE, and the .mat file names expected for them.
is_t1ce = df['sequence_name'] == 'T1CE'
df_T1CE = df[is_t1ce]
T1CE_files = [mat_name for mat_name in df_T1CE['mat_file_name']]

In [10]:
# Work inside the folder holding the texture .mat files; the glob calls and
# the feature CSVs written below use relative names.
textures_mat_dir = '/home/raghuram/Desktop/radiomics/TEXTURES/mat_folder'
os.chdir(textures_mat_dir)

In [11]:
import glob

In [16]:
# All T1CE texture .mat files present in the current working directory.
t1ce_mat_pattern = '*_T1CE.mat'
T1CE_mat_list = glob.glob(t1ce_mat_pattern)

In [17]:
def extract_flatten_features(data, filename, max_experiment=25):
    '''
    Flatten the texture experiments of one .mat file into a CSV.

    For every ``'Experiment<N>'`` key of ``data['textures']['List']`` with
    ``N <= max_experiment``, the nested dictionary
    ``data['textures']['Experiment<N>']`` is flattened into a one-row frame
    (nested keys joined with ``'_'``), inner-merged on ``'mat_file_name'``
    with the module-level ``df``, and tagged with the experiment number and
    the scale / algo / ng values parsed from the experiment description.
    All rows are written to ``<filename without extension>_features.csv``
    in the current working directory.

    Parameters
    ----------
    data : dict
        Dictionary returned by ``loadmat`` for ``filename``.
    filename : str
        Name of the .mat file; used as merge key and to name the output CSV.
    max_experiment : int, optional
        Highest experiment number to include. Defaults to 25, the cutoff
        this notebook has always used.

    Returns
    -------
    None

    Notes
    -----
    Reads the global ``df``; it must hold the file-to-sequence mapping (with
    a ``'mat_file_name'`` column) when this function is called.
    '''
    # ``pd.io.json.json_normalize`` was deprecated in favour of the top-level
    # ``pd.json_normalize``; use the latter and fall back only on old pandas.
    normalize = getattr(pd, 'json_normalize', None) or pd.io.json.json_normalize

    features_flattened = []

    for experiment_, values in data['textures']['List'].items():

        experiment_number = int(experiment_.split('Experiment')[1])
        if experiment_number > max_experiment:
            # Skip instead of break, so the selection does not depend on the
            # order in which the experiments are listed.
            continue

        # The description is a comma-separated list of 'name=value' fields,
        # read positionally: scale, algorithm, number of grey levels.
        fields = values.split(',')
        scale_ = float(fields[0].split('=')[1])
        algo_ = fields[1].split('=')[1]
        ng_ = int(fields[2].split('=')[1])

        flattened_df = normalize(data['textures'][experiment_], sep='_')
        flattened_df['mat_file_name'] = filename
        flattened_df_merged = pd.merge(flattened_df, df, on='mat_file_name', how='inner')
        flattened_df_merged['experiment_number'] = experiment_number
        flattened_df_merged['scale'] = scale_
        flattened_df_merged['algo'] = algo_
        flattened_df_merged['ng'] = ng_
        features_flattened.append(flattened_df_merged)

    features_df_concat = pd.concat(features_flattened, ignore_index=True)
    # rsplit strips only the final extension, so names that contain other
    # dots keep their full stem.
    features_df_concat.to_csv(filename.rsplit('.', 1)[0]+'_features'+'.csv', index=False)

In [18]:
def form_texture_csv(sequence_mat_list, sequence_file_list):
    '''
    Write a feature CSV for every file name present in both lists.

    Prints the number of shared names, then for each of them prints a
    running counter, loads the file with ``loadmat`` and passes the result
    to ``extract_flatten_features``.
    '''
    common_files = set(sequence_file_list) & set(sequence_mat_list)
    print(len(common_files))
    for file_number, mat_file in enumerate(common_files, start=1):
        print('Processing file number {}'.format(file_number))
        extract_flatten_features(loadmat(mat_file), mat_file)

In [19]:
# Build one '<name>_features.csv' per T1CE .mat file that is both on disk and
# listed in the mapping.
form_texture_csv(sequence_mat_list=T1CE_mat_list, sequence_file_list=T1CE_files)

99
Processing file number 1
Processing file number 2
Processing file number 3
Processing file number 4
Processing file number 5
Processing file number 6
Processing file number 7
Processing file number 8
Processing file number 9
Processing file number 10
Processing file number 11
Processing file number 12
Processing file number 13
Processing file number 14
Processing file number 15
Processing file number 16
Processing file number 17
Processing file number 18
Processing file number 19
Processing file number 20
Processing file number 21
Processing file number 22
Processing file number 23
Processing file number 24
Processing file number 25
Processing file number 26
Processing file number 27
Processing file number 28
Processing file number 29
Processing file number 30
Processing file number 31
Processing file number 32
Processing file number 33
Processing file number 34
Processing file number 35
Processing file number 36
Processing file number 37
Processing file number 38
Processing file nu

In [20]:
# Per-file T1CE feature CSVs present in the current working directory.
t1ce_features_pattern = '*_T1CE_features.csv'
t1ce_files = glob.glob(t1ce_features_pattern)

In [22]:
# Combine the per-file T1CE feature CSVs into one experiment table.
df_list = []
# Iterate the T1CE list built by the glob above (the loop previously read an
# undefined name). Sorting makes the row order of the output reproducible.
# NOTE(review): the loop rebinds the global ``df`` that
# extract_flatten_features merges against; reload the mapping before calling
# form_texture_csv again.
for t1ce_file in sorted(t1ce_files):
    df = pd.read_csv(t1ce_file)
    df['Tumor'] = df['patient_name']
    df = df.drop(columns=['filename', 'seriesinstanceuid', 'patient_name', 'parameters_Algo', 'parameters_Scale',
                          'parameters_Ng'])
    df_list.append(df)

pd.concat(df_list).to_csv(os.path.join('/home/raghuram/Desktop/radiomics/TEXTURES', 'expt_t1ce.csv'), index=False)