## Jupyter Notebook for Data Vis and Exploration

In [2]:
import scipy.io 
import os
import numpy as np
import pandas as pd

#### Loading .mat data for one rabbit
1. Loading multi-parametric data into dataframe

In [27]:
gl_path = 'MATLAB_Output_Datasets/'

#anNumbers = [f for f in os.listdir(gl_path) if os.path.isdir(os.path.join(gl_path,f))]
print(anNumbers)
anNumbers = ['18_044','18_045']
# Define functions for generating categorical segmentation of each index 

def getSegmentations(df):
    # Segmentation 1+ "Viable" and "Non-viable" only 
    def segmentmethod1(row):
        if row['allV'] == 1:
            return 'allV'
        if row['nonV'] == 1: 
            return 'nonV'

    Seg1 = df.apply(segmentmethod1,axis=1)
    #df['Seg1'] = df.apply(segmentmethod1,axis=1)
    print('Number of voxels in Seg1:')
    print(Seg1.value_counts())

    # Segmentation 2: "Non-viable", "Viable Boundary","Viable Muscle"
    def segmentmethod2(row):
        if row['nonV'] == 1:
            return 'nonV'
        if row['bndV'] == 1:
            return 'bndV'
        if row['mucV'] == 1:
            return 'mucV'

    Seg2 = df.apply(segmentmethod2,axis=1)
    print('Numer of voxels in Seg2:')
    print(Seg2.value_counts())
          
    return Seg1,Seg2

['18_044', '18_045', '18_047', '18_048', '18_054', '18_060']


In [31]:
masDF = pd.HDFStore('Master_dataframes.h5')
for anID in anNumbers:

    # Get the Multi-parametric data for 18_044
    data_path = 'MATLAB_Output_DataSets/' + anID + '/Param_maps/'

    # Get the list of .mat files from the animal path 
    file_list = [f for f in os.listdir(data_path) if os.path.isfile(os.path.join(data_path, f))]
    file_list = np.asarray(file_list)


    # Initialize import_array with first file in file_list
    fname = file_list[0]
    mat = scipy.io.loadmat(data_path + fname)
    datarray = mat['paramImg']
    sz = datarray.shape
    datvec = datarray.reshape((sz[0]*sz[1]*sz[2],1))
    import_array = datvec
    colNames = fname[0:-4]

    #define the remaining files in file-listn
    file_listn = file_list[1:]  

    # Loop through files, load data, reshape into an array and append to "import_array"
    for f in file_listn:
        mat = scipy.io.loadmat(data_path + f)
        datarray = mat['paramImg']
        sz = datarray.shape
        datvec = datarray.reshape((sz[0]*sz[1]*sz[2],1))
        import_array = np.concatenate([import_array,datvec],axis=1)
        colName = f[0:-4]
        colNames = np.append(colNames,colName)

    # Create the dataframe
    dfAN = pd.DataFrame(data=import_array, columns=colNames)
    dfAN['anID'] = anID
    dfAN['ImageIdx'] = pd.Series(df044.index,index=df044.index)

    # Get the ROIs and Labels too 
    data_path = 'MATLAB_Output_DataSets/' + anID + '/Param_maps/ROI_masks/'

    # Get the list of .mat files from the animal path 
    file_list = [f for f in os.listdir(data_path) if os.path.isfile(os.path.join(data_path, f))]
    file_list = np.asarray(file_list)
    ii = [i for i,item in enumerate(file_list) if "Res" in item]
    files = file_list[ii]

    for f in files:
        colName = f[10:-4]
        print('ROI file type: ' + colName)
        mat = scipy.io.loadmat(data_path + f)
        allV = mat['ROIviaA']  #all viable tissue in muscle and tumor
        nonV = mat['ROInpv']   # all non-perfused/non-viable tissue
        bndV = mat['ROIbnd']   # viable voxels that surround non-perfused volume (mostly enhancing)
        mucV = mat['ROIviaB']  # viable voxels that exclude bndV (mostly healthy mucsle)

        # reshape into vectors 
        sz = allV.shape
        allV = allV.reshape((sz[0]*sz[1]*sz[2]))
        nonV = nonV.reshape((sz[0]*sz[1]*sz[2]))
        bndV = bndV.reshape((sz[0]*sz[1]*sz[2]))
        mucV = mucV.reshape((sz[0]*sz[1]*sz[2]))

        df = pd.DataFrame({'allV': allV,'nonV': nonV,'bndV':bndV,'mucV':mucV},index = dfAN.index)

        colNames = ['Seg1_' + colName, 'Seg2_' + colName, 'LABEL_'+ colName]
        [Seg1, Seg2] = getSegmentations(df)

        dfAN.loc[:,colNames[0]] = pd.DataFrame(Seg1,index=dfAN.index)
        dfAN.loc[:,colNames[1]] = pd.DataFrame(Seg2,index=dfAN.index)

        data_path2 = 'MATLAB_Output_DataSets/' + anID + '/Param_maps/Labels/'

        # Get the list of .mat files from the animal path 
        file_list2 = [f for f in os.listdir(data_path2) if os.path.isfile(os.path.join(data_path2, f))]
        file_list2 = np.asarray(file_list2)
        ii = [i for i,item in enumerate(file_list2) if colName in item]
        f2 = file_list2[ii]
        print('NPV file: ' + f2[0])

        mat2 = scipy.io.loadmat(data_path2 + f2[0])
        label = mat2['labelImg']
        sz = label.shape
        label = label.reshape((sz[0]*sz[1]*sz[2]))
        dfAN.loc[:,colNames[2]] = pd.DataFrame(label,index=dfAN.index)
        masDF[anID] =dfAN
        
    print(dfAN.head())

  
    

ROI file type: Grp_HighRes
Number of voxels in Seg1:
allV    14966
nonV     1242
dtype: int64
Numer of voxels in Seg2:
mucV    10408
bndV     4558
nonV     1242
dtype: int64
NPV file: NPV_labels_Grp_HighRes.mat




ROI file type: Grp_LowRes
Number of voxels in Seg1:
allV    15137
nonV     1071
dtype: int64
Numer of voxels in Seg2:
mucV    10408
bndV     4729
nonV     1071
dtype: int64
NPV file: NPV_labels_Grp_LowRes.mat
ROI file type: Grp_LowRes2
Number of voxels in Seg1:
allV    14628
nonV     1608
dtype: int64
Numer of voxels in Seg2:
mucV    10408
bndV     4220
nonV     1608
dtype: int64
NPV file: NPV_labels_Grp_LowRes2.mat
ROI file type: Indv_HighRes
Number of voxels in Seg1:
allV    15238
nonV      974
dtype: int64
Numer of voxels in Seg2:
mucV    10408
bndV     4830
nonV      974
dtype: int64
NPV file: NPV_labels_Indv_HighRes.mat
    CTD_Map    MTP_Map  Post_ADCMap  Post_T1Map    Post_T1w  Post_T2Map  \
0  0.007767  39.867596  2524.004639      3068.0  173.793387       409.5   
1  0.005658  39.593403  2541.000244      3077.0  166.113100       409.5   
2  0.002975  39.426872  2512.001221      3140.0  170.768320       409.5   
3  0.002764  39.416546  2522.000244      3042.0  174.975658       4



ROI file type: Grp_LowRes
Number of voxels in Seg1:
allV    21864
dtype: int64
Numer of voxels in Seg2:
mucV    18759
bndV     3113
dtype: int64
NPV file: NPV_labels_Grp_LowRes.mat
ROI file type: Grp_LowRes2
Number of voxels in Seg1:
allV    21860
nonV        4
dtype: int64
Numer of voxels in Seg2:
mucV    18759
bndV     3109
nonV        4
dtype: int64
NPV file: NPV_labels_Grp_LowRes2.mat
ROI file type: Indv_HighRes
Number of voxels in Seg1:
allV    21864
dtype: int64
Numer of voxels in Seg2:
mucV    18759
bndV     3113
dtype: int64
NPV file: NPV_labels_Indv_HighRes.mat
   CTD_Map    MTP_Map  Post_ADCMap  Post_T1Map    Post_T1w  Post_T2Map  \
0      NaN  38.534931          NaN         NaN  118.922363         NaN   
1      NaN  38.415974          NaN         NaN  184.235657         NaN   
2      NaN  38.423077          NaN         NaN  190.512024         NaN   
3      NaN  38.821178          NaN         NaN  142.895569         NaN   
4      NaN  38.863365          NaN         NaN  168.1

2. Loading each case of labels and corresponding ROIs 
    

In [22]:
anID = '18_044'
# Get the Multi-parametric data for 18_044
data_path = 'MATLAB_Output_DataSets/18_044/Param_maps/ROI_masks/'

# Get the list of .mat files from the animal path 
file_list = [f for f in os.listdir(data_path) if os.path.isfile(os.path.join(data_path, f))]
file_list = np.asarray(file_list)
ii = [i for i,item in enumerate(file_list) if "Res" in item]
files = file_list[ii]

#dfOut = pd.DataFrame(index=df044.index.copy())
for f in files:
    colName = f[10:-4]
    print('ROI file type: ' + colName)
    mat = scipy.io.loadmat(data_path + f)
    allV = mat['ROIviaA']  #all viable tissue in muscle and tumor
    nonV = mat['ROInpv']   # all non-perfused/non-viable tissue
    bndV = mat['ROIbnd']   # viable voxels that surround non-perfused volume (mostly enhancing)
    mucV = mat['ROIviaB']  # viable voxels that exclude bndV (mostly healthy mucsle)

    # reshape into vectors 
    sz = allV.shape
    allV = allV.reshape((sz[0]*sz[1]*sz[2]))
    nonV = nonV.reshape((sz[0]*sz[1]*sz[2]))
    bndV = bndV.reshape((sz[0]*sz[1]*sz[2]))
    mucV = mucV.reshape((sz[0]*sz[1]*sz[2]))

    df = pd.DataFrame({'allV': allV,'nonV': nonV,'bndV':bndV,'mucV':mucV},index = df044.index)

    colNames = ['Seg1_' + colName, 'Seg2_' + colName, 'LABEL_'+ colName]
    [Seg1, Seg2] = getSegmentations(df)
    
    dfAN.loc[:,colNames[0]] = pd.DataFrame(Seg1,index=dfAN.index)
    dfAN.loc[:,colNames[1]] = pd.DataFrame(Seg2,index=dfAN.index)
    
    data_path2 = 'MATLAB_Output_DataSets/18_044/Param_maps/Labels/'

    # Get the list of .mat files from the animal path 
    file_list2 = [f for f in os.listdir(data_path2) if os.path.isfile(os.path.join(data_path2, f))]
    file_list2 = np.asarray(file_list2)
    ii = [i for i,item in enumerate(file_list2) if colName in item]
    f2 = file_list2[ii]
    print('NPV file: ' + f2[0])
    mat2 = scipy.io.loadmat(data_path2 + f2[0])
    label = mat2['labelImg']
    sz = label.shape
    label = label.reshape((sz[0]*sz[1]*sz[2]))
    df044.loc[:,colNames[2]] = pd.DataFrame(label,index=df044.index)
    

  
df044.head()



ROI file type: Grp_HighRes
Number of voxels in Seg1:
allV    14966
nonV     1242
dtype: int64
Numer of voxels in Seg2:
mucV    10408
bndV     4558
nonV     1242
dtype: int64
Size of Seg1 is:
Size of Seg2 is:
NPV file: NPV_labels_Grp_HighRes.mat
ROI file type: Grp_LowRes
Number of voxels in Seg1:
allV    15137
nonV     1071
dtype: int64
Numer of voxels in Seg2:
mucV    10408
bndV     4729
nonV     1071
dtype: int64
Size of Seg1 is:
Size of Seg2 is:
NPV file: NPV_labels_Grp_LowRes.mat
ROI file type: Grp_LowRes2
Number of voxels in Seg1:
allV    14628
nonV     1608
dtype: int64
Numer of voxels in Seg2:
mucV    10408
bndV     4220
nonV     1608
dtype: int64
Size of Seg1 is:
Size of Seg2 is:
NPV file: NPV_labels_Grp_LowRes2.mat
ROI file type: Indv_HighRes
Number of voxels in Seg1:
allV    15238
nonV      974
dtype: int64
Numer of voxels in Seg2:
mucV    10408
bndV     4830
nonV      974
dtype: int64
Size of Seg1 is:
Size of Seg2 is:
NPV file: NPV_labels_Indv_HighRes.mat


Unnamed: 0,CTD_Map,MTP_Map,Post_ADCMap,Post_T1Map,Post_T1w,Post_T2Map,Post_T2w,Pre_ADCMap,Pre_T1Map,Pre_T1w,...,Seg1_Grp_LowRes,Seg2_Grp_LowRes,Seg1_Grp_LowRes2,Seg2_Grp_LowRes2,Seg1_Indv_HighRes,Seg2_Indv_HighRes,LABEL_Grp_HighRes,LABEL_Grp_LowRes,LABEL_Grp_LowRes2,LABEL_Indv_HighRes
0,0.007767,39.867596,2524.004639,3068.0,173.793387,409.5,382.886522,,,,...,,,,,,,,,,
1,0.005658,39.593403,2541.000244,3077.0,166.1131,409.5,397.828145,,,,...,,,,,,,,,,
2,0.002975,39.426872,2512.001221,3140.0,170.76832,409.5,408.997083,,,,...,,,,,,,,,,
3,0.002764,39.416546,2522.000244,3042.0,174.975658,409.5,429.948499,,,,...,,,,,,,,,,
4,0.003721,39.771465,2930.997559,3025.0,164.624358,409.5,432.650843,,,,...,,,,,,,,,,


In [20]:
 mat2 = scipy.io.loadmat(data_path2 + f2[0])
    
name = f2[0]
print(f2)

print('NPV file: ' + name)
mat2

['NPV_labels_Grp_HighRes.mat']
NPV file: NPV_labels_Grp_HighRes.mat


{'__globals__': [],
 '__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Wed Oct 17 11:56:13 2018',
 '__version__': '1.0',
 'labelImg': array([[[0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         ..., 
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0]],
 
        [[0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         ..., 
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0]],
 
        [[0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         ..., 
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0]],
 
        ..., 
        [[0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         ..., 
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0,