## Jupyter Notebook for Data Vis and Exploration

In [1]:
import scipy.io 
import os
import numpy as np
import pandas as pd

#### Loading .mat data for one rabbit
1. Loading multi-parametric data into dataframe

In [20]:
anID = '18_044'
# Get the Multi-parametric data for 18_044
data_path = 'MATLAB_Output_DataSets/18_044/Param_maps/'

# Get the list of .mat files from the animal path 
file_list = [f for f in os.listdir(data_path) if os.path.isfile(os.path.join(data_path, f))]
file_list = np.asarray(file_list)


# Initialize import_array with first file in file_list
fname = file_list[0]
mat = scipy.io.loadmat(data_path + fname)
datarray = mat['paramImg']
sz = datarray.shape
datvec = datarray.reshape((sz[0]*sz[1]*sz[2],1))
import_array = datvec
colNames = fname[0:-4]

#define the remaining files in file-listn
file_listn = file_list[1:]  

# Loop through files, load data, reshape into an array and append to "import_array"
for f in file_listn:
    mat = scipy.io.loadmat(data_path + f)
    datarray = mat['paramImg']
    sz = datarray.shape
    datvec = datarray.reshape((sz[0]*sz[1]*sz[2],1))
    import_array = np.concatenate([import_array,datvec],axis=1)
    colName = f[0:-4]
    colNames = np.append(colNames,colName)

# Create the dataframe
df044 = pd.DataFrame(data=import_array, columns=colNames)
df044['anID'] = anID
print(df044.head())
print(df044.shape)
print(df044.index)
    

    CTD_Map    MTP_Map  Post_ADCMap  Post_T1Map    Post_T1w  Post_T2Map  \
0  0.007767  39.867596  2524.004639      3068.0  173.793387       409.5   
1  0.005658  39.593403  2541.000244      3077.0  166.113100       409.5   
2  0.002975  39.426872  2512.001221      3140.0  170.768320       409.5   
3  0.002764  39.416546  2522.000244      3042.0  174.975658       409.5   
4  0.003721  39.771465  2930.997559      3025.0  164.624358       409.5   

     Post_T2w  Pre_ADCMap  Pre_T1Map  Pre_T1w  Pre_T2Map  Pre_T2w    anID  
0  382.886522         NaN        NaN      NaN        NaN      NaN  18_044  
1  397.828145         NaN        NaN      NaN        NaN      NaN  18_044  
2  408.997083         NaN        NaN      NaN        NaN      NaN  18_044  
3  429.948499         NaN        NaN      NaN        NaN      NaN  18_044  
4  432.650843         NaN        NaN      NaN        NaN      NaN  18_044  
(175104, 13)
RangeIndex(start=0, stop=175104, step=1)


2. Loading each case of labels and corresponding ROIs 
    

In [3]:
# functions for generating categorical segmentation of each index 

def getSegmentations(df):
    # Segmentation 1+ "Viable" and "Non-viable" only 
    def segmentmethod1(row):
        if row['allV'] == 1:
            return 'allV'
        if row['nonV'] == 1: 
            return 'nonV'

    Seg1 = df.apply(segmentmethod1,axis=1)
    #df['Seg1'] = df.apply(segmentmethod1,axis=1)
    print('Number of voxels in Seg1:')
    print(Seg1.value_counts())

    # Segmentation 2: "Non-viable", "Viable Boundary","Viable Muscle"
    def segmentmethod2(row):
        if row['nonV'] == 1:
            return 'nonV'
        if row['bndV'] == 1:
            return 'bndV'
        if row['mucV'] == 1:
            return 'mucV'

    Seg2 = df.apply(segmentmethod2,axis=1)
    print('Numer of voxels in Seg2:')
    print(Seg2.value_counts())
    
    print('Size of Seg1 is:')
    print(Seg1.shape)
    print('Size of Seg2 is:')
    print(Seg2.shape)
           
    return Seg1,Seg2

In [21]:
anID = '18_044'
# Get the Multi-parametric data for 18_044
data_path = 'MATLAB_Output_DataSets/18_044/Param_maps/ROI_masks/'

# Get the list of .mat files from the animal path 
file_list = [f for f in os.listdir(data_path) if os.path.isfile(os.path.join(data_path, f))]
file_list = np.asarray(file_list)
ii = [i for i,item in enumerate(file_list) if "Res" in item]
files = file_list[ii]
#dfOut = pd.DataFrame(index=df044.index.copy())
#for f in files:
f = files[0]
colName = f[10:-4]
mat = scipy.io.loadmat(data_path + f)
allV = mat['ROIviaA']  #all viable tissue in muscle and tumor
nonV = mat['ROInpv']   # all non-perfused/non-viable tissue
bndV = mat['ROIbnd']   # viable voxels that surround non-perfused volume (mostly enhancing)
mucV = mat['ROIviaB']  # viable voxels that exclude bndV (mostly healthy mucsle)

# reshape into vectors 
sz = allV.shape
allV = allV.reshape((sz[0]*sz[1]*sz[2]))
nonV = nonV.reshape((sz[0]*sz[1]*sz[2]))
bndV = bndV.reshape((sz[0]*sz[1]*sz[2]))
mucV = mucV.reshape((sz[0]*sz[1]*sz[2]))

df = pd.DataFrame({'allV': allV,'nonV': nonV,'bndV':bndV,'mucV':mucV},index = df044.index)
    
colNames = ['Seg1_' + colName, 'Seg2_' + colName]

df044.loc[:,colNames[0]] = pd.DataFrame(Seg1,index=df044.index)
df044.loc[:,colNames[1]] = pd.DataFrame(Seg2,index=df044.index)
print(df044.head())



    CTD_Map    MTP_Map  Post_ADCMap  Post_T1Map    Post_T1w  Post_T2Map  \
0  0.007767  39.867596  2524.004639      3068.0  173.793387       409.5   
1  0.005658  39.593403  2541.000244      3077.0  166.113100       409.5   
2  0.002975  39.426872  2512.001221      3140.0  170.768320       409.5   
3  0.002764  39.416546  2522.000244      3042.0  174.975658       409.5   
4  0.003721  39.771465  2930.997559      3025.0  164.624358       409.5   

     Post_T2w  Pre_ADCMap  Pre_T1Map  Pre_T1w  Pre_T2Map  Pre_T2w    anID  \
0  382.886522         NaN        NaN      NaN        NaN      NaN  18_044   
1  397.828145         NaN        NaN      NaN        NaN      NaN  18_044   
2  408.997083         NaN        NaN      NaN        NaN      NaN  18_044   
3  429.948499         NaN        NaN      NaN        NaN      NaN  18_044   
4  432.650843         NaN        NaN      NaN        NaN      NaN  18_044   

   Seg1_Grp_HighRes  Seg2_Grp_HighRes  
0               NaN               NaN  
1     

<class 'str'>
    CTD_Map    MTP_Map  Post_ADCMap  Post_T1Map    Post_T1w  Post_T2Map  \
0  0.007767  39.867596  2524.004639      3068.0  173.793387       409.5   
1  0.005658  39.593403  2541.000244      3077.0  166.113100       409.5   
2  0.002975  39.426872  2512.001221      3140.0  170.768320       409.5   
3  0.002764  39.416546  2522.000244      3042.0  174.975658       409.5   
4  0.003721  39.771465  2930.997559      3025.0  164.624358       409.5   

     Post_T2w  Pre_ADCMap  Pre_T1Map  Pre_T1w  Pre_T2Map  Pre_T2w    anID  \
0  382.886522         NaN        NaN      NaN        NaN      NaN  18_044   
1  397.828145         NaN        NaN      NaN        NaN      NaN  18_044   
2  408.997083         NaN        NaN      NaN        NaN      NaN  18_044   
3  429.948499         NaN        NaN      NaN        NaN      NaN  18_044   
4  432.650843         NaN        NaN      NaN        NaN      NaN  18_044   

   Seg1_Grp_HighRes  Seg2_Grp_HighRes  
0               NaN             