In [4]:
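#Optional GPU settings (currently disabled): uncomment the os.environ lines below, before TensorFlow is imported, to work around the GPU memory allocation issue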
import os
#os.environ['TF_FORCE_GPU_ALLOW_GROWTH']='true'
#os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
#os.environ["CUDA_VISIBLE_DEVICES"]="1"

# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.backend import clear_session

#sklearn
from sklearn.manifold import TSNE
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix,accuracy_score, precision_score, recall_score,roc_curve, auc

#others
import pandas as pd
import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
import math
import itertools
import datetime

In [5]:
#There are 9 phantoms (9 labels) but we are only doing 3-way classifications, grouping on one variable
#This function will change the labels to create 3 class labels
def changeLabels(labels,oldclasses,newclass):
    """Relabel, in place, every entry equal to ``oldclasses - 1`` as ``newclass``.

    Callers in this notebook pass a 1-based phantom number as ``oldclasses``;
    the value actually matched inside ``labels`` is ``oldclasses - 1``.

    Returns the same ``labels`` object that was passed in (it is mutated,
    not copied).
    """
    old_label = oldclasses - 1
    hit_mask = labels == old_label
    labels[hit_mask] = newclass
    return labels


def normalizeData(data):
    """Min-max scale ``data`` row by row and return the scaled array.

    MinMaxScaler scales each column of its input, so the array is transposed
    before fitting/transforming and transposed back afterwards; the net effect
    is that every row of ``data`` is scaled using its own minimum and maximum.
    """
    transposed = data.T
    row_scaler = MinMaxScaler().fit(transposed)
    scaled = row_scaler.transform(transposed)
    return scaled.T


#Loads in each fold of a specified type. 
def loadFold(foldnum, type, filePath=None):
    """Load one fold's .mat file and return the dict produced by ``sio.loadmat``.

    Args:
        foldnum: fold number; it is inserted into the file name via ``str()``.
        type: 'moving' loads ``fold_<n>.mat``; 'fixed' loads ``fold_<n>_fixed.mat``.
        filePath: optional directory containing the fold files. When omitted,
            the original hard-coded project directory is used.

    Raises:
        ValueError: if ``type`` is neither 'moving' nor 'fixed'. (Previously an
            error was printed and the function then crashed because
            ``filename`` had never been assigned.)
    """
    if filePath is None:
        filePath = r"C:\Users\Justin\OneDrive - Queen's University\MSc\TeUS Focal Change Project\data\savedRF\Jan17\EMBC folds\\"

    if type == 'moving':
        filename = "fold_" + str(foldnum) + ".mat"
    elif type == 'fixed':
        filename = "fold_" + str(foldnum) + "_fixed.mat"
    else:
        raise ValueError("ERROR: LOADING FOLDS - unknown fold type %r (expected 'moving' or 'fixed')" % (type,))

    # os.path.join only inserts a separator when filePath does not already end with one
    return sio.loadmat(os.path.join(filePath, filename))


#There are 9 total phantoms, covering 3 elasticities (E) and 3 scatterer sizes (S)
#This function selects the 3 phantoms of specified properties to use
def generatePhantomsToUse(type):
    """Return the list of 1-based phantom numbers selected by ``type``.

    Recognised keys are '0p5EdiffS', '1EdiffS', '2EdiffS', '23SdiffE',
    '32SdiffE', '60SdiffE' (three phantoms each) and 'All' (all nine).
    For any other value an error message is printed and None is returned.
    A fresh list is returned on every call.
    """
    selections = (
        ('0p5EdiffS', (1, 2, 3)),
        ('1EdiffS', (4, 5, 6)),
        ('2EdiffS', (7, 8, 9)),
        ('23SdiffE', (1, 4, 7)),
        ('32SdiffE', (2, 5, 8)),
        ('60SdiffE', (3, 6, 9)),
        ('All', (1, 2, 3, 4, 5, 6, 7, 8, 9)),
    )
    for key, phantoms in selections:
        if type == key:
            return list(phantoms)

    print("ERROR IN PHANTOMS TO USE")
    return None

In [9]:
def model(train_data, train_labels, val_data, val_labels, test_data, test_labels):
    """Train a joint autoencoder/classifier and evaluate the classifier on the test set.

    A dense encoder compresses each input to a 15-unit latent vector. Two heads
    share that latent vector: a decoder that reconstructs the input ('ae_out')
    and a small dense classifier ('class_out'). Both heads are trained together
    with a loss weighting of 1 (reconstruction MSE) to 5 (categorical
    cross-entropy), with early stopping on the validation loss.

    Args:
        train_data, val_data, test_data: 2-D arrays (samples x features); they
            are cast to float32. The decoder output is a sigmoid, so inputs
            are presumably expected in [0, 1] - confirm against the caller's
            normalisation.
        train_labels, val_labels, test_labels: one-hot label arrays; the number
            of classes is taken from ``val_labels.shape[1]``.

    Returns:
        (acc, precision, recall, cnf_matrix) computed on the test set.
        NOTE(review): precision and recall are micro-averaged, which for a
        single-label multi-class problem gives the same number as accuracy.
    """
    train_data = np.float32(train_data)
    test_data = np.float32(test_data)
    val_data = np.float32(val_data)

    num_classes = val_labels.shape[1]

    #model parameters
    #input width follows the data (previously hard-coded to 101)
    inputShape = train_data.shape[1]
    ae_s1_size = 80
    ae_s2_size = 60
    ae_s3_size = 30
    latent_size = 15
    cl_dense_size = 10

    #auto-encoder encoding->decoding
    ENC1 = Dense(ae_s1_size, activation='relu', activity_regularizer=l2(0.0001))
    ENC2 = Dense(ae_s2_size, activation='relu', activity_regularizer=l2(0.0001))
    ENC3 = Dense(ae_s3_size, activation='relu', activity_regularizer=l2(0.0001))
    ENC4 = Dense(latent_size, activation='relu', name='latent_out')
    DEC1 = Dense(ae_s3_size, activation='relu', activity_regularizer=l2(0.0001))
    DEC2 = Dense(ae_s2_size, activation='relu', activity_regularizer=l2(0.0001))
    DEC3 = Dense(ae_s1_size, activation='relu', activity_regularizer=l2(0.0001))
    DEC4 = Dense(inputShape, activation='sigmoid', name='ae_out')

    #dense and classification layers
    DENS1 = Dense(cl_dense_size, activation='relu', activity_regularizer=l2(0.00001))
    CLASS = Dense(num_classes, activation='softmax', name='class_out')

    #auto-encoder and the latent space inputs
    input_ae = Input(shape=(inputShape,), name='ae_in')

    #latent space (input->enc1->enc2->enc3->enc4->latent)
    latent_ae = ENC4(ENC3(ENC2(ENC1( input_ae ))))

    #output of the ae (latent->dec1->dec2->dec3->dec4->output)
    output_ae = DEC4(DEC3(DEC2(DEC1( latent_ae ))))

    #output of classification is (latent->dense->classification)
    output_class = CLASS(DENS1( latent_ae ))

    #3 models sharing the same layers - only AE, full model (autoencoder-classifier), only classifier
    autoencoder_alone = Model(input_ae, output_ae)
    autoencoder_joint = Model(inputs=[input_ae], outputs=[output_ae, output_class])
    classifier_alone = Model(input_ae, output_class)

    #Optional: uncomment this block to train only the autoencoder first.
    #This may be used to first optimize the weights prior to joint training
    #
    #autoencoder_alone.compile(optimizer='adam',loss='mean_squared_error',metrics=['mse'])
    #history = autoencoder_alone.fit(train_data,train_data,
    #                      validation_data=(val_data,val_data),
    #                      epochs=500, batch_size=4096, shuffle=1, verbose=2,
    #                      callbacks = [EarlyStopping(monitor='val_loss', mode='min', patience=30, restore_best_weights=True)])
    #
    #plt.figure()
    #plt.plot(history.history['loss'], label='train')
    #plt.plot(history.history['val_loss'], label='validation')
    #plt.legend()
    #plt.title('Autoencoder alone loss')
    #plt.show()
    #
    #test1 = autoencoder_alone(test_data)

    #joint training of the autoencoder-classifier
    autoencoder_joint.compile(optimizer='adam',
                    loss={'ae_out':'mean_squared_error','class_out':'categorical_crossentropy'},
                    loss_weights={'ae_out':1,'class_out':5},
                    metrics={'ae_out':'mse','class_out':'accuracy'})
    history = autoencoder_joint.fit({'ae_in':train_data},
                    {'ae_out':train_data,'class_out':train_labels},
                    validation_data=(val_data,[val_data,val_labels]),
                    epochs=300, batch_size=1024, shuffle=1, verbose=0,
                    callbacks = [EarlyStopping(monitor='val_loss', mode='min', patience=50, restore_best_weights=True)])

    #matplotlib.pyplot is imported as plt in this notebook; the previous
    #pyplot.* calls raised NameError once training had finished
    plt.figure()
    plt.plot(history.history['loss'], label='train')
    #val_loss comes from the validation set, not the test set
    plt.plot(history.history['val_loss'], label='validation')
    plt.legend()
    plt.title('Overal Loss')
    plt.show()

    #generate the test predictions (classifier_alone shares the trained layers)
    test_pred = classifier_alone.predict(test_data)
    y_test = np.argmax(test_labels,1)
    y_pred = np.argmax(test_pred,1)
    #samples whose best score is below the threshold get an extra "rejected" class;
    #NOTE(review): with a softmax output and a threshold of 0 this never triggers
    temp = np.max(test_pred, axis=1)<0
    y_pred[temp] = np.max(y_test)+1
    cnf_matrix = confusion_matrix(y_test, y_pred)
    acc=accuracy_score(y_test,y_pred)
    precision = precision_score(y_test,y_pred, average='micro')
    recall = recall_score(y_test,y_pred, average='micro')

    return acc, precision, recall, cnf_matrix

In [None]:
def _stackFolds(folds, foldNums, phantomToUse):
    """Concatenate (axis 0) the data and label arrays of the chosen phantoms across the given folds."""
    data_parts = []
    label_parts = []
    for foldNum in foldNums:
        fold = folds[int(foldNum)]
        for p in phantomToUse:
            data_parts.append(fold['p'+str(p)])
            label_parts.append(fold['p'+str(p)+'c'])
    #np.concatenate always returns new arrays, so the loaded folds are never mutated later on
    return np.concatenate(data_parts, axis=0), np.concatenate(label_parts, axis=0)


def runModel(mode, trainingFoldsToUse, validationFoldToUse, testingFoldToUse, type):
    """Assemble train/validation/test sets for one task and train/evaluate the model.

    Args:
        mode: phantom selection key understood by ``generatePhantomsToUse``
            (e.g. '0p5EdiffS').
        trainingFoldsToUse: iterable of fold numbers used for training.
        validationFoldToUse: iterable of fold numbers used for validation.
        testingFoldToUse: iterable of fold numbers used for testing.
        type: fold type passed to ``loadFold`` ('moving' or 'fixed').

    Returns:
        (acc, precision, recall, cnf_matrix) as returned by ``model``.

    Raises:
        ValueError: if ``mode`` is not a known phantom selection.

    Each fold is expected to hold, for phantom number p, the data under key
    'p<p>' and the labels under key 'p<p>c'.
    """
    #select the phantoms to use for classification
    phantomToUse = generatePhantomsToUse(mode)
    if phantomToUse is None:
        raise ValueError("Unknown mode for phantom selection: %r" % (mode,))

    #load every fold that is referenced, keyed by fold number.
    #This replaces the eval/exec lookups of fold1..fold6: exec("foldToAdd=...")
    #cannot rebind a local variable inside a function in Python 3, so the test
    #set used to be built from the last validation fold instead of the
    #requested testing fold.
    foldGroups = (trainingFoldsToUse, validationFoldToUse, testingFoldToUse)
    neededFolds = sorted({int(n) for group in foldGroups for n in group})
    folds = {n: loadFold(n, type) for n in neededFolds}

    #Create training, validation, and testing sets
    train_data, train_labels = _stackFolds(folds, trainingFoldsToUse, phantomToUse)
    val_data, val_labels = _stackFolds(folds, validationFoldToUse, phantomToUse)
    test_data, test_labels = _stackFolds(folds, testingFoldToUse, phantomToUse)

    #normalization
    train_data = normalizeData(train_data)
    val_data = normalizeData(val_data)
    test_data = normalizeData(test_data)

    #fix labels to be 0,1,2,... in the order the phantoms are listed
    for newLabel, phantom in enumerate(phantomToUse):
        train_labels = changeLabels(train_labels, phantom, newLabel)
        val_labels = changeLabels(val_labels, phantom, newLabel)
        test_labels = changeLabels(test_labels, phantom, newLabel)

    #one-hot encode the labels
    train_labels = to_categorical(train_labels)
    val_labels = to_categorical(val_labels)
    test_labels = to_categorical(test_labels)

    #run the model
    acc, precision, recall, cnf_matrix = model(train_data, train_labels, val_data, val_labels, test_data, test_labels)
    return acc, precision, recall, cnf_matrix

In [10]:
#classification tasks to run; each key is resolved by generatePhantomsToUse
mode = ['0p5EdiffS', '1EdiffS', '2EdiffS', '23SdiffE', '32SdiffE', '60SdiffE']

#testing set is fixed, so only 5 folds take part in cross-validation
numFolds = 5

#every way of choosing numFolds-1 of the folds 1..numFolds for training
trainingFoldsToUse = list(itertools.combinations(range(1, numFolds + 1), numFolds - 1))

#for each training combination, validate on the fold(s) it leaves out
validationFoldToUse = [
    sorted(set(range(1, numFolds + 1)).difference(trainingFoldsToUse[k]))
    for k in range(numFolds)
]

testingFoldToUse = [6]

#one (initially empty) results table per fold type
columns = ['Mode', 'Fold', 'Acc', 'Recall', 'Precision', 'ConfMat']
movingFP, fixedFP = (pd.DataFrame(index=None, columns=columns) for _ in range(2))

In [None]:
#main
#Run every mode over every cross-validation split, first on the 'moving'
#folds (results in movingFP) and then on the 'fixed' folds (results in fixedFP).
for fpType, results in (('moving', movingFP), ('fixed', fixedFP)):
    for i in range(len(mode)):
        for j in range(numFolds):
            acc, prec, rec, conf = runModel(mode[i], trainingFoldsToUse[j], validationFoldToUse[j], testingFoldToUse, fpType)
            #values follow the frames' column order
            #['Mode', 'Fold', 'Acc', 'Recall', 'Precision', 'ConfMat'];
            #recall and precision used to be written into each other's column
            results.loc[j+numFolds*i] = [mode[i], j, acc, rec, prec, conf]
            #drop the Keras graph/state before the next model is built
            clear_session()

In [19]:
#directory that receives the result tables (hard-coded, machine-specific path)
filePath = r"C:\Users\Justin\OneDrive - Queen's University\MSc\TeUS Focal Change Project\saved results\\"

#write one CSV per fold type
for resultsTable, csvName in ((movingFP, "EMBC_5folds_AE_moving.csv"),
                              (fixedFP, "EMBC_5folds_AE_fixed.csv")):
    resultsTable.to_csv(filePath+csvName)