# Dependencies

In [32]:
import MURA
import cv2
import image_manipulation
from multiprocessing import Pool
from models import *
import numpy as np
import glob
import matplotlib.pyplot as plt
import preprocessing
import json
from os import path, mkdir

import keras
from argparse import ArgumentParser
import tensorflow as tf
from sklearn import metrics

# Model to Run
This section lets the user choose which model's parameters to run. The model directory and its `parameters.json` file must already exist before the file path is changed.

In [2]:
# --- User-editable settings ---
# Set to False to skip the preprocessing step.
run_preprocessing = True
# Directory of the model to run; parameters.json is read from this folder.
model_file_path = 'models/model_1'

In [3]:
# Load the model's hyper-parameters from parameters.json and collect the
# source image paths.
#
# Raises:
#     FileNotFoundError: parameters.json does not exist in the model directory.
#     KeyError: parameters.json lacks one of the required keys.
# Any other failure (invalid JSON, dataset errors) propagates unchanged instead
# of being reported as a missing file.
parameters_path = model_file_path + '/parameters.json'
try:
    # Context manager so the file handle is closed even if parsing fails.
    with open(parameters_path) as parameters_file:
        params = json.load(parameters_file)
except FileNotFoundError as error:
    raise FileNotFoundError(
        f"No parameters.json file found in the model's directory: {parameters_path}"
    ) from error

try:
    # Model to use
    model_is_VAE = params['is_VAE']
    # Model parameters
    multiplier = params['multiplier']
    latent_size = params['latent_size']
    input_shape = params['input_shape']

    # Training parameters
    epochs = params['num_epochs']
    batch_size = params['batch_size']
    learning_rate = params['learning_rate']

    # Dataset Path
    dataset_file_path = params['dataset_path']
except KeyError as error:
    raise KeyError(f"parameters.json is missing the required key {error}") from error

# Source images: keep only the first column of the combined path table.
# NOTE(review): presumably that column holds the image file paths - confirm in MURA.
image_paths = MURA.MURA_DATASET()
all_image_paths = image_paths.get_combined_image_paths()
all_image_paths = all_image_paths.to_numpy()[:,0]

In [4]:
# Optional preprocessing step, controlled by `run_preprocessing`.
if run_preprocessing:
    # The preprocessing object is always built here; it is only started when
    # this code runs as the main module.
    preprocess = preprocessing.preprocessing(
        input_path=all_image_paths,
        output_path=dataset_file_path,
    )
    if __name__ == '__main__':
        preprocess.start()

Creating directories...
Shuffling Data...
Processing and Augmenting images...


100%|██████████| 1560/1560 [00:22<00:00, 70.02it/s]


Moving files to correct directory
Finished preprocessing data


In [5]:
# Read every PNG of each split from disk and convert it to grayscale.
# The resulting dict maps 'train', 'valid' and 'test' (in that order) to one
# NumPy array per split.
image_datasets = {}
for dataset_name in ('train', 'valid', 'test'):
    split_images = [
        cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2GRAY)
        for image_path in glob.glob(f'{dataset_file_path}/{dataset_name}/*.png')
    ]
    image_datasets[dataset_name] = np.array(split_images)

# Model Training
This section creates and trains the model

In [6]:
if __name__ == "__main__":

    # Instantiate the autoencoder variant selected by the 'is_VAE' parameter.
    model = (
        VAE(False, input_shape, multiplier, latent_size)
        if model_is_VAE
        else UPAE(True, input_shape, multiplier, latent_size)
    )

    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

    # Build with an unspecified batch dimension in front of the configured shape.
    model.build(input_shape=(None, *input_shape))

    model.compile(optimizer=optimizer, metrics=[tf.keras.metrics.Accuracy()])

    model.summary()

    # Callback constructed from the training images.
    # NOTE(review): presumably it saves images after each epoch - confirm in models.
    save_callback = SaveImageCallback(image_datasets['train'])

    # Train on the training split, holding out 15% of it for validation.
    history_train = model.fit(
        image_datasets['train'],
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.15,
        callbacks=[save_callback],
    )
    

INPUT SHAPE ACCEPTED:  (None, 64, 64, 1)
Model: "vae"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder_decoder (encoder_de  multiple                 19946433  
 coder)                                                          
                                                                 
Total params: 19,946,439
Trainable params: 19,935,937
Non-trainable params: 10,502
_________________________________________________________________
Epoch 1/3
Vanilla Loss
Data Shape:  (None, 64, 64)
Reconstructed Shape:  (None, 64, 64)
Vanilla Loss
Data Shape:  (None, 64, 64)
Reconstructed Shape:  (None, 64, 64)
callback predict
Epoch 2/3
callback predict
Epoch 3/3
callback predict


# Model Prediction

# Plots Creation

# Testing of the Model with the Test Set (CARA VERSION)
This section tests the model with the current test set
TODO: 
- Get the label of each image in the test set
- Test the images
- Create Linear Regression for the abnormality score to get the threshold for determining abnormal or normal images

In [27]:
# Load the test split as grayscale images together with one binary label per
# image: 1 when the file name contains 'positive', 0 otherwise.
# The files are visited in the order glob returns them (not re-sorted), so the
# labels stay index-aligned with any other array built from the same listing.
test_images = []
labels = []
for image_path in glob.glob(f'{dataset_file_path}/test/*.png'):
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals an unreadable file by returning None, not raising.
        raise ValueError(f'Could not read test image: {image_path}')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    test_images.append(image)
    # Match on the file name only, so a 'positive' elsewhere in the directory
    # path cannot mark every image as positive.
    labels.append(1 if 'positive' in path.basename(image_path) else 0)
test_images = np.array(test_images)

In [7]:
# Run the trained model on the test images. Predicting on `test_images` (the
# array built in the same loop as `labels`) keeps the i-th prediction tied to
# the i-th label by construction, rather than relying on a second directory
# scan returning the files in the same order.
history_valid = model.predict(test_images, batch_size=batch_size)

not callback predict


In [20]:
# Abnormality scores are the second element of the prediction output.
# Convert every entry to a plain Python float. Tensors are unwrapped with
# .numpy(); any other numeric entry is converted directly rather than being
# dropped, so the list stays index-aligned with `labels`.
abnor_scores = [
    float(item.numpy()) if isinstance(item, tf.Tensor) else float(item)
    for item in history_valid[1]
]

In [48]:
# Sanity check: the number of scores should equal the number of labels.
score_count = len(abnor_scores)
label_count = len(labels)
print(score_count, label_count)

7488 7488


In [33]:
# Area under the ROC curve of the abnormality scores against the test labels.
auc = metrics.roc_auc_score(y_true=labels, y_score=abnor_scores)

In [36]:
# ROC curve of the abnormality scores against the test labels.
fpr, tpr, thresholds = metrics.roc_curve(labels, abnor_scores)

# Pick the operating point where the false-positive rate and the
# false-negative rate (1 - tpr) are approximately equal.
rate_gap = np.abs(fpr - (1 - tpr))
within_tolerance = np.flatnonzero(rate_gap <= 5e-3)
if within_tolerance.size:
    # Same choice as before: the first ROC point inside the tolerance.
    idx = int(within_tolerance[0])
else:
    # No point lies inside the tolerance (e.g. a coarse curve from a small test
    # set): fall back to the closest point instead of failing.
    idx = int(np.argmin(rate_gap))
t = thresholds[idx]

In [44]:
# Binarise the abnormality scores with threshold t:
# 1 for scores at or above t, 0 for scores below it.
y_pred = np.where(np.asarray(abnor_scores) >= t, 1, 0)

# Classification metrics computed from the hard 0/1 predictions.
# Sensitivity is the recall of class 1, specificity the recall of class 0.
pres = metrics.precision_score(labels, y_pred)
sens = metrics.recall_score(labels, y_pred, pos_label=1)
spec = metrics.recall_score(labels, y_pred, pos_label=0)
f1 = metrics.f1_score(labels, y_pred)

# The printed "Error rate" is the false-positive rate at the chosen ROC point.
print('Error rate:{}'.format(fpr[idx]))
print('Precision:{} Sensitivity:{} Specificity:{} f1:{}\n'.format(pres, sens, spec, f1))

Error rate:0.5482233502538071
Precision:0.7536355859709153 Sensitivity:0.44705683355886333 Specificity:0.4517766497461929 f1:0.5612060728315108



# Saving of final reconstructed images 

In [9]:
# Create a directory inside the model's folder for the reconstructed images,
# named after the last component of the dataset path.
# normpath strips any trailing separator first, so a dataset path such as
# "data/set/" still yields "set" rather than an empty name.
dataset_name = path.basename(path.normpath(dataset_file_path))
reconstructed_images_path = model_file_path + "/" + dataset_name
if not path.exists(reconstructed_images_path):
    mkdir(reconstructed_images_path)

In [10]:
# Save side-by-side figures of a test image and its reconstruction.
# Only the first image is written, as before; raise `num_images_to_save` to
# export more. The bound uses the number of reconstructions (history_valid[0]),
# not the number of prediction outputs in history_valid.
num_images_to_save = 1
reconstructions = history_valid[0]
for x in range(min(num_images_to_save, len(reconstructions))):
    fig, axs = plt.subplots(1, 2, figsize=(8, 4))
    axs[0].imshow(image_datasets['test'][x])
    axs[0].set_title('Original Image')
    # NOTE(review): floor + uint8 cast assumes reconstructions are in the 0-255
    # range - confirm against the model's output scaling.
    new_image = np.floor(reconstructions[x]).astype(np.uint8)
    axs[1].imshow(new_image)
    axs[1].set_title('Reconstructed Image')
    fig.savefig(f'{reconstructed_images_path}/Valid_Image_{x}.png')
    # Close each figure explicitly so figures do not accumulate in memory.
    plt.close(fig)

# Saving of Model Weights

In [None]:
# Write the trained weights into the model's directory.
weights_path = f'{model_file_path}/model_weights.h5'
model.save_weights(weights_path)

In [None]:
# Inspect the shape of the first prediction output (the reconstructions).
history_valid[0].shape

In [None]:
# Preview a handful of reconstructions side by side (joined along the width).
# Joining every prediction at once would produce one enormous, unreadable
# strip, so only the first `num_preview_images` are shown.
num_preview_images = 8
new_image = np.concatenate(history_valid[0][:num_preview_images], axis=1)
plt.imshow(new_image)

In [None]:
# Display the raw values of the first prediction output (the reconstructions).
# NOTE(review): scratch inspection cell - consider showing a small slice instead.
history_valid[0]