<font color=gray>Oracle Cloud Infrastructure Data Science Demo Notebook

Copyright (c) 2021 Oracle, Inc.<br>
Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
</font>

# Validation of the CNN Model  

In [None]:
%load_ext autoreload
%autoreload 2

import keras 
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.utils import plot_model

from matplotlib import pyplot as plt 
import numpy as np 
import json 
import urllib
from zipfile import ZipFile 
import skimage as ski
import os 
import pandas as pd 
import glob
from numpy import random as random
import urllib 
import tensorflow as tf

from sklearn.metrics import confusion_matrix

from skimage import transform 
from seaborn import heatmap 

from utilities import display_xray_image, evaluate_model_performance

In [None]:
# Locations of the chest x-ray image folders, relative to the working directory.
path_to_train_dataset = "./data/chest_xray/train/"
path_to_test_dataset = "./data/chest_xray/test/"

# Directory and file name of the saved model that this notebook loads and evaluates.
model_artifact_path = "./model_artifact"
model_file = "xray_predictor4-march21.hdf5"
model_path = os.path.join(model_artifact_path, model_file)

In [None]:
# Pulling some statistics about the test dataset:

# glob returns matches in arbitrary, filesystem-dependent order. Sorting keeps the
# row order of everything derived from these lists (and therefore "the first image")
# reproducible between runs and machines. os.path.join makes the pattern independent
# of whether path_to_test_dataset ends with a separator.
pneumonia_test_list = sorted(glob.glob(os.path.join(path_to_test_dataset, 'PNEUMONIA', '*')))
normal_test_list = sorted(glob.glob(os.path.join(path_to_test_dataset, 'NORMAL', '*')))

# Pneumonia files first, then normal files.
test_list = pneumonia_test_list + normal_test_list
print("Test sample size = {}, Pneumonia = {}, Normal = {}".format(len(test_list),
                                                                      len(pneumonia_test_list),
                                                                      len(normal_test_list)))

In [None]:
# Building out the dataframe that will contain all the metadata about the x-ray images

test_df = pd.DataFrame(data={"path": test_list})

# The class label is the name of the directory that directly contains the image:
# 0 for NORMAL, 1 for anything else. Comparing the parent directory name (instead of
# searching for "/NORMAL/" anywhere in the path) keeps the label correct when paths
# use Windows separators and when an ancestor directory happens to be named NORMAL.
test_df["observed_class"] = test_df["path"].apply(
    lambda x: 0 if os.path.basename(os.path.dirname(x)) == "NORMAL" else 1)

# File extension including the leading dot (e.g. ".jpeg"), as returned by os.path.splitext.
test_df["extension"] = test_df["path"].apply(lambda x: os.path.splitext(x)[1])

In [None]:
# Sanity check: print the (rows, columns) shape of the metadata dataframe.
print(test_df.shape)

In [None]:
# Preview the first rows of the metadata dataframe.
test_df.head()

In [None]:
# Pass the file path of the first row to the display helper imported from utilities.
# NOTE(review): display_xray_image is not defined in this notebook; it is called with a
# path here and with an image array further down — presumably it accepts both; confirm.
display_xray_image(test_df['path'].iloc[0])

## Image Transformations

In [None]:
# Define the image transformations applied to each x-ray image:

def image_transformations(image_path, dims=(200, 300)): 
    """Read an image file and return it resized and reduced to two dimensions.

    Parameters
    ----------
    image_path : str
        Location of the image; forwarded unchanged to ``skimage.io.imread``.
    dims : tuple of int, optional
        Target shape forwarded to ``skimage.transform.resize`` as
        ``output_shape``. Defaults to ``(200, 300)``.

    Returns
    -------
    numpy.ndarray
        The resized image. When the resized array has more than two
        dimensions, only index 0 of the third axis is kept; otherwise the
        resized array is returned as is.
    """
    # Import the submodule explicitly: `ski.io` only exists as an attribute once
    # something has imported `skimage.io`, which is not guaranteed on scikit-image
    # releases that do not lazy-load their submodules.
    from skimage import io as skimage_io

    # Resize the original image. Consistent with training dataset:
    raw_image = skimage_io.imread(image_path)
    resized = transform.resize(raw_image, output_shape=dims)

    # Take the first channel only (images that are already 2-D pass through):
    if resized.ndim > 2:
        return resized[:, :, 0]
    return resized

In [None]:
# Applying transformations to images and observed labels: 

# The function is passed directly; wrapping it in a lambda added nothing.
test_df['resized_image'] = test_df['path'].apply(image_transformations)

# encoding the class as a one-hot numpy array: [0, 1] for class 1, [1, 0] otherwise
test_df['y'] = test_df['observed_class'].apply(lambda x: np.array([0, 1]) 
                                            if x==1 else  np.array([1, 0]))

# Stack the per-row arrays into batch arrays. The trailing channel axis is appended
# with np.newaxis rather than a hard-coded reshape(200, 300, 1), so this cell keeps
# working if image_transformations is called with different `dims`.
Xtest = np.asarray([image[..., np.newaxis] for image in test_df['resized_image'].values])
Ytest = np.asarray([label.reshape(2) for label in test_df['y'].values])

In [None]:
# Report the shapes of the stacked image and label arrays built in the previous cell.
print("Xtest shape: {}, Ytest shape: {}".format(Xtest.shape, Ytest.shape))

In [None]:
# Pass the resized image array of the first row to the display helper from utilities.
# NOTE(review): the helper is called with an array here, whereas an earlier cell passes
# a file path — presumably both are supported; confirm against utilities.
display_xray_image(test_df.iloc[0]['resized_image'])

# Evaluating the CNN model 

In [None]:
# Load the saved model from model_path, using the load_model name imported at the top
# of the notebook (the same function as keras.models.load_model).
model = load_model(model_path)

In [None]:
# Run the evaluation helper imported from utilities on the test arrays, passing the
# integer observed classes and the display labels for the two classes.
# NOTE(review): the helper receives model_path, not the `model` object loaded in the
# previous cell — presumably it loads the model itself; confirm against utilities.
evaluate_model_performance(model_path, Xtest, Ytest, test_df['observed_class'].values, 
                           labels=["normal", "pneumonia"])