In [1]:
#Importing dependencies
import csv
import cv2
import h5py
import itertools
import keras
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import numpy as np
import os
import pandas as pd
import random
import scipy
import seaborn as sns
import sklearn
import skimage
import warnings
import zlib

from glob import glob
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
from keras import backend as K
from keras import models, layers, optimizers
from keras.applications.vgg16 import VGG16
from keras.applications.inception_v3 import InceptionV3
from keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from keras.layers import Activation, Dense, Dropout, Flatten, Conv2D, MaxPool2D, Lambda, MaxPooling2D, AveragePooling2D, BatchNormalization
from keras.models import Model, Sequential, model_from_json
from keras.optimizers import SGD, RMSprop, Adam, Adagrad, Adadelta, RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
from keras.utils.np_utils import to_categorical
from skimage.transform import resize
from sklearn import model_selection
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split, learning_curve, KFold, cross_val_score, StratifiedKFold
from sklearn.utils import class_weight
from tqdm import tqdm
from PIL import Image

%matplotlib inline
warnings.filterwarnings("ignore")

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
#Variables
#Directories
dir_dataset          = "../DataSet/"
dir_dataset_train    = dir_dataset + "train/"
dir_dataset_val      = dir_dataset + "val/"
dir_dataset_test     = dir_dataset + "test/"
dir_transferlearning = "../TransferLearning/"

#Dataset
name_dataset = "dataset.h5"
exists_dataset = True

dir_transferlearning_vgg16 = dir_transferlearning +  "Vgg16.h5"
dir_transferlearning_inception = dir_transferlearning +  "InceptionV3.h5"

In [3]:
#Defining hyperparameters
height_img            = 224
width_img             = 224
channel_img           = 3

normal_label          = 0
pneumonia_label       = 1
unknown_label         = 2

image_rescale         = 1./255
image_shear_range     = 0.2
image_zoom_range      = 0.2
image_horizontal_flip = True
image_class_mode      = 'binary'

train_batch_size      = 1
val_batch_size        = 1
test_batch_size       = 1

max_epoch_vgg16       = 30
max_epoch_inception   = 100

number_classes        = 2
optimizer             = keras.optimizers.RMSprop(lr=0.0001)

In [5]:
#Defining map characters and transferLearning
map_characters = {normal_label: 'No Pneumonia',
                  pneumonia_label: 'Yes Pneumonia'}

pretrained_model_VGG16 = VGG16(weights = dir_transferlearning_vgg16,
                               include_top = False,
                               input_shape = (height_img, width_img, channel_img))

pretrained_model_InceptionV3 = InceptionV3(weights = dir_transferlearning_inception,
                                           include_top = False,
                                           input_shape = (height_img, width_img, channel_img))

In [6]:
#Load and define mehotd resizing load image
train_datagen = ImageDataGenerator(
        rescale = image_rescale,
        shear_range = image_shear_range,
        zoom_range = image_zoom_range,
        horizontal_flip = image_horizontal_flip)

test_datagen = ImageDataGenerator(rescale=image_rescale)

train_generator = train_datagen.flow_from_directory(
        dir_dataset_train,
        target_size=(height_img, width_img),
        batch_size=train_batch_size,
        class_mode=image_class_mode)

test_generator = test_datagen.flow_from_directory(
        dir_dataset_test,
        target_size=(height_img, width_img),
        batch_size=test_batch_size,
        class_mode=image_class_mode)

Found 5216 images belonging to 2 classes.
Found 624 images belonging to 2 classes.


In [7]:
#defining metric and presentation of the obtained results
class MetricsCheckpoint(Callback):    
    def __init__(self, savepath):
        super(MetricsCheckpoint, self).__init__()
        self.savepath = savepath
        self.history = {}
        
    def on_epoch_end(self, epoch, logs=None):
        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)
        np.save(self.savepath, self.history)

def plotKerasLearningCurve():
    plt.figure(figsize=(10,5))
    metrics = np.load('logs.npy')[()]
    filt = ['acc']
    
    for k in filter(lambda x : np.any([kk in x for kk in filt]), metrics.keys()):
        l = np.array(metrics[k])
        plt.plot(l, c= 'r' if 'val' not in k else 'b', label='val' if 'val' in k else 'train')
        x = np.argmin(l) if 'loss' in k else np.argmax(l)
        y = l[x]
        plt.scatter(x,y, lw=0, alpha=0.25, s=100, c='r' if 'val' not in k else 'b')
        plt.text(x, y, '{} = {:.10f}'.format(x,y), size='15', color= 'r' if 'val' not in k else 'b')   
        
    plt.legend(loc=4)
    plt.axis([0, None, None, None]);
    plt.grid()
    plt.xlabel('Number of epochs')
    plt.ylabel('Accuracy')

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    plt.figure(figsize = (5,5))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=90)
    plt.yticks(tick_marks, classes)
    
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
        
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

def plot_learning_curve(history):
    plt.figure(figsize=(8,8))
    plt.subplot(1,2,1)
    plt.plot(history.history['acc'])
    plt.plot(history.history['val_acc'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.savefig('./accuracy_curve.png')
    plt.subplot(1,2,2)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')

    plt.savefig('./loss_curve.png')

In [11]:
#Defining network architecture
def pretrainedNetwork(i_train_generator, i_test_generator, i_pretrained_model, 
                      i_class_weight, i_num_classes, i_max_epochs, i_optimizer,
                      i_labels):
    
    model = Sequential()
    model.add(i_pretrained_model)       
    
    model.compile(loss='categorical_crossentropy', 
                  optimizer=i_optimizer, 
                  metrics=['accuracy'])

    model.fit_generator(
            i_train_generator,
            steps_per_epoch=2000,
            epochs=50,
            validation_data=i_test_generator,
            validation_steps=800)
    
    callbacks_list = [keras.callbacks.EarlyStopping(monitor='val_acc', patience=3, verbose=1)]
    model.summary()
    
    history = model.fit(i_train_generator, epochs=i_max_epochs, class_weight=i_class_weight,
                        validation_data=i_test_generator, verbose=1,
                        callbacks = [MetricsCheckpoint('logs')])
    
    model.save_weights('DeepLearningPneumonia.h5')
    
    score = model.evaluate(xtest, ytest, verbose=0)
    print('\nKeras CNN - accuracy:', score[1], '\n')
    
    y_pred = model.predict(xtest)
    print('\n', sklearn.metrics.classification_report(np.where(ytest > 0)[1],
                                                      np.argmax(y_pred, axis=1),
                                                      target_names=list(i_labels.values())), sep='') 
    
    Y_pred_classes = np.argmax(y_pred, axis = 1) 
    Y_true = np.argmax(ytest, axis = 1) 
    confusion_mtx = confusion_matrix(Y_true, Y_pred_classes) 
    plotKerasLearningCurve()
    plt.show()
    plot_learning_curve(history)
    plt.show()
    plot_confusion_matrix(confusion_mtx, classes = list(i_labels.values()))
    plt.show()
    return model            

In [12]:
#Training network VGG16
pretrainedNetwork(train_generator, test_generator, pretrained_model_VGG16,
                  class_weight, number_classes, max_epoch_vgg16, optimizer,
                  map_characters)

Epoch 1/50


ValueError: Error when checking target: expected vgg16 to have 4 dimensions, but got array with shape (1, 1)

In [None]:
#Training network InceptionV3

#Não obitivemos bons resultados com a InceptionV3

#pretrainedNetwork(train_generator, test_generator, pretrained_model_InceptionV3,
                  #class_weight_ros, number_classes, max_epoch_inception, optimizer,
                  #map_characters)    