# Configuration

In [7]:
# Remote archive that holds the (reduced) chest X-ray dataset
data_url = 'http://www.jackdellequerce.com/data/reduced_chest_xray.zip'

# Local folder where the archive is downloaded and unpacked
download_target_imgs = '/content/data/'

# Keras constants
# NOTE(review): none of these four names is read by the cells visible in this
# notebook (training uses `batch_size`, `ep` and a 150x150 target size) —
# confirm whether they are still needed before relying on them.
BATCH_SIZE = 32
IMAGE_SIZE = [256, 256]
IMAGE_SHAPE = (IMAGE_SIZE[0], IMAGE_SIZE[1] , 3)
EPOCHS = 12

###############################################
# https://github.com/fchollet/deep-learning-with-python-notebooks/blob/master/5.3-using-a-pretrained-convnet.ipynb
# https://github.com/Bixi81/Python-ml/blob/master/keras_pretrained_imagerec_multiclass.py

# DIR with training images.
# Derived from the download target so the place the archive is unpacked to and
# the place the generators read from cannot drift apart.
base_dir = download_target_imgs + 'reduced_chest_xray/'
# Number training images
ntrain = 300
# Number validation images
nval  = 100
# Batch size
batch_size = 20 #20
# Epochs 
ep = 50
# Number of classes (for training, output layer)
nclasses = 2
###############################################

# Data setup

In [8]:
import os
import time
import subprocess
from urllib.request import urlopen
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import glob
import requests
import zipfile

def _list_dataset_images():
  """Return the (train, validation) lists of image paths currently on disk.

  Both lists are empty when the dataset has not been unpacked yet.
  """
  dataset_root = download_target_imgs + 'reduced_chest_xray/'
  return (glob.glob(dataset_root + 'train/*/*'),
          glob.glob(dataset_root + 'test/*/*'))

#these two lists should contain the full paths of all train and validation images
train_filenames, val_filenames = _list_dataset_images()

#let's check that we actually have the data
if len(train_filenames) == 0 or len(val_filenames) == 0:
  #either the data was never downloaded or something bad happened
  #in any case, we download and unzip everything

  #room for data
  os.makedirs(download_target_imgs, exist_ok=True)
  archive_path = download_target_imgs + 'local_archive.zip'

  #downloading: fail right here on an HTTP error instead of saving an error
  #page that would only blow up later as a corrupt zip; the timeout keeps a
  #dead server from hanging the notebook forever
  r = requests.get(data_url, timeout=60)
  r.raise_for_status()
  #the with-block guarantees the archive is flushed and closed before unpacking
  with open(archive_path, 'wb') as archive_file:
    archive_file.write(r.content)

  #unpacking (the with-block closes the archive handle afterwards)
  with zipfile.ZipFile(archive_path) as z:
    z.extractall(path = download_target_imgs)

  #at this point data is there, we are ready to get the list of files
  train_filenames, val_filenames = _list_dataset_images()

#whatever the original case, at this point we have the files
print('Available images for train: ' + str(len(train_filenames)))
print('Available images for validation: ' + str(len(val_filenames)))

Available images for train: 300
Available images for validation: 100


# CNN

## Import

In [9]:

import os, datetime
import numpy as np
from keras.applications.vgg16 import VGG16 
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras import models, layers, optimizers, regularizers
from keras.callbacks import EarlyStopping
from keras.callbacks import ReduceLROnPlateau
from keras.layers.core import Dense, Dropout, Activation
from PIL import ImageFile
import statistics
# Let Pillow load truncated image files instead of raising an error on them
# (presumably so one damaged file in the dataset does not abort training — confirm).
ImageFile.LOAD_TRUNCATED_IMAGES = True


## Data setup

## Data generators

In [10]:
#why rescale: https://github.com/Arsey/keras-transfer-learning-for-oxford102/issues/1

# Wall-clock start of the pipeline; the closing cell subtracts it from the end time
start = datetime.datetime.now()

train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'test')

# Random augmentations applied to training images only
augmentation = dict(
      rotation_range=10,
      width_shift_range=0.2,
      height_shift_range=0.2,
      shear_range=0.2,
      zoom_range=0.2,
      horizontal_flip=False,
      fill_mode='nearest')

# Training images: pixel values multiplied by 1/255, then augmented
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation)

# Validation images get the same rescaling but must never be augmented
test_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by both directory iterators:
#  - every image is resized to 150x150
#  - class_mode='categorical' yields one-hot label vectors, which is what the
#    categorical_crossentropy loss used at compile time expects
flow_options = dict(
        target_size=(150, 150),
        batch_size=batch_size,
        class_mode='categorical')

train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
validation_generator = test_datagen.flow_from_directory(validation_dir, **flow_options)

Found 300 images belonging to 2 classes.
Found 100 images belonging to 2 classes.


## Model - architecture

In [11]:
# VGG16 convolutional base with ImageNet weights and without its classifier
# head; explicitly left trainable so its weights are updated during fit.
conv_base = VGG16(include_top=False, weights='imagenet', input_shape=(150, 150, 3))
conv_base.trainable = True

# Full network: conv base -> flatten -> softmax over `nclasses` outputs
model = models.Sequential([
    conv_base,
    layers.Flatten(),
    layers.Dense(nclasses, activation='softmax'),
])

## Model - compile

In [12]:
# Compile: categorical cross-entropy on the softmax output, RMSprop with a
# small learning rate, accuracy reported under the key 'acc'
model.compile(optimizer=optimizers.RMSprop(learning_rate=2e-5),
              loss='categorical_crossentropy',
              metrics=['acc'])

# Two callbacks, both monitoring the training loss: early stopping, and a
# learning-rate reduction for fine tuning.
# More on callbacks: https://keras.io/api/callbacks/
# NOTE(review): both watch 'loss' (training), not 'val_loss' — confirm that is
# intended, since restore_best_weights then picks the best *training* epoch.
loss_watch = dict(monitor='loss', mode='min', verbose=1)

es = EarlyStopping(min_delta=0.001, patience=40, restore_best_weights=True, **loss_watch)
reduce_lr = ReduceLROnPlateau(factor=0.9, patience=15, cooldown=3, min_lr=1e-20, **loss_watch)

## Model - fit

In [13]:
# Batches per epoch, computed with integer ceiling division:
#  - the result is an int (round(x, 0) on a float returns a float)
#  - a final partial batch is kept when the image count is not a multiple of
#    batch_size (rounding could round down and silently drop it)
train_steps = (ntrain + batch_size - 1) // batch_size
val_steps = (nval + batch_size - 1) // batch_size

# Train; `history.history` keeps the per-epoch metrics used by the summary cell
history = model.fit(
      train_generator,
      steps_per_epoch=train_steps,
      epochs=ep,
      validation_data=validation_generator,
      validation_steps=val_steps,
      verbose=2,
      callbacks=[es, reduce_lr])

Epoch 1/50
15/15 - 14s - loss: 0.4758 - acc: 0.7967 - val_loss: 0.4712 - val_acc: 0.8000 - lr: 2.0000e-05 - 14s/epoch - 949ms/step
Epoch 2/50
15/15 - 7s - loss: 0.2711 - acc: 0.9000 - val_loss: 0.3409 - val_acc: 0.8500 - lr: 2.0000e-05 - 7s/epoch - 450ms/step
Epoch 3/50
15/15 - 7s - loss: 0.1829 - acc: 0.9400 - val_loss: 0.4212 - val_acc: 0.8300 - lr: 2.0000e-05 - 7s/epoch - 445ms/step
Epoch 4/50
15/15 - 7s - loss: 0.2517 - acc: 0.9067 - val_loss: 0.2748 - val_acc: 0.8900 - lr: 2.0000e-05 - 7s/epoch - 450ms/step
Epoch 5/50
15/15 - 7s - loss: 0.1726 - acc: 0.9367 - val_loss: 0.2673 - val_acc: 0.8800 - lr: 2.0000e-05 - 7s/epoch - 450ms/step
Epoch 6/50
15/15 - 7s - loss: 0.1257 - acc: 0.9600 - val_loss: 0.2828 - val_acc: 0.8900 - lr: 2.0000e-05 - 7s/epoch - 446ms/step
Epoch 7/50
15/15 - 7s - loss: 0.1427 - acc: 0.9500 - val_loss: 0.3322 - val_acc: 0.8900 - lr: 2.0000e-05 - 7s/epoch - 441ms/step
Epoch 8/50
15/15 - 7s - loss: 0.1449 - acc: 0.9467 - val_loss: 0.2894 - val_acc: 0.8900 - lr: 2

## Closing remarks

In [15]:
# Save model
model.save(os.path.join(download_target_imgs, 'keras_multiclass_model.hdf5'))
end = datetime.datetime.now()
# Elapsed time as a string with the fractional seconds dropped. Built from whole
# seconds rather than by slicing the string, so runs of 10 hours or more are
# not cut off mid-field.
delta = str(datetime.timedelta(seconds=int((end - start).total_seconds())))

def _last_five(metric_name):
  """Return the values of `metric_name` for the last five recorded epochs
  (fewer if training ran for fewer than five epochs)."""
  return history.history[metric_name][-5:]

# Metrics
acc = _last_five('acc')
val_acc = _last_five('val_acc')
loss = _last_five('loss')
val_loss = _last_five('val_loss')

# Epochs actually trained: can be lower than the configured `ep` when early
# stopping ends the run
epochs_run = len(history.history['loss'])

# End statement
print("============================================")
print("Time taken (h/m/s): %s" %delta)
print("============================================")
print("Metrics (average last five steps)")
print("--------------------------------------------")
print("Loss       %.3f" %statistics.mean(loss))
print("Val. Loss  %.3f" %statistics.mean(val_loss))
print("--------------------------------------------")
print("Acc.       %.3f" %statistics.mean(acc))
print("Val. Acc.  %.3f" %statistics.mean(val_acc))
print("============================================")
print("Epochs:    %s" %(epochs_run))

Time taken (h/m/s): 0:06:40
Metrics (average last five steps)
--------------------------------------------
Loss       0.019
Val. Loss  0.380
--------------------------------------------
Acc.       0.995
Val. Acc.  0.922
Epochs:    50
