# Note: No Train-test Split, Just Train validation Split. In this training model, we are not concerned with testing the performance of the model, Just ensuring that the model has learned enough to discriminate between Normal V/s Pneumatic classifications to adapt and transfer learn with an additional class [COVID in our case]

https://stackoverflow.com/questions/42443936/keras-split-train-test-set-when-using-imagedatagenerator

In [2]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline 

from sklearn.metrics import *

#To see our directory
import os
import random

#Tensorflow and Keras for models and other DL tasks
import tensorflow as tf

ModuleNotFoundError: No module named 'tensorflow'

In [3]:
!mkdir /tmp/Xray_train_data

In [6]:
!cp -R "COVID-19 Radiography Database/Viral Pneumonia" "/tmp/Xray_train_data/Viral Pneumonia"

In [7]:
!cp -R "COVID-19 Radiography Database/NORMAL" "/tmp/Xray_train_data"

In [8]:
!ls "/tmp/Xray_train_data/"

[34mNORMAL[m[m          [34mViral Pneumonia[m[m


In [9]:
!ls -1 "/tmp/Xray_train_data/NORMAL"| wc -l 

    1341


In [10]:
!ls -1 "/tmp/Xray_train_data/Viral Pneumonia"| wc -l 

    1345


In [8]:
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
import keras

In [9]:
train_data_dir= "/tmp/Xray_train_data"

In [10]:
def data_generator(img_height = 1024, img_width=1024, split = 0.3):

    train_datagen = ImageDataGenerator(validation_split=split, horizontal_flip=True, rescale=1./255, featurewise_center=True,
                     featurewise_std_normalization=True)

    train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_height, img_width),
        class_mode='categorical',
        subset='training')
    validation_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_height, img_width),
        class_mode='categorical',
        subset='validation')
    
    return train_generator, validation_generator

In [11]:
def get_Sequential_Model(img_height = 1024, img_width=1024):
    model=keras.Sequential()
    model.add(keras.layers.Conv2D(16, (3,3), activation='relu', input_shape=(img_height, img_width, 3), padding='same'))
    model.add(keras.layers.MaxPooling2D(pool_size=(2, 2), strides=None, padding="valid"))
    model.add(keras.layers.Conv2D(8, (3,3), activation='relu', padding='same'))
    model.add(keras.layers.MaxPooling2D(pool_size=(2, 2), strides=None, padding="valid"))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(256))
    model.add(BatchNormalization())
    model.add(keras.layers.Dropout(.5, input_shape=(32,)))
    model.add(keras.layers.Dense(128))
    model.add(BatchNormalization())
    model.add(keras.layers.Dropout(.3, input_shape=(32,)))
    model.add(keras.layers.Dense(64))
    model.add(BatchNormalization())
    model.add(keras.layers.Dropout(.1, input_shape=(32,)))
    model.add(keras.layers.Dense(2, activation='sigmoid'))
    return model

In [12]:
try:
    model = load_model('PneumoniaVNormal.h5')
    print('Model Loaded')
except:
    model = get_Sequential_Model(img_height = 224, img_width=224)
    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics = 'accuracy')

    train_generator, validation_generator = data_generator(img_height = 224, img_width=224)

    trainer = model.fit(train_generator,validation_data=validation_generator,epochs=20,verbose=2)

    model.save('PneumoniaVNormal.h5')

    print('Model saved.')

Found 1881 images belonging to 2 classes.
Found 805 images belonging to 2 classes.




Epoch 1/20
59/59 - 48s - loss: 0.2567 - accuracy: 0.8969 - val_loss: 0.3166 - val_accuracy: 0.9019
Epoch 2/20
59/59 - 48s - loss: 0.1383 - accuracy: 0.9484 - val_loss: 1.1940 - val_accuracy: 0.5193
Epoch 3/20
59/59 - 48s - loss: 0.1078 - accuracy: 0.9612 - val_loss: 2.2035 - val_accuracy: 0.5217
Epoch 4/20
59/59 - 48s - loss: 0.0792 - accuracy: 0.9724 - val_loss: 1.1865 - val_accuracy: 0.6435
Epoch 5/20
59/59 - 47s - loss: 0.0729 - accuracy: 0.9718 - val_loss: 0.3348 - val_accuracy: 0.8783
Epoch 6/20
59/59 - 48s - loss: 0.0460 - accuracy: 0.9835 - val_loss: 0.1304 - val_accuracy: 0.9478
Epoch 7/20
59/59 - 47s - loss: 0.0376 - accuracy: 0.9888 - val_loss: 0.2065 - val_accuracy: 0.9391
Epoch 8/20
59/59 - 47s - loss: 0.0452 - accuracy: 0.9835 - val_loss: 0.9682 - val_accuracy: 0.6820
Epoch 9/20
59/59 - 48s - loss: 0.0388 - accuracy: 0.9883 - val_loss: 2.7719 - val_accuracy: 0.5627
Epoch 10/20
59/59 - 47s - loss: 0.0223 - accuracy: 0.9931 - val_loss: 0.1256 - val_accuracy: 0.9478
Epoch 11/

Reason for using categorical mode for datagen and using 2 neurons in final dense layer:
https://datascience.stackexchange.com/questions/45833/keras-model-only-predicts-one-class-for-all-the-test-images

(Creates Problem with binary mode, due to the fact output layer will only give one probability, and the predictor which will take class based on max, while doing it manually, will always attribute things to class 0, Hence, the problem)

In [14]:
print('Testing:')

model.evaluate_generator(validation_generator)
validation_generator.reset()
predIdx = model.predict_generator(validation_generator, len(validation_generator))
predIdxs = np.argmax(predIdx, axis=1) 
validation_labels = validation_generator.classes

target_names = ['NORMAL', 'Viral Pneumonia']
print('\n')
print(classification_report(validation_labels, predIdxs, target_names=target_names, digits=5))

Testing:


                 precision    recall  f1-score   support

         NORMAL    0.48916   0.50498   0.49694       402
Viral Pneumonia    0.48974   0.47395   0.48172       403

       accuracy                        0.48944       805
      macro avg    0.48945   0.48946   0.48933       805
   weighted avg    0.48945   0.48944   0.48932       805



In [15]:
print('Train Data metrics:')
    
model.evaluate_generator(train_generator)

train_generator.reset()
predIdxs = model.predict_generator(train_generator, len(train_generator))
predIdxs = np.argmax(predIdxs, axis=1) 
labels = train_generator.classes

target_names = ['NORMAL', 'Viral Pneumonia']
print('\n')
print(classification_report(labels, predIdxs, target_names=target_names, digits=5))

Train Data metrics:


                 precision    recall  f1-score   support

         NORMAL    0.50053   0.50053   0.50053       939
Viral Pneumonia    0.50212   0.50212   0.50212       942

       accuracy                        0.50133      1881
      macro avg    0.50133   0.50133   0.50133      1881
   weighted avg    0.50133   0.50133   0.50133      1881

