In [0]:
# pneumonia cnn classifier
# author: Sushrut Borkar

import os
import zipfile
import random
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile

In [0]:
from google.colab import drive
drive.mount('/content/drive')

# Extract the dataset archive straight from Drive into /tmp/chest_xray.
# The previous version unzipped into /tmp/chest_xray with a shell command and
# then passed that *directory* to zipfile.ZipFile, which raises
# IsADirectoryError. One extraction is enough.
# NOTE(review): assumes the archive has NORMAL/ and PNEUMONIA/ at its top
# level, since later cells read /tmp/chest_xray/NORMAL/ -- confirm.
local_zip = '/content/drive/My Drive/chest-xray-pneumonia.zip'
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('/tmp/chest_xray')

In [0]:
# Sanity check: print how many entries each extracted class folder holds.
for class_dir in ('/tmp/chest_xray/NORMAL/', '/tmp/chest_xray/PNEUMONIA/'):
    print(len(os.listdir(class_dir)))

In [0]:
# Create the train/test directory tree that split_data copies into.
# os.makedirs(..., exist_ok=True) creates missing parents and tolerates
# directories that already exist. The previous chain of os.mkdir calls inside
# a single try/except stopped at the first existing directory
# (/tmp/chest_xray) and silently skipped every directory after it.
for split_dir in (
    '/tmp/chest_xray/train/NORMAL',
    '/tmp/chest_xray/train/PNEUMONIA',
    '/tmp/chest_xray/test/NORMAL',
    '/tmp/chest_xray/test/PNEUMONIA',
):
    os.makedirs(split_dir, exist_ok=True)

In [0]:
def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):
    """Randomly split the files in SOURCE into TRAINING and TESTING copies.

    Zero-length files are reported and skipped. The remaining files are
    shuffled; the first ``int(n * SPLIT_SIZE)`` are copied to TRAINING and
    the rest to TESTING, so the two sets never overlap.

    Args:
        SOURCE: Directory containing the files to split.
        TRAINING: Existing directory that receives the training copies.
        TESTING: Existing directory that receives the testing copies.
        SPLIT_SIZE: Fraction of usable files assigned to training.
    """
    files = []
    # Sort the listing so that a seeded `random` gives a repeatable split
    # regardless of the order os.listdir happens to return.
    for filename in sorted(os.listdir(SOURCE)):
        if os.path.getsize(os.path.join(SOURCE, filename)) > 0:
            files.append(filename)
        else:
            print(filename + " is zero length, so ignoring.")

    training_length = int(len(files) * SPLIT_SIZE)
    shuffled_set = random.sample(files, len(files))
    training_set = shuffled_set[:training_length]
    # Slice from training_length onward: a negative-index slice
    # ([-testing_length:]) returns the WHOLE list when testing_length is 0,
    # which would copy every training file into the test set as well.
    testing_set = shuffled_set[training_length:]

    for filename in training_set:
        copyfile(os.path.join(SOURCE, filename), os.path.join(TRAINING, filename))

    for filename in testing_set:
        copyfile(os.path.join(SOURCE, filename), os.path.join(TESTING, filename))


# Source folders (one per class) and the train/test folders they are split into.
NORMAL_SOURCE_DIR = "/tmp/chest_xray/NORMAL/"
TRAINING_NORMAL_DIR = "/tmp/chest_xray/train/NORMAL/"
TESTING_NORMAL_DIR = "/tmp/chest_xray/test/NORMAL/"
PNEUMONIA_SOURCE_DIR = "/tmp/chest_xray/PNEUMONIA/"
TRAINING_PNEUMONIA_DIR = "/tmp/chest_xray/train/PNEUMONIA/"
TESTING_PNEUMONIA_DIR = "/tmp/chest_xray/test/PNEUMONIA/"

# Fraction of each class that goes to the training set.
split_size = .9

# Seed the shuffle so re-running this cell reproduces the same split.
# Without a fixed seed every re-run copies a *different* random split on top
# of the previous one, so test images end up in the training folders too.
# NOTE(review): reproducibility also relies on the source directory listing
# being returned in a stable order.
SPLIT_SEED = 42
random.seed(SPLIT_SEED)

split_data(NORMAL_SOURCE_DIR, TRAINING_NORMAL_DIR, TESTING_NORMAL_DIR, split_size)
split_data(PNEUMONIA_SOURCE_DIR, TRAINING_PNEUMONIA_DIR, TESTING_PNEUMONIA_DIR, split_size)

In [0]:
# Report the number of entries in each train/test class folder after the split.
split_dirs = (
    '/tmp/chest_xray/train/NORMAL/',
    '/tmp/chest_xray/train/PNEUMONIA/',
    '/tmp/chest_xray/test/NORMAL/',
    '/tmp/chest_xray/test/PNEUMONIA/',
)
for split_dir in split_dirs:
    print(len(os.listdir(split_dir)))

In [0]:
# Small CNN for binary classification: three Conv2D + MaxPooling2D stages on
# 150x150 inputs with 3 channels, followed by a 512-unit dense layer and a
# single sigmoid output unit.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# `learning_rate` replaces the deprecated `lr` alias, which newer Keras
# releases no longer accept. The metric stays named 'acc' because the
# plotting cell reads history.history['acc'] / ['val_acc'].
model.compile(optimizer=RMSprop(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['acc'])


In [0]:
# Directory iterators for training and validation. Both rescale pixel values
# by 1/255 and yield batches of 100 images resized to 150x150 with binary labels.
TRAINING_DIR = "/tmp/chest_xray/train/"
VALIDATION_DIR = "/tmp/chest_xray/test/"

train_datagen = ImageDataGenerator(rescale=1.0/255.)
validation_datagen = ImageDataGenerator(rescale=1.0/255.)

# Options shared by both flow_from_directory calls.
flow_options = dict(batch_size=100, class_mode='binary', target_size=(150, 150))

train_generator = train_datagen.flow_from_directory(TRAINING_DIR, **flow_options)
validation_generator = validation_datagen.flow_from_directory(VALIDATION_DIR, **flow_options)

In [0]:
# Train for 15 epochs, validating on the held-out split after each epoch.
# Model.fit accepts the generator directly; fit_generator is deprecated and
# has been removed from newer Keras releases.
history = model.fit(train_generator,
                    epochs=15,
                    verbose=1,
                    validation_data=validation_generator)

In [0]:
%matplotlib inline

import matplotlib.image  as mpimg
import matplotlib.pyplot as plt

acc=history.history['acc']
val_acc=history.history['val_acc']
loss=history.history['loss']
val_loss=history.history['val_loss']

epochs=range(len(acc)) 

plt.plot(epochs, acc, 'r', "Training Accuracy")
plt.plot(epochs, val_acc, 'b', "Validation Accuracy")
plt.title('Training and validation accuracy')
plt.figure()

plt.plot(epochs, loss, 'r', "Training Loss")
plt.plot(epochs, val_loss, 'b', "Validation Loss")
plt.figure()

In [0]:
import numpy as np
from google.colab import files
# Use the tf.keras image helpers so they match the tf.keras model above.
from tensorflow.keras.preprocessing import image

# Prompt for one or more image files and classify each of them.
uploaded = files.upload()

for fn in uploaded.keys():
    path = '/content/' + fn
    img = image.load_img(path, target_size=(150, 150))
    # Apply the same 1/255 rescaling the training/validation generators use;
    # feeding raw 0-255 pixels would not match what the model was trained on.
    x = image.img_to_array(img) / 255.0
    x = np.expand_dims(x, axis=0)  # add the batch dimension

    images = np.vstack([x])
    classes = model.predict(images, batch_size=10)
    print(classes[0])
    # NOTE(review): assumes PNEUMONIA is the positive class (label 1);
    # confirm with train_generator.class_indices.
    if classes[0] > 0.5:
        print(fn + " has pneumonia")
    else:
        print(fn + " doesn't have pneumonia")