<a href="https://colab.research.google.com/github/rmcnew/Pneumonia_Classifier/blob/master/Pneumonia_Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Pneumonia Classifier

In [0]:
# Richard Scott McNew
# A02077329
# CS 6600: Intelligent Systems

# use tensorflow 2.x
%tensorflow_version 2.x
from __future__ import absolute_import, division, print_function, unicode_literals
from google.colab import drive, files
import datetime
import io
import os
import pathlib
from time import sleep
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

# Turn on XLA (accelerated linear algebra) JIT compilation in the TensorFlow optimizer
tf.config.optimizer.set_jit(True)
# Short alias for tf.data's AUTOTUNE constant
# NOTE(review): AUTOTUNE is not referenced anywhere else in the visible code -- confirm it is still needed
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Load the TensorBoard notebook extension for training metrics graphs
%load_ext tensorboard


################### Constants #######################
BATCH_SIZE = 8  # Images per batch; kept small so the current batch fits in GPU memory for faster training
IMAGE_SIDE = 450  # Images are resized to IMAGE_SIDE x IMAGE_SIDE; create_model() also uses it as the first Conv2D filter count
SHUFFLE_SIZE = 25  # NOTE(review): not referenced in the visible code -- confirm it is still needed
EPOCHS = 2  # Only run a few epochs at a time since Colaboratory times out 


######### Dataset Download and Path Construction #####################
# Local path to the dataset
DATASET_PATH = "/content/Pneumonia_Classifier/dataset"

# There is a copy of the Pneumonia dataset in my Pneumonia_Classifier GitHub repo
# We can clone the 'dataset_only' branch to get a local copy
def get_dataset_files_from_github():
    if not os.path.isdir(DATASET_PATH):
        print("Downloading the dataset from GitHub . . .")
        !git clone -b dataset_only https://github.com/rmcnew/Pneumonia_Classifier.git 
    else:
        print("Using previously downloaded dataset")

# There is a tarball of the Pneumonia dataset available as a publicly shared link
# from my Google Drive account.  This is probably the fastest way to download a 
# local copy of the dataset since it should be all within Google's networks
def get_dataset_files_from_google_drive_shared():
    if not os.path.isdir(DATASET_PATH):
        print("Downloading the dataset from Google Drive shared link . . .")
        !gdown https://drive.google.com/uc?id=1u2_Ap4rOxHuEKnSb5te070skuoXcJTX9
        !tar xjf Pneumonia_Classifier_dataset.tar.bz2
        print("Download completed!  Untarring the model . . .")
    else:
        print("Using previously downloaded dataset")

# Download a local copy of the dataset and then build paths 
# to the different dataset subsets: 'train', 'test', and 'validate'
#get_dataset_files_from_github()
get_dataset_files_from_google_drive_shared()

# Root of the local dataset copy and its three subset directories
dataset = pathlib.Path(DATASET_PATH)
test = dataset / "test"
train = dataset / "train"
validate = dataset / "validate"

# Number of '.jpeg' files found recursively under each subset directory
test_count = sum(1 for _ in test.glob('**/*.jpeg'))
train_count = sum(1 for _ in train.glob('**/*.jpeg'))
validate_count = sum(1 for _ in validate.glob('**/*.jpeg'))


####################### Dataset Preprocessing #########################
def create_train_image_generator():
    """Build the directory iterator that yields training batches.

    Pixel values are rescaled by 1/255 and zoom augmentation
    (zoom_range=0.5) is applied. Images come from the `train` directory,
    are resized to IMAGE_SIDE x IMAGE_SIDE, shuffled, and labelled in
    'binary' class mode.

    Returns:
        The iterator produced by ImageDataGenerator.flow_from_directory.
    """
    augmenter = ImageDataGenerator(rescale=1./255, zoom_range=0.5)
    flow_options = dict(
        directory=str(train),
        target_size=(IMAGE_SIDE, IMAGE_SIDE),
        batch_size=BATCH_SIZE,
        class_mode='binary',
        shuffle=True,
    )
    return augmenter.flow_from_directory(**flow_options)

def create_test_image_generator():
    """Build the directory iterator that yields batches from the test subset.

    Only 1/255 rescaling is applied (no augmentation). Images come from the
    `test` directory, are resized to IMAGE_SIDE x IMAGE_SIDE, and are
    labelled in 'binary' class mode.

    Returns:
        The iterator produced by ImageDataGenerator.flow_from_directory.
    """
    rescaler = ImageDataGenerator(rescale=1./255)
    flow_options = dict(
        directory=str(test),
        target_size=(IMAGE_SIDE, IMAGE_SIDE),
        batch_size=BATCH_SIZE,
        class_mode='binary',
    )
    return rescaler.flow_from_directory(**flow_options)

def create_validate_image_generator():
    """Build the directory iterator that yields batches from the validate subset.

    Only 1/255 rescaling is applied (no augmentation). Images come from the
    `validate` directory, are resized to IMAGE_SIDE x IMAGE_SIDE, and are
    labelled in 'binary' class mode.

    Returns:
        The iterator produced by ImageDataGenerator.flow_from_directory.
    """
    rescaler = ImageDataGenerator(rescale=1./255)
    flow_options = dict(
        directory=str(validate),
        target_size=(IMAGE_SIDE, IMAGE_SIDE),
        batch_size=BATCH_SIZE,
        class_mode='binary',
    )
    return rescaler.flow_from_directory(**flow_options)


############################ Model Creation, Loading, and Saving ##############################

# val 90
#def create_model():
#    model = Sequential([
#        Conv2D(450, 10, padding='same', activation='relu', kernel_regularizer='l2', input_shape=(450, 450, 3)),
#        MaxPooling2D(),
#        Conv2D(225, 5, padding='same', activation='relu', kernel_regularizer='l2'),
#        MaxPooling2D(),
#        Dropout(0.2),
#        Conv2D(100, 5, padding='same', activation='relu', kernel_regularizer='l2'),
#        MaxPooling2D(),        
#        Flatten(),
#        Dense(512, activation='relu'),
#        Dense(1, activation='sigmoid')
#    ])
#    model.compile(optimizer='SGD', loss='binary_crossentropy', metrics=['accuracy']) 
#    return model


def create_model():
    """Create and compile a fresh, untrained CNN for binary classification.

    The network is three 'same'-padded, ReLU, L2-regularized convolutions,
    each followed by max pooling (with dropout of 0.2 after the second
    pooling), then a 512-unit dense layer and a single sigmoid output.
    It is compiled with the SGD optimizer, binary cross-entropy loss, and
    the accuracy metric.

    NOTE(review): the first convolution uses IMAGE_SIDE as its filter
    count as well as for the input shape -- confirm the coupling is intended.

    Returns:
        The compiled Sequential model.
    """
    # Settings shared by every convolutional layer
    conv_options = dict(padding='same', activation='relu', kernel_regularizer='l2')
    input_dims = (IMAGE_SIDE, IMAGE_SIDE, 3)

    layer_stack = [
        Conv2D(IMAGE_SIDE, 10, input_shape=input_dims, **conv_options),
        MaxPooling2D(),
        Conv2D(225, 5, **conv_options),
        MaxPooling2D(),
        Dropout(0.2),
        Conv2D(100, 5, **conv_options),
        MaxPooling2D(),
        Flatten(),
        Dense(512, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]

    classifier = Sequential(layer_stack)
    classifier.compile(loss='binary_crossentropy', optimizer='SGD', metrics=['accuracy'])
    return classifier

    
# There is a trained persisted model that is shared from my Google Drive account
# Download a copy to local storage for use in evaluating against the datasets
def download_trained_model_from_google_drive_shared(download_anyway=False):
    MODEL_PATH = "/content/pneumonia_classifier_model.h5"
    if not os.path.exists(MODEL_PATH) or download_anyway:
        print("Downloading trained model from Google Drive shared link . . .")
        !gdown https://drive.google.com/uc?id=1Qwm5QlveZsUzO6vU7jMBUOgx_uHlSN9i
        print("Download completed!  Loading the model . . .")
    else:
        print("Using previously downloaded model")
    model = tf.keras.models.load_model(MODEL_PATH, custom_objects=None, compile=True)
    print("Model loaded.")
    return model

# Save the trained model and then create a web browser dialogue to download it
# This can take a bit to run and download since the model occupies about 700 MB on disk
def download_trained_model_via_browser(model):
    """Save `model` to a temporary .h5 file and offer it as a browser download.

    Args:
        model: The Keras model to persist; the optimizer state is included.
    """
    export_path = '/tmp/pneumonia_classifier_model.h5'
    save_options = dict(overwrite=True, include_optimizer=True, save_format='h5')
    model.save(export_path, **save_options)
    print("Preparing model for browser download.  Please wait as it could take a while . . . .")
    # Short pause before handing the file to the browser download helper
    sleep(2)
    files.download(export_path)

########################### Google Drive functions #############################
# Note that these functions will require interactive steps (opening a page in a 
# web browser and copying / pasting the authorization code) to create OAuth 
# tokens that will be used to authorize access to your Google Drive account

# Google Drive path to the saved model
GOOGLE_DRIVE_MODEL_PATH = "/content/drive/My Drive/USU/intelligent_systems/Pneumonia_Classifier/pneumonia_classifier_model.h5"

# mount Google drive
def mount_drive():
    """Mount Google Drive at '/content/drive' via google.colab's drive helper."""
    drive.mount('/content/drive')

# unmount Google drive
def unmount_drive():
    """Flush pending writes and unmount Google Drive via google.colab's drive helper."""
    drive.flush_and_unmount()

def save_model_to_google_drive(model):
    """Save `model` (including optimizer state) to Google Drive in HDF5 format.

    Google Drive is mounted for the duration of the save and is always
    flushed and unmounted afterwards, even when saving raises.

    Args:
        model: The Keras model to write to GOOGLE_DRIVE_MODEL_PATH.
    """
    mount_drive()
    try:
        model.save(GOOGLE_DRIVE_MODEL_PATH, overwrite=True, include_optimizer=True, save_format='h5')
    finally:
        # Release the mount on failure too, so Drive is not left mounted
        unmount_drive()

def load_trained_model_from_google_drive():
    """Load the previously saved model from Google Drive.

    Google Drive is mounted for the duration of the load and is always
    flushed and unmounted afterwards, even when loading raises.

    Returns:
        The Keras model loaded (and compiled) from GOOGLE_DRIVE_MODEL_PATH.
    """
    mount_drive()
    try:
        model = tf.keras.models.load_model(GOOGLE_DRIVE_MODEL_PATH, custom_objects=None, compile=True)
    finally:
        # Release the mount on failure too, so Drive is not left mounted
        unmount_drive()
    return model


###################### Model Training and Evaluation ##############################
def train_model():
    train_data_gen = create_train_image_generator()
    test_data_gen = create_test_image_generator()
    model = create_model()
    log_dir="logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
    history = model.fit(
        train_data_gen,        
        steps_per_epoch=train_count // BATCH_SIZE,
        epochs=EPOCHS,
        validation_data=test_data_gen,        
        validation_steps=test_count // BATCH_SIZE,
        callbacks=[tensorboard_callback]
    ) 
    save_model_to_google_drive(model)
    %tensorboard --logdir logs/fit    


def train_model_more():
    train_data_gen = create_train_image_generator()
    test_data_gen = create_test_image_generator()
    model = load_trained_model_from_google_drive()    
    log_dir="logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
    history = model.fit(
        train_data_gen,
        steps_per_epoch=train_count // BATCH_SIZE,
        epochs=EPOCHS,
        validation_data=test_data_gen,
        validation_steps=test_count // BATCH_SIZE,
        callbacks=[tensorboard_callback]
    )    
    save_model_to_google_drive(model)
    %tensorboard --logdir logs/fit
    
    
def test_trained_model():
    """Evaluate the shared trained model against the test subset."""
    # Build the data iterator first, then fetch/load the model
    batches = create_test_image_generator()
    trained = download_trained_model_from_google_drive_shared()
    trained.evaluate(batches)

def validate_trained_model():
    """Evaluate the shared trained model against the validate subset."""
    # Build the data iterator first, then fetch/load the model
    batches = create_validate_image_generator()
    trained = download_trained_model_from_google_drive_shared()
    trained.evaluate(batches)

############ Main Section ###############

# Choose the action this cell performs by (un)commenting the calls below.
#test_trained_model()
# Resume training from the model saved on Google Drive and save it back.
train_model_more()



The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
Downloading the dataset from Google Drive shared link . . .
Downloading...
From: https://drive.google.com/uc?id=1u2_Ap4rOxHuEKnSb5te070skuoXcJTX9
To: /content/Pneumonia_Classifier_dataset.tar.bz2
1.19GB [00:06, 172MB/s]
