In [None]:
import tensorflow as tf
from tensorflow.keras.applications.vgg19 import VGG19
### Run this cell to import the packages you will need to unpack the dataset
# File manipulation and IO (input/output)
import os
import pickle
import zipfile
from google.colab import files

# Import numerical and dataframe handling
import numpy as np
import scipy
import pandas as pd

# Data preprocessing
from PIL import Image
from sklearn.utils import shuffle

# Model scoring
from sklearn.metrics import confusion_matrix
from sklearn import metrics

# Import standard machine learning machinery
import tensorflow as tf
#from tensorflow import keras
#from tensorflow.keras import layers

# Garbage collection (for saving RAM during training)
import gc

# Import plotting functionality
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.ticker import MultipleLocator
import matplotlib

from skimage.color import gray2rgb
from skimage import img_as_ubyte
import time

from glob import glob
#size to test vgg

# import the libraries as shown below

from tensorflow.keras.layers import Input, Lambda, Dense, Flatten,Conv2D
from tensorflow.keras.models import Model
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator,load_img
from tensorflow.keras.models import Sequential
import numpy as np
from glob import glob
import matplotlib.pyplot as plt

# Height and width (in pixels) of the model input; the VGG19 cell appends the
# channel dimension with `IMAGE_SIZE + [3]`.
IMAGE_SIZE = [224,224]

In [None]:
# Load the Google Colab Drive helper.
from google.colab import drive

# Mount Google Drive under /content/drive. This will prompt for authorization.
drive.mount('/content/drive')

# List the contents of the training_set folder (quick check that the mount
# worked and the dataset path is reachable).
!ls "/content/drive/My Drive/BWSI Medlytics/NASA JR Final Project/training_set"

In [None]:
# VGG19 transfer-learning model.
# NOTE: the variable is named `mobilnet` for historical reasons, but it holds a
# VGG19 convolutional base (ImageNet weights, classifier head removed).
mobilnet = VGG19(input_shape=IMAGE_SIZE + [3], weights='imagenet', include_top=False)

# Freeze the pretrained convolutional layers so only the new head is trained.
for layer in mobilnet.layers:
    layer.trainable = False

# One sub-folder per class. Use the same mounted-Drive path as the data
# generators; the previous relative pattern ('Nasa Jr Final Project/...') did
# not match that location, which leaves `folders` empty and the output layer
# with zero units.
TRAIN_DIR = '/content/drive/My Drive/BWSI Medlytics/NASA JR Final Project/training_set'
folders = [path for path in glob(os.path.join(TRAIN_DIR, '*')) if os.path.isdir(path)]
if not folders:
    raise FileNotFoundError(
        "No class folders found in '" + TRAIN_DIR + "'. Is Google Drive mounted?")

# New classification head on top of the frozen base - more layers can be added here.
x = Flatten()(mobilnet.output)

# One softmax output per class folder.
prediction = Dense(len(folders), activation='softmax')(x)

# Create a model object.
# NOTE(review): the next cell of this notebook rebinds `model` to a CNN built
# from scratch, so this VGG19 model is only trained if that cell is skipped.
model = Model(inputs=mobilnet.input, outputs=prediction)

In [None]:
### Stand-alone CNN built from scratch (this rebinds `model`; nothing from the
### VGG19 base is reused here).
from tensorflow.keras.layers import MaxPooling2D

# Three conv/pool stages with a doubling filter count, followed by a dense
# classifier that ends in a 3-way softmax.
model = Sequential([
    Conv2D(filters=16, kernel_size=2, padding="same", activation="relu",
           input_shape=(224, 224, 3)),
    MaxPooling2D(pool_size=2),
    Conv2D(filters=32, kernel_size=2, padding="same", activation="relu"),
    MaxPooling2D(pool_size=2),
    Conv2D(filters=64, kernel_size=2, padding="same", activation="relu"),
    MaxPooling2D(pool_size=2),
    Flatten(),
    Dense(500, activation="relu"),
    Dense(3, activation="softmax"),
])

# Print the layer-by-layer summary of the network.
model.summary()

In [None]:
# Loss: categorical cross-entropy (the generators are created with
# class_mode='categorical').
loss_func = tf.keras.losses.categorical_crossentropy

# Area under the ROC curve, tracked as an extra metric.
auc = tf.keras.metrics.AUC(multi_label=True, curve='ROC')

# Compile the model with the chosen loss, the Adam optimizer, and the metrics
# in the order [accuracy, auc].
model.compile(
    optimizer='adam',
    loss=loss_func,
    metrics=['accuracy', auc],
)

In [None]:
# Only the tf.keras generator is imported: the standalone `keras` import that
# used to precede it bound the same name and was immediately overwritten.
from tensorflow.keras.preprocessing.image import ImageDataGenerator

TRAIN_DIR = '/content/drive/My Drive/BWSI Medlytics/NASA JR Final Project/training_set'
TEST_DIR = '/content/drive/My Drive/BWSI Medlytics/NASA JR Final Project/testing_set'
VALIDATION_SPLIT = 0.2

# Training images: rescale to [0, 1] and apply random zoom augmentation.
train_datagen = ImageDataGenerator(rescale = 1./255,
                                   zoom_range = 0.2,
                                   validation_split = VALIDATION_SPLIT)

# Validation images: same rescaling and the same split fraction, but NO
# augmentation, so validation metrics (and early stopping) are measured on
# unmodified images. The split fraction must match the training generator's
# so that the two subsets do not overlap.
val_datagen = ImageDataGenerator(rescale = 1./255,
                                 validation_split = VALIDATION_SPLIT)

# Test images: rescaling only.
test_datagen = ImageDataGenerator(rescale = 1./255)

training_set = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size = (224, 224),
    batch_size = 32,
    class_mode = 'categorical',
    subset = 'training')

validation_set = val_datagen.flow_from_directory(
    directory = TRAIN_DIR,  # same directory as training data
    target_size = (224, 224),
    batch_size = 32,
    shuffle = False,
    class_mode = 'categorical',
    subset = 'validation')

# shuffle=False keeps predictions aligned with `test_set.labels`.
test_set = test_datagen.flow_from_directory(
    TEST_DIR,
    target_size = (224, 224),
    batch_size = 32,
    class_mode = 'categorical',
    shuffle = False)

In [None]:

class garbage_collect_callback(tf.keras.callbacks.Callback):
    """Keras callback that runs Python's garbage collector after every epoch."""

    def on_epoch_end(self, epoch, logs=None):
        # `epoch` and `logs` are unused; the hook only triggers a collection pass.
        gc.collect()


# Stop once the monitored metric has not improved for 5 epochs and roll the
# model back to its best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(patience = 5, restore_best_weights=True)

# Train for at most 30 epochs, collecting garbage after each one.
history = model.fit(
    training_set,
    epochs = 30,
    validation_data=validation_set,
    callbacks = [garbage_collect_callback(), early_stopping],
)

In [None]:
# True class indices of the test images, as recorded by the generator.
y_test = test_set.labels

# model.predict returns one row of class probabilities per image; argmax over
# axis 1 turns each row into the index of the most probable class.
test_pred = np.argmax(model.predict(test_set), axis=1)




In [None]:
# Confusion matrix of the test set (true labels vs. predicted labels).
test_conf_mat = confusion_matrix(y_true=y_test, y_pred=test_pred)
print(test_conf_mat)

In [None]:
#helps plot confusion matrix
import numpy as np


def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """
    Draw a confusion matrix as an annotated heat map and show it.

    Arguments
    ---------
    cm:           confusion matrix from sklearn.metrics.confusion_matrix
                  (square, rows = true labels, columns = predicted labels)

    target_names: the class names in label order, for example
                  ['high', 'medium', 'low']; pass None to keep numeric ticks

    title:        the text to display at the top of the matrix

    cmap:         the matplotlib colormap used for the cells
                  see http://matplotlib.org/examples/color/colormaps_reference.html
                  e.g. plt.get_cmap('jet') or plt.cm.Blues (default: 'Blues')

    normalize:    If False, plot the raw numbers
                  If True, plot the per-row proportions (each row of true
                  labels sums to 1; rows with no samples are shown as zeros)

    Returns
    -------
    None. The figure is displayed with plt.show().

    Raises
    ------
    ValueError if cm is not a non-empty square 2-D matrix.

    Usage
    -----
    plot_confusion_matrix(cm           = cm,                  # confusion matrix created by
                                                              # sklearn.metrics.confusion_matrix
                          normalize    = True,                # show proportions
                          target_names = y_labels_vals,       # list of names of the classes
                          title        = best_estimator_name) # title of graph

    Citation
    --------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

    """
    import matplotlib.pyplot as plt
    import numpy as np
    import itertools

    # Accept plain nested lists as well as numpy arrays.
    counts = np.asarray(cm)
    if counts.ndim != 2 or counts.shape[0] != counts.shape[1] or counts.size == 0:
        raise ValueError("cm must be a non-empty square 2-D confusion matrix")

    # Accuracy is always computed from the raw counts, never the normalized view.
    total = float(np.sum(counts))
    accuracy = np.trace(counts) / total if total else float('nan')
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    # Normalize BEFORE drawing so that the cell colours, the colour bar and the
    # printed numbers all describe the same values.
    if normalize:
        row_totals = counts.sum(axis=1)[:, np.newaxis].astype('float')
        # A class with no true samples would divide by zero; show zeros instead.
        row_totals[row_totals == 0] = 1.0
        shown = counts.astype('float') / row_totals
    else:
        shown = counts

    plt.figure(figsize=(8, 6))
    plt.imshow(shown, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    # Cells darker than this threshold get white text for contrast.
    thresh = shown.max() / 1.5 if normalize else shown.max() / 2
    cell_format = "{:0.4f}" if normalize else "{:,}"
    for i, j in itertools.product(range(shown.shape[0]), range(shown.shape[1])):
        plt.text(j, i, cell_format.format(shown[i, j]),
                 horizontalalignment="center",
                 color="white" if shown[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    # Lay out after the labels exist so they are not clipped.
    plt.tight_layout()
    plt.show()

In [None]:

# Plot the raw-count confusion matrix of the test set.
test_class_names = ["lung_aca",  "lung_n",  "lung_scc"]
plot_confusion_matrix(
    test_conf_mat,
    target_names=test_class_names,
    title="Confusion Matrix of Test Dataset",
    normalize=False,
)




In [None]:

# Evaluate on the test generator. `Model.evaluate` accepts generators directly
# (as `fit` and `predict` already do in this notebook); the older
# `evaluate_generator` is deprecated and absent from current TensorFlow/Keras.
# The returned values follow the compile order: loss, accuracy, AUC.
loss_test, acc_test, roc_test = model.evaluate(test_set)

In [None]:
# Report loss, accuracy and AUROC measured on the test set.
print(f"Test Loss: {loss_test!s}, Test Acc: {acc_test!s}, Test AUROC: {roc_test!s}")

In [None]:
from sklearn.metrics import f1_score

# F1 score of the test predictions, computed with average='weighted'.
test_f1 = f1_score(y_test, test_pred, average='weighted')
print("f1_score of test: " + str(test_f1))