In [2]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os
import numpy as np
from PIL import Image
import pathlib

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.efficientnet import EfficientNetB7, preprocess_input
from tensorflow.keras.layers import Input, Dense, Flatten, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Loading In Data

In [None]:
data_dir = "/content/drive/MyDrive/lung_image_sets"
data = ImageDataGenerator(validation_split=0.2)
BATCH_SIZE = 32

X=224
Y=224

training = data.flow_from_directory(data_dir,
                                    class_mode = 'categorical',
                                    target_size = (X, Y),
                                    color_mode = "rgb",
                                    batch_size = BATCH_SIZE,
                                    shuffle = False,
                                    subset = "training",
                                    seed = 89)

validation = data.flow_from_directory(data_dir,
                                      class_mode = 'categorical',
                                      target_size = (X, Y),
                                      color_mode = "rgb",
                                      batch_size = BATCH_SIZE,
                                      shuffle = False,
                                      subset = 'validation',
                                      seed = 89)

Found 12008 images belonging to 3 classes.
Found 3002 images belonging to 3 classes.


# Displaying Sample Data

# Loading Model

In [None]:
base_model = EfficientNetB7(
    input_shape=(X, Y, 3),
    weights = 'imagenet',
    include_top = False)

x = GlobalAveragePooling2D()(base_model.output)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dense(64, activation = 'relu')(x)
y = Dense(3, activation = 'softmax')(x)

model = Model(inputs=base_model.input, outputs=y)

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb7_notop.h5


In [None]:
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [None]:
model.fit(training,
          validation_data=validation,
          epochs=50)

Epoch 1/50


KeyboardInterrupt: ignored

The Model above struggled with overfitting. The validation accuracy was 33% while the training accuracy was 95.
The next model will introduce regularization to combat overfitting.

Model v2

In [None]:
base_model = EfficientNetB7(
    input_shape=(X, Y, 3),
    weights = 'imagenet',
    include_top = False)

x = GlobalAveragePooling2D()(base_model.output)
x = Flatten()(x)
x = Dense(128, activation='relu', activity_regularizer=tf.keras.regularizers.L2(0.01))(x)
x = Dense(64, activation = 'relu')(x)
y = Dense(3, activation = 'softmax')(x)

model = Model(inputs=base_model.input, outputs=y)

In [None]:
estop = EarlyStopping(monitor='val_loss', patience=4)

In [None]:
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [None]:
model.fit(
    training,
    validation_data = validation,
    epochs = 25,
    callbacks = [estop]
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25


<keras.callbacks.History at 0x7c4fd330a350>

# Model 2 Evaluation

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

In [None]:
# classification report
y_pred = model.predict(validation)
y_pred = np.argmax(y_pred, axis=1)

print(classification_report(validation.classes, y_pred))

              precision    recall  f1-score   support

           0       0.56      0.19      0.28      1000
           1       0.33      0.88      0.48      1002
           2       0.00      0.00      0.00      1000

    accuracy                           0.36      3002
   macro avg       0.30      0.35      0.25      3002
weighted avg       0.30      0.36      0.25      3002



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Model 3

In [None]:
base_model = EfficientNetB7(
    input_shape=(X, Y, 3),
    weights = 'imagenet',
    include_top = False)

x = GlobalAveragePooling2D()(base_model.output)
x = Flatten()(x)
x = Dense(128, activation='relu', activity_regularizer=tf.keras.regularizers.L2(0.01))(x)
x = Dense(64, activation = 'relu')(x)
y = Dense(3, activation = 'softmax')(x)

model = Model(inputs=base_model.input, outputs=y)

In [None]:
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [None]:
model.fit(
    training,
    validation_data = validation,
    epochs = 25
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
 18/376 [>.............................] - ETA: 2:00 - loss: 1.1111 - accuracy: 0.2778

KeyboardInterrupt: ignored

## Model 3 evaluation
- I interrupted this model wbecause there was no evidence of increased validation accuracy and I wanted to save my colab compute units for more promising models. Thus, unfortunately there is no model classification report for this model.

# Model 4

In [None]:
data_dir = "/content/drive/MyDrive/lung_image_sets"
data = ImageDataGenerator(validation_split=0.2)
BATCH_SIZE = 10

X=224
Y=224

training = data.flow_from_directory(data_dir,
                                    class_mode = 'categorical',
                                    target_size = (X, Y),
                                    color_mode = "rgb",
                                    batch_size = BATCH_SIZE,
                                    shuffle = False,
                                    subset = "training",
                                    seed = 89)

validation = data.flow_from_directory(data_dir,
                                      class_mode = 'categorical',
                                      target_size = (X, Y),
                                      color_mode = "rgb",
                                      batch_size = BATCH_SIZE,
                                      shuffle = False,
                                      subset = 'validation',
                                      seed = 89)

Found 12008 images belonging to 3 classes.
Found 3002 images belonging to 3 classes.


In [None]:
base_model = EfficientNetB7(
    input_shape=(X, Y, 3),
    weights = 'imagenet',
    include_top = False)

x = GlobalAveragePooling2D()(base_model.output)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dense(64, activation = 'relu', activity_regularizer=tf.keras.regularizers.L2(0.05))(x)
y = Dense(3, activation = 'softmax')(x)

model = Model(inputs=base_model.input, outputs=y)

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb7_notop.h5


In [None]:
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [None]:
estop = EarlyStopping(monitor='val_loss', patience=4)

In [None]:
model.fit(
    training,
    validation_data = validation,
    epochs = 25,
    callbacks = [estop]
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25


<keras.callbacks.History at 0x7fcb807e6800>

# Model 5
- Increased regularization weights.

In [4]:
data_dir = "/content/drive/MyDrive/lung_image_sets"
data = ImageDataGenerator(validation_split=0.2)
BATCH_SIZE = 10

X=224
Y=224

training = data.flow_from_directory(data_dir,
                                    class_mode = 'categorical',
                                    target_size = (X, Y),
                                    color_mode = "rgb",
                                    batch_size = BATCH_SIZE,
                                    shuffle = False,
                                    subset = "training",
                                    seed = 89)

validation = data.flow_from_directory(data_dir,
                                      class_mode = 'categorical',
                                      target_size = (X, Y),
                                      color_mode = "rgb",
                                      batch_size = BATCH_SIZE,
                                      shuffle = False,
                                      subset = 'validation',
                                      seed = 89)

Found 12008 images belonging to 3 classes.
Found 3002 images belonging to 3 classes.


In [5]:
base_model = EfficientNetB7(
    input_shape=(X, Y, 3),
    weights = 'imagenet',
    include_top = False)

x = GlobalAveragePooling2D()(base_model.output)
x = Flatten()(x)
x = Dense(128, activation='relu', activity_regularizer=tf.keras.regularizers.L2(0.1))(x)
x = Dense(64, activation = 'relu', activity_regularizer=tf.keras.regularizers.L2(0.1))(x)
y = Dense(3, activation = 'softmax')(x)

model = Model(inputs=base_model.input, outputs=y)

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb7_notop.h5


In [6]:
estop = EarlyStopping(monitor='val_loss', patience=4)

In [7]:
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [8]:
model.fit(
    training,
    validation_data = validation,
    epochs = 25,
    callbacks = [estop]
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25


<keras.callbacks.History at 0x7bc78c2e6b30>

In [9]:
# classification report
y_pred = model.predict(validation)
y_pred = np.argmax(y_pred, axis=1)

print(classification_report(validation.classes, y_pred))

              precision    recall  f1-score   support

           0       0.33      1.00      0.50      1000
           1       0.00      0.00      0.00      1002
           2       0.00      0.00      0.00      1000

    accuracy                           0.33      3002
   macro avg       0.11      0.33      0.17      3002
weighted avg       0.11      0.33      0.17      3002



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
