In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense,Conv2D, Flatten, MaxPool2D, Dropout
from tensorflow.keras import Model
from tensorflow.keras.models import Sequential, load_model, Model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, TensorBoard
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.applications.inception_v3 import InceptionV3



In [2]:
import gdown
url = "https://drive.google.com/file/d/18uXV88Z3PjWkYib-dKHVMr55VrqiiFrl/view?usp=sharing"
file_id = url.split("/")[-2]
file_id

'18uXV88Z3PjWkYib-dKHVMr55VrqiiFrl'

In [3]:
prefix = "https://drive.google.com/uc?export=download&id="
gdown.download(prefix+file_id, "chest_data.zip")

Downloading...
From (original): https://drive.google.com/uc?export=download&id=18uXV88Z3PjWkYib-dKHVMr55VrqiiFrl
From (redirected): https://drive.google.com/uc?export=download&id=18uXV88Z3PjWkYib-dKHVMr55VrqiiFrl&confirm=t&uuid=8baf1e08-43b2-45c6-b49a-8feea48b5b55
To: /content/chest_data.zip
100%|██████████| 124M/124M [00:01<00:00, 98.4MB/s]


'chest_data.zip'

In [5]:
import zipfile
unzip_path = '/content'
os.makedirs(unzip_path, exist_ok=True)
with zipfile.ZipFile("chest_data.zip", 'r') as zip_ref:
  zip_ref.extractall(unzip_path)

In [7]:
# File Directory for both the train and test
train_path = "../content/Data/train"
val_path = "../content/Data/valid"
test_path = "../content/Data/test"

In [8]:
# Define function to count number of images per class using a dictionary
def GetDatasetSize(path):
    num_of_image = {}
    for folder in os.listdir(path):
        # count files in the folder
        num_of_image[folder] = len(os.listdir(os.path.join(path, folder)));
    return num_of_image;

# Get the number of images per class in each set (train, validation and test)
train_set = GetDatasetSize(train_path)
val_set = GetDatasetSize(val_path)
test_set = GetDatasetSize(test_path)
print(train_set,"\n\n",val_set,"\n\n",test_set)

{'normal': 148, 'large.cell.carcinoma_left.hilum_T2_N2_M0_IIIa': 115, 'adenocarcinoma_left.lower.lobe_T2_N0_M0_Ib': 195, 'squamous.cell.carcinoma_left.hilum_T1_N2_M0_IIIa': 155} 

 {'normal': 13, 'large.cell.carcinoma_left.hilum_T2_N2_M0_IIIa': 21, 'adenocarcinoma_left.lower.lobe_T2_N0_M0_Ib': 23, 'squamous.cell.carcinoma_left.hilum_T1_N2_M0_IIIa': 15} 

 {'normal': 54, 'squamous.cell.carcinoma': 90, 'large.cell.carcinoma': 51, 'adenocarcinoma': 120}


# DATA PREPROCESSING

In [9]:
# Image data generator with specified augmentation configurations (mostly geometric transformations)
train_datagen = ImageDataGenerator(rescale = 1.0/255.0,
                                  horizontal_flip = True,
                                  fill_mode = 'nearest',
                                  zoom_range=0.2,
                                  shear_range = 0.2,
                                  width_shift_range=0.2,
                                  height_shift_range=0.2,
                                  rotation_range=0.4)

# Using data generator to create augmented data from image files in train_path directory
train_data = train_datagen.flow_from_directory(train_path,
                                                   batch_size = 5,
                                                   target_size = (350,350),
                                                   class_mode = 'categorical')

# Dicctionary with class names to their respective indices in the generated data
train_data.class_indices

Found 613 images belonging to 4 classes.


{'adenocarcinoma_left.lower.lobe_T2_N0_M0_Ib': 0,
 'large.cell.carcinoma_left.hilum_T2_N2_M0_IIIa': 1,
 'normal': 2,
 'squamous.cell.carcinoma_left.hilum_T1_N2_M0_IIIa': 3}

In [10]:
val_datagen = ImageDataGenerator(rescale = 1.0/255.0)
val_data = val_datagen.flow_from_directory(val_path,
                                                   batch_size = 5,
                                                   target_size = (350,350),
                                                   class_mode = 'categorical')
val_data.class_indices

Found 72 images belonging to 4 classes.


{'adenocarcinoma_left.lower.lobe_T2_N0_M0_Ib': 0,
 'large.cell.carcinoma_left.hilum_T2_N2_M0_IIIa': 1,
 'normal': 2,
 'squamous.cell.carcinoma_left.hilum_T1_N2_M0_IIIa': 3}

In [11]:
test_datagen = ImageDataGenerator(rescale = 1.0/255.0)
test_data = test_datagen.flow_from_directory(test_path,
                                                   batch_size = 5,
                                                   target_size = (350,350),
                                                   class_mode = 'categorical')
test_data.class_indices

Found 315 images belonging to 4 classes.


{'adenocarcinoma': 0,
 'large.cell.carcinoma': 1,
 'normal': 2,
 'squamous.cell.carcinoma': 3}

# EfficientNetB0


In [12]:
base_model = EfficientNetB0(input_shape = (350, 350, 3),
                         include_top = False,
                         weights = 'imagenet')

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5


In [13]:
mc = ModelCheckpoint(
    filepath="./ct_effnet_best_model.hdf5",
    monitor= 'val_accuracy',
    verbose= 1,
    save_best_only= True,
    mode = 'auto'
    );

call_back = [ mc];

tensorboard = TensorBoard(log_dir = 'logs')
reduce_lr = ReduceLROnPlateau(monitor = 'val_accuracy', factor = 0.3, patience = 2, min_delta = 0.001,
                              mode='auto',verbose=1)

#We also add a earlystop for prevent the computer from wasting time if it's not making progress.


early_stopping = EarlyStopping(monitor='val_acc', patience=3, restore_best_weights=True)

# Training the model

In [14]:
#Customize our base model
EffNetmodel = base_model.output
EffNetmodel = tf.keras.layers.GlobalAveragePooling2D()(EffNetmodel)
# to provide overfitting problem
EffNetmodel = tf.keras.layers.Dropout(rate=0.5)(EffNetmodel)

#Finally, we add a layer with 4 'neurons' that will help us classify things into different categories.
EffNetmodel = tf.keras.layers.Dense(4,activation='softmax')(EffNetmodel)

In [15]:
#We put together the original EfficientNetB0 and our new custom parts
EffNetmodel = tf.keras.models.Model(inputs=base_model.input, outputs = EffNetmodel)

#Compile up the rules and tools for training your neural network.
#The optimizer determines how the model should update its internal parameters, the loss function quantifies how well the model is doing, and the metrics provide additional measures to track the model's performance.
EffNetmodel.compile(loss='categorical_crossentropy',optimizer = 'Adam', metrics= ['accuracy'])

In [16]:
%time
EffNetB0 = EffNetmodel.fit(
    train_data,
    steps_per_epoch = train_data.samples//train_data.batch_size,
    epochs = 64,
    validation_data = val_data,
    validation_steps = val_data.samples//val_data.batch_size,
    callbacks = [tensorboard, mc, reduce_lr, early_stopping]
    )

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 6.44 µs
Epoch 1/64
Epoch 1: val_accuracy improved from -inf to 0.15714, saving model to ./ct_effnet_best_model.hdf5


  saving_api.save_model(


Epoch 2/64
Epoch 2: val_accuracy improved from 0.15714 to 0.22857, saving model to ./ct_effnet_best_model.hdf5




Epoch 3/64
Epoch 3: val_accuracy did not improve from 0.22857




Epoch 4/64
Epoch 4: val_accuracy improved from 0.22857 to 0.30000, saving model to ./ct_effnet_best_model.hdf5




Epoch 5/64
Epoch 5: val_accuracy did not improve from 0.30000




Epoch 6/64
Epoch 6: val_accuracy did not improve from 0.30000

Epoch 6: ReduceLROnPlateau reducing learning rate to 0.0003000000142492354.




Epoch 7/64
Epoch 7: val_accuracy improved from 0.30000 to 0.42857, saving model to ./ct_effnet_best_model.hdf5




Epoch 8/64
Epoch 8: val_accuracy did not improve from 0.42857




Epoch 9/64
Epoch 9: val_accuracy did not improve from 0.42857

Epoch 9: ReduceLROnPlateau reducing learning rate to 9.000000427477062e-05.




Epoch 10/64
Epoch 10: val_accuracy did not improve from 0.42857




Epoch 11/64
Epoch 11: val_accuracy did not improve from 0.42857

Epoch 11: ReduceLROnPlateau reducing learning rate to 2.700000040931627e-05.




Epoch 12/64
Epoch 12: val_accuracy improved from 0.42857 to 0.52857, saving model to ./ct_effnet_best_model.hdf5




Epoch 13/64
Epoch 13: val_accuracy improved from 0.52857 to 0.77143, saving model to ./ct_effnet_best_model.hdf5




Epoch 14/64
Epoch 14: val_accuracy did not improve from 0.77143




Epoch 15/64
Epoch 15: val_accuracy did not improve from 0.77143

Epoch 15: ReduceLROnPlateau reducing learning rate to 8.100000013655517e-06.




Epoch 16/64
Epoch 16: val_accuracy improved from 0.77143 to 0.85714, saving model to ./ct_effnet_best_model.hdf5




Epoch 17/64
Epoch 17: val_accuracy improved from 0.85714 to 0.91429, saving model to ./ct_effnet_best_model.hdf5




Epoch 18/64
Epoch 18: val_accuracy did not improve from 0.91429




Epoch 19/64
Epoch 19: val_accuracy did not improve from 0.91429

Epoch 19: ReduceLROnPlateau reducing learning rate to 2.429999949526973e-06.




Epoch 20/64
Epoch 20: val_accuracy did not improve from 0.91429




Epoch 21/64
Epoch 21: val_accuracy improved from 0.91429 to 0.92857, saving model to ./ct_effnet_best_model.hdf5




Epoch 22/64
Epoch 22: val_accuracy did not improve from 0.92857




Epoch 23/64
Epoch 23: val_accuracy did not improve from 0.92857

Epoch 23: ReduceLROnPlateau reducing learning rate to 7.289999985005124e-07.




Epoch 24/64
Epoch 24: val_accuracy did not improve from 0.92857




Epoch 25/64
Epoch 25: val_accuracy did not improve from 0.92857

Epoch 25: ReduceLROnPlateau reducing learning rate to 2.1870000637136398e-07.




Epoch 26/64
Epoch 26: val_accuracy improved from 0.92857 to 0.94286, saving model to ./ct_effnet_best_model.hdf5




Epoch 27/64
Epoch 27: val_accuracy did not improve from 0.94286




Epoch 28/64
Epoch 28: val_accuracy did not improve from 0.94286

Epoch 28: ReduceLROnPlateau reducing learning rate to 6.561000276406048e-08.




Epoch 29/64
Epoch 29: val_accuracy did not improve from 0.94286




Epoch 30/64
Epoch 30: val_accuracy did not improve from 0.94286

Epoch 30: ReduceLROnPlateau reducing learning rate to 1.9683000829218145e-08.




Epoch 31/64
Epoch 31: val_accuracy did not improve from 0.94286




Epoch 32/64
Epoch 32: val_accuracy did not improve from 0.94286

Epoch 32: ReduceLROnPlateau reducing learning rate to 5.904900035602622e-09.




Epoch 33/64
Epoch 33: val_accuracy did not improve from 0.94286




Epoch 34/64
Epoch 34: val_accuracy did not improve from 0.94286

Epoch 34: ReduceLROnPlateau reducing learning rate to 1.7714700373261393e-09.




Epoch 35/64
Epoch 35: val_accuracy did not improve from 0.94286




Epoch 36/64
Epoch 36: val_accuracy did not improve from 0.94286

Epoch 36: ReduceLROnPlateau reducing learning rate to 5.314410245205181e-10.




Epoch 37/64
Epoch 37: val_accuracy did not improve from 0.94286




Epoch 38/64
Epoch 38: val_accuracy did not improve from 0.94286

Epoch 38: ReduceLROnPlateau reducing learning rate to 1.5943230069481729e-10.




Epoch 39/64
Epoch 39: val_accuracy did not improve from 0.94286




Epoch 40/64
Epoch 40: val_accuracy did not improve from 0.94286

Epoch 40: ReduceLROnPlateau reducing learning rate to 4.7829690208445185e-11.




Epoch 41/64
Epoch 41: val_accuracy did not improve from 0.94286




Epoch 42/64
Epoch 42: val_accuracy did not improve from 0.94286

Epoch 42: ReduceLROnPlateau reducing learning rate to 1.434890747886719e-11.




Epoch 43/64
Epoch 43: val_accuracy did not improve from 0.94286




Epoch 44/64
Epoch 44: val_accuracy did not improve from 0.94286

Epoch 44: ReduceLROnPlateau reducing learning rate to 4.304672243660157e-12.




Epoch 45/64
Epoch 45: val_accuracy did not improve from 0.94286




Epoch 46/64
Epoch 46: val_accuracy did not improve from 0.94286

Epoch 46: ReduceLROnPlateau reducing learning rate to 1.2914016210563428e-12.




Epoch 47/64
Epoch 47: val_accuracy did not improve from 0.94286




Epoch 48/64
Epoch 48: val_accuracy did not improve from 0.94286

Epoch 48: ReduceLROnPlateau reducing learning rate to 3.874204993273289e-13.




Epoch 49/64
Epoch 49: val_accuracy did not improve from 0.94286




Epoch 50/64
Epoch 50: val_accuracy did not improve from 0.94286

Epoch 50: ReduceLROnPlateau reducing learning rate to 1.162261530508052e-13.




Epoch 51/64
Epoch 51: val_accuracy did not improve from 0.94286




Epoch 52/64
Epoch 52: val_accuracy did not improve from 0.94286

Epoch 52: ReduceLROnPlateau reducing learning rate to 3.4867844288938296e-14.




Epoch 53/64
Epoch 53: val_accuracy did not improve from 0.94286




Epoch 54/64
Epoch 54: val_accuracy did not improve from 0.94286

Epoch 54: ReduceLROnPlateau reducing learning rate to 1.0460353083393582e-14.




Epoch 55/64
Epoch 55: val_accuracy did not improve from 0.94286




Epoch 56/64
Epoch 56: val_accuracy did not improve from 0.94286

Epoch 56: ReduceLROnPlateau reducing learning rate to 3.138105874196098e-15.




Epoch 57/64
Epoch 57: val_accuracy did not improve from 0.94286




Epoch 58/64
Epoch 58: val_accuracy did not improve from 0.94286

Epoch 58: ReduceLROnPlateau reducing learning rate to 9.414317622588293e-16.




Epoch 59/64
Epoch 59: val_accuracy did not improve from 0.94286




Epoch 60/64
Epoch 60: val_accuracy did not improve from 0.94286

Epoch 60: ReduceLROnPlateau reducing learning rate to 2.8242954138314303e-16.




Epoch 61/64
Epoch 61: val_accuracy did not improve from 0.94286




Epoch 62/64
Epoch 62: val_accuracy did not improve from 0.94286

Epoch 62: ReduceLROnPlateau reducing learning rate to 8.472885923856935e-17.




Epoch 63/64
Epoch 63: val_accuracy did not improve from 0.94286




Epoch 64/64
Epoch 64: val_accuracy did not improve from 0.94286

Epoch 64: ReduceLROnPlateau reducing learning rate to 2.541865737452411e-17.






# Model Evaluation

In [18]:
# Loading the Best Fit Model
model_eff = load_model("./ct_effnet_best_model.hdf5")
# Checking the Accuracy of the Model
accuracy_effnet = model_eff.evaluate_generator(generator= test_data)[1]
loss_effnet = model_eff.evaluate_generator(generator= test_data)[0]
print(f"The accuracy of the model is = {accuracy_effnet*100} %")
print(f"The loss of the model is = {loss_effnet} %")

  accuracy_effnet = model_eff.evaluate_generator(generator= test_data)[1]
  loss_effnet = model_eff.evaluate_generator(generator= test_data)[0]


The accuracy of the model is = 94.60317492485046 %
The loss of the model is = 0.24305105209350586 %
