In [None]:
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/

In [None]:
! pip install kaggle

# download lung and colon cancer dataset from kaggle and unzip it
! kaggle datasets download -d andrewmvd/lung-and-colon-cancer-histopathological-images
! unzip -qq lung-and-colon-cancer-histopathological-images.zip


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Downloading lung-and-colon-cancer-histopathological-images.zip to /content
100% 1.75G/1.76G [00:09<00:00, 184MB/s]
100% 1.76G/1.76G [00:09<00:00, 195MB/s]


In [None]:
# place the data in required folders
! mkdir lung_colon_data
! mkdir lung_data
! mkdir colon_data
! cp -R lung_colon_image_set/lung_image_sets/* lung_colon_data/
! cp -R lung_colon_image_set/colon_image_sets/* lung_colon_data/
! cp -R lung_colon_image_set/lung_image_sets/* lung_data/
! cp -R lung_colon_image_set/colon_image_sets/* colon_data/

In [None]:
"""# **Data Reduction**"""

! ls -1 lung_colon_data/colon_aca/ | egrep -c ''
! ls -1 lung_colon_data/colon_n/ | egrep -c ''
! ls -1 lung_colon_data/lung_aca/ | egrep -c ''
! ls -1 lung_colon_data/lung_n/ | egrep -c ''
! ls -1 lung_colon_data/lung_scc/ | egrep -c ''

! ls -1 -d "$PWD/lung_colon_data/colon_aca/"* | head -3000 | xargs rm
! ls -1 lung_colon_data/colon_aca/ | egrep -c ''

! ls -1 -d "$PWD/lung_colon_data/colon_n/"* | head -3000 | xargs rm
! ls -1 lung_colon_data/colon_n/ | egrep -c ''

! ls -1 -d "$PWD/lung_colon_data/lung_aca/"* | head -3000 | xargs rm
! ls -1 lung_colon_data/lung_aca/ | egrep -c ''

! ls -1 -d "$PWD/lung_colon_data/lung_n/"* | head -3000 | xargs rm
! ls -1 lung_colon_data/lung_n/ | egrep -c ''

! ls -1 -d "$PWD/lung_colon_data/lung_scc/"* | head -3000 | xargs rm
! ls -1 lung_colon_data/lung_scc/ | egrep -c ''

5000
5000
5000
5000
5000
2000
2000
2000
2000
2000


In [11]:
# **Import the required libraries for the applying deep learning**

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from glob import glob
from tensorflow.keras.applications.efficientnet import EfficientNetB7
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.layers import Input, Dense, Flatten, GlobalAveragePooling2D, Conv2D , MaxPool2D  , Dropout , BatchNormalization
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image


In [None]:
data = ImageDataGenerator(validation_split = 0.2)
BATCH_SIZE = 128
X = Y = 224

In [None]:
lung_colon_data_dir = './lung_colon_data'
lung_data_dir = './lung_data'
colon_data_dir = './colon_data'

In [None]:
# traininig data pipeline for classifying lung and colon images together
lung_colon_training_data = data.flow_from_directory(lung_colon_data_dir,
                                    class_mode = "categorical",
                                    target_size = (X, Y),
                                    color_mode="rgb",
                                    batch_size = BATCH_SIZE, 
                                    shuffle = False,
                                    subset='training',
                                    seed = 22)

# validation data pipeline for classifying lung and colon images together
lung_colon_validation_data = data.flow_from_directory(lung_colon_data_dir,
                                      class_mode = "categorical",
                                      target_size = (X, Y),
                                      color_mode="rgb",
                                      batch_size = BATCH_SIZE, 
                                      shuffle = False,
                                      subset='validation',
                                      seed = 22)

Found 8000 images belonging to 5 classes.
Found 2000 images belonging to 5 classes.


In [None]:
# training data pipeline for classifying only the lung cancer images
lung_training_data = data.flow_from_directory(lung_data_dir,
                                    class_mode = "categorical",
                                    target_size = (X, Y),
                                    color_mode="rgb",
                                    batch_size = BATCH_SIZE, 
                                    shuffle = False,
                                    subset='training',
                                    seed = 22)

# validation data pipeline for classifying only the lung cancer images
lung_validation_data = data.flow_from_directory(lung_data_dir,
                                    class_mode = "categorical",
                                    target_size = (X, Y),
                                    color_mode="rgb",
                                    batch_size = BATCH_SIZE, 
                                    shuffle = False,
                                    subset='validation',
                                    seed = 22)


Found 12000 images belonging to 3 classes.
Found 3000 images belonging to 3 classes.


In [None]:
# training data pipeline for classifying only the colon cancer images
colon_training_data = data.flow_from_directory(colon_data_dir,
                                    class_mode = "categorical",
                                    target_size = (X, Y),
                                    color_mode="rgb",
                                    batch_size = BATCH_SIZE, 
                                    shuffle = False,
                                    subset='training',
                                    seed = 22)

# validation data pipeline for classifying only the colon cancer images
colon_validation_data = data.flow_from_directory(colon_data_dir,
                                    class_mode = "categorical",
                                    target_size = (X, Y),
                                    color_mode="rgb",
                                    batch_size = BATCH_SIZE, 
                                    shuffle = False,
                                    subset='validation',
                                    seed = 22)

Found 8000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


# EfficientNetB7

In [None]:


# EfficientNet B7 model published by Google AI
efficientnet = EfficientNetB7(input_shape=(X, Y, 3),
    weights='imagenet',
    include_top=False)

efficientnet.trainable = False

x = GlobalAveragePooling2D()(efficientnet.output)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dense(64, activation='relu')(x)
y = Dense(3, activation='softmax')(x)

model_efficientnet = Model(inputs=efficientnet.input, outputs=y)
model_efficientnet.compile(
  loss='categorical_crossentropy',
  optimizer='adam',
  metrics=['accuracy']
)

In [None]:
early_stopping = EarlyStopping(monitor='val_loss', patience=3)
history = model_efficientnet.fit(
    lung_training_data,
    validation_data=lung_validation_data,
    epochs=2,
    callbacks=[early_stopping])

Epoch 1/2
Epoch 2/2


In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

In [None]:
from sklearn.metrics import classification_report
import numpy as np
Y_pred = model_efficientnet.predict(lung_validation_data)
y_pred = np.argmax(Y_pred, axis=1)

print(classification_report(lung_validation_data.classes, y_pred))

              precision    recall  f1-score   support

           0       0.94      0.92      0.93      1000
           1       0.97      1.00      0.99      1000
           2       0.94      0.94      0.94      1000

    accuracy                           0.95      3000
   macro avg       0.95      0.95      0.95      3000
weighted avg       0.95      0.95      0.95      3000



In [None]:


# EfficientNet B7 model published by Google AI
efficientnet = EfficientNetB7(input_shape=(X, Y, 3),
    weights='imagenet',
    include_top=False)

efficientnet.trainable = False

x = GlobalAveragePooling2D()(efficientnet.output)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dense(64, activation='relu')(x)
y = Dense(2, activation='softmax')(x)

model_efficientnet = Model(inputs=efficientnet.input, outputs=y)
model_efficientnet.compile(
  loss='categorical_crossentropy',
  optimizer='adam',
  metrics=['accuracy']
)

In [None]:
early_stopping = EarlyStopping(monitor='val_loss', patience=3)
history = model_efficientnet.fit(
    colon_training_data,
    validation_data=colon_validation_data,
    epochs=2,
    callbacks=[early_stopping])

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

In [None]:
from sklearn.metrics import classification_report
import numpy as np
Y_pred = model_efficientnet.predict(colon_validation_data)
y_pred = np.argmax(Y_pred, axis=1)
print(classification_report(colon_validation_data.classes, y_pred))

# InceptionResNetV2

In [None]:
# InceptionResNetV2 model
inception_resnet_v2 = InceptionResNetV2(input_shape=(X, Y, 3),
    weights='imagenet',
    include_top=False)

inception_resnet_v2.trainable = False

x = GlobalAveragePooling2D()(inception_resnet_v2.output)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dense(64, activation='relu')(x)
y = Dense(3, activation='softmax')(x)

model_inception_resnet_v2 = Model(inputs=inception_resnet_v2.input, outputs=y)
model_inception_resnet_v2.compile(
  loss='categorical_crossentropy',
  optimizer='adam',
  metrics=['accuracy']
)

In [None]:
early_stopping = EarlyStopping(monitor='val_loss', patience=3)
history = model_inception_resnet_v2.fit(
    lung_training_data,
    validation_data=lung_validation_data,
    epochs=20,
    callbacks=[early_stopping])

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

In [None]:
from sklearn.metrics import classification_report
import numpy as np
Y_pred = model_inception_resnet_v2.predict(lung_validation_data)
y_pred = np.argmax(Y_pred, axis=1)
print(classification_report(lung_validation_data.classes, y_pred))

In [None]:
# InceptionResNetV2 model
inception_resnet_v2 = InceptionResNetV2(input_shape=(X, Y, 3),
    weights='imagenet',
    include_top=False)

inception_resnet_v2.trainable = False

x = GlobalAveragePooling2D()(inception_resnet_v2.output)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dense(64, activation='relu')(x)
y = Dense(2, activation='softmax')(x)

model_inception_resnet_v2 = Model(inputs=inception_resnet_v2.input, outputs=y)
model_inception_resnet_v2.compile(
  loss='categorical_crossentropy',
  optimizer='adam',
  metrics=['accuracy']
)

In [None]:
early_stopping = EarlyStopping(monitor='val_loss', patience=3)
history = model_inception_resnet_v2.fit(
    colon_training_data,
    validation_data=colon_validation_data,
    epochs=2,
    callbacks=[early_stopping])

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss'] T7
val_loss = history.history['val_loss']

In [None]:
from sklearn.metrics import classification_report
import numpy as np
Y_pred = model_inception_resnet_v2.predict(colon_validation_data)
y_pred = np.argmax(Y_pred, axis=1)
print(classification_report(colon_validation_data.classes, y_pred))

# VGG19

In [None]:
# VGG19Net model
from tensorflow.keras.applications.vgg19 import VGG19

vgg_19 = VGG19(input_shape=(X, Y, 3),
    weights='imagenet',Z
    include_top=False)

vgg_19.trainable = False

x = GlobalAveragePooling2D()(inception_resnet_v2.output)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dense(64, activation='relu')(x)
y = Dense(3, activation='softmax')(x)

model_vgg_19 = Model(inputs=vgg_19.input, outputs=y)
model_vgg_19.compile(
  loss='categorical_crossentropy',
  optimizer='adam',
  metrics=['accuracy']
)

In [None]:
early_stopping = EarlyStopping(monitor='val_loss', patience=3)
history = model_vgg_19.fit(
    lung_training_data,
    validation_data=lung_validation_data,
    epochs=20,
    callbacks=[early_stopping])

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

In [None]:
from sklearn.metrics import classification_report
import numpy as np
Y_pred = model_vgg_19.predict(lung_validation_data)
y_pred = np.argmax(Y_pred, axis=1)
print(classification_report(lung_validation_data.classes, y_pred))

In [None]:
# VGG19Net model
from tensorflow.keras.applications.vgg19 import VGG19

vgg_19 = VGG19(input_shape=(X, Y, 3),
    weights='imagenet',Z
    include_top=False)

vgg_19.trainable = False

x = GlobalAveragePooling2D()(inception_resnet_v2.output)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dense(64, activation='relu')(x)
y = Dense(2, activation='softmax')(x)

model_vgg_19 = Model(inputs=vgg_19.input, outputs=y)
model_vgg_19.compile(
  loss='categorical_crossentropy',
  optimizer='adam',
  metrics=['accuracy']
)

In [None]:
early_stopping = EarlyStopping(monitor='val_loss', patience=3)
history = model_vgg_19.fit(
    colon_training_data,
    validation_data=colon_validation_data,
    epochs=2,
    callbacks=[early_stopping])

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

In [None]:
from sklearn.metrics import classification_report
import numpy as np
Y_pred = model_vgg_19.predict(colon_validation_data)
y_pred = np.argmax(Y_pred, axis=1)
print(classification_report(colon_validation_data.classes, y_pred))

# Lung and Colon Cancer

# EfficientNet

In [None]:


# EfficientNet B7 model published by Google AI
efficientnet = EfficientNetB7(input_shape=(X, Y, 3),
    weights='imagenet',
    include_top=False)

efficientnet.trainable = False

x = GlobalAveragePooling2D()(efficientnet.output)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dense(64, activation='relu')(x)
y = Dense(5, activation='softmax')(x)

model_efficientnet = Model(inputs=efficientnet.input, outputs=y)
model_efficientnet.compile(
  loss='categorical_crossentropy',
  optimizer='adam',
  metrics=['accuracy']
)

In [None]:
early_stopping = EarlyStopping(monitor='val_loss', patience=3)
history = model_efficientnet.fit(
    lung_colon_training_data,
    validation_data=lung_colon_validation_data,
    epochs=2,
    callbacks=[early_stopping])

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

In [None]:
from sklearn.metrics import classification_report
import numpy as np
Y_pred = model_efficientnet.predict(lung_colon_validation_data)
y_pred = np.argmax(Y_pred, axis=1)

print(classification_report(lung_colon_validation_data.classes, y_pred))

# InceptionV3

In [None]:
# InceptionResNetV2 model
inception_resnet_v2 = InceptionResNetV2(input_shape=(X, Y, 3),
    weights='imagenet',
    include_top=False)

inception_resnet_v2.trainable = False

x = GlobalAveragePooling2D()(inception_resnet_v2.output)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dense(64, activation='relu')(x)
y = Dense(5, activation='softmax')(x)

model_inception_resnet_v2 = Model(inputs=inception_resnet_v2.input, outputs=y)
model_inception_resnet_v2.compile(
  loss='categorical_crossentropy',
  optimizer='adam',
  metrics=['accuracy']
)

In [None]:
early_stopping = EarlyStopping(monitor='val_loss', patience=3)
history = model_inception_resnet_v2.fit(
    lung_colon_training_data,
    validation_data=lung_colon_validation_data,
    epochs=20,
    callbacks=[early_stopping])

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

In [None]:
from sklearn.metrics import classification_report
import numpy as np
Y_pred = model_inception_resnet_v2.predict(lung_colon_validation_data)
y_pred = np.argmax(Y_pred, axis=1)
print(classification_report(lung_colon_validation_data.classes, y_pred))

# VGG19

In [None]:
# VGG19Net model
from tensorflow.keras.applications.vgg19 import VGG19

vgg_19 = VGG19(input_shape=(X, Y, 3),
    weights='imagenet',Z
    include_top=False)

vgg_19.trainable = False

x = GlobalAveragePooling2D()(inception_resnet_v2.output)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dense(64, activation='relu')(x)
y = Dense(5, activation='softmax')(x)

model_vgg_19 = Model(inputs=vgg_19.input, outputs=y)
model_vgg_19.compile(
  loss='categorical_crossentropy',
  optimizer='adam',
  metrics=['accuracy']
)

In [None]:
early_stopping = EarlyStopping(monitor='val_loss', patience=3)
history = model_vgg_19.fit(
    lung_colon_training_data,
    validation_data=lung_colon_validation_data,
    epochs=20,
    callbacks=[early_stopping])

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

In [None]:
from sklearn.metrics import classification_report
import numpy as np
Y_pred = model_vgg_19.predict(lung_colon_validation_data)
y_pred = np.argmax(Y_pred, axis=1)
print(classification_report(lung_colon_validation_data.classes, y_pred))