In [2]:
import pandas as pd 
import numpy as np
import cv2
import os
import glob
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report
from tensorflow.keras.applications import ResNet50, VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

In [3]:
file_path = "/Users/johnson/Documents/Lung_Disease/dataset"
name_class = os.listdir(file_path)
filepaths = list(glob.glob(file_path+'/**/*.*'))
labels = list(map(lambda x: os.path.split(os.path.split(x)[0])[1],filepaths))
filepath = pd.Series(filepaths, name='Filepath').astype(str)
labels = pd.Series(labels, name='Label')
data = pd.concat([filepath, labels], axis=1)
data = data.sample(frac=1).reset_index(drop=True)

#Splitting the dataset into training and testing sets
train, test = train_test_split(data, test_size=0.25, random_state=42)

#Loading the label encoder
label_encoder = LabelEncoder()
label_encoder.fit(train['Label'])


test['Label']= test['Label'].astype(str)

In [4]:
#Data augmentation for training data
train_datagen = ImageDataGenerator(
    rescale=1.0/255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

#Data augmentation for testing
test_datagen = ImageDataGenerator(rescale=1.0/255)

train_gen = train_datagen.flow_from_dataframe(
    train,
    x_col='Filepath',
    y_col='Label',
    target_size=(100, 100),
    batch_size=32,
    class_mode='categorical'
)

test_gen = test_datagen.flow_from_dataframe(
    test,
    x_col='Filepath',
    y_col='Label',
    target_size=(100, 100),
    batch_size=32,
    class_mode='categorical'
)

#Encode the labels for training
label_encoder = LabelEncoder()
train_gen.classes = label_encoder.fit_transform(train_gen.classes)
test_gen.classes = label_encoder.transform(test_gen.classes)

Found 5609 validated image filenames belonging to 4 classes.
Found 1870 validated image filenames belonging to 4 classes.


In [5]:
# Define and compile the CNN Model
cnn_model = Sequential([
    Conv2D(64, (3, 3), activation='relu', input_shape=(100, 100, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
    
)

#Train the CNN model
cnn_history = cnn_model.fit(
    train_gen,
    epochs=10,
    validation_data=test_gen,
    callbacks=[EarlyStopping(monitor='val_accuracy', min_delta=0, patience=2, mode='auto')]
)

  super().__init__(


Epoch 1/10


  self._warn_if_super_not_called()


[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m252s[0m 1s/step - accuracy: 0.6036 - loss: 1.0106 - val_accuracy: 0.8604 - val_loss: 0.3638
Epoch 2/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m267s[0m 1s/step - accuracy: 0.8121 - loss: 0.4804 - val_accuracy: 0.8898 - val_loss: 0.3118
Epoch 3/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m238s[0m 1s/step - accuracy: 0.8198 - loss: 0.4596 - val_accuracy: 0.8765 - val_loss: 0.3205
Epoch 4/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m452s[0m 3s/step - accuracy: 0.8374 - loss: 0.4164 - val_accuracy: 0.8535 - val_loss: 0.4116


In [6]:
#Define and compile the ResNet model
resnet_model = ResNet50(
    input_shape=(100, 100, 3),
    include_top=False,
    weights='imagenet',
    pooling='avg'
)

for layer in resnet_model.layers:
    layer.trainable = False

resnet_model.layers[-1].trainable = True 

resnet_model = Sequential([resnet_model, Dense(len(label_encoder.classes_), activation='softmax')])

resnet_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)


In [7]:
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

#Train the ResNet model
resnet_history = resnet_model.fit(
    train_gen,
    epochs=10,
    validation_data=test_gen,
    callbacks=[EarlyStopping(monitor='val_accuracy', min_delta=0, patience=2, mode='auto')]
)

#Define and compile the VGG model
vgg_model = VGG16(
    input_shape=(100, 100, 3),
    include_top=False,
    weights='imagenet',
    pooling='avg'
) 

for layer in vgg_model.layers:
    layer.trainable = False


vgg_model.layers[-1].trainable = True

vgg_model = Sequential([vgg_model, Dense(len(label_encoder.classes_), activation='softmax')])

vgg_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

Epoch 1/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m288s[0m 2s/step - accuracy: 0.5315 - loss: 1.1586 - val_accuracy: 0.5947 - val_loss: 1.0194
Epoch 2/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m278s[0m 2s/step - accuracy: 0.6213 - loss: 1.0089 - val_accuracy: 0.6235 - val_loss: 0.9614
Epoch 3/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m281s[0m 2s/step - accuracy: 0.6378 - loss: 0.9355 - val_accuracy: 0.6358 - val_loss: 0.8685
Epoch 4/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m281s[0m 2s/step - accuracy: 0.6472 - loss: 0.9118 - val_accuracy: 0.6460 - val_loss: 0.8483
Epoch 5/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2552s[0m 15s/step - accuracy: 0.6537 - loss: 0.8786 - val_accuracy: 0.6422 - val_loss: 0.8179
Epoch 6/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m319s[0m 2s/step - accuracy: 0.6575 - loss: 0.8609 - val_accuracy: 0.6460 - val_loss: 0.8119


In [8]:
#Train the VGG model 
vgg_history = vgg_model.fit(
    train_gen,
    epochs=10,
    validation_data=test_gen,
    callbacks=[EarlyStopping(monitor='val_accuracy', min_delta=0, patience=2, mode='auto')]
)

Epoch 1/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2444s[0m 14s/step - accuracy: 0.4912 - loss: 1.2076 - val_accuracy: 0.6914 - val_loss: 0.7848
Epoch 2/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27911s[0m 159s/step - accuracy: 0.7289 - loss: 0.7538 - val_accuracy: 0.8262 - val_loss: 0.5944
Epoch 3/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m603s[0m 3s/step - accuracy: 0.7875 - loss: 0.6051 - val_accuracy: 0.8428 - val_loss: 0.5089
Epoch 4/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m629s[0m 4s/step - accuracy: 0.8105 - loss: 0.5421 - val_accuracy: 0.8513 - val_loss: 0.4587
Epoch 5/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m642s[0m 4s/step - accuracy: 0.8203 - loss: 0.5087 - val_accuracy: 0.8561 - val_loss: 0.4245
Epoch 6/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1134s[0m 6s/step - accuracy: 0.8396 - loss: 0.4663 - val_accuracy: 0.8615 - val_loss: 0.4126
Epoch 7/10
[1m

In [10]:
#Compare and visualize results
cnn_test_results = cnn_model.evaluate(test_gen, verbose=0)
resnet_test_results = resnet_model.evaluate(test_gen,verbose=0)
vgg_test_results = vgg_model.evaluate(test_gen, verbose=0)

print("CNN Test Accuracy: {:.2f}%".format(cnn_test_results[1]*100))
print("ResNet Test Accuracy: {:.2f}%".format(resnet_test_results[1]*100))
print("VGG Test Accuracy: {:.2f}%".format(vgg_test_results[1]*100))

CNN Test Accuracy: 85.35%
ResNet Test Accuracy: 64.60%
VGG Test Accuracy: 87.17%


In [11]:
#Choose best model
best_model = None
best_model_name = ""
if cnn_test_results[1] >= resnet_test_results[1] and cnn_test_results[1] >= vgg_test_results[1]:
    best_model = cnn_model
    best_model_name = "CNN"
elif resnet_test_results[1] >= vgg_test_results[1]:
    best_model = resnet_model
    best_model_name = "ResNet"
else:
    best_model = vgg_model
    best_model_name = "VGG"


print(f"The best model is {best_model_name}")

best_test_results = best_model.evaluate(test_gen, verbose=0)
print("Best model Test Accuracy: {:.2f}%".format(best_test_results[1] * 100))

The best model is VGG
Best model Test Accuracy: 87.17%
