<font size="12">Exploring the datasets</font>

# Setting up

## Importing libraries

In [1]:
import pandas as pd
import pathlib
import keras
import cv2
import glob
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input as preprocess_input_vgg16
from tensorflow.keras.applications.vgg19 import VGG19, preprocess_input as preprocess_input_vgg19
from tensorflow.keras.applications.resnet_v2 import ResNet50V2, preprocess_input as preprocess_input_resnet
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, MaxPool2D
from tensorflow.keras import regularizers, layers
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.utils import shuffle


## Importing data

In [2]:
url_normal = '../data/CT/Lung/normal/'

url_airspace = '../data/CT/Lung/Airspace_opacity/'
# url_bronch = '../data/CT/Lung/Bronchiectasis/'
# url_inter = '../data/CT/Lung/interstitial_lung_disease/'
# url_nodule = '../data/CT/Lung/Nodule/'
# url_parenchyma = '../data/CT/Lung/Parenchyma_destruction/'

In [3]:
limit = 100

In [4]:
images_normal = [cv2.imread(file) for file in glob.glob(url_normal+"*.png")]

In [5]:
images_airspace = [cv2.imread(file) for file in glob.glob(url_airspace+"*.png")][:limit]
# images_bronch = [cv2.imread(file) for file in glob.glob(url_bronch+"*.png")][:limit]
# images_inter = [cv2.imread(file) for file in glob.glob(url_inter+"*.png")][:limit]
# images_nodule = [cv2.imread(file) for file in glob.glob(url_nodule+"*.png")][:limit]
# images_parenchyma = [cv2.imread(file) for file in glob.glob(url_parenchyma+"*.png")][:limit]

In [6]:
images_

[]

In [6]:
#images_nodule = [cv2.imread(file) for file in glob.glob(url_nodule + "*.png")]
labels_airspace = [2] * len(images_nodule)
labels_bronch = [3] * len(images_nodule)
labels_inter = [4] * len(images_nodule)
labels_nodule = [1] * len(images_nodule)
labels_parenchyma = [5] * len(images_nodule)

#images_normal = [cv2.imread(file) for file in glob.glob(url_normal + "*.png")]
labels_normal = [0] * len(images_normal)

## Checking data

In [7]:
normal_count = len(images_normal)
normal_count

0

In [8]:
img = np.asarray(images_nodule[0])
imgplot = plt.imshow(img)

IndexError: list index out of range

# Model definition

In [9]:
def initialize_model():

    model = Sequential()

    model.add(layers.Conv2D(256, (4,4), activation="relu",
                            input_shape=(224, 224, 3), padding = "same",kernel_regularizer=regularizers.L1L2(0.01,0.1)))
    model.add(layers.MaxPool2D(pool_size=(2,2), padding = "same"))
    
    model.add(layers.Conv2D(128, (3,3), activation="relu", padding = "same"))
    model.add(layers.MaxPool2D(pool_size=(2,2), padding = "same"))
    
    model.add(layers.Conv2D(64, (3,3), activation="relu", padding = "same"))
    model.add(layers.MaxPool2D(pool_size=(2,2), padding = "same"))
    
    model.add(layers.Conv2D(32, (2,2), activation="relu"))
    model.add(layers.MaxPool2D(pool_size=(2,2)))
              
    model.add(layers.Dropout(0.25)) 
    
    model.add(layers.Flatten())
    
    model.add(layers.Dense(32, activation='relu'))

    model.add(layers.Dense(6, activation='softmax'))
    
    ### Model compilation
    model.compile(loss='sparse_categorical_crossentropy', 
              optimizer='adam',
              metrics=['accuracy'])
       
    
    return model

##  VGG16

In [10]:
def initialize_vgg16_model():
    # Load the pre-trained VGG16 model without the top (fully connected) layers
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    # Freeze the layers of the pre-trained VGG16 model
    for layer in base_model.layers:
        layer.trainable = False

    # Create a Sequential model
    model = Sequential()

    # Add the pre-trained VGG16 model as the first layer
    model.add(base_model)


    model.add(Flatten())
    model.add(Dense(500, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(6, activation='softmax'))

    # Model compilation
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    return model

## VGG19

In [11]:
def initialize_vgg19_model():
    # Load the pre-trained VGG16 model without the top (fully connected) layers
    base_model = VGG19(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    # Freeze the layers of the pre-trained VGG19 model
    for layer in base_model.layers:
        layer.trainable = False

    # Create a Sequential model
    model = Sequential()

     # Add data augmentation layers
    model.add(layers.RandomFlip("horizontal"))
    model.add(layers.RandomZoom(0.1))
    model.add(layers.RandomTranslation(0.2, 0.2))
    model.add(layers.RandomRotation(0.1))

    # Add the pre-trained VGG16 model as the first layer
    model.add(base_model)

    # Add custom layers
    # model.add(Conv2D(64, (3, 3), activation='relu', kernel_regularizer=regularizers.L1L2(0.01, 0.1)))
    # model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(6, activation='softmax'))

    # Build the model
    model.build((None, 224, 224, 3))

    # Model compilation
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    return model

## RestNet50V2

In [12]:
def initialize_restnet50v2_model():
    # Load the pre-trained VGG16 model without the top (fully connected) layers
    base_model = ResNet50V2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    
    # Freeze the layers of the pre-trained VGG19 model
    for layer in base_model.layers:
        layer.trainable = False

    # Create a Sequential model
    model = Sequential()

    # Add the pre-trained VGG16 model as the first layer
    model.add(base_model)
    
    model.add(Flatten())
    
    model.add(Dense(6, activation='softmax'))

    # Define data augmentation parameters
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    val_datagen = ImageDataGenerator(rescale=1./255)

    # Model compilation
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    train_datagen.fit(X_train)

    return model, train_datagen

# Creating X and y for training and testing

In [13]:
X = np.concatenate((images_airspace, images_bronch, images_inter, images_nodule, images_parenchyma, images_normal), axis=0)
y = np.concatenate((labels_airspace, labels_bronch, labels_inter, labels_nodule, labels_parenchyma, labels_normal), axis=0)

In [14]:
X, y = shuffle(X, y, random_state=42)

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=42)

In [16]:
#X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size = 0.1, random_state=42)

In [18]:
X_train.shape

(1271, 224, 224, 3)

In [19]:
X_train = preprocess_input(X_train) 
X_test = preprocess_input(X_test)

In [20]:
X_train.shape

(1271, 224, 224, 3)

In [21]:
y_train.shape

(1271,)

# Training the model

In [22]:
# model = initialize_restnet50v2_model()

In [23]:
# Assuming you have labels stored in a separate array y_train
model, train_datagen = initialize_restnet50v2_model()

In [24]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50v2 (Functional)     (None, 7, 7, 2048)        23564800  
                                                                 
 flatten (Flatten)           (None, 100352)            0         
                                                                 
 dense (Dense)               (None, 6)                 602118    
                                                                 
Total params: 24166918 (92.19 MB)
Trainable params: 602118 (2.30 MB)
Non-trainable params: 23564800 (89.89 MB)
_________________________________________________________________


In [25]:
train_generator = train_datagen.flow(X_train, y_train, batch_size=32)
val_generator = train_datagen.flow(X_val, y_val, batch_size=32)

In [26]:
es = EarlyStopping(patience = 5, restore_best_weights=True)

# history = model.fit(X_train, y_train, 
#           epochs=50,
#           batch_size=32,
#           validation_split = 0.2,
#           callbacks=[es],
#           verbose=1)


history = model.fit(
    train_generator,
    validation_data=val_generator,
    callbacks=[es],
    verbose=1,
    epochs=50
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50

KeyboardInterrupt: 

# Testing the model and displaying results

In [None]:
def plot_loss_accuracy(history, title=None):
    fig, ax = plt.subplots(1,2, figsize=(20,7))
    
    # --- LOSS --- 
    
    ax[0].plot(history.history['loss'])
    ax[0].plot(history.history['val_loss'])
    ax[0].set_title('Model loss')
    ax[0].set_ylabel('Loss')
    ax[0].set_xlabel('Epoch')
    ax[0].set_ylim((0,3))
    ax[0].legend(['Train', 'Validation'], loc='best')
    ax[0].grid(axis="x",linewidth=0.5)
    ax[0].grid(axis="y",linewidth=0.5)
    
    # --- ACCURACY
    
    ax[1].plot(history.history['accuracy'])
    ax[1].plot(history.history['val_accuracy'])
    ax[1].set_title('Model Accuracy')
    ax[1].set_ylabel('Accuracy')
    ax[1].set_xlabel('Epoch')
    ax[1].legend(['Train', 'Validation'], loc='best')
    ax[1].set_ylim((0,1))
    ax[1].grid(axis="x",linewidth=0.5)
    ax[1].grid(axis="y",linewidth=0.5)
    
    if title:
        fig.suptitle(title)

In [None]:
plot_loss_accuracy(history, title=None)

In [None]:
evaluation = model.evaluate(X_test, y_test)

In [None]:
print(f'Model accuracy is {round(evaluation[1]*100,2)} %')