## Import

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import gc
import os, sys
import seaborn as sns
from tqdm import tqdm

from google.colab import files
import cv2

from sklearn.model_selection import train_test_split, KFold, cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report

from tensorflow.keras.layers import Conv2D,Flatten,MaxPool2D,BatchNormalization,GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.densenet import DenseNet121 as DenseNet
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from keras.models import load_model

sns.set_style('darkgrid')

## Link data

In [None]:
JSRT_dataset = '/content/drive/MyDrive/COURSES/CS431/Lung segmentation/Dataset/JSRT/preprocessed/'
nodule = 'Nodule/'
healthy ='Non-nodule/'

In [None]:
df = {'file_name': [], 'label': []}
df = pd.DataFrame(df)
for filename in os.listdir(JSRT_dataset + nodule):
    new_row = {'file_name': nodule+filename, 'label': 'nodule'}
    df = df.append(new_row, ignore_index=True)

for filename in os.listdir(JSRT_dataset+healthy):
    new_row = {'file_name': healthy+filename, 'label': 'healthy'}
    df = df.append(new_row, ignore_index=True)
df

Unnamed: 0,file_name,label
0,Nodule/JPCLN072.png,nodule
1,Nodule/JPCLN066.png,nodule
2,Nodule/JPCLN067.png,nodule
3,Nodule/JPCLN098.png,nodule
4,Nodule/JPCLN107.png,nodule
...,...,...
188,Non-nodule/JPCNN033.png,healthy
189,Non-nodule/JPCNN027.png,healthy
190,Non-nodule/JPCLN094.png,healthy
191,Non-nodule/JPCNN026.png,healthy


## Split train and test


In [None]:
# 90% train, 10% test
train_df, test_df = train_test_split(df, test_size=0.1, random_state = 42)

In [None]:
X_df = train_df.drop(columns=['label'])
y_df = train_df['label']

## Image Data Generator

In [None]:
batch_size = 32
img_height = 480
img_width = 480
target_size = (img_height, img_width)

In [None]:
train_datagen = ImageDataGenerator(rotation_range=10,
                                    width_shift_range=0.1,
                                    height_shift_range=0.1,
                                    rescale=1.,
                                    zoom_range=0.2,
                                    fill_mode='nearest',
                                    cval=0)

test_datagen = ImageDataGenerator(rescale=1.)

## Create model

In [None]:
base_model = DenseNet(include_top=False, weights='imagenet', input_shape=(img_height, img_width, 3))

In [None]:
len(base_model.layers)

427

In [None]:
for layer in base_model.layers[:427]:
    layer.trainable = False

In [None]:
# for i, layer in enumerate(base_model.layers):
#     print(i, layer.name, "-", layer.trainable)

In [None]:
model = tf.keras.Sequential([
    base_model,
    Flatten(),
    BatchNormalization(),
    Dense(128, activation='elu'),
    Dropout(0.5),
    BatchNormalization(),
    Dense(64, activation='elu'),
    Dropout(0.5),
    BatchNormalization(),
    Dense(1, activation='sigmoid'),
])

In [None]:
model.compile(loss='binary_crossentropy', optimizer=SGD(learning_rate=5e-4), metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
densenet121 (Functional)     (None, 15, 15, 1024)      7037504   
_________________________________________________________________
flatten (Flatten)            (None, 230400)            0         
_________________________________________________________________
batch_normalization (BatchNo (None, 230400)            921600    
_________________________________________________________________
dense (Dense)                (None, 128)               29491328  
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 128)               512       
_________________________________________________________________
dense_1 (Dense)              (None, 64)                8

## Callbacks


In [None]:
patience = 3
stop_patience = 5
factor = 0.1
callbacks = [
    ModelCheckpoint("/content/drive/MyDrive/COURSES/CS431/Lung segmentation/model_checkpoints/densenet.h5", save_weights_only=True, save_best_only=True, verbose = 1),
    EarlyStopping(patience=stop_patience, monitor='val_loss', verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=factor, patience=patience, min_lr=1e-6, verbose=1)
]

## Build model

In [None]:
def create_model():
    base_model = DenseNet(include_top=False, weights='imagenet', input_shape=(img_height, img_width, 3))
    for layer in base_model.layers[:427]:
        layer.trainable = False
    model = tf.keras.Sequential([
        base_model,
        Flatten(),
        BatchNormalization(),
        Dense(128, activation='elu'),
        Dropout(0.5),
        BatchNormalization(),
        Dense(64, activation='elu'),
        Dropout(0.5),
        BatchNormalization(),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(loss='binary_crossentropy', optimizer=SGD(learning_rate=5e-4), metrics=['accuracy'])
    return model

## K-fold and fit

In [None]:
epochs = 30
KF_model = KerasClassifier(build_fn=model, epochs=epochs, batch_size=5, callbacks=callbacks, verbose=1)

In [None]:
cv = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)

## Test model

In [None]:
TRAIN_PATH = JSRT_dataset
j = 0
data_kfold = pd.DataFrame()

In [None]:
for train_idx, val_idx in list(cv.split(X_df, y_df)):
    x_train_df = df.iloc[train_idx]
    x_valid_df = df.iloc[val_idx]

    j += 1

    training_set = train_datagen.flow_from_dataframe(dataframe=x_train_df, directory=TRAIN_PATH,

                                                     x_col="file_name", y_col="label",

                                                     class_mode="binary",

                                                     target_size=(img_height, img_width), batch_size=batch_size)

    validation_set = test_datagen.flow_from_dataframe(dataframe=x_valid_df, directory=TRAIN_PATH,

                                                            x_col="file_name", y_col='label',

                                                            class_mode="binary",

                                                            target_size=(img_height, img_width), batch_size=batch_size)

    model = create_model()

    history = model.fit_generator(training_set,
                                validation_data=validation_set,
                                epochs=epochs,
                                callbacks=callbacks,
                                verbose=1)

    test_generator = ImageDataGenerator(rescale=1.)

    test_set = test_generator.flow_from_dataframe(dataframe=test_df, directory=TRAIN_PATH,

                                                  x_col="file_name", y_col='label',

                                                  class_mode='binary',

                                                  target_size=(img_height, img_width))



    best_model = model
    best_model.evaluate(test_set)

    # predicted_class_indices = np.argmax(pred, axis=1)

    # data_kfold[j] = predicted_class_indices

    # gc.collect()

Found 155 validated image filenames belonging to 2 classes.
Found 18 validated image filenames belonging to 2 classes.




Epoch 1/30

Epoch 00001: val_loss improved from inf to 1.09195, saving model to /content/drive/MyDrive/COURSES/CS431/Lung segmentation/model_checkpoints/densenet.h5
Epoch 2/30

Epoch 00002: val_loss improved from 1.09195 to 0.88264, saving model to /content/drive/MyDrive/COURSES/CS431/Lung segmentation/model_checkpoints/densenet.h5
Epoch 3/30

Epoch 00003: val_loss did not improve from 0.88264
Epoch 4/30

Epoch 00004: val_loss improved from 0.88264 to 0.85614, saving model to /content/drive/MyDrive/COURSES/CS431/Lung segmentation/model_checkpoints/densenet.h5
Epoch 5/30

Epoch 00005: val_loss did not improve from 0.85614
Epoch 6/30

Epoch 00006: val_loss did not improve from 0.85614
Epoch 7/30

Epoch 00007: val_loss did not improve from 0.85614

Epoch 00007: ReduceLROnPlateau reducing learning rate to 5.0000002374872565e-05.
Epoch 8/30

Epoch 00008: val_loss did not improve from 0.85614
Epoch 9/30

Epoch 00009: val_loss did not improve from 0.85614
Epoch 00009: early stopping
Found 20 



Epoch 1/30

Epoch 00001: val_loss did not improve from 0.85614
Epoch 2/30

Epoch 00002: val_loss did not improve from 0.85614
Epoch 3/30

Epoch 00003: val_loss did not improve from 0.85614
Epoch 4/30

Epoch 00004: val_loss did not improve from 0.85614
Epoch 5/30

Epoch 00005: val_loss did not improve from 0.85614
Epoch 6/30

Epoch 00006: val_loss did not improve from 0.85614
Epoch 7/30

Epoch 00007: val_loss did not improve from 0.85614
Epoch 8/30

Epoch 00008: val_loss did not improve from 0.85614

Epoch 00008: ReduceLROnPlateau reducing learning rate to 5.0000002374872565e-05.
Epoch 9/30

Epoch 00009: val_loss did not improve from 0.85614
Epoch 10/30

Epoch 00010: val_loss did not improve from 0.85614
Epoch 00010: early stopping
Found 20 validated image filenames belonging to 2 classes.
Found 155 validated image filenames belonging to 2 classes.
Found 18 validated image filenames belonging to 2 classes.




Epoch 1/30

Epoch 00001: val_loss did not improve from 0.85614
Epoch 2/30

Epoch 00002: val_loss did not improve from 0.85614
Epoch 3/30

Epoch 00003: val_loss did not improve from 0.85614
Epoch 4/30

Epoch 00004: val_loss did not improve from 0.85614
Epoch 5/30

Epoch 00005: val_loss did not improve from 0.85614

Epoch 00005: ReduceLROnPlateau reducing learning rate to 5.0000002374872565e-05.
Epoch 6/30

Epoch 00006: val_loss did not improve from 0.85614
Epoch 7/30

Epoch 00007: val_loss did not improve from 0.85614
Epoch 00007: early stopping
Found 20 validated image filenames belonging to 2 classes.
Found 156 validated image filenames belonging to 2 classes.
Found 17 validated image filenames belonging to 2 classes.




Epoch 1/30

Epoch 00001: val_loss did not improve from 0.85614
Epoch 2/30

Epoch 00002: val_loss did not improve from 0.85614
Epoch 3/30

Epoch 00003: val_loss did not improve from 0.85614
Epoch 4/30

Epoch 00004: val_loss did not improve from 0.85614
Epoch 5/30

Epoch 00005: val_loss improved from 0.85614 to 0.82235, saving model to /content/drive/MyDrive/COURSES/CS431/Lung segmentation/model_checkpoints/densenet.h5
Epoch 6/30

Epoch 00006: val_loss improved from 0.82235 to 0.63127, saving model to /content/drive/MyDrive/COURSES/CS431/Lung segmentation/model_checkpoints/densenet.h5
Epoch 7/30

Epoch 00007: val_loss improved from 0.63127 to 0.60475, saving model to /content/drive/MyDrive/COURSES/CS431/Lung segmentation/model_checkpoints/densenet.h5
Epoch 8/30

Epoch 00008: val_loss improved from 0.60475 to 0.58416, saving model to /content/drive/MyDrive/COURSES/CS431/Lung segmentation/model_checkpoints/densenet.h5
Epoch 9/30

Epoch 00009: val_loss improved from 0.58416 to 0.54736, savi



Epoch 1/30

Epoch 00001: val_loss did not improve from 0.50500
Epoch 2/30

Epoch 00002: val_loss did not improve from 0.50500
Epoch 3/30

Epoch 00003: val_loss did not improve from 0.50500
Epoch 4/30

Epoch 00004: val_loss did not improve from 0.50500
Epoch 5/30

Epoch 00005: val_loss did not improve from 0.50500
Epoch 6/30

Epoch 00006: val_loss did not improve from 0.50500

Epoch 00006: ReduceLROnPlateau reducing learning rate to 5.0000002374872565e-05.
Epoch 7/30

Epoch 00007: val_loss did not improve from 0.50500
Epoch 8/30

Epoch 00008: val_loss did not improve from 0.50500
Epoch 00008: early stopping
Found 20 validated image filenames belonging to 2 classes.
Found 156 validated image filenames belonging to 2 classes.
Found 17 validated image filenames belonging to 2 classes.




Epoch 1/30

Epoch 00001: val_loss did not improve from 0.50500
Epoch 2/30

Epoch 00002: val_loss did not improve from 0.50500
Epoch 3/30

Epoch 00003: val_loss improved from 0.50500 to 0.50495, saving model to /content/drive/MyDrive/COURSES/CS431/Lung segmentation/model_checkpoints/densenet.h5
Epoch 4/30

Epoch 00004: val_loss did not improve from 0.50495
Epoch 5/30

Epoch 00005: val_loss did not improve from 0.50495
Epoch 6/30

Epoch 00006: val_loss did not improve from 0.50495

Epoch 00006: ReduceLROnPlateau reducing learning rate to 5.0000002374872565e-05.
Epoch 7/30

Epoch 00007: val_loss did not improve from 0.50495
Epoch 8/30

Epoch 00008: val_loss did not improve from 0.50495
Epoch 00008: early stopping
Found 20 validated image filenames belonging to 2 classes.
Found 156 validated image filenames belonging to 2 classes.
Found 17 validated image filenames belonging to 2 classes.




Epoch 1/30

Epoch 00001: val_loss did not improve from 0.50495
Epoch 2/30

Epoch 00002: val_loss did not improve from 0.50495
Epoch 3/30

Epoch 00003: val_loss did not improve from 0.50495
Epoch 4/30

Epoch 00004: val_loss did not improve from 0.50495
Epoch 5/30

Epoch 00005: val_loss did not improve from 0.50495
Epoch 6/30

Epoch 00006: val_loss did not improve from 0.50495
Epoch 7/30

Epoch 00007: val_loss did not improve from 0.50495

Epoch 00007: ReduceLROnPlateau reducing learning rate to 5.0000002374872565e-05.
Epoch 8/30

Epoch 00008: val_loss did not improve from 0.50495
Epoch 9/30

Epoch 00009: val_loss did not improve from 0.50495
Epoch 00009: early stopping
Found 20 validated image filenames belonging to 2 classes.
Found 156 validated image filenames belonging to 2 classes.
Found 17 validated image filenames belonging to 2 classes.




Epoch 1/30

Epoch 00001: val_loss did not improve from 0.50495
Epoch 2/30

Epoch 00002: val_loss did not improve from 0.50495
Epoch 3/30

Epoch 00003: val_loss did not improve from 0.50495
Epoch 4/30

Epoch 00004: val_loss did not improve from 0.50495

Epoch 00004: ReduceLROnPlateau reducing learning rate to 5.0000002374872565e-05.
Epoch 5/30

Epoch 00005: val_loss did not improve from 0.50495
Epoch 6/30

Epoch 00006: val_loss did not improve from 0.50495
Epoch 7/30

Epoch 00007: val_loss did not improve from 0.50495
Epoch 8/30

Epoch 00008: val_loss did not improve from 0.50495
Epoch 9/30

Epoch 00009: val_loss did not improve from 0.50495
Epoch 10/30

Epoch 00010: val_loss did not improve from 0.50495
Epoch 11/30

Epoch 00011: val_loss did not improve from 0.50495
Epoch 12/30

Epoch 00012: val_loss did not improve from 0.50495
Epoch 13/30

Epoch 00013: val_loss did not improve from 0.50495
Epoch 14/30

Epoch 00014: val_loss did not improve from 0.50495
Epoch 15/30

Epoch 00015: val_l



Epoch 1/30

Epoch 00001: val_loss did not improve from 0.50495
Epoch 2/30

Epoch 00002: val_loss did not improve from 0.50495
Epoch 3/30

Epoch 00003: val_loss did not improve from 0.50495
Epoch 4/30

Epoch 00004: val_loss did not improve from 0.50495
Epoch 5/30

Epoch 00005: val_loss did not improve from 0.50495
Epoch 6/30

Epoch 00006: val_loss did not improve from 0.50495
Epoch 7/30

Epoch 00007: val_loss did not improve from 0.50495

Epoch 00007: ReduceLROnPlateau reducing learning rate to 5.0000002374872565e-05.
Epoch 8/30

Epoch 00008: val_loss did not improve from 0.50495
Epoch 9/30

Epoch 00009: val_loss did not improve from 0.50495
Epoch 00009: early stopping
Found 20 validated image filenames belonging to 2 classes.
Found 156 validated image filenames belonging to 2 classes.
Found 17 validated image filenames belonging to 2 classes.




Epoch 1/30

Epoch 00001: val_loss did not improve from 0.50495
Epoch 2/30

Epoch 00002: val_loss did not improve from 0.50495
Epoch 3/30

Epoch 00003: val_loss did not improve from 0.50495
Epoch 4/30

Epoch 00004: val_loss did not improve from 0.50495
Epoch 5/30

Epoch 00005: val_loss did not improve from 0.50495
Epoch 6/30

Epoch 00006: val_loss did not improve from 0.50495
Epoch 7/30

Epoch 00007: val_loss did not improve from 0.50495
Epoch 8/30

Epoch 00008: val_loss did not improve from 0.50495
Epoch 9/30

Epoch 00009: val_loss did not improve from 0.50495
Epoch 10/30

Epoch 00010: val_loss did not improve from 0.50495
Epoch 11/30

Epoch 00011: val_loss did not improve from 0.50495
Epoch 12/30

Epoch 00012: val_loss did not improve from 0.50495

Epoch 00012: ReduceLROnPlateau reducing learning rate to 5.0000002374872565e-05.
Epoch 13/30

Epoch 00013: val_loss did not improve from 0.50495
Epoch 14/30

Epoch 00014: val_loss did not improve from 0.50495
Epoch 00014: early stopping
Fou