## Modelling Draft: Xception for M-Stage Classification<br>
Framework: TensorFlow (Keras) <br>
Base models to test: <br>
<li>Xception ✓</li>
<li>MobileNet </li>
<li>NasNet Mobile </li>

#### Import Packages

In [12]:

# from PIL import Image
# from tensorflow.keras import backend as K
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow
import random
import os
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Dropout, BatchNormalization, Flatten, Input
from tensorflow.keras.optimizers import *
from tensorflow.keras.metrics import *
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.mobilenet import MobileNet
from tensorflow.keras.applications.mobilenet import preprocess_input
from sklearn.model_selection import train_test_split
from sklearn.metrics._plot.confusion_matrix import ConfusionMatrixDisplay
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils import class_weight
from time import perf_counter

import sys

# Silence Python warnings unless the interpreter was launched with explicit
# -W options (in which case the user's choice is respected).
if len(sys.warnoptions) == 0:
    import warnings
    warnings.simplefilter("ignore")

# Remove pandas' display truncation for columns, rows and cell width.
for _display_option in ('display.max_columns', 'display.max_rows', 'display.max_colwidth'):
    pd.set_option(_display_option, None)

print('All modules are loaded')

All modules are loaded


In [13]:
# M-stage class labels used for plots and reports further down.
# The T- and N-stage label sets are kept for reference only.
# t_stg= ['is','1','1a','1b','1c','2','2a','2b','3','4']
# n_stg = ['0','1','2','3']
m_stg = ['0', '1', '1a', '1b', '1c', '2', '3']

# Directory holding the image files referenced by the manifest.
raw_dir = r'C:\Users\lzsnc\Desktop\Raw CT resized'

# Load only the filename and M-stage columns of the manifest.
m_raw_df = pd.read_csv(
    r'D:\Lung-PET-CT-Dx\manifest\raw_ct_delimited.csv',
    usecols=['filenames', 'm'],
)

# Make the image directory the working directory for the following cells.
os.chdir(raw_dir)

In [14]:
# Preview the first rows of the manifest (image filename and M-stage label).
m_raw_df.head()

Unnamed: 0,filenames,m
0,Lung_Dx-A0001_04-04-2007-NA-Chest-07990_2.000000-5mm-40805_1-13.tiff,1b
1,Lung_Dx-A0001_04-04-2007-NA-Chest-07990_2.000000-5mm-40805_1-14.tiff,1b
2,Lung_Dx-A0001_04-04-2007-NA-Chest-07990_2.000000-5mm-40805_1-15.tiff,1b
3,Lung_Dx-A0001_04-04-2007-NA-Chest-07990_2.000000-5mm-40805_1-16.tiff,1b
4,Lung_Dx-A0001_04-04-2007-NA-Chest-07990_2.000000-5mm-40805_1-17.tiff,1b


#### Split Dataset: <br>
80% Train, 20% Test

In [15]:
# Cast the M-stage labels to str once, on a new frame, *before* splitting.
# The previous version assigned into the two frames returned by
# train_test_split, which is the chained-assignment pattern pandas warns
# about, and it repeated the cast twice. The row partition is unchanged:
# train_test_split depends only on the number of rows and random_state.
m_labelled_df = m_raw_df.assign(m=m_raw_df['m'].apply(str))

# 80% train / 20% test, reproducible via the fixed random_state.
# NOTE(review): this is a random split over individual image rows. The
# filenames suggest consecutive slices of the same scan, so slices from one
# scan may land in both sets - confirm whether a per-patient split is needed.
m_train_df, m_test_df = train_test_split(m_labelled_df, test_size=0.2, random_state=42)

#### Train Image Generator for 'M' Stage

In [16]:
train_batch = 32

# Only rescale pixel values to [0, 1].
# featurewise_center / featurewise_std_normalization were removed: Keras only
# applies them after ImageDataGenerator.fit() has computed dataset statistics,
# which this notebook never calls, so they had no effect. Fitting them here
# only would also make training input differ from the validation generator,
# which rescales only.
train_datagen = ImageDataGenerator(rescale=1 / 255.0)

m_train_ds = train_datagen.flow_from_dataframe(
    dataframe=m_train_df,
    directory=raw_dir,
    x_col='filenames',
    y_col='m',
    # Match the base model's input_shape=(224, 224, 3); the default
    # target_size of flow_from_dataframe is (256, 256).
    target_size=(224, 224),
    # color_mode='grayscale',
    class_mode='categorical',
    batch_size=train_batch,  # previously defined but never passed
    shuffle=True,
    seed=32
)

Found 24250 validated image filenames belonging to 7 classes.


#### Test Image Generator

In [17]:
test_batch = 32

# Same [0, 1] rescaling as used for the training images.
test_datagen = ImageDataGenerator(rescale=1 / 255.0)

m_test_ds = test_datagen.flow_from_dataframe(
    dataframe=m_test_df,
    directory=raw_dir,
    x_col='filenames',
    y_col='m',
    # Match the base model's input_shape=(224, 224, 3); the default
    # target_size of flow_from_dataframe is (256, 256).
    target_size=(224, 224),
    # color_mode='grayscale',
    class_mode='categorical',
    batch_size=test_batch,  # previously defined but never passed
    # Keep file order fixed so predictions line up with m_test_ds.classes
    # when building the confusion matrix and classification report.
    shuffle=False,
    seed=32
)

Found 6063 validated image filenames belonging to 7 classes.


In [18]:
# Label -> integer index mapping assigned by the training generator.
m_class_dict = m_train_ds.class_indices
print(m_class_dict)

# Number of output units needed by the classifier head.
m_num_classes = len(m_train_ds.class_indices)
m_num_classes

{'0': 0, '1': 1, '1a': 2, '1b': 3, '1c': 4, '2': 5, '3': 6}


7

In [19]:
# Display the label -> index mapping taken from the training generator.
m_class_dict

{'0': 0, '1': 1, '1a': 2, '1b': 3, '1c': 4, '2': 5, '3': 6}

Compute balanced class weights so that under-represented M-stage classes count more in the loss.

In [20]:
# Integer class ids present in the training generator.
m_class_ids = np.unique(m_train_ds.classes)

# 'balanced' weights, one per class id, computed from the training labels.
m_balanced_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=m_class_ids,
    y=m_train_ds.classes,
)

# Keras expects a {class_id: weight} mapping for model.fit(class_weight=...).
m_class_weights = {class_id: weight for class_id, weight in zip(m_class_ids, m_balanced_weights)}
m_class_weights

NameError: name 'np' is not defined

Model 3: Xception

In [None]:
# Xception is referenced through the already-imported `tensorflow` package:
# the notebook imports MobileNet but never imports the bare name `Xception`,
# so the previous call raised NameError on a fresh kernel.
base_model = tensorflow.keras.applications.Xception(
    weights='imagenet',          # ImageNet-pretrained convolutional weights
    include_top=False,           # drop the original classification head
    input_shape=(224, 224, 3)
)

# Freeze the pretrained backbone; only the layers added on top are trained.
base_model.trainable = False
base_model.summary()

In [None]:
# Fix the seed of each random number generator used by the notebook
# (TensorFlow, NumPy and the stdlib `random` module).
seed_value = 42
tensorflow.random.set_seed(seed_value)  # also used by the Dropout layers
np.random.seed(seed_value)
random.seed(seed_value)

In [None]:
# Classifier head stacked on top of the frozen base model.
model = Sequential([
    base_model,
    # model.add(Dropout(0.3))
    tensorflow.keras.layers.GlobalAveragePooling2D(),
    BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001),
    # Reshape to (features, 1) so AveragePooling1D can pool over the feature
    # axis; Flatten then restores a flat vector for the Dense layers.
    tensorflow.keras.layers.Reshape((-1, 1)),
    tensorflow.keras.layers.AveragePooling1D(),
    Flatten(),
    Dropout(0.3, seed=seed_value),
    Dense(128, activation='relu'),
    # NOTE(review): the only Dropout without an explicit seed - confirm intent.
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3, seed=seed_value),
    # One softmax unit per M-stage class.
    Dense(m_num_classes, activation='softmax'),
])

model.summary()

In [None]:
learning_rate = 0.0001
optimizer = tensorflow.keras.optimizers.Adam(learning_rate=learning_rate)

# The final Dense layer already applies softmax, so the model outputs
# probabilities, not logits. from_logits must therefore be False; with True
# the loss would apply a second softmax on top of the model's output.
loss = tensorflow.keras.losses.CategoricalCrossentropy(from_logits=False)
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

Actual Training

In [None]:
# Checkpoints are written relative to the working directory, so switch to
# the models folder first.
os.chdir(r'D:\Lung-PET-CT-Dx\manifest\Models')

# Save the full model (not just the weights) whenever val_loss improves,
# keeping only the best version on disk and logging each save.
checkpoint_callback = tensorflow.keras.callbacks.ModelCheckpoint(
    "multiclass_m_stage_xception_v2.h5",
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

# Stop training once val_accuracy has not improved for 5 epochs.
# NOTE(review): the checkpoint monitors val_loss while early stopping
# monitors val_accuracy - confirm that the two different criteria are intended.
early_stop = tensorflow.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode='auto',
    patience=5,
)

In [None]:
# Train the model and report the wall-clock duration in seconds.
# NOTE(review): the test generator is also used as validation data, so the
# checkpoint / early-stopping decisions are made on the test set.
start = perf_counter()
history = model.fit(
    m_train_ds,
    validation_data=m_test_ds,
    epochs=20,
    steps_per_epoch=len(m_train_ds),      # one full pass over the training batches
    validation_steps=len(m_test_ds),      # one full pass over the validation batches
    class_weight=m_class_weights,
    callbacks=[checkpoint_callback, early_stop],
    verbose=1,
)
end = perf_counter()
print(end - start)


Load model - skip training <br>
Approximate training time: 10 hours 14 mins 20 secs

In [None]:
# Evaluate performance
def plot_loss_curves(history):
    """Plot training vs. validation curves for loss and accuracy.

    Args:
        history: object whose ``history`` attribute is a dict containing the
            per-epoch lists 'loss', 'val_loss', 'accuracy' and 'val_accuracy'
            (e.g. the value returned by ``model.fit``).

    The loss curves are drawn on the current pyplot figure; the accuracy
    curves are drawn on a newly created figure.
    """
    metrics = history.history
    # Read every series up front so a missing key fails before any drawing.
    train_loss, valid_loss, train_acc, valid_acc = (
        metrics[key] for key in ('loss', 'val_loss', 'accuracy', 'val_accuracy')
    )
    epoch_axis = range(len(metrics['loss']))

    panels = (
        ('Loss', 'training_loss', train_loss, 'val_loss', valid_loss),
        ('Accuracy', 'training_accuracy', train_acc, 'val_accuracy', valid_acc),
    )
    for position, (title, train_label, train_values, valid_label, valid_values) in enumerate(panels):
        if position > 0:
            # Only panels after the first one open a fresh figure.
            plt.figure()
        plt.plot(epoch_axis, train_values, label=train_label)
        plt.plot(epoch_axis, valid_values, label=valid_label)
        plt.title(title)
        plt.xlabel('Epochs')
        plt.legend()

In [None]:
plot_loss_curves(history)

In [None]:
# Class names ordered by the integer index the test generator assigned to
# them. Deriving them from class_indices (instead of the hard-coded m_stg
# list) guarantees the tick labels and report rows match the encoded labels.
m_labels = sorted(m_test_ds.class_indices, key=m_test_ds.class_indices.get)
m_label_ids = list(range(len(m_labels)))

m_Y_pred = model.predict(m_test_ds)  # softmax scores, one row per test image
m_Y_ground = m_test_df.m.to_list()
m_y_pred = np.argmax(m_Y_pred, axis=1)  # predicted class index per image

print('Confusion Matrix')
# Passing labels= keeps the matrix at one row/column per class even if a
# class never occurs in the ground truth or the predictions.
cm = confusion_matrix(m_test_ds.classes, m_y_pred, labels=m_label_ids)
cmd = ConfusionMatrixDisplay(cm, display_labels=m_labels)
cmd.plot()

print('Classification Report for M-Stage')

print(classification_report(m_test_ds.classes, m_y_pred,
                            labels=m_label_ids, target_names=m_labels))


In [None]:
from tensorflow.keras.preprocessing import image

# Single-image sanity check: load one image, preprocess it the same way as
# the generator input, and print the per-class softmax scores.
test_img = r'C:\Users\lzsnc\Desktop\Raw CT Images\Lung_Dx-A0065_10-26-2008-NA-lungc-32319_3.000000-5mm Lung SS50-68818_1-34.tiff'
test = image.load_img(test_img, target_size=(224, 224))
test_img_array = image.img_to_array(test)
img_batch = np.expand_dims(test_img_array, axis=0)  # add the batch dimension

# The model was trained on generator output rescaled by 1/255 (range [0, 1]).
# The previous code used MobileNet's preprocess_input, which maps pixels to
# [-1, 1], so the model saw a different input distribution at inference time.
img_preprocessed = img_batch / 255.0
prediction = model.predict(img_preprocessed)
print(prediction[0])