# **Project name: Endoscopic Capsule -- 150_ConvoRGB_ResNet_DenseNet_LMR_zip**






# **00. Project configuration**



## 00.01 General configuration


In [None]:
## Local project layout (Colab runtime)
collab_path: str = '/content/'
project_path: str = collab_path
training_folder, val_folder = 'TRAIN', 'VAL'

## Imagery folders (appended to project_path by later cells)
imagery_path, raw_imagery_path = '', '/raw/'

## Google Drive mount point and per-project sub-folders
drive_path: str = '/content/drive'
drive_model_path, drive_log_path, drive_csv_path = '/model/', '/log/', '/csv_log/'

## Dataset archive (Drive file id + local file name) and image geometry
file_id, file_name = "1aeG-twXlUW2_d87TF7b_tKT3MH4GVcj3", "LMR_Capstone_Delivery.zip"
image_size, image_bands, num_classes = 100, 3, 11

## Training-mode defaults; the mode-specific cell further down overrides several of them
single_dataset = True
num_samples_train = num_samples_val = 0
validation_fraction = 0
retrain = weighted_train = False
validation_file = ''

## 00.02 Training mode

In [None]:
############################
############################

# evaluating=True means: do not train, only score previously saved weights
# (later cells set num_epochs to 0 and force retrain in that case).
evaluating = False
retrain = False

# Architecture switches: mode01 = basic CNN, mode11 = ResNet, mode21 = DenseNet
mode01=True
mode11=False
mode21=False

## Training
# num_epochs must be bound in both modes: `patience` below reads it, and
# binding it only when training raised NameError with evaluating=True.
num_epochs = 0 if evaluating else 100

############################
############################
batch_size  = 32
learning_rate = 0.0005
# Lower bound handed to the ReduceLROnPlateau callback
min_learning_rate = learning_rate / 100
# Patience handed to ReduceLROnPlateau; equal to the full epoch budget
patience = num_epochs
momentum = 0.9

## 00.03 Training configuration

In [None]:
# Evaluation-only runs train for zero epochs and always start from saved weights.
if evaluating:
  num_epochs  = 0

retrain = retrain or evaluating
model_name = "model_cnn_lumaro.h5"
drive_project_path = '/MyDrive/ML'

# Drive folder used by each architecture switch. Order matters: if several
# switches are on, the last enabled entry wins, as with sequential `if` blocks.
_mode_folders = (
    (mode01, '/MyDrive/ML_150_ConvoRGB_mode01'),   # Mode 01
    (mode11, '/MyDrive/ML_150_ResNet_mode11'),     # Mode 11
    (mode21, '/MyDrive/ML_150_DenseNet_mode21'),   # Mode 21
)
_enabled_folders = [folder for enabled, folder in _mode_folders if enabled]
if not _enabled_folders:
  # Fail here with a clear message; otherwise training_file stays unbound and
  # the run only breaks when the training csv is read.
  raise ValueError("No training mode selected: set one of mode01, mode11 or mode21 to True")

drive_project_path = _enabled_folders[-1]

# Data settings shared by every mode
single_dataset = True
weighted_train = False
training_file = 'split_all.csv'
validation_fraction = .3

## 00.04 Loading Modules and Libraries

In [None]:
# Load the TensorBoard notebook extension
%load_ext tensorboard

!pip install tensorflow-addons

In [None]:
import os                                               # OS module in Python provides a way of using operating system dependent functionality

import pandas as pd                                     # Data analysis and manipultion tool
import numpy as np                                      # Fundamental package for linear algebra and multidimensional arrays
import tensorflow as tf                                 # Deep Learning Tool
import tensorflow_addons as tfa
import cv2                                              # Library for image processing

import matplotlib.pyplot as plt
import seaborn as sns

from tensorflow.keras.applications.resnet import ResNet152,ResNet50
from tensorflow.keras.applications.densenet import DenseNet169

import sklearn.metrics as mtc
from sklearn.metrics import classification_report, precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

from psutil import virtual_memory
from tqdm import tqdm
from google.colab import drive
from pathlib import Path
import shutil

## 00.05 Showing Environment configuration

In [None]:
# Report the TensorFlow version and how many GPUs TensorFlow can see.
print("Tensorflow version: ",tf.version.VERSION)
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

# Total system memory in decimal gigabytes; 20 GB is the high-RAM threshold.
ram_gb = virtual_memory().total / 1e9
print(f'Your runtime has {ram_gb:.1f} gigabytes of available RAM')
if ram_gb >= 20:
  print('(You are using a high-RAM runtime!)')
else:
  print('(Not using a high-RAM runtime)')

## 00.06 Setting-up (Google-Drive)

In [None]:
drive.mount(drive_path)

In [None]:
# Create the project folder on Drive together with its model / log / csv
# sub-folders. exist_ok=True makes re-running the cell a no-op and removes the
# separate exists() checks.
_drive_root = drive_path+drive_project_path
for _subfolder in ('', drive_model_path, drive_log_path, drive_csv_path):
  os.makedirs(_drive_root+_subfolder, exist_ok=True)

## 00.07 Setting-up the environment (Colab)

In [None]:
# Make sure the local working folder exists (no-op when it is already there).
os.makedirs(project_path, exist_ok=True)

In [None]:
# Download the dataset archive from Google Drive, unless a file named
# file_name is already present in the current working directory.
if not os.path.exists(file_name):
  !pip install gdown
  # NOTE(review): newer gdown releases are reported to have dropped the `--id`
  # flag in favour of a positional file id - confirm against the installed version.
  !gdown --id $file_id


In [None]:
#unzip the data
dir = project_path+imagery_path
!unzip -q -n $file_name -d $dir
dir


# **01. Loading and preparing training data**


## 01.01 Recovering info from training file

In [None]:
file = project_path+imagery_path+training_file
file

In [None]:
labels = pd.read_csv(file)   # loading the labels
labels

In [None]:
dir = project_path+imagery_path+'raw/'
dir

In [None]:
file_paths = [dir + fname for fname in labels['filename']]
file_paths

In [None]:
# Sanity check: there must be exactly one file path per label row
n_labels, n_paths = len(labels), len(file_paths)
if n_labels != n_paths:
    print('Number of labels does not match the number of filenames')
else:
    print('Number of labels i.e. ', n_labels, 'matches the number of filenames i.e. ', n_paths)

#### Adding the file_paths to dataframe

In [None]:
train_data = labels
train_data['filepaths'] = file_paths
print("Num of train_data: ",len(train_data))
train_data      

## 01.02 Plotting some training images


In [None]:
def show_image(image,label):
  """Plot one image file side by side as RGB, grayscale and HSV.

  Args:
    image: Path of the image file; it is read with ``cv2.imread``.
    label: Class label shown in the three subplot titles.

  Raises:
    OSError: If OpenCV cannot read ``image``.
  """
  bgr_img = cv2.imread(image)
  if bgr_img is None:
    # cv2.imread reports failure by returning None instead of raising
    raise OSError(f'Could not read image file: {image}')

  # OpenCV loads BGR; matplotlib expects RGB
  orig_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)
  processed_img = cv2.imread(image, cv2.IMREAD_GRAYSCALE)
  # Convert from the BGR data: applying COLOR_BGR2HSV to the RGB array would
  # swap the red and blue channels before the hue is computed.
  processed_img_2 = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2HSV)

  fig, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=True,figsize=(12,10))
  ax1.imshow(orig_img)
  ax2.imshow(processed_img, cmap='gray', vmin=0, vmax=255)
  # The HSV channels are rendered as if they were RGB (false colour view)
  ax3.imshow(processed_img_2)
  ax1.set_title(f'RGB Image class {label}')
  ax2.set_title(f'Gray Image class {label}')
  ax3.set_title(f'HSV Image class {label}')

In [None]:
vec_labels = np.unique([label for label in labels['label']])
vec_labels

In [None]:
# Show one randomly drawn example image for every class
for class_name in vec_labels:
  example = train_data[train_data['label'] == class_name].sample().iloc[0]
  show_image(example['filepaths'], example['label'])


## 01.03 Make Folders for training images

In [None]:
_dir_train = os.path.join(project_path,training_folder)
_dir_train

In [None]:
if os.path.exists(_dir_train):
  shutil.rmtree(_dir_train)
os.path.exists(_dir_train)

In [None]:
if not os.path.exists(_dir_train):
    os.makedirs(_dir_train)
_dir_train

In [None]:
labels = train_data['label'].unique().tolist()
labels

In [None]:
# One sub-folder per class name under the training folder.
# exist_ok=True replaces the separate exists() check.
for label in labels:
  os.makedirs(os.path.join(project_path,training_folder,label), exist_ok=True)

## 01.04 Copy Training images to respective folders

In [None]:
# Plain copy mode (no per-class sample count requested): every non-empty image
# is copied into the folder named after its label.
if num_samples_train == 0:
  path_col, label_col = train_data["filepaths"], train_data["label"]
  for row_id in tqdm(train_data.index):
    src_path = path_col[row_id]
    if os.stat(src_path).st_size == 0:
      continue  # zero-byte file: nothing to copy
    shutil.copy(src_path, os.path.join(project_path,training_folder,label_col[row_id]))

In [None]:
labels = train_data['label'].unique().tolist()
labels

In [None]:
counts = train_data.label.value_counts()
counts

In [None]:
# Fixed-size sampling: copy exactly num_samples_train images per class, cycling
# through the shuffled class images as often as needed. Every pass uses its own
# numeric file-name prefix (1000, 1001, ...) so repeated images do not overwrite
# each other.
if (num_samples_train > 0):
  for label in tqdm(labels):
    clas_data = train_data[train_data['label']==label]
    clas_data = clas_data.sample(frac=1)
    # Zero-byte files are never copied, so filter them out once up front
    usable = [path for path in clas_data["filepaths"] if os.stat(path).st_size != 0]
    if not usable:
      # With nothing to copy the refill loop below could never reach its
      # target and would spin forever.
      print('No non-empty images for class', label, '- skipped')
      continue
    dest_dir = os.path.join(project_path,training_folder,label)
    lap = 1000
    num = num_samples_train
    while (num > 0):
      # Take only as many files as are still missing in this pass
      batch = usable[:num]
      for image in batch:
        name = str(lap)+'_'+os.path.basename(image)
        shutil.copy(image,os.path.join(dest_dir,name))
      num = num - len(batch)
      lap = lap + 1


## 01.05 Statistics of training data



In [None]:
train_data['label'].unique().tolist()

In [None]:
train_data.label.value_counts()

In [None]:
# Class distribution of the training data (labels still as class names)
sns.set_style('darkgrid')
plt.figure(figsize=(12,8))
# Pass the column as x=: newer seaborn releases accept only `data` positionally.
sns.countplot(x=train_data.label)
# Rotate after plotting so the rotation is applied to the category ticks
# that countplot creates.
plt.xticks(rotation =90)

In [None]:
def f(r):
  """Map a class name to its integer class id.

  The ten names listed below map to 0-9 in the order given; any other value
  maps to 10.
  """
  known_classes = (
      "Angiectasia",
      "Blood_fresh",
      "Erosion",
      "Erythematous",
      "Foreign_body",
      "Ileocecal_valve",
      "Lymphangiectasia",
      "Normal",
      "Pylorus",
      "Reduced_mucosal_view",
  )
  for class_id, class_name in enumerate(known_classes):
    if r == class_name:
      return class_id
  return 10

In [None]:
train_data['label'] = train_data['label'].apply(f)

In [None]:
labels = train_data['label'].unique().tolist()
labels

In [None]:
# Class distribution again, now with the integer class ids produced by f()
sns.set_style('darkgrid')
plt.figure(figsize=(12,8))
# Pass the column as x=: newer seaborn releases accept only `data` positionally.
sns.countplot(x=train_data.label)
# Rotate after plotting so the rotation is applied to the category ticks
# that countplot creates.
plt.xticks(rotation =90)

## 01.06 Training Dataset creation


In [None]:
# With a single dataset this cell loads the 'training' part of the split;
# otherwise the whole folder is used and no split is requested.
subset = 'training' if single_dataset else None
validation_split = validation_fraction if single_dataset else 0

_train_loader_options = dict(
    labels="inferred",
    label_mode="categorical",
    class_names=None,
    color_mode="rgb",
    batch_size=batch_size,
    image_size=(image_size, image_size),
    shuffle=True,
    seed=42,
    validation_split=validation_split,
    subset=subset,
    interpolation="bilinear",
    follow_links=False,
    crop_to_aspect_ratio=False,
)
train_dataset = tf.keras.utils.image_dataset_from_directory(
    project_path + training_folder, **_train_loader_options)

# Number of image files backing the training dataset
train_dataset_len = len(train_dataset.file_paths)

# **02. Loading and preparing validation data** (as a fraction of main dataset)

## 02.01 Validation Dataset creation


In [None]:
# Validation part of the same directory; uses the same seed (42) and
# validation_fraction as the training-subset call.
if single_dataset:
  _val_loader_options = dict(
      labels="inferred",
      label_mode="categorical",
      class_names=None,
      color_mode="rgb",
      batch_size=batch_size,
      image_size=(image_size, image_size),
      shuffle=True,
      seed=42,
      validation_split=validation_fraction,
      subset="validation",
      interpolation="bilinear",
      follow_links=False,
      crop_to_aspect_ratio=False,
  )
  val_dataset = tf.keras.utils.image_dataset_from_directory(
      project_path + training_folder, **_val_loader_options)
  # Number of image files backing the validation dataset
  val_dataset_len = len(val_dataset.file_paths)

# **03. Training**

## 03.01 Prefetched and cached datasets

In [None]:
train_dataset = train_dataset.cache().prefetch(buffer_size=tf.data.AUTOTUNE)
val_dataset = val_dataset.cache().prefetch(buffer_size=tf.data.AUTOTUNE)

## 03.02 Definition of the model

### Basic convolutional model (Case 01)

In [None]:
# Basic CNN: rescaling and random flip/rotation augmentation, two
# Conv-BatchNorm-MaxPool stages, then a small dense classifier head.
if (mode01):
  model = tf.keras.models.Sequential([
      tf.keras.layers.Input([image_size, image_size, 3], dtype = tf.uint8),
      # Map pixel values from [0, 255] to [-1, 1]
      tf.keras.layers.Rescaling(scale=1./127.5, offset=-1),
      tf.keras.layers.RandomFlip(mode="horizontal", seed=42),
      tf.keras.layers.RandomFlip(mode="vertical", seed=42),
      tf.keras.layers.RandomRotation(1, fill_mode="reflect", interpolation="bilinear", seed=42, fill_value=0.0),
      tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
      tf.keras.layers.BatchNormalization(),
      tf.keras.layers.MaxPooling2D((2, 2)),
      tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
      tf.keras.layers.BatchNormalization(),
      tf.keras.layers.MaxPooling2D((2, 2)),
      tf.keras.layers.Flatten(),
      tf.keras.layers.Dense(64, activation='relu'),
      tf.keras.layers.Dropout(0.20),
      # softmax (not sigmoid): the labels are one-hot and the loss is categorical
      # cross-entropy, so the outputs must form one distribution over the classes,
      # as in the ResNet and DenseNet heads.
      tf.keras.layers.Dense(num_classes, activation='softmax')
  ])

### ResNet based model (Case 11)

In [None]:
# ResNet transfer-learning model: frozen ImageNet backbone with global average
# pooling, followed by a small trainable classifier head.
if (mode11):
  pre_trained_model = ResNet152(include_top=False, weights= 'imagenet', pooling='avg')
  # Freeze every backbone layer; only the head defined below is trained
  for layer in pre_trained_model.layers:  
      layer.trainable=False

  inputs = tf.keras.layers.Input((image_size, image_size, 3))

  # NOTE(review): the preprocessing comes from the resnet50 module while the
  # backbone is ResNet152 - confirm both expect the same input convention.
  x = tf.keras.applications.resnet50.preprocess_input(inputs) # Model-specific input preprocessing (see the Keras docs for the exact transform)

  # Augmentation: random horizontal/vertical flips and random rotation
  x = tf.keras.layers.RandomFlip(mode="horizontal", seed=42)(x)
  x = tf.keras.layers.RandomFlip(mode="vertical", seed=42)(x)
  x = tf.keras.layers.RandomRotation(1, fill_mode="reflect", interpolation="bilinear", seed=42, fill_value=0.0)(x)

  x = pre_trained_model(x)

  # Classifier head
  x = tf.keras.layers.Dropout(0.4)(x)
  x = tf.keras.layers.Dense(128, activation='relu')(x)
  x = tf.keras.layers.Dropout(0.2)(x)   
  out = tf.keras.layers.Dense(num_classes, activation='softmax')(x)           

  model = tf.keras.Model(inputs=inputs, outputs=out) 

### DenseNet based model (Case 21)

In [None]:
# DenseNet transfer-learning model: frozen ImageNet backbone with global average
# pooling, followed by a small trainable classifier head.
if (mode21):
  pre_trained_model = DenseNet169(include_top=False, weights= 'imagenet', pooling='avg')
  # Freeze every backbone layer; only the head defined below is trained
  for layer in pre_trained_model.layers:  
      layer.trainable=False

  inputs = tf.keras.layers.Input((image_size, image_size, 3))
  x = tf.keras.applications.densenet.preprocess_input(inputs) # Model-specific input preprocessing (see the Keras docs for the exact transform)
  # Augmentation: random horizontal/vertical flips and random rotation
  x = tf.keras.layers.RandomFlip(mode="horizontal", seed=42)(x)
  x = tf.keras.layers.RandomFlip(mode="vertical", seed=42)(x)
  x = tf.keras.layers.RandomRotation(1, fill_mode="reflect", interpolation="bilinear", seed=42, fill_value=0.0)(x)
  x = pre_trained_model(x)
  # Classifier head
  x = tf.keras.layers.Dropout(0.4)(x)
  x = tf.keras.layers.Dense(128, activation='relu')(x)
  x = tf.keras.layers.Dropout(0.2)(x)
  out = tf.keras.layers.Dense(num_classes, activation='softmax')(x)

  model = tf.keras.Model(inputs=inputs, outputs=out) 

#### Model Summary

In [None]:
model.summary()

#### Model Plot

In [None]:
tf.keras.utils.plot_model(model, show_shapes=True)


## 03.03 Model Compilation

In [None]:
# Compile with SGD + momentum and categorical cross-entropy.
# The order of the metrics list matters: the evaluation cell further down
# prints the result of model.evaluate() by position (loss first, then these
# metrics in this order), so reordering here requires updating that cell.
loss_weights=None

model.compile(optimizer = tf.optimizers.SGD(learning_rate=learning_rate,momentum=momentum),
              loss = tf.keras.losses.CategoricalCrossentropy(),
              loss_weights = loss_weights,
              metrics=[tf.keras.metrics.CategoricalAccuracy(),
                       tf.keras.metrics.CategoricalCrossentropy(),
                       tf.keras.metrics.FalseNegatives(),
                       tf.keras.metrics.FalsePositives(), 
                       tf.keras.metrics.TrueNegatives(),
                       tf.keras.metrics.TruePositives(), 
                       tfa.metrics.F1Score(num_classes=num_classes, average="micro"), 
                       tf.keras.metrics.Precision(), 
                       tf.keras.metrics.Recall(), 
                       tfa.metrics.MatthewsCorrelationCoefficient(num_classes=num_classes)])


## 03.04 Setting up callbacks

In [None]:
'''
MoreCallbacks
EarlyStopping(monitor='val_loss', patience=5, verbose=0, mode='min'),
ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1, mode='min',min_delta=1e-4)
'''

In [None]:
model_filepath = drive_path + drive_project_path + drive_model_path +  model_name
model_filepath

In [None]:
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=model_filepath,
                                                 save_best_only=True,
                                                 save_weights_only=True,
                                                 verbose=1)

In [None]:
csv_log = drive_path+drive_project_path+drive_csv_path+"csv_log.csv"
csv_log

In [None]:
# Resuming: restore the checkpointed weights, keep appending to the CSV log and
# continue the epoch numbering where that log stopped.
# Fresh run: start at epoch 0 and overwrite the CSV log.
if retrain:
  model.load_weights(model_filepath)
  csvlog_callback = tf.keras.callbacks.CSVLogger(csv_log, separator=',', append=True)
  train_history = pd.read_csv(csv_log)
  if train_history.empty:
    # Header-only log: there is no last epoch to continue from
    initial_epoch = 0
  else:
    initial_epoch = 1 + int(train_history['epoch'].iloc[-1])
else:
  initial_epoch = 0
  csvlog_callback = tf.keras.callbacks.CSVLogger(csv_log, separator=',', append=False)

In [None]:
log_filepath = drive_path + drive_project_path + drive_log_path 
log_filepath

In [None]:
log_callback = tf.keras.callbacks.TensorBoard(log_filepath, histogram_freq=1)

In [None]:
terminateOnNaN = tf.keras.callbacks.TerminateOnNaN()

In [None]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau( monitor='val_loss', 
                                                  factor=0.5,
                                                  patience=patience, 
                                                  min_lr=min_learning_rate)

## 03.05 Fitting the model

In [None]:
%tensorboard --logdir $log_filepath

In [None]:
gpu_info = !nvidia-smi -L
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

In [None]:
class_weight=None

# Training is skipped entirely when num_epochs is 0 (evaluation-only runs).
if (num_epochs > 0):
  # batch_size is deliberately not passed: train_dataset is already batched,
  # and Keras asks callers not to set it for dataset inputs.
  train_history = model.fit(train_dataset,
                          initial_epoch=initial_epoch,
                          epochs=initial_epoch+num_epochs,
                          validation_data=val_dataset,
                          class_weight=class_weight,
                          callbacks=[reduce_lr, cp_callback, log_callback, csvlog_callback, terminateOnNaN])

In [None]:
if (num_epochs > 0):
  for key in train_history.history.keys():
    print(key)

In [None]:
#PLOT HISTORY
def show_history(history, label_a, name_a=None, label_b=None, name_b=None):
  """Plot one or two series of a training history against the epoch index.

  Args:
    history: Object indexable by series key (the ``History.history`` dict or
      the DataFrame read back from the CSV log).
    label_a: Key of the first series.
    name_a: Legend text for the first series.
    label_b: Optional key of a second series drawn on the same axes.
    name_b: Legend text for the second series; when given it is also used as
      the y-axis label and in the title.
  """
  plt.plot(history[label_a],label=name_a)
  if label_b is not None:
    plt.plot(history[label_b],label=name_b)
  plt.xlabel('Epochs')
  # Single-series plots have no name_b; fall back to name_a so they still get
  # a y-axis label and a title.
  caption = name_b if name_b is not None else name_a
  if caption is not None:
    plt.ylabel(caption)
    plt.title(caption+' vs Epochs')
  # Only draw a legend when at least one plotted series carries a label
  if name_a is not None or (label_b is not None and name_b is not None):
    plt.legend()
  plt.show()

def make_plots(history):
  """Draw the standard training curves found in ``history``.

  First the learning rate on its own, then one figure per metric pairing the
  validation series with its training counterpart.
  """
  show_history(history, 'lr','Learning Rate')

  # (validation key, validation legend, training key, training legend)
  paired_curves = (
      ('val_loss','Validation Loss',
       'loss','Training Loss'),
      ('val_categorical_accuracy','Validation Categorical Accuracy',
       'categorical_accuracy','Training Categorical Accuracy'),
      ('val_categorical_crossentropy','Validation Categorical Crossentropy',
       'categorical_crossentropy','Categorical Crossentropy'),
      ('val_MatthewsCorrelationCoefficient','Validation MatthewsCorrelationCoefficient',
       'MatthewsCorrelationCoefficient','MatthewsCorrelationCoefficient'),
  )
  for val_key, val_name, train_key, train_name in paired_curves:
    show_history(history, val_key, val_name, train_key, train_name)

if (num_epochs > 0):
  make_plots(train_history.history)

In [None]:
csv_train_history = pd.read_csv(csv_log)
for key in csv_train_history.keys():
    print(key)

In [None]:
#PLOT HISTORY
make_plots(csv_train_history)


## 03.06 Evaluating the model

In [None]:
eval = model.evaluate(val_dataset)

In [None]:

# `eval` holds the model.evaluate() result: the loss followed by the compiled
# metrics, read here by position. Each entry is (text, printed_as_integer_count).
_eval_report = (
    ('loss = {:.3f}', False),
    ('categorical_accuracy = {:.3f}', False),
    ('categorical_crossentropy = {:.3f}', False),
    ('false_negatives = ', True),
    ('false_positives = ', True),
    ('true_negatives = ', True),
    ('true_positives = ', True),
    ('f1_score = {:.3f}', False),
    ('precision = {:.3f}', False),
    ('recall = {:.3f}', False),
    ('MatthewsCorrelationCoefficient = {:.3f}', False),
)
for _pos, (_text, _is_count) in enumerate(_eval_report):
  if _is_count:
    print(_text,int(eval[_pos]))
  else:
    print(_text.format(eval[_pos]))


# **04. Make Prediction and Analysis on Validation Dataset**


## 04.01 Prediction

In [None]:
# Collect the true and the predicted class id of every validation image.
y_val = []
prediction = []
for images, labels in val_dataset:
    # One-hot label rows -> class ids, one per image in the batch
    y_val.extend(np.nanargmax(labels.numpy(), axis=1))
    # predict_on_batch scores the already-batched tensor directly; Keras
    # advises against calling model.predict inside a loop over small batches.
    preds = model.predict_on_batch(images)
    prediction.extend(np.nanargmax(preds, axis=1))

In [None]:
np.unique(y_val, return_counts=True)

In [None]:
np.unique(prediction, return_counts=True)

## 04.02 Analysis and report

In [None]:
def test_model(y_true, y_predicted):
    """Print a summary of scikit-learn classification metrics.

    Prints accuracy and balanced accuracy, then precision, recall and F1 with
    micro, macro and weighted averaging, and finally MCC and Cohen's kappa.
    All values are printed with three decimals.

    Args:
        y_true: True class ids.
        y_predicted: Predicted class ids, aligned with ``y_true``.
    """
    overall_scores = (
        ("Accuracy", mtc.accuracy_score),
        ("Accuracy Balanced", mtc.balanced_accuracy_score),
    )
    for title, scorer in overall_scores:
        print(f"{title} = {scorer(y_true, y_predicted):.3f}")

    averaged_scores = (
        ("Precision", mtc.precision_score),
        ("Recall", mtc.recall_score),
        ("F1", mtc.f1_score),
    )
    for title, scorer in averaged_scores:
        for averaging in ("micro", "macro", "weighted"):
            value = scorer(y_true, y_predicted, average=averaging)
            print(f"{title} {averaging} = {value:.3f}")

    agreement_scores = (
        ("MCC", mtc.matthews_corrcoef),
        ("Kappa", mtc.cohen_kappa_score),
    )
    for title, scorer in agreement_scores:
        print(f"{title} = {scorer(y_true, y_predicted):.3f}")


In [None]:
test_model(y_val, prediction)

In [None]:
#classification_report
report = classification_report(y_val, prediction)
print(report)

## 04.03 Confusion Matrix

In [None]:
# confusion matrix
cm = pd.DataFrame(confusion_matrix(y_val, prediction))
cm

In [None]:
# Confusion matrix with raw counts. labels= fixes the class set to
# 0..num_classes-1, the same list that is passed as display_labels.
# (The import repeats names already imported in the library cell of this notebook.)
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
fig, ax = plt.subplots(figsize=(10, 10))

cm = confusion_matrix(y_val, prediction, labels= list(range(num_classes)))
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels= list(range(num_classes)))
disp.plot(ax= ax)

In [None]:
# confusion matrix plot, normalised over the true labels (rows)
fig, ax = plt.subplots(figsize=(10, 10))
class_ids = list(range(num_classes))
# Fix the label set so the matrix is always num_classes x num_classes and lines
# up with display_labels, even when a class is absent from y_val and prediction.
cm = confusion_matrix(y_val, prediction, labels=class_ids, normalize='true')
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_ids)
disp.plot(ax=ax)