<a href="https://colab.research.google.com/github/lorebianchi98/BrainTumorClassification/blob/main/utilities.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Utilities and Constants
In this notebook we define some constants and utility functions that are used in several notebooks of this project.

# Import

In [None]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
from keras import layers
import matplotlib.pyplot as plt
from tqdm import tqdm
import shutil
from tensorflow.keras import optimizers
import random as rn
import os
from sklearn import metrics
from sklearn.metrics import ConfusionMatrixDisplay

# Path Constants and Classes
These constants hold the paths to the Google Drive folders and the local folders used by the project, together with the random seed and the list of class names.

In [None]:
# Root folder of the project on the mounted Google Drive.
DRIVE_DIR = "/content/gdrive/MyDrive/BrainTumorClassifier"
# Local folders on the runtime; presumably the raw dataset and its cleaned
# copy (inferred from the names -- not used in this notebook, confirm in callers).
DATA_DIR = '/content/brain-tumor-mri-dataset'
PREPROCESSED_DIR = '/content/brain-tumor-mri-dataset-cleaned'
# Parent folder of the 'train', 'val' and 'test' sub-folders read by load_data_splits.
SETS_DIR = '/content/brain-tumor-mri-splits'
# Drive folder under which run_model creates one sub-folder per trained model.
MODELS_PATH = '/content/gdrive/MyDrive/BrainTumorClassifier/Models'

# Seed shared by the random number generators and by the dataset loaders.
SEED = 123

# Class names used as labels in the classification report and confusion matrix.
# NOTE(review): the order presumably has to match the label indices inferred
# from the directory names by the dataset loader -- confirm.
CLASSES = ['glioma', 'meningioma', 'notumor', 'pituitary']

In [None]:
# Image dimensions in pixels; IMAGE_SIZE is ordered (height, width).
IMAGE_WIDTH = 224
IMAGE_HEIGHT = 224
IMAGE_SIZE = (IMAGE_HEIGHT, IMAGE_WIDTH)
# Presumably the default batch size passed to load_data_splits by the other
# notebooks -- not referenced in this file, confirm in callers.
BATCH_SIZE = 64

# Set Seed
In order to obtain reproducible results we set the seeds.

In [None]:
# Seed the three random number generators in use with the same value:
# NumPy, Python's built-in `random` module (imported as `rn`) and TensorFlow.
np.random.seed(SEED)
rn.seed(SEED)
tf.random.set_seed(SEED)

# Object Handling Utilities

In [None]:
#provides functions to store and load objects from files 
import pickle

def saveObject(obj, path):
    """Pickle an object to the file ``<path>.pkl``.

    A "Saving ..." message with the destination is printed before writing.

    :param obj: undefined. Object to save
    :param path: str. Destination path, without the ``.pkl`` extension
    """
    destination = path + ".pkl"
    print("Saving " + path + '.pkl')
    with open(destination, 'wb') as out_file:
        pickle.dump(obj, out_file)
    
def loadObject(path):
    """Unpickle and return the object stored in ``<path>.pkl``.

    Only load files you trust: unpickling can execute arbitrary code.

    :param path: str. Path of the file to load, without the ``.pkl`` extension
    :return: obj: undefined. Object loaded, or None when the file cannot be
        opened or read (I/O error)
    """
    try:
        with open(path + '.pkl', 'rb') as in_file:
            return pickle.load(in_file)
    except OSError:  # IOError is an alias of OSError in Python 3
        return None

# Training Utility
Functions used in order to support the training of models

In [None]:
def load_data_splits (img_size, batch_size, shuffle_on_val=True):
  '''
  load_data_splits builds the training, validation and test datasets from the
  'train', 'val' and 'test' sub-folders of SETS_DIR
  :param img_size: image size passed to the loader as image_size
  :param batch_size: batch size of the three datasets
  :param shuffle_on_val: whether the validation set is shuffled
  :return: tuple (train_ds, val_ds, test_ds)
  '''
  def _load_split(subfolder, shuffle):
    # the settings below are common to the three splits; only the folder and
    # the shuffle flag change
    return tf.keras.preprocessing.image_dataset_from_directory(
      SETS_DIR + subfolder,
      labels='inferred', #the label of the dataset is obtained by the name of the directory
      seed=SEED,
      shuffle=shuffle,
      image_size=img_size,
      batch_size=batch_size,
    )

  train_ds = _load_split('/train', True) # training set is always shuffled
  val_ds = _load_split('/val', shuffle_on_val)
  test_ds = _load_split('/test', False) # test set is never shuffled
  return train_ds, val_ds, test_ds

  
def compile_model(model, metrics='accuracy', loss='sparse_categorical_crossentropy', optimizer='adam', learning_rate = 0.0005):
  '''
    compile_model is used to compile the current model
    :param model: model to compile
    :param metrics: a single metric, or a list/tuple of metrics, to track
    :param loss: loss passed to model.compile
    :param optimizer: optimizer to be used: either the name 'adam' or 'rmsprop'
      (built with the given learning rate) or an already built optimizer
      object, which is passed to model.compile as it is
    :param learning_rate: learning rate parameter for the optimizer, used only
      when the optimizer is given by name
    :return: the compiled model (the same object received in input)
    :raises ValueError: if the optimizer name is not supported
  '''
  if isinstance(optimizer, str):
    if optimizer == 'adam':
      optimizer = optimizers.Adam(learning_rate=learning_rate)
    elif optimizer == 'rmsprop':
      optimizer = optimizers.RMSprop(learning_rate=learning_rate)
    else:
      # fail loudly: returning None here would leave the model uncompiled and
      # the caller would only notice later, far from the real cause
      raise ValueError("Unsupported optimizer '" + optimizer + "': expected 'adam' or 'rmsprop'")

  # a list of metrics is passed as it is, a single metric is wrapped in a list
  if isinstance(metrics, (list, tuple)):
    metrics_list = list(metrics)
  else:
    metrics_list = [metrics]

  model.compile(loss=loss,
    optimizer=optimizer,
    metrics=metrics_list)

  return model

def run_model (model, model_name, train_ds, val_ds, epochs=20, patience=3, monitor='val_loss'):
  '''
  run_model is used to train the current model with Early Stopping, keeping
  only the best checkpoint, and to save the results on Drive
  :param model: model to run
  :param model_name: name given to save the model
  :param train_ds: training dataset
  :param val_ds: validation dataset
  :param epochs: how many epochs to do
  :param patience: patience value for Early Stopping
  :param monitor: what to monitor for Early Stopping and Model Checkpoint
  :return: tuple (best model reloaded from the local checkpoint, history
    returned by model.fit)
  '''
  # local save path for the models
  local_path = 'model/' + model_name + '.h5'
  drive_path = MODELS_PATH + '/' + model_name
  # make sure the local checkpoint folder exists before training starts
  os.makedirs(os.path.dirname(local_path), exist_ok=True)
  # deletes the old model (best effort: a missing folder is not an error) and
  # recreates an empty folder, creating also the missing parent folders
  shutil.rmtree(drive_path, ignore_errors=True)
  os.makedirs(drive_path, exist_ok=True)
  callbacks_list = [
                  keras.callbacks.EarlyStopping(monitor=monitor, patience=patience), #we implement EarlyStopping to prevent overfitting
                  keras.callbacks.ModelCheckpoint(
                      filepath = local_path,
                      monitor=monitor,
                      verbose=1,
                      save_best_only=True)
                  ]
  history = model.fit(train_ds,
                    epochs=epochs,
                    validation_data=val_ds,
                    callbacks=callbacks_list)
  # save on Drive only the best model
  shutil.copy(local_path, drive_path + '/' + model_name + '.h5')
  # save on Drive also the history
  saveObject(history, drive_path + '/history')
  # the returned model is the best checkpoint, not the state after the last epoch
  return tf.keras.models.load_model(local_path), history


In order to fight overfitting we provide Keras layers that perform data augmentation:

In [None]:
# Data augmentation pipeline packaged as a Keras Sequential model:
# a random horizontal flip followed by a random contrast adjustment.
data_augmentation_layers = keras.Sequential(
  [
  layers.RandomFlip("horizontal"), # Applies horizontal flipping to a random 50% of the images
  layers.RandomContrast(0.15), # Randomly adjusts the contrast of the images by a random factor in the range [-15%, +15%]
  ]
)

# Evaluation Utilities
Functions used in order to evaluate models.

In [None]:
def plot_accuracy_and_loss_history(history):
  '''
  plot_accuracy_and_loss_history plots the training and validation accuracy
  and, in a second figure, the training and validation loss over the epochs
  :param history: object whose .history dictionary contains the keys
    'accuracy', 'val_accuracy', 'loss' and 'val_loss'
  '''
  recorded = history.history
  train_acc, valid_acc = recorded['accuracy'], recorded['val_accuracy']
  train_loss, valid_loss = recorded['loss'], recorded['val_loss']

  epochs = range(len(train_acc))

  # one entry per figure: training curve, validation curve and their texts
  panels = (
    (train_acc, valid_acc, 'Training accuracy', 'Validation accuracy', 'Training and validation accuracy'),
    (train_loss, valid_loss, 'Training loss', 'Validation loss', 'Training and validation loss'),
  )

  for position, (train_curve, valid_curve, train_label, valid_label, title) in enumerate(panels):
    if position > 0:
      plt.figure() # the first panel is drawn on the current figure
    plt.plot(epochs, train_curve, 'bo', label=train_label)
    plt.plot(epochs, valid_curve, 'b', label=valid_label)
    plt.title(title)
    plt.legend()

  plt.show()

def evaluate_model(model, test_ds):
  '''
  evaluate_model prints the loss and the accuracy of the model on the given
  dataset, followed by the classification report for the classes in CLASSES
  :param model: compiled model to evaluate; model.evaluate is expected to
    return exactly two values (loss and accuracy)
  :param test_ds: dataset yielding (images, labels) batches
  '''
  test_loss, test_accuracy = model.evaluate(test_ds)
  print("Loss on test set: " +str(test_loss))
  print("Accuracy on test set: " +str(test_accuracy))
  # labels and predictions are collected in a single pass over the dataset:
  # with two separate passes they would not be aligned whenever the dataset
  # is reshuffled at every iteration
  batch_labels = []
  batch_probabilities = []
  for images, labels in test_ds:
    batch_labels.append(np.asarray(labels))
    batch_probabilities.append(model.predict_on_batch(images))
  test_labels = np.concatenate(batch_labels, axis=0) # labels of the test set
  test_predictions = np.concatenate(batch_probabilities, axis=0)  # probabilities for all classes
  test_predictions = np.argmax(test_predictions, axis=-1)  # index of the classes with largest probability
  print(metrics.classification_report(test_labels, test_predictions, target_names=CLASSES, digits=4))

def plot_confusionmatrix(model, test_ds):
  '''
  plot_confusionmatrix draws the confusion matrix of the model predictions on
  the given dataset, using the names in CLASSES as display labels
  :param model: model used to compute the predictions
  :param test_ds: dataset yielding (images, labels) batches
  '''
  # labels and predictions are collected in a single pass over the dataset:
  # with two separate passes they would not be aligned whenever the dataset
  # is reshuffled at every iteration
  batch_labels = []
  batch_probabilities = []
  for images, labels in test_ds:
    batch_labels.append(np.asarray(labels))
    batch_probabilities.append(model.predict_on_batch(images))
  test_labels = np.concatenate(batch_labels, axis=0) # labels of the test set
  test_predictions = np.concatenate(batch_probabilities, axis=0)  # probabilities for all classes
  test_predictions = np.argmax(test_predictions, axis=-1)  # index of the classes with largest probability
  # plot confusion matrix
  fig, ax = plt.subplots(figsize=(10, 10))
  ConfusionMatrixDisplay.from_predictions(test_labels, test_predictions, xticks_rotation='vertical', ax=ax, display_labels=CLASSES)
