**Function to make the list of labels given the labeled data**

In [0]:
def get_labels_list(X):
  """Build the one-hot labels for a list of labelled image file names.

  The class is taken from the ``Type_<n>`` marker found in each file name,
  e.g. ``/content/train/train/Type_2/1348.jpg`` gives ``[0, 1, 0]``. This
  is the same rule used by ``batch_generator_train`` and
  ``load_dataset_in_ram``, so the result no longer depends on the length
  of the directory prefix.

  Args:
    X: iterable of file names (strings).

  Returns:
    A list holding one 3-element one-hot list per file name, in the same
    order as ``X``.
  """
  y = []
  for sample in X:
    if "Type_1" in sample:
      y_sample = [1, 0, 0]
    elif "Type_2" in sample:
      y_sample = [0, 1, 0]
    elif "Type_3" in sample:
      y_sample = [0, 0, 1]
    else:
      # No marker in the name: keep the historical positional rule (the
      # class digit sits at index 26 in /content/train/train/Type_N/...),
      # where anything that is not '1' or '2' is treated as class 3.
      cancer_type = sample[26:27]
      if cancer_type == '1':
        y_sample = [1, 0, 0]
      elif cancer_type == '2':
        y_sample = [0, 1, 0]
      else:
        y_sample = [0, 0, 1]

    y.append(y_sample)

  return y

**Batch generator for model fit_generator**

In [0]:
def batch_generator_train(files, batch_size):
    """Endlessly yield ``(images, labels)`` batches for ``fit_generator``.

    ``files`` is shuffled, cut into consecutive slices of ``batch_size``
    names, and each slice is loaded with OpenCV and resized to
    ``conf['image_shape']``. When every full batch has been produced the
    list is reshuffled and the generator starts over, so it never ends.

    If ``len(files)`` is not a multiple of ``batch_size`` the trailing
    partial batch of each pass is dropped. If there are fewer files than
    ``batch_size``, a single (smaller) batch holding all of them is
    yielded on every step instead of empty batches.

    Args:
        files: list of image file names; it is shuffled IN PLACE. Each
            name must contain ``Type_1``, ``Type_2`` or ``Type_3``.
        batch_size: positive number of files per batch.

    Yields:
        A tuple ``(image_list, mask_list)`` of numpy arrays: the resized
        images and their 3-element one-hot labels.

    Raises:
        ValueError: if ``batch_size`` is not positive or ``files`` is
            empty. Being a generator, this is raised on the first
            ``next()`` call, not when the generator is created.
        IOError: if an image cannot be read.
        Exception: if a file name carries no ``Type_<n>`` marker.
    """
    if batch_size <= 0:
        raise ValueError('batch_generator_train => batch_size must be positive')
    if len(files) == 0:
        raise ValueError('batch_generator_train => files is empty')

    # Only full batches are used. The max() keeps at least one batch per
    # pass when there are fewer files than batch_size; without it the
    # restart condition below would never be met and every batch after
    # the first would be empty.
    number_of_batches = max(1, len(files) // batch_size)

    counter = 0
    random.shuffle(files)
    # while True because a batch must be available on every request:
    # when the batches in files are exhausted we reshuffle and restart.
    while True:
        batch_files = files[batch_size*counter:batch_size*(counter+1)]
        image_list = []
        mask_list = []
        for f in batch_files:
            # cv2.imread signals failure by returning None, not by raising
            image = cv2.imread(f)
            if image is None:
                raise IOError('Error in function batch_generator_train => cannot read image ' + str(f))
            # NOTE(review): cv2.imread gives BGR channel order and
            # cv2.resize takes dsize as (width, height), whereas
            # load_dataset_in_ram uses load_img/target_size on the same
            # conf value - confirm both pipelines feed the model the
            # same layout.
            image = cv2.resize(image, conf['image_shape'])

            # the label is the Type_<n> marker in the file name,
            # e.g. /content/train/train/Type_2/1348.jpg -> class 2
            if "Type_1" in f:
                mask = [1, 0, 0]
            elif "Type_2" in f:
                mask = [0, 1, 0]
            elif "Type_3" in f:
                mask = [0, 0, 1]
            else:
                raise Exception('Error in function batch_generator_train => label not found in filename')

            image_list.append(image)
            mask_list.append(mask)

        counter += 1
        image_list = np.array(image_list)
        mask_list = np.array(mask_list)

        yield image_list, mask_list

        if counter >= number_of_batches:
            # reshuffle and restart
            random.shuffle(files)
            counter = 0

**Function to import images in RAM**

In [0]:
def load_dataset_in_ram(files):
  """Load every image in ``files`` into memory together with its label.

  Each file is opened with ``load_img`` using ``conf['image_shape']`` as
  target size and converted with ``img_to_array``. The label is the
  3-element one-hot list selected by the ``Type_1`` / ``Type_2`` /
  ``Type_3`` marker found in the file name.

  Returns:
    A tuple ``(imgs, labels)`` of numpy arrays built from the loaded
    images and their one-hot labels, in the order of ``files``.

  Raises:
    Exception: if a file name contains none of the three markers.
  """
  # checked in this order; the first marker found in the name wins
  one_hot_by_marker = (
    ("Type_1", [1, 0, 0]),
    ("Type_2", [0, 1, 0]),
    ("Type_3", [0, 0, 1]),
  )

  pixel_arrays = []
  one_hot_labels = []

  for path in files:
    picture = load_img(path, target_size=conf['image_shape'])
    pixel_arrays.append(img_to_array(picture))

    for marker, one_hot in one_hot_by_marker:
      if marker in path:
        one_hot_labels.append(one_hot)
        break
    else:
      # the loop ended without a break: no marker matched
      raise Exception('Error in function load_dataset_in_ram => label not found in filename')

  return np.array(pixel_arrays), np.array(one_hot_labels)

**Function to show probability predictions**

In [0]:
def show_probability_predictions(model, X, y):
  """Print a per-sample table of predicted probabilities and labels.

  One row is printed per sample with six columns: the three predicted
  class probabilities (rounded to two decimals), the predicted label
  (argmax of those rounded probabilities), the true label (argmax of the
  matching row of ``y``) and a 1/0 flag telling whether the two agree.

  Args:
    model: object whose ``predict(X)`` returns one row of three
      probabilities per sample.
    X: the samples to predict.
    y: one-hot encoded true labels, one row per sample.
  """
  print('[P(class_0),P(class_1),P(class_2),pred_label,truth_label,pred==truth]')

  table = np.zeros((len(X), 6))

  # columns 0-2: rounded class probabilities
  probabilities = np.round(model.predict(X), decimals=2)
  table[:, 0:3] = probabilities

  # column 3: predicted label, column 4: true label
  table[:, 3] = np.argmax(probabilities, axis=1)
  table[:, 4] = np.argmax(y, axis=1)

  # column 5: 1 where prediction and truth match, 0 otherwise
  table[:, 5] = table[:, 3] == table[:, 4]

  print(table)

**Function to show confusion matrix and other stats**

In [0]:
from sklearn.metrics import confusion_matrix, classification_report


def show_report(model, X, y):
    """
    Print the confusion matrix and the classification report of ``model``
    on the samples ``X`` with one-hot encoded true labels ``y``.
    """
    # Collapse each row to the index of its largest entry, e.g.
    #   [0.15, 0.70, 0.15] -> 1
    #   [0.90, 0.05, 0.05] -> 0
    # so predictions and one-hot truths both become class indices.
    predicted_classes = np.argmax(model.predict(X), axis=1)
    true_classes = np.argmax(y, axis=1)

    print("Confusion matrix (rows: true, columns: predicted)")
    matrix = confusion_matrix(true_classes, predicted_classes)
    print(matrix)
    print()

    print("Classification report")
    report = classification_report(true_classes, predicted_classes)
    print(report)

**Function to show graphs**

In [0]:
# Takes a history object (exposing a `history` dict) as argument.
# Shows the accuracy graph and the loss graph for the training
# dataset and, when plot_validation is set, the validation dataset.
def show_graphs(history, plot_validation=False):
  """Plot the accuracy and the loss curves recorded during training.

  Two figures are shown one after the other: accuracy per epoch, then
  loss per epoch. Each shows the training curve and, when requested, the
  validation curve.

  Args:
    history: object whose ``history`` attribute maps metric names to
      per-epoch value lists. The accuracy is read from ``'acc'`` when
      present and from ``'accuracy'`` otherwise, because the key name
      depends on the Keras version that produced the history.
    plot_validation: when truthy, also plot the ``'val_'``-prefixed
      series of each metric.

  Raises:
    KeyError: if a required series is missing from ``history.history``.
  """
  metrics = history.history

  # Older Keras records 'acc'/'val_acc', newer releases record
  # 'accuracy'/'val_accuracy'; accept whichever this history has.
  accuracy_key = 'acc' if 'acc' in metrics else 'accuracy'

  figures = (
    (accuracy_key, 'Model accuracy', 'Accuracy'),
    ('loss', 'Model loss', 'Loss'),
  )

  for key, title, y_label in figures:
    plt.plot(metrics[key])
    legend = ['Train']
    if plot_validation:
      plt.plot(metrics['val_' + key])
      legend.append('Validation')
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(legend, loc='upper left')
    plt.show()