# Utility

In [20]:
from sklearn import metrics
import numpy as np
from matplotlib import pyplot as plt
import os


In [21]:
# Directory holding the .npy tensors read by load_training() and load_test().
# NOTE(review): hard-coded absolute path -- presumably a Google Drive mount
# in Colab; confirm/adjust before running in another environment.
data_path='/content/drive/MyDrive/CI/numpy_data/'

def load_training():
  """Load the training set from ``data_path``.

  Returns:
    Tuple ``(images, labels)`` read from 'train_tensor.npy' and
    'train_labels.npy' respectively.
  """
  tensor_file = os.path.join(data_path, 'train_tensor.npy')
  label_file = os.path.join(data_path, 'train_labels.npy')
  return np.load(tensor_file), np.load(label_file)

def load_test():
  """Load the public test set from ``data_path``.

  Returns:
    Tuple ``(images, labels)`` read from 'public_test_tensor.npy' and
    'public_test_labels.npy' respectively.
  """
  tensor_file = os.path.join(data_path, 'public_test_tensor.npy')
  label_file = os.path.join(data_path, 'public_test_labels.npy')
  return np.load(tensor_file), np.load(label_file)

In [22]:
def plot_history(history, file_name=None,
                 save_dir="/content/drive/MyDrive/CI/models/plot_antonio"):
  """Plot training/validation accuracy and loss curves side by side.

  Args:
    history: object exposing a ``history`` mapping with the keys
      'accuracy', 'val_accuracy', 'loss' and 'val_loss' (presumably the
      value returned by Keras ``Model.fit`` -- confirm against the caller).
    file_name: when not None, the figure is also written to
      ``<save_dir>/<file_name>.png``.
    save_dir: directory that receives the saved figure; it is created when
      missing. Defaults to the location the notebook always used.
  """
  curves = history.history
  acc = curves['accuracy']
  val_acc = curves['val_accuracy']
  loss = curves['loss']
  val_loss = curves['val_loss']

  epochs = range(len(acc))
  fig, (acc_g, loss_g) = plt.subplots(1, 2, figsize=(15, 6))

  acc_g.plot(epochs, acc, color='blue', marker='o', label='Training accuracy')
  acc_g.plot(epochs, val_acc, color='red', marker='o', label='Validation accuracy')
  acc_g.set_title('Training and validation accuracy')
  acc_g.set_xlabel('Epochs')
  acc_g.set_ylabel('Accuracy')
  acc_g.legend()
  acc_g.set_ylim(0, 1)

  loss_g.plot(epochs, loss, color='blue', marker='o', label='Training loss')
  loss_g.plot(epochs, val_loss, color='red', marker='o', label='Validation loss')
  loss_g.set_title('Training and validation loss')
  loss_g.set_xlabel('Epochs')
  loss_g.set_ylabel('Loss')
  loss_g.legend()
  # The y axis is clamped to [0, 1]: loss values above 1 are not visible.
  loss_g.set_ylim(0, 1)

  plt.tight_layout()

  if file_name is not None:
    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(save_dir, exist_ok=True)
    fig.savefig(os.path.join(save_dir, file_name + '.png'))

In [23]:
def oversample(images, labels):
  """Balance classes 0 and 1 by duplicating random minority-class samples.

  Samples of the less frequent class are drawn with replacement (through
  ``np.random.choice``) and appended after the original data until both
  classes have the same number of elements. Already balanced input is
  returned as copied arrays with nothing appended.

  Args:
    images: sequence/array of samples, indexed along the first axis.
    labels: sequence/array of labels; only the values 0 and 1 are counted.

  Returns:
    Tuple ``(final_samples, final_labels)``: the original data followed by
    the duplicated minority samples.

  Raises:
    ValueError: if the classes are unbalanced but the minority class has
      no sample to duplicate.
  """
  # Positions of the 0s and of the 1s, in separate lists.
  indexes_0 = [i for i, label in enumerate(labels) if label == 0]
  indexes_1 = [i for i, label in enumerate(labels) if label == 1]

  n = len(indexes_0) - len(indexes_1)
  if n > 0:
    # Class 1 is the minority.
    minority, which = indexes_1, 'second'
  elif n < 0:
    # Class 0 is the minority.
    minority, which = indexes_0, 'first'
  else:
    minority, which = [], None

  missing = abs(n)
  if missing:
    if not minority:
      raise ValueError('Cannot oversample: the minority class has no samples.')
    print(f'Oversampling must be done on {missing} elements of the {which} array.')
    index = np.random.choice(minority, missing, replace=True)
  else:
    # Balanced input: nothing to add. An empty integer index keeps the code
    # below uniform (the names used to be undefined in this case).
    index = np.array([], dtype=int)

  random_imgs = np.array(images)[index]
  random_labels = np.array(labels)[index]

  final_samples = np.concatenate((images, random_imgs), axis=0)
  final_labels = np.concatenate((labels, random_labels), axis=0)

  print('Class 0:', int((final_labels == 0).sum()))
  print('Class 1:', int((final_labels == 1).sum()))
  return final_samples, final_labels

#oversample(['a', 'b', 'c', 'd'], [0, 0, 1, 0])

In [24]:
def undersample(images, labels):
  """Balance classes 0 and 1 by randomly dropping majority-class samples.

  As many majority samples as there are minority samples are picked without
  replacement (through ``np.random.choice``); every minority sample is kept.
  The surviving samples keep their original relative order. Elements whose
  label is neither 0 nor 1 are discarded.

  Args:
    images: sequence/array of samples, indexed along the first axis.
    labels: sequence/array of labels.

  Returns:
    Tuple ``(imgs, lbls)`` with the balanced samples and labels. When one of
    the two classes is absent both outputs are empty.
  """
  # Positions of the 0s and of the 1s, in separate lists.
  indexes_0 = [i for i, label in enumerate(labels) if label == 0]
  indexes_1 = [i for i, label in enumerate(labels) if label == 1]

  # Undersample whichever class has more samples.
  if len(indexes_0) > len(indexes_1):
    bigger, smaller = indexes_0, indexes_1
  else:
    bigger, smaller = indexes_1, indexes_0

  n = len(smaller)
  kept_majority = np.random.choice(bigger, n, replace=False)

  # Force an integer dtype: an empty list converts to float64, which would
  # make the concatenated array unusable as an index.
  indexes = np.concatenate(
      (kept_majority.astype(int), np.array(smaller, dtype=int)), axis=0)
  indexes = np.sort(indexes)

  imgs = np.array(images)[indexes]
  lbls = np.array(labels)[indexes]

  return imgs, lbls


In [25]:
import tensorflow as tf

def apply_contrast(images, contrast=1.5):
  """Apply ``tf.image.adjust_contrast`` to every element of ``images``.

  A trailing axis is appended to ``images`` before the adjustment and
  squeezed out again afterwards.

  Args:
    images: array of samples, iterated along the first axis.
      # assumes single-channel images without a channel axis -- TODO confirm
    contrast: factor forwarded to ``tf.image.adjust_contrast``.

  Returns:
    NumPy array with the adjusted samples.
  """
  with_channel = np.expand_dims(images, -1)
  adjusted = [tf.image.adjust_contrast(sample, contrast) for sample in with_channel]
  return np.array(np.squeeze(adjusted, axis=-1))

## Mass vs Calcification

In [26]:
def mass_calcification_training_set_building(images, labels):
  """Build the two-class training set from the odd-indexed entries.

  Only the elements at positions 1, 3, 5, ... are kept. Their labels are
  mapped to 0 when smaller than 3 and to 1 otherwise. The shape of the
  selected image array is printed.

  Args:
    images: indexable collection of samples.
    labels: indexable collection of numeric labels.

  Returns:
    Tuple ``(train_abnormal_imgs, train_abnormal_labels)`` of NumPy arrays.
  """
  image_pairs = len(images) // 2
  train_abnormal_imgs = np.array(
      [images[pos] for pos in range(1, 2 * image_pairs, 2)])
  print(train_abnormal_imgs.shape)

  label_pairs = len(labels) // 2
  odd_labels = np.array([labels[pos] for pos in range(1, 2 * label_pairs, 2)])

  # Labels below 3 become class 0, everything else class 1.
  train_abnormal_labels = np.array([0 if raw < 3 else 1 for raw in odd_labels])
  return train_abnormal_imgs, train_abnormal_labels

In [27]:
def mass_calficication_test_set_building(images, labels):
  """Build the two-class test set from the odd-indexed entries.

  Only the elements at positions 1, 3, 5, ... are kept. Their labels are
  mapped to 0 when smaller than 3 and to 1 otherwise.

  The function name contains a typo ("calficication"); it is kept so that
  existing callers keep working, and the correctly spelled alias
  ``mass_calcification_test_set_building`` is defined right below.

  Args:
    images: indexable collection of samples.
    labels: indexable collection of numeric labels.

  Returns:
    Tuple ``(test_abnormal_imgs, test_abnormal_labels)`` of NumPy arrays.
  """
  image_pairs = len(images) // 2
  test_abnormal_imgs = np.array(
      [images[pos] for pos in range(1, 2 * image_pairs, 2)])

  label_pairs = len(labels) // 2
  odd_labels = [labels[pos] for pos in range(1, 2 * label_pairs, 2)]

  # Labels below 3 become class 0, everything else class 1.
  test_abnormal_labels = np.array([0 if raw < 3 else 1 for raw in odd_labels])
  return test_abnormal_imgs, test_abnormal_labels


# Correctly spelled, backward-compatible alias.
mass_calcification_test_set_building = mass_calficication_test_set_building

## Benign vs Malignant

In [28]:
def benignant_malignant_training_set_building(images, labels):
  """Build the benign/malignant training set from the odd-indexed entries.

  Only the elements at positions 1, 3, 5, ... are kept. Odd labels (1, 3)
  are mapped to 0 (benign); every other label (2, 4) to 1 (malignant).

  Args:
    images: indexable collection of samples.
    labels: indexable collection of integer labels.

  Returns:
    Tuple ``(train_abnormal_imgs, train_abnormal_labels)`` of NumPy arrays.
  """
  # Keep only the patches and labels stored at odd positions.
  image_pairs = len(images) // 2
  train_abnormal_imgs = np.array(
      [images[pos] for pos in range(1, 2 * image_pairs, 2)])

  label_pairs = len(labels) // 2
  odd_labels = np.array([labels[pos] for pos in range(1, 2 * label_pairs, 2)])

  # LABELS 1,3 -> 0 benign - LABELS 2,4 -> 1 malignant
  binary = []
  for raw in odd_labels:
    binary.append(0 if raw % 2 == 1 else 1)
  train_abnormal_labels = np.array(binary)
  return train_abnormal_imgs, train_abnormal_labels

In [29]:
def benignant_malignant_test_set_building(test_images, test_labels):
  """Build the benign/malignant test set from the odd-indexed entries.

  Only the elements at positions 1, 3, 5, ... are kept. Odd labels are
  mapped to 0 and all other labels to 1; the resulting label array is
  printed.

  Args:
    test_images: indexable collection of samples.
    test_labels: indexable collection of integer labels.

  Returns:
    Tuple ``(test_abnormal_imgs, test_abnormal_labels)`` of NumPy arrays.
  """
  image_pairs = len(test_images) // 2
  test_abnormal_imgs = np.array(
      [test_images[pos] for pos in range(1, 2 * image_pairs, 2)])

  label_pairs = len(test_labels) // 2
  odd_labels = np.array(
      [test_labels[pos] for pos in range(1, 2 * label_pairs, 2)])

  binary = []
  for raw in odd_labels:
    binary.append(0 if raw % 2 == 1 else 1)
  test_abnormal_labels = np.array(binary)

  print('Labels for the problem:', test_abnormal_labels)
  return test_abnormal_imgs, test_abnormal_labels

## Siamese

In [30]:
def make_pairs(images, labels):
  """Pair every even-indexed image with the odd-indexed image following it.

  Pair ``k`` is ``[images[2*k], images[2*k + 1]]``. The pair label comes
  from ``labels[2*k + 1]``: 0 when that label is smaller than 3, 1 otherwise.

  Args:
    images: indexable collection of samples.
    labels: indexable collection of numeric labels.

  Returns:
    Tuple ``(pairs, pair_labels)`` of NumPy arrays.
  """
  image_pairs = len(images) // 2
  train_abnormal_imgs = np.array(
      [images[pos] for pos in range(1, 2 * image_pairs, 2)])
  train_baselines = np.array(
      [images[pos] for pos in range(0, 2 * image_pairs, 2)])

  pair_images = [
      [baseline, abnormal]
      for baseline, abnormal in zip(train_baselines, train_abnormal_imgs)
  ]

  label_pairs = len(labels) // 2
  odd_labels = np.array([labels[pos] for pos in range(1, 2 * label_pairs, 2)])

  # Labels below 3 become class 0, everything else class 1.
  train_abnormal_labels = np.array([0 if raw < 3 else 1 for raw in odd_labels])

  return (np.array(pair_images), np.array(train_abnormal_labels))

## Evaluation

In [31]:
def do_evaluations(test_predictions, test_labels):
  """Print and plot evaluation metrics for binary predictions.

  The predictions are squeezed along their last axis and rounded to the
  nearest integer before every metric is computed. The confusion matrix,
  accuracy, F2 score, classification report and AUC are printed, and the
  confusion matrix is displayed as an image.

  Args:
    test_predictions: array of predicted scores whose last axis has size 1.
    test_labels: ground-truth labels.

  Returns:
    Tuple ``(auc, m, accuracy, f2)`` where ``m`` is the confusion matrix.
  """
  test_predictions = np.squeeze(test_predictions, axis=-1)
  test_predictions = np.rint(test_predictions)

  m = metrics.confusion_matrix(test_labels, test_predictions)
  # `beta` is keyword-only in current scikit-learn releases.
  f2 = metrics.fbeta_score(test_labels, test_predictions, beta=2)

  accuracy = metrics.accuracy_score(test_labels, test_predictions)
  # NOTE(review): the ROC curve is built from the rounded predictions, so the
  # AUC reflects a single operating point rather than the raw scores --
  # confirm this is intended.
  fpr, tpr, thresholds = metrics.roc_curve(test_labels, test_predictions)
  auc = metrics.auc(fpr, tpr)
  print( f'Confusion matrix:\n {m}\n')
  print( f'accuracy:\n {accuracy}\n')
  print( f'f2:\n {f2}\n')
  print( f'report:\n {metrics.classification_report(test_labels, test_predictions)}')
  print( f'AUC:\n {auc}')

  # plt.cm.get_cmap is deprecated/removed in recent Matplotlib releases;
  # pyplot.get_cmap accepts the same (name, lut) arguments.
  plt.matshow(m, cmap=plt.get_cmap('Blues', 16))
  plt.colorbar()
  plt.show()
  return auc, m, accuracy, f2