In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive; the dataset
# and output paths used later in this notebook live under that mount point.
# NOTE(review): force_remount=True presumably re-mounts even if the drive is
# already mounted — confirm against the Colab documentation.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
import datetime
import os
import warnings
from itertools import zip_longest

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras as tk
from keras.models import load_model
from sklearn import metrics
from sklearn.metrics import confusion_matrix

from chexpert_parser import load_dataset
from chexpert_parser import load_dataset_by_tuple

# Look for GPUs; when at least one is present, turn on memory growth for the
# first one, otherwise tell the user that training will run on the CPU.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if not physical_devices:
    print("No GPU found, model running on CPU")
else:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning) 

# Seed both TensorFlow's and NumPy's global random generators with the same
# fixed value so repeated runs start from the same seed.
seed = 1234567890
tf.random.set_seed(seed)
np.random.seed(seed)

In [None]:
# Helper functions

class LabelAUC(tf.keras.metrics.AUC):
    """AUC metric computed on a single column of multi-label outputs.

    Args:
        label_id: index applied to the second axis of ``y_true`` / ``y_pred``
            (``[:, label_id]``) before the values reach the base AUC metric.
        name: metric name forwarded to ``tf.keras.metrics.AUC``.
        **kwargs: any further keyword arguments for ``tf.keras.metrics.AUC``.
    """

    def __init__(self, label_id, name="label_auc", **kwargs):
        super().__init__(name=name, **kwargs)
        self.label_id = label_id

    def update_state(self, y_true, y_pred, **kwargs):
        """Accumulate AUC statistics using only the selected column."""
        column = self.label_id
        true_column = y_true[:, column]
        pred_column = y_pred[:, column]
        return super().update_state(true_column, pred_column, **kwargs)

    def result(self):
        """Return the AUC accumulated so far (delegates to the base class)."""
        return super().result()
 
class MeanAUC_alt(tf.keras.metrics.AUC):
    """Mean of the per-label AUCs for label columns 2, 5, 6, 8 and 10.

    One ``LabelAUC`` is kept per label column; this metric's value is the
    arithmetic mean of their current results.

    Args:
        name: metric name forwarded to ``tf.keras.metrics.AUC``.
        **kwargs: any further keyword arguments for ``tf.keras.metrics.AUC``.
    """

    def __init__(self, name="label_mean_auc", **kwargs):
        # The original called super(MeanAUC, self), which names a different
        # class and raises TypeError when this class is instantiated.
        super().__init__(name=name, **kwargs)
        self.aucs = [LabelAUC(label_id=idx) for idx in (2, 5, 6, 8, 10)]

    def update_state(self, y_true, y_pred, **kwargs):
        """Feed the batch to every per-label AUC."""
        for auc in self.aucs:
            auc.update_state(y_true, y_pred, **kwargs)

    def result(self):
        """Return the mean of the per-label AUC values as a scalar tensor."""
        # Computed on demand so it is valid even before the first update.
        return tf.reduce_mean([auc.result() for auc in self.aucs])

    def reset_states(self):
        """Reset every per-label AUC, which is where the state is held."""
        for auc in self.aucs:
            auc.reset_states()

class MeanAUC(LabelAUC):
    """Mean of per-label AUCs over an arbitrary collection of label columns.

    One ``LabelAUC`` is created for every entry of ``label_id``; the metric
    value is the arithmetic mean of their current results.

    Args:
        label_id: iterable of column indices to average over (any length).
        name: metric name forwarded to the base metric.
        **kwargs: any further keyword arguments for the base metric.
    """

    def __init__(self, label_id, name="label_mean_auc", **kwargs):
        super().__init__(label_id=label_id, name=name, **kwargs)
        # One sub-metric per requested column instead of exactly five.
        self.aucs = [LabelAUC(label_id=idx) for idx in label_id]

    def update_state(self, y_true, y_pred, **kwargs):
        """Feed the batch to every per-label AUC."""
        for auc in self.aucs:
            auc.update_state(y_true, y_pred, **kwargs)

    def result(self):
        """Return the mean of the per-label AUC values as a scalar tensor."""
        # Averaging tensors (not `.numpy()` values) keeps this usable inside
        # a tf.function as well as in eager mode.
        return tf.reduce_mean([auc.result() for auc in self.aucs])

    def reset_states(self):
        """Reset every per-label AUC.

        The accumulated statistics live in ``self.aucs``; resetting only the
        base metric's own variables would let the mean keep accumulating
        across epochs.
        """
        for auc in self.aucs:
            auc.reset_states()

    # Newer Keras releases spell the reset hook `reset_state`; keep both
    # spellings equivalent so the sub-metrics are always cleared.
    reset_state = reset_states

def compute_metrics(y_true, y_pred, run):
    """Update the module-level AUC metrics for one batch.

    Args:
        y_true: ground-truth labels for the batch.
        y_pred: model outputs for the batch.
        run: ``'training'`` updates the ``*_train*`` metrics, ``'validation'``
            updates the ``*_valid*`` metrics; any other value updates nothing.
    """
    if run == 'training':
        tracked = (
            mean_auc_train,
            auc_train_card,
            auc_train_edema,
            auc_train_cons,
            auc_train_atel,
            auc_train_peff,
        )
    elif run == 'validation':
        tracked = (
            mean_auc_valid,
            auc_valid_card,
            auc_valid_edema,
            auc_valid_cons,
            auc_valid_atel,
            auc_valid_peff,
        )
    else:
        tracked = ()

    for metric in tracked:
        metric.update_state(y_true, y_pred)

@tf.function
def train_step(model, x, y):
    """Run one optimisation step on a batch and update the training metrics.

    Uses the module-level ``loss_fn`` and ``optimizer``.

    Args:
        model: the Keras model being trained.
        x: batch of inputs passed to ``model``.
        y: batch of target labels.

    Returns:
        The loss value computed for this batch.
    """
    # Gradients are taken exactly once, so a regular (non-persistent) tape is
    # enough and releases its resources on its own.
    with tf.GradientTape() as tape:
        output = model(x, training=True)
        loss_value = loss_fn(y, output)
    grads = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    compute_metrics(y, output, run='training')
    return loss_value

@tf.function
def validation_step(model, x, y):
    """Evaluate one batch without training and update the validation metrics.

    Uses the module-level ``loss_fn``.

    Args:
        model: the Keras model being evaluated.
        x: batch of inputs passed to ``model``.
        y: batch of target labels.

    Returns:
        The loss value computed for this batch.
    """
    predictions = model(x, training=False)
    batch_loss = loss_fn(y, predictions)
    compute_metrics(y, predictions, run='validation')
    return batch_loss

def log_epoch(log, metrics, results):
    """Return a copy of ``log`` extended by one row of epoch results.

    Args:
        log: DataFrame holding the rows logged so far (not modified).
        metrics: column names, one per entry of ``results``.
        results: the values for the new row, in the same order as ``metrics``.

    Returns:
        A new DataFrame containing the previous rows followed by the new one,
        indexed 0..n-1.
    """
    row = np.asarray(list(results)).reshape(1, -1)
    step_log = pd.DataFrame(row, columns=metrics)
    # `DataFrame.append` no longer exists in pandas 2.x; `pd.concat` is the
    # supported replacement. An empty log with the same columns contributes
    # nothing, so the new row alone is the result in that case.
    if log.empty and log.columns.equals(step_log.columns):
        return step_log
    return pd.concat([log, step_log], ignore_index=True)

def save_log(log):
    """Write ``log`` as CSV (without the index) to ``log.csv`` in ``outputFolder``.

    ``outputFolder`` is read from module scope; an existing file is overwritten.
    """
    csv_path = outputFolder+'/log.csv'
    with open(csv_path, mode='w') as handle:
        log.to_csv(handle, index=False)
  
def callback_earlyStopping(MetricList, min_delta=0.1, patience=20, mode='min'):
    """Decide whether training should stop early.

    The newest value in ``MetricList`` is compared against the ``patience``
    values recorded immediately before it, each shifted by ``|min_delta|``.

    Args:
        MetricList: sequence of the monitored metric, one value per epoch.
        min_delta: margin applied to the earlier values; only its absolute
            value is used.
        patience: number of earlier epochs to compare against. No stop is
            signalled until more than ``patience`` values are available.
        mode: ``'min'`` when lower is better; any other value is treated as
            "higher is better".

    Returns:
        ``True`` when, in ``'min'`` mode, the newest value is greater than or
        equal to the largest of (earlier value - |min_delta|), or, otherwise,
        when it is less than or equal to the smallest of
        (earlier value + |min_delta|). ``False`` in every other case.
    """
    # Never stop until more than `patience` values have been recorded.
    if len(MetricList) <= patience:
        return False

    minimising = (mode == 'min')
    margin = abs(min_delta)
    if minimising:
        margin = -margin

    newest_first = MetricList[::-1]
    current_metric = newest_first[0]
    thresholds = [previous + margin for previous in newest_first[1:patience + 1]]

    if minimising and current_metric >= max(thresholds):
        print(f'Metric did not decrease for the last {patience} epochs.')
        return True
    if not minimising and current_metric <= min(thresholds):
        print(f'Metric did not increase for the last {patience} epochs.')
        return True
    return False

Load Dataset

In [None]:
# Locations of the training and validation TFRecord files on the mounted drive.
train_path = '/content/drive/MyDrive/tfrecords/unbalanced_datasets/train_1.tfrecords'
val_path = '/content/drive/MyDrive/tfrecords/unbalanced_datasets/valid.tfrecords'

# Load both splits with `take=None`.
train_dataset = load_dataset(train_path, take=None)
val_dataset = load_dataset(val_path, take=None)

Build Model

In [None]:
from tensorflow.keras.applications.mobilenet import MobileNet
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Activation, Lambda
from tensorflow.keras.models import Model
from tensorflow import keras

def create_model():
    """Build the classifier: a MobileNet backbone with a 14-unit sigmoid head.

    The backbone takes 224x224x3 inputs, is initialised with the 'imagenet'
    weights and is created without its original top layers; its output is
    globally average-pooled before the dense sigmoid layer.

    Returns:
        A Keras ``Model`` mapping the backbone inputs to the 14 sigmoid outputs.
    """
    backbone = MobileNet(input_shape=(224, 224, 3), weights='imagenet', include_top=False)
    pooled = GlobalAveragePooling2D()(backbone.output)
    head = Dense(14, activation='sigmoid')
    return Model(inputs=backbone.inputs, outputs=head(pooled))

In [None]:
# Instantiate the MobileNet-based classifier; this object is what the later
# cells train, checkpoint and save.
mobilenet = create_model()

Initialize Metrics and Variables

In [None]:
# Loss and optimizer used by train_step / validation_step: binary
# cross-entropy and SGD constructed with 1e-3 as its first positional
# argument (the learning rate).
loss_fn = tf.keras.losses.BinaryCrossentropy()
optimizer = keras.optimizers.SGD(1e-3)

# Folder that receives every artefact of this run (logs, checkpoints, model).
outputFolder = "/content/drive/MyDrive/Modelli_Tesi/Local/Unbalanced/client_card"
checkpoint_folder = outputFolder+'/ckpt'

# exist_ok=True makes creation idempotent and avoids the check-then-create
# race of testing os.path.exists first.
os.makedirs(outputFolder, exist_ok=True)
os.makedirs(checkpoint_folder, exist_ok=True)

# TensorBoard variables: one timestamped log directory per run, with separate
# sub-directories (and summary writers) for training and validation.
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_dir = outputFolder + '/logs/gradient_tape/' + current_time + '/train'
valid_log_dir = outputFolder + '/logs/gradient_tape/' + current_time + '/valid'

# Create the directories before opening the writers on them; exist_ok=True
# keeps the cell safe to re-run.
os.makedirs(train_log_dir, exist_ok=True)
os.makedirs(valid_log_dir, exist_ok=True)

train_summary_writer = tf.summary.create_file_writer(train_log_dir)
valid_summary_writer = tf.summary.create_file_writer(valid_log_dir)

# Train Metrics
# One mean-AUC metric over label columns 2, 5, 6, 8 and 10, plus one AUC per
# column. Going by the metric names and the log columns used in the training
# cell, the columns correspond to: 2 = cardiomegaly, 5 = edema,
# 6 = consolidation, 8 = atelectasis, 10 = pleural effusion.
# These module-level names are read by compute_metrics() and the training loop.
mean_auc_train = MeanAUC(label_id=[2,5,6,8,10], name='mean_auc_train')
auc_train_card = LabelAUC(2, name='auc_train_card')
auc_train_edema = LabelAUC(5, name='auc_train_edema')
auc_train_cons = LabelAUC(6, name='auc_train_cons')
auc_train_atel = LabelAUC(8, name='auc_train_atel')
auc_train_peff = LabelAUC(10, name='auc_train_peff')
# Validation Metrics
# Same set of metrics, kept separately so training and validation statistics
# never mix.
mean_auc_valid = MeanAUC(label_id=[2,5,6,8,10], name='mean_auc_valid')
auc_valid_card = LabelAUC(2, name='auc_valid_card')
auc_valid_edema = LabelAUC(5, name='auc_valid_edema')
auc_valid_cons = LabelAUC(6, name='auc_valid_cons')
auc_valid_atel = LabelAUC(8, name='auc_valid_atel')
auc_valid_peff = LabelAUC(10, name='auc_valid_peff')

# Checkpoint
# Tracks the optimizer and the model together; the manager writes into
# checkpoint_folder using the prefix 'cktp_centralized'.
# NOTE(review): max_to_keep=None presumably disables pruning of old
# checkpoints, so one is kept per epoch — confirm against the TF docs.
ckpt = tf.train.Checkpoint(optimizer=optimizer, model=mobilenet)
manager = tf.train.CheckpointManager(ckpt, directory=checkpoint_folder, checkpoint_name='cktp_centralized', max_to_keep=None)

Training

In [None]:
# Upper bound on the number of epochs; early stopping may end training sooner.
epochs = 10 
# Column names of the per-epoch logs, in the order the result lists are built.
# NOTE(review): this rebinds the name `metrics`, which the import cell bound
# to `sklearn.metrics`; after this cell runs, `metrics` is this list.
metrics=['epoch', 'AUC_mean', 'AUC_cardiomegaly', 'AUC_edema', 'AUC_consolidation', 'AUC_atelectasis', 'AUC_pleural_effusion']
train_logger = pd.DataFrame(columns=metrics)
valid_logger = pd.DataFrame(columns=metrics)

# metrics_seq: sequence of monitored metric values (one per epoch) used for Early Stopping
metrics_seq = []

# Metric objects grouped in the column order used by the loggers (the epoch
# number comes first, then these six values).
train_metrics = (mean_auc_train, auc_train_card, auc_train_edema, auc_train_cons, auc_train_atel, auc_train_peff)
valid_metrics = (mean_auc_valid, auc_valid_card, auc_valid_edema, auc_valid_cons, auc_valid_atel, auc_valid_peff)

for e in range(epochs):

    # Training pass: one optimisation step per batch, progress every 100 steps.
    print("Start of epoch %d" %(e+1))
    for step, row in enumerate(train_dataset):
        train_loss = train_step(mobilenet, row['x'], row['y'])
        if step % 100 == 0:
            template = 'TRAINING: Epoch {}, Step {}, AUC MEAN: {}, AUC_cardiomegaly: {}, AUC_edema: {}, AUC_consolidation: {}, AUC_atelectasis: {}, AUC_pleural_effusion: {}'
            print(template.format(e+1, step, *[m.result().numpy() for m in train_metrics]))
    train_results = [e+1] + [m.result().numpy() for m in train_metrics]

    # Validation pass: forward only, progress every 100 steps.
    for step, row in enumerate(val_dataset):
        val_loss = validation_step(mobilenet, row['x'], row['y'])
        if step % 100 == 0:
            template = 'VALIDATION: Epoch {}, Step {}, AUC MEAN: {}, AUC_cardiomegaly: {}, AUC_edema: {}, AUC_consolidation: {}, AUC_atelectasis: {}, AUC_pleural_effusion: {}'
            print(template.format(e+1, step, *[m.result().numpy() for m in valid_metrics]))
    valid_results = [e+1] + [m.result().numpy() for m in valid_metrics]

    # Append this epoch's numbers to both logs and show them.
    train_logger = log_epoch(train_logger, metrics, train_results)
    valid_logger = log_epoch(valid_logger, metrics, valid_results)

    print(train_logger)
    print(valid_logger)

    # Save a checkpoint at the end of each epoch.
    save_path = manager.save()

    # Early stopping monitors the validation mean AUC (higher is better).
    metrics_seq.append(mean_auc_valid.result().numpy())
    stopEarly = callback_earlyStopping(metrics_seq, min_delta=0.1, patience=4, mode='max')
    if stopEarly:
        print("Callback_EarlyStopping signal received at epoch= %d/%d"%(e+1,epochs))
        print("Terminating training ")
        break

    # Clear every metric so the next epoch starts from scratch.
    for tracked in train_metrics + valid_metrics:
        tracked.reset_states()

# Persist the trained network: the full model and, separately, its weights.
mobilenet.save(outputFolder+'/client_card.h5')
mobilenet.save_weights(outputFolder+'/weights.h5')

# Write the training and validation logs as CSV files (index column omitted).
file_train = outputFolder+'/log_train.csv'
file_valid = outputFolder+'/log_valid.csv'
for csv_target, frame in ((file_train, train_logger), (file_valid, valid_logger)):
    with open(csv_target, mode='w') as f:
        frame.to_csv(f, index=False)

Start of epoch 1
TRAINING: Epoch 1, Step 0, AUC MEAN: 0.3950601816177368, AUC_cardiomegaly: 0.0, AUC_edema: 0.3515625, AUC_consolidation: 0.4151785969734192, AUC_atelectasis: 0.7571428418159485, AUC_pleural_effusion: 0.4514169991016388
TRAINING: Epoch 1, Step 100, AUC MEAN: 0.3890911042690277, AUC_cardiomegaly: 0.0, AUC_edema: 0.43844568729400635, AUC_consolidation: 0.46503812074661255, AUC_atelectasis: 0.51803058385849, AUC_pleural_effusion: 0.523941159248352
TRAINING: Epoch 1, Step 200, AUC MEAN: 0.3925397992134094, AUC_cardiomegaly: 0.0, AUC_edema: 0.4451776444911957, AUC_consolidation: 0.4753434658050537, AUC_atelectasis: 0.5193608403205872, AUC_pleural_effusion: 0.5228170156478882
TRAINING: Epoch 1, Step 300, AUC MEAN: 0.3968673348426819, AUC_cardiomegaly: 0.0, AUC_edema: 0.4613317549228668, AUC_consolidation: 0.48417922854423523, AUC_atelectasis: 0.5152942538261414, AUC_pleural_effusion: 0.5235313773155212
TRAINING: Epoch 1, Step 400, AUC MEAN: 0.40242236852645874, AUC_cardiomega