# Prerequisites

In [None]:
import os
import tensorflow as tf
from tensorflow import keras 
from keras import layers
from IPython import display
import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib
import numpy as np
import time
from tqdm import tqdm
from matplotlib import gridspec
import keras.backend as K
from PIL import Image
import gc
import cv2
from sklearn.metrics import confusion_matrix
from datetime import datetime
from tensorflow.keras.callbacks import (ModelCheckpoint, TensorBoard, LearningRateScheduler, ReduceLROnPlateau,
                                        CSVLogger, EarlyStopping)

from sklearn.model_selection import KFold

# Model


In [None]:
def conv_jun():
    """Build a binary ECG-image classifier based on Jun et al.

    Based off of the work by Jun et al. (https://arxiv.org/abs/1804.06812).
    The original classifier is built for multi-class classification. Several
    adjustments are made here: a Gaussian noise layer is applied to the input,
    dropout is applied after each convolutional block, and the number of
    filters in each convolutional layer is reduced by a factor of 2.

    The network takes 128x128 single-channel images and ends in a single
    sigmoid unit.

    Returns
    ------
       model : tf.keras.Model
          An uncompiled model object
    """

    def conv_block(tensor, filters):
        # Conv -> ELU -> BatchNorm -> Dropout. The dropout output is returned
        # so that it is actually part of the graph; previously the dropout
        # layers were created but their outputs were never consumed.
        tensor = layers.Conv2D(filters, 3, 1, padding='same')(tensor)
        tensor = layers.ELU()(tensor)
        tensor = layers.BatchNormalization()(tensor)
        return layers.Dropout(0.5)(tensor)

    inputs = layers.Input(shape=(128, 128, 1))
    x = layers.GaussianNoise(0.1)(inputs)

    # Three stages of two convolutional blocks each, followed by 2x2 pooling.
    for filters in (32, 64, 128):
        x = conv_block(x, filters)
        x = conv_block(x, filters)
        x = layers.MaxPooling2D(pool_size=(2, 2), strides=2, padding='same')(x)

    # NOTE(review): Dense(1024) is applied *before* Flatten, so it acts on the
    # channel axis at every spatial position rather than on the flattened
    # feature vector. This ordering is kept as in the original code -- confirm
    # it is intentional.
    x = layers.Dense(1024)(x)
    x = layers.ReLU()(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Flatten()(x)

    outputs = layers.Dense(1, activation='sigmoid')(x)
    return tf.keras.Model(inputs=inputs, outputs=outputs)


In [None]:
import gc
from tensorflow.keras.callbacks import Callback

In [None]:
class ClearMemory(Callback):
    """Keras callback intended to release memory between epochs."""
    def on_epoch_end(self, epoch, logs=None):
        """Run the Python garbage collector, then clear the Keras backend session.

        `epoch` and `logs` are supplied by Keras and are not used here.
        """
        gc.collect()
        K.clear_session()

In [None]:
def createCallBack(learning_rate):
    """Assemble the list of callbacks used while fitting a classifier.

    Parameters
    ----------
        learning_rate : float
            Initial learning rate of the optimizer. The learning-rate
            scheduler will never reduce the rate below ``learning_rate / 1000``.

    Returns
    ------
        callbacks : list
            ``[ReduceLROnPlateau, EarlyStopping, ClearMemory]``. The first two
            monitor ``val_loss``, so they presumably need validation data to
            be passed to ``fit`` to have any effect.
    """
    lr_floor = learning_rate / 1000

    # Cut the learning rate by 10x after 5 epochs without improvement.
    plateau = ReduceLROnPlateau(monitor='val_loss',
                                factor=0.1,
                                patience=5,
                                min_lr=lr_floor)
    # Stop after 9 epochs without an improvement of at least 1e-4.
    stopper = EarlyStopping(monitor='val_loss',
                            patience=9,
                            min_delta=0.0001)

    return [plateau, stopper, ClearMemory()]

# Data Loader


In [None]:
class load_img_sets():
    """Load an image directory into NumPy arrays.

    Cross validation is much easier to perform when images are in the form
    of numpy arrays and not tf.data.Dataset. This class reads a directory with
    ``tf.keras.utils.image_dataset_from_directory`` (128x128, grayscale) and
    converts the resulting batches to numpy arrays.

    Attributes
    ----------
        path : str
            The path to the image dataset
        batch_size : int
            The size of batch that images will be read in and converted to
            arrays. (defaults to 64)
        ignore : bool
            Set from the ``ignore_other`` constructor argument. When True,
            ECGs that are not AF or NORMAL are dropped (defaults to True)

    Methods
    ------
        load_data()
            Load labelled image data and convert it to numpy arrays
        load_af_data()
            Load unlabelled image data from a single directory (in this case
            AF) and label every image 1.
    """

    def __init__(self, path, batch_size=64, ignore_other=True):
        self.path = path
        self.batch_size = batch_size
        self.ignore = ignore_other

    def _dataset(self, **extra):
        """Build the tf.data.Dataset shared by both loaders."""
        return tf.keras.utils.image_dataset_from_directory(
            self.path,
            image_size=(128, 128),
            batch_size=self.batch_size,
            color_mode='grayscale',
            **extra
        )

    @staticmethod
    def _stack(chunks):
        """Concatenate per-batch arrays; return an empty array if there are none."""
        return np.concatenate(chunks) if chunks else np.asarray([])

    def load_data(self):
        """Load labelled image data and convert it to numpy arrays.

        When ``self.ignore`` is True only images labelled 0 or 1 are kept and
        the labels are swapped (0 -> 1, 1 -> 0). Without the swap, 0 => AF,
        implying AF is the negative class (this assumes the class directories
        sort as AF, NORMAL, OTHER -- TODO confirm). When ``self.ignore`` is
        False all images are returned with their labels unchanged.

        Returns
        ------
            X : np.ndarray
              numpy array of images
            Y : np.ndarray
              numpy array of labels
        """
        images, labels = [], []
        for image_batch, label_batch in self._dataset():
            batch_images = np.asarray(image_batch, dtype='float32')
            batch_labels = np.asarray(label_batch)
            if self.ignore:
                # Filter images and labels with the SAME mask so X and Y can
                # never fall out of step, whatever label values occur.
                keep = np.isin(batch_labels, (0, 1))
                batch_images = batch_images[keep]
                batch_labels = 1 - batch_labels[keep]
            images.append(batch_images)
            labels.append(batch_labels)
        return self._stack(images), self._stack(labels)

    def load_af_data(self):
        """Load unlabelled images from one directory, in this case AF.

        Returns
        ------
            X : np.ndarray
              numpy array of images
            Y : np.ndarray
              numpy array of labels; every entry is 1 (the AF label used
              after the swap performed in ``load_data``)
        """
        images = [np.asarray(image_batch, dtype='float32')
                  for image_batch in self._dataset(label_mode=None)]
        X = self._stack(images)
        return X, np.ones(len(X), dtype=int)
          

# Cross Validation


#### Generate ECGs

In [None]:

def label_maker(n_classes=2,ecg_type='NORMAL', num_eg=16):
    """Build a 1-D int32 tensor holding ``num_eg`` copies of one class label.

    Helper used by the image generation functions to condition a generator
    on an ECG type.

    Parameters
    ----------
    n_classes : int
        Unused; kept so existing calls keep working.

    ecg_type : str
        The type of ECG to generate. 'AF' maps to 0 and 'NORMAL' maps to 1;
        any other value (e.g. 'OTHER') maps to 2.

    num_eg : int
        The number of labels to generate

    Returns
    ------
    labels
        A tensor of ``num_eg`` labels, all 0, 1 or 2 corresponding to AF,
        NORMAL or OTHER
    """
    # Generators were trained with AF = 0, so that encoding is kept here. It
    # does not suit Keras metrics, so the values are swapped (AF = 1) when the
    # data is read into numpy arrays.
    codes = {'AF': 0, 'NORMAL': 1}
    code = codes.get(ecg_type, 2)
    lab = tf.cast(code, dtype=tf.dtypes.int32)
    return tf.repeat(lab, [num_eg], axis=None, name=None)

def generate_new_images_is(model, num_eg,ecg_type = 'NORMAL', batch_size=1000, latent_dim=100):
    """Generate a set of images as numpy arrays using a conditional generator.

    Images are produced ``batch_size`` at a time from freshly sampled Gaussian
    noise and a matching batch of class labels.

    Parameters
    ----------
    model : tf.Model
        The generator to be used to generate images. It is called as
        ``model([noise, labels], training=False)``.

    num_eg : int
        Total number of images to generate.

    ecg_type : str
        A string indicating the type of ECG to generate. Accepted arguments
        are 'NORMAL', 'AF', or 'OTHER'. Passed to ``label_maker`` to build
        the tensor of integer labels.

    batch_size : int
        Number of ECGs generated in a given iteration. Must be positive and
        must equally divide num_eg (defaults to 1000)

    latent_dim : int
        Length of each noise vector fed to the generator (defaults to 100)

    Returns
    ------
    imgs : list of np.ndarray
        One array per generated image.

    Raises
    ------
    ValueError
        If ``batch_size`` is not positive or does not divide ``num_eg``.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    if num_eg % batch_size != 0:
        raise ValueError(
            "please ensure batch size and number of examples are divisible")

    imgs = []
    for _ in range(num_eg // batch_size):
        noise = tf.random.normal([batch_size, latent_dim])
        labels = label_maker(n_classes=2, ecg_type=ecg_type, num_eg=batch_size)

        predictions = model([noise, labels], training=False)

        # Rescale the whole batch at once; presumably the generator outputs
        # values in [-1, 1], which this maps to [0, 255] -- TODO confirm.
        imgs.extend((np.asarray(predictions) + 1) * 127.5)
    return imgs

#### Normalize images

In [None]:
def normalize(train, test,fake=None):
    """Scale image arrays to float32 by dividing by 255.

    Parameters
    ----------
        train : np.ndarray
            Training image set
        test : np.ndarray
            Testing image set
        fake : np.ndarray
            Optional fake image set that will later be
            combined with the training set. (defaults to None)

    Returns
    ------
        train_norm : np.ndarray
            normalized training set
        test_norm : np.ndarray
            normalized test set
        fake_norm : np.ndarray
            normalized fake set; only returned when ``fake`` is given
    """

    def _scale(images):
        # Cast first so the division is carried out in float32.
        return images.astype('float32') / 255.0

    scaled = [_scale(train), _scale(test)]
    if fake is not None:
        scaled.append(_scale(fake))
    return tuple(scaled)

In [None]:
# X_train,X_valid,X_test=prep_pixels(X_train,X_test)

In [None]:
# X_train,X_test,X_fake=normalize(X_train,X_test,X_fake)

In [None]:
# print(X_train.shape,X_test.shape,X_fake.shape)

#### Save Folds

In [None]:
from sklearn.utils import shuffle

In [None]:
def save_for_cross_val(train,t_lab,valid,v_lab,dir,k=5,shuf = False):
    """Split two datasets into k folds and save each fold as an ``.npz`` file.

    The two image sets (and their labels) are concatenated, optionally
    shuffled, split into ``k`` nearly equal parts and written to
    ``<dir>/train_<fold>.npz`` with fold numbers starting at 1. Each archive
    holds the images as ``arr_0`` and the labels as ``arr_1``.

    Parameters
    ----------
        train : np.ndarray
          Training image set
        t_lab : np.ndarray
          Training label set
        valid : np.ndarray
          Other image set
        v_lab : np.ndarray
          Other label set
        dir : str
          Existing directory to save folds to
        k : int
          Number of folds to split data into (Defaults to 5)
        shuf : bool
          Flag controlling whether or not the dataset should be shuffled
          before folding (defaults to False). If using augmentation, this
          MUST be True
    """
    import os

    datasets = np.concatenate((train, valid))
    labels = np.concatenate((t_lab, v_lab))
    print(np.unique(labels, return_counts=True))

    if shuf:
        print(shuf)
        # Fixed seed so the folds are reproducible between runs.
        datasets, labels = shuffle(datasets, labels, random_state=0)

    image_folds = np.array_split(datasets, k, axis=0)
    label_folds = np.array_split(labels, k, axis=0)

    for fold, (img, label) in enumerate(zip(image_folds, label_folds), start=1):
        print(fold, img.shape, label.shape)
        np.savez(os.path.join(dir, f'train_{fold}.npz'), img, label)

    # The last fold is additionally written to /content because, in practice,
    # this stops Colab from restarting. Outside Colab that directory normally
    # does not exist, so the extra copy is skipped instead of crashing after
    # all folds have already been saved.
    if os.path.isdir('/content'):
        np.savez('/content/train_5.npz', image_folds[-1], label_folds[-1])



#### Actual CV code

In [None]:
# Each entry is ([training fold numbers], validation fold number). Folds are
# numbered 1-5 and every fold is held out exactly once, starting with fold 5.
folds = [([n for n in range(1, 6) if n != held_out], held_out)
         for held_out in range(5, 0, -1)]

In [None]:
# Metrics reported for the binary classifier: confusion-matrix counts,
# accuracy, precision, recall and AUC. Each is given a short name.
metrics = [
    metric_cls(name=short_name)
    for metric_cls, short_name in (
        (keras.metrics.TruePositives, 'tp'),
        (keras.metrics.FalsePositives, 'fp'),
        (keras.metrics.TrueNegatives, 'tn'),
        (keras.metrics.FalseNegatives, 'fn'),
        (keras.metrics.BinaryAccuracy, 'accuracy'),
        (keras.metrics.Precision, 'precision'),
        (keras.metrics.Recall, 'recall'),
        (keras.metrics.AUC, 'auc'),
    )
]

In [None]:
def _load_npz_dataset(npz_path, batch_size, shuffle_first=False):
    """Load an ``.npz`` fold (``arr_0`` images, ``arr_1`` labels) as a batched tf.data.Dataset."""
    with np.load(npz_path) as data:
        X = data['arr_0']
        Y = data['arr_1']
    if shuffle_first:
        X, Y = shuffle(X, Y)
        print(np.unique(Y, return_counts=True))

    dataset = tf.data.Dataset.from_tensor_slices((X, Y))
    # Drop the numpy copies as soon as the dataset holds the data.
    del X, Y
    gc.collect()
    return dataset.batch(batch_size)


def _append_eval_log(tag, results, log_path='eval.log'):
    """Append one ';'-separated row of evaluation results to ``log_path``."""
    import csv
    import os

    row = {'evaluated_on': tag}
    row.update(results)
    needs_header = not os.path.exists(log_path) or os.path.getsize(log_path) == 0
    with open(log_path, 'a', newline='') as log_file:
        writer = csv.DictWriter(log_file, fieldnames=list(row), delimiter=';')
        if needs_header:
            writer.writeheader()
        writer.writerow(row)


def cross_fold(folds,training_path, holdout_path,learning_rate,batch_size,epochs):
    """Run cross validation over pre-saved ``.npz`` folds.

    For every entry of ``folds`` a fresh ``conv_jun`` model is compiled with
    Adam and binary cross-entropy. The model is then fitted on each training
    fold in turn (``epochs`` epochs per fold, one fold in memory at a time),
    evaluated on the validation fold, and evaluated on the holdout set.
    Evaluation results are appended to ``eval.log`` in the working directory
    (';'-separated) and returned.

    Parameters
    ----------
        folds : list of (list of int, int)
            One entry per cross-validation round: the fold numbers to train
            on and the fold number to validate on
        training_path : str
            Path to training (and validation) folds super-directory; fold
            ``n`` is read from ``<training_path>/train_<n>.npz``
        holdout_path : str
            Direct path to holdout fold
        learning_rate : float
            Learning rate of the optimizer
        batch_size : int
            Number of samples per batch
        epochs : int
            Number of training epochs run on each training fold

    Returns
    ------
        results : list of dict
            One dict per round with keys ``'train_folds'``,
            ``'validation_fold'``, ``'validation'`` and ``'holdout'``; the
            last two hold the metric dicts returned by ``model.evaluate``.
    """
    all_results = []
    for training_sets, evaluating_set in folds:
        K.clear_session()
        model = conv_jun()

        model.compile(optimizer=keras.optimizers.Adam(
                  learning_rate = learning_rate, beta_1 = 0.5, beta_2 = 0.999, amsgrad = False),
                  loss=tf.keras.losses.BinaryCrossentropy(),
                  metrics=metrics)

        for j in training_sets:
            print(f'---Training with subset {j}---')
            train_dataset = _load_npz_dataset(
                training_path + f'/train_{j}.npz', batch_size)
            model.fit(train_dataset,
                      epochs=epochs,
                      verbose=2)
            del train_dataset
            gc.collect()

        print(f'---Testing with subset {evaluating_set}---')
        validation_dataset = _load_npz_dataset(
            training_path + f'/train_{evaluating_set}.npz', batch_size)
        # CSVLogger only implements training hooks, so passing it to
        # evaluate() never wrote anything; the results are logged explicitly.
        validation_results = model.evaluate(validation_dataset, return_dict=True)
        _append_eval_log(f'fold_{evaluating_set}', validation_results)
        del validation_dataset
        gc.collect()

        print(f'---Testing on holdout---')
        holdout_dataset = _load_npz_dataset(
            holdout_path, batch_size, shuffle_first=True)
        holdout_results = model.evaluate(holdout_dataset, return_dict=True)
        _append_eval_log(f'holdout_after_fold_{evaluating_set}', holdout_results)
        del holdout_dataset
        gc.collect()

        all_results.append({'train_folds': list(training_sets),
                            'validation_fold': evaluating_set,
                            'validation': validation_results,
                            'holdout': holdout_results})
    return all_results

# **Use Cases**


## Cross validation

Get Control set and ***UNSEEN HOLDOUT***

In [None]:
!unzip pre_saved_assests/splits_for_classifier.zip 

#### Select the Assets you wish to use for augmentation

### Assets for DCCGAN

In [None]:
!unzip pre_saved_assests/splits_dcgan.zip 
!unzip pre_saved_assests/dcgan_images.zip 
!unzip pre_saved_assests/dcgan_gen.zip 
GEN_PATH = 'content/dcgan_gen'
FOLD_PATH = 'content/dcgan_aug'

### Assets for WCGAN

In [None]:
!unzip pre_saved_assests/splits_wgan.zip 
!unzip pre_saved_assests/wgan_images.zip 
!unzip pre_saved_assests/wgan_gen_rms.zip 
GEN_PATH = 'content/wgan_gen_rms'
FOLD_PATH = 'content/wgan_aug'

### Assets for WCGANGP RMSprop

In [None]:
!unzip pre_saved_assests/splits_for_wgangp.zip 
!unzip pre_saved_assests/wgangp_rms_images.zip 
!unzip pre_saved_assests/wgangp_gen_rms.zip 
GEN_PATH = 'content/wgangp_gen_rms'
FOLD_PATH = 'content/wgangp_aug'

### Assets for WCGANGP Adam

In [None]:
!unzip pre_saved_assests/splits_for_wgangp_2.zip
!unzip pre_saved_assests/wgangp_adam_images.zip #or mbd
!unzip pre_saved_assests/wgangp_gen_adam.zip
GEN_PATH = 'content/wgangp_adam'
FOLD_PATH = 'content/wgangp_aug_adam'

### Select how you want to run cross validation

### Running cross validation from presaved folds


Checkpoint directories (`.ipynb_checkpoints`) tend to end up inside the image directories, which interferes with Keras's ability to read images from those directories. Remove them before loading any data.

In [None]:
rm -rf `find ~/Desktop/rsnjos005-AFib_GAN -type d -name .ipynb_checkpoints` #make sure this path is correct for you

In [None]:
# Holdout file is test.npz (the extension was previously mistyped as ".npzz").
cross_fold(folds,FOLD_PATH,'content/content/splits_for_classifier/test.npz',0.001,64,10)

### Producing folds from presaved generator and data splits and running cross validation

In [None]:
!unzip pre_saved_assests/train_final.zip
!unzip pre_saved_assests/valid_final.zip
!unzip pre_saved_assests/test_final.zip

In [None]:
rm -rf `find ~/Desktop/rsnjos005-AFib_GAN -type d -name .ipynb_checkpoints` #make sure this path is correct for you

In [None]:
# Read the train / validation / test image directories into numpy arrays,
# 64 images per batch. With the default ignore_other=True only labels 0 and 1
# are kept, and they are swapped so that AF becomes 1 and NORMAL becomes 0.
train_ds = load_img_sets('content/images_train',64)
X_train,Y_train = train_ds.load_data()

valid_ds = load_img_sets('content/images_valid',64)
X_valid,Y_valid= valid_ds.load_data()

test_ds = load_img_sets('content/images_test',64)
X_test,Y_test = test_ds.load_data()

In [None]:
X_train = np.concatenate((X_train,X_valid))
Y_train = np.concatenate((Y_train,Y_valid))

In [None]:
generator = keras.models.load_model(GEN_PATH)

In [None]:
# Generate 53900 synthetic AF images in batches of 100 (the batch size must
# divide the total) and label every one of them 1.
X_fake = np.asarray(generate_new_images_is(generator,53900,'AF',100))
Y_fake = np.ones([X_fake.shape[0]])

In [None]:
X_train,X_test,X_fake=normalize(X_train,X_test,X_fake)

In [None]:
import os.path
from os import path
# exist_ok=True makes this cell safe to re-run when 'splits' already exists.
os.makedirs('splits', exist_ok=True)

In [None]:
save_for_cross_val(X_train,Y_train,X_fake,Y_fake,'splits',shuf=True)

In [None]:
gc.collect()

In [None]:
FOLD_PATH = 'splits'

In [None]:
cross_fold(folds,FOLD_PATH,'content/content/splits_for_classifier/test.npz',0.001,64,10)

## Image quality
This section must be run separately, as the PIL version required by clean-fid will force Colab to restart.

In [None]:
!pip install clean-fid # You will likely have to restart Colab as this requires an older version of PIL
# If that is the case you will need to rerun the prerequisites block

In [None]:
from cleanfid import fid #this likely will not work without a GPU enabled

In [None]:
def get_fid(real_dir,fake_dir):
    """Compute the Fréchet Inception Distance between two image directories.

    Thin wrapper around clean-fid's ``compute_fid`` using the
    ``legacy_tensorflow`` mode.

    Parameters
    ----------
        real_dir : str
            directory of real images
        fake_dir : str
            directory of fake images
    Returns
    ------
        score : float
            the FID between images of the two directories
    """
    return fid.compute_fid(real_dir, fake_dir, mode="legacy_tensorflow")

In [None]:
def get_kid(real_dir,fake_dir):
    """Compute the Kernel Inception Distance between two image directories.

    Thin wrapper around clean-fid's ``compute_kid`` using the
    ``legacy_tensorflow`` mode.

    Parameters
    ----------
        real_dir : str
            directory of real images
        fake_dir : str
            directory of fake images
    Returns
    ------
        score : float
            the KID between images of the two directories
    """
    return fid.compute_kid(real_dir, fake_dir, mode="legacy_tensorflow")

Computing KID using elementary methods

In [None]:
import torch

In [None]:
# Build the feature extractor on the GPU and extract features for the real
# training images. A CUDA device is requested explicitly, so this cell
# presumably fails on a machine without a GPU.
device=torch.device("cuda")
feat_model = fid.build_feature_extractor('legacy_tensorflow', device) # build Inception
x = fid.get_folder_features('/content/images_train', model=feat_model, num_workers=12, num=None,
                        shuffle=False, seed=0, batch_size=128, device=torch.device("cuda"),
                        mode="legacy_tensorflow", custom_fn_resize=None, description="", verbose=True,
                        custom_image_tranform=None) #get real activations

In [None]:
# Extract features for the generated images using the same extractor
# (feat_model) and the same settings as for the real images.
y = fid.get_folder_features('/content/content/adam', model=feat_model, num_workers=12, num=None,
                        shuffle=False, seed=0, batch_size=128, device=torch.device("cuda"),
                        mode="legacy_tensorflow", custom_fn_resize=None, description="", verbose=True,
                        custom_image_tranform=None) #get fake activations

### FID and KID

Note that if these are not run with CUDA enabled they will fail.

In [None]:
score = get_fid('/content/content/images_train', '/content/content/new_images')
print(score)

In [None]:
score = get_kid('/content/content/images_train', '/content/content/new_images')
print(score)