**Import packages and define special layers:**

In [None]:
from __future__ import absolute_import, division, print_function
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, applications, Sequential
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler, History
import tensorflow_addons as tfa
import tensorflow_datasets as tfds

from kaggle_datasets import KaggleDatasets
import glob
import imageio
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os

import sys
print("Python version:", sys.version)
import PIL
import time
import shutil

from functools import partial
from albumentations import (
    Compose, RandomBrightness, JpegCompression, HueSaturationValue, RandomContrast, HorizontalFlip,
    Rotate
)

from IPython import display


from tensorflow.python.eager import def_function
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import tensor_shape
from tensorflow.python.keras import backend as K
from tensorflow.python.keras import layers
from tensorflow.python.keras import initializers
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops



In [None]:
#Define minibatch discrimination layer
# https://github.com/garridoq/gan-guide/blob/master/Minibatch%20discrimination%20%2B%20label%20smoothing.ipynb
class MinibatchDiscrimination(tf.keras.layers.Layer):
    """Appends per-sample "closeness to the rest of the batch" features to the input.

    The input `x` (2-D, one row per sample) is projected by a trainable kernel,
    reshaped to `(batch, num_kernel, dim_kernel)`, and for every pair of samples
    the L1 distance between their `dim_kernel`-sized rows is computed per kernel.
    `exp(-distance)` is summed over the batch, giving `num_kernel` extra features
    that are concatenated to `x` along axis 1.

    Arguments:
        num_kernel: number of extra features produced per sample.
        dim_kernel: size of each projected row that the L1 distance is taken over.
        kernel_initializer: initializer passed to `add_weight` for the projection kernel.
    """

    def __init__(self, num_kernel, dim_kernel, kernel_initializer='glorot_uniform', **kwargs):
        # Initialise the Keras base class first so attribute tracking is set up
        # before any attribute is assigned on the layer.
        super(MinibatchDiscrimination, self).__init__(**kwargs)
        self.num_kernel = num_kernel
        self.dim_kernel = dim_kernel
        self.kernel_initializer = kernel_initializer

    def build(self, input_shape):
        # Create a trainable weight variable for this layer.
        self.kernel = self.add_weight(name='kernel',
                                      shape=(input_shape[-1], self.num_kernel*self.dim_kernel),
                                      initializer=self.kernel_initializer,
                                      trainable=True)
        super(MinibatchDiscrimination, self).build(input_shape)

    def call(self, x):
        activation = tf.matmul(x, self.kernel)
        activation = tf.reshape(activation, shape=(-1, self.num_kernel, self.dim_kernel))
        # Mi: (batch, num_kernel, dim_kernel, 1)
        tmp1 = tf.expand_dims(activation, 3)
        # Mj: (1, num_kernel, dim_kernel, batch)
        tmp2 = tf.transpose(activation, perm=[1, 2, 0])
        tmp2 = tf.expand_dims(tmp2, 0)

        # Broadcasting yields every pairwise difference: (batch, num_kernel, dim_kernel, batch).
        diff = tmp1 - tmp2

        # L1 distance over dim_kernel, then sum of exp(-distance) over the batch axis.
        l1 = tf.reduce_sum(tf.math.abs(diff), axis=2)
        features = tf.reduce_sum(tf.math.exp(-l1), axis=2)
        return tf.concat([x, features], axis=1)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[1] + self.num_kernel)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super(MinibatchDiscrimination, self).get_config()
        config.update({
            'num_kernel': self.num_kernel,
            'dim_kernel': self.dim_kernel,
            'kernel_initializer': self.kernel_initializer,
        })
        return config

#Define spectral normalization layer
# https://medium.com/@FloydHsiu0618/spectral-normalization-implementation-of-tensorflow-2-0-keras-api-d9060d26de77
class SpectralNormalization(tf.keras.layers.Layer):
    """Wraps a layer and divides its `kernel` by an estimate of its largest singular value.

    The estimate is obtained with `iteration` rounds of power iteration on the
    kernel reshaped to 2-D (last axis kept). The running vector `u` is stored as a
    non-trainable weight and refreshed only when `call` receives a truthy `training`.

    Arguments:
        layer: a Keras layer exposing a `kernel` attribute after `build`.
        iteration: number of power-iteration rounds per forward pass (>= 1).

    Raises:
        ValueError: if `iteration` < 1, or (at build time) if the wrapped layer
            has no `kernel` attribute.
    """

    def __init__(self, layer, iteration=1, **kwargs):
        if iteration < 1:
            raise ValueError('`iteration` must be >= 1, got {}.'.format(iteration))
        # Only keyword options go to the base class: `Layer.__init__`'s first
        # positional parameter is `trainable`, not a wrapped layer.
        super(SpectralNormalization, self).__init__(**kwargs)
        self.layer = layer
        self.iteration = iteration

    def build(self, input_shape):
        if not self.layer.built:
            self.layer.build(input_shape)
            self.layer.built = True

        if not hasattr(self.layer, 'kernel'):
            raise ValueError('Invalid layer for SpectralNormalization.')

        # Always bind w/u, even if the wrapped layer had been built beforehand.
        self.w = self.layer.kernel
        self.w_shape = self.w.shape.as_list()
        self.u = self.add_weight(
            name='sn_u',
            shape=(1, self.w_shape[-1]),
            initializer=tf.random_normal_initializer(),
            trainable=False,
            dtype=tf.float32,
            # `u` is assigned during the forward pass; give it an explicit
            # aggregation so the assignment is well-defined under a
            # distribution strategy.
            aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA)

        super(SpectralNormalization, self).build(input_shape)

    def call(self, inputs, training=None):
        w_norm = self._compute_weights(training)

        # Run the wrapped layer with the normalized kernel, then put the real
        # variable back so the layer never keeps a stale tensor as its kernel.
        self.layer.kernel = w_norm
        try:
            output = self.layer(inputs)
        finally:
            self.layer.kernel = self.w

        return output

    def _compute_weights(self, training):
        """Return `w / sigma`, where sigma is the power-iteration estimate of the top singular value."""
        w_reshaped = tf.reshape(self.w, [-1, self.w_shape[-1]])

        u_hat = tf.identity(self.u)
        v_hat = None

        for _ in range(self.iteration):
            v_ = tf.matmul(u_hat, tf.transpose(w_reshaped))
            v_hat = tf.nn.l2_normalize(v_)

            u_ = tf.matmul(v_hat, w_reshaped)
            u_hat = tf.nn.l2_normalize(u_)

        # Treat the singular-vector estimates as constants when differentiating.
        u_hat = tf.stop_gradient(u_hat)
        v_hat = tf.stop_gradient(v_hat)

        if training:
            self.u.assign(u_hat)
        sigma = tf.matmul(tf.matmul(v_hat, w_reshaped), tf.transpose(u_hat))

        return self.w / sigma

    def compute_output_shape(self, input_shape):
        return tf.TensorShape(self.layer.compute_output_shape(input_shape).as_list())

**Set the accelerator to *TPU*, then run the following code:**

In [None]:
# Try to connect to a TPU; if that fails for any reason, report why and fall
# back to TensorFlow's default distribution strategy.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Device:', tpu.master())
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
except Exception as tpu_error:  # not a bare `except:` so Ctrl-C still interrupts
    print('TPU not available, using the default strategy:', tpu_error)
    strategy = tf.distribute.get_strategy()
print('Number of replicas:', strategy.num_replicas_in_sync)

AUTOTUNE = tf.data.experimental.AUTOTUNE

print(tf.__version__)

**Load the data:**

(The various definitions are used to augment the small data set)

In [None]:
from kaggle_datasets import KaggleDatasets
# Bucket path of the attached Kaggle dataset, as returned by KaggleDatasets.
GCS_PATH = KaggleDatasets().get_gcs_path()

# All TFRecord shards under the dataset's monet_tfrec/ folder.
MONET_FILENAMES = tf.io.gfile.glob(str(GCS_PATH + '/monet_tfrec/*.tfrec'))
print('Monet TFRecord Files:', len(MONET_FILENAMES))

# Height and width that decode_image reshapes every decoded image to.
IMAGE_SIZE = [256, 256]

def normalize(image):
    """Cast `image` to float32 and map it linearly via x / 127.5 - 1 (0 -> -1, 255 -> 1)."""
    as_float = tf.cast(image, tf.float32)
    return (as_float / 127.5) - 1

def decode_image(image):
    """Decode JPEG bytes into a 3-channel tensor reshaped to IMAGE_SIZE + [3]."""
    decoded = tf.image.decode_jpeg(image, channels=3)
    return tf.reshape(decoded, [*IMAGE_SIZE, 3])

def random_crop(image):
    """Return a randomly positioned 256 x 256 x 3 crop of `image`."""
    return tf.image.random_crop(image, size=[256, 256, 3])

def random_jitter(image):
    """Enlarge `image` by a factor of 1.3, then take a random 256 x 256 crop.

    No mirroring happens here; flipping is done separately by `flip`.
    """
    # int(256*1.3) == 332, so the image is resized to 332 x 332 x 3
    enlarged_side = int(256*1.3)
    enlarged = tf.image.resize(image, [enlarged_side, enlarged_side],
                               method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    # randomly cropping back to 256 x 256 x 3
    return random_crop(enlarged)

def flip(image):
    """Mirror `image` horizontally (left <-> right)."""
    mirrored = tf.image.flip_left_right(image)
    return mirrored

def preprocess_image_train(image, label=None):
    """Apply `random_jitter` to `image`; `label` is accepted but not used."""
    return random_jitter(image)

def read_tfrecord(example):
    """Parse one serialized TFRecord example and return its decoded image.

    The record is expected to carry three scalar string features
    ("image_name", "image", "target"); only "image" is used.
    """
    feature_spec = {
        key: tf.io.FixedLenFeature([], tf.string)
        for key in ("image_name", "image", "target")
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    return decode_image(parsed['image'])

def load_dataset(filenames, labeled=False, ordered=False, repeats=200):
    """Build the augmented, normalized training dataset from TFRecord files.

    Pipeline: decoded images, followed by their shuffled horizontal flips,
    followed by `repeats` passes of randomly jittered copies of both; every
    element is then normalized and the result is shuffled.
    `labeled` and `ordered` are accepted but not used.
    """
    originals = tf.data.TFRecordDataset(filenames).map(read_tfrecord, num_parallel_calls=AUTOTUNE)

    flipped = originals.map(flip, num_parallel_calls=AUTOTUNE).shuffle(100000)
    with_flips = originals.concatenate(flipped)

    jittered = (
        with_flips
        .map(random_jitter, num_parallel_calls=AUTOTUNE)
        .shuffle(10000, reshuffle_each_iteration=True)
        .repeat(repeats)
    )
    augmented = with_flips.concatenate(jittered)

    return augmented.map(normalize, num_parallel_calls=AUTOTUNE).shuffle(10000)

# Training dataset in batches of 100; a final partial batch is dropped.
monet_ds = load_dataset(MONET_FILENAMES, labeled=True, repeats=100).batch(100, drop_remainder=True)

**Display the first images from the dataset:**

In [None]:
def view_image(ds, rows=2):
    """Show the first 5 * `rows` images of the first batch of `ds` in a grid.

    Pixel values are mapped with x / 2 + 0.5 before display.
    """
    columns = 5
    first_batch = next(iter(ds)).numpy()  # extract 1 batch from the dataset

    figure = plt.figure(figsize=(22, rows * 5.05 ))
    for position in range(columns * rows):
        axis = figure.add_subplot(rows, columns, position + 1, xticks=[], yticks=[])
        axis.imshow(first_batch[position] / 2 + .5)

# Preview the first 10 images (2 rows of 5) of the first training batch.
view_image(monet_ds)

**Create the generator:**

(The generator takes a sampled noise vector, reshapes it, and upsamples it step by step into a Monet-style image. Choose the appropriate architecture for your experiments.)

In [None]:
#Original generator architecture with batch normalization and LeakyReLUs
def Generator(LATENT_DIM=128, OUTPUT_CHANNELS=3):
    """Build the baseline generator: Dense -> 4x4 map -> six stride-2 transposed convs.

    Each hidden stage is Conv2DTranspose + BatchNormalization + LeakyReLU; the
    last transposed conv uses `tanh` and has `OUTPUT_CHANNELS` filters.
    Spatial size doubles per stage: 4 -> 8 -> 16 -> 32 -> 64 -> 128 -> 256.
    """
    def upsample(filters, **extra):
        # A fresh initializer instance per layer, as in the hand-unrolled version.
        return layers.Conv2DTranspose(filters, 4, strides=(2, 2), padding='same',
                                      kernel_initializer=tf.random_normal_initializer(0., 0.02),
                                      use_bias=False, **extra)

    model = tf.keras.Sequential()

    # Project the latent vector and reshape it into a 4 x 4 feature map.
    model.add(layers.Dense(4*4*LATENT_DIM, use_bias=False, input_shape=(LATENT_DIM,)))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU(alpha=0.2))
    model.add(layers.Reshape((4, 4, LATENT_DIM)))

    # Hidden upsampling stages (8x8 up to 128x128).
    hidden_filters = (LATENT_DIM, LATENT_DIM, LATENT_DIM//2, LATENT_DIM//4, LATENT_DIM//8)
    for filters in hidden_filters:
        model.add(upsample(filters))
        model.add(layers.BatchNormalization())
        model.add(layers.LeakyReLU())

    # Output stage: 256 x 256 x OUTPUT_CHANNELS, squashed by tanh.
    model.add(upsample(OUTPUT_CHANNELS, activation='tanh'))

    return model

In [None]:
#Generator with spectral normalization and LeakyReLUs
def Generator_s(LATENT_DIM=128, OUTPUT_CHANNELS=3):
    """Build the generator variant whose weight layers are wrapped in SpectralNormalization.

    Same layout as `Generator` but without batch normalization: every Dense /
    Conv2DTranspose is wrapped in `SpectralNormalization`, hidden stages are
    followed by LeakyReLU, and the output stage ends with `tanh`.
    """
    def upsample(filters, **extra):
        # A fresh initializer instance per layer.
        conv = layers.Conv2DTranspose(filters, 4, strides=(2, 2), padding='same',
                                      kernel_initializer=tf.random_normal_initializer(0., 0.02),
                                      use_bias=False, **extra)
        return SpectralNormalization(conv)

    model = tf.keras.Sequential()

    # Project the latent vector and reshape it into a 4 x 4 feature map.
    model.add(SpectralNormalization(layers.Dense(4*4*LATENT_DIM, use_bias=False, input_shape=(LATENT_DIM,))))
    model.add(layers.LeakyReLU(alpha=0.2))
    model.add(layers.Reshape((4, 4, LATENT_DIM)))

    # Hidden upsampling stages (8x8 up to 128x128).
    for filters in (LATENT_DIM, LATENT_DIM, LATENT_DIM//2, LATENT_DIM//4, LATENT_DIM//8):
        model.add(upsample(filters))
        model.add(layers.LeakyReLU())

    # Output stage. No LeakyReLU after tanh: it would squeeze negative outputs
    # to [-0.3, 0], while the training images are normalized to [-1, 1].
    model.add(upsample(OUTPUT_CHANNELS, activation='tanh'))

    return model

**Create the discriminator:**

(The discriminator takes an input image and classifies it as real or fake (generated). In a PatchGAN-style design the discriminator outputs a smaller 2D map, with higher values indicating a real classification and lower values a fake one; the variants below go one step further and flatten the final feature map into a single score per image. Choose the appropriate architecture for your experiments.)

In [None]:
#Original discriminator with LeakyReLUs and dropout=0.1; the final Dense layer is linear (no sigmoid)
def Discriminator():
    """Build the baseline discriminator for 256 x 256 x 3 images.

    Three stride-2 conv stages (64/128/256 filters, LeakyReLU, Dropout 0.1),
    a padded 512-filter conv with instance normalization, a padded 1-filter
    conv, then Flatten and a linear Dense(1) score.
    """
    model = tf.keras.Sequential()

    # Downsampling stages; only the first conv declares the input shape.
    for stage, filters in enumerate((64, 128, 256)):
        first_layer_args = {'input_shape': [256, 256, 3]} if stage == 0 else {}
        model.add(layers.Conv2D(filters, 4, strides=(2, 2), padding='same',
                                kernel_initializer=tf.random_normal_initializer(0., 0.02),
                                use_bias=False, **first_layer_args))
        model.add(layers.LeakyReLU())
        model.add(layers.Dropout(0.1))

    # The two stride-1 convs below share one initializer instance.
    model.add(layers.ZeroPadding2D())
    tail_initializer = tf.random_normal_initializer(0., 0.02)
    norm_gamma_init = keras.initializers.RandomNormal(mean=0.0, stddev=0.02)
    model.add(layers.Conv2D(512, 4, strides=1, kernel_initializer=tail_initializer, use_bias=False))
    model.add(tfa.layers.InstanceNormalization(gamma_initializer=norm_gamma_init))
    model.add(layers.LeakyReLU())
    model.add(layers.ZeroPadding2D())
    model.add(layers.Conv2D(1, 4, strides=1, kernel_initializer=tail_initializer))
    model.add(layers.LeakyReLU(alpha=0.2))

    # Collapse the feature map into one unbounded score per image.
    model.add(layers.Flatten())
    model.add(layers.Dense(1, activation='linear'))

    return model

In [None]:
#Discriminator with addition of minibatch discrimination.
def Discriminator_m():
    """Build the discriminator variant that adds a MinibatchDiscrimination layer.

    Three stride-2 conv stages (64/128/256 filters, LeakyReLU, Dropout 0.3),
    a padded 512-filter conv with instance normalization, LeakyReLU and batch
    normalization, then Flatten -> Dense(512) -> MinibatchDiscrimination ->
    Dense(512) -> linear Dense(1).
    """
    model = tf.keras.Sequential()

    # Downsampling stages; only the first conv declares the input shape.
    for stage, filters in enumerate((64, 128, 256)):
        first_layer_args = {'input_shape': [256, 256, 3]} if stage == 0 else {}
        model.add(layers.Conv2D(filters, 4, strides=(2, 2), padding='same',
                                kernel_initializer=tf.random_normal_initializer(0., 0.02),
                                use_bias=False, **first_layer_args))
        model.add(layers.LeakyReLU())
        model.add(layers.Dropout(0.3))

    model.add(layers.ZeroPadding2D())
    conv_initializer = tf.random_normal_initializer(0., 0.02)
    norm_gamma_init = keras.initializers.RandomNormal(mean=0.0, stddev=0.02)
    model.add(layers.Conv2D(512, 4, strides=1, kernel_initializer=conv_initializer, use_bias=False))
    model.add(tfa.layers.InstanceNormalization(gamma_initializer=norm_gamma_init))
    model.add(layers.LeakyReLU())
    model.add(layers.BatchNormalization())

    # Dense head with minibatch features in the middle.
    model.add(layers.Flatten())
    model.add(layers.Dense(512))
    model.add(layers.LeakyReLU())
    model.add(MinibatchDiscrimination(num_kernel=100, dim_kernel=5))
    model.add(layers.Dense(512))
    model.add(layers.LeakyReLU(alpha=0.2))
    model.add(layers.Dense(1, activation='linear'))

    return model

In [None]:
#Discriminator with spectral normalization
def Discriminator_s():
    """Build the discriminator variant with spectrally normalized conv layers.

    `SpectralNormalization` is a wrapper that needs the layer it normalizes, so
    each of the first four convs is wrapped rather than followed by a separate
    argument-less layer. Layout: three stride-2 stages (64/128/256 filters,
    LeakyReLU, Dropout 0.3), a padded 512-filter conv, a padded 1-filter conv,
    then Flatten and a sigmoid Dense(1).
    """
    model = tf.keras.Sequential()

    # Downsampling stages; only the first conv declares the input shape.
    for stage, filters in enumerate((64, 128, 256)):
        first_layer_args = {'input_shape': [256, 256, 3]} if stage == 0 else {}
        conv = layers.Conv2D(filters, 4, strides=(2, 2), padding='same',
                             kernel_initializer=tf.random_normal_initializer(0., 0.02),
                             use_bias=False, **first_layer_args)
        model.add(SpectralNormalization(conv))
        model.add(layers.LeakyReLU())
        model.add(layers.Dropout(0.3))

    model.add(layers.ZeroPadding2D())
    initializer = tf.random_normal_initializer(0., 0.02)
    model.add(SpectralNormalization(
        layers.Conv2D(512, 4, strides=1, kernel_initializer=initializer, use_bias=False)))
    model.add(layers.LeakyReLU())
    model.add(layers.ZeroPadding2D())
    model.add(layers.Conv2D(1, 4, strides=1, kernel_initializer=initializer))
    model.add(layers.LeakyReLU(alpha=0.2))

    model.add(layers.Flatten())
    model.add(layers.Dense(1, activation='sigmoid'))

    return model

In [None]:
#Discriminator with minibatch discrimination and spectral normalization.
def Discriminator_ms():
    """Build the discriminator with both spectral normalization and minibatch discrimination.

    Three spectrally normalized stride-2 conv stages (64/128/256 filters,
    LeakyReLU, Dropout 0.3), a padded spectrally normalized 512-filter conv
    with LeakyReLU and batch normalization, then Flatten -> Dense(512) ->
    MinibatchDiscrimination -> Dense(512) -> sigmoid Dense(1).
    """
    def sn_conv(filters, **conv_args):
        # A fresh initializer instance per conv layer.
        return SpectralNormalization(
            layers.Conv2D(filters, 4, kernel_initializer=tf.random_normal_initializer(0., 0.02),
                          use_bias=False, **conv_args))

    model = tf.keras.Sequential()

    # Downsampling stages; only the first conv declares the input shape.
    for stage, filters in enumerate((64, 128, 256)):
        first_layer_args = {'input_shape': [256, 256, 3]} if stage == 0 else {}
        model.add(sn_conv(filters, strides=(2, 2), padding='same', **first_layer_args))
        model.add(layers.LeakyReLU())
        model.add(layers.Dropout(0.3))

    model.add(layers.ZeroPadding2D())
    model.add(sn_conv(512, strides=1))
    model.add(layers.LeakyReLU())
    model.add(layers.BatchNormalization())

    # Dense head with minibatch features in the middle.
    model.add(layers.Flatten())
    model.add(layers.Dense(512))
    model.add(layers.LeakyReLU())
    model.add(MinibatchDiscrimination(num_kernel=100, dim_kernel=5))
    model.add(layers.Dense(512))
    model.add(layers.LeakyReLU(alpha=0.2))
    model.add(layers.Dense(1, activation='sigmoid'))

    return model

**Define loss:**

In [None]:
#Least squares loss
with strategy.scope():
    def discriminator_loss(predictions_real, predictions_gen, labels_real):
        """Least-squares loss on predictions measured relative to the other group's mean.

        Generated predictions (minus the mean real prediction) are pushed
        towards -labels_real; real predictions (minus the mean generated
        prediction) are pushed towards +labels_real. Returns the average of the two terms.
        """
        mean_real = tf.reduce_mean(predictions_real)
        mean_gen = tf.reduce_mean(predictions_gen)
        loss_on_gen = tf.reduce_mean((predictions_gen - mean_real + labels_real) ** 2)
        loss_on_real = tf.reduce_mean((predictions_real - mean_gen - labels_real) ** 2)
        return (loss_on_gen + loss_on_real) / 2

    def generator_loss(predictions_real, predictions_gen, labels_real):
        """Mirror image of `discriminator_loss`: the target signs are swapped.

        Generated predictions (minus the mean real prediction) are pushed
        towards +labels_real; real predictions (minus the mean generated
        prediction) towards -labels_real. Returns the average of the two terms.
        """
        mean_real = tf.reduce_mean(predictions_real)
        mean_gen = tf.reduce_mean(predictions_gen)
        loss_on_gen = tf.reduce_mean((predictions_gen - mean_real - labels_real) ** 2)
        loss_on_real = tf.reduce_mean((predictions_real - mean_gen + labels_real) ** 2)
        return (loss_on_gen + loss_on_real) / 2

**Define GAN monitor (for images)**

In [None]:
# Directory that receives the sample images written after every epoch.
gen_dir = 'generated'

# exist_ok avoids the check-then-create race and makes re-running the cell safe.
os.makedirs(gen_dir, exist_ok=True)

class GANMonitor(keras.callbacks.Callback):
    """Callback that saves a few generator samples as PNG files after every epoch.

    Arguments:
        num_img: number of images to generate and save per epoch.
        latent_dim: length of the random latent vector fed to the generator.
        output_dir: directory the PNG files are written to (defaults to `gen_dir`).
    """

    def __init__(self, num_img=3, latent_dim=128, output_dir=gen_dir):
        super(GANMonitor, self).__init__()
        self.num_img = num_img
        self.latent_dim = latent_dim
        self.output_dir = output_dir

    def on_epoch_end(self, epoch, logs=None):
        random_latent_vectors = tf.random.normal(shape=(self.num_img, self.latent_dim))
        generated_images = self.model.generator(random_latent_vectors, training=False)
        # Training images are normalized with x / 127.5 - 1, so invert that
        # mapping to get back to the 0..255 pixel range before saving.
        generated_images = np.clip(generated_images.numpy() * 127.5 + 127.5, 0, 255)
        for i in range(self.num_img):
            # scale=False: the values are already pixel intensities.
            img = keras.preprocessing.image.array_to_img(generated_images[i], scale=False)
            file_name = "generated_img_{i}_{epoch}.png".format(i=i, epoch=epoch)
            img.save(os.path.join(self.output_dir, file_name))

**Define MonetGAN**

In [None]:
# Number of image channels and length of the generator's latent input vector.
OUTPUT_CHANNELS = 3
LATENT_DIM = 128

# Create both networks inside the distribution strategy's scope.
with strategy.scope():
    monet_generator = Generator(LATENT_DIM, 3)
    monet_discriminator = Discriminator()

class MonetGan(keras.Model):
    """Keras model that couples a generator and a discriminator with a custom train step.

    Arguments:
        monet_generator: model mapping latent vectors to images.
        monet_discriminator: model mapping images to scores.
        latent_dim: length of the latent vector sampled per image.
        real_label: base target value passed to the loss functions.
        fake_label: stored on the instance; not used by `train_step`.
    """

    def __init__(self, monet_generator, monet_discriminator, latent_dim, real_label=0.5, fake_label=0):
        super().__init__()
        self.generator = monet_generator
        self.discriminator = monet_discriminator
        self.latent_dim = latent_dim
        self.real_label = real_label
        self.fake_label = fake_label

    def compile(self, d_opt, g_opt, d_loss_fn, g_loss_fn):
        """Store the two optimizers and the two loss callables on the model."""
        super().compile()
        self.d_opt = d_opt
        self.g_opt = g_opt
        self.d_loss_fn = d_loss_fn
        self.g_loss_fn = g_loss_fn

    def train_step(self, images):
        """Run one discriminator update and one generator update on a batch.

        Returns a dict with the scalar losses under "d_loss" and "g_loss".
        """
        if isinstance(images, tuple):
            images = images[0]

        # One latent vector per real image in the batch.
        n_samples = tf.shape(images)[0]
        latent = tf.random.normal(shape=(n_samples, self.latent_dim))

        # Target labels with a little uniform noise added on top.
        noisy_real_labels = tf.zeros((n_samples, 1)) + self.real_label
        noisy_real_labels += 0.05 * tf.random.uniform(tf.shape(noisy_real_labels))

        # Discriminator pass: generator in inference mode, discriminator in training mode.
        with tf.GradientTape() as d_tape:
            fakes = self.generator(latent, training=False)
            score_real = self.discriminator(images, training=True)
            score_fake = self.discriminator(fakes, training=True)
            d_loss = self.d_loss_fn(score_real, score_fake, noisy_real_labels)

        # Generator pass: the roles are reversed.
        with tf.GradientTape() as g_tape:
            fakes = self.generator(latent, training=True)
            score_real = self.discriminator(images, training=False)
            score_fake = self.discriminator(fakes, training=False)
            g_loss = self.g_loss_fn(score_real, score_fake, noisy_real_labels)

        d_vars = self.discriminator.trainable_variables
        g_vars = self.generator.trainable_variables
        d_grads = d_tape.gradient(d_loss, d_vars)
        g_grads = g_tape.gradient(g_loss, g_vars)

        self.g_opt.apply_gradients(zip(g_grads, g_vars))
        self.d_opt.apply_gradients(zip(d_grads, d_vars))

        return {"d_loss": d_loss, "g_loss": g_loss}

Define evaluation metric

**Training parameters and checkpoints (creates new model)**

In [None]:
# Weight files are written as training_1/cp-<epoch>.h5.
checkpoint_path = 'training_1/cp-{epoch:04d}.h5'
checkpoint_dir = os.path.dirname(checkpoint_path)
os.makedirs(checkpoint_dir, exist_ok=True)


# Creates checkpoint callback to pass to model.fit.
# The keyword is `period` (singular); `periods` is not a recognized option, so
# the intended "save every 5 epochs" setting never took effect.
checkpoint = ModelCheckpoint(checkpoint_path,
                            save_weights_only=True,
                            verbose=1,
                            period=5)

In [None]:
#Training 
# Training hyper-parameters
EPOCHS = 50

LR_G = 0.001   # generator learning rate
LR_D = 0.0005  # discriminator learning rate
beta_1 = .5    # Adam beta_1 shared by both optimizers

real_label = 0.05
fake_label = 0.95

# Build the combined model and attach optimizers/losses inside the strategy scope.
with strategy.scope():
    monet_gan = MonetGan(
        monet_discriminator=monet_discriminator,
        monet_generator=monet_generator,
        latent_dim=LATENT_DIM,
        real_label=real_label,
        fake_label=fake_label,
    )

    monet_gan.compile(
        d_opt=tf.keras.optimizers.Adam(learning_rate=LR_D, beta_1=beta_1),
        g_opt=tf.keras.optimizers.Adam(learning_rate=LR_G, beta_1=beta_1),
        d_loss_fn=discriminator_loss,
        g_loss_fn=generator_loss,
    )

**Look at saved checkpoints (if not training from scratch)**

In [None]:
# List the checkpoint files saved so far (shell escape; {checkpoint_dir} is filled in from Python).
!ls {checkpoint_dir}

**Choose and load latest checkpoint (if not training from scratch)**

In [None]:
# Pick the most recently created .h5 checkpoint, if any, and load it.
list_of_files = glob.glob(checkpoint_dir + '/*.h5')
if list_of_files:
    latest_file = max(list_of_files, key=os.path.getctime)
    print(latest_file)
    # Load checkpoint
    monet_gan.load_weights(latest_file)
else:
    # max() on an empty list raises ValueError; report the situation instead.
    latest_file = None
    print('No checkpoint found in', checkpoint_dir, '- nothing loaded.')

Only run this cell if you want to delete the output folders.

In [None]:
#shutil.rmtree("/kaggle/working/generated")
#shutil.rmtree("/kaggle/working/training_1")

[Learning rate scheduler](https://keras.io/guides/writing_your_own_callbacks/#learning-rate-scheduling)

In [None]:
class CustomLearningRateScheduler(keras.callbacks.Callback):
    """Callback that updates both optimizers' learning rates at the start of each epoch.

    Arguments:
        schedule: a callable `schedule(epoch, d_lr, g_lr)` receiving the epoch
            index (integer, starting at 0) and the current discriminator and
            generator learning rates; it returns a two-item sequence
            `(new_d_lr, new_g_lr)`.
    """

    def __init__(self, schedule):
        super().__init__()
        self.schedule = schedule

    def on_epoch_begin(self, epoch, logs=None):
        model = self.model
        if not hasattr(model.d_opt, "lr"):
            raise ValueError('Disc_optimizer must have a "lr" attribute.')
        if not hasattr(model.g_opt, "lr"):
            raise ValueError('Gen_optimizer must have a "lr" attribute.')

        backend = tf.keras.backend
        # Read the rates currently set on the two optimizers.
        current_d = float(backend.get_value(model.d_opt.learning_rate))
        current_g = float(backend.get_value(model.g_opt.learning_rate))

        # Ask the schedule for this epoch's rates: index 0 is disc, index 1 is gen.
        scheduled = self.schedule(epoch, current_d, current_g)
        new_d, new_g = scheduled[0], scheduled[1]

        # Write them back before the epoch starts.
        backend.set_value(model.d_opt.lr, new_d)
        backend.set_value(model.g_opt.lr, new_g)
        print("\nEpoch %05d: Learning rate is %6.4f (Disc) and %6.4f (Gen)." % (epoch, new_d, new_g))


LR_SCHEDULE = [
    # (epoch to start, gen learning rate, disc learning rate) tuples
    # start: (0, 0.01, 0.005) G,D
    (10, 0.005, 0.0025),
    (20, 0.0025, 0.00125),
    (30, 0.00125, 0.000625),
    (40, 0.000625, 0.0003125)
]


def lr_schedule(epoch, d_lr, g_lr):
    """Return `[disc_lr, gen_lr]` for `epoch`.

    If `epoch` is the start epoch of a row in `LR_SCHEDULE`, that row's rates
    are returned; otherwise the current rates `d_lr` and `g_lr` are passed
    through unchanged.

    The table stores rows as (epoch, gen, disc) while the callback expects
    (disc, gen), so the two rates are swapped into the right order here.
    """
    for start_epoch, scheduled_g_lr, scheduled_d_lr in LR_SCHEDULE:
        if epoch == start_epoch:
            return [scheduled_d_lr, scheduled_g_lr]
    return [d_lr, g_lr]

**Training loop**

In [None]:
# Collect per-epoch losses in `history` while training.
history = History()

training_callbacks = [
    checkpoint,
    GANMonitor(num_img=3, latent_dim=LATENT_DIM),
    CustomLearningRateScheduler(lr_schedule),
    history,
]

monet_gan.fit(monet_ds, epochs=EPOCHS, callbacks=training_callbacks)

In [None]:
# summarize losses
# Plot the per-epoch generator and discriminator losses recorded by `history`.
fig, ax = plt.subplots()
ax.plot(history.history['g_loss'], label='generator')
ax.plot(history.history['d_loss'], label='discriminator')
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(loc='upper left')
plt.show()

**Display image from epoch-number**

In [None]:
def display_image(num_img, epoch_no):
    """Open the saved sample number `num_img` from epoch `epoch_no` as a PIL image."""
    image_path = 'generated/generated_img_{i}_{epoch}.png'.format(i = num_img, epoch = epoch_no)
    return PIL.Image.open(image_path)

In [None]:
# epoch_no ranges from 0 to EPOCHS - 1 (0 to 49 with the settings above).
# num_img ranges from 0 to 2. For more samples per epoch, pass GANMonitor(num_img=n, ...) in monet_gan.fit().
display_image(0, 0) 

**Create gif**

In [None]:
anim_file = 'dcgan.gif'


def _epoch_of(path):
    """Extract the trailing epoch number from 'generated_img_<i>_<epoch>.png'."""
    stem = os.path.splitext(os.path.basename(path))[0]
    return int(stem.rsplit('_', 1)[-1])


# Frames of sample 2 (use img_0 or img_1 for the other samples). The trailing
# underscore keeps e.g. img_20 out, and sorting by epoch *number* keeps epoch 10
# after epoch 9 (a plain string sort would place it before epoch 2).
filenames = sorted(glob.glob('generated/generated_img_2_*.png'), key=_epoch_of)
if not filenames:
    raise FileNotFoundError("No generated images found in 'generated/' - run training first.")

with imageio.get_writer(anim_file, mode='I') as writer:
    for filename in filenames:
        writer.append_data(imageio.imread(filename))
    # Append the last frame once more, as the original loop did.
    writer.append_data(imageio.imread(filenames[-1]))

In [None]:
# Install the tensorflow_docs helper package, then embed the GIF written above in the notebook output.
!pip install git+https://github.com/tensorflow/docs
import tensorflow_docs.vis.embed as embed
embed.embed_file(anim_file)

Predict images

In [None]:
from scipy.linalg import sqrtm
def calculate_fid(model, images1, images2):
    """Frechet distance between two sample matrices (rows = samples, columns = features).

    Computes ||mu1 - mu2||^2 + Tr(S1 + S2 - 2 * sqrt(S1 @ S2)), where mu and S
    are the column means and covariance of each input. The statistics are taken
    directly from `images1` / `images2`; `model` is accepted but not used.
    """
    mean_a = images1.mean(axis=0)
    mean_b = images2.mean(axis=0)
    cov_a = np.cov(images1, rowvar=False)
    cov_b = np.cov(images2, rowvar=False)

    # squared distance between the two mean vectors
    mean_term = np.sum((mean_a - mean_b)**2.0)

    # matrix square root of the covariance product; drop any imaginary
    # part left behind by numerical error
    cov_root = sqrtm(cov_a.dot(cov_b))
    if np.iscomplexobj(cov_root):
        cov_root = cov_root.real

    return mean_term + np.trace(cov_a + cov_b - 2.0 * cov_root)

In [None]:
# Generate 10 images from random latent vectors and save them as JPEG files.
image_dir =  '/images'
if not os.path.exists(image_dir):
    os.makedirs(image_dir)

for i in range(10):
    latent = np.random.randn(1, LATENT_DIM)
    prediction = monet_generator(latent, training=False)[0].numpy()
    # map the generator output x to x * 127.5 + 127.5 and store it as 8-bit pixels
    prediction = (prediction * 127.5 + 127.5).astype(np.uint8)
    im = PIL.Image.fromarray(prediction)
    im.save(os.path.join(image_dir, f"{i}.jpg"))

In [None]:
# Smallest Frechet distance between one generated image and the images of one
# real batch, both compared as grayscale matrices on the 0..255 scale.
fid = 999999999
prediction = monet_generator(np.random.randn(1, LATENT_DIM), training=False)[0].numpy()
prediction = (prediction * 127.5 + 127.5).astype(np.uint8)
prediction = PIL.Image.fromarray(prediction)
prediction_gray = np.asarray(prediction.convert('L')).astype(np.float64)

batch = next(iter(monet_ds))
for image in batch:
    # Dataset images are normalized with x / 127.5 - 1; undo that first so the
    # real image is on the same 0..255 scale as the generated one (casting the
    # normalized values straight to an unsigned integer destroys them).
    image = image.numpy() * 127.5 + 127.5
    image_gray = np.dot(image, [0.2989, 0.5870, 0.1140])
    t = calculate_fid(monet_gan, prediction_gray, image_gray)
    if t < fid:
        fid = t
print(fid)

In [None]:
# Zip the directory the generated images were saved to (`image_dir`), not the
# whole working directory, which does not contain them.
shutil.make_archive("/kaggle/working/images", 'zip', image_dir)

Todo - implement MiFID metric for evaluation, and maybe others (IS?)<br>
Debug - Spectral Normalization