**Import packages:**

In [None]:
from __future__ import absolute_import, division, print_function # bring to first line
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, applications, Sequential
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler
import tensorflow_addons as tfa
import tensorflow_datasets as tfds

from kaggle_datasets import KaggleDatasets
import glob
import imageio
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import sys
print("Python version:", sys.version)
import PIL
import time

from functools import partial
from albumentations import (
    Compose, RandomBrightness, JpegCompression, HueSaturationValue, RandomContrast, HorizontalFlip,
    Rotate
)

from IPython import display

***Set the accelerator to TPU* and then run the following code:**

In [None]:
# Use a TPU when one is attached to the session; otherwise fall back to the
# default (CPU / single-GPU) distribution strategy.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Device:', tpu.master())
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
except Exception as tpu_error:
    # Catch Exception rather than using a bare `except:` so that kernel
    # interrupts (KeyboardInterrupt / SystemExit) still propagate, and report
    # why the TPU path was skipped instead of failing silently.
    print('TPU not available, using the default strategy:', tpu_error)
    strategy = tf.distribute.get_strategy()
print('Number of replicas:', strategy.num_replicas_in_sync)

# Lets tf.data pick the level of parallelism for map() calls.
AUTOTUNE = tf.data.experimental.AUTOTUNE

print(tf.__version__)

**Load the data:**

(The various definitions are used to augment the small data set)

In [None]:
# Location of the competition data, as reported by the Kaggle helper.
GCS_PATH = KaggleDatasets().get_gcs_path()

# Every Monet TFRecord shard under that location.
monet_glob_pattern = str(GCS_PATH + '/monet_tfrec/*.tfrec')
MONET_FILENAMES = tf.io.gfile.glob(monet_glob_pattern)
print('Monet TFRecord Files:', len(MONET_FILENAMES))

# Height and width that decoded images are reshaped to.
IMAGE_SIZE = [256, 256]

def normalize(image):
    """Cast `image` to float32 and compute `image / 127.5 - 1`."""
    as_float = tf.cast(image, tf.float32)
    return (as_float / 127.5) - 1

def decode_image(image):
    """Decode JPEG bytes into a 3-channel tensor reshaped to IMAGE_SIZE + 3 channels."""
    decoded = tf.image.decode_jpeg(image, channels=3)
    target_shape = [*IMAGE_SIZE, 3]
    return tf.reshape(decoded, target_shape)

def random_crop(image):
    """Return a randomly positioned 256 x 256 x 3 crop of `image`."""
    return tf.image.random_crop(image, size=[256, 256, 3])

def random_jitter(image):
    """Enlarge `image`, then take a random 256 x 256 x 3 crop of it.

    The enlarged side is int(256*1.3) = 332 pixels, resized with
    nearest-neighbour interpolation. No mirroring is applied here; see `flip`.
    """
    enlarged_side = int(256*1.3)
    enlarged = tf.image.resize(image, [enlarged_side, enlarged_side],
                               method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    return random_crop(enlarged)

def flip(image):
    """Return `image` mirrored left-to-right."""
    mirrored = tf.image.flip_left_right(image)
    return mirrored

def preprocess_image_train(image, label=None):
    """Apply `random_jitter` to `image`; `label` is accepted but not used."""
    return random_jitter(image)

def read_tfrecord(example):
    """Parse one serialized TFRecord example and return its decoded image.

    The record is parsed with three scalar string features; only "image" is
    used, the other two are parsed and discarded.
    """
    string_scalar = tf.io.FixedLenFeature([], tf.string)
    feature_spec = {
        "image_name": string_scalar,
        "image": string_scalar,
        "target": string_scalar,
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    return decode_image(parsed['image'])

def load_dataset(filenames, labeled=False, ordered=False, repeats=200):
    """Build the augmented, normalised (unbatched) image dataset.

    Stages, in order:
      1. decode every record in `filenames`;
      2. append a shuffled, left-right flipped copy of those images;
      3. append `repeats` passes of randomly jittered versions of (1)+(2);
      4. normalise every image and shuffle the result.

    `labeled` and `ordered` are accepted but not used by this function.
    """
    decoded = tf.data.TFRecordDataset(filenames).map(read_tfrecord, num_parallel_calls=AUTOTUNE)

    mirrored = decoded.map(flip, num_parallel_calls=AUTOTUNE).shuffle(100000)
    with_mirrored = decoded.concatenate(mirrored)

    jittered = with_mirrored.map(random_jitter, num_parallel_calls=AUTOTUNE)
    jittered = jittered.shuffle(10000, reshuffle_each_iteration=True).repeat(repeats)
    augmented = with_mirrored.concatenate(jittered)

    return augmented.map(normalize, num_parallel_calls=AUTOTUNE).shuffle(10000)

# Augmented Monet images in batches of exactly 100 (a short final batch is dropped).
monet_ds = load_dataset(
    MONET_FILENAMES,
    labeled=True,
    repeats=100,
).batch(100, drop_remainder=True)

**Display the first images from the dataset:**

In [None]:
def view_image(ds, rows=2):
    """Plot the first 5 * rows images of the first batch of `ds` in a 5-column grid.

    Pixel values are mapped with `x / 2 + .5` before display.
    """
    columns = 5
    batch = next(iter(ds)).numpy()  # first batch only

    figure = plt.figure(figsize=(22, rows * 5.05 ))
    for position in range(1, columns * rows + 1):
        axis = figure.add_subplot(rows, columns, position, xticks=[], yticks=[])
        axis.imshow(batch[position - 1] / 2 + .5)

view_image(monet_ds)

**Create the generator:**

(The generator takes a random noise vector, reshapes it into a small feature map, and upsamples it with transposed convolutions into a 256×256 Monet-style image.)

In [None]:
def Generator(LATENT_DIM=128, OUTPUT_CHANNELS=3):
    """Build the generator: latent vector -> 256 x 256 x OUTPUT_CHANNELS image.

    A dense layer projects the latent vector to a 4 x 4 x LATENT_DIM feature
    map; five stride-2 transposed convolutions (each followed by batch
    normalisation and ReLU) upsample it to 128 x 128; a final stride-2
    transposed convolution with a tanh activation produces the image.

    The tanh layer is the last layer, so outputs lie in [-1, 1], the same range
    `normalize` gives real images. Earlier versions appended BatchNormalization
    and ReLU after the tanh, which made the output non-negative and unbounded.
    Weights saved from that earlier layout will not match this layer list.

    Args:
        LATENT_DIM: length of the input noise vector; also the base filter count.
        OUTPUT_CHANNELS: number of channels in the generated image.

    Returns:
        A `tf.keras.Sequential` model.
    """
    model = tf.keras.Sequential()

    # Project the latent vector and reshape it into a 4 x 4 feature map.
    model.add(layers.Dense(4*4*LATENT_DIM, use_bias=False, input_shape=(LATENT_DIM,)))
    model.add(layers.BatchNormalization())
    model.add(layers.ReLU())
    model.add(layers.Reshape((4, 4, LATENT_DIM)))
    assert model.output_shape == (None, 4, 4, LATENT_DIM) # Note: None is the batch size

    # Each stage doubles the spatial size: 4 -> 8 -> 16 -> 32 -> 64 -> 128.
    side = 4
    stage_filters = (LATENT_DIM, LATENT_DIM, LATENT_DIM//2, LATENT_DIM//4, LATENT_DIM//8)
    for filters in stage_filters:
        initializer = tf.random_normal_initializer(0., 0.02)
        model.add(layers.Conv2DTranspose(filters, 4, strides=(2, 2), padding='same',
                                         kernel_initializer=initializer, use_bias=False))
        side *= 2
        assert model.output_shape == (None, side, side, filters)
        model.add(layers.BatchNormalization())
        model.add(layers.ReLU())

    # Output layer: 128 -> 256, squashed to [-1, 1] by tanh. Nothing follows it.
    initializer = tf.random_normal_initializer(0., 0.02)
    model.add(layers.Conv2DTranspose(OUTPUT_CHANNELS, 4, strides=(2, 2), padding='same',
                                     kernel_initializer=initializer, use_bias=False,
                                     activation='tanh'))
    assert model.output_shape == (None, 256, 256, OUTPUT_CHANNELS)

    return model

**Create the discriminator:**

(The discriminator takes in the input image and classifies it as real or fake (generated). A stack of convolutions first reduces the image to a smaller 2D feature map; that map is then flattened and passed through a dense layer, so the final output is a single score per image, with higher values indicating a real classification and lower values indicating a fake classification.)

In [None]:
def Discriminator():
    """Build the discriminator: 256 x 256 x 3 image -> one score per image.

    Layout:
      * three stride-2 4x4 convolutions (64, 128, 256 filters), each followed
        by LeakyReLU and 30% dropout;
      * zero padding, a stride-1 4x4 convolution with 512 filters, instance
        normalisation and LeakyReLU;
      * zero padding, a stride-1 4x4 convolution with 1 filter and LeakyReLU(0.2);
      * flatten, Dense(1) and a final LeakyReLU.

    Returns:
        A `tf.keras.Sequential` model.
    """
    model = tf.keras.Sequential()

    # Down-sampling stages; only the first one declares the input shape.
    for stage_index, filters in enumerate((64, 128, 256)):
        conv_options = dict(strides=(2, 2), padding='same',
                            kernel_initializer=tf.random_normal_initializer(0., 0.02),
                            use_bias=False)
        if stage_index == 0:
            conv_options['input_shape'] = [256, 256, 3]
        model.add(layers.Conv2D(filters, 4, **conv_options))
        model.add(layers.LeakyReLU())
        model.add(layers.Dropout(0.3))

    # The two stride-1 convolutions below share one initializer object.
    shared_initializer = tf.random_normal_initializer(0., 0.02)
    gamma_init = keras.initializers.RandomNormal(mean=0.0, stddev=0.02)

    model.add(layers.ZeroPadding2D())
    model.add(layers.Conv2D(512, 4, strides=1, kernel_initializer=shared_initializer, use_bias=False))
    model.add(tfa.layers.InstanceNormalization(gamma_initializer=gamma_init))
    model.add(layers.LeakyReLU())

    model.add(layers.ZeroPadding2D())
    model.add(layers.Conv2D(1, 4, strides=1, kernel_initializer=shared_initializer))
    model.add(layers.LeakyReLU(alpha=0.2))

    # Collapse the remaining feature map to a single value per image.
    model.add(layers.Flatten())
    model.add(layers.Dense(1))
    model.add(layers.LeakyReLU())

    return model

**Define least squares loss:**

In [None]:
# Least-squares losses.
# Each prediction is compared against the batch mean of the opposite class,
# offset by `labels_real` (this resembles a relativistic-average least-squares
# formulation). The generator loss mirrors the discriminator loss with the
# sign of the label term swapped.
with strategy.scope():
    def discriminator_loss(predictions_real, predictions_gen, labels_real):
        """Mean of the two squared-error terms that push real scores above fake ones."""
        mean_real = tf.reduce_mean(predictions_real)
        mean_gen = tf.reduce_mean(predictions_gen)
        fake_term = tf.reduce_mean((predictions_gen - mean_real + labels_real) ** 2)
        real_term = tf.reduce_mean((predictions_real - mean_gen - labels_real) ** 2)
        return (fake_term + real_term) / 2

    def generator_loss(predictions_real, predictions_gen, labels_real):
        """Mean of the two squared-error terms that push fake scores above real ones."""
        mean_real = tf.reduce_mean(predictions_real)
        mean_gen = tf.reduce_mean(predictions_gen)
        fake_term = tf.reduce_mean((predictions_gen - mean_real - labels_real) ** 2)
        real_term = tf.reduce_mean((predictions_real - mean_gen + labels_real) ** 2)
        return (fake_term + real_term) / 2

**Alternatively, BCE loss:**

In [None]:
# BCE losses.
# Per-example losses are summed and divided by `global_batch_size`. The default
# of 32 keeps the scale used so far, but note that `monet_ds` is batched with
# .batch(100, ...) -- pass global_batch_size=100 for the sum to be a true
# per-example mean over the batch.
with strategy.scope():
    def discriminator_loss(predictions_real, predictions_gen, global_batch_size=32):
        """Binary cross-entropy of the discriminator on real (target 1) and generated (target 0) logits.

        Args:
            predictions_real: discriminator logits for real images.
            predictions_gen: discriminator logits for generated images.
            global_batch_size: divisor applied to each summed loss term.

        Returns:
            Scalar tensor: scaled real loss plus scaled generated loss.
        """
        bce = tf.keras.losses.BinaryCrossentropy(from_logits=True, reduction=tf.keras.losses.Reduction.NONE)
        scale = 1. / global_batch_size
        loss1 = tf.reduce_sum(bce(tf.ones_like(predictions_real), predictions_real)) * scale
        loss2 = tf.reduce_sum(bce(tf.zeros_like(predictions_gen), predictions_gen)) * scale
        return loss1 + loss2

    def generator_loss(predictions_gen, global_batch_size=32):
        """Binary cross-entropy of generated logits against target 1.

        Args:
            predictions_gen: discriminator logits for generated images.
            global_batch_size: divisor applied to the summed loss.

        Returns:
            Scalar tensor with the scaled loss.
        """
        bce = tf.keras.losses.BinaryCrossentropy(from_logits=True, reduction=tf.keras.losses.Reduction.NONE)
        return tf.reduce_sum(bce(tf.ones_like(predictions_gen), predictions_gen)) * (1. / global_batch_size)

**Define GAN monitor (for images)**<br>
Seems like this will generate 3 images at the end of each epoch from random noise<br>
So after 50 epochs, there will be 50*3 = 150 images

In [None]:
# Directory that receives the sample images written at the end of each epoch.
gen_dir = 'generated'

# exist_ok makes the cell safe to re-run and avoids a check-then-create race.
os.makedirs(gen_dir, exist_ok=True)

class GANMonitor(keras.callbacks.Callback):
    """Callback that saves a few generated sample images after every epoch.

    Args:
        num_img: number of images to generate and save per epoch.
        latent_dim: length of the noise vector fed to the generator.
    """

    def __init__(self, num_img=3, latent_dim=128):
        # Let the base Callback initialise its own attributes first.
        super(GANMonitor, self).__init__()
        self.num_img = num_img
        self.latent_dim = latent_dim

    def on_epoch_end(self, epoch, logs=None):
        """Generate `num_img` images from fresh noise and write them as PNG files."""
        random_latent_vectors = tf.random.normal(shape=(self.num_img, self.latent_dim))
        # Run the generator explicitly in inference mode.
        generated_images = self.model.generator(random_latent_vectors, training=False)
        generated_images *= 255
        # Keep the converted array (the result used to be discarded).
        generated_images = generated_images.numpy()
        for i in range(self.num_img):
            # NOTE(review): array_to_img rescales its input by default, so the
            # factor of 255 above presumably has no visible effect -- confirm.
            img = keras.preprocessing.image.array_to_img(generated_images[i])
            img.save("generated/generated_img_{i}_{epoch}.png".format(i=i, epoch=epoch))

**Define MonetGAN (least squares loss):**

In [None]:
class MonetGan(keras.Model):
    """GAN wrapper trained with the three-argument (least-squares) loss functions.

    Args:
        monet_generator: model mapping noise of length `latent_dim` to images.
        monet_discriminator: model mapping images to a score.
        latent_dim: length of the noise vector sampled for each image.
        real_label: base target value used for real images.
        fake_label: stored for completeness; not used by `train_step`.
    """

    def __init__(self, monet_generator, monet_discriminator, latent_dim, real_label=0.5, fake_label=0):
        super(MonetGan, self).__init__()
        self.generator = monet_generator
        self.discriminator = monet_discriminator
        self.latent_dim = latent_dim
        self.real_label = real_label
        self.fake_label = fake_label

    def compile(self, d_opt, g_opt, d_loss_fn, g_loss_fn):
        """Store the optimizers and loss functions used by `train_step`.

        Both loss functions are called as fn(real_output, fake_output, labels_real).
        """
        super(MonetGan, self).compile()
        self.d_opt = d_opt
        self.g_opt = g_opt
        self.d_loss_fn = d_loss_fn
        self.g_loss_fn = g_loss_fn

    def train_step(self, images):
        """Run one discriminator update and one generator update on a batch.

        Returns:
            Dict with the discriminator loss ("d_loss") and generator loss ("g_loss").
        """
        if isinstance(images, tuple):
            images = images[0]

        # Sample random points in the latent space
        batch_size = tf.shape(images)[0]
        noise = tf.random.normal(shape=(batch_size, self.latent_dim))

        # Real-image targets with a little uniform noise added (label noise trick).
        # The matching fake-label tensor was never consumed by either loss, so it
        # is no longer built.
        labels_real = tf.zeros((batch_size, 1)) + self.real_label
        labels_real += 0.05 * tf.random.uniform(tf.shape(labels_real))

        # Discriminator pass: generator in inference mode, discriminator in training mode.
        with tf.GradientTape() as disc_tape:
            generated_images = self.generator(noise, training=False)

            real_output = self.discriminator(images, training=True)
            fake_output = self.discriminator(generated_images, training=True)

            disc_loss = self.d_loss_fn(real_output, fake_output, labels_real)

        # Generator pass: generator in training mode, discriminator in inference mode.
        # The loss needs the scores of the real images as well as the fake ones.
        with tf.GradientTape() as gen_tape:
            generated_images = self.generator(noise, training=True)

            real_output = self.discriminator(images, training=False)
            fake_output = self.discriminator(generated_images, training=False)

            gen_loss = self.g_loss_fn(real_output, fake_output, labels_real)

        # Both gradients are computed before either set of weights is updated.
        gradients_of_discriminator = disc_tape.gradient(disc_loss, self.discriminator.trainable_weights)
        gradients_of_generator = gen_tape.gradient(gen_loss, self.generator.trainable_weights)

        self.g_opt.apply_gradients(zip(gradients_of_generator, self.generator.trainable_weights))
        self.d_opt.apply_gradients(zip(gradients_of_discriminator, self.discriminator.trainable_weights))

        return {"d_loss": disc_loss, "g_loss": gen_loss}

**Define MonetGAN (BCE loss):**

In [None]:
class MonetGan(keras.Model):
    """GAN wrapper trained with the BCE loss functions.

    Args:
        monet_generator: model mapping noise of length `latent_dim` to images.
        monet_discriminator: model mapping images to a logit.
        latent_dim: length of the noise vector sampled for each image.
        real_label: stored for interface compatibility; not used by `train_step`.
        fake_label: stored for interface compatibility; not used by `train_step`.
    """

    def __init__(self, monet_generator, monet_discriminator, latent_dim, real_label=0.5, fake_label=0):
        super(MonetGan, self).__init__()
        self.generator = monet_generator
        self.discriminator = monet_discriminator
        self.latent_dim = latent_dim
        self.real_label = real_label
        self.fake_label = fake_label

    def compile(self, d_opt, g_opt, d_loss_fn, g_loss_fn):
        """Store the optimizers and loss functions used by `train_step`.

        `d_loss_fn` is called as fn(real_output, fake_output) and `g_loss_fn`
        as fn(fake_output).
        """
        super(MonetGan, self).compile()
        self.d_opt = d_opt
        self.g_opt = g_opt
        self.d_loss_fn = d_loss_fn
        self.g_loss_fn = g_loss_fn

    def train_step(self, images):
        """Run one discriminator update and one generator update on a batch.

        Returns:
            Dict with the discriminator loss ("d_loss") and generator loss ("g_loss").
        """
        if isinstance(images, tuple):
            images = images[0]

        # Sample random points in the latent space
        batch_size = tf.shape(images)[0]
        noise = tf.random.normal(shape=(batch_size, self.latent_dim))

        # The BCE losses build their own 0/1 targets, so the noisy label tensors
        # that used to be created here (and never read) are gone.

        # Discriminator pass: generator in inference mode, discriminator in training mode.
        with tf.GradientTape() as disc_tape:
            generated_images = self.generator(noise, training=False)

            real_output = self.discriminator(images, training=True)
            fake_output = self.discriminator(generated_images, training=True)

            disc_loss = self.d_loss_fn(real_output, fake_output)

        # Generator pass: only the scores of the generated images feed the loss,
        # so the discriminator is not evaluated on the real images here.
        with tf.GradientTape() as gen_tape:
            generated_images = self.generator(noise, training=True)

            fake_output = self.discriminator(generated_images, training=False)

            gen_loss = self.g_loss_fn(fake_output)

        # Both gradients are computed before either set of weights is updated.
        gradients_of_generator = gen_tape.gradient(gen_loss, self.generator.trainable_weights)
        gradients_of_discriminator = disc_tape.gradient(disc_loss, self.discriminator.trainable_weights)

        self.g_opt.apply_gradients(zip(gradients_of_generator, self.generator.trainable_weights))
        self.d_opt.apply_gradients(zip(gradients_of_discriminator, self.discriminator.trainable_weights))

        return {"d_loss": disc_loss, "g_loss": gen_loss}

**Training parameters and checkpoints (creates new model)**

In [None]:
# Candidate learning-rate pairs, each written as (LR_G, LR_D).
t0 = (0.001, 0.0005)
t1 = (0.001, 0.001)
t2 = (0.001, 0.002)
t3 = (0.001, 0.004)
t4 = (0.001, 0.00025)
t5 = (0.00025, 0.001)
t6 = (0.0005, 0.001)
t7 = (0.002, 0.001)
t8 = (0.004, 0.001)

# Pair used for this run.
LR_G = t5[0]
LR_D = t5[1]

checkpoint_path = 'training_1/cp-{epoch:04d}.h5' # try h5 instead of ckpt
checkpoint_dir = os.path.dirname(checkpoint_path)
# exist_ok makes the cell safe to re-run.
os.makedirs(checkpoint_dir, exist_ok=True)


# Creates checkpoint callback to pass to model.fit
# * save_weights_only=True: the GAN is a subclassed keras.Model, which cannot
#   be saved as a full model in HDF5 format, and the loading cells further
#   down use load_weights(), i.e. they expect weight files.
# * save_freq='epoch': a checkpoint is written after every epoch. This is what
#   happened before as well: the earlier `periods=5` argument is not a
#   ModelCheckpoint option and was ignored. For sparser checkpoints, pass an
#   integer save_freq (counted in batches).
cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,
                                                save_weights_only=True,
                                                verbose=1,
                                                save_freq='epoch')

# Training hyper-parameters
EPOCHS = 100

beta_1 = .5  # only needed by the Adam alternative noted below

# Base label values handed to MonetGan.
real_label = .66
fake_label = 0

OUTPUT_CHANNELS = 3
LATENT_DIM = 128

# Models and optimizers are created inside the strategy scope.
with strategy.scope():

    # Generates Monet-esque paintings from noise.
    monet_generator = Generator(LATENT_DIM, OUTPUT_CHANNELS)
    # Differentiates real Monet paintings from generated ones.
    monet_discriminator = Discriminator()

    monet_gan = MonetGan(
        monet_generator=monet_generator,
        monet_discriminator=monet_discriminator,
        latent_dim=LATENT_DIM,
        real_label=real_label,
        fake_label=fake_label,
    )

    # Plain SGD is used for both networks. Adam alternative:
    #   d_opt = tf.keras.optimizers.Adam(learning_rate=LR_D, beta_1=beta_1)
    #   g_opt = tf.keras.optimizers.Adam(learning_rate=LR_G, beta_1=beta_1)
    monet_gan.compile(
        d_opt=tf.keras.optimizers.SGD(learning_rate=LR_D),
        g_opt=tf.keras.optimizers.SGD(learning_rate=LR_G),
        d_loss_fn=discriminator_loss,
        g_loss_fn=generator_loss,
    )

**Look at saved checkpoints**<br>
list files in the checkpoint_dir

In [None]:
# Shell out to list the files currently in the checkpoint directory.
!ls {checkpoint_dir}

**load latest checkpoint (if not training from scratch)**

In [None]:
import glob

# Pick the most recently changed .h5 checkpoint in the checkpoint directory.
list_of_files = glob.glob(checkpoint_dir + '/*.h5') # * means all if need specific format then *.csv
if not list_of_files:
    # max() on an empty list raises an opaque ValueError; say what is missing instead.
    raise FileNotFoundError(
        'No .h5 checkpoints found in {!r}; train first or skip the loading cells.'.format(checkpoint_dir)
    )
latest_file = max(list_of_files, key=os.path.getctime)
print(latest_file)

In [None]:
# Restore the weights from the checkpoint file found in the previous cell.
monet_gan.load_weights(latest_file)

Or choose a checkpoint to load from

In [None]:
# Restore the weights from a specific checkpoint; edit the file name to the one you want.
monet_gan.load_weights(checkpoint_dir + '/cp-0003.h5')

**Training loop**

In [None]:
from tensorflow.keras.callbacks import History

# Collects the per-epoch losses so they can be plotted and saved afterwards.
history = History()

# Checkpointing, sample-image dumps and loss recording, in that order.
training_callbacks = [
    cp_callback,
    GANMonitor(num_img=3, latent_dim=LATENT_DIM),
    history,
]

monet_gan.fit(monet_ds, epochs=EPOCHS, callbacks=training_callbacks)

**Plot and save the training losses, then display an image from a chosen epoch**

In [None]:
# Plot both loss curves on one set of axes and save the figure.
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(history.history['g_loss'])
loss_ax.plot(history.history['d_loss'])
loss_ax.set_title('model loss')
loss_ax.set_ylabel('loss')
loss_ax.set_xlabel('epoch')
loss_ax.legend(['generator', 'discriminator'], loc='upper left')
# The {}s can be changed to {EPOCHS:3d} if the number of epochs is varied.
loss_fig.savefig('loss_graph--G_{LR_G:.5f}--D_{LR_D:.5f}--.png'.format(LR_G = LR_G, LR_D=LR_D))
plt.show()

# Write the same losses to a CSV file, indexed by 1-based epoch number.
hist = history.history
hist_df = pd.DataFrame.from_dict(hist)
hist_df.index = list(range(1, len(hist_df) + 1))
# The {}s can be changed to {EPOCHS:3d} if the number of epochs is varied.
hist_csv_name = 'hist--G_{LR_G:.5f}--D_{LR_D:.5f}--.csv'.format(LR_G = LR_G, LR_D=LR_D)
hist_df.to_csv(hist_csv_name, index_label = 'Epoch')

In [None]:
def display_image(num_img, epoch_no):
  """Open the saved sample image with index `num_img` from epoch `epoch_no`."""
  image_path = 'generated/generated_img_{i}_{epoch}.png'.format(i = num_img, epoch = epoch_no)
  return PIL.Image.open(image_path)

In [None]:
# epoch_no is the zero-based epoch index used in the saved file names, i.e. 0 to EPOCHS - 1
# (the original note said 0 to 49, which only covers a 50-epoch run)
# choose num_img from 0 to 2. if want more samples, in monet_gan.fit(), set GAN_monitor(num_img=n,) for your choice of n
display_image(0, 2) 

**Create gif**

In [None]:
anim_file = 'dcgan.gif'


def _frame_order(path):
    """Sort key for 'generated_img_{i}_{epoch}.png': (image index, epoch) as integers.

    A plain string sort puts epoch 10 before epoch 2. Files whose names do not
    follow the pattern are placed last, ordered by path.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    try:
        _, img_index, epoch = stem.rsplit('_', 2)
        return (int(img_index), int(epoch), path)
    except ValueError:
        return (float('inf'), float('inf'), path)


filenames = sorted(glob.glob('generated/generated*.png'), key=_frame_order)
if not filenames:
    # Without this check the loop below would be skipped and the final
    # "repeat last frame" step would fail on an undefined name.
    raise FileNotFoundError("No images matching 'generated/generated*.png'; run training first.")

with imageio.get_writer(anim_file, mode='I') as writer:
    for filename in filenames:
        writer.append_data(imageio.imread(filename))
    # Append the last frame a second time so the animation lingers on it.
    writer.append_data(imageio.imread(filenames[-1]))

In [None]:
# Install the tensorflow_docs helper package straight from GitHub (needs network access),
# then use it to embed the generated gif in the notebook output.
!pip install git+https://github.com/tensorflow/docs
import tensorflow_docs.vis.embed as embed
embed.embed_file(anim_file)

**Predict images**

In [None]:
# Output directory for the exported images.
image_dir =  '../images'
# exist_ok makes the cell safe to re-run.
os.makedirs(image_dir, exist_ok=True)

# Generate 7000 images, one latent vector at a time, and save them as JPEGs.
for i in range(7000):
    prediction = monet_generator(np.random.randn(1, LATENT_DIM), training=False)[0].numpy()
    # Map [-1, 1] to [0, 255]; clip first so out-of-range values cannot wrap
    # around when cast to uint8.
    prediction = np.clip(prediction * 127.5 + 127.5, 0, 255).astype(np.uint8)
    im = PIL.Image.fromarray(prediction)
    im.save(os.path.join(image_dir, f"{i}.jpg"))

In [None]:
import shutil
# Zip the exported images into /kaggle/working/images.zip.
# NOTE(review): the images are written to '../images' relative to the working
# directory; this matches "/kaggle/images" only when the notebook runs from
# /kaggle/working -- confirm.
shutil.make_archive("/kaggle/working/images", 'zip', "/kaggle/images")