# This notebook is for training of our CGAN model

Initially we will import all the required libraries which are necessary for training of our model.

In [0]:
import numpy as np
import pandas as pd
from numpy import load
from numpy import zeros
from numpy import ones
from numpy import asarray
from numpy import append
from numpy.random import random
from numpy.random import randint
from numpy.random import shuffle
import time
import os
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import Adamax
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Reshape
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Conv2DTranspose
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import Concatenate
from keras.models import load_model
from keras.callbacks import ModelCheckpoint
import matplotlib.pyplot as plt
from tensorflow.keras.utils import plot_model
from matplotlib import patheffects as path_effects
import collections
from tensorflow.keras.models import load_model
from tensorflow import get_logger as log


Using TensorFlow backend.


We will mount our Google Drive on the Google Colab platform so that we can access our files and save the results to the drive.

In [0]:
# Colab-only helper that exposes Google Drive inside the runtime.
from google.colab import drive
# Mount the drive under /content/drive.
drive.mount('/content/drive')
# Work from the project folder: the dataset files and the 'results/' and 'cgan/'
# output paths used later in this notebook are all relative to this directory.
os.chdir("/content/drive/My Drive/deeplearning/")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
#We will set our flags and turn off the warnings for a precise output. TensorFlow and Keras are both very good at giving warnings when the syntax being used is out of date, dimensions do not match, or features (such as trainable=True) are not used as required. The problem is that you sometimes have to read through many warnings before seeing the impact of the issue. When debugging, being able to shut off warnings can be helpful.

In [0]:
#  SET YOUR FLAGS
# Set to True to silence TensorFlow warnings; leave False to see them.
qErrorHide = False
if qErrorHide:
    # Remind the reader (twice) that warnings are being suppressed for this run.
    print("\n***REMEMBER:  WARNINGS turned OFF***\n***REMEMBER:  WARNINGS turned OFF***\n")
    # `log` is tensorflow.get_logger (imported above); only ERROR and above are reported.
    log().setLevel('ERROR')

This is a very important step. Training takes a long time and may be interrupted many times, so we specify whether we want to start training from scratch or load an already existing model and continue training from it.

In [0]:
#    INDICATE IF STARTING FRESH OR CONTINUING FROM PREVIOUS RUN
# True  -> resume from the checkpoint saved after `epochs_done` epochs.
# False -> build fresh models and start from epoch 0.
qRestart = False
# Number of epochs already completed (155 is the checkpoint used when resuming).
epochs_done = 155 if qRestart else 0
# Total number of epochs to reach; the same target applies to both modes.
epochs_goal = 200


In this section, we will develop a GAN for the faces dataset that we have prepared. The first step is to define the models.
The best way to design models in Keras to have multiple inputs is by using the Functional API, as opposed to the Sequential API . We will use the functional API to implement the discriminator, generator, and the composite model.

Starting with the discriminator model, a new second input is defined that takes an integer for the class label of the image. This has the effect of making the input image conditional on the provided class label.

The class label is then passed through an Embedding layer with the size of 8. This means that each of the 4 classes for the CelebA dataset (0 through 3) will map to a different 8-element vector representation that will be learned by the discriminator model.

The output of the embedding is then passed to a fully connected layer with a linear activation. Importantly, the fully connected layer has enough activations that they can be reshaped into one channel of a 64x64 image. The activations are reshaped into a single 64x64 activation map and concatenated with the input image. This has the effect of looking like a four-channel input image (the three color channels plus one label channel) to the next convolutional layer.

The define_discriminator() below implements this update to the discriminator model. The parameterized shape of the input image is also used after the embedding layer to define the number of activations for the fully connected layer to reshape its output. The number of classes in the problem is also parameterized in the function and set.

We will use the Keras functional API, with an embedding layer for the labels. The discriminator is implemented as a modest convolutional neural network using best practices for GAN design such as using the LeakyReLU activation function with a slope of 0.2, using a 2 × 2 stride to downsample, and the Adamax version of stochastic gradient descent with a learning rate of 0.00007 (the value set in the code below). While Adam optimizers are generally used, Adamax is recommended when there are embeddings. The discriminator model takes as input one 64 × 64 color image and a class label, which is passed through an embedding layer, and outputs a binary prediction as to whether the image is real (class = 1) or fake (class = 0).

In [0]:
# define the standalone discriminator model
def define_discriminator(in_shape=(64,64,3), n_classes=4):
    """Build and compile the conditional discriminator.

    The class label is embedded (8 values per class), projected by a Dense
    layer to in_shape[0] * in_shape[1] activations, reshaped into one extra
    image channel and concatenated with the input image. Five 5x5
    convolutions (the last four with stride 2) are flattened into a single
    sigmoid unit.

    Args:
        in_shape: (height, width, channels) of the input images.
        n_classes: number of distinct class labels accepted by the embedding.

    Returns:
        The compiled Keras model mapping [image, label] to a real/fake
        probability (binary cross-entropy loss, accuracy metric).

    Side effects:
        Prints intermediate shapes, the initial embedding weights and the
        model summary, and writes the architecture diagram to
        'cgan/discriminator_model1.png'.
    """
    print("**********  ENTERED discriminator  *****************")
    ##### foundation for labels
    in_label = Input(shape=(1,))
    embedding_layer = Embedding(n_classes, 8)
    # embedding_layer.trainable = False
    li = embedding_layer (in_label)
    # one activation per pixel so the label can become an extra image channel
    n_nodes = in_shape[0] * in_shape[1]
    print(">>embedding>> in_shape[0], in_shape[1], n_nodes: ", in_shape[0], in_shape[1], n_nodes)
    li = Dense(n_nodes)(li)
    li = Reshape((in_shape[0], in_shape[1], 1))(li)
    # image input
    dropout = 0.1
    in_image = Input(shape=in_shape)
    print("\nin_image: ", in_image)
    # concat label as a channel
    merge = Concatenate()([in_image, li])
    print("\nmerge.shape: ", merge.shape)
    # no stride here: the spatial size of the input is kept
    fe = Conv2D(128, (5,5), padding='same')(merge)
    fe = LeakyReLU(alpha=0.2)(fe)
    # dropout is only applied after this first convolution
    fe = Dropout(dropout)(fe)
    print("fe.shape: ", fe.shape)
    # downsample to 32x32
    fe = Conv2D(128, (5,5), strides=(2,2), padding='same')(fe)
    fe = LeakyReLU(alpha=0.2)(fe)
    # fe = Dropout(dropout)(fe)
    print("fe.shape: ", fe.shape)
    # downsample to 16x16
    fe = Conv2D(128, (5,5), strides=(2,2), padding='same')(fe)
    fe = LeakyReLU(alpha=0.2)(fe)
    # fe = Dropout(dropout)(fe)
    print("fe.shape: ", fe.shape)
    # downsample to 8x8
    fe = Conv2D(128, (5,5), strides=(2,2), padding='same')(fe)
    fe = LeakyReLU(alpha=0.2)(fe)
    # fe = Dropout(dropout)(fe)
    print("fe.shape: ", fe.shape)
    # downsample to 4x4
    fe = Conv2D(128, (5,5), strides=(2,2), padding='same')(fe)
    fe = LeakyReLU(alpha=0.2)(fe)
    # fe = Dropout(dropout)(fe)
    print("fe.shape: ", fe.shape)
    # flatten feature maps
    fe = Flatten()(fe)
    # fe = Dropout(dropout)(fe)
    print("fe flatten shape: ", fe.shape)
    # output
    out_layer = Dense(1, activation='sigmoid')(fe)
    print("out_layer.shape: ", out_layer.shape)
    # define model
    model = Model([in_image, in_label], out_layer)
    print("\nmodel: ", model)
    # compile model
    # `learning_rate` is the supported keyword; the old `lr` alias is deprecated.
    # NOTE(review): beta_1=0.08 differs from the 0.5 used by the generator and
    # GAN optimizers in this notebook -- confirm it is intentional.
    # epsilon=10e-8 is numerically 1e-7.
    opt = Adamax(learning_rate=0.00007, beta_1=0.08, beta_2=0.999, epsilon=10e-8)
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    print("\nembedding_layer.get_weights(): \n",embedding_layer.get_weights())
    model.summary()
    plot_model(model, to_file='cgan/discriminator_model1.png')
    return model


Next, the generator model must be updated to take the class label. This has the effect of making the point in the latent space conditional on the provided class label.

As in the discriminator, the class label is passed through an embedding layer to map it to a unique 8-element vector and is then passed through a fully connected layer with a linear activation before being resized. In this case, the activations of the fully connected layer are resized into a single 4x4 feature map. This is to match the 4x4 feature map activations of the unconditional generator model. The new 4x4 feature map is added as one more channel to the existing 128, resulting in 129 feature maps that are then upsampled as in the prior model.

The define_generator() function below implements this, again parameterizing the number of classes as we did with the discriminator model.

The generator model takes as input a point in the latent space and an embedded label, and outputs a single 64 × 64 color image. This is achieved by using a fully connected layer to interpret the point in the latent space and provide sufficient activations that can be reshaped into many copies (in this case 128) of a low-resolution version of the output image (here 4 × 4). This is then upsampled four times, doubling the size and quadrupling the area of the activations each time using transpose convolutional layers. The model uses best practices such as the LeakyReLU activation, a kernel size that is a multiple of the stride size, and a hyperbolic tangent (Tanh) activation function in the output layer. The define_generator() function below defines the generator model and returns it. The generator is not trained directly, but the code below nevertheless compiles it (restart() later reloads the saved generator with compile=True). The size of the latent space is parameterized as a function argument.

In [0]:
# define the standalone generator model
def define_generator(latent_dim, n_classes=4):
    """Build and compile the conditional generator.

    The class label is embedded (8 values per class), projected to 16
    activations and reshaped into one 4x4 feature map. The latent vector is
    projected to 128 feature maps of 4x4. Both are concatenated (129 maps)
    and upsampled by four stride-2 transpose convolutions before a final
    5x5 convolution with tanh activation produces a 3-channel image.

    Args:
        latent_dim: length of the latent vector fed to the generator.
        n_classes: number of distinct class labels accepted by the embedding.

    Returns:
        The compiled Keras model mapping [latent vector, label] to an image.

    Side effects:
        Prints intermediate shapes, the initial embedding weights and the
        model summary.
    """
    print("**********  ENTERED generator  *****************")
    ##### foundation for labels
    in_label = Input(shape=(1,))
    embedding_layer = Embedding(n_classes, 8)
    embedding_layer.trainable = True
    li = embedding_layer (in_label)
    # the label becomes a single 4x4 feature map
    n_nodes = 4 * 4
    li = Dense(n_nodes)(li)
    li = Reshape((4 , 4, 1))(li)
    print("generator...  n_nodes, li.shape: ", n_nodes, li.shape)
    ##### foundation for 4x4 image
    in_lat = Input(shape=(latent_dim,))
    n_nodes = 128 * 4 * 4
    genX = Dense(n_nodes)(in_lat)
    genX = LeakyReLU(alpha=0.2)(genX)
    genX = Reshape((4, 4, 128))(genX)
    dropout = 0.1
    print("genX.shape: ", genX.shape)
    ##### merge image gen and label input
    merge = Concatenate()([genX, li])
    print("merge.shape: ", merge.shape)
    ##### create merged model
    # upsample to 8x8
    gen = Conv2DTranspose(128, (4,4), strides=(2,2), padding='same')(merge)
    print("gen after CV2DT.shape: ", gen.shape)
    gen = LeakyReLU(alpha=0.2)(gen)
    # dropout is only applied after this first upsampling step
    gen = Dropout(dropout)(gen)
    print("gen.shape: ", gen.shape)
    # upsample to 16x16
    gen = Conv2DTranspose(128, (4,4), strides=(2,2), padding='same')(gen)
    gen = LeakyReLU(alpha=0.2)(gen)
    print("gen.shape: ", gen.shape)
    # upsample to 32x32
    gen = Conv2DTranspose(128, (4,4), strides=(2,2), padding='same')(gen)
    gen = LeakyReLU(alpha=0.2)(gen)
    print("gen.shape: ", gen.shape)
    # upsample to 64x64
    gen = Conv2DTranspose(128, (4,4), strides=(2,2), padding='same')(gen)
    gen = LeakyReLU(alpha=0.2)(gen)
    print("gen.shape: ", gen.shape)
    # output layer 64x64x3
    out_layer = Conv2D(3, (5,5), activation='tanh', padding='same')(gen)
    print("out_layer.shape: ", out_layer.shape)
    # define model
    model = Model(inputs=[in_lat, in_label], outputs=out_layer)
    # `learning_rate` is the supported keyword; the old `lr` alias is deprecated.
    # epsilon=10e-8 is numerically 1e-7.
    opt = Adamax(learning_rate=0.0002, beta_1=0.5, beta_2=0.999, epsilon=10e-8)
    # The generator is updated through the combined GAN model; it is still
    # compiled here, and restart() reloads the saved file with compile=True.
    model.compile(loss=['binary_crossentropy'], optimizer=opt)
    print("\nembedding_layer.get_weights(): \n",embedding_layer.get_weights())
    model.summary()
    # plot_model(model, to_file='generator_model1.png')
    return model

Finally, the composite GAN model requires updating. A GAN model can be defined that combines both the generator model and the discriminator model into one larger model. This larger model will be used to train the model weights in the generator, using the output and error calculated by the discriminator model. The discriminator model is trained separately, and as such, the model weights are marked as not trainable in this larger GAN model to ensure that only the weights of the generator model are updated. This change to the trainability of the discriminator weights only has an effect when training the combined GAN model, not when training the discriminator standalone.

The new GAN model will take a point in latent space as input and a class label and generate a prediction of whether input was real or fake, as before.

Using the functional API to design the model, it is important that we explicitly connect the image generated output from the generator as well as the class label input, both as input to the discriminator model. This allows the same class label input to flow down into the generator and down into the discriminator.



This larger GAN model takes as input a point in the latent space, uses the generator model to generate an image, which is fed as input to the discriminator model, then output or classified as real or fake. The define gan() function below implements this, taking the already-defined generator and discriminator models as input.

In [0]:
# define the combined generator and discriminator model, for updating the generator
def define_gan(g_model, d_model):
    """Stack the generator and discriminator into the composite GAN model.

    The discriminator is marked non-trainable before the composite model is
    built, and the generator's label input is wired into both the generator
    and the discriminator so the same label conditions both.

    Args:
        g_model: generator taking [latent vector, label] and producing an image.
        d_model: discriminator taking [image, label] and producing a
            real/fake probability. Its `trainable` attribute is set to False.

    Returns:
        The compiled Keras model mapping [latent vector, label] to the
        discriminator's output (binary cross-entropy loss).

    Side effects:
        Prints the model summary and writes the architecture diagram to
        'cgan/gan_model1.png'.
    """
    print("**********  ENTERED gan  *****************")
    # make weights in the discriminator not trainable
    d_model.trainable = False
    # get noise and label inputs from generator model
    gen_noise, gen_label = g_model.input
    # get image output from the generator model
    gen_output = g_model.output
    # connect image output and label input from generator as inputs to discriminator
    gan_output = d_model([gen_output, gen_label])
    # define gan model as taking noise and label and outputting a classification
    model = Model([gen_noise, gen_label], gan_output)
    # compile model
    # `learning_rate` is the supported keyword; the old `lr` alias is deprecated.
    # epsilon=10e-8 is numerically 1e-7.
    opt = Adamax(learning_rate=0.0002, beta_1=0.5, beta_2=0.999, epsilon=10e-8)
    model.compile(loss='binary_crossentropy', optimizer=opt)
    model.summary()
    plot_model(model, to_file='cgan/gan_model1.png')
    return model


There are circumstances where we want to insure that a generated image has particular characteristics, such as a face being  attractive, selecting a particular gender, and having facial features such as high cheek bones and large lips. Looking into the near future, it will be possible to create realistic GAN generated images of models wearing fashionable clothing, with specific expressions, and poses for catalogues. In this project, we could enter in the features: attractive, female, high cheek bones, and large lips in order to get many faces for fashion models.

There were three parts to this process:

selecting a subset of faces (only those identified as being "attractive"): Details of the process are discussed in data preprocessing.
identifying the characteristics or attributes to be used and their probabilities in the population of images:
..... 0 = featured as attractive and female and not high cheek bone and not large lips
..... 1 = featured as attractive and male
..... 2 = featured as attractive and female and high cheek bone
..... 3 = featured as attractive and female and not high cheek bone and large lips
setting up the cGAN so that it will generate and save faces based on the features (embeddings/labels) associated with an image.

There are four kinds of embedding, and the identity of the embedding (0 through 3) is stamped on the generated face. Faces identified as 0 are "female without high cheekbones and without large lips". Faces identified as 1 (male) are clearly male. Faces identified as 2 are female with high cheekbones. Label 3 identifies those faces which supposedly have large lips. The labels (0 through 3) are added when creating the image.

In [0]:
# assign categories
def assign_categs(df, lenrows):
    """Collapse three face attributes into one category code per row.

    Categories, in order of precedence:
        1 = 'Male' is positive
        2 = not male and 'High_Cheekbones' is positive
        3 = neither of the above and 'Big_Lips' is positive
        0 = none of the above

    Args:
        df: DataFrame with 'Male', 'High_Cheekbones' and 'Big_Lips' columns.
        lenrows: number of rows to produce (expected to equal len(df)).

    Returns:
        Integer NumPy array with one category code per row.

    Prints the attribute means and the running category mean as it goes.
    """
    print("\n*****  ATTRIBUTES: \n", df.mean())

    def _positive_flags(column, tag):
        # Report the column mean before and after mapping positive values to 1.
        raw = df[column]
        print(tag, raw.mean())
        flags = np.where(raw > 0, 1, raw)
        print(tag, flags.mean())
        return flags

    is_male = _positive_flags('Male', "face_male: ")

    has_cheekbones = _positive_flags('High_Cheekbones', "face_high_cheekbones: ")

    has_big_lips = _positive_flags('Big_Lips', "face_big_lips: ")

    # start with every row in category 0, then claim rows in precedence order
    categs = np.zeros(lenrows, dtype=int)
    print("categ.mean()): ", categs.mean())
    precedence = (
        (is_male, 1, "add face_male: categs.mean()): "),
        (has_cheekbones, 2, "add high_cheekbones: categs.mean()): "),
        (has_big_lips, 3, "add big lips: categs.mean()): "),
    )
    for flags, code, message in precedence:
        # only rows still in category 0 may be claimed by a later attribute
        unclaimed = categs == 0
        categs = np.where((flags > 0) & unclaimed, code, categs)
        print(message, categs.mean())
    print("\ncategs: \n", categs)
    return categs


In [0]:
def get_cumProbs(freqCategs, categs):
    """Turn per-category counts into cumulative probability boundaries.

    Args:
        freqCategs: sequence of (category, count) entries; only the count
            (index 1) of each entry is used, in the order given.
        categs: the category array the counts came from; its length is the
            divisor that turns counts into probabilities.

    Returns:
        NumPy array starting with 0.0 followed by the running sum of the
        category probabilities (one more element than freqCategs).
    """
    counts = asarray([entry[1] for entry in freqCategs], dtype=np.float32)
    print("freqListX: ", counts)
    n_total = len(categs)
    print("len(categs): ", n_total)
    probs = counts / n_total
    print("cumProbs: ", probs)
    # prepend the lower boundary, then accumulate left to right
    cumProbs = append((0.0), probs)
    for k in range(1, len(cumProbs)):
        cumProbs[k] += cumProbs[k - 1]
    print("cumProbs: ", cumProbs)
    return cumProbs

Now that we have defined the GAN model, we need to train it. But, before we can train the model, we require input data. The first step is to load and scale the pre-processed faces dataset. The saved NumPy array can be loaded, as we did in the previous section, then the pixel values must be scaled to the range [-1,1] to match the output of the generator model. The load real samples() function below implements this, returning the loaded and scaled image data ready for modeling.

In [0]:
def load_real_samples():
    """Load the face images and derive a category label for each one.

    Reads 'img_align_celeba_attractive_face.npz' (images),
    'ids_align_celeba_attractive_face.npz' (row positions) and the first
    100000 rows of 'list_attr_celeba.csv' (attributes) from the current
    working directory.

    Returns:
        ([X, categs], cumProbs) where X is the image array scaled to
        [-1, 1] as float32, categs is the category code array from
        assign_categs(), and cumProbs comes from get_cumProbs().

    Side effects:
        Sets the global pandas float display format and prints diagnostics.
    """
    # images: unsigned ints -> float32, then [0,255] -> [-1,1]
    faces = load('img_align_celeba_attractive_face.npz')['arr_0']
    X = (faces.astype('float32') - 127.5) / 127.5
    # attribute table without the file-name column
    attrs = pd.read_csv('list_attr_celeba.csv', nrows=100000).drop("image_id", axis=1)
    idsX = load('ids_align_celeba_attractive_face.npz')['arr_0']
    # pull the attribute row at each stored position, recoding -1 as 0
    dataX = list()
    for row_pos in idsX:
        row = attrs[row_pos:row_pos+1].values
        dataX.append(np.where(row == -1, 0, row))
    cols = attrs.columns
    lencols = len(cols)
    print("cols: ", cols)
    lenrows = len(dataX)
    dataVals = asarray(dataX).reshape((lenrows, lencols))
    df = pd.DataFrame(data=dataVals, columns=cols)
    pd.options.display.float_format = '{:,.3f}'.format
    categs = assign_categs(df, lenrows)
    # (category, count) pairs ordered by category
    freqCategs = sorted(collections.Counter(categs).items())
    print("freqCategs: ", freqCategs)
    cumProbs = get_cumProbs(freqCategs, categs)
    return [X, categs], cumProbs

We will require one batch (or a half batch) of real images from the dataset each update to the GAN model. A simple way to achieve this is to select a random sample of images from the dataset each time. The generate real samples() function below implements this, taking the prepared dataset as an argument, selecting and returning a random sample of face images and their corresponding class label for the discriminator, specifically class = 1, indicating that they are real images.

In [0]:
# select real samples
def generate_real_samples(dataset, n_samples):
    """Draw a random batch of real images with their labels.

    Args:
        dataset: pair [images, labels] of equally indexed arrays.
        n_samples: number of rows to draw (with replacement).

    Returns:
        ([X, labels], y) where X and labels are the selected rows and y is
        an (n_samples, 1) array of ones marking the samples as real.
    """
    all_images, all_labels = dataset
    # random row positions, possibly repeated
    picks = randint(0, all_images.shape[0], n_samples)
    batch = [all_images[picks], all_labels[picks]]
    # target value 1 == real
    return batch, ones((n_samples, 1))
 

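Next, we need inputs for the generator model. These are random points from the latent space. The generate_latent_points() function implements this, taking the size of the latent space and the number of points required as arguments and returning them as a batch of input samples for the generator model. Note that, rather than drawing Gaussian distributed random variables, the implementation below builds an evenly spaced grid of values in [-3, 3), shuffles it, and reshapes it into the batch. It also draws a class label for each point according to the cumulative label probabilities in cumProbs.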

In [0]:
# generate points in latent space as input for the generator
def generate_latent_points(latent_dim, n_samples, cumProbs, n_classes=4):
    """Create a batch of latent vectors and class labels for the generator.

    The latent values are an evenly spaced grid over [-3, 3) that is
    shuffled and reshaped to (n_samples, latent_dim). Each label is chosen
    by drawing a uniform number and finding which [cumProbs[i], cumProbs[i+1])
    interval it falls in.

    Args:
        latent_dim: length of each latent vector.
        n_samples: number of latent vectors to produce.
        cumProbs: cumulative probability boundaries, indexable from 0 to
            n_classes inclusive.
        n_classes: number of class labels.

    Returns:
        [z_input, labels]: the latent batch and an integer label array.
    """
    # print("generate_latent_points: ", latent_dim, n_samples)
    lower = -3.0
    n_values = latent_dim * n_samples
    step = (2.0 * abs(lower)) / n_values
    grid = asarray([lower + step * float(k) for k in range(0, n_values)])
    shuffle(grid)
    # one row of latent_dim values per sample
    z_input = grid.reshape(n_samples, latent_dim)
    draws = random(n_samples)
    labels = np.zeros(n_samples, dtype=int)
    for cls in range(n_classes):
        low_edge, high_edge = cumProbs[cls], cumProbs[cls + 1]
        in_interval = (draws >= low_edge) & (draws < high_edge)
        labels = np.where(in_interval, cls, labels)
    return [z_input, labels]

Next, we need to use the points in the latent space as input to the generator in order to generate new images. The generate fake samples() function below implements this, taking the generator model and size of the latent space as arguments, then generating points in the latent space and using them as input to the generator model. The function returns the generated images and their corresponding class label for the discriminator model, specifically class = 0 to indicate they are fake or generated.

In [0]:
# use the generator to generate n fake examples, with class labels
def generate_fake_samples(generator, latent_dim, n_samples, cumProbs):
    """Produce a batch of generated images with their labels.

    Args:
        generator: model whose predict() maps [latent batch, labels] to images.
        latent_dim: length of each latent vector.
        n_samples: number of images to generate.
        cumProbs: cumulative label probabilities passed on to
            generate_latent_points().

    Returns:
        ([images, labels], y) where y is an (n_samples, 1) array of zeros
        marking the samples as fake.
    """
    latent_batch, label_batch = generate_latent_points(latent_dim, n_samples, cumProbs)
    fakes = generator.predict([latent_batch, label_batch])
    # target value 0 == fake
    targets = zeros((n_samples, 1))
    return [fakes, label_batch], targets
 

The save plot() is called to create and save a plot of the generated images, and then the model is saved to a file. It's helpful if the image has a label stamped on it so you can see, at a glance, whether or not the embedding matches what you believe ought to be features of the generated image.

In [0]:
# create and save a plot of generated images
def save_plot(examples, labels, epoch, n=10):
    """Save an n x n grid of generated images, each stamped with its label.

    Args:
        examples: images with values in [-1, 1]; the first n*n are plotted.
        labels: label for each image, drawn as white text on the image.
        epoch: zero-based epoch index; the file name uses epoch + 1.
        n: grid side length.

    The figure is written to 'results/generated_plots/' and then closed.
    """
    # scale from [-1,1] to [0,1]
    rescaled = (examples + 1) / 2.0
    for cell in range(n * n):
        ax = plt.subplot(n, n, cell + 1)
        caption = str(labels[cell])
        ax.axis('off')
        ax.text(8.0, 20.0, caption, fontsize=6, color='white')
        ax.imshow(rescaled[cell])
    plt.savefig('results/generated_plots/generated_plots_e%03d.png' % (epoch+1))
    plt.close()
    

In [0]:
def save_real_plots(dataset, nRealPlots = 5, n=10, n_samples=100):
    """Save several n x n grids of randomly drawn real images with labels.

    Args:
        dataset: pair [images, labels] as returned by load_real_samples().
        nRealPlots: number of grid images to write.
        n: grid side length; n*n images are plotted per grid.
        n_samples: number of real images drawn for each grid.

    Files are written to 'results/real_plots/real_plot_eNNN.png', numbered
    from 001.
    """
    for plot_no in range(nRealPlots):
        # progress message on every fifth grid
        if plot_no % 5 == 0:
            print("real_plots: ", plot_no)
        [real_images, real_labels], _ = generate_real_samples(dataset, n_samples)
        # scale from [-1,1] to [0,1]
        real_images = (real_images + 1) / 2.0
        for cell in range(n * n):
            ax = plt.subplot(n, n, cell + 1)
            caption = str(real_labels[cell])
            ax.axis('off')
            ax.text(8.0, 20.0, caption, fontsize=6, color='white')
            ax.imshow(real_images[cell])
        plt.savefig('results/real_plots/real_plot_e%03d.png' % (plot_no+1))
        plt.close()
 

After every 5 training epochs, the summarize performance() function is called. There is currently no reliable way to automatically evaluate the quality of generated images. Therefore, we must generate images periodically during training and save the model at these times. This both provides a checkpoint that we can later load and use to generate images, and a way to safeguard against the training process failing, which can happen. Below defines the summarize performance() and save plot() functions. The summarize performance() function generates samples and evaluates the performance of the discriminator on real and fake samples. The classification accuracy is reported and might provide insight into model performance. 

In [0]:
# evaluate the discriminator, plot generated images, save generator model
def summarize_performance(epoch, g_model, d_model, gan_model, dataset, latent_dim, n_samples=100):
    """Report discriminator accuracy, save a sample grid and checkpoint the models.

    Args:
        epoch: zero-based epoch index; file names use epoch + 1.
        g_model: generator model.
        d_model: discriminator model.
        gan_model: composite GAN model.
        dataset: pair [images, labels] of real samples.
        latent_dim: length of the latent vectors.
        n_samples: number of real and of fake samples to evaluate.

    Note:
        Reads the module-level `cumProbs` (set by the driver cell) when
        generating the fake samples.

    Side effects:
        Prints the accuracies, writes the sample grid through save_plot()
        and saves the three models under 'results/models/'. The
        discriminator is switched to trainable for the save and back to
        non-trainable afterwards.
    """
    def _set_discriminator_trainable(flag):
        # toggle the model and every one of its layers
        d_model.trainable = flag
        for d_layer in d_model.layers:
            d_layer.trainable = flag

    # discriminator accuracy on real samples
    [real_images, real_labels], real_targets = generate_real_samples(dataset, n_samples)
    _, acc_real = d_model.evaluate([real_images, real_labels], real_targets, verbose=0)
    # discriminator accuracy on generated samples
    [fake_images, fake_labels], fake_targets = generate_fake_samples(g_model, latent_dim, n_samples, cumProbs)
    _, acc_fake = d_model.evaluate([fake_images, fake_labels], fake_targets, verbose=0)
    print('>Accuracy real: %.0f%%, fake: %.0f%%' % (acc_real*100, acc_fake*100))
    save_plot(fake_images, fake_labels, epoch)
    # checkpoint all three models, numbered by completed epochs
    tag = epoch + 1
    g_model.save('results/models/generator_model_%03d.h5' % (tag))
    gan_model.save('results/models/generator_model_gan%03d.h5' % (tag))
    dis_path = 'results/models/generator_model_dis%03d.h5' % (tag)
    _set_discriminator_trainable(True)
    d_model.save(dis_path)
    _set_discriminator_trainable(False)

In [0]:
def restart(epochs_done):
    """Reload the saved models for a given epoch and rebuild the GAN.

    Args:
        epochs_done: number of completed epochs; selects the
            'results/models/generator_model_disNNN.h5' and
            'results/models/generator_model_NNN.h5' files to load.

    Returns:
        (d_model, g_model, gan_model): the reloaded discriminator and
        generator, and a composite model freshly built by define_gan().
    """
    # gen_weights = array(model.get_weights())
    print("****  PULLING IN EPOCH: ", epochs_done)
    dis_path = 'results/models/generator_model_dis%03d.h5' % (epochs_done)
    gen_path = 'results/models/generator_model_%03d.h5' % (epochs_done)
    # discriminator: load, then mark the model and every layer trainable
    d_model = load_model(dis_path, compile=True)
    d_model.trainable = True
    for d_layer in d_model.layers:
        d_layer.trainable = True
    d_model.summary()
    # generator
    g_model = load_model(gen_path, compile=True)
    g_model.summary()
    # the composite model is rebuilt rather than loaded from disk
    gan_model = define_gan(g_model, d_model)
    gan_model.summary()
    return d_model, g_model, gan_model

We are now ready to fit the GAN models. The model is fit until epochs_goal (200 in this notebook) training epochs have been completed; this number is arbitrary, as the model begins generating plausible faces after perhaps the first few epochs. A batch size of 128 samples is used, so each training epoch involves (number of training images)/128 batches of real and fake samples and updates to the model; for example, 50,000 images would give about 390 batches. First, the discriminator model is updated for a half batch (64) of real samples, then a half batch of fake samples, together forming one batch of weight updates. The generator is then updated via the combined GAN model. Importantly, the class label is set to 1 or real for the fake samples. This has the effect of updating the generator toward getting better at generating real samples on the next batch. The train() function below implements this, taking the defined models, dataset, and size of the latent dimension as arguments and parameterizing the number of epochs and batch size with default arguments.

The following programming fragment also illustrates an approach which often prevents a run from ending in mode collapse. It depends on having captured discriminator weights, generator weights, and gan weights either during initialization or later in the process when all model losses are within bounds. The definition of the model loss bounds is arbitrary but reflects expert opinion about when losses are what might be expected and when they are clearly much too high or much too low. Reasonable discriminator and generator losses are between 0.1 and 1.0, and their arbitrary bounds are set to between 0.001 and 2.0. Reasonable gan losses are between 0.2 and 2.0 and their arbitrary bounds are set to 0.01 and 4.5.

What happens then is that discriminator, generator, and gan weights are collected when all three losses are "reasonable". When an individual model's loss goes out of bounds, the last collected weights for that particular model are restored, leaving the other model weights as they are, and the process moves forward. The process stops when mode collapse appears to be unavoidable even when model weights are replaced. This is identified when a particular set of model weights continues to be reused but repeatedly results in out-of-bound model losses.

In [0]:
# train the generator and discriminator
def train(g_model, d_model, gan_model, dataset, latent_dim, epochs_goal=200, n_batch=128, epochs_done=1):
    """Train the discriminator and generator, rolling back weights on runaway losses.

    For every batch the discriminator is updated on a half batch of real
    samples and a half batch of generated samples, then the generator is
    updated through the composite GAN model with all targets set to 1.

    Weight snapshots of all three models are taken whenever all three
    losses are inside their "reasonable" ranges (at most once every 9
    batches). When a loss leaves its outer bounds and a snapshot has been
    taken, the corresponding model is reset to its snapshot. After more
    than 20 resets without a new in-range batch, a final checkpoint is
    written and the process exits.

    Args:
        g_model: generator model.
        d_model: compiled discriminator model.
        gan_model: compiled composite GAN model.
        dataset: pair [images, labels] of real samples.
        latent_dim: length of the latent vectors.
        epochs_goal: epoch index at which training stops (exclusive).
        n_batch: batch size; the discriminator sees n_batch // 2 real and
            n_batch // 2 fake samples per step.
        epochs_done: epoch index at which training starts.

    Note:
        Reads the module-level `cumProbs` (set by the driver cell) when
        sampling labels for generated images.

    Raises:
        SystemExit: when too many consecutive weight resets occur.
    """
    import sys  # only needed for the bail-out path below

    nTryAgains = 0
    nTripsOnSameSavedWts = 0
    nSaves = 0
    bat_per_epo = int(dataset[0].shape[0] / n_batch)
    half_batch = int(n_batch / 2)
    # Snapshots are kept as the plain lists returned by get_weights(): the
    # per-layer arrays have different shapes, so wrapping them in np.array()
    # would build a ragged array, which recent NumPy versions reject.
    d_trainable_weights = d_model.get_weights()
    g_trainable_weights = g_model.get_weights()
    gan_trainable_weights = gan_model.get_weights()
    now = time.time()
    ij = 0          # global batch counter across epochs
    ijSave = -100   # batch counter at the last snapshot; <= 0 means "none yet"
    # manually enumerate epochs
    for i in range(epochs_done, epochs_goal):
        # enumerate batches over the training set
        for j in range(bat_per_epo):
            ij+=1
            # get randomly selected 'real' samples
            [X_real, labels_real], y_real = generate_real_samples(dataset, half_batch)
            # update discriminator model weights on real samples
            dis_loss, _ = d_model.train_on_batch([X_real, labels_real], y_real)
            # update discriminator model weights on generated samples;
            # note that `gen_loss` is the discriminator's loss on fakes (printed as d2)
            [X_fake, labels], y_fake = generate_fake_samples(g_model, latent_dim, half_batch, cumProbs)
            gen_loss, _ = d_model.train_on_batch([X_fake, labels], y_fake)
            [z_input, labels_input] = generate_latent_points(latent_dim, n_batch, cumProbs)
            # create inverted labels for the fake samples
            y_gan = ones((n_batch, 1))
            # update the generator via the discriminator's error
            gan_loss = gan_model.train_on_batch([z_input, labels_input], y_gan)
            # summarize loss on this batch
            if (j+1) % 5==0 or dis_loss > 1.10 or gen_loss > 1.10 or gan_loss > 1.80:
                diff = int(time.time()-now)
                print('>%d/%d, %d/%d, d1=%.3f, d2=%.3f, g=%.3f, secs=%d, tryAgain=%d, nTripsOnSameSavedWts=%d, nSaves=%d' %
                    (i+1, epochs_goal, j+1, bat_per_epo, dis_loss, gen_loss, gan_loss, diff, nTryAgains, nTripsOnSameSavedWts, nSaves))
            # all losses in their "reasonable" range: training is healthy
            if dis_loss > 0.30 and dis_loss < 0.95 and gen_loss > 0.25 and gen_loss < 0.95 and gan_loss > 0.40 and gan_loss < 1.50:
                nTripsOnSameSavedWts = 0
                # refresh the snapshots, but not more often than every 9 batches
                if ij - ijSave > 8:
                    nSaves+=1
                    ijSave = ij
                    d_trainable_weights = d_model.get_weights()
                    g_trainable_weights = g_model.get_weights()
                    gan_trainable_weights = gan_model.get_weights()
            # out-of-bounds losses: roll the matching model back to its snapshot
            if (dis_loss < 0.001 or dis_loss > 2.0) and ijSave > 0:
                nTryAgains+=1
                nTripsOnSameSavedWts+=1
                print("LOADING d_model",j+1," from ",ijSave)
                d_model.set_weights(d_trainable_weights)
            if (gen_loss < 0.001 or gen_loss > 2.0) and ijSave > 0:
                nTryAgains+=1
                nTripsOnSameSavedWts+=1
                print("LOADING g_model",j+1," from ",ijSave)
                g_model.set_weights(g_trainable_weights)
            if (gan_loss < 0.010 or gan_loss > 4.50) and ijSave > 0:
                nTryAgains+=1
                nTripsOnSameSavedWts+=1
                print("LOADING gan_models",j+1," from ",ijSave)
                gan_model.set_weights(gan_trainable_weights)
            # if (j+1) % 10 == 0:
                # summarize_performance(i, g_model, d_model, gan_model, dataset, latent_dim)
            if nTripsOnSameSavedWts > 20:
                print("**********  Too many rebuilds  **************")
                # write a last checkpoint before giving up; gan_model must be
                # passed so the arguments line up with summarize_performance()
                summarize_performance(i, g_model, d_model, gan_model, dataset, latent_dim)
                sys.exit(0)
        # evaluate the model performance, sometimes
        if (i+1) % 5 == 0:
            summarize_performance(i, g_model, d_model, gan_model, dataset, latent_dim)


We can then define the size of the latent space, define all three models, and train them on the loaded face dataset.

In [0]:
# size of the latent space
# Length of the latent vector fed to the generator.
latent_dim = 100

# Either resume from the checkpoint for `epochs_done`, or build all three models from scratch.
if qRestart:
        d_model, g_model, gan_model = restart(epochs_done = epochs_done)
else:
        # create the discriminator
        d_model = define_discriminator()
        #d_model = load_model("generator_model_dis001.h5")
        # create the generator
        #g_model = load_model("generator_model_001.h5")
        g_model = define_generator(latent_dim)
        # create the gan
        gan_model = define_gan(g_model, d_model)
        #gan_model = load_model("generator_model_gan001.h5")

# load image data
# `cumProbs` must stay a module-level name: train() and summarize_performance()
# read it as a global rather than receiving it as an argument.
dataset, cumProbs = load_real_samples()
# Save a few grids of real images (with labels) for visual reference.
save_real_plots(dataset, nRealPlots=5)
# Run training from epoch `epochs_done` up to `epochs_goal`.
train(g_model, d_model, gan_model,  dataset, latent_dim, epochs_goal=epochs_goal, n_batch=128, epochs_done=epochs_done)
