EmojiGAN is a Deep Convolutional Generative Adversarial Network (DCGAN) that generates emojis. A perfect companion for AI Twitter bots. This model was trained on a GTX 1080 Ti using Keras with the TensorFlow backend.

In [1]:
import keras
from keras import models
from keras import layers
from keras.preprocessing import image as k_image
import numpy as np
import os
import matplotlib.pyplot as plt
import glob
import cv2

# Shared configuration for the notebook.
# latent_dim is read as a module-level name by build_generator and train_gan.
latent_dim = 100

# Image geometry and base filter count. The builder functions below carry
# equal default values and are called without these being passed explicitly.
height, width = 64, 64
channels = 3
depth = 64

# Glob pattern matching the training images, and the output directory for
# sample images and saved models.
image_dir = './data/emoji_test_set/*.png'
save_dir = './data/emoji_output'


Using TensorFlow backend.


1. Download the "emoji_imgs_V5" data set at https://github.com/SHITianhao/emoji-dataset.
2. Transfer the emoji .png files into the directory **'./data/emoji_test_set/'** (relative to this notebook; this is the location `image_dir` points to).
3. (Optional) Remove any unwanted groups of emojis. E.g., the data used in the model located in **'/models/'** was trained with approximately half the data set omitted. The omitted data were inanimate objects such as flags, buildings, etc. A text file list of the emojis used in the training set can be found in the **'/models/'** directory.

Before we set up our model, we must first import and clean up the image data. The emoji .png files we just downloaded are 4-channel images of varying height and width. Computer vision models usually do not play well with alpha channels, so the function below converts our 4-channel RGBA images (technically BGRA, due to how cv2's imread function operates) to 3-channel images by blending them onto a solid background. The BGR-to-RGB conversion happens afterwards, when the images are loaded.

In [2]:
def remove_transparency(source, background_color):
    """
    Converts image from 4-channel image with transparency to a
    3-channel image of specified background color.

    Each output pixel is ``alpha * color + (1 - alpha) * background``,
    where ``alpha`` is the fourth channel scaled to [0, 1].

    Args:
        source: array of shape (rows, cols, 4); the first three channels
            are color and the fourth is alpha, all on a 0-255 scale.
        background_color: background intensity on a 0-255 scale. Either a
            scalar applied to every channel or a length-3 sequence given in
            the same channel order as `source`.

    Returns:
        uint8 array of shape (rows, cols, 3) in the same channel order as
        `source`.
    """

    color = source[:, :, :3].astype(np.float64)
    # Slice with 3:4 to keep a trailing axis so alpha broadcasts over channels.
    alpha = source[:, :, 3:4].astype(np.float64) / 255.0
    background = np.asarray(background_color, dtype=np.float64)

    blended = alpha * color + (1.0 - alpha) * background

    # Round to nearest before the integer cast: a bare uint8 cast truncates,
    # so a value such as 203.99999 would silently become 203.
    return np.uint8(np.clip(np.rint(blended), 0, 255))

The function below grabs the images in our image directory, replaces any transparency with a white background, and resizes each image to the target height and width.

In [3]:
def get_images(image_dir, height=64, width=64):
    """
    Compile all images into a single array of
    dimensions (n x height x width x 3).

    Args:
        image_dir: glob pattern matching the image files to load.
        height: number of rows in each output image.
        width: number of columns in each output image.

    Returns:
        Float array of RGB images scaled to [0, 1]. Files are loaded in
        sorted path order. If no image could be loaded, the result has
        shape (0, height, width, 3).

    Files that OpenCV cannot decode are skipped with a warning.
    Transparent images are flattened onto a white background.
    """
    import warnings

    images = []
    # glob order is arbitrary; sort so the array is reproducible across runs.
    for image_path in sorted(glob.glob(image_dir)):
        img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)

        # imread signals failure by returning None instead of raising.
        if img is None:
            warnings.warn('Skipping unreadable image: ' + image_path)
            continue

        if img.ndim == 2:
            # Single-channel image: expand so the steps below see 3 channels.
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        elif img.shape[2] == 4:
            img = remove_transparency(img, 255)

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # cv2.resize takes dsize as (width, height), i.e. (columns, rows).
        img = cv2.resize(img, (width, height))

        images.append(img)

    if not images:
        return np.zeros((0, height, width, 3))

    return np.asarray(images) / 255.

The discriminator portion of our GAN model attempts to differentiate between real and generated images. It takes in a candidate image and outputs a single sigmoid probability between 0 (real) and 1 (generated). As the discriminator improves, it is better able to detect generated images.

In [4]:
def build_discriminator(
    height=64, width=64, channels=3, depth=64, lr=0.0001, dropout=0.6,
    decay=1e-8):
    """
    Construct and compile the discriminator network.

    The network maps a (height, width, channels) image to a single sigmoid
    output. It is one stride-1 convolution followed by five stride-2
    convolutions, each with a 4x4 kernel and a LeakyReLU, then dropout,
    flatten and a one-unit dense layer.

    Args:
        height, width, channels: shape of the input images.
        depth: base filter count; layers use 1x, 1x, 1x, 2x, 4x and 8x of it.
        lr: RMSprop learning rate.
        dropout: dropout rate applied before the final dense layer.
        decay: RMSprop learning-rate decay.

    Returns:
        The compiled Sequential model (binary cross-entropy loss).
    """

    net = models.Sequential()

    # Stride-1 entry convolution; this layer also fixes the input shape.
    net.add(layers.Conv2D(
        depth * 1,
        kernel_size=4,
        input_shape=(height, width, channels),
        padding="same"))
    net.add(layers.LeakyReLU())

    # Each of these halves the spatial resolution (stride 2, "same" padding).
    for multiplier in (1, 1, 2, 4, 8):
        net.add(layers.Conv2D(
            depth * multiplier, kernel_size=4, strides=2, padding="same"))
        net.add(layers.LeakyReLU())

    net.add(layers.Dropout(dropout))
    net.add(layers.Flatten())
    net.add(layers.Dense(1, activation='sigmoid'))

    rmsprop = keras.optimizers.RMSprop(lr=lr, decay=decay, clipvalue=1.0)
    net.compile(optimizer=rmsprop, loss='binary_crossentropy')

    return net

The generator creates new generated images from inputs of random vectors in latent space. As the generator improves, it becomes better at creating images from these random vectors that could be classified as real images.

In [5]:
def build_generator(
    height=64, width=64, channels=3, depth=64, dropout=0.3):
    """
    Construct the (uncompiled) generator network.

    A latent vector is projected to a 4 x 4 x (depth * 8) tensor, then
    upsampled by four stride-2 transposed convolutions (4 -> 8 -> 16 -> 32
    -> 64) and mapped to `channels` output planes through a 7x7 convolution
    with a tanh activation. The transposed convolutions use a kernel size
    (4) that is a multiple of the stride (2) to avoid checkerboard artifacts
    in generated images.

    Notes:
        * The latent vector length comes from the module-level `latent_dim`,
          not from an argument.
        * `height` and `width` are accepted but not used by the body; the
          output is 64 x 64 as described above.
        * The model is not compiled here. The generator's learning rate is
          set on the combined GAN model.

    Args:
        channels: number of output image channels.
        depth: base filter count; upsampling layers use 8x, 4x, 2x and 1x.
        dropout: dropout rate applied after the initial projection.

    Returns:
        The uncompiled Sequential model.
    """

    net = models.Sequential()

    # Project the latent vector and fold it into a small feature map.
    net.add(layers.Dense(4 * 4 * depth * 8, input_shape=(latent_dim,)))
    net.add(layers.Reshape((4, 4, depth * 8)))
    net.add(layers.LeakyReLU())
    net.add(layers.Dropout(dropout))

    # Each transposed convolution doubles the spatial resolution.
    for multiplier in (8, 4, 2, 1):
        net.add(layers.Conv2DTranspose(
            depth * multiplier, kernel_size=4, strides=2, padding='same'))
        net.add(layers.LeakyReLU())

    net.add(layers.Conv2D(channels, kernel_size=7, padding="same"))
    net.add(layers.Activation("tanh"))

    return net

The GAN model chains the generator to the discriminator. It classifies latent space points as 'fake' or 'real' and updates the weights of the generator. Only the generator's weights are updated during GAN training. The discriminator's weights are updated separately.

In [6]:
# Combined generator -> discriminator model
def build_gan(latent_dim, generator, discriminator, lr=0.0002, decay=1e-8):
    """
    Assemble and compile the adversarial model.

    The model feeds a latent vector through the generator and passes the
    resulting image to the discriminator, so its output is the
    discriminator's verdict on a generated image. `discriminator.trainable`
    is set to False before the model is built, so the discriminator's
    weights are frozen when this combined model is trained. The optimizer
    configured here is the one that drives the generator.

    Args:
        latent_dim: length of the latent input vector.
        generator: model mapping latent vectors to images.
        discriminator: model mapping images to a real/generated score.
        lr: RMSprop learning rate (the generator's learning rate).
        decay: RMSprop learning-rate decay.

    Returns:
        The compiled Keras Model (binary cross-entropy loss).
    """

    discriminator.trainable = False

    latent_input = keras.Input(shape=(latent_dim,))
    candidate_image = generator(latent_input)
    verdict = discriminator(candidate_image)
    adversarial = keras.models.Model(latent_input, verdict)

    adversarial.compile(
        optimizer=keras.optimizers.RMSprop(
            lr=lr, decay=decay, clipvalue=1.0),
        loss='binary_crossentropy')

    return adversarial

In [7]:
def save_data(step, generated_images, real_images, save_dir):
    """
    Save sample images during training.

    Writes the first generated image and the first real image of the batch
    to `save_dir` as 'generated_image<step>.png' and 'real_image<step>.png'.
    The directory is created if it does not exist.

    Args:
        step: training step, used as the file-name suffix.
        generated_images: batch of generator outputs; only index 0 is saved.
        real_images: batch of real images; only index 0 is saved.
        save_dir: directory the PNG files are written to.
    """

    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    samples = (
        ('generated_image', generated_images[0]),
        ('real_image', real_images[0]),
    )
    for prefix, sample in samples:
        # The generator ends in tanh, so its output can be negative. With
        # scale=False the array is cast straight to uint8, which would wrap
        # out-of-range values; clip to the displayable [0, 1] range first.
        pixels = np.clip(sample, 0., 1.) * 255.
        img = k_image.array_to_img(pixels, scale=False)
        img.save(os.path.join(save_dir, prefix + str(step) + '.png'))

The generator and discriminator are trained in the function below. The function operates as follows:
**Discriminator training**
1. A batch of random vectors in latent space are generated.
2. They are fed through the generator to predict counterfeit images.
3. Real images and fake images are combined with corresponding labels.
4. The discriminator is trained with these images and targets.

**Generator training**
5. A new batch of random vectors in latent space are generated.
6. These vectors are fed into the GAN with the label "these are real".
7. Generator weights are adjusted so the discriminator is more likely to mark generated pictures as real.

In [8]:
def train_gan(
    discriminator, generator, gan, x_train, iterations, save_dir=None,
    batch_size=64, save_intervals=100, save_weights=False):
    """
    Alternate discriminator and generator updates for `iterations` steps.

    Each step trains the discriminator on a batch of generated images
    (label 1) mixed with random real images (label 0, sampled with
    replacement), then trains the generator through `gan` by asking it to
    make the discriminator output 0 ("real") for fresh latent vectors.

    The latent vector length is read from the module-level `latent_dim`.

    Args:
        discriminator: compiled discriminator model.
        generator: generator model (used for prediction only here).
        gan: compiled combined model used to update the generator.
        x_train: array of real training images, indexed along axis 0.
        iterations: number of training steps.
        save_dir: directory for periodic outputs; created if missing.
            When None, nothing is written to disk.
        batch_size: number of generated and of real images per step.
        save_intervals: progress is printed (and outputs are written) every
            this many steps.
        save_weights: when True and `save_dir` is given, the weights of
            `gan` are written to 'gan_weights.h5' in `save_dir` at every
            save interval, overwriting the previous file.
    """

    # Create the output directory up front so the first snapshot cannot fail
    # on a missing path after training has already started.
    if save_dir is not None and not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    for step in range(iterations):

        # Sample random points in the latent space
        random_latent_vectors = np.random.normal(size=(batch_size,
                                                       latent_dim))
        generated_images = generator.predict(random_latent_vectors)

        # Retrieve random batch of real images and combine with fake images.
        real_image_index = np.random.randint(0, x_train.shape[0], batch_size)
        real_images = x_train[real_image_index]
        combined_images = np.concatenate([generated_images, real_images])

        # Assembles labels, discriminating real from fake images
        # (1 = generated, 0 = real, in the same order as combined_images).
        labels = np.concatenate([np.ones((batch_size, 1)),
                                 np.zeros((batch_size, 1))])

        # Add random noise to the labels - important!
        labels += 0.05 * np.random.random(labels.shape)

        # Train the discriminator
        d_loss = discriminator.train_on_batch(combined_images, labels)

        # Samples random points in space
        random_latent_vectors = np.random.normal(size=(batch_size,
                                                       latent_dim))

        # Assembles labels that say "these are real images"
        misleading_targets = np.zeros((batch_size, 1))
        a_loss = gan.train_on_batch(random_latent_vectors,
                                    misleading_targets)

        # occasionally print step and save data
        if step % save_intervals == 0:
            print('step:', step)
            print('discriminator loss:', d_loss)
            print('adversarial loss:', a_loss)

            if save_dir is not None:
                save_data(step, generated_images, real_images, save_dir)

                if save_weights:
                    gan.save_weights(
                        os.path.join(save_dir, 'gan_weights.h5'))
DCGANs are incredibly sensitive to hyperparameter settings, so adjust with caution. The discriminator tends to take over in this system. This can be adjusted by lowering lr_d, increasing lr_g, or increasing discriminator dropout rate.

In [9]:
# Training hyperparameters (edit these to tune a run):
iterations = 50000   # number of training steps
batch_size = 128     # images of each kind (real / generated) per step
lr_d = 2e-5          # discriminator learning rate
lr_g = 8e-4          # generator learning rate

In [10]:
# Read the training images, then construct the three networks.
x_train = get_images(image_dir=image_dir)

discriminator = build_discriminator(lr=lr_d)
generator = build_generator()
gan = build_gan(
    latent_dim=latent_dim,
    generator=generator,
    discriminator=discriminator,
    lr=lr_g,
)

In [None]:
# Run the adversarial training loop with the settings chosen above.
train_gan(
    discriminator=discriminator,
    generator=generator,
    gan=gan,
    x_train=x_train,
    iterations=iterations,
    save_dir=save_dir,
    batch_size=batch_size,
    save_intervals=100,
    save_weights=True,
)

In [None]:
# Write each full model to an .h5 file in the output directory.
for model, file_name in ((discriminator, 'discriminator.h5'),
                         (generator, 'generator.h5'),
                         (gan, 'gan.h5')):
    model.save(os.path.join(save_dir, file_name))

In [None]:
# Generate 100 images from random latent vectors and show them on a
# 10 x 10 grid. Outputs are clipped to [0, 1] before display.
random_latent_vectors = np.random.normal(size=(100, latent_dim))
A = np.clip(generator.predict(random_latent_vectors), 0, 1)

fig, axes = plt.subplots(
    10, 10,
    figsize=(72, 72),
    subplot_kw=dict(xticks=[], yticks=[]),
    gridspec_kw={'hspace': 0.1, 'wspace': 0.1},
)

# One generated image per axis, in order.
for ax, sample in zip(axes.flat, A):
    ax.imshow(sample)