# Computer Vision

This code is supporting material for the book `Building Machine Learning Systems with Python` by [Willi Richert](https://www.linkedin.com/in/willirichert/), [Luis Pedro Coelho](https://www.linkedin.com/in/luispedrocoelho/) and [Matthieu Brucher](https://www.linkedin.com/in/matthieubrucher/) published by PACKT Publishing. It is made available under the MIT License.

## Generative Adversarial Networks

Let's create a class for our GAN based on convolutional networks.

In [None]:
import tensorflow as tf

def match(logits, labels):
    """Return the mean sigmoid cross-entropy between `logits` and `labels`.

    Args:
        logits: Tensor of raw (pre-sigmoid) scores.
        labels: Tensor with the same shape as `logits` holding the targets.

    Returns:
        A scalar tensor: the cross-entropy averaged over all elements.
    """
    # The logits are deliberately NOT clipped. Clipping to (0, 1) is only
    # meaningful for probabilities; applied to logits it squashes the scores
    # and blocks the gradient for every value outside that interval.
    # `sigmoid_cross_entropy_with_logits` applies the sigmoid itself.
    return tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))

def batchnormalize(X, eps=1e-8, g=None, b=None):
    """Normalize `X` to zero mean and unit variance using batch statistics.

    Rank-4 inputs are normalized over axes 0, 1 and 2; rank-2 inputs over
    axis 0. The optional scale `g` and shift `b` are applied only when both
    are given.

    Args:
        X: Tensor of rank 2 or 4.
        eps: Small constant added to the variance before the square root.
        g: Optional scale, reshaped to broadcast along the last axis.
        b: Optional shift, reshaped to broadcast along the last axis.

    Returns:
        The normalized (and optionally scaled and shifted) tensor.

    Raises:
        NotImplementedError: If the rank of `X` is neither 2 nor 4.
    """
    rank = X.get_shape().ndims
    if rank == 4:
        reduce_axes = [0, 1, 2]
        param_shape = [1, 1, 1, -1]
    elif rank == 2:
        reduce_axes = 0
        param_shape = [1, -1]
    else:
        raise NotImplementedError

    mean = tf.reduce_mean(X, reduce_axes)
    variance = tf.reduce_mean(tf.square(X - mean), reduce_axes)
    normalized = (X - mean) / tf.sqrt(variance + eps)

    # The affine transform needs both parameters; otherwise skip it.
    if g is None or b is None:
        return normalized

    scale = tf.reshape(g, param_shape)
    shift = tf.reshape(b, param_shape)
    return normalized * scale + shift

class DCGAN():
    """Conditional GAN built from convolutional layers.

    The generator maps a noise vector `Z` and a label vector `Y` to an image;
    the discriminator scores an image together with its label vector. The
    label is re-injected (concatenated) after every layer of both networks.
    """

    def __init__(
            self,
            image_shape=(28, 28, 1),
            dim_z=100,
            dim_y=10,
            dim_W1=1024,
            dim_W2=128,
            dim_W3=64,
            dim_channel=1,
            ):
        """Store the network dimensions.

        Args:
            image_shape: [height, width, channels] of the real images. For
                the generated image to match this shape, height and width
                must be multiples of 4 (the generator upsamples twice by 2).
            dim_z: Size of the noise vector.
            dim_y: Size of the label (conditioning) vector.
            dim_W1: Units of the first dense layer.
            dim_W2: Filters of the deepest convolutional feature map.
            dim_W3: Filters of the intermediate convolutional feature map.
            dim_channel: Number of channels of the generated image.
        """
        # Copy into a fresh list: avoids sharing one mutable default between
        # instances and keeps `[None] + self.image_shape` valid for tuples.
        self.image_shape = list(image_shape)
        self.dim_z = dim_z
        self.dim_y = dim_y

        self.dim_W1 = dim_W1
        self.dim_W2 = dim_W2
        self.dim_W3 = dim_W3
        self.dim_channel = dim_channel

    def build_model(self):
        """Create the placeholders, both networks and their losses.

        Returns:
            A tuple `(Z, Y, image_real, image_gen, discrim_cost, gen_cost)`:
            the noise, label and real-image placeholders, the generated image
            tensor, and the discriminator and generator costs.
        """
        Z = tf.placeholder(tf.float32, [None, self.dim_z])
        Y = tf.placeholder(tf.float32, [None, self.dim_y])

        image_real = tf.placeholder(tf.float32, [None]+self.image_shape)
        image_gen = self.generate(Z, Y)

        # The second call reuses the discriminator variables created by the
        # first one, so real and generated images share the same network.
        raw_real = self.discriminate(image_real, Y, False)
        raw_gen = self.discriminate(image_gen, Y, True)

        # Discriminator: real images -> 1, generated images -> 0.
        discrim_cost_real = match(raw_real, tf.ones_like(raw_real))
        discrim_cost_gen = match(raw_gen, tf.zeros_like(raw_gen))
        discrim_cost = discrim_cost_real + discrim_cost_gen

        # Generator: wants its images to be scored as real (1).
        gen_cost = match( raw_gen, tf.ones_like(raw_gen) )

        return Z, Y, image_real, image_gen, discrim_cost, gen_cost

    def create_conv2d(self, input, filters, kernel_size, name):
        """Stride-2, SAME-padded convolution followed by a leaky ReLU."""
        layer = tf.layers.conv2d(
                    inputs=input,
                    filters=filters,
                    kernel_size=kernel_size,
                    strides=[2,2],
                    name="Conv2d_" + name,
                    padding="SAME")
        layer = tf.nn.leaky_relu(layer, name= "LeakyRELU" + name)
        return layer

    def create_conv2d_transpose(self, input, filters, kernel_size, name, with_batch_norm):
        """Stride-2, SAME-padded transposed convolution.

        When `with_batch_norm` is true the output is batch-normalized and
        passed through a ReLU; otherwise the raw layer output is returned.
        """
        layer = tf.layers.conv2d_transpose(
                    inputs=input,
                    filters=filters,
                    kernel_size=kernel_size,
                    strides=[2,2],
                    name="Conv2d_" + name,
                    padding="SAME")
        if with_batch_norm:
            layer = batchnormalize(layer)
            layer = tf.nn.relu(layer)
        return layer

    def create_dense(self, input, units, name, leaky):
        """Dense layer, batch normalization, then leaky ReLU or ReLU."""
        layer = tf.layers.dense(
                inputs=input,
                units=units,
                name="Dense" + name,
                )
        layer = batchnormalize(layer)
        if leaky:
            layer = tf.nn.leaky_relu(layer, name= "LeakyRELU" + name)
        else:
            layer = tf.nn.relu(layer, name="RELU_" + name)
        return layer

    def _label_map(self, yb, height, width):
        """Broadcast labels `yb` ([batch, 1, 1, dim_y]) over a height x width grid."""
        return yb*tf.ones([1, height, width, self.dim_y])

    def discriminate(self, image, Y, reuse=False):
        """Score `image` conditioned on the labels `Y`.

        Args:
            image: Image batch with spatial size `image_shape[:2]`.
            Y: Label batch of width `dim_y`.
            reuse: Whether to reuse the variables of the 'discriminate' scope.

        Returns:
            A [batch, 1] tensor, used as logits by `build_model`.
        """
        with tf.variable_scope('discriminate', reuse=reuse):

            height, width = self.image_shape[0], self.image_shape[1]
            # Each SAME-padded stride-2 convolution yields ceil(size / 2).
            half_h, half_w = (height + 1) // 2, (width + 1) // 2
            quarter_h, quarter_w = (half_h + 1) // 2, (half_w + 1) // 2

            yb = tf.reshape(Y, tf.stack([-1, 1, 1, self.dim_y]))
            X = tf.concat(axis=3, values=[image, self._label_map(yb, height, width)])

            h1 = self.create_conv2d(X, self.dim_W3, 5, "Layer1")
            h1 = tf.concat(axis=3, values=[h1, self._label_map(yb, half_h, half_w)])

            h2 = self.create_conv2d(h1, self.dim_W2, 5, "Layer2")
            # Flatten using the configured filter count rather than a literal.
            h2 = tf.reshape(h2, tf.stack([-1, quarter_h*quarter_w*self.dim_W2]))
            h2 = tf.concat(axis=1, values=[h2, Y])

            h3 = self.create_dense(h2, self.dim_W1, "Layer3", True)
            h3 = tf.concat(axis=1, values=[h3, Y])

            h4 = self.create_dense(h3, 1, "Layer4", True)
            return h4

    def generate(self, Z, Y, reuse=False):
        """Generate images from noise `Z` conditioned on the labels `Y`.

        Args:
            Z: Noise batch of width `dim_z`.
            Y: Label batch of width `dim_y`.
            reuse: Whether to reuse the variables of the 'generate' scope.

        Returns:
            Image tensor with values in (0, 1) (sigmoid output) and
            `dim_channel` channels. Its spatial size is four times
            `image_shape[:2] // 4`, which equals `image_shape[:2]` only when
            height and width are multiples of 4.
        """
        with tf.variable_scope('generate', reuse=reuse):

            height, width = self.image_shape[0], self.image_shape[1]
            # Two stride-2 transposed convolutions each double the size, so
            # start from a quarter of the target resolution.
            quarter_h, quarter_w = height // 4, width // 4
            half_h, half_w = quarter_h * 2, quarter_w * 2

            yb = tf.reshape(Y, tf.stack([-1, 1, 1, self.dim_y]))
            Z = tf.concat(axis=1, values=[Z,Y])
            h1 = self.create_dense(Z, self.dim_W1, "Layer1", False)
            h1 = tf.concat(axis=1, values=[h1, Y])
            h2 = self.create_dense(h1, self.dim_W2*quarter_h*quarter_w, "Layer2", False)
            h2 = tf.reshape(h2, tf.stack([-1, quarter_h, quarter_w, self.dim_W2]))
            h2 = tf.concat(axis=3, values=[h2, self._label_map(yb, quarter_h, quarter_w)])

            h3 = self.create_conv2d_transpose(h2, self.dim_W3, 5, "Layer3", True)
            h3 = tf.concat(axis=3, values=[h3, self._label_map(yb, half_h, half_w)])

            h4 = self.create_conv2d_transpose(h3, self.dim_channel, 7, "Layer4", False)
            x = tf.nn.sigmoid(h4)
            return x

We add two helper functions: one that transforms our labels into a one-hot encoding (done here with NumPy, although TensorFlow could be used instead) and one that plots and saves our sampled images.

In [None]:
import imageio
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

def one_hot(X, n):
    """Return the one-hot encoding of the integer labels in `X`.

    Args:
        X: Array-like of integer class indices; it is flattened first.
        n: Number of classes, i.e. the width of the encoding.

    Returns:
        A float array of shape (number of labels, n) with a single 1.0 per
        row, at the column given by the corresponding label.
    """
    labels = np.asarray(X).flatten()
    count = len(labels)
    encoded = np.zeros((count, n))
    rows = np.arange(count)
    # Fancy indexing sets exactly one entry per row.
    encoded[rows, labels] = 1.
    return encoded

def save_visualization(X, nh_nw, save_path='./sample.jpg'):
    """Tile the images in `X` into one grid image, then save and show it.

    Args:
        X: Batch of images indexed as [image, row, column, ...]. Each image
            is divided by 255 before being written to the grid (presumably
            the values are in the 0..255 range; confirm against the caller).
        nh_nw: Pair (number of grid rows, number of grid columns).
        save_path: File the grid image is written to.
    """
    n_rows, n_cols = nh_nw[0], nh_nw[1]
    tile_h, tile_w = X.shape[1], X.shape[2]
    canvas = np.zeros((tile_h * n_rows, tile_w * n_cols, 3))

    for index, tile in enumerate(X):
        # Fill the grid row by row.
        row, col = divmod(index, n_cols)
        top, left = row * tile_h, col * tile_w
        canvas[top:top + tile_h, left:left + tile_w, :] = tile / 255

    imageio.imwrite(save_path, canvas)
    plt.imshow(canvas)
    plt.show()

Our hyperparameters and our data

In [None]:
import os
import numpy as np

# Training hyperparameters.
n_epochs = 10
learning_rate = 0.0002
batch_size = 128

# Network dimensions (see the DCGAN constructor).
image_shape = [28,28,1]
dim_z = 10
dim_y = 10
dim_W1 = 1024
dim_W2 = 128
dim_W3 = 64
dim_channel = 1

# Number of images generated for each visualization.
visualize_dim=196

# `fetch_mldata` depended on the defunct mldata.org service and is no longer
# part of scikit-learn; OpenML hosts the same 70000 MNIST digits.
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1)
# np.asarray accepts both the ndarray and the DataFrame/Series return types.
# Pixels are scaled to [0, 1] and reshaped to [examples] + image_shape.
mnist.data = np.asarray(mnist.data, dtype=np.float32).reshape([-1] + image_shape) / 255.
mnist.num_examples = len(mnist.data)
# OpenML delivers the labels as strings; convert them to integers first.
mnist.target = one_hot(np.asarray(mnist.target).astype(np.int8), dim_y)

Let's generate some images!

In [None]:
tf.reset_default_graph()
# Pass every hyperparameter explicitly so that changing `dim_y` or
# `dim_channel` above cannot silently disagree with the model.
dcgan_model = DCGAN(
        image_shape=image_shape,
        dim_z=dim_z,
        dim_y=dim_y,
        dim_W1=dim_W1,
        dim_W2=dim_W2,
        dim_W3=dim_W3,
        dim_channel=dim_channel,
        )
Z_tf, Y_tf, image_tf, image_tf_sample, d_cost_tf, g_cost_tf = dcgan_model.build_model()

# Split the variables by the scope each network was built in.
trainable_vars = tf.trainable_variables()
discrim_vars = [v for v in trainable_vars if v.name.startswith('discr')]
gen_vars = [v for v in trainable_vars if v.name.startswith('gen')]

train_op_discrim = tf.train.AdamOptimizer(learning_rate, beta1=0.5).minimize(d_cost_tf, var_list=discrim_vars)
train_op_gen = tf.train.AdamOptimizer(learning_rate, beta1=0.5).minimize(g_cost_tf, var_list=gen_vars)

# Fixed noise and labels, reused for every visualization.
Z_np_sample = np.random.uniform(-1, 1, size=(visualize_dim,dim_z))
Y_np_sample = one_hot( np.random.randint(dim_y, size=[visualize_dim]), dim_y)

step = 1000

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(n_epochs):
        # One fresh permutation per epoch; batches index the data directly,
        # which avoids copying the whole shuffled dataset.
        permut = np.random.permutation(mnist.num_examples)
        Z = np.random.uniform(-1, 1, size=[mnist.num_examples, dim_z]).astype(np.float32)

        print("epoch: %i" % epoch)
        for j in range(0, mnist.num_examples, batch_size):
            # `j` advances by batch_size, so this fires only when the offset
            # is also a multiple of `step`.
            if j % step == 0:
                print("  batch: %i" % j)

            batch = permut[j:j+batch_size]

            Xs = mnist.data[batch]
            Ys = mnist.target[batch]
            Zs = Z[batch]

            # Alternate: even batches train the discriminator, odd batches
            # train the generator.
            if (j // batch_size) % 2 == 0:
                sess.run(train_op_discrim,
                    feed_dict={
                        Z_tf:Zs,
                        Y_tf:Ys,
                        image_tf:Xs
                        })
            else:
                sess.run(train_op_gen,
                    feed_dict={
                        Z_tf:Zs,
                        Y_tf:Ys
                        })

            if j % step == 0:
                generated_samples = sess.run(
                        image_tf_sample,
                        feed_dict={
                            Z_tf:Z_np_sample,
                            Y_tf:Y_np_sample
                            })
                # save_visualization divides by 255 again before plotting.
                generated_samples = generated_samples * 255
                save_visualization(generated_samples, (7,28), save_path='./B09124_11_sample_%03d_%04d.jpg' % (epoch, j // step))