## Pablo Vicente Juan, Ming Zhou and Macrina María Lobo

## pv2288, mz2591 and mml2204

## Unsupervised Representation Learning With Deep Convolutional Neural Networks

In [None]:
%matplotlib inline

import pickle
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

import theano
import theano.tensor as T

from helper.image_manipulation import transform, inverse_transform
from helper.network_builder import build_discriminator, build_generator

# Short aliases for the activation functions referenced by the generator
# layer specification further down.
relu, tanh = T.nnet.relu, T.tanh

## Visualize generated images

In [None]:
# Load the saved batch from disk and pass it through inverse_transform
# before plotting.
train_batch_x = inverse_transform(np.load('../results/recons_test90.npy'))

In [None]:
# Show the first 16 images of the batch in a 4x4 grid.
# `range` is used (rather than `xrange`) so the cell runs on Python 2 and 3,
# and each image is drawn on its Axes directly instead of switching the
# current axes with plt.axes(ax).
f, axarr = plt.subplots(4, 4, figsize=(10, 10))
for i in range(4):
    for j in range(4):
        # Column j of the grid holds images 4*j .. 4*j+3.
        # transpose(1, 2, 0) moves axis 0 last; presumably this converts
        # channel-first images to the (rows, cols, channels) layout imshow expects.
        axarr[i, j].imshow(train_batch_x[i + 4*j].transpose(1, 2, 0))

## Visualize intermediate outputs

### Initial parameters

In [None]:
# --- Image dimensions ---
# Number of channels in each image.
n_channels = 3
# Width and height of the images, in pixels.
img_size = 64

# --- Architecture parameters ---
# Number of filters in the first conv layer of the generator and of the
# discriminator respectively.
n_g_filters = 128
n_d_filters = 128
# Number of dimensions of the noise vector Z.
dimZ = 100

### Network architecture

In [None]:
# Symbolic inputs.
X = T.tensor4('X')   # real images (4-D tensor)
Z = T.matrix('Z')    # random noise vectors (2-D matrix)

# Generator architecture.
# All generator widths are derived from n_g_filters only, so changing the
# discriminator width (n_d_filters) can no longer silently alter the generator.
g_initial_im_size = 4
# Shape of the first (flat) weight matrix: noise dimension -> flattened
# n_g_filters*8 feature maps of size g_initial_im_size x g_initial_im_size.
g_flat_size = (dimZ, n_g_filters*8*g_initial_im_size*g_initial_im_size)
g_layer_size = [n_g_filters*4, n_g_filters*2, n_g_filters, n_channels]
g_num_filters = [n_g_filters*8, n_g_filters*4, n_g_filters*2, n_g_filters]
g_filter_size = [5, 5, 5, 5]
# Per-layer switches and settings consumed by build_generator; the last layer
# has normalisation disabled and uses tanh instead of relu.
g_norm = [True, True, True, False]
g_activation = [relu, relu, relu, tanh]
g_subsample = [(2,2),(2,2),(2,2),(2,2)]
g_border_mode = [(2,2),(2,2),(2,2),(2,2)]

# Discriminator architecture.
d_layer_size = [n_channels, n_d_filters, n_d_filters*2, n_d_filters*4]
d_num_filters = [n_d_filters, n_d_filters*2, n_d_filters*4, n_d_filters*8]
d_filter_size = [5, 5, 5, 5]
# Normalisation is disabled on the first discriminator layer only.
d_norm = [False, True, True, True]
# Shape of the final flat weight matrix mapping to a single output unit.
# NOTE(review): the 4*4 factor presumably is the spatial size left after the
# four discriminator layers for img_size = 64 - confirm against build_discriminator.
d_flat_size = (n_d_filters*8*4*4, 1)

### Load weights and dataset

In [None]:
# Load the training images and apply `transform` to them.
# The dataset is bound to `datasetX` because that is the name the following
# line reads.
# NOTE(review): `load_dataset` and `batch_size` are not imported/defined in any
# visible cell; they must be provided before this cell can run.
datasetX, n_train_batches = load_dataset('../data/pokemon.npy', batch_size)
datasetX = transform(datasetX)

# Restore the saved discriminator/generator parameters and the noise batch.
# `with` closes each file as soon as it has been read.
# NOTE: pickle can execute arbitrary code on load; only open trusted files.
with open("../discrim_params.p", "rb") as fh:
    discrim_params = pickle.load(fh)
with open("../gen_params.p", "rb") as fh:
    gen_params = pickle.load(fh)
with open("../sample_zmb.p", "rb") as fh:
    sample_zmb = pickle.load(fh)

In [None]:
# Positional arguments common to both discriminator instances, so the branch
# fed with real images and the branch fed with generated images are built
# from exactly the same settings and the same discrim_params.
_d_args = (d_layer_size, d_num_filters, d_filter_size,
           d_flat_size, d_norm, discrim_params)

# Generator: symbolic output gX for noise Z, plus the list of its layers.
gX, g_layers = build_generator(
    Z, g_layer_size, g_num_filters, g_filter_size, g_flat_size,
    g_subsample, g_border_mode, g_norm, g_activation, g_initial_im_size,
    gen_params
)

# Discriminator applied to real samples X.
p_real, d_real_layers = build_discriminator(X, *_d_args)

# Discriminator applied to generated samples gX.
p_gen, d_gen_layers = build_discriminator(gX, *_d_args)

# Compiled function: noise batch -> generator output.
_gen = theano.function([Z], gX)

### Build functions to explore intermediate outputs

In [None]:
# `_gen` is already compiled in the cell that builds the networks, so it is
# not compiled a second time here (each theano.function call triggers a fresh
# graph compilation).

# One compiled function per discriminator layer: real images X -> that
# layer's output.
d_layer0 = theano.function([X], d_real_layers[0].output)
d_layer1 = theano.function([X], d_real_layers[1].output)
d_layer2 = theano.function([X], d_real_layers[2].output)
d_layer3 = theano.function([X], d_real_layers[3].output)
d_layer4 = theano.function([X], d_real_layers[4].output)

# One compiled function per generator layer: noise Z -> that layer's output.
g_layer0 = theano.function([Z], g_layers[0].output)
g_layer1 = theano.function([Z], g_layers[1].output)
g_layer2 = theano.function([Z], g_layers[2].output)
g_layer3 = theano.function([Z], g_layers[3].output)
g_layer4 = theano.function([Z], g_layers[4].output)

### Plot results

In [None]:
# Second index used when plotting `out`; the plotting cell below advances it
# by 16 every time it is run.
k = 0

# Output of generator layer 1 for the stored noise batch.
out = g_layer1(sample_zmb)
print(out.shape)

In [None]:
# Plot out[sample][k] for a different sample in every panel of a 16x16 grid.
# The flat sample index uses the grid's row count as stride, so no two panels
# show the same sample (a stride of 4 made panels repeat). Panels whose index
# falls beyond the available samples are left empty.
n_rows, n_cols = 16, 16
f, axarr = plt.subplots(n_rows, n_cols, figsize=(10, 10))
print(k)
for i in range(n_rows):
    for j in range(n_cols):
        ax = axarr[i, j]
        ax.axis('off')
        sample_idx = i + n_rows * j
        if sample_idx < len(out):
            ax.imshow(out[sample_idx][k], interpolation='nearest')
# Advance the second index so that re-running this cell shows a different slice.
k += 16
plt.savefig('g_layer1_output.png', bbox_inches='tight')

# Maximal output

This section is based on

https://blog.keras.io/how-convolutional-neural-networks-see-the-world.html

In [None]:
filter_index = 0

# Build a loss that maximizes the mean activation of index `filter_index`
# along axis 1 of the last generator layer's output (presumably the channel
# axis of a (batch, channel, rows, cols) tensor).
layer_output = g_layers[-1].output
loss = T.mean(layer_output[:, filter_index, :, :])

# Gradient of the loss with respect to the generator's input.
# T.grad returns a single variable when `wrt` is a single variable, so the
# result must not be indexed: `[0]` would keep only the first row of the
# gradient instead of the gradient for the whole input batch.
grads = T.grad(loss, g_layers[0].input)

# Normalization trick: rescale the gradient to unit RMS (epsilon avoids a
# division by zero) so the ascent step size does not depend on its magnitude.
grads /= (T.sqrt(T.mean(T.square(grads))) + 1e-5)

# Compiled function returning the loss and the normalized gradient for a
# given noise batch Z.
iterate = theano.function([Z], [loss, grads])

In [None]:
# Width and height used by the (currently disabled) initialisation below,
# which would start from a gray image with some noise.
img_width, img_height = 64, 64
#input_img_data = np.random.random((1, 3, img_width, img_height)) * 20 + 128.
#input_img_data = input_img_data.astype(np.float32)

In [None]:
# Run gradient ascent for 20 steps.
# A constant step size is used; multiplying by the loop counter instead would
# make the first iteration a no-op and grow the step on every iteration.
# NOTE(review): `first_sample` is not defined in any visible cell; it must be
# a noise batch accepted by `iterate` - confirm where it is meant to come from.
step = 1.0
for _ in range(20):
    loss_value, grads_value = iterate(first_sample)
    first_sample += grads_value * step

In [None]:
def deprocess_image(x):
    """Convert a 3-D array into a displayable uint8 image.

    The values are standardised to mean 0 and standard deviation 0.1,
    shifted to be centred on 0.5, clipped to [0, 1], scaled to [0, 255] and
    the first axis is moved last (``transpose((1, 2, 0))``).

    The input is never modified: all arithmetic is done out of place, so a
    slice of a larger batch can be passed safely. Integer input is accepted
    and converted to float first.

    Args:
        x: 3-D array-like; axis 0 becomes the last axis of the result.

    Returns:
        A new ``uint8`` array with axes reordered to ``(1, 2, 0)``.
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        # In-place or same-dtype arithmetic would fail/truncate on integers.
        x = x.astype(np.float64)

    # Normalize: center on 0., ensure std is 0.1 (epsilon guards std == 0).
    x = x - x.mean()
    x = x / (x.std() + 1e-5)
    x = x * 0.1

    # Shift to be centred on 0.5, clip to [0, 1], then scale to [0, 255].
    x = np.clip(x + 0.5, 0, 1) * 255

    # Reorder axes and truncate to 8-bit values.
    return x.transpose((1, 2, 0)).astype('uint8')


In [None]:
# Show 64 deprocessed images in an 8x8 grid (column j holds images 8*j .. 8*j+7).
# NOTE(review): `input_img_data` is only assigned in a commented-out line of
# an earlier cell; it must be defined (with at least 64 entries) before this
# cell can run.
f, axarr = plt.subplots(8, 8, figsize=(10, 10))
for i in range(8):
    for j in range(8):
        ax = axarr[i, j]
        img = deprocess_image(input_img_data[i + 8*j])
        ax.imshow(img, interpolation='nearest')
        ax.axis('off')

#plt.savefig('d_layer3_output.png', bbox_inches='tight')        