In [0]:
% matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import keras
import math
import PIL.Image
import time
import os

from __future__ import print_function
from sklearn.metrics import confusion_matrix
from keras.datasets import mnist
from keras.models import Sequential, Model
from keras import backend as K

from keras.applications import vgg16
from keras.layers import Dense, Dropout, Flatten, Activation, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras.preprocessing.image import save_img
from keras import regularizers
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image

Using TensorFlow backend.


In [0]:
### Five-class ImageNet subset, read from .npy files in the working directory.
x_train, x_test = np.load("X_train.npy"), np.load("X_test.npy")

# The stored labels are shifted down by one before being used as class
# indices (presumably they are 1-based on disk -- confirm against the data).
y_train_index, y_test_index = (
    np.load(path) - 1 for path in ("y_train.npy", "y_test.npy"))

# One-hot targets with five columns, one per class.
y_train, y_test = (
    keras.utils.to_categorical(labels, 5)
    for labels in (y_train_index, y_test_index))

# Train and test samples pooled together (train first); the pooled arrays are
# what `initial` averages over when building a class-mean image.
x_combine = np.concatenate([x_train, x_test], axis=0)
y_combine = list(np.concatenate([y_train_index, y_test_index], axis=0))

In [0]:
def normalize(x):
    """Scale a backend tensor by its root-mean-square value.

    The divisor is sqrt(mean(x ** 2)) plus the backend epsilon, which keeps
    the division finite for an all-zero tensor. Note this is the RMS of the
    entries, not the L2 norm in the strict sense (mean rather than sum).
    """
    rms = K.sqrt(K.mean(K.square(x)))
    return x / (rms + K.epsilon())
  

def deprocess_image(x):
    """Convert a raw optimized image array into a displayable uint8 RGB image.

    The values are centered on 0, scaled to a standard deviation of 0.1,
    shifted by 0.5, clipped to [0, 1] and finally mapped to [0, 255].

    Args:
        x: array holding a single image. With the 'channels_first' backend
            format it is transposed with axes (1, 2, 0) before being returned.

    Returns:
        A new uint8 array. The argument itself is never modified.
    """
    # Work on a private floating-point copy: callers pass a view into their
    # optimization buffer, and the in-place arithmetic below would otherwise
    # overwrite it (and would fail outright on an integer-typed array).
    img = np.asarray(x)
    if np.issubdtype(img.dtype, np.floating):
        img = img.copy()
    else:
        img = img.astype('float64')

    # normalize: center on 0., ensure std is 0.1
    img -= img.mean()
    img /= (img.std() + K.epsilon())
    img *= 0.1

    # shift to mid-gray and clip to [0, 1]
    img += 0.5
    img = np.clip(img, 0, 1)

    # convert to an RGB array with the channel axis last
    img *= 255
    if K.image_data_format() == 'channels_first':
        img = img.transpose((1, 2, 0))
    return np.clip(img, 0, 255).astype('uint8')

## Visualization of the VGG16 Model Trained on the ImageNet Dataset

In [0]:
# build the VGG16 network with ImageNet weights
# include_top=False requests the network without its fully connected
# classifier layers; the summary printed further down lists only
# convolution and pooling layers with unspecified spatial dimensions.
model = vgg16.VGG16(weights='imagenet', include_top=False)
print('Model loaded.')

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Model loaded.


In [0]:
# a bare expression as the last line of a cell shows the object's repr
model

<keras.engine.training.Model at 0x7fc5a83272e8>

In [0]:
# Print the layer-by-layer architecture of the loaded network.
model.summary()

# Symbolic input tensor of the network; the filter visualization below
# differentiates its losses with respect to this placeholder.
input_img = model.input

# Lookup table from layer name to layer object. The first entry of
# model.layers (the input layer) is left out.
layer_dict = {layer.name: layer for layer in model.layers[1:]}


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0         
__________

In [0]:
### Define the filter-visualization ("saliency map") function for a convolutional layer
def _stitch_filter_grid(kept_filters, n, img_width, img_height, margin=5):
    """Place up to n*n (image, loss) entries on a black n x n grid with `margin`-pixel gaps."""
    width = n * img_width + (n - 1) * margin
    height = n * img_height + (n - 1) * margin
    stitched_filters = np.zeros((width, height, 3))

    # Row-major fill; cells without a matching entry simply stay black.
    for position, (img, _) in enumerate(kept_filters[:n * n]):
        i, j = divmod(position, n)
        width_margin = (img_width + margin) * i
        height_margin = (img_height + margin) * j
        stitched_filters[
            width_margin: width_margin + img_width,
            height_margin: height_margin + img_height, :] = img
    return stitched_filters


def saliency_map(layer_name,iter = 20, n_filters = 200,  n = 8, 
                 img_width = 224, img_height = 224,
                 layder_dict = layer_dict,save = True): 
    """Visualize filters of one layer by gradient ascent on the input image.

    For each of the first `n_filters` filters of the layer, a noisy gray image
    is optimized to maximize the filter's mean activation. The n*n images with
    the highest final loss are stitched into a grid and optionally saved as
    'stitched_filters_<n>x<n>_<layer_name>.png'.

    Relies on the notebook-level `input_img` tensor and on the `normalize` and
    `deprocess_image` helpers.

    Args:
        layer_name: key of the layer to visualize in `layder_dict`.
        iter: number of gradient-ascent steps per filter.
        n_filters: how many filters to scan, starting at index 0. Clamped to
            the number of filters the layer actually has when that is known.
        n: side length of the output grid (n*n images at most).
        img_width, img_height: size of each generated image.
        layder_dict: mapping from layer name to layer object (the misspelled
            parameter name is kept so existing keyword calls still work).
        save: write the stitched grid to disk when True.

    Returns:
        None.
    """
    channels_first = K.image_data_format() == 'channels_first'

    # Use the mapping that was passed in rather than the notebook global, and
    # look the layer up once: it does not change from filter to filter.
    layer_output = layder_dict[layer_name].output

    # Never ask for more filters than the layer provides.
    available = K.int_shape(layer_output)[1 if channels_first else -1]
    if available is not None:
        n_filters = min(n_filters, available)

    kept_filters = []
    for filter_index in range(n_filters):
        # we only scan through the first n_filters filters,
        # since sometimes there are too many of them
        print('Processing filter %d' % filter_index)
        start_time = time.time()

        # we build a loss function that maximizes the activation
        # of the nth filter of the layer considered
        if channels_first:
            loss = K.mean(layer_output[:, filter_index, :, :])
        else:
            loss = K.mean(layer_output[:, :, :, filter_index])

        # we compute the gradient of the input picture wrt this loss
        grads = K.gradients(loss, input_img)[0]

        # normalization trick: we normalize the gradient
        grads = normalize(grads)

        # this function returns the loss and grads given the input picture
        iterate = K.function([input_img], [loss, grads])

        # step size for gradient ascent
        step = 1.

        # we start from a gray image with some random noise
        if channels_first:
            input_img_data = np.random.random((1, 3, img_width, img_height))
        else:
            input_img_data = np.random.random((1, img_width, img_height, 3))
        input_img_data = (input_img_data - 0.5) * 20 + 128

        # Defined up front so that iter == 0 skips the filter instead of
        # failing on an unbound name below.
        loss_value = 0.

        # we run gradient ascent for `iter` steps
        for i in range(iter):
            loss_value, grads_value = iterate([input_img_data])
            input_img_data += grads_value * step

            print('Current loss value:', loss_value)
            if loss_value <= 0.:
                # some filters get stuck to 0, we can skip them
                break

        # decode the resulting input image
        if loss_value > 0:
            img = deprocess_image(input_img_data[0])
            kept_filters.append((img, loss_value))
        end_time = time.time()
        print('Filter %d processed in %ds' % (filter_index, end_time - start_time))

    # the filters that have the highest loss are assumed to be better-looking.
    # we will only keep the top n*n filters.
    kept_filters.sort(key=lambda x: x[1], reverse=True)
    kept_filters = kept_filters[:n * n]
    if len(kept_filters) < n * n:
        print('Only %d of %d grid cells could be filled; the rest stay black.'
              % (len(kept_filters), n * n))

    # build a black picture with enough space in between and fill it
    stitched_filters = _stitch_filter_grid(kept_filters, n, img_width, img_height)

    if save:
        # save the result to disk
        save_img('stitched_filters_%dx%d_%s.png' % (n, n, layer_name), stitched_filters)


In [0]:
# Render filter grids for three VGG16 convolutional layers, 20 ascent steps each.
# The first call scans the default number of filters and builds an 8 x 8 grid;
# the other two scan 64 filters and build 5 x 5 grids.
# The commented-out calls are variants for other layers that are currently disabled.
saliency_map(layer_name = 'block5_conv1', iter = 20, n = 8, layder_dict = layer_dict)
# saliency_map(layer_name = 'block1_conv1', n_filters = 64, iter = 20, n = 8, layder_dict = layer_dict)
saliency_map(layer_name = 'block1_conv2', n_filters = 64, iter = 20, n = 5, layder_dict = layer_dict)
# saliency_map(layer_name = 'block2_conv1', n_filters = 64, iter = 20, n = 5, layder_dict = layer_dict)
saliency_map(layer_name = 'block2_conv2', n_filters = 64, iter = 20, n = 5, layder_dict = layer_dict)

In [0]:
# Rebind `model` to VGG16 loaded with include_top=True and ImageNet weights.
# From this cell on, `model` no longer refers to the include_top=False
# network loaded earlier in the notebook.
model = vgg16.VGG16(weights='imagenet', include_top=True)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5


In [0]:
def initial(index):
    """Return the per-pixel mean image of all samples labelled `index`.

    Reads the notebook-level `x_combine` (samples) and `y_combine` (one class
    index per sample) and averages the matching samples along the first axis.

    Args:
        index: class index to select.

    Returns:
        Array with the shape of a single sample.

    Raises:
        ValueError: if no sample carries the label `index`. Averaging an empty
            selection would otherwise return an all-NaN image.
    """
    selected = np.flatnonzero(np.asarray(y_combine) == index)
    if selected.size == 0:
        raise ValueError('No samples found for class index %r.' % (index,))
    return np.mean(x_combine[selected], axis=0)

In [0]:
def visualize_softmax_dense_VGG(model, iter, output_index = 0, step = 0.5,
                                alpha = 0.1, initialize = 'random',
                                initialize_index = 0, save = True):
    """Synthesize an input image that maximizes one pre-softmax class score.

    The last layer of `model` is re-created as a Dense layer without an
    activation (reusing the original weights), so the optimized quantity is
    the raw score rather than the softmax probability. The loss is that score
    minus `alpha` times the norm of the input image.

    Args:
        model: loaded model whose last layer is a Dense layer; its weights are
            copied into the activation-free replacement.
        iter: number of gradient-ascent steps.
        output_index: index of the output unit (class) to maximize.
        step: step size for gradient ascent.
        alpha: penalty coefficient on the input norm in the loss.
        initialize: 'random' starts from random noise; 'mean' starts from the
            per-pixel mean image returned by `initial(initialize_index)`.
        initialize_index: class index passed to `initial`; only used when
            initialize == 'mean'.
        save: when True, write the result to
            'final_image_VGG_<initialize>_<output_index>.png', replacing any
            existing file of that name.

    Raises:
        ValueError: if `initialize` is neither 'random' nor 'mean'.
    """
    # Validate before any graph building so a typo fails immediately.
    if initialize not in ('random', 'mean'):
        raise ValueError('Initialization method should be "random" or "mean".')

    # dimensions of the generated picture
    img_width = 224
    img_height = 224

    # this is the placeholder for the input images
    input_img = model.input

    # find the score value before softmax activation:
    # 1. recreate the final dense layer without an activation, sized from the
    #    weights that will be copied into it (kernel, plus bias when present)
    fc_output = model.layers[-2].output
    wgts = model.layers[-1].get_weights()
    n_outputs = wgts[0].shape[-1]
    outDense = Dense(n_outputs, name='newDense', use_bias=len(wgts) > 1)(fc_output)

    # 2. create the new model and copy the trained weights over
    checkingModel = Model(model.inputs, outDense)
    checkingModel.get_layer('newDense').set_weights(wgts)

    model_output = checkingModel.output

    # we build a loss function that maximizes the activation
    loss = model_output[:, output_index] - alpha * tf.norm(input_img)

    # we compute the gradient of the input picture wrt this loss
    grads = K.gradients(loss, input_img)[0]

    # normalization trick: we normalize the gradient
    grads = normalize(grads)

    # this function returns the loss and grads given the input picture
    iterate = K.function([input_img], [loss, grads])

    # starting image: random noise or the class-mean image
    if initialize == 'random':
        if K.image_data_format() == 'channels_first':
            input_img_data = np.random.random((1, 3, img_width, img_height))
        else:
            input_img_data = np.random.random((1, img_width, img_height, 3))
    else:
        input_img_data = np.expand_dims(initial(initialize_index), axis = 0)
    # NOTE(review): this rescaling turns uniform noise into a gray image with
    # small perturbations; it is applied unchanged to the class-mean image as
    # well -- confirm that is intended for the value range of the dataset.
    input_img_data = (input_img_data - 0.5) * 20 + 128

    # we run gradient ascent for `iter` steps
    for i in range(iter):
        loss_value, grads_value = iterate([input_img_data])
        input_img_data += grads_value * step
        print('Current loss value for %d:' % i, loss_value)

    if save:
        img = deprocess_image(input_img_data[0])
        out_path = 'final_image_VGG_%s_%s.png' % (initialize, str(output_index))
        if os.path.exists(out_path):
            os.remove(out_path)
        save_img(out_path, img)
          



In [0]:
# Class visualizations starting from random noise (the default `initialize`).
# Each call targets one output index of the full VGG16 model; the trailing
# comment names the class that index is meant to correspond to.
visualize_softmax_dense_VGG(model = model, iter = 200, output_index = 1) # goldfish
visualize_softmax_dense_VGG(model = model, iter = 200, output_index = 248) # husky
visualize_softmax_dense_VGG(model = model, iter = 300, output_index = 951) # lemon 
visualize_softmax_dense_VGG(model = model, iter = 300, output_index = 9) # ostrich
visualize_softmax_dense_VGG(model = model, iter = 200, output_index = 55) # snake

In [0]:
# Same five output indices, this time starting from a class-mean image and
# running 2000 ascent steps. `initialize_index` is the class index in the
# local five-class dataset whose mean image is used as the starting point.
visualize_softmax_dense_VGG(model = model, iter = 2000, output_index = 1,
                            initialize = 'mean', initialize_index = 0) # goldfish
visualize_softmax_dense_VGG(model = model, iter = 2000, output_index = 248,
                            initialize = 'mean', initialize_index = 1) # husky
visualize_softmax_dense_VGG(model = model, iter = 2000, output_index = 951,
                            initialize = 'mean', initialize_index = 2) # lemon
visualize_softmax_dense_VGG(model = model, iter = 2000, output_index = 9,
                            initialize = 'mean', initialize_index = 3) # ostrich
visualize_softmax_dense_VGG(model = model, iter = 2000, output_index = 55,
                            initialize = 'mean', initialize_index = 4) # snake

## Model Training on the 5-class ImageNet Dataset

In [0]:
# Training configuration for the small CNN defined in the next cell.
num_classes = 5                   # number of classes in the dataset
batch_size = 128                  # samples per gradient update, passed to model.fit
input_shape = x_train.shape[1:]   # shape of one sample (x_train without its first axis)
epochs = 5                        # passes over the training data, passed to model.fit

In [0]:
# Start construction of the Keras Sequential model.
# Layout: four conv -> max-pool -> batch-norm stages, then a 128-unit dense
# layer and a linear output layer followed by a separate Softmax layer.
model = Sequential()

# First convolutional layer with ReLU-activation and max-pooling.
model.add(keras.layers.Conv2D(filters = 32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape= input_shape, padding = "same", name = 'layer_conv1'))
model.add(keras.layers.MaxPooling2D(pool_size=(2, 2), strides = 2, name = 'max_pool1', padding = "same"))
model.add(keras.layers.BatchNormalization())

# Second convolutional layer with ReLU-activation and max-pooling.
model.add(keras.layers.Conv2D(filters = 64, kernel_size=(3, 3),
                 activation='relu', padding = "same", name = 'layer_conv2'))
model.add(keras.layers.MaxPooling2D(pool_size=(2, 2), strides = 2, padding = "same", name = 'max_pool2'))
model.add(keras.layers.BatchNormalization())

# Third convolutional layer with ReLU-activation and max-pooling.
model.add(keras.layers.Conv2D(filters = 128, kernel_size=(5, 5),
                 activation='relu', padding = "same", name = 'layer_conv3'))
model.add(keras.layers.MaxPooling2D(pool_size=(2, 2), strides = 2, padding = "same", name = 'max_pool3'))
model.add(keras.layers.BatchNormalization())

# Fourth convolutional layer with ReLU-activation and max-pooling,
# followed by dropout.
model.add(keras.layers.Conv2D(filters = 128, kernel_size=(5, 5),
                 activation='relu', padding = "same", name = 'layer_conv4'))
model.add(keras.layers.MaxPooling2D(pool_size=(2, 2), strides = 2, padding = "same", name = 'max_pool4'))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dropout(0.3))

# Flatten
model.add(keras.layers.Flatten())

# fully connected layers
model.add(keras.layers.Dense(128, activation='relu', name = 'fc_layer1'))
model.add(keras.layers.Dropout(0.3))
model.add(keras.layers.BatchNormalization())

# Output layer sized from `num_classes` (set in the configuration cell)
# instead of a second hard-coded 5. The softmax is kept as its own layer so
# that model.layers[-2] exposes the raw class scores for visualization.
model.add(keras.layers.Dense(num_classes, use_bias = True, name = 'fc_layer3'))
model.add(keras.layers.Softmax())


model.compile(loss='categorical_crossentropy',
              optimizer= "adam",
              metrics=['accuracy'])

In [0]:
# Train on the training split using the batch size and epoch count set in
# the configuration cell. No validation data is passed to fit().
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs= epochs)

# Evaluate on the held-out test split; with the compile settings above the
# result is [loss, accuracy]. Leaving `score` as the last expression shows
# it as the cell output.
score = model.evaluate(x_test, y_test, verbose=0)
score

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[0.6504743482827315, 0.801038062283737]

In [0]:
# Print the architecture of the five-class CNN (layer names, output shapes, parameter counts).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
layer_conv1 (Conv2D)         (None, 224, 224, 32)      896       
_________________________________________________________________
max_pool1 (MaxPooling2D)     (None, 112, 112, 32)      0         
_________________________________________________________________
batch_normalization_16 (Batc (None, 112, 112, 32)      128       
_________________________________________________________________
layer_conv2 (Conv2D)         (None, 112, 112, 64)      18496     
_________________________________________________________________
max_pool2 (MaxPooling2D)     (None, 56, 56, 64)        0         
_________________________________________________________________
batch_normalization_17 (Batc (None, 56, 56, 64)        256       
_________________________________________________________________
layer_conv3 (Conv2D)         (None, 56, 56, 128)       204928    
__________

## Visualization of the Model Trained on the 5-class ImageNet Dataset

In [0]:
def visualize_softmax_dense(model, iter, output_index = 0, step = 0.5, 
                             alpha = 0.1,save = True):
    """Synthesize an input image that maximizes one pre-softmax class score.

    The score is read from `model.layers[-2]`, i.e. the layer directly before
    the last one; this assumes the model ends in a standalone softmax layer
    preceded by a linear Dense layer. The loss is that score minus `alpha`
    times the norm of the input image.

    Args:
        model: loaded model laid out as described above.
        iter: number of gradient-ascent steps.
        output_index: index of the output unit (class) to maximize.
        step: step size for gradient ascent.
        alpha: penalty coefficient on the input norm in the loss.
        save: when True, write the result to 'final_image_<output_index>.png',
            replacing any existing file of that name.
    """
    # fallback dimensions, used only where the model does not declare a size
    img_width = 224
    img_height = 224

    # this is the placeholder for the input images
    input_img = model.input

    # value before the softmax activation
    model_output = model.layers[-2].output

    # we build a loss function that maximizes the activation
    loss = model_output[:, output_index] - alpha * tf.norm(input_img)

    # we compute the gradient of the input picture wrt this loss
    grads = K.gradients(loss, input_img)[0]

    # normalization trick: we normalize the gradient
    grads = normalize(grads)

    # this function returns the loss and grads given the input picture
    iterate = K.function([input_img], [loss, grads])

    # Size the starting image from the model's declared input shape so that
    # models trained on other image sizes work too; undeclared dimensions
    # fall back to 224 x 224 with 3 channels.
    if K.image_data_format() == 'channels_first':
        fallback_shape = (3, img_width, img_height)
    else:
        fallback_shape = (img_width, img_height, 3)
    declared_shape = K.int_shape(input_img)[1:]
    start_shape = tuple(
        fallback if declared is None else declared
        for declared, fallback in zip(declared_shape, fallback_shape))

    # we start from a gray image with some random noise
    input_img_data = np.random.random((1,) + start_shape)
    input_img_data = (input_img_data - 0.5) * 20 + 128

    # we run gradient ascent for `iter` steps
    for i in range(iter):
        loss_value, grads_value = iterate([input_img_data])
        input_img_data += grads_value * step
        print('Current loss value for %d:' % i, loss_value)

    if save:
        img = deprocess_image(input_img_data[0])
        out_path = 'final_image_%s.png' % (str(output_index))
        if os.path.exists(out_path):
            os.remove(out_path)
        save_img(out_path, img)

In [0]:
# Class visualizations for the five-class CNN trained above (output indices 0 and 2).
# alpha = 0 removes the input-norm penalty from the loss, so only the
# pre-softmax score is maximized.
visualize_softmax_dense(model = model, iter = 500, output_index = 0, step = 0.1, alpha = 0)
visualize_softmax_dense(model = model, iter = 100, output_index = 2, step = 0.2, alpha = 0)