Paper:
https://arxiv.org/pdf/1512.04150.pdf

GitHub:
https://github.com/jacobgil/keras-cam

In [63]:
import cv2
import glob
import os
import numpy as np
from keras.utils.np_utils import to_categorical
from keras.datasets import mnist

from keras.models import Sequential
from keras.layers import Convolution2D, ZeroPadding2D, MaxPooling2D
from keras.layers import Dense, Activation, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.layers.core import Flatten, Dense, Dropout, Lambda
from keras import backend as K
import h5py
from keras.optimizers import SGD

from keras.models import *
from keras.callbacks import *
import keras.backend as K
import cv2
import argparse
from keras.preprocessing import image
from sklearn.model_selection import train_test_split


def load_mnist():
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    return np.expand_dims(X_train, axis=1), np.expand_dims(X_test, axis=1), to_categorical(y_train), to_categorical(y_test)

In [64]:
def load_poet(img_shape):
    print("Loading POET dataset...")
    files_plane = glob.glob(r'C:\Users\Michal\Desktop\cogsci3\POETdataset\PascalImages\aeroplane*')
    files_boat = glob.glob(r'C:\Users\Michal\Desktop\cogsci3\POETdataset\PascalImages\boat*')
    files_dog = glob.glob(r'C:\Users\Michal\Desktop\cogsci3\POETdataset\PascalImages\dog*')

    files = files_plane
    x1 = np.array([image.img_to_array(image.load_img(file, target_size=img_shape, grayscale=True)) for file in files])
    files = files_dog
    x2 = np.array([image.img_to_array(image.load_img(file, target_size=img_shape, grayscale=True)) for file in files])

    X_images = np.concatenate([x1,x2])
    y = np.concatenate([[1]*x1.shape[0],[0]*x2.shape[0]])
    X_images.shape, y.shape

    X_train, X_test, y_train, y_test = train_test_split(X_images, y, test_size=0.1, stratify=y)
    print("Loaded POET dataset.")
#     X_train.shape, y_train.shape, X_test.shape, y_test.shape
    return X_train, X_test, to_categorical(y_train), to_categorical(y_test)

In [89]:
def global_average_pooling(x):
    return K.mean(x, axis = (2, 3))

def global_average_pooling_shape(input_shape):
    return input_shape[0:2]

def other_model(input_channels):
    ##model building
    model = Sequential()
    #convolutional layer with rectified linear unit activation
    model.add(Conv2D(32, kernel_size=(3, 3),
                     activation='relu',
                     input_shape=(1, 64, 64)))
    #32 convolution filters used each of size 3x3
    #again
    model.add(Conv2D(64, (3, 3), activation='relu'))
    #64 convolution filters used each of size 3x3
    #choose the best features via pooling
    model.add(MaxPooling2D(pool_size=(2, 2)))
    #randomly turn neurons on and off to improve convergence
    model.add(Dropout(0.25))
    #flatten since too many dimensions, we only want a classification output
#     model.add(Flatten())
    #fully connected to get all relevant data
#     model.add(Dense(128, activation='relu'))
    #one more dropout for convergence' sake :) 
#     model.add(Dropout(0.5))
    #output a softmax to squash the matrix into output probabilities
#     model.add(Dense(1, activation='sigmoid'))
    return model

def VGG16_convolutions(input_channels):
    model = Sequential()
    model.add(ZeroPadding2D((1,1),input_shape=(input_channels,None,None)))
    model.add(Convolution2D(64, 3, 3, activation='relu', name='conv1_1'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(64, 3, 3, activation='relu', name='conv1_2'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(128, 3, 3, activation='relu', name='conv2_1'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(128, 3, 3, activation='relu', name='conv2_2'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_1'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_2'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_3'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

#     model.add(ZeroPadding2D((1, 1)))
#     model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_1'))
#     model.add(ZeroPadding2D((1, 1)))
#     model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_2'))
#     model.add(ZeroPadding2D((1, 1)))
#     model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_3'))
#     model.add(MaxPooling2D((2, 2), strides=(2, 2)))

#     model.add(ZeroPadding2D((1, 1)))
#     model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_1'))
#     model.add(ZeroPadding2D((1, 1)))
#     model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_2'))
#     model.add(ZeroPadding2D((1, 1)))
#     model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_3'))
    return model

def get_model(out_classes, input_channels):
#     model = VGG16_convolutions(input_channels = input_channels)
    model = other_model(input_channels=input_channels)

#     model = load_model_weights(model, "vgg16_weights.h5")
    
    model.add(Lambda(global_average_pooling, 
              output_shape=global_average_pooling_shape))
    model.add(Dense(out_classes, activation = 'softmax', init='uniform'))
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.5, nesterov=True)
    model.compile(loss = 'categorical_crossentropy', optimizer = sgd, metrics=['accuracy'])
    return model

def load_model_weights(model, weights_path):
    print('Loading model.')
    f = h5py.File(weights_path)
    if 'nb_layers' in f.attrs.keys():
        for k in range(f.attrs['nb_layers']):
            if k >= len(model.layers):
                # we don't look at the last (fully-connected) layers in the savefile
                break
            g = f['layer_{}'.format(k)]
            weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
            model.layers[k].set_weights(weights)
            model.layers[k].trainable = False
    f.close()
    print('Model loaded.')
    return model

def get_output_layer(model, layer_name):
    # get the symbolic outputs of each "key" layer (we gave them unique names).
    layer_dict = dict([(layer.name, layer) for layer in model.layers])
    layer = layer_dict[layer_name]
    return layer

In [90]:
def train(poet=True, img_shape=(28,28)):
    if poet:
        model = get_model(out_classes = 2, input_channels = 1)
        X_train, _, y_train, _ = load_poet(img_shape=img_shape)
    else:
        model = get_model(out_classes = 10, input_channels = 1)
        X_train, _, y_train, _ = load_mnist()
        
    print("Training..")
    checkpoint_path="weights.{epoch:02d}-{val_loss:.2f}.hdf5"
    checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto')
    model.fit(X_train, y_train, nb_epoch=40, batch_size=64, validation_split=0.2, verbose=1, callbacks=[checkpoint])

def visualize_class_activation_map(model_path, img, output_path):
        model = load_model(model_path)
#         original_img = cv2.imread(img_path, 1)
        original_img = img.reshape(img.shape)
        width, height, _ = original_img.shape
        original_img = np.expand_dims(original_img, axis=0)

        #Reshape to the network input shape (3, w, h).
        img = np.array([np.transpose(np.float32(original_img), (2, 0, 1))])
        
        #Get the 512 input weights to the softmax.
        class_weights = model.layers[-1].get_weights()[0]
        final_conv_layer = get_output_layer(model, "conv3_3")
        get_output = K.function([model.layers[0].input], [final_conv_layer.output, model.layers[-1].output])
        [conv_outputs, predictions] = get_output([original_img])
        conv_outputs = conv_outputs[0, :, :, :]

        #Create the class activation map.
        cam = np.zeros(dtype = np.float32, shape = conv_outputs.shape[1:3])
        for i, w in enumerate(class_weights[:, 1]):
                cam += w * conv_outputs[i, :, :]
        print("predictions", predictions)
        cam /= np.max(cam)
        cam = cv2.resize(cam, (height, width))
        heatmap = cv2.applyColorMap(np.uint8(255*cam), cv2.COLORMAP_JET)
        heatmap[np.where(cam < 0.2)] = 0
        img = heatmap*0.5 + original_img
        cv2.imwrite(output_path, img)

In [91]:
img_shape = (64,64)
train(poet=True, img_shape=img_shape)



Loading POET dataset...
Loaded POET dataset.
Training..




Train on 1384 samples, validate on 346 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [55]:
visualize_class_activation_map(model_path="weights.02-6.43.hdf5", img=X[0], output_path='poet0.jpg')

IndexError: index 128 is out of bounds for axis 0 with size 128

In [67]:
img_shape = (64,64)
train(poet=True, img_shape=img_shape)

ValueError: The shape of the input to "Flatten" is not fully defined (got (64, None, None). Make sure to pass a complete "input_shape" or "batch_input_shape" argument to the first layer in your model.

In [48]:
X, _, y, _ = load_poet(img_shape=img_shape)
visualize_class_activation_map(model_path="weights.02-6.43.hdf5", img=X[0], output_path='poet0.jpg')

Loading POET dataset...
Loaded POET dataset.


ValueError: Cannot feed value of shape (1, 64, 3, 64) for Tensor 'zero_padding2d_48_input_3:0', which has shape '(?, 3, ?, ?)'