Paper:
https://arxiv.org/pdf/1512.04150.pdf

GitHub:
https://github.com/jacobgil/keras-cam

In [1]:
import cv2
import glob
import os
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from keras.utils.np_utils import to_categorical
from keras.datasets import mnist

from keras.models import Sequential
from keras.layers import Convolution2D, ZeroPadding2D, MaxPooling2D
from keras.layers import Dense, Activation, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.layers.core import Flatten, Dense, Dropout, Lambda
from keras import backend as K
import h5py
from keras.optimizers import SGD, Adam

from keras.models import *
from keras.callbacks import *
import keras.backend as K
import cv2
import argparse
from keras.preprocessing import image
from sklearn.model_selection import train_test_split

# from keras.applications.vgg16 import preprocess_input

from keras.applications.resnet50 import ResNet50, preprocess_input

def load_mnist():
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    return np.expand_dims(X_train, axis=3), np.expand_dims(X_test, axis=3), to_categorical(y_train), to_categorical(y_test)

Using TensorFlow backend.


In [2]:
img_size = (244,244)

In [3]:
def load_poet(img_size, input_channels=3):
    def load_images(files, grayscale=False):
        if grayscale:
            return preprocess_input(np.array([image.img_to_array(image.load_img(file, target_size=img_size, color_mode='grayscale')) 
                             for file in files]))
        else:
            return preprocess_input(np.array([image.img_to_array(image.load_img(file, target_size=img_size)) 
                             for file in files]))
    
    main_path = r'./POETdataset/PascalImages/'
    grayscale = True
    if input_channels == 3:
        grayscale = False
    
    print("Loading POET dataset...")
    
    classes = ['aeroplane', 'boat', 'dog', 'bicycle', 'cat', 'cow', 'diningtable', 'horse', 'motorbike','sofa']
    classes = [word + '*' for word in classes]
    files_list = [glob.glob(main_path + class_) for class_ in classes]
    
    for files in files_list:
        assert len(files) > 0
    
    x = []
    y = []
    class_map = {}
    files_dict = {class_name: class_files for class_name, class_files in zip(classes, files_list)}
    for class_number, (class_name, files) in enumerate(files_dict.items()):
        x.append(load_images(files))
        y.append([class_number] * len(files))
        print(class_name, class_number)
        class_map[class_name] = class_number

    X_images = np.concatenate(x)
    y = np.concatenate(y)
    print('X.shape:', X_images.shape, 'y.shape:', y.shape)

#     X_train, X_test, y_train, y_test = train_test_split(X_images, y, test_size=0.01, stratify=y)
    print("Loaded POET dataset.")
#     X_train.shape, y_train.shape, X_test.shape, y_test.shape
#     return X_train, X_test, to_categorical(y_train), to_categorical(y_test), class_map
    return X_images, to_categorical(y), class_map

In [4]:
def global_average_pooling(x):
#     print(x.shape)
    return K.mean(x, axis = (1, 2))

def global_average_pooling_shape(input_shape):
#     print(input_shape)
    return input_shape[2:4]

def other_model(img_shape):
    ##model building
    model = Sequential()
    #convolutional layer with rectified linear unit activation
    model.add(Conv2D(32, kernel_size=(3, 3),
                     activation='relu',
                     input_shape=img_shape))
    #32 convolution filters used each of size 3x3
    #again
    model.add(Conv2D(64, (3, 3), activation='relu'))
    #64 convolution filters used each of size 3x3
    #choose the best features via pooling
    model.add(MaxPooling2D(pool_size=(2, 2)))
    #randomly turn neurons on and off to improve convergence
    model.add(Dropout(0.25))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    #randomly turn neurons on and off to improve convergence
    model.add(Dropout(0.25))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    #randomly turn neurons on and off to improve convergence
    model.add(Dropout(0.25))
    model.add(Conv2D(512, (3, 3), activation='relu'))
    #flatten since too many dimensions, we only want a classification output
#     model.add(Flatten())
    #fully connected to get all relevant data
#     model.add(Dense(128, activation='relu'))
    #one more dropout for convergence' sake :) 
#     model.add(Dropout(0.5))
    #output a softmax to squash the matrix into output probabilities
#     model.add(Dense(1, activation='sigmoid'))
    return model

def VGG16_convolutions(img_shape):
    model = Sequential()
    model.add(ZeroPadding2D((1,1),input_shape=img_shape))
    model.add(Convolution2D(64, 3, 3, activation='relu', name='conv1_1'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(64, 3, 3, activation='relu', name='conv1_2'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(128, 3, 3, activation='relu', name='conv2_1'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(128, 3, 3, activation='relu', name='conv2_2'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_1'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_2'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_3'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_1'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_2'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_3'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_1'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_2'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_3'))
    return model

def resnet():
    return ResNet50(include_top=False, weights='imagenet')

def get_model(out_classes, img_shape):
    model = VGG16_convolutions(img_shape=img_shape)
    model = load_model_weights(model, "./vgg.hdf5")
    
#     model = resnet()
    for layer in model.layers[:]:
        layer.trainable = False

#     model = other_model(img_shape=img_shape)
        
    model.layers[-1].name = 'last_conv'
    
    model.add(Lambda(global_average_pooling, 
              output_shape=global_average_pooling_shape))
    model.add(Dense(out_classes, activation = 'softmax'))
#     model = load_model_weights(model, "./weights/vggweights.04-8.53.hdf5")

    optm = SGD(lr=0.001, decay=1e-6, momentum=0.5, nesterov=True)
#     optm = Adam()
    model.compile(loss = 'categorical_crossentropy', optimizer = optm, metrics=['accuracy'])
    
    print(model.summary())
    for layer in model.layers:
        print(layer.name, layer.trainable)
    return model

def load_model_weights(model, weights_path):
    print('Loading model.')
#     f = h5py.File(weights_path)
#     print(f, f.attrs.keys())
#     if 'nb_layers' in f.attrs.keys():
#         for k in range(f.attrs['nb_layers']):
#             if k >= len(model.layers):
#                 # we don't look at the last (fully-connected) layers in the savefile
#                 break
#             g = f['layer_{}'.format(k)]
#             weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
#             model.layers[k].set_weights(weights)
#             model.layers[k].trainable = False
#         f.close()
#         print('Model loaded.')
#     else:
#         print("MODEL NOT LOADED")
    model.load_weights(weights_path)

    return model

def get_output_layer(model, layer_name):
    # get the symbolic outputs of each "key" layer (we gave them unique names).
    layer_dict = dict([(layer.name, layer) for layer in model.layers])
    layer = layer_dict[layer_name]
    return layer

In [5]:
def train(classes_n=3, poet=True, img_size=(28,28), input_channels=1, epochs_n=40, batch_size=64):
    img_shape = (img_size[0], img_size[1], input_channels)
    if poet:
        model = get_model(out_classes = classes_n, img_shape=img_shape)
        X_train, y_train, class_map = load_poet(img_size=img_size, input_channels=input_channels)
    else:
        model = get_model(out_classes = 10, input_channels = 1, img_size=img_size)
        X_train, _, y_train, _ = load_mnist()
        
    print("Training..")
    checkpoint_path="./weights/vggweights.{epoch:02d}-{val_loss:.2f}.hdf5"
    checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=False, mode='auto')
    history = model.fit(X_train, y_train, epochs=epochs_n, batch_size=batch_size, validation_split=0.1, verbose=1, callbacks=[checkpoint])
    
    return history, X_train, y_train, class_map
    
def visualize_class_activation_map(model_path, img, output_path):
    model = load_model(model_path)
#         original_img = cv2.imread(img_path, 1)
    original_img = img.reshape(img.shape)
    width, height, _ = original_img.shape
    original_img = np.expand_dims(original_img, axis=0)

    #Reshape to the network input shape (3, w, h).
    img = np.array([np.transpose(np.float32(original_img), (2, 0, 1))])

    #Get the 512 input weights to the softmax.
    class_weights = model.layers[-1].get_weights()[0]
    final_conv_layer = get_output_layer(model, "last_conv")
    get_output = K.function([model.layers[0].input], [final_conv_layer.output, model.layers[-1].output])
    [conv_outputs, predictions] = get_output([original_img])
    conv_outputs = conv_outputs[0, :, :, :]

    #Create the class activation map.
    cam = np.zeros(dtype = np.float32, shape = conv_outputs.shape[1:3])
    for i, w in enumerate(class_weights[:, 1]):
            cam += w * conv_outputs[i, :, :]
    print("predictions", predictions)
    cam /= np.max(cam)
    cam = cv2.resize(cam, (height, width))
    heatmap = cv2.applyColorMap(np.uint8(255*cam), cv2.COLORMAP_JET)
    heatmap[np.where(cam < 0.2)] = 0
    img = heatmap*0.5 + original_img
    cv2.imwrite(output_path, img)
    
def visualize_class_activation_map_org(model_path, original_img, output_path, inv_class_map):
    model = load_model(model_path)
    width, height, _ = original_img.shape

    img = np.array([np.transpose(np.float32(original_img), (0, 1, 2))])

    class_weights = model.layers[-1].get_weights()[0]
    final_conv_layer = get_output_layer(model, "last_conv")
    get_output = K.function([model.layers[0].input], [final_conv_layer.output, model.layers[-1].output])
    [conv_outputs, predictions] = get_output([img])
    print(conv_outputs.shape)
    conv_outputs = conv_outputs[0, :, :, :]
    print(conv_outputs.shape)
    print(conv_outputs.shape[0:2])
    print('class weights:', class_weights.shape)

    class_n = np.argmax(predictions)
    print(inv_class_map[class_n])

    #Create the class activation map.
    cam = np.zeros(dtype = np.float32, shape = conv_outputs.shape[0:2])
    for i, w in enumerate(class_weights[:, class_n]):
        cam += w * conv_outputs[:, :, i]
    print("predictions", predictions)
    cam /= np.max(cam)
    cam = cv2.resize(cam, (height, width))
    heatmap = cv2.applyColorMap(np.uint8(255*cam), cv2.COLORMAP_JET)
    heatmap[np.where(cam < 0.4)] = 0
    img = (heatmap*0.5 + original_img)
    img /= np.max(img)
    print(np.max(img), np.min(img))
    plt.imshow(img)
#     cv2.imwrite(output_path, img)
#     cv2.imshow('test', img)

In [6]:
hist, X, y, class_map = train(classes_n = 10, poet=True, img_size=img_size, input_channels=3, epochs_n=200, batch_size=32)



Loading model.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
zero_padding2d_1 (ZeroPaddin (None, 246, 246, 3)       0         
_________________________________________________________________
conv1_1 (Conv2D)             (None, 244, 244, 64)      1792      
_________________________________________________________________
zero_padding2d_2 (ZeroPaddin (None, 246, 246, 64)      0         
_________________________________________________________________
conv1_2 (Conv2D)             (None, 244, 244, 64)      36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 122, 122, 64)      0         
_________________________________________________________________
zero_padding2d_3 (ZeroPaddin (None, 124, 124, 64)      0         
_________________________________________________________________
conv2_1 (Conv2D)             (None, 122, 122, 128)     73856 

KeyboardInterrupt: 

In [None]:
visualize_class_activation_map_org(model_path="./weights/vggweights.15-10.04.hdf5", original_img=X[5551], output_path='poet0.jpg', inv_class_map=inv_class_map)

In [59]:
X, y, class_map = load_poet(img_size=img_size)
inv_class_map = inv_map = {v: k for k, v in class_map.items()}

Loading POET dataset...
aeroplane* 0
boat* 1
dog* 2
bicycle* 3
cat* 4
cow* 5
diningtable* 6
horse* 7
motorbike* 8
sofa* 9
X.shape: (6270, 128, 128, 3) y.shape: (6270,)
Loaded POET dataset.


In [113]:
visualize_class_activation_map_org(model_path="./weights/weights.38-0.81.hdf5", original_img=X[2], output_path='poet0.jpg')

TypeError: visualize_class_activation_map_org() missing 1 required positional argument: 'inv_class_map'

In [48]:
visualize_class_activation_map(model_path="weights.02-6.43.hdf5", img=X[0], output_path='poet0.jpg')

Loading POET dataset...
Loaded POET dataset.


ValueError: Cannot feed value of shape (1, 64, 3, 64) for Tensor 'zero_padding2d_48_input_3:0', which has shape '(?, 3, ?, ?)'

In [188]:
import keras
model = keras.applications.vgg16.VGG16(include_top=False, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [190]:
pretrained_vgg = model

In [198]:
pretrained_vgg.save_weights('./vgg.h5')
model.load_weights('./')