from __future__ import absolute_import
import numpy as np
from scipy.ndimage.interpolation import zoom
from keras.layers.convolutional import _Conv
from keras.layers.pooling import _Pooling1D, _Pooling2D, _Pooling3D
from keras.layers.wrappers import Wrapper
from keras import backend as K
from ..losses import ActivationMaximization
from ..optimizer import Optimizer
from ..backprop_modifiers import get
from ..utils import utils


def _find_penultimate_layer(model, layer_idx, penultimate_layer_idx):
    """Searches for the nearest penultimate `Conv` or `Pooling` layer.

    Args:
        model: The `keras.models.Model` instance.
        layer_idx: The layer index within `model.layers`.
        penultimate_layer_idx: The index of the pre-layer to `layer_idx`. If set to None, the nearest penultimate
            `Conv` or `Pooling` layer is used.

    Returns:
        The penultimate layer.
    """
    if penultimate_layer_idx is None:
        for idx, layer in utils.reverse_enumerate(model.layers[:layer_idx - 1]):
            if isinstance(layer, Wrapper):
                layer = layer.layer
            if isinstance(layer, (_Conv, _Pooling1D, _Pooling2D, _Pooling3D)):
                penultimate_layer_idx = idx
                break

    if penultimate_layer_idx is None:
        raise ValueError('Unable to determine penultimate `Conv` or `Pooling` '
                         'layer for layer_idx: {}'.format(layer_idx))

    # Handle negative indexing, otherwise the next check can fail.
    if layer_idx < 0:
        layer_idx = len(model.layers) + layer_idx
    if penultimate_layer_idx > layer_idx:
        raise ValueError('`penultimate_layer_idx` needs to be before `layer_idx`')

    return model.layers[penultimate_layer_idx]
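

# A minimal illustrative sketch (not part of the original keras-vis module): shows how the
# fallback search above resolves the penultimate layer on a toy Sequential model. The toy
# architecture below is an assumption chosen purely for illustration.
def _example_find_penultimate_layer():
    from keras.models import Sequential
    from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

    model = Sequential([
        Conv2D(8, (3, 3), activation='relu', input_shape=(32, 32, 3)),
        MaxPooling2D(),
        Flatten(),
        Dense(10, activation='softmax'),
    ])
    # With `penultimate_layer_idx=None`, the nearest `Conv`/`Pooling` layer before the final
    # Dense layer (here, the MaxPooling2D layer) is returned.
    return _find_penultimate_layer(model, layer_idx=-1, penultimate_layer_idx=None)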


def visualize_saliency_with_losses(input_tensor, losses, seed_input, wrt_tensor=None, grad_modifier='absolute'):
    """Generates an attention heatmap over the `seed_input` by using positive gradients of `input_tensor`
    with respect to weighted `losses`.

    This function is intended for advanced use cases where a custom loss is desired. For common use cases,
    refer to `visualize_class_saliency` or `visualize_regression_saliency`.

    For a full description of saliency, see the paper:
    [Deep Inside Convolutional Networks: Visualising Image Classification Models and Saliency Maps]
    (https://arxiv.org/pdf/1312.6034v2.pdf)

    Args:
        input_tensor: An input tensor of shape: `(samples, channels, image_dims...)` if `image_data_format=
            channels_first` or `(samples, image_dims..., channels)` if `image_data_format=channels_last`.
        losses: List of ([Loss](vis.losses#Loss), weight) tuples.
        seed_input: The model input for which the activation map needs to be visualized.
        wrt_tensor: Short for, with respect to. The gradients of losses are computed with respect to this tensor.
            When None, this is assumed to be the same as `input_tensor` (Default value: None)
        grad_modifier: gradient modifier to use. See [grad_modifiers](vis.grad_modifiers.md). By default the `absolute`
            value of gradients is used. To visualize positive or negative gradients, use `relu` and `negate`
            respectively. (Default value = 'absolute')

    Returns:
        The normalized gradients of `seed_input` with respect to weighted `losses`.
    """
    opt = Optimizer(input_tensor, losses, wrt_tensor=wrt_tensor, norm_grads=False)
    grads = opt.minimize(seed_input=seed_input, max_iter=1, grad_modifier=grad_modifier, verbose=False)[1]

    channel_idx = 1 if K.image_data_format() == 'channels_first' else -1
    grads = np.max(grads, axis=channel_idx)
    return utils.normalize(grads)[0]
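

# A minimal illustrative usage sketch (not part of the original keras-vis module): drives the
# custom-loss entry point above directly. The VGG16 model, the class index 20, and the random
# stand-in image are assumptions for illustration only.
def _example_saliency_with_losses():
    from keras.applications.vgg16 import VGG16

    model = VGG16(weights='imagenet')
    # Weight the loss with -1, exactly as `visualize_saliency` does below.
    losses = [(ActivationMaximization(model.layers[-1], [20]), -1)]
    seed_img = np.random.random((224, 224, 3))  # stand-in for a real preprocessed image
    return visualize_saliency_with_losses(model.input, losses, seed_img)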


def visualize_saliency(model, layer_idx, filter_indices, seed_input,
                       wrt_tensor=None, backprop_modifier=None, grad_modifier='absolute'):
    """Generates an attention heatmap over the `seed_input` for maximizing `filter_indices`
    output in the given `layer_idx`.

    Args:
        model: The `keras.models.Model` instance. The model input shape must be: `(samples, channels, image_dims...)`
            if `image_data_format=channels_first` or `(samples, image_dims..., channels)` if
            `image_data_format=channels_last`.
        layer_idx: The layer index within `model.layers` whose filters need to be visualized.
        filter_indices: filter indices within the layer to be maximized.
            If None, all filters are visualized. (Default value = None)
            For `keras.layers.Dense` layers, `filter_idx` is interpreted as the output index.
            If you are visualizing the final `keras.layers.Dense` layer, consider switching the 'softmax' activation for
            'linear' using [utils.apply_modifications](vis.utils.utils#apply_modifications) for better results.
        seed_input: The model input for which the activation map needs to be visualized.
        wrt_tensor: Short for, with respect to. The gradients of losses are computed with respect to this tensor.
            When None, this is assumed to be the same as `input_tensor` (Default value: None)
        backprop_modifier: backprop modifier to use. See [backprop_modifiers](vis.backprop_modifiers.md). If you don't
            specify anything, no backprop modification is applied. (Default value = None)
        grad_modifier: gradient modifier to use. See [grad_modifiers](vis.grad_modifiers.md). By default the `absolute`
            value of gradients is used. To visualize positive or negative gradients, use `relu` and `negate`
            respectively. (Default value = 'absolute')

    Example:
        If you wanted to visualize attention over the 'bird' category, say output index 22 on the
        final `keras.layers.Dense` layer, then `filter_indices = [22]`, `layer = dense_layer`.

        One could also set filter indices to more than one value. For example, `filter_indices = [22, 23]` should
        (hopefully) show an attention map that corresponds to both the 22 and 23 output categories.

    Returns:
        The heatmap image indicating the `seed_input` regions whose change would most contribute towards
        maximizing the output of `filter_indices`.
    """
    if backprop_modifier is not None:
        modifier_fn = get(backprop_modifier)
        model = modifier_fn(model)

    # `ActivationMaximization` loss reduces as outputs get large, hence negative gradients indicate the direction
    # for increasing activations. Multiply with -1 so that positive gradients indicate increase instead.
    losses = [
        (ActivationMaximization(model.layers[layer_idx], filter_indices), -1)
    ]
    return visualize_saliency_with_losses(model.input, losses, seed_input, wrt_tensor, grad_modifier)
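

# A minimal illustrative usage sketch (not part of the original keras-vis module): mirrors the
# 'bird' example from the docstring above. The VGG16 model, the 'predictions' layer name, and
# the random stand-in image are assumptions for illustration only; `utils.find_layer_idx` is
# assumed to be available in the same `utils` module, and `utils.apply_modifications` is used
# to swap softmax for linear as the docstring recommends.
def _example_visualize_saliency():
    from keras import activations
    from keras.applications.vgg16 import VGG16

    model = VGG16(weights='imagenet')
    layer_idx = utils.find_layer_idx(model, 'predictions')

    # Swap softmax for linear on the final Dense layer for better saliency results.
    model.layers[layer_idx].activation = activations.linear
    model = utils.apply_modifications(model)

    seed_img = np.random.random((224, 224, 3))  # stand-in for a real bird image
    return visualize_saliency(model, layer_idx, filter_indices=[22], seed_input=seed_img)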


def visualize_cam_with_losses(input_tensor, losses, seed_input, penultimate_layer, grad_modifier=None):
    """Generates a gradient-based class activation map (CAM) by using positive gradients of `input_tensor`
    with respect to weighted `losses`.

    For details on grad-CAM, see the paper:
    [Grad-CAM: Why did you say that? Visual Explanations from Deep Networks via Gradient-based Localization]
    (https://arxiv.org/pdf/1610.02391v1.pdf).

    Unlike [class activation mapping](https://arxiv.org/pdf/1512.04150v1.pdf), which requires minor changes to
    network architecture in some instances, grad-CAM has a more general applicability.

    Compared to saliency maps, grad-CAM is class discriminative; i.e., the 'cat' explanation exclusively highlights
    cat regions and not the 'dog' regions, and vice-versa.

    Args:
        input_tensor: An input tensor of shape: `(samples, channels, image_dims...)` if `image_data_format=
            channels_first` or `(samples, image_dims..., channels)` if `image_data_format=channels_last`.
        losses: List of ([Loss](vis.losses#Loss), weight) tuples.
        seed_input: The model input for which the activation map needs to be visualized.
        penultimate_layer: The pre-layer to `layer_idx` whose feature maps should be used to compute gradients
            with respect to filter output.
        grad_modifier: gradient modifier to use. See [grad_modifiers](vis.grad_modifiers.md). If you don't
            specify anything, gradients are unchanged. (Default value = None)

    Returns:
        The normalized gradients of `seed_input` with respect to weighted `losses`.
    """
    penultimate_output = penultimate_layer.output
    opt = Optimizer(input_tensor, losses, wrt_tensor=penultimate_output, norm_grads=False)
    _, grads, penultimate_output_value = opt.minimize(seed_input, max_iter=1, grad_modifier=grad_modifier, verbose=False)

    # For numerical stability. Very small grad values along with small penultimate_output_value can cause
    # w * penultimate_output_value to zero out, even for reasonable fp precision of float32.
    grads = grads / (np.max(grads) + K.epsilon())

    # Average pooling across all feature maps.
    # This captures the importance of feature map (channel) idx to the output.
    channel_idx = 1 if K.image_data_format() == 'channels_first' else -1
    other_axis = np.delete(np.arange(len(grads.shape)), channel_idx)
    weights = np.mean(grads, axis=tuple(other_axis))

    # Generate heatmap by computing weight * output over feature maps.
    output_dims = utils.get_img_shape(penultimate_output)[2:]
    heatmap = np.zeros(shape=output_dims, dtype=K.floatx())
    for i, w in enumerate(weights):
        if channel_idx == -1:
            heatmap += w * penultimate_output_value[0, ..., i]
        else:
            heatmap += w * penultimate_output_value[0, i, ...]

    # ReLU thresholding to exclude pattern mismatch information (negative gradients).
    heatmap = np.maximum(heatmap, 0)

    # The penultimate feature map size is definitely smaller than the input image.
    input_dims = utils.get_img_shape(input_tensor)[2:]

    # Figure out the zoom factor and upsample the heatmap to the input image size.
    zoom_factor = [i / (j * 1.0) for i, j in iter(zip(input_dims, output_dims))]
    heatmap = zoom(heatmap, zoom_factor)
    return utils.normalize(heatmap)
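

# A minimal illustrative usage sketch (not part of the original keras-vis module): drives the
# lower-level CAM entry point above with an explicitly chosen penultimate layer. The VGG16
# model, the 'block5_pool' layer name, the class index 20, and the random stand-in image are
# assumptions for illustration only.
def _example_cam_with_losses():
    from keras.applications.vgg16 import VGG16

    model = VGG16(weights='imagenet')
    penultimate_layer = model.get_layer('block5_pool')  # last pooling layer of VGG16
    losses = [(ActivationMaximization(model.layers[-1], [20]), -1)]
    seed_img = np.random.random((224, 224, 3))  # stand-in for a real preprocessed image
    return visualize_cam_with_losses(model.input, losses, seed_img, penultimate_layer)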


def visualize_cam(model, layer_idx, filter_indices,
                  seed_input, penultimate_layer_idx=None,
                  backprop_modifier=None, grad_modifier=None):
    """Generates a gradient-based class activation map (grad-CAM) that maximizes the outputs of
    `filter_indices` in `layer_idx`.

    Args:
        model: The `keras.models.Model` instance. The model input shape must be: `(samples, channels, image_dims...)`
            if `image_data_format=channels_first` or `(samples, image_dims..., channels)` if
            `image_data_format=channels_last`.
        layer_idx: The layer index within `model.layers` whose filters need to be visualized.
        filter_indices: filter indices within the layer to be maximized.
            If None, all filters are visualized. (Default value = None)
            For `keras.layers.Dense` layers, `filter_idx` is interpreted as the output index.
            If you are visualizing the final `keras.layers.Dense` layer, consider switching the 'softmax' activation for
            'linear' using [utils.apply_modifications](vis.utils.utils#apply_modifications) for better results.
        seed_input: The input image for which the activation map needs to be visualized.
        penultimate_layer_idx: The pre-layer to `layer_idx` whose feature maps should be used to compute gradients
            wrt filter output. If not provided, it is set to the nearest penultimate `Conv` or `Pooling` layer.
        backprop_modifier: backprop modifier to use. See [backprop_modifiers](vis.backprop_modifiers.md). If you don't
            specify anything, no backprop modification is applied. (Default value = None)
        grad_modifier: gradient modifier to use. See [grad_modifiers](vis.grad_modifiers.md). If you don't
            specify anything, gradients are unchanged. (Default value = None)

    Example:
        If you wanted to visualize attention over the 'bird' category, say output index 22 on the
        final `keras.layers.Dense` layer, then `filter_indices = [22]`, `layer = dense_layer`.

        One could also set filter indices to more than one value. For example, `filter_indices = [22, 23]` should
        (hopefully) show an attention map that corresponds to both the 22 and 23 output categories.

    Returns:
        The heatmap image indicating the input regions whose change would most contribute towards
        maximizing the output of `filter_indices`.
    """
    if backprop_modifier is not None:
        modifier_fn = get(backprop_modifier)
        model = modifier_fn(model)

    penultimate_layer = _find_penultimate_layer(model, layer_idx, penultimate_layer_idx)

    # `ActivationMaximization` outputs negative gradient values for increase in activations. Multiply with -1
    # so that positive gradients indicate increase instead.
    losses = [
        (ActivationMaximization(model.layers[layer_idx], filter_indices), -1)
    ]
    return visualize_cam_with_losses(model.input, losses, seed_input, penultimate_layer, grad_modifier)
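

# A minimal illustrative usage sketch (not part of the original keras-vis module): an end-to-end
# grad-CAM call mirroring the 'bird' example in the docstring above. The penultimate layer is
# resolved automatically because `penultimate_layer_idx` is left as None. The VGG16 model, the
# class index 22, and the random stand-in image are assumptions for illustration only;
# `utils.find_layer_idx` is assumed to be available in the same `utils` module.
def _example_visualize_cam():
    from keras.applications.vgg16 import VGG16

    model = VGG16(weights='imagenet')
    layer_idx = utils.find_layer_idx(model, 'predictions')
    seed_img = np.random.random((224, 224, 3))  # stand-in for a real image
    return visualize_cam(model, layer_idx, filter_indices=[22], seed_input=seed_img)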