##### Copyright 2018 Google LLC.

Licensed under the Apache License, Version 2.0 (the "License");

In [1]:
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Channel Attribution -- Building Blocks of Interpretability

This colab notebook is part of our **Building Blocks of Interpretability** series exploring how interpretability techniques combine together to explain neural networks. If you haven't already, make sure to look at the [**corresponding paper**](https://distill.pub/2018/building-blocks) as well!

This notebook demonstrates **Channel Attribution**, a technique for exploring how different detectors in the network affected its output.

<br>
<img src="https://storage.googleapis.com/lucid-static/building-blocks/notebook_heroes/channel-attribution.jpeg" width="648"></img>
<br>

This tutorial is based on [**Lucid**](https://github.com/tensorflow/lucid), a library for visualizing neural networks. Lucid is a kind of spiritual successor to DeepDream, but provides flexible abstractions so that it can be used for a wide range of interpretability research.

**Note**: The easiest way to use this tutorial is [as a colab notebook](), which allows you to dive in with no setup. We recommend you enable a free GPU by going:

> **Runtime**   →   **Change runtime type**   →   **Hardware Accelerator: GPU**

Thanks for trying Lucid!

#### **This notebook is a Jupyter version of the original Google Colab Notebook. This version adds widgets to facilitate the use of Lucid on your own images.**



**In order to use this notebook you need to generate or download a spritemap of feature activations corresponding to your model and the layer you want to study.**


For example, you can download the GoogLeNet spritemap for layer `mixed4a` from [this url](https://storage.googleapis.com/lucid-static/building-blocks/googlenet_spritemaps/sprite_mixed4a_channel_alpha.jpeg), or you can generate the spritemap of your choice with the SpritemapGenerator notebook located in `notebooks/building-blocks/jupyter-versions/`.

In [2]:
# !npm install -g svelte-cli@2.2.0
!pip install ipyfilechooser ipywidgets --quiet

import os

import numpy as np
import tensorflow as tf
# Silence TensorFlow deprecation warnings (comment out the next three lines to see them):
from tensorflow.python.util import deprecation
deprecation._PRINT_DEPRECATION_WARNINGS = False
tf.logging.set_verbosity(tf.compat.v1.logging.ERROR)

from ipyfilechooser import FileChooser
import ipywidgets as widgets
from IPython.core.display import display, HTML

import lucid.modelzoo.vision_models as models
from lucid.misc.io import show
import lucid.optvis.objectives as objectives
import lucid.optvis.param as param
import lucid.optvis.render as render
import lucid.optvis.transform as transform
from lucid.misc.io import show, load
from lucid.misc.io.reading import read
from lucid.misc.io.showing import _image_url, _display_html
import lucid.scratch.web.svelte as lucid_svelte

You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [3]:
# Instantiate the InceptionV1 model from lucid.modelzoo.vision_models; this
# `model` object is the one every attribution function below receives.
model = models.InceptionV1()

In [4]:
def _strip_first_field(text):
    """Drop everything before the first space of `text` and trim whitespace.

    When `text` contains no space, `find` returns -1, so only the last
    character of `text` is kept.
    """
    return text[text.find(" "):].strip()


# Each line of the downloaded file has its first two space-separated fields
# removed; underscores in what remains become spaces.
labels_str = read("https://gist.githubusercontent.com/aaronpolhamus/964a4411c0906315deb9f4a3723aac57/raw/aa66dd9dbf6b56649fa3fab83659b2acbf3cbfd1/map_clsloc.txt",mode='r')
labels = sorted(
    ["dummy"]
    + [
        _strip_first_field(_strip_first_field(line)).replace("_", " ")
        for line in labels_str.split("\n")
    ]
)

**ChannelAttrWidget**

Let's make a little widget for showing all our channels and attribution values.

In [5]:
%%html_define_svelte ChannelAttrWidget

<!--
  ChannelAttrWidget: shows the entries of attrsPos, then a "..." gap, then the
  entries of attrsNeg.
  Each entry has two parts:
    * a sprite cut out of the image at spritemap_url with background-position:
      column = n modulo sprite_n_wrap, row = floor(n / sprite_n_wrap), each
      cell being sprite_size pixels wide and high;
    * the value v on a coloured background: hue 210 when v is above 0 and
      hue 0 otherwise, with saturation and lightness driven by abs(v) / 1.8.
  When attrsPos holds more than 5 entries, two clearing line breaks are
  inserted before the gap.
-->
<div class="figure">
  <div class="channel_list" >
    {{#each attrsPos as attr}}
    <div class="entry">
      <div class="sprite" style="background-image: url({{spritemap_url}}); width: {{sprite_size}}px; height: {{sprite_size}}px; background-position: -{{sprite_size*(attr.n%sprite_n_wrap)}}px -{{sprite_size*Math.floor(attr.n/sprite_n_wrap)}}px;"></div>
      <div class="value" style="background-color: hsl({{(attr.v > 0)? 210 : 0}}, {{100*Math.abs(attr.v)/1.8}}%, {{100-30*Math.abs(attr.v)/1.8}}%)">{{attr.v}}</div>
    </div>
    {{/each}}
    {{#if attrsPos.length > 5}}
    <br style="clear:both;">
    <br style="clear:both;">
    {{/if}}
    <div class="gap">...</div>
    {{#each attrsNeg as attr}}
    <div class="entry">
      <div class="sprite" style="background-image: url({{spritemap_url}}); width: {{sprite_size}}px; height: {{sprite_size}}px; background-position: -{{sprite_size*(attr.n%sprite_n_wrap)}}px -{{sprite_size*Math.floor(attr.n/sprite_n_wrap)}}px;"></div>
      <div class="value" style="background-color: hsl({{(attr.v > 0)? 210 : 0}}, {{100*Math.abs(attr.v)/1.8}}%, {{100-30*Math.abs(attr.v)/1.8}}%)">{{attr.v}}</div>
    </div>
    {{/each}}
  </div>
  <br style="clear:both">
</div>


<style>
  .entry{
    float: left;
    margin-right: 4px;
  }
  .gap {
    float: left;
    margin: 8px;
    font-size: 400%;
  }
</style>

<!--
  Component defaults: empty spritemap_url, sprite_size 110, sprite_n_wrap 22
  and empty attrsPos / attrsNeg lists.
  NOTE(review): the range helper is registered under helpers but the template
  above never calls it, and the computed section is empty.
-->
<script>
    
  function range(n){
    return Array(n).fill().map((_, i) => i);
  }
  
  export default {
    data () {
      return {
        spritemap_url: "",
        sprite_size: 110,
        sprite_n_wrap: 22,
        attrsPos: [],
        attrsNeg: [],
      };
    },
    computed: {
    },
    helpers: {range}
  };
</script>

Trying to build svelte component from html...
svelte compile --format iife /tmp/svelte_xaebppqv/ChannelAttrWidget_a11d0b66_6448_4125_9f52_2827a93f872e.html > /tmp/svelte_xaebppqv/ChannelAttrWidget_a11d0b66_6448_4125_9f52_2827a93f872e.js
b'svelte version 1.64.1\ncompiling ../../../../tmp/svelte_xaebppqv/ChannelAttrWidget_a11d0b66_6448_4125_9f52_2827a93f872e.html...\n'


**BarsWidget**

It would also be nice to see the distribution of attribution magnitudes. Let's make another widget for that.

In [6]:
%%html_define_svelte BarsWidget

<!--
  BarsWidget: draws one inline bar per entry of vals, in the order given.
  Bar height is 15 * abs(val) pixels. The colour uses hue 210 when val is
  above 0 and hue 0 otherwise; saturation is max(90, 110 * abs(val) / 1.8)
  percent and lightness is min(80, 100 - 40 * abs(val) / 1.8) percent.
  The component's only data field is vals, which defaults to an empty list.
-->
<div class="figure">
  <div class="channel_list" >
    {{#each vals as val}}
    <div class="bar" style="height: {{15*Math.abs(val)}}px; background-color: hsl({{(val > 0)? 210 : 0}}, {{Math.max(90, 110*Math.abs(val)/1.8)}}%, {{Math.min(80, 100-40*Math.abs(val)/1.8)}}%);">
    </div>
    {{/each}}
  </div>
  <br style="clear:both">
</div>


<style>
  .channel_list {
    background-color: #FEFEFE;
  }
  .bar {
    width: 1.5px;
    height: 10px;
    display: inline-block;
  }
</style>

<script>
  
  export default {
    data () {
      return {
        vals: []
      };
    }
  };
</script>

Trying to build svelte component from html...
svelte compile --format iife /tmp/svelte_xaebppqv/BarsWidget_b26bdc5c_e39c_4043_8c12_002e99aed31e.html > /tmp/svelte_xaebppqv/BarsWidget_b26bdc5c_e39c_4043_8c12_002e99aed31e.js
b'svelte version 1.64.1\ncompiling ../../../../tmp/svelte_xaebppqv/BarsWidget_b26bdc5c_e39c_4043_8c12_002e99aed31e.html...\n'


## **Spritemaps**

In order to show the channels, we need "spritemaps" of channel visualizations.
These visualization spritemaps are large grids of images (such as [this one](https://storage.googleapis.com/lucid-static/building-blocks/sprite_mixed4d_channel.jpeg)) that visualize every channel in a layer.
We provide spritemaps for GoogLeNet because making them takes a few hours of GPU time, but
you can make your own channel spritemaps to explore other models. Check out other notebooks on how to
make your own neuron visualizations.

It's also worth noting that GoogLeNet has unusually semantically meaningful neurons. We don't know why this is -- although it's an active area of research for us. More sophisticated interfaces, such as neuron groups, may work better for networks where meaningful ideas are more entangled or less aligned with the neuron directions.

In [7]:
# Per-layer value handed to the ChannelAttrWidget as `sprite_n_wrap`
# (the widget uses it to turn a channel index into a row/column of the
# spritemap image).
layer_spritemap_sizes = dict(
    mixed3a=16,
    mixed3b=21,
    mixed4a=22,
    mixed4b=22,
    mixed4c=22,
    mixed4d=22,
    mixed4e=28,
    mixed5a=28,
)


def spritemap(layer):
    """Return ``(size, url)`` describing the hosted spritemap of `layer`.

    Args:
        layer: layer name; must be a key of `layer_spritemap_sizes`.

    Returns:
        A tuple of the size registered for the layer and the URL of its
        spritemap image.

    Raises:
        AssertionError: if `layer` has no registered spritemap size.
    """
    assert layer in layer_spritemap_sizes
    url_template = "https://storage.googleapis.com/lucid-static/building-blocks/googlenet_spritemaps/sprite_%s_channel_alpha.jpeg"
    return layer_spritemap_sizes[layer], url_template % layer

**Attribution Code**

In [8]:
def score_f(model, logit, name):
    """Pick the scalar score that `name` designates from `logit`.

    Args:
        model: object exposing a `labels` list.
        logit: indexable collection of per-class logits (a tensor in the
            "logsumexp" case).
        name: ``None``, the string ``"logsumexp"``, or one of `model.labels`.

    Returns:
        ``0`` when `name` is ``None``; the log-sum-exp of `logit` when `name`
        is ``"logsumexp"``; otherwise the entry of `logit` at the position of
        `name` in `model.labels`.

    Raises:
        RuntimeError: if `name` is none of the supported values.
    """
    if name is None:
        return 0

    if name == "logsumexp":
        # Shift by the maximum before exponentiating, then add it back.
        peak = tf.reduce_max(logit)
        shifted_sum = tf.reduce_sum(tf.exp(logit-peak))
        return peak + tf.log(shifted_sum)

    if name not in model.labels:
        raise RuntimeError("Unsupported")

    return logit[model.labels.index(name)]

def channel_attr_simple(model, img, layer, class1, class2, n_show=4):
    """Display a linear (gradient x activation) channel attribution for `img`.

    The score is ``score_f(class1) - score_f(class2)`` computed on the
    "softmax2_pre_activation" logits. Its gradient with respect to the
    activations of `layer` is multiplied by those activations, and the result
    for the first batch element is summed over its two leading axes
    (presumably the spatial axes -- confirm for your model) to give one value
    per channel.

    Args:
        model: model imported with `render.import_model`; also passed to
            `score_f`.
        img: image used as the default value of a ``[None, None, 3]``
            placeholder.
        layer: name of the layer to attribute to; must be accepted by
            `spritemap`.
        class1: first class, forwarded to `score_f`.
        class2: second class, forwarded to `score_f`.
        n_show: number of channels to show in each direction.

    Returns:
        None. A BarsWidget with all channel attributions (sorted descending)
        and a ChannelAttrWidget with the `n_show` most positive and most
        negative channels are displayed.

    Raises:
        AssertionError: from `spritemap` when `layer` has no spritemap.
    """
    # Resolve the spritemap first: spritemap() asserts that the layer is
    # supported, so an unsupported layer fails before any graph is built.
    spritemap_n, spritemap_url = spritemap(layer)

    def _format_value(value):
        # Fixed-point formatting. Truncating str(float) to five characters
        # would turn e.g. 1.2345e-05 into "1.234" and cut large values
        # mid-number, which displays a wrong attribution.
        return "%.3f" % float(value)

    # Set up a graph for doing attribution...
    # The session is only needed as the default session for `.eval()`.
    with tf.Graph().as_default(), tf.Session():
        t_input = tf.placeholder_with_default(img, [None, None, 3])
        T = render.import_model(model, t_input, t_input)

        # Compute activations
        acts = T(layer).eval()

        # Compute gradient
        # (Adapt the softmax layer to your model)
        logit = T("softmax2_pre_activation")[0]
        score = score_f(model, logit, class1) - score_f(model, logit, class2)
        t_grad = tf.gradients([score], [T(layer)])[0]
        grad = t_grad.eval()

        # Let's do a very simple linear approximation attribution.
        # That is, we say the attribution of y to x is
        # the rate at which x changes y (grad of x on y)
        # times the value of x. (activation of x)
        attr = (grad*acts)[0]

        # Then we reduce down to channels.
        channel_attr = attr.sum(0).sum(0)

    # Now we just need to present the results.

    # Let's show the distribution of attributions
    print("Distribution of attribution accross channels:")
    print("")
    lucid_svelte.BarsWidget({"vals" : [float(v) for v in np.sort(channel_attr)[::-1]]})

    # Let's pick the most extreme channels to show
    ns_pos = list(np.argsort(-channel_attr)[:n_show])
    ns_neg = list(np.argsort(channel_attr)[:n_show][::-1])

    # ...  and show them with ChannelAttrWidget
    print("")
    print("Top", n_show, "channels in each direction:")
    print("")
    lucid_svelte.ChannelAttrWidget({
        "spritemap_url": spritemap_url,
        "sprite_size": 110,
        "sprite_n_wrap": spritemap_n,
        "attrsPos": [{"n": int(n), "v": _format_value(channel_attr[n])} for n in ns_pos],
        "attrsNeg": [{"n": int(n), "v": _format_value(channel_attr[n])} for n in ns_neg]
    })


In [9]:
# --- Picture: upload one from the local machine (A) or pick one on the server (B) ---
print("Upload file from local machine and select uploading path (A) or just select one file (B):")
print("A1) Select a file to upload")
uploader = widgets.FileUpload(accept='', multiple=False)
display(uploader)

print("\nA2) Select destination for uploaded file")
print("B) Select file in this server")
notebooks_root_path = ""
fc = FileChooser(".", use_dir_icons=True, select_default=True)
display(fc)

# --- Layer: every layer of the model is offered, the fourth one is preselected ---
layers_list = [layer.name for layer in model.layers]
print("\nSelect the layer ")
layers_widget = widgets.Dropdown(options=layers_list, value=layers_list[3], description='Layers')
display(layers_widget)

# --- Classes: two dropdowns over the same label list, with different defaults ---
print("\nSelect the first class  whose influence is being studied: ")
classes_widget = widgets.Dropdown(options=labels, value=labels[2], description='Classes of documents')
display(classes_widget)

print("\nSelect the second class  whose influence is being studied: ")
classes_widget_bis = widgets.Dropdown(options=labels, value=labels[1], description='Classes of documents')
display(classes_widget_bis)

# --- How many channels to display (1 to 30, default 3) ---
slider_layout = widgets.Layout(width='70%')
slider = widgets.IntSlider(
    value=3, min=1, max=30, step=1,
    description='Number of features to display :',
    layout=slider_layout,
)
display(slider)

Upload file from local machine and select uploading path (A) or just select one file (B):
A1) Select a file to upload


FileUpload(value={}, description='Upload')


A2) Select destination for uploaded file
B) Select file in this server


FileChooser(path='.', filename='', title='HTML(value='', layout=Layout(display='none'))', show_hidden='False',…


Select the layer 


Dropdown(description='Layers', index=3, options=('conv2d0', 'conv2d1', 'conv2d2', 'mixed3a', 'mixed3b', 'mixed…


Select the first class  whose influence is being studied: 


Dropdown(description='Classes of documents', index=2, options=('Afghan hound', 'African chameleon', 'African c…


Select the second class  whose influence is being studied: 


Dropdown(description='Classes of documents', index=1, options=('Afghan hound', 'African chameleon', 'African c…

IntSlider(value=3, description='Number of features to display :', layout=Layout(width='70%'), max=30, min=1)

In [16]:
# Resolve the picture to analyse: either store the uploaded file on the server
# (option A) or use the file picked in the file chooser (option B).
uploaded = uploader.value
if uploaded: # upload local file to server
    if isinstance(uploaded, dict):
        # ipywidgets 7.x: the value is a dict keyed by file name, so indexing
        # it with 0 raises KeyError; take its first (and only) entry instead.
        picture_name, file_info = next(iter(uploaded.items()))
    else:
        # ipywidgets 8.x: the value is a tuple of per-file dicts.
        file_info = uploaded[0]
        picture_name = file_info['name']
    content = file_info['content'] # raw bytes / memoryview of the file
    # Keep only the base name so the file lands inside the chosen directory.
    picture_path = os.path.join(fc.selected_path, os.path.basename(picture_name))
    # Write to picture_path (not the bare file name): that is the path the
    # later cells load the image from.
    with open(picture_path, 'wb') as f:
        f.write(content)
else: # use files already on the server
    picture_path = fc.selected

layer_name = layers_widget.value # layer whose channels are attributed

class_name_1 = classes_widget.value # first class of the comparison
class_name_2 = classes_widget_bis.value # second class of the comparison

n_show = slider.value # number of channels shown in each direction

# Channel attributions from article teaser

In [19]:
# Load the selected picture, show which colour stands for which class, then
# run the linear channel attribution.
img = load(picture_path)

legend = "".join([
    "<h4> Legend : </h4>",
    "<div style='color:blue;font-weight: bold;'>%s</div>" % class_name_1,
    "<div style='color:red;font-weight: bold;'>%s</div>" % class_name_2,
])
display(HTML(legend))

channel_attr_simple(
    model, img, layer_name, class_name_1, class_name_2, n_show=n_show
)

Distribution of attribution accross channels:




Top 3 channels in each direction:



# Channel Attribution - Path Integrated

In [12]:
def channel_attr_path(
    model, img, layer, class1, class2, n_show=4, stochastic_path=False, N=100
):
    """Display a path-integrated channel attribution for `img`.

    The score is ``score_f(class1) - score_f(class2)`` computed on the
    "softmax2_pre_activation" logits. Its gradient with respect to `layer` is
    evaluated at `N` scaled copies of the observed activations,
    ``acts * n / N`` for ``n = 0 .. N-1``; each gradient is multiplied by the
    observed activations and the `N` products are averaged. The result for the
    first batch element is summed over its two leading axes (presumably the
    spatial axes -- confirm for your model) to give one value per channel.

    Args:
        model: model imported with `render.import_model`; also passed to
            `score_f` and queried with `get_layer(layer).depth`.
        img: image used as the default value of a ``[None, None, 3]``
            placeholder.
        layer: name of the layer to attribute to; must be accepted by
            `spritemap`.
        class1: first class, forwarded to `score_f`.
        class2: second class, forwarded to `score_f`.
        n_show: number of channels to show in each direction.
        stochastic_path: when true, every point of the path is additionally
            multiplied, per channel, by a random factor in [0, 1.33].
        N: number of points on the integration path.

    Returns:
        None. A BarsWidget with all channel attributions (sorted descending)
        and a ChannelAttrWidget with the `n_show` most positive and most
        negative channels are displayed.

    Raises:
        AssertionError: from `spritemap` when `layer` has no spritemap.
    """
    # Resolve the spritemap first: spritemap() asserts that the layer is
    # supported, so an unsupported layer fails before the N gradient
    # evaluations below instead of after them.
    spritemap_n, spritemap_url = spritemap(layer)

    def _format_value(value):
        # Fixed-point formatting. Truncating str(float) to five characters
        # would turn e.g. 1.2345e-05 into "1.234" and cut large values
        # mid-number, which displays a wrong attribution.
        return "%.3f" % float(value)

    # Set up a graph for doing attribution
    # The session is only needed as the default session for `.eval()`.
    with tf.Graph().as_default(), tf.Session():
        t_input = tf.placeholder_with_default(img, [None, None, 3])
        T = render.import_model(model, t_input, t_input)

        # Compute activations
        acts = T(layer).eval()

        # Compute gradient
        logit = T("softmax2_pre_activation")[0]
        score = score_f(model, logit, class1) - score_f(model, logit, class2)
        t_grad = tf.gradients([score], [T(layer)])[0]

        # Integrate on a path from acts=0 to acts=acts
        attr = np.zeros(acts.shape[1:])
        # acts_ = [acts * 0/N ; acts * 1/N; ... ; acts * (N-1)/N]

        n_channels = model.get_layer(layer).depth

        for n in range(N):
            acts_ = acts * float(n) / N
            if stochastic_path:
                acts_ *= (
                    np.random.uniform(0, 1, [n_channels]) + np.random.uniform(0, 1, [n_channels])
                ) / 1.5 # vector of dim n_channels containing values in [0, 1.33]
            # Feed the scaled activations in place of the layer's own output.
            grad = t_grad.eval({T(layer): acts_})
            attr += 1.0 / N * (grad * acts)[0] # mean of attributions with different value of n

        # Then we reduce down to channels.
        channel_attr = attr.sum(0).sum(0)

    # Now we just need to present the results.

    # Let's show the distribution of attributions
    print("Distribution of attribution accross channels:")
    print("")
    lucid_svelte.BarsWidget({"vals": [float(v) for v in np.sort(channel_attr)[::-1]]})

    # Let's pick the most extreme channels to show
    ns_pos = list(np.argsort(-channel_attr)[:n_show])
    ns_neg = list(np.argsort(channel_attr)[:n_show][::-1])

    # ...  and show them with ChannelAttrWidget
    print("")
    print("Top", n_show, "channels in each direction:")
    print("")
    lucid_svelte.ChannelAttrWidget(
        {
            "spritemap_url": spritemap_url,
            "sprite_size": 110,
            "sprite_n_wrap": spritemap_n,
            "attrsPos": [{"n": int(n), "v": _format_value(channel_attr[n])} for n in ns_pos],
            "attrsNeg": [{"n": int(n), "v": _format_value(channel_attr[n])} for n in ns_neg]
        }
    )



In [13]:
def compare_attr_methods(model, img, layer_name, class1, class2, n_show):
    """Run the three attribution variants in turn, each under its own heading.

    The linear attribution comes first, followed by the path-integrated one
    and finally the path-integrated one with `stochastic_path=True`. All
    three receive the same model, image, layer, classes and `n_show`.
    """
    shared_args = (model, img, layer_name, class1, class2)

    # 1) gradient x activation
    _display_html("<h2>Linear Attribution</h2>")
    channel_attr_simple(*shared_args, n_show=n_show)

    # 2) deterministic integration path
    _display_html("<br><br><h2>Path Integrated Attribution</h2>")
    channel_attr_path(*shared_args, n_show=n_show)

    # 3) randomly perturbed integration path
    _display_html("<br><br><h2>Stochastic Path Integrated Attribution</h2>")
    channel_attr_path(*shared_args, n_show=n_show, stochastic_path=True)

In [18]:
# Reload the selected picture, show which colour stands for which class, then
# display the three attribution methods one after the other.
img = load(picture_path)

legend = "".join([
    "<h4> Legend : </h4>",
    "<div style='color:blue;font-weight: bold;'>%s</div>" % class_name_1,
    "<div style='color:red;font-weight: bold;'>%s</div>" % class_name_2,
])
display(HTML(legend))

compare_attr_methods(
    model, img, layer_name, class_name_1, class_name_2, n_show=n_show
)

Distribution of attribution accross channels:




Top 3 channels in each direction:



Distribution of attribution accross channels:




Top 3 channels in each direction:



Distribution of attribution accross channels:




Top 3 channels in each direction:

