From e830c8f879aab7d6a35b6e26b804f91735f13f16 Mon Sep 17 00:00:00 2001
From: Ludwig Schubert
Date: Tue, 15 May 2018 12:05:15 -0700
Subject: [PATCH] Increment version to 0.1.0

---
 README.md                            | 18 +++++++++++++++-
 lucid/misc/io/serialize_array.py     |  1 +
 lucid/misc/redirected_relu_grad.py   | 23 +++++++++++++++++---
 lucid/optvis/objectives.py           | 18 +++++++++++++++-
 lucid/optvis/render.py               | 32 ++++++++++++++++++++++------
 setup.py                             |  2 +-
 tests/misc/test_gradient_override.py | 32 ++++++++++++++++++++++++++++
 tests/optvis/test_integration.py     |  5 +++++
 8 files changed, 119 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index 9ffef041..aec7b07b 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,7 @@ run in your browser.
 
 
-## Building Blocks
+## Building Blocks
 
 *Notebooks corresponding to the [Building Blocks of Interpretability](https://distill.pub/2018/building-blocks/) article*
 
@@ -80,6 +80,22 @@ This project is research code. It is not an official Google product.
 
 ## Development
 
+### Style guide deviations
+
+We use naming conventions to help differentiate tensors, operations, and values:
+
+* Suffix variable names representing **tensors** with `_t`
+* Suffix variable names representing **operations** with `_op`
+* Don't suffix variable names representing concrete values
+
+Usage example:
+
+```
+global_step_t = tf.train.get_or_create_global_step()
+global_step_init_op = tf.variables_initializer([global_step_t])
+global_step = global_step_t.eval()
+```
+
 ### Running Tests
 
 Use `tox` to run the test suite on all supported environments.
diff --git a/lucid/misc/io/serialize_array.py b/lucid/misc/io/serialize_array.py
index c8df7953..43e3e15a 100644
--- a/lucid/misc/io/serialize_array.py
+++ b/lucid/misc/io/serialize_array.py
@@ -48,6 +48,7 @@ def _normalize_array(array, domain=(0, 1)):
   array = np.squeeze(array)
   assert len(array.shape) <= 3
   assert np.issubdtype(array.dtype, np.number)
+  assert not np.isnan(array).any()
 
   low, high = np.min(array), np.max(array)
   if domain is None:
diff --git a/lucid/misc/redirected_relu_grad.py b/lucid/misc/redirected_relu_grad.py
index 1820f1da..dce9ed2f 100644
--- a/lucid/misc/redirected_relu_grad.py
+++ b/lucid/misc/redirected_relu_grad.py
@@ -23,13 +23,20 @@ These functions provide a more convenient solution: temporarily override the
 gradient of ReLUs to allow gradient to flow back through the ReLU -- even if it
 didn't activate and had a derivative of zero -- allowing the visualization
-process to get started.
+process to get started. These functions override the gradient for at most 16
+steps. Thus, you need to initialize `global_step` before using these functions.
 
 Usage:
 ```python
 from lucid.misc.gradient_override import gradient_override_map
 from lucid.misc.redirected_relu_grad import redirected_relu_grad
 
+...
+global_step_t = tf.train.get_or_create_global_step()
+init_global_step_op = tf.variables_initializer([global_step_t])
+init_global_step_op.run()
+...
+
 with gradient_override_map({'Relu': redirected_relu_grad}):
   model.import_graph(...)
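+
+# Note: the override only applies while `global_step` is at most 16, so the
+# training loop must also increment it. One way (a sketch; `optimizer` and
+# `loss` come from your own training setup) is to pass the step to the
+# optimizer, as lucid.optvis.render.make_vis_T does:
+#   vis_op = optimizer.minimize(loss, global_step=global_step_t)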
 ```
 
@@ -99,7 +106,12 @@ def redirected_relu_grad(op, grad):
   batch = tf.shape(relu_grad)[0]
   reshaped_relu_grad = tf.reshape(relu_grad, [batch, -1])
   relu_grad_mag = tf.norm(reshaped_relu_grad, axis=1)
-  return tf.where(relu_grad_mag > 0., relu_grad, redirected_grad)
+  result_grad = tf.where(relu_grad_mag > 0., relu_grad, redirected_grad)
+
+  global_step_t = tf.train.get_or_create_global_step()
+  return_relu_grad = tf.greater(global_step_t, tf.constant(16, tf.int64))
+
+  return tf.where(return_relu_grad, relu_grad, result_grad)
 
 
 def redirected_relu6_grad(op, grad):
@@ -125,4 +137,9 @@ def redirected_relu6_grad(op, grad):
   batch = tf.shape(relu_grad)[0]
   reshaped_relu_grad = tf.reshape(relu_grad, [batch, -1])
   relu_grad_mag = tf.norm(reshaped_relu_grad, axis=1)
-  return tf.where(relu_grad_mag > 0., relu_grad, redirected_grad)
+  result_grad = tf.where(relu_grad_mag > 0., relu_grad, redirected_grad)
+
+  global_step_t = tf.train.get_or_create_global_step()
+  return_relu_grad = tf.greater(global_step_t, tf.constant(16, tf.int64))
+
+  return tf.where(return_relu_grad, relu_grad, result_grad)
diff --git a/lucid/optvis/objectives.py b/lucid/optvis/objectives.py
index 7df40fce..96378141 100644
--- a/lucid/optvis/objectives.py
+++ b/lucid/optvis/objectives.py
@@ -131,7 +131,23 @@ def wrap_objective(f, *args, **kwds):
 
 @wrap_objective
 def neuron(layer_name, channel_n, x=None, y=None, batch=None):
-  """Visualize a single neuron of a single channel."""
+  """Visualize a single neuron of a single channel.
+
+  Defaults to the center neuron. When width and height are even numbers, we
+  choose the neuron in the bottom right of the center 2x2 neurons.
+
+  Odd width & height:          Even width & height:
+
+  +---+---+---+                +---+---+---+---+
+  |   |   |   |                |   |   |   |   |
+  +---+---+---+                +---+---+---+---+
+  |   | X |   |                |   |   |   |   |
+  +---+---+---+                +---+---+---+---+
+  |   |   |   |                |   |   | X |   |
+  +---+---+---+                +---+---+---+---+
+                               |   |   |   |   |
+                               +---+---+---+---+
+  """
   def inner(T):
     layer = T(layer_name)
     shape = tf.shape(layer)
diff --git a/lucid/optvis/render.py b/lucid/optvis/render.py
index cb57e278..bc786cdd 100644
--- a/lucid/optvis/render.py
+++ b/lucid/optvis/render.py
@@ -31,6 +31,8 @@ from lucid.optvis import objectives, param, transform
 
 from lucid.misc.io import show
+from lucid.misc.redirected_relu_grad import redirected_relu_grad, redirected_relu6_grad
+from lucid.misc.gradient_override import gradient_override_map
 
 
 # pylint: disable=invalid-name
@@ -40,8 +42,8 @@ def render_vis(model, objective_f, param_f=None, optimizer=None,
-               transforms=None, thresholds=(512,),
-               print_objectives=None, verbose=True,):
+               transforms=None, thresholds=(512,), print_objectives=None,
+               verbose=True, relu_gradient_override=True, use_fixed_seed=False):
   """Flexible optimization-based feature vis.
 
   There's a lot of ways one might wish to customize optimization-based
@@ -72,6 +74,11 @@ def render_vis(model, objective_f, param_f=None, optimizer=None,
       whose values get logged during the optimization.
     verbose: Should we display the visualization when we hit a threshold?
       This should only be used in IPython.
+    relu_gradient_override: Whether to use the gradient override scheme
+      described in lucid/misc/redirected_relu_grad.py. On by default!
+    use_fixed_seed: Seed the RNG with a fixed value so results are reproducible.
+      Off by default. As of tf 1.8 this does not work as intended, see:
+      https://github.com/tensorflow/tensorflow/issues/9171
   Returns:
     2D array of optimization results containing of evaluations of supplied
     param_f snapshotted at specified thresholds. Usually that will mean one or
@@ -80,7 +87,11 @@ def render_vis(model, objective_f, param_f=None, optimizer=None,
 
   with tf.Graph().as_default() as graph, tf.Session() as sess:
 
-    T = make_vis_T(model, objective_f, param_f, optimizer, transforms)
+    if use_fixed_seed:  # does not mean results are reproducible, see Args doc
+      tf.set_random_seed(0)
+
+    T = make_vis_T(model, objective_f, param_f, optimizer, transforms,
+                   relu_gradient_override)
     print_objective_func = make_print_objective_func(print_objectives, T)
     loss, vis_op, t_image = T("loss"), T("vis_op"), T("input")
     tf.global_variables_initializer().run()
@@ -105,7 +116,7 @@ def render_vis(model, objective_f, param_f=None, optimizer=None,
 
 
 def make_vis_T(model, objective_f, param_f=None, optimizer=None,
-               transforms=None):
+               transforms=None, relu_gradient_override=False):
   """Even more flexible optimization-based feature vis.
 
   This function is the inner core of render_vis(), and can be used
@@ -155,10 +166,19 @@ def make_vis_T(model, objective_f, param_f=None, optimizer=None,
   transform_f = make_transform_f(transforms)
   optimizer = make_optimizer(optimizer, [])
 
-  T = import_model(model, transform_f(t_image), t_image)
+  global_step = tf.train.get_or_create_global_step()
+  init_global_step = tf.variables_initializer([global_step])
+  init_global_step.run()
+
+  if relu_gradient_override:
+    with gradient_override_map({'Relu': redirected_relu_grad,
+                                'Relu6': redirected_relu6_grad}):
+      T = import_model(model, transform_f(t_image), t_image)
+  else:
+    T = import_model(model, transform_f(t_image), t_image)
   loss = objective_f(T)
 
-  global_step = tf.Variable(0, trainable=False, name="global_step")
+
   vis_op = optimizer.minimize(-loss, global_step=global_step)
 
   local_vars = locals()
diff --git a/setup.py b/setup.py
index 20c7fc1d..7efa731b 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@ from setuptools import setup, find_packages
 
-version = '0.0.8'
+version = '0.1.0'
 
 test_deps = [
     'future',
diff --git a/tests/misc/test_gradient_override.py b/tests/misc/test_gradient_override.py
index d59831c5..20394d0d 100644
--- a/tests/misc/test_gradient_override.py
+++ b/tests/misc/test_gradient_override.py
@@ -29,6 +29,10 @@ def gradient_override(op, grad):
     return tf.constant(42)
 
   with tf.Session().as_default() as sess:
+    global_step = tf.train.get_or_create_global_step()
+    init_global_step = tf.variables_initializer([global_step])
+    init_global_step.run()
+
     a = tf.constant(1.)
     standard_relu = tf.nn.relu(a)
     grad_wrt_a = tf.gradients(standard_relu, a, [1.])[0]
@@ -56,6 +60,10 @@ def test_gradient_override_relu6_directionality(nonl_name, nonl,
                                                 nonl_grad_override, examples):
   for incoming_grad, input, grad in examples:
     with tf.Session().as_default() as sess:
+      global_step = tf.train.get_or_create_global_step()
+      init_global_step = tf.variables_initializer([global_step])
+      init_global_step.run()
+
       batched_shape = [1,1]
       incoming_grad_t = tf.constant(incoming_grad, shape=batched_shape)
       input_t = tf.constant(input, shape=batched_shape)
@@ -63,3 +71,27 @@ def test_gradient_override_relu6_directionality(nonl_name, nonl,
         nonl_t = nonl(input_t)
       grad_wrt_input = tf.gradients(nonl_t, input_t, [incoming_grad_t])[0]
       assert (grad_wrt_input.eval() == grad).all()
+
+@pytest.mark.parametrize("nonl_name,nonl,nonl_grad_override, examples", nonls)
+def test_gradient_override_shutoff(nonl_name, nonl,
+                                   nonl_grad_override, examples):
+  for incoming_grad, input, grad in examples:
+    with tf.Session().as_default() as sess:
+      global_step_t = tf.train.get_or_create_global_step()
+      global_step_init_op = tf.variables_initializer([global_step_t])
+      global_step_init_op.run()
+      global_step_assign_t = tf.assign(global_step_t, 17)
+      sess.run(global_step_assign_t)
+
+      # similar setup to test_gradient_override_relu6_directionality, but here
+      # we check that the override is *not* applied: after 16 steps it shuts
+      # off, so the overridden gradient must equal the unmodified one
+      batched_shape = [1,1]
+      incoming_grad_t = tf.constant(incoming_grad, shape=batched_shape)
+      input_t = tf.constant(input, shape=batched_shape)
+      with gradient_override_map({nonl_name: nonl_grad_override}):
+        nonl_t = nonl(input_t)
+      grad_wrt_input = tf.gradients(nonl_t, input_t, [incoming_grad_t])[0]
+      nonl_t_no_override = nonl(input_t)
+      grad_wrt_input_no_override = tf.gradients(nonl_t_no_override, input_t, [incoming_grad_t])[0]
+      assert (grad_wrt_input.eval() == grad_wrt_input_no_override.eval()).all()
diff --git a/tests/optvis/test_integration.py b/tests/optvis/test_integration.py
index f12fcaa2..2b86c3cf 100644
--- a/tests/optvis/test_integration.py
+++ b/tests/optvis/test_integration.py
@@ -20,4 +20,9 @@ def test_integration(decorrelate, fft):
                                verbose=False, transforms=[])
   start_image = rendering[0]
   end_image = rendering[-1]
+  objective_f = objectives.neuron("mixed3a", 177)
+  param_f = lambda: param.image(64, decorrelate=decorrelate, fft=fft)
+  rendering = render.render_vis(model, objective_f, param_f, verbose=False, thresholds=(0,64), use_fixed_seed=True)
+  start_image, end_image = rendering
+  assert (start_image != end_image).any()
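
For reference, a minimal sketch of how the new `render_vis` flags introduced above might be exercised together. This sketch is not part of the diff: it assumes the `lucid.modelzoo` InceptionV1 graph is available and reuses the objective from `tests/optvis/test_integration.py`; adjust names for your own model.

```python
# Sketch only: exercising relu_gradient_override and use_fixed_seed together.
# Assumes the lucid.modelzoo InceptionV1 model; any imported model should work.
from lucid.modelzoo import vision_models
from lucid.optvis import objectives, param, render

model = vision_models.InceptionV1()
model.load_graphdef()

objective_f = objectives.neuron("mixed3a", 177)  # same objective as the test
param_f = lambda: param.image(64)

images = render.render_vis(
    model, objective_f, param_f,
    thresholds=(0, 64),
    verbose=False,
    relu_gradient_override=True,  # redirected ReLU grads for the first 16 steps
    use_fixed_seed=True,          # best-effort determinism; see tf issue 9171
)
```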