From e830c8f879aab7d6a35b6e26b804f91735f13f16 Mon Sep 17 00:00:00 2001
From: Ludwig Schubert
Date: Tue, 15 May 2018 12:05:15 -0700
Subject: [PATCH] Increment version to 0.1.0

---
 README.md                            | 18 +++++++++++++++-
 lucid/misc/io/serialize_array.py     |  1 +
 lucid/misc/redirected_relu_grad.py   | 23 +++++++++++++++++---
 lucid/optvis/objectives.py           | 18 +++++++++++++++-
 lucid/optvis/render.py               | 32 ++++++++++++++++++++++------
 setup.py                             |  2 +-
 tests/misc/test_gradient_override.py | 32 ++++++++++++++++++++++++++++
 tests/optvis/test_integration.py     |  5 +++++
 8 files changed, 119 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index 9ffef041..aec7b07b 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,7 @@ run in your browser.
 
 
-## Building Blocks
+## Building Blocks
 
 *Notebooks corresponding to the [Building Blocks of Interpretability](https://distill.pub/2018/building-blocks/) article*
 
@@ -80,6 +80,22 @@ This project is research code. It is not an official Google product.
 
 ## Development
 
+### Style guide deviations
+
+We use naming conventions to help differentiate tensors, operations, and values:
+
+* Suffix variable names representing **tensors** with `_t`
+* Suffix variable names representing **operations** with `_op`
+* Don't suffix variable names representing concrete values
+
+Usage example:
+
+```
+global_step_t = tf.train.get_or_create_global_step()
+global_step_init_op = tf.variables_initializer([global_step_t])
+global_step = global_step_t.eval()
+```
+
 ### Running Tests
 
 Use `tox` to run the test suite on all supported environments.
diff --git a/lucid/misc/io/serialize_array.py b/lucid/misc/io/serialize_array.py
index c8df7953..43e3e15a 100644
--- a/lucid/misc/io/serialize_array.py
+++ b/lucid/misc/io/serialize_array.py
@@ -48,6 +48,7 @@ def _normalize_array(array, domain=(0, 1)):
   array = np.squeeze(array)
   assert len(array.shape) <= 3
   assert np.issubdtype(array.dtype, np.number)
+  assert not np.isnan(array).any()
 
   low, high = np.min(array), np.max(array)
   if domain is None:
diff --git a/lucid/misc/redirected_relu_grad.py b/lucid/misc/redirected_relu_grad.py
index 1820f1da..dce9ed2f 100644
--- a/lucid/misc/redirected_relu_grad.py
+++ b/lucid/misc/redirected_relu_grad.py
@@ -23,13 +23,20 @@ These functions provide a more convenient solution: temporarily override the
 gradient of ReLUs to allow gradient to flow back through the ReLU -- even if it
 didn't activate and had a derivative of zero -- allowing the visualization
-process to get started.
+process to get started. These functions override the gradient for at most 16
+steps. Thus, you need to initialize `global_step` before using these functions.
 
 Usage:
 ```python
 from lucid.misc.gradient_override import gradient_override_map
 from lucid.misc.redirected_relu_grad import redirected_relu_grad
 
+...
+global_step_t = tf.train.get_or_create_global_step()
+init_global_step_op = tf.variables_initializer([global_step_t])
+init_global_step_op.run()
+...
+
 with gradient_override_map({'Relu': redirected_relu_grad}):
   model.import_graph(...)
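+
+# Note: the override only applies while `global_step` is at most 16, so the
+# training loop must also increment it. One way (a sketch; `optimizer` and
+# `loss` come from your own training setup) is to pass the step to the
+# optimizer, as lucid.optvis.render.make_vis_T does:
+#   vis_op = optimizer.minimize(loss, global_step=global_step_t)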
 ```
 
@@ -99,7 +106,12 @@ def redirected_relu_grad(op, grad):
   batch = tf.shape(relu_grad)[0]
   reshaped_relu_grad = tf.reshape(relu_grad, [batch, -1])
   relu_grad_mag = tf.norm(reshaped_relu_grad, axis=1)
-  return tf.where(relu_grad_mag > 0., relu_grad, redirected_grad)
+  result_grad = tf.where(relu_grad_mag > 0., relu_grad, redirected_grad)
+
+  global_step_t = tf.train.get_or_create_global_step()
+  return_relu_grad = tf.greater(global_step_t, tf.constant(16, tf.int64))
+
+  return tf.where(return_relu_grad, relu_grad, result_grad)
 
 
 def redirected_relu6_grad(op, grad):
@@ -125,4 +137,9 @@ def redirected_relu6_grad(op, grad):
   batch = tf.shape(relu_grad)[0]
   reshaped_relu_grad = tf.reshape(relu_grad, [batch, -1])
   relu_grad_mag = tf.norm(reshaped_relu_grad, axis=1)
-  return tf.where(relu_grad_mag > 0., relu_grad, redirected_grad)
+  result_grad = tf.where(relu_grad_mag > 0., relu_grad, redirected_grad)
+
+  global_step_t = tf.train.get_or_create_global_step()
+  return_relu_grad = tf.greater(global_step_t, tf.constant(16, tf.int64))
+
+  return tf.where(return_relu_grad, relu_grad, result_grad)
diff --git a/lucid/optvis/objectives.py b/lucid/optvis/objectives.py
index 7df40fce..96378141 100644
--- a/lucid/optvis/objectives.py
+++ b/lucid/optvis/objectives.py
@@ -131,7 +131,23 @@ def wrap_objective(f, *args, **kwds):
 
 @wrap_objective
 def neuron(layer_name, channel_n, x=None, y=None, batch=None):
-  """Visualize a single neuron of a single channel."""
+  """Visualize a single neuron of a single channel.
+
+  Defaults to the center neuron. When width and height are even numbers, we
+  choose the neuron in the bottom right of the center 2x2 neurons.
+
+  Odd width & height:          Even width & height:
+
+  +---+---+---+                +---+---+---+---+
+  |   |   |   |                |   |   |   |   |
+  +---+---+---+                +---+---+---+---+
+  |   | X |   |                |   |   |   |   |
+  +---+---+---+                +---+---+---+---+
+  |   |   |   |                |   |   | X |   |
+  +---+---+---+                +---+---+---+---+
+                               |   |   |   |   |
+                               +---+---+---+---+
+  """
   def inner(T):
     layer = T(layer_name)
     shape = tf.shape(layer)
diff --git a/lucid/optvis/render.py b/lucid/optvis/render.py
index cb57e278..bc786cdd 100644
--- a/lucid/optvis/render.py
+++ b/lucid/optvis/render.py
@@ -31,6 +31,8 @@ from lucid.optvis import objectives, param, transform
 
 from lucid.misc.io import show
+from lucid.misc.redirected_relu_grad import redirected_relu_grad, redirected_relu6_grad
+from lucid.misc.gradient_override import gradient_override_map
 
 
 # pylint: disable=invalid-name
@@ -40,8 +42,8 @@ def render_vis(model, objective_f, param_f=None, optimizer=None,
-               transforms=None, thresholds=(512,),
-               print_objectives=None, verbose=True,):
+               transforms=None, thresholds=(512,), print_objectives=None,
+               verbose=True, relu_gradient_override=True, use_fixed_seed=False):
   """Flexible optimization-based feature vis.
 
   There's a lot of ways one might wish to customize optimization-based
@@ -72,6 +74,11 @@ def render_vis(model, objective_f, param_f=None, optimizer=None,
       whose values get logged during the optimization.
     verbose: Should we display the visualization when we hit a threshold?
       This should only be used in IPython.
+    relu_gradient_override: Whether to use the gradient override scheme
+      described in lucid/misc/redirected_relu_grad.py. On by default!
+    use_fixed_seed: Seed the RNG with a fixed value so results are reproducible.
+      Off by default. As of tf 1.8 this does not work as intended, see:
+      https://github.com/tensorflow/tensorflow/issues/9171
   Returns:
     2D array of optimization results containing of evaluations of supplied
     param_f snapshotted at specified thresholds. Usually that will mean one or
@@ -80,7 +87,11 @@ def render_vis(model, objective_f, param_f=None, optimizer=None,
 
   with tf.Graph().as_default() as graph, tf.Session() as sess:
 
-    T = make_vis_T(model, objective_f, param_f, optimizer, transforms)
+    if use_fixed_seed:  # does not mean results are reproducible, see Args doc
+      tf.set_random_seed(0)
+
+    T = make_vis_T(model, objective_f, param_f, optimizer, transforms,
+                   relu_gradient_override)
     print_objective_func = make_print_objective_func(print_objectives, T)
     loss, vis_op, t_image = T("loss"), T("vis_op"), T("input")
     tf.global_variables_initializer().run()
@@ -105,7 +116,7 @@ def render_vis(model, objective_f, param_f=None, optimizer=None,
 
 
 def make_vis_T(model, objective_f, param_f=None, optimizer=None,
-               transforms=None):
+               transforms=None, relu_gradient_override=False):
   """Even more flexible optimization-based feature vis.
 
   This function is the inner core of render_vis(), and can be used
@@ -155,10 +166,19 @@ def make_vis_T(model, objective_f, param_f=None, optimizer=None,
   transform_f = make_transform_f(transforms)
   optimizer = make_optimizer(optimizer, [])
 
-  T = import_model(model, transform_f(t_image), t_image)
+  global_step = tf.train.get_or_create_global_step()
+  init_global_step = tf.variables_initializer([global_step])
+  init_global_step.run()
+
+  if relu_gradient_override:
+    with gradient_override_map({'Relu': redirected_relu_grad,
+                                'Relu6': redirected_relu6_grad}):
+      T = import_model(model, transform_f(t_image), t_image)
+  else:
+    T = import_model(model, transform_f(t_image), t_image)
   loss = objective_f(T)
 
-  global_step = tf.Variable(0, trainable=False, name="global_step")
+
   vis_op = optimizer.minimize(-loss, global_step=global_step)
 
   local_vars = locals()
diff --git a/setup.py b/setup.py
index 20c7fc1d..7efa731b 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@ from setuptools import setup, find_packages
 
-version = '0.0.8'
+version = '0.1.0'
 
 test_deps = [
     'future',
diff --git a/tests/misc/test_gradient_override.py b/tests/misc/test_gradient_override.py
index d59831c5..20394d0d 100644
--- a/tests/misc/test_gradient_override.py
+++ b/tests/misc/test_gradient_override.py
@@ -29,6 +29,10 @@ def gradient_override(op, grad):
     return tf.constant(42)
 
   with tf.Session().as_default() as sess:
+    global_step = tf.train.get_or_create_global_step()
+    init_global_step = tf.variables_initializer([global_step])
+    init_global_step.run()
+
     a = tf.constant(1.)
     standard_relu = tf.nn.relu(a)
     grad_wrt_a = tf.gradients(standard_relu, a, [1.])[0]
@@ -56,6 +60,10 @@ def test_gradient_override_relu6_directionality(nonl_name, nonl,
                                                 nonl_grad_override, examples):
   for incoming_grad, input, grad in examples:
     with tf.Session().as_default() as sess:
+      global_step = tf.train.get_or_create_global_step()
+      init_global_step = tf.variables_initializer([global_step])
+      init_global_step.run()
+
       batched_shape = [1,1]
       incoming_grad_t = tf.constant(incoming_grad, shape=batched_shape)
       input_t = tf.constant(input, shape=batched_shape)
@@ -63,3 +71,27 @@ def test_gradient_override_relu6_directionality(nonl_name, nonl,
         nonl_t = nonl(input_t)
       grad_wrt_input = tf.gradients(nonl_t, input_t, [incoming_grad_t])[0]
       assert (grad_wrt_input.eval() == grad).all()
+
+@pytest.mark.parametrize("nonl_name,nonl,nonl_grad_override, examples", nonls)
+def test_gradient_override_shutoff(nonl_name, nonl,
+                                   nonl_grad_override, examples):
+  for incoming_grad, input, grad in examples:
+    with tf.Session().as_default() as sess:
+      global_step_t = tf.train.get_or_create_global_step()
+      global_step_init_op = tf.variables_initializer([global_step_t])
+      global_step_init_op.run()
+      global_step_assign_t = tf.assign(global_step_t, 17)
+      sess.run(global_step_assign_t)
+
+      # similar setup to test_gradient_override_relu6_directionality, but here
+      # we check that the override is *not* applied: after 16 steps it shuts
+      # off, so the overridden gradient must equal the unmodified one
+      batched_shape = [1,1]
+      incoming_grad_t = tf.constant(incoming_grad, shape=batched_shape)
+      input_t = tf.constant(input, shape=batched_shape)
+      with gradient_override_map({nonl_name: nonl_grad_override}):
+        nonl_t = nonl(input_t)
+      grad_wrt_input = tf.gradients(nonl_t, input_t, [incoming_grad_t])[0]
+      nonl_t_no_override = nonl(input_t)
+      grad_wrt_input_no_override = tf.gradients(nonl_t_no_override, input_t, [incoming_grad_t])[0]
+      assert (grad_wrt_input.eval() == grad_wrt_input_no_override.eval()).all()
diff --git a/tests/optvis/test_integration.py b/tests/optvis/test_integration.py
index f12fcaa2..2b86c3cf 100644
--- a/tests/optvis/test_integration.py
+++ b/tests/optvis/test_integration.py
@@ -20,4 +20,9 @@ def test_integration(decorrelate, fft):
                                verbose=False, transforms=[])
   start_image = rendering[0]
   end_image = rendering[-1]
+  objective_f = objectives.neuron("mixed3a", 177)
+  param_f = lambda: param.image(64, decorrelate=decorrelate, fft=fft)
+  rendering = render.render_vis(model, objective_f, param_f, verbose=False, thresholds=(0,64), use_fixed_seed=True)
+  start_image, end_image = rendering
+  assert (start_image != end_image).any()
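
For reference, a minimal sketch of how the new `render_vis` flags introduced above might be exercised together. This sketch is not part of the diff: it assumes the `lucid.modelzoo` InceptionV1 graph is available and reuses the objective from `tests/optvis/test_integration.py`; adjust names for your own model.

```python
# Sketch only: exercising relu_gradient_override and use_fixed_seed together.
# Assumes the lucid.modelzoo InceptionV1 model; any imported model should work.
from lucid.modelzoo import vision_models
from lucid.optvis import objectives, param, render

model = vision_models.InceptionV1()
model.load_graphdef()

objective_f = objectives.neuron("mixed3a", 177)  # same objective as the test
param_f = lambda: param.image(64)

images = render.render_vis(
    model, objective_f, param_f,
    thresholds=(0, 64),
    verbose=False,
    relu_gradient_override=True,  # redirected ReLU grads for the first 16 steps
    use_fixed_seed=True,          # best-effort determinism; see tf issue 9171
)
```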