
Commit

Merge pull request #1141 from dwf/gradient_descent_accept_list_of_pairs
Accept list of pairs for gradients in GradientDescent.
dwf committed Aug 18, 2016
2 parents c3bf3a8 + ceeabeb commit 46c03f6
Showing 2 changed files with 26 additions and 13 deletions.
11 changes: 9 additions & 2 deletions blocks/algorithms/__init__.py
@@ -3,6 +3,7 @@
import itertools
from abc import ABCMeta, abstractmethod
from collections import OrderedDict
from collections import Mapping
from six.moves import reduce

from picklable_itertools.extras import equizip
@@ -221,9 +222,10 @@ class GradientDescent(UpdatesAlgorithm):
remember a weighted sum of gradients from previous steps like it is
done in gradient descent with momentum. If ``None``, an instance of
:class:`Scale` is created.
gradients : OrderedDict, optional
gradients : OrderedDict or list of 2-tuples, optional
A dictionary mapping a parameter to an expression for the cost's
gradient with respect to the parameter. If ``None``, the gradients
gradient with respect to the parameter, or equivalently, a list of
(parameter, gradient) tuples. If ``None``, the gradients
are taken automatically using :func:`theano.gradient.grad`.
known_grads : dict, optional
A passthrough to `theano.tensor.grad`'s `known_grads` argument.
@@ -266,6 +268,11 @@ def __init__(self, cost=None, parameters=None, step_rule=None,
# Set initial values for cost, parameters, gradients.
self.cost = cost
self.parameters = parameters
# Coerce lists of tuples to OrderedDict. Do not coerce Mappings,
# as we don't want to convert dict -> OrderedDict and give it
# an arbitrary, non-deterministic order.
if gradients is not None and not isinstance(gradients, Mapping):
gradients = OrderedDict(gradients)
self.gradients = gradients

# If we don't have gradients, we'll need to infer them from the
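For context, a minimal usage sketch of the new calling convention, mirroring the updated test below: the gradients argument may now be a list of (parameter, gradient) pairs instead of an OrderedDict. This is illustrative only and assumes a working Theano install plus the usual Blocks imports (blocks.algorithms.GradientDescent, blocks.utils.shared_floatx).

    import numpy
    from theano import tensor
    from blocks.algorithms import GradientDescent
    from blocks.utils import shared_floatx

    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    cost = tensor.sum(W ** 2)
    # Gradients given as a list of (parameter, gradient) pairs; the
    # constructor coerces the list to an OrderedDict, preserving its order.
    algorithm = GradientDescent(gradients=[(W, tensor.grad(cost, W))])
    algorithm.initialize()
    algorithm.process_batch(dict())  # one update step using the default Scale step rule

Note that a plain dict is deliberately not coerced to an OrderedDict, so callers who care about parameter order should pass either an OrderedDict or a list of pairs.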
28 changes: 17 additions & 11 deletions tests/algorithms/test_algorithms.py
@@ -83,17 +83,23 @@ def test_gradient_descent():


def test_gradient_descent_with_gradients():
W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
W_start_value = W.get_value()
cost = tensor.sum(W ** 2)
gradients = OrderedDict()
gradients[W] = tensor.grad(cost, W)

algorithm = GradientDescent(gradients=gradients)
algorithm.step_rule.learning_rate.set_value(0.75)
algorithm.initialize()
algorithm.process_batch(dict())
assert_allclose(W.get_value(), -0.5 * W_start_value)
def _test(f):
W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
W_start_value = W.get_value()
cost = tensor.sum(W ** 2)
gradients = OrderedDict()
gradients[W] = tensor.grad(cost, W)
algorithm = GradientDescent(gradients=f(gradients))
algorithm.step_rule.learning_rate.set_value(0.75)
algorithm.initialize()
algorithm.process_batch(dict())
assert_allclose(W.get_value(), -0.5 * W_start_value)

# With OrderedDict
yield (_test, lambda g: g)

# With list of pairs
yield (_test, lambda g: list(g.items()))


def test_gradient_descent_multiple_initialize():
