
Commit

Merge pull request #1141 from dwf/gradient_descent_accept_list_of_pairs
Accept list of pairs for gradients in GradientDescent.
dwf committed Aug 18, 2016
2 parents c3bf3a8 + ceeabeb commit 46c03f6
Showing 2 changed files with 26 additions and 13 deletions.
11 changes: 9 additions & 2 deletions blocks/algorithms/__init__.py
@@ -3,6 +3,7 @@
import itertools
from abc import ABCMeta, abstractmethod
from collections import OrderedDict
from collections import Mapping
from six.moves import reduce

from picklable_itertools.extras import equizip
@@ -221,9 +222,10 @@ class GradientDescent(UpdatesAlgorithm):
remember a weighted sum of gradients from previous steps like it is
done in gradient descent with momentum. If ``None``, an instance of
:class:`Scale` is created.
gradients : OrderedDict, optional
gradients : OrderedDict or list of 2-tuples, optional
A dictionary mapping a parameter to an expression for the cost's
gradient with respect to the parameter. If ``None``, the gradients
gradient with respect to the parameter, or equivalently, a list of
(parameter, gradient) tuples. If ``None``, the gradients
are taken automatically using :func:`theano.gradient.grad`.
known_grads : dict, optional
A passthrough to `theano.tensor.grad`'s `known_grads` argument.
@@ -266,6 +268,11 @@ def __init__(self, cost=None, parameters=None, step_rule=None,
# Set initial values for cost, parameters, gradients.
self.cost = cost
self.parameters = parameters
# Coerce lists of tuples to OrderedDict. Do not coerce Mappings,
# as we don't want to convert dict -> OrderedDict and give it
# an arbitrary, non-deterministic order.
if gradients is not None and not isinstance(gradients, Mapping):
gradients = OrderedDict(gradients)
self.gradients = gradients

# If we don't have gradients, we'll need to infer them from the
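For context, a minimal usage sketch of the new calling convention, mirroring the updated test below: the gradients argument may now be a list of (parameter, gradient) pairs instead of an OrderedDict. This is illustrative only and assumes a working Theano install plus the usual Blocks imports (blocks.algorithms.GradientDescent, blocks.utils.shared_floatx).

    import numpy
    from theano import tensor
    from blocks.algorithms import GradientDescent
    from blocks.utils import shared_floatx

    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    cost = tensor.sum(W ** 2)
    # Gradients given as a list of (parameter, gradient) pairs; the
    # constructor coerces the list to an OrderedDict, preserving its order.
    algorithm = GradientDescent(gradients=[(W, tensor.grad(cost, W))])
    algorithm.initialize()
    algorithm.process_batch(dict())  # one update step using the default Scale step rule

Note that a plain dict is deliberately not coerced to an OrderedDict, so callers who care about parameter order should pass either an OrderedDict or a list of pairs.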
28 changes: 17 additions & 11 deletions tests/algorithms/test_algorithms.py
@@ -83,17 +83,23 @@ def test_gradient_descent():


def test_gradient_descent_with_gradients():
W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
W_start_value = W.get_value()
cost = tensor.sum(W ** 2)
gradients = OrderedDict()
gradients[W] = tensor.grad(cost, W)

algorithm = GradientDescent(gradients=gradients)
algorithm.step_rule.learning_rate.set_value(0.75)
algorithm.initialize()
algorithm.process_batch(dict())
assert_allclose(W.get_value(), -0.5 * W_start_value)
def _test(f):
W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
W_start_value = W.get_value()
cost = tensor.sum(W ** 2)
gradients = OrderedDict()
gradients[W] = tensor.grad(cost, W)
algorithm = GradientDescent(gradients=f(gradients))
algorithm.step_rule.learning_rate.set_value(0.75)
algorithm.initialize()
algorithm.process_batch(dict())
assert_allclose(W.get_value(), -0.5 * W_start_value)

# With OrderedDict
yield (_test, lambda g: g)

# With list of pairs
yield (_test, lambda g: list(g.items()))


def test_gradient_descent_multiple_initialize():
