
Commit

Merge branch 'master' of github.com:lmjohns3/theanets
Leif Johnson committed Jul 11, 2015
2 parents 3e5e1d9 + 7697f96 commit 583b105
Showing 5 changed files with 31 additions and 50 deletions.
10 changes: 5 additions & 5 deletions README.rst
@@ -87,15 +87,15 @@ In fact, you can just as easily create 3 (or any number of) hidden layers:
net = theanets.Classifier([100, 1000, 1000, 1000, 10])
-By default, hidden layers use the logistic sigmoid transfer function. By passing
-a tuple instead of just an integer, you can change some of these layers to use
-different activations_:
+By default, hidden layers use the relu transfer function. By passing a tuple
+instead of just an integer, you can change some of these layers to use different
+activations_:

.. code:: python
maxout = (1000, 'maxout:4') # maxout with 4 pieces.
net = theanets.Classifier([
-    100, 1000, maxout, (1000, 'relu'), 10])
+    100, 1000, maxout, (1000, 'tanh'), 10])
.. _activations: http://theanets.readthedocs.org/en/latest/reference.html#module-theanets.activations

@@ -107,7 +107,7 @@ layer_, like how its parameters are initialized:
# Sparsely-initialized layer with large nonzero weights.
foo = dict(name='foo', size=1000, std=1, sparsity=0.9)
net = theanets.Classifier([
-    100, foo, (1000, 'maxout:4'), (1000, 'relu'), 10])
+    100, foo, (1000, 'maxout:4'), (1000, 'tanh'), 10])
.. _layer: http://theanets.readthedocs.org/en/latest/reference.html#module-theanets.layers.base

10 changes: 5 additions & 5 deletions docs/creating.rst
@@ -226,7 +226,7 @@ following:
<theanets.layers.base.Layer>` subclass.

- ``activation``: A string describing the :ref:`creating-activation` to use for
-  the layer. This defaults to "logistic".
+  the layer. This defaults to "relu".

- ``inputs``: An integer or dictionary describing the sizes of the inputs that
this layer expects. This is normally optional and defaults to the size of the
@@ -272,7 +272,7 @@ Integers
If a layer value is an integer, that value is interpreted as the ``size`` of a
regular :class:`Feedforward <theanets.layers.feedforward.Feedforward>` layer.
All options for the layer are set to their defaults (e.g., the activation
-function defaults to the logistic sigmoid).
+function defaults to "relu").

For example, to create a network with an input layer containing 4 units, hidden
layers with 5 and 6 units, and an output layer with 2 units, you can just use
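A minimal sketch of the integer shorthand described here, using the layer sizes
from the prose above (the ``Regressor`` model class is an assumption for
illustration, not part of this diff)::

    import theanets

    # 4-unit input, 5- and 6-unit hidden layers, 2-unit output. Each bare
    # integer becomes a feedforward layer, and the hidden layers pick up the
    # relu default. Regressor is chosen only for illustration.
    net = theanets.Regressor([4, 5, 6, 2])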
@@ -336,9 +336,9 @@ Activation Functions

An activation function (sometimes also called a transfer function) specifies how
the final output of a layer is computed from the weighted sums of the inputs. By
-default, hidden layers in ``theanets`` use a logistic sigmoid activation
-function. Output layers in :class:`Regressor <theanets.feedforward.Regressor>`
-and :class:`Autoencoder <theanets.feedforward.Autoencoder>` models use linear
+default, hidden layers in ``theanets`` use a relu activation function. Output
+layers in :class:`Regressor <theanets.feedforward.Regressor>` and
+:class:`Autoencoder <theanets.feedforward.Autoencoder>` models use linear
activations (i.e., the output is just the weighted sum of the inputs from the
previous layer), and the output layer in :class:`Classifier
<theanets.feedforward.Classifier>` models uses a softmax activation.
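A short sketch of these defaults side by side; the model classes come from the
text above, but the layer sizes are placeholders chosen for illustration::

    import theanets

    # Hidden layers default to relu in every model class; only the output
    # activation differs. Sizes below are illustrative.
    reg = theanets.Regressor([10, 20, 3])    # linear output layer
    ae = theanets.Autoencoder([10, 5, 10])   # linear output layer
    clf = theanets.Classifier([10, 20, 4])   # softmax output layer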
10 changes: 5 additions & 5 deletions docs/index.rst
@@ -66,21 +66,21 @@ In fact, you can just as easily create 3 (or any number of) hidden layers::
net = theanets.Classifier([
100, 1000, 1000, 1000, 10])

-By default, hidden layers use the logistic sigmoid transfer function. By passing
-a tuple instead of just an integer, you can change some of these layers to use
-different :mod:`activations <theanets.activations>`::
+By default, hidden layers use the relu transfer function. By passing a tuple
+instead of just an integer, you can change some of these layers to use different
+:mod:`activations <theanets.activations>`::

maxout = (1000, 'maxout:4') # maxout with 4 pieces.
net = theanets.Classifier([
-    100, 1000, maxout, (1000, 'relu'), 10])
+    100, 1000, maxout, (1000, 'tanh'), 10])

By passing a dictionary instead, you can specify even more attributes of each
:mod:`layer <theanets.layers.base>`, like how its parameters are initialized::

# Sparsely-initialized layer with large nonzero weights.
foo = dict(name='foo', size=1000, std=1, sparsity=0.9)
net = theanets.Classifier([
-    100, foo, (1000, 'maxout:4'), (1000, 'relu'), 10])
+    100, foo, (1000, 'maxout:4'), (1000, 'tanh'), 10])

Specifying layers is the heart of building models in ``theanets``. Read more
about this in :doc:`creating`.
49 changes: 15 additions & 34 deletions theanets/graph.py
@@ -150,7 +150,7 @@ def add_layer(self, layer, is_output=False):
         form = 'feedforward'
         kwargs = dict(
             name='out' if is_output else 'hid{}'.format(len(self.layers)),
-            activation=act if is_output else 'logistic',
+            activation=act if is_output else 'relu',
             inputs={self.layers[-1].output_name(): self.layers[-1].size},
             size=layer,
         )
@@ -276,13 +276,25 @@ def itertrain(self, train, valid=None, algo='rmsprop', subalgo='rmsprop',
             A dictionary of monitor values computed using the validation
             dataset, at the conclusion of training.
         '''
+        def create_dataset(data, **kwargs):
+            '''Create a dataset.'''
+            default_axis = 0
+            if not callable(data) and not callable(data[0]) and len(data[0].shape) == 3:
+                default_axis = 1
+            name = kwargs.get('name', 'dataset')
+            s = '{}_batches'.format(name)
+            return downhill.Dataset(
+                data, name=name, batch_size=kwargs.get('batch_size', 32),
+                iteration_size=kwargs.get('iteration_size', kwargs.get(s)),
+                axis=kwargs.get('axis', default_axis))
+
         # set up datasets ...
         if valid is None:
             valid = train
         if not isinstance(valid, downhill.Dataset):
-            valid = _create_dataset(valid, name='valid', **kwargs)
+            valid = create_dataset(valid, name='valid', **kwargs)
         if not isinstance(train, downhill.Dataset):
-            train = _create_dataset(train, name='train', **kwargs)
+            train = create_dataset(train, name='train', **kwargs)
 
         if 'algorithm' in kwargs:
             warnings.warn(
@@ -675,34 +687,3 @@ def updates(self, **kwargs):
         '''
         _, updates = self.build_graph(**kwargs)
         return updates
-
-
-def _create_dataset(data, **kwargs):
-    '''Create a dataset for this experiment.
-
-    Parameters
-    ----------
-    data : sequence of ndarray or callable
-        The values that you provide for data will be encapsulated inside a
-        :class:`Dataset <downhill.Dataset>` instance; see that class for
-        documentation on the types of things it needs. In particular, you
-        can currently pass in either a list/array/etc. of data, or a
-        callable that generates data dynamically.
-
-    Returns
-    -------
-    data : :class:`Dataset <downhill.Dataset>`
-        A dataset capable of providing mini-batches of data to a training
-        algorithm.
-    '''
-    default_axis = 0
-    if not callable(data) and not callable(data[0]) and len(data[0].shape) == 3:
-        default_axis = 1
-    name = kwargs.get('name', 'dataset')
-    b, i, s = 'batch_size', 'iteration_size', '{}_batches'.format(name)
-    return downhill.Dataset(
-        data,
-        name=name,
-        batch_size=kwargs.get(b, 32),
-        iteration_size=kwargs.get(i, kwargs.get(s)),
-        axis=kwargs.get('axis', default_axis))
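A rough usage sketch of how raw arrays reach the nested ``create_dataset``
helper through ``itertrain``; the array shapes, the monitor handling, and the
assumption that training yields ``(train, valid)`` monitor dictionaries are
illustrative, not taken from this commit::

    import numpy as np
    import theanets

    net = theanets.Regressor([4, 8, 2])
    inputs = np.random.randn(256, 4).astype('f')
    targets = np.random.randn(256, 2).astype('f')

    # Anything that is not already a downhill.Dataset gets wrapped by
    # create_dataset: batch_size defaults to 32, and train_batches /
    # valid_batches feed the '{}_batches' lookup that bounds each iteration.
    # The (train, valid) unpacking below is an assumption about what
    # itertrain yields.
    for train_monitors, valid_monitors in net.itertrain(
            [inputs, targets], batch_size=32,
            train_batches=8, valid_batches=2):
        print(train_monitors)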
2 changes: 1 addition & 1 deletion theanets/layers/base.py
@@ -187,7 +187,7 @@ class Layer(util.Registrar(str('Base'), (), {})):

     _count = 0
 
-    def __init__(self, size, inputs, name=None, activation='logistic', **kwargs):
+    def __init__(self, size, inputs, name=None, activation='relu', **kwargs):
         Layer._count += 1
         super(Layer, self).__init__()
         self.size = size
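With the layer default now ``'relu'``, code that relied on the old logistic
default has to ask for it explicitly. A minimal sketch using the tuple syntax
shown earlier in this commit (layer sizes are illustrative)::

    import theanets

    # Bare integers now build relu hidden layers; name 'logistic' explicitly
    # to recover the previous behavior for a particular layer.
    net = theanets.Classifier([100, (1000, 'logistic'), 10])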
