activations
pchavanne committed Jan 11, 2017
1 parent d714d07 commit 59c98ec
Showing 14 changed files with 47 additions and 39 deletions.
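This commit renames the yadll.activation module to yadll.activations and updates every reference in the docs, examples, and tests. For downstream code only the module name changes; a minimal before/after sketch (hypothetical user code, mirroring the calls in tests/test_activation.py and assuming the rest of the yadll API is untouched):

import theano
import theano.tensor as T
import yadll

x = T.matrix('x')
# before this commit: f = theano.function([x], yadll.activation.relu(x))
# after this commit:
f = theano.function([x], yadll.activations.relu(x))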
2 changes: 1 addition & 1 deletion docs/index.rst
@@ -54,7 +54,7 @@ References on functions, classes or methods, with notes and references.
modules/layers
modules/updates
modules/init
modules/activation
modules/activations
modules/objectives
modules/utils

4 changes: 2 additions & 2 deletions docs/modules/activation.rst → docs/modules/activations.rst
@@ -1,9 +1,9 @@
:mod:`yadll.activation`
:mod:`yadll.activations`

Activation
==========

.. automodule:: yadll.activation
.. automodule:: yadll.activations

.. autosummary::

8 changes: 4 additions & 4 deletions docs/user/tutorial.rst
@@ -85,13 +85,13 @@ Layer names are optional.
l_dro1 = yadll.layers.Dropout(incoming=l_in, corruption_level=0.4, name='Dropout 1')
# Dense Layer 1
l_hid1 = yadll.layers.DenseLayer(incoming=l_dro1, nb_units=500, W=yadll.init.glorot_uniform,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activation.relu,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu,
name='Hidden layer 1')
# Dropout Layer 2
l_dro2 = yadll.layers.Dropout(incoming=l_hid1, corruption_level=0.2, name='Dropout 2')
# Dense Layer 2
l_hid2 = yadll.layers.DenseLayer(incoming=l_dro2, nb_units=500, W=yadll.init.glorot_uniform,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activation.relu,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu,
name='Hidden layer 2')
# Logistic regression Layer
l_out = yadll.layers.LogisticRegression(incoming=l_hid2, nb_class=10, l1=hp.l1_reg,
@@ -247,13 +247,13 @@ When loading the parameters, the network name must match the saved parameters network name.
l_dro1 = yadll.layers.Dropout(incoming=l_in, corruption_level=0.4, name='Dropout 1')
# Dense Layer 1
l_hid1 = yadll.layers.DenseLayer(incoming=l_dro1, nb_units=500, W=yadll.init.glorot_uniform,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activation.relu,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu,
name='Hidden layer 1')
# Dropout Layer 2
l_dro2 = yadll.layers.Dropout(incoming=l_hid1, corruption_level=0.2, name='Dropout 2')
# Dense Layer 2
l_hid2 = yadll.layers.DenseLayer(incoming=l_dro2, nb_units=500, W=yadll.init.glorot_uniform,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activation.relu,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu,
name='Hidden layer 2')
# Logistic regression Layer
l_out = yadll.layers.LogisticRegression(incoming=l_hid2, nb_class=10, l1=hp.l1_reg,
8 changes: 4 additions & 4 deletions examples/model_template.py
@@ -38,13 +38,13 @@
l_dro1 = yadll.layers.Dropout(incoming=l_in, corruption_level=0.4, name='Dropout 1')
# Dense Layer 1
l_hid1 = yadll.layers.DenseLayer(incoming=l_dro1, nb_units=500, W=yadll.init.glorot_uniform,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activation.relu,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu,
name='Hidden layer 1')
# Dropout Layer 2
l_dro2 = yadll.layers.Dropout(incoming=l_hid1, corruption_level=0.2, name='Dropout 2')
# Dense Layer 2
l_hid2 = yadll.layers.DenseLayer(incoming=l_dro2, nb_units=500, W=yadll.init.glorot_uniform,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activation.relu,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu,
name='Hidden layer 2')
# Logistic regression Layer
l_out = yadll.layers.LogisticRegression(incoming=l_hid2, nb_class=10, l1=hp.l1_reg,
@@ -117,13 +117,13 @@
l_dro1 = yadll.layers.Dropout(incoming=l_in, corruption_level=0.4, name='Dropout 1')
# Dense Layer 1
l_hid1 = yadll.layers.DenseLayer(incoming=l_dro1, nb_units=500, W=yadll.init.glorot_uniform,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activation.relu,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu,
name='Hidden layer 1')
# Dropout Layer 2
l_dro2 = yadll.layers.Dropout(incoming=l_hid1, corruption_level=0.2, name='Dropout 2')
# Dense Layer 2
l_hid2 = yadll.layers.DenseLayer(incoming=l_dro2, nb_units=500, W=yadll.init.glorot_uniform,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activation.relu,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu,
name='Hidden layer 2')
# Logistic regression Layer
l_out = yadll.layers.LogisticRegression(incoming=l_hid2, nb_class=10, l1=hp.l1_reg,
8 changes: 4 additions & 4 deletions examples/normalization_example.py
@@ -38,11 +38,11 @@
l_in = yadll.layers.InputLayer(input_shape=(hp.batch_size, 28 * 28), name='Input')
# Dense Layer 1
l_hid1 = yadll.layers.DenseLayer(incoming=l_in, nb_units=500, W=yadll.init.glorot_uniform,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activation.relu,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu,
name='Hidden layer 1')
# Dense Layer 2
l_hid2 = yadll.layers.DenseLayer(incoming=l_hid1, nb_units=500, W=yadll.init.glorot_uniform,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activation.relu,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu,
name='Hidden layer 2')
# Logistic regression Layer
l_out = yadll.layers.LogisticRegression(incoming=l_hid2, nb_class=10, l1=hp.l1_reg,
@@ -90,13 +90,13 @@
l_bn1 = yadll.layers.BatchNormalization(incoming=l_in, name='Batch Normalization 1')
# Dense Layer 1
l_hid1 = yadll.layers.DenseLayer(incoming=l_bn1, nb_units=500, W=yadll.init.glorot_uniform,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activation.relu,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu,
name='Hidden layer 1')
# Batch Normalization 2
l_bn2 = yadll.layers.BatchNormalization(incoming=l_hid1, name='Batch Normalization 1')
# Dense Layer 2
l_hid2 = yadll.layers.DenseLayer(incoming=l_bn2, nb_units=500, W=yadll.init.glorot_uniform,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activation.relu,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu,
name='Hidden layer 2')
# Batch Normalization 3
l_bn3 = yadll.layers.BatchNormalization(incoming=l_hid2, name='Batch Normalization 1')
8 changes: 4 additions & 4 deletions examples/save_and_load_example.py
@@ -38,12 +38,12 @@
l_dro1 = yadll.layers.Dropout(incoming=l_in, corruption_level=0.5)
# Dense Layer 1
l_hid1 = yadll.layers.DenseLayer(incoming=l_dro1, nb_units=500, W=yadll.init.glorot_uniform,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activation.relu)
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu)
# Dropout Layer 2
l_dro2 = yadll.layers.Dropout(incoming=l_hid1, corruption_level=0.25)
# Dense Layer 2
l_hid2 = yadll.layers.DenseLayer(incoming=l_dro2, nb_units=250, W=yadll.init.glorot_uniform,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activation.relu)
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu)
# Logistic regression Layer
l_out = yadll.layers.LogisticRegression(incoming=l_hid2, nb_class=10)

@@ -100,10 +100,10 @@
l_dro1 = yadll.layers.Dropout(incoming=l_in, corruption_level=0.5)

l_hid1 = yadll.layers.DenseLayer(incoming=l_dro1, nb_units=500, W=yadll.init.glorot_uniform,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activation.relu)
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu)
l_dro2 = yadll.layers.Dropout(incoming=l_hid1, corruption_level=0.25)
l_hid2 = yadll.layers.DenseLayer(incoming=l_dro2, nb_units=250, W=yadll.init.glorot_uniform,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activation.relu)
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu)
l_out = yadll.layers.LogisticRegression(incoming=l_hid2, nb_class=10)

# Create network and add layers
4 changes: 2 additions & 2 deletions examples/updates_examples.py
@@ -39,12 +39,12 @@
l_dro1 = yadll.layers.Dropout(incoming=l_in, corruption_level=0.5)
# Dense Layer 1
l_hid1 = yadll.layers.DenseLayer(incoming=l_dro1, nb_units=500, W=yadll.init.glorot_uniform,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activation.relu)
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu)
# Dropout Layer 2
l_dro2 = yadll.layers.Dropout(incoming=l_hid1, corruption_level=0.25)
# Dense Layer 2
l_hid2 = yadll.layers.DenseLayer(incoming=l_dro2, nb_units=500, W=yadll.init.glorot_uniform,
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activation.relu)
l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu)
# Logistic regression Layer
l_out = yadll.layers.LogisticRegression(incoming=l_hid2, nb_class=10, l1=hp.l1_reg, l2=hp.l2_reg)

16 changes: 8 additions & 8 deletions tests/test_activation.py
@@ -13,61 +13,61 @@

def test_sigmoid():
x = T.matrix('x')
f = theano.function([x], yadll.activation.sigmoid(x))
f = theano.function([x], yadll.activations.sigmoid(x))
actual = f(x_val)
desired = 1 / (1 + np.exp(-x_val))
assert_allclose(actual, desired, rtol=1e-5)


def test_ultra_fast_sigmoid():
x = T.matrix('x')
f = theano.function([x], yadll.activation.ultra_fast_sigmoid(x))
f = theano.function([x], yadll.activations.ultra_fast_sigmoid(x))
actual = f(x_val)
desired = 1 / (1 + np.exp(-x_val))
assert_allclose(actual, desired, rtol=0, atol=1e-1)


def test_tanh():
x = T.matrix('x')
f = theano.function([x], yadll.activation.tanh(x))
f = theano.function([x], yadll.activations.tanh(x))
actual = f(x_val)
desired = np.tanh(x_val)
assert_allclose(actual, desired, rtol=1e-5)


def test_softmax():
x = T.matrix('x')
f = theano.function([x], yadll.activation.softmax(x))
f = theano.function([x], yadll.activations.softmax(x))
actual = f(x_val)
desired = np.exp(x_val) / np.exp(x_val).sum()
assert_allclose(actual, desired, rtol=1e-5)


def test_softplus():
x = T.matrix('x')
f = theano.function([x], yadll.activation.softplus(x))
f = theano.function([x], yadll.activations.softplus(x))
actual = f(x_val)
desired = np.log(1 + np.exp(x_val))
assert_allclose(actual, desired, rtol=1e-3)


def test_relu():
x = T.matrix('x')
f = theano.function([x], yadll.activation.relu(x))
f = theano.function([x], yadll.activations.relu(x))
actual = f(x_val)
desired = x_val * (x_val > 0)
assert_allclose(actual, desired, rtol=1e-5)
x = T.matrix('x')
alpha = 0.5
f = theano.function([x], yadll.activation.relu(x, alpha))
f = theano.function([x], yadll.activations.relu(x, alpha))
actual = f(x_val)
desired = x_val * (x_val > 0) + alpha * x_val * (x_val < 0)
assert_allclose(actual, desired, rtol=1e-5)


def test_linear():
x = [0, -1, 1, 3.2, 1e-7, np.inf, True, None, 'foo']
actual = yadll.activation.linear(x)
actual = yadll.activations.linear(x)
desired = x
assert actual == desired

14 changes: 7 additions & 7 deletions tests/test_init.py
@@ -37,40 +37,40 @@ def test_init():
assert np.abs(np.std(w.get_value()) - 0.5) < eps

# Glorot
init_obj = (yadll.init.glorot_uniform, {'gain': yadll.activation.tanh})
init_obj = (yadll.init.glorot_uniform, {'gain': yadll.activations.tanh})
w = yadll.init.initializer(init_obj=init_obj, shape=shape, name='w')
scale = 1 * np.sqrt(6. / (shape[0] + shape[1]))
assert np.abs(np.mean(w.get_value()) - 0.0) < eps
assert np.abs(np.max(w.get_value()) - scale) < eps
assert np.abs(np.min(w.get_value()) - (-scale)) < eps

init_obj = (yadll.init.glorot_uniform, {'gain': yadll.activation.sigmoid})
init_obj = (yadll.init.glorot_uniform, {'gain': yadll.activations.sigmoid})
w = yadll.init.initializer(init_obj=init_obj, shape=shape, name='w')
scale = 4 * np.sqrt(6. / (shape[0] + shape[1]))
assert np.abs(np.mean(w.get_value()) - 0.0) < eps
assert np.abs(np.max(w.get_value()) - scale) < eps
assert np.abs(np.min(w.get_value()) - (-scale)) < eps

init_obj = (yadll.init.glorot_uniform, {'gain': yadll.activation.sigmoid})
init_obj = (yadll.init.glorot_uniform, {'gain': yadll.activations.sigmoid})
w = yadll.init.initializer(init_obj=init_obj, shape=shape, fan=(500, 500), name='w')
scale = 4 * np.sqrt(6. / (500 + 500))
assert np.abs(np.mean(w.get_value()) - 0.0) < eps
assert np.abs(np.max(w.get_value()) - scale) < eps
assert np.abs(np.min(w.get_value()) - (-scale)) < eps

init_obj = (yadll.init.glorot_normal, {'gain': yadll.activation.tanh})
init_obj = (yadll.init.glorot_normal, {'gain': yadll.activations.tanh})
w = yadll.init.initializer(init_obj=init_obj, shape=shape, name='w')
scale = 1 * np.sqrt(2. / (shape[0] + shape[1]))
assert np.abs(np.mean(w.get_value()) - 0.0) < eps
assert np.abs(np.std(w.get_value()) - scale) < eps

init_obj = (yadll.init.glorot_normal, {'gain': yadll.activation.sigmoid})
init_obj = (yadll.init.glorot_normal, {'gain': yadll.activations.sigmoid})
w = yadll.init.initializer(init_obj=init_obj, shape=shape, name='w')
scale = 4 * np.sqrt(2. / (shape[0] + shape[1]))
assert np.abs(np.mean(w.get_value()) - 0.0) < eps
assert np.abs(np.std(w.get_value()) - scale) < eps

init_obj = (yadll.init.glorot_normal, {'gain': yadll.activation.sigmoid})
init_obj = (yadll.init.glorot_normal, {'gain': yadll.activations.sigmoid})
w = yadll.init.initializer(init_obj=init_obj, shape=shape, fan=(500, 500), name='w')
scale = 4 * np.sqrt(2. / (500 + 500))
assert np.abs(np.mean(w.get_value()) - 0.0) < eps
@@ -96,7 +96,7 @@ def test_init():
assert np.abs(np.mean(w.get_value()) - 0.0) < eps
assert np.allclose(np.dot(w.get_value(), w.get_value().T), np.eye(min(shape)), atol=1e-5)

init_obj = (yadll.init.orthogonal, {'gain': yadll.activation.relu})
init_obj = (yadll.init.orthogonal, {'gain': yadll.activations.relu})
w = yadll.init.initializer(init_obj=init_obj, shape=shape, name='w')
assert np.abs(np.mean(w.get_value()) - 0.0) < eps
assert np.allclose(np.dot(w.get_value(), w.get_value().T), np.eye(min(shape)) * 2, atol=1e-5)
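For reference, the scale values asserted in this test follow the Glorot initialization formulas, with the gain taken from the activation (1 for tanh, 4 for sigmoid, as in the assertions above). A minimal numpy sketch of those expected scales (illustration only, not yadll code; the gain values come from the assertions, not from yadll's implementation):

import numpy as np

def glorot_uniform_scale(fan_in, fan_out, gain=1.0):
    # uniform bound: gain * sqrt(6 / (fan_in + fan_out))
    return gain * np.sqrt(6.0 / (fan_in + fan_out))

def glorot_normal_scale(fan_in, fan_out, gain=1.0):
    # standard deviation: gain * sqrt(2 / (fan_in + fan_out))
    return gain * np.sqrt(2.0 / (fan_in + fan_out))

# the sigmoid case with fan=(500, 500) asserted above: 4 * sqrt(6/1000) ~= 0.31
print(glorot_uniform_scale(500, 500, gain=4.0))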
2 changes: 1 addition & 1 deletion yadll/__init__.py
@@ -1,6 +1,6 @@
# -*- coding: UTF-8 -*-

from . import activation
from . import activations
from . import data
from . import exceptions
from . import hyperparameters
File renamed without changes.
2 changes: 1 addition & 1 deletion yadll/init.py
@@ -2,7 +2,7 @@
import numpy as np

from .utils import shared_variable
from .activation import *
from .activations import *

np_rng = np.random.RandomState(1234)

2 changes: 1 addition & 1 deletion yadll/layers.py
@@ -215,7 +215,7 @@ def __init__(self, incoming, nb_units, W=glorot_uniform, b=constant,
self.params.append(self.b)
self.activation = activation
if isinstance(activation, basestring):
self.activation = getattr(yadll.activation, activation)
self.activation = getattr(yadll.activations, activation)
self.l1 = l1
self.l2 = l2
if l1 and l1 != 0:
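The hunk above also shows that DenseLayer resolves a string activation name against the renamed module via getattr. A hypothetical usage sketch based only on these context lines and the examples earlier in this diff (not part of the commit itself; argument names are assumptions):

import yadll

batch_size = 128  # hypothetical value
l_in = yadll.layers.InputLayer(input_shape=(batch_size, 28 * 28), name='Input')

# equivalent ways to specify the activation after the rename:
l_hid_a = yadll.layers.DenseLayer(incoming=l_in, nb_units=500,
                                  activation=yadll.activations.relu)
l_hid_b = yadll.layers.DenseLayer(incoming=l_in, nb_units=500,
                                  activation='relu')  # resolved via getattr(yadll.activations, 'relu')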
8 changes: 8 additions & 0 deletions yadll/updates.py
@@ -116,5 +116,13 @@ def rmsprop(cost, params, learning_rate=1.0, rho=0.9, epsilon=1e-6, **kwargs):


def hessian_free(cost, parms, **kwargs):
"""
Hessian Free optimization
References
----------
.. [1] http://www.cs.toronto.edu/~jmartens/docs/Deep_HessianFree.pdf
.. [2] http://www.cs.toronto.edu/~hinton/absps/momentum.pdf
"""
# TODO implement hessian_free method
raise NotImplementedError
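hessian_free remains a stub; the new docstring only points to the references. As a hedged illustration of the core idea (not yadll code): Hessian-free methods approximately solve H d = -g with conjugate gradients, using only Hessian-vector products. A minimal numpy sketch, assuming a positive-definite curvature matrix:

import numpy as np

def conjugate_gradient(hess_vec, grad, max_iters=50, tol=1e-6):
    # Approximately solve H d = -grad, where hess_vec(v) returns H v.
    d = np.zeros_like(grad)
    r = -grad - hess_vec(d)  # residual of H d = -grad at d = 0
    p = r.copy()
    rs_old = r.dot(r)
    for _ in range(max_iters):
        Hp = hess_vec(p)
        alpha = rs_old / p.dot(Hp)
        d += alpha * p
        r -= alpha * Hp
        rs_new = r.dot(r)
        if np.sqrt(rs_new) < tol:
            break
        p = r + (rs_new / rs_old) * p
        rs_old = rs_new
    return d

# toy check on a quadratic with a known Hessian: H d should be close to -g
H = np.array([[3.0, 1.0], [1.0, 2.0]])
g = np.array([1.0, -1.0])
d = conjugate_gradient(lambda v: H.dot(v), g)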
