adam

pchavanne · Jan 16, 2017 · 5402810 · 5402810
1 parent 683b413
commit 5402810
Show file tree

Hide file tree

Showing 9 changed files with 157 additions and 174 deletions.
diff --git a/.idea/dictionaries/philippe.xml b/.idea/dictionaries/philippe.xml
diff --git a/docs/modules/updates.rst b/docs/modules/updates.rst
@@ -3,7 +3,7 @@
 Updates
 =======
 
-Updating functions that are passed to the network.
+Updating functions that are passed to the network for optimization.
 
 .. automodule:: yadll.updates
 

diff --git a/examples/hp_grid_search.py → examples/hp_grid_search_example.py b/examples/hp_grid_search.py → examples/hp_grid_search_example.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python
 # -*- coding: UTF-8 -*-
+"""
+This example shows you how to run a grid search over the hyperparameters.
+"""
 import os
 import cPickle
 import pandas as pd

diff --git a/examples/mnist_examples.py b/examples/mnist_examples.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python
 # -*- coding: UTF-8 -*-
 """
-Example of dl usage on the mnist dataset
+Example of yadll usage on the mnist dataset
+Many networks are predefined; to see the complete list,
 use -n or --network_list to see all available networks
 
 Usage:

diff --git a/examples/model_template.py b/examples/model_template.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python
 # -*- coding: UTF-8 -*-
+"""
+This example shows you how to create and train a model and make predictions.
+"""
 import os
 import yadll
 import logging
@@ -63,7 +66,7 @@
 model.network = net
 
 # updates method
-model.updates = yadll.updates.adagrad
+model.updates = yadll.updates.adamax
 
 # train the model and save it to file at each best
 model.train()
@@ -81,70 +84,3 @@
 print ("Model 1, predicted values for the first 30 examples in test set:")
 print predicted_values
 print test_set_y[:30]
-
-# loading saved model
-print ("Loading model from file")
-# load the saved model
-model2 = yadll.model.load_model('best_model.ym')
-
-predicted_values2 = model2.predict(test_set_x[:30])
-print ("Model 2, predicted values for the first 30 examples in test set:")
-print predicted_values2
-print test_set_y[:30]
-
-# load network parameters
-# first we recreate the network
-# create the model
-model3 = yadll.model.Model(name='mlp with dropout', data=data,)
-
-# Hyperparameters
-hp = yadll.hyperparameters.Hyperparameters()
-hp('batch_size', 500)
-hp('n_epochs', 1000)
-hp('learning_rate', 0.1)
-hp('momentum', 0.5)
-hp('l1_reg', 0.00)
-hp('l2_reg', 0.0000)
-hp('patience', 10000)
-
-# add the hyperparameters to the model
-model3.hp = hp
-
-# Create connected layers
-# Input layer
-l_in = yadll.layers.InputLayer(input_shape=(hp.batch_size, 28 * 28), name='Input')
-# Dropout Layer 1
-l_dro1 = yadll.layers.Dropout(incoming=l_in, corruption_level=0.4, name='Dropout 1')
-# Dense Layer 1
-l_hid1 = yadll.layers.DenseLayer(incoming=l_dro1, nb_units=500, W=yadll.init.glorot_uniform,
-                                 l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu,
-                                 name='Hidden layer 1')
-# Dropout Layer 2
-l_dro2 = yadll.layers.Dropout(incoming=l_hid1, corruption_level=0.2, name='Dropout 2')
-# Dense Layer 2
-l_hid2 = yadll.layers.DenseLayer(incoming=l_dro2, nb_units=500, W=yadll.init.glorot_uniform,
-                                 l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu,
-                                 name='Hidden layer 2')
-# Logistic regression Layer
-l_out = yadll.layers.LogisticRegression(incoming=l_hid2, nb_class=10, l1=hp.l1_reg,
-                                        l2=hp.l2_reg, name='Logistic regression')
-
-# Create network and add layers
-net3 = yadll.network.Network('2 layers mlp with dropout')
-net3.add(l_in)
-net3.add(l_dro1)
-net3.add(l_hid1)
-net3.add(l_dro2)
-net3.add(l_hid2)
-net3.add(l_out)
-
-# load params
-net3.load_params('net_params.yp')
-
-# add the network to the model
-model3.network = net3
-
-predicted_values3 = model3.predict(test_set_x[:30])
-print ("Model 3, predicted values for the first 30 examples in test set:")
-print predicted_values3
-print test_set_y[:30]
diff --git a/examples/save_and_load_example.py b/examples/save_and_load_example.py
@@ -1,6 +1,10 @@
 #!/usr/bin/env python
 # -*- coding: UTF-8 -*-
-
+"""
+This example file shows you how to create a model, train it and save it.
+You will save a model, save the parameters and save the configuration,
+and rebuild the model.
+"""
 import os
 import yadll
 import logging
@@ -123,14 +127,14 @@
 print test_set_y[:30]
 # Now let's load parameters
 model_3.network.load_params('net_params.yp')
-# And try prediciting again
+# And try predicting again
 predicted_values_3 = model_3.predict(test_set_x[:30])
 print ("Model 3 after loading parameters values for the first 30 examples in test set:")
 print predicted_values_3
 print test_set_y[:30]
 
 ##########################################################################
-# Reconstruction the model from configuration and load paramters
+# Reconstruct the model from configuration and load parameters
 model_4 = yadll.model.Model()
 model_4.from_conf(conf)         # load from conf obj
 model_5 = yadll.model.Model()

diff --git a/examples/updates_examples.py b/examples/updates_examples.py
@@ -1,9 +1,20 @@
 #!/usr/bin/env python
 # -*- coding: UTF-8 -*-
-
+"""
+This example will show you the differences between the update functions:
+    - sgd: Stochastic Gradient Descent
+    - momentum: Stochastic Gradient Descent with momentum
+    - nesterov_momentum: Stochastic Gradient Descent with Nesterov momentum
+    - adagrad: Adaptive gradient descent
+    - rmsprop: scaling with the Root mean square of the gradient
+    - adadelta: adaptive learning rate
+    - adam: Adaptive moment gradient descent
+    - adamax: adam with infinity norm
+"""
 import os
 import yadll
 import logging
+from collections import OrderedDict
 
 logging.basicConfig(level=logging.DEBUG, format='%(message)s')
 
@@ -16,87 +27,96 @@
     urllib.urlretrieve(origin, datafile)
 data = yadll.data.Data(datafile)
 
-# create the model
-model = yadll.model.Model(name='mlp with dropout', data=data, file='best_model.ym')
-
-# Hyperparameters
-hp = yadll.hyperparameters.Hyperparameters()
-hp('batch_size', 500)
-hp('n_epochs', 1000)
-hp('learning_rate', 0.9)
-hp('momentum', 0.5)
-hp('l1_reg', 0.001)
-hp('l2_reg', 0.00001)
-hp('patience', 10000)
-
-# add the hyperparameters to the model
-model.hp = hp
-
-# Create connected layers
-# Input layer
-l_in = yadll.layers.InputLayer(input_shape=(hp.batch_size, 28 * 28))
-# Dropout Layer 1
-l_dro1 = yadll.layers.Dropout(incoming=l_in, corruption_level=0.5)
-# Dense Layer 1
-l_hid1 = yadll.layers.DenseLayer(incoming=l_dro1, nb_units=500, W=yadll.init.glorot_uniform,
-                                 l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu)
-# Dropout Layer 2
-l_dro2 = yadll.layers.Dropout(incoming=l_hid1, corruption_level=0.25)
-# Dense Layer 2
-l_hid2 = yadll.layers.DenseLayer(incoming=l_dro2, nb_units=500, W=yadll.init.glorot_uniform,
-                                 l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu)
-# Logistic regression Layer
-l_out = yadll.layers.LogisticRegression(incoming=l_hid2, nb_class=10, l1=hp.l1_reg, l2=hp.l2_reg)
-
-# Create network and add layers
-net = yadll.network.Network('2 layers mlp with dropout')
-net.add(l_in)
-net.add(l_dro1)
-net.add(l_hid1)
-net.add(l_dro2)
-net.add(l_hid2)
-net.add(l_out)
-
-# add the network to the model
-model.network = net
-
-# updates method
-model.updates = yadll.updates.sgd
-
-# saving configuration of the model
-conf = model.to_conf()
-
-# train the model and save it to file at each best
-model.train()
-
-# saving network parameters
-net.save_params('net_params.yp')
-
-# make prediction
-# We can test it on some examples from test
-test_set_x = data.test_set_x.get_value()
-test_set_y = data.test_set_y.eval()
-
-predicted_values = model.predict(test_set_x[:30])
-
-print ("Model 1, predicted values for the first 30 examples in test set:")
-print predicted_values
-print test_set_y[:30]
-
-# Reconstruction of the model from configuration
-model_2 = yadll.model.Model()
-model_2.from_conf(conf)
-
-import json
-with open('model.json', 'w') as f:
-    json.dump(model.to_json(),f)
-
-net.load_params('net_params.yp')
-model_2.network.load_params('net_params.yp')
-predicted_values_2 = model_2.predict(test_set_x[:30])
-print predicted_values_2
-print test_set_y[:30]
-
-model_2.data = data
+updates = OrderedDict([  # name of a function in yadll.updates -> list of argument lists passed to hps(*...)
+    ('sgd', [['learning_rate', 0.1, [0.001, 0.01, 0.1]]]),
+    ('momentum', [['learning_rate', 0.1, [0.001, 0.01, 0.1]],
+                  ['momentum', 0.9, [0.85, 0.9, 0.95, 0.99]]]),
+    ('nesterov_momentum', [['learning_rate', 0.1, [0.001, 0.01, 0.1]],
+                           ['momentum', 0.9, [0.85, 0.9, 0.95, 0.99]]]),
+    ('adagrad', [['learning_rate', 0.1, [0.001, 0.01, 0.1]]]),
+    ('rmsprop', [['learning_rate', 0.1, [0.001, 0.01, 0.1]]]),
+    ('adadelta', [['learning_rate', 0.1, [0.001, 0.01, 0.1]]]),
+    ('adam', [['learning_rate', 0.1, [0.001, 0.01, 0.1]]]),
+    ('adamax', [['learning_rate', 0.1, [0.001, 0.01, 0.1]]]),
+])  # NOTE(review): each entry looks like [name, value, candidate values to search] - confirm against yadll.hyperparameters
+
+
+def get_hps():  # build the hyperparameters common to every update function
+    # Fresh Hyperparameters object holding the settings shared by all runs
+    hps = yadll.hyperparameters.Hyperparameters()
+    hps('batch_size', 50)
+    hps('n_epochs', 500)
+    hps('l1_reg', 0.001)
+    hps('l2_reg', 0.00001)
+    hps('patience', 5000)
+    return hps  # the caller registers learning_rate / momentum on top of these
+
+
+def get_model(hp):  # build (but do not train) the dropout MLP model configured by hp
+    # create the model on the module-level `data` object
+    model = yadll.model.Model(name='mlp with dropout', data=data)
+    # Create connected layers: each layer receives the previous one as `incoming`
+    # Input layer, shaped (hp.batch_size, 28 * 28)
+    l_in = yadll.layers.InputLayer(input_shape=(hp.batch_size, 28 * 28))
+    # Dropout Layer 1 (corruption_level=0.5) applied to the input
+    l_dro1 = yadll.layers.Dropout(incoming=l_in, corruption_level=0.5)
+    # Dense Layer 1: 500 relu units, l1/l2 taken from hp
+    l_hid1 = yadll.layers.DenseLayer(incoming=l_dro1, nb_units=500, W=yadll.init.glorot_uniform,
+                                     l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu)
+    # Dropout Layer 2 (corruption_level=0.25) applied to the first hidden layer
+    l_dro2 = yadll.layers.Dropout(incoming=l_hid1, corruption_level=0.25)
+    # Dense Layer 2: 500 relu units, l1/l2 taken from hp
+    l_hid2 = yadll.layers.DenseLayer(incoming=l_dro2, nb_units=500, W=yadll.init.glorot_uniform,
+                                     l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activations.relu)
+    # Logistic regression Layer: 10-class output
+    l_out = yadll.layers.LogisticRegression(incoming=l_hid2, nb_class=10, l1=hp.l1_reg, l2=hp.l2_reg)
+
+    # Create network and add the layers in input-to-output order
+    net = yadll.network.Network('2 layers mlp with dropout')
+    net.add(l_in)
+    net.add(l_dro1)
+    net.add(l_hid1)
+    net.add(l_dro2)
+    net.add(l_hid2)
+    net.add(l_out)
+
+    # add the network to the model
+    model.network = net
+
+    # add the hyperparameters to the model
+    model.hp = hp
+
+    return model
+
+report = list()
+
+# Train one model per update function and per hyperparameter combination,
+# collecting one summary row per run in `report`.
+for update, hyperparams in updates.iteritems():
+    hps = get_hps()
+    # register the hyperparameters specific to this update function
+    for hyperparam in hyperparams:
+        hps(*hyperparam)
+    for hp in hps:
+        model = get_model(hp)
+        model.updates = getattr(yadll.updates, update)
+        model.train()
+        # row layout: update name, then alternating label / value pairs
+        r = list()
+        r.append(update)
+        for hyperparam in hyperparams:
+            r.append(hyperparam[0])
+            r.append(hp.hp_value[hyperparam[0]])
+        r.append('epoch')
+        r.append(model.report['epoch'])
+        r.append('early_stop')
+        r.append(model.report['early_stop'])
+        r.append('best_validation')
+        r.append(round(model.report['best_validation'], 2))
+        r.append('best_iter')
+        r.append(model.report['best_iter'])
+        r.append('test_score')
+        r.append(round(model.report['test_score'], 2))
+        r.append('training_duration')
+        r.append(model.report['training_duration'])
+        report.append(r)
+        print report
+        # The file is reopened in 'w' mode (truncating it) after every run, so
+        # write every row collected so far, one run per line; writing only the
+        # latest row would discard all previous results.
+        with open('report', 'w') as f:
+            f.writelines(' '.join(str(e) for e in row) + '\n' for row in report)
 
-model_2.train()