Commit 58520ba: json

pchavanne committed Dec 8, 2016
1 parent 9138b5b
Showing 7 changed files with 175 additions and 8 deletions.
2 changes: 1 addition & 1 deletion examples/hp_grid_search.py
@@ -86,6 +86,6 @@ def grid_search():
        columns=['iteration', 'test', 'validation', 'training time'])
    reports = pd.concat([param_reports, pd_report], axis=1)

-    reports.to_html(open('/home/philippe/Python/Theano/report.html', 'w'))
+    reports.to_html(open('report.html', 'w'))

    print reports.loc[reports['validation'].idxmin()]
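The report path is now relative, so the grid search writes report.html to the working directory instead of a hard-coded home folder. For reference, the idxmin line above selects the hyperparameter row with the lowest validation error; a minimal pandas sketch of the same pattern, using hypothetical error values:

import pandas as pd

# Hypothetical grid-search report: one row per hyperparameter set.
reports = pd.DataFrame({'validation': [0.042, 0.031, 0.057],
                        'test': [0.045, 0.033, 0.061]})

# Row with the lowest validation error, as in grid_search() above.
print reports.loc[reports['validation'].idxmin()]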
6 changes: 2 additions & 4 deletions examples/model_template.py
@@ -1,9 +1,7 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
-import os
-
import yadll

import logging

logging.basicConfig(level=logging.DEBUG, format='%(message)s')
@@ -24,7 +22,7 @@
hp = yadll.hyperparameters.Hyperparameters()
hp('batch_size', 500)
hp('n_epochs', 1000)
-hp('learning_rate', 0.1)
+hp('learning_rate', 0.9)
hp('momentum', 0.5)
hp('l1_reg', 0.00)
hp('l2_reg', 0.0000)
@@ -65,7 +63,7 @@
model.network = net

# updates method
-model.updates = yadll.updates.nesterov_momentum
+model.updates = yadll.updates.adagrad

# train the model and save it to file at each best
model.train()
86 changes: 86 additions & 0 deletions examples/updates_examples.py
@@ -0,0 +1,86 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import os
import yadll
import logging

logging.basicConfig(level=logging.DEBUG, format='%(message)s')

# load the data
datafile = 'mnist.pkl.gz'
if not os.path.isfile(datafile):
    import urllib
    origin = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
    print 'Downloading data from %s' % origin
    urllib.urlretrieve(origin, datafile)
data = yadll.data.Data(datafile)

# create the model
model = yadll.model.Model(name='mlp with dropout', data=data, file='best_model.ym')

# Hyperparameters
hp = yadll.hyperparameters.Hyperparameters()
hp('batch_size', 500)
hp('n_epochs', 1000)
hp('learning_rate', 0.9)
hp('momentum', 0.5)
hp('l1_reg', 0.001)
hp('l2_reg', 0.00001)
hp('patience', 10000)

# add the hyperparameters to the model
model.hp = hp

# Create connected layers
# Input layer
l_in = yadll.layers.InputLayer(shape=(hp.batch_size, 28 * 28)) #, name='Input')
# Dropout Layer 1
l_dro1 = yadll.layers.Dropout(incoming=l_in, corruption_level=0.5)#, name='Dropout 1')
# Dense Layer 1
l_hid1 = yadll.layers.DenseLayer(incoming=l_dro1, nb_units=500, W=yadll.init.glorot_uniform,
                                 l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activation.relu)  # , name='Hidden layer 1'
# Dropout Layer 2
l_dro2 = yadll.layers.Dropout(incoming=l_hid1, corruption_level=0.25)#, name='Dropout 2')
# Dense Layer 2
l_hid2 = yadll.layers.DenseLayer(incoming=l_dro2, nb_units=500, W=yadll.init.glorot_uniform,
                                 l1=hp.l1_reg, l2=hp.l2_reg, activation=yadll.activation.relu)  # , name='Hidden layer 2'
# Logistic regression Layer
l_out = yadll.layers.LogisticRegression(incoming=l_hid2, nb_class=10, l1=hp.l1_reg,
                                        l2=hp.l2_reg)  # , name='Logistic regression'

# Create network and add layers
net = yadll.network.Network('2 layers mlp with dropout')
net.add(l_in)
net.add(l_dro1)
net.add(l_hid1)
net.add(l_dro2)
net.add(l_hid2)
net.add(l_out)

# add the network to the model
model.network = net

# updates method
model.updates = yadll.updates.sgd

# train the model and save it to file at each best
model.train()

# saving network parameters
net.save_params('net_params.yp')

# make prediction
# We can test it on some examples from test
test_set_x = data.test_set_x.get_value()
test_set_y = data.test_set_y.eval()

predicted_values = model.predict(test_set_x[:30])

print ("Model 1, predicted values for the first 30 examples in test set:")
print predicted_values
print test_set_y[:30]

print model
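This example differs from model_template.py only in the update rule (sgd here, adagrad there). A minimal sketch of comparing the three update functions that appear in this commit, assuming the model can simply be retrained after swapping model.updates and that model.report holds the training summary (both assumptions, not confirmed by the diff):

# Hypothetical comparison loop over the update rules used in this commit.
for updates in (yadll.updates.sgd,
                yadll.updates.nesterov_momentum,
                yadll.updates.adagrad):
    model.updates = updates
    model.train()  # assumes train() restarts cleanly with new updates
    print '%s: %s' % (updates.__name__, model.report)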
3 changes: 3 additions & 0 deletions yadll/hyperparameters.py
@@ -73,3 +73,6 @@ def reset(self):
        for name, value in self.hp_default.iteritems():
            self.__setattr__(name, value)
        self.iteration = 0

    def to_json(self):
        return self.hp_value
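Despite its name, to_json returns the raw hp_value mapping rather than a JSON string. A minimal sketch of producing actual JSON from it, assuming hp_value maps hyperparameter names to plain Python scalars:

import json
import yadll

hp = yadll.hyperparameters.Hyperparameters()
hp('batch_size', 500)
hp('learning_rate', 0.9)

# json.dumps does the real serialization; to_json only hands back the dict.
print json.dumps(hp.to_json(), indent=2)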
73 changes: 70 additions & 3 deletions yadll/layers.py
@@ -31,13 +31,15 @@ class Layer(object):
    The layer name.
    """
    nb_instances = 0

    def __init__(self, incoming, name=None):
        """
        The base class that represents a single layer of any neural network.
        It has to be subclassed by any kind of layer.
        """

        self.id = self.get_id()
        if isinstance(incoming, tuple):
            self.input_shape = incoming
            self.input_layer = None
@@ -46,9 +48,16 @@ def __init__(self, incoming, name=None):
            self.input_layer = incoming

        self.name = name
        if name is None:
            self.name = self.__class__.__name__ + ' ' + str(self.id)
        self.params = []
        self.reguls = 0

    @classmethod
    def get_id(cls):
        cls.nb_instances += 1
        return cls.nb_instances

    def get_params(self):
        """
        Theano shared variables representing the parameters of this layer.
@@ -98,13 +107,24 @@ def get_output(self, **kwargs):
"""
raise NotImplementedError

def to_json(self):
json = {'name': self.name,
'input shape': self.input_shape}
if self.input_layer is None:
json['input layer'] = 'no input layer'
else:
json['input layer'] = self.input_layer.name
return json


class InputLayer(Layer):
"""
Input layer of the data, it has no parameters, it just shapes the data as
the input for any network.
A ::class:`InputLayer` is always the first layer of any network.
"""
nb_instances = 0

def __init__(self, shape, input_var=None, **kwargs):
"""
The input layer of any network
@@ -123,11 +143,17 @@ def __init__(self, shape, input_var=None, **kwargs):
    def get_output(self, **kwargs):
        return self.input

    def to_json(self):
        json = dict({'class': self.__class__.__name__}, **super(InputLayer, self).to_json())
        return json


class ReshapeLayer(Layer):
"""
Reshape the incoming layer to the output_shape.
"""
nb_instances = 0

def __init__(self, incoming, output_shape=None, **kwargs):
super(ReshapeLayer, self).__init__(incoming, **kwargs)
self.reshape_shape = output_shape
Expand All @@ -149,6 +175,8 @@ class FlattenLayer(Layer):
"""
Reshape layers back to flat
"""
nb_instances = 0

def __init__(self, incoming, ndim=2, **kwargs):
super(FlattenLayer, self).__init__(incoming, **kwargs)
self.ndim = ndim
Expand All @@ -166,6 +194,8 @@ class DenseLayer(Layer):
"""
Fully connected neural network layer
"""
nb_instances = 0

def __init__(self, incoming, nb_units, W=glorot_uniform, b=constant,
activation=tanh, l1=None, l2=None, **kwargs):
super(DenseLayer, self).__init__(incoming, **kwargs)
@@ -181,9 +211,9 @@ def __init__(self, incoming, nb_units, W=glorot_uniform, b=constant,
        self.b = initializer(b, shape=(self.shape[1],), name='b')
        self.params.append(self.b)
        self.activation = activation
-        if l1:
+        if l1 and l1 != 0:
            self.reguls += l1 * T.mean(T.abs_(self.W))
-        if l2:
+        if l2 and l2 != 0:
            self.reguls += l2 * T.mean(T.sqr(self.W))

    @property
@@ -194,13 +224,19 @@ def get_output(self, **kwargs):
        X = self.input_layer.get_output(**kwargs)
        return self.activation(T.dot(X, self.W) + self.b)

    def to_json(self):
        json = dict({'class': self.__class__.__name__}, **super(DenseLayer, self).to_json())
        return json


class UnsupervisedLayer(DenseLayer):
"""
Base class for all unsupervised layers.
Unsupervised layers are pre-trained against its own input.
"""
nb_instances = 0

def __init__(self, incoming, nb_units, hyperparameters, **kwargs):
super(UnsupervisedLayer, self).__init__(incoming, nb_units, **kwargs)
self.hp = hyperparameters
@@ -241,15 +277,23 @@ class LogisticRegression(DenseLayer):
    ----------
    .. [1] http://deeplearning.net/tutorial/logreg.html
    """
    nb_instances = 0

    def __init__(self, incoming, nb_class, W=constant, activation=softmax, **kwargs):
        super(LogisticRegression, self).__init__(incoming, nb_class, W=W,
                                                 activation=activation, **kwargs)

    def to_json(self):
        json = dict({'class': self.__class__.__name__}, **super(LogisticRegression, self).to_json())
        return json


class Dropout(Layer):
"""
Dropout layer
"""
nb_instances = 0

def __init__(self, incoming, corruption_level=0.5, **kwargs):
super(Dropout, self).__init__(incoming, **kwargs)
self.p = 1 - corruption_level
@@ -264,11 +308,16 @@ def get_output(self, stochastic=True, **kwargs):
            X = X * T_rng.binomial(self.input_shape, n=1, p=self.p, dtype=floatX)
        return X

    def to_json(self):
        json = dict({'class': self.__class__.__name__}, **super(Dropout, self).to_json())
        return json

class Dropconnect(DenseLayer):
"""
DropConnect layer
"""
nb_instances = 0

def __init__(self, incoming, nb_units, corruption_level=0.5, **kwargs):
super(Dropconnect, self).__init__(incoming, nb_units, **kwargs)
self.p = 1 - corruption_level
@@ -284,6 +333,8 @@ class PoolLayer(Layer):
"""
Pooling layer, default is maxpooling
"""
nb_instances = 0

def __init__(self, incoming, poolsize, stride=None, ignore_border=True,
padding=(0, 0), mode='max', **kwargs):
super(PoolLayer, self).__init__(incoming, **kwargs)
@@ -313,6 +364,8 @@ class ConvLayer(Layer):
"""
Convolutional layer
"""
nb_instances = 0

def __init__(self, incoming, image_shape=None, filter_shape=None, W=glorot_uniform,
border_mode='valid', subsample=(1, 1), l1=None, l2=None, pool_scale=None, **kwargs):
super(ConvLayer, self).__init__(incoming, **kwargs)
@@ -357,6 +410,8 @@ class ConvPoolLayer(ConvLayer, PoolLayer):
    ----------
    .. [1] http://deeplearning.net/tutorial/lenet.html
    """
    nb_instances = 0

    def __init__(self, incoming, poolsize, image_shape=None, filter_shape=None,
                 b=constant, activation=tanh, **kwargs):
        super(ConvPoolLayer, self).__init__(incoming, poolsize=poolsize, image_shape=image_shape,
@@ -388,6 +443,8 @@ class AutoEncoder(UnsupervisedLayer):
    ----------
    .. [1] http://deeplearning.net/tutorial/dA.html
    """
    nb_instances = 0

    def __init__(self, incoming, nb_units, hyperparameters, corruption_level=0.0,
                 W=(glorot_uniform, {'gain': sigmoid}), b_prime=constant,
                 sigma=None, contraction_level=None, **kwargs):
@@ -435,6 +492,8 @@ class RBM(UnsupervisedLayer):
    ----------
    .. [1] http://deeplearning.net/tutorial/rbm.html
    """
    nb_instances = 0

    def __init__(self, incoming, nb_units, hyperparameters, W=glorot_uniform,
                 b_hidden=constant, activation=sigmoid, **kwargs):
        super(RBM, self).__init__(incoming, nb_units, hyperparameters, W=W,
@@ -535,6 +594,8 @@ class BatchNormalization(Layer):
    .. [1] http://jmlr.org/proceedings/papers/v37/ioffe15.pdf
    .. [2] https://github.com/fchollet/keras/blob/master/keras/layers/normalization.py#L6
    """
    nb_instances = 0

    def __init__(self, incoming, **kwargs):
        super(BatchNormalization, self).__init__(incoming, **kwargs)

@@ -553,6 +614,8 @@ class LayerNormalization(Layer):
    .. [1] http://arxiv.org/pdf/1607.06450v1.pdf
    """
    nb_instances = 0

    def __init__(self, incoming, **kwargs):
        super(LayerNormalization, self).__init__(incoming, **kwargs)

@@ -571,6 +634,8 @@ class RNN(Layer):
    .. [1] http://deeplearning.net/tutorial/rnnslu.html
    """
    nb_instances = 0

    def __init__(self, incoming, n_hidden, n_out, activation=tanh, **kwargs):
        super(RNN, self).__init__(incoming, **kwargs)
        self.activation = activation
@@ -622,6 +687,8 @@ class LSTM(Layer):
    .. [3] http://people.idsia.ch/~juergen/lstm/
    .. [4] http://colah.github.io/posts/2015-08-Understanding-LSTMs/
    """
    nb_instances = 0

    def __init__(self, incoming, n_hidden, n_out, peephole=False, tied_i_f=False, activation=tanh, **kwargs):
        super(LSTM, self).__init__(incoming, **kwargs)
        self.peephole = peephole  # gate layers look at the cell state
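Each layer class now carries its own nb_instances counter: get_id bumps the counter of the concrete class, and a layer created without a name defaults to '<ClassName> <id>'. Each subclass's to_json merges a 'class' entry into the base Layer dict of name, input shape and input layer. A minimal sketch of what that yields for two connected layers (assuming Theano is installed, since the DenseLayer initializers build shared variables):

import yadll

l_in = yadll.layers.InputLayer(shape=(500, 28 * 28))
l_hid = yadll.layers.DenseLayer(incoming=l_in, nb_units=500)

# Default names come from the per-class counters,
# e.g. 'InputLayer 1' and 'DenseLayer 1' for first instances.
print l_in.name, '->', l_hid.name

# DenseLayer.to_json adds 'class' on top of Layer's
# 'name', 'input shape' and 'input layer' entries.
print l_hid.to_json()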
8 changes: 8 additions & 0 deletions yadll/model.py
@@ -294,3 +294,11 @@ def predict(self, X):
        predict = theano.function(inputs=[self.x], outputs=prediction, name='predict')
        return predict(X)

    def to_json(self):
        return {'model name': self.name,
                'file': self.file,
                'network': self.network.to_json(),
                'hyperparameters': self.hp.to_json(),
                'update': self.updates.__name__,
                'report': self.report}
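Model.to_json ties the commit together: it nests network.to_json() and hp.to_json() next to the update function's name and the training report. A minimal sketch of dumping the summary to disk, assuming model is the trained Model from updates_examples.py and that the report value is JSON-serializable:

import json

# to_json returns a plain dict; json.dump writes it as a JSON file.
summary = model.to_json()
with open('model.json', 'w') as f:
    json.dump(summary, f, indent=2)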
