Commit: Compile

pchavanne committed Feb 27, 2017
1 parent 91781e8 commit 6409990
Showing 2 changed files with 83 additions and 34 deletions.
10 changes: 6 additions & 4 deletions examples/lstm_example.py
@@ -22,9 +22,9 @@

# Hyperparameters
hp = yadll.hyperparameters.Hyperparameters()
hp('batch_size', 3)
hp('batch_size', 1)
hp('n_epochs', 100)
hp('patience', 100)
hp('patience', 1000)

# add the hyperparameters to the model
model.hp = hp
@@ -33,10 +33,11 @@
# Input layer
l_in = yadll.layers.InputLayer(input_shape=(hp.batch_size, sequence_length, number_of_chars))
# LSTM 1
l_lstm1 = yadll.layers.LSTM(incoming=l_in, n_units=16, last_only=False)
l_lstm1 = yadll.layers.BNLSTM(incoming=l_in, n_units=16, last_only=False)
# LSTM 2
l_lstm2 = yadll.layers.LSTM(incoming=l_lstm1, n_units=16)
l_lstm2 = yadll.layers.BNLSTM(incoming=l_lstm1, n_units=16)
# Logistic regression Layer

l_out = yadll.layers.LogisticRegression(incoming=l_lstm2, n_class=number_of_chars)

# Create network and add layers
@@ -52,6 +53,7 @@
model.updates = yadll.updates.rmsprop

# train the model and save it to file at each best
model.compile(compile_arg='all')
model.train()

# prime the model with 'ab' sequence and let it generate the learned alphabet
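A note on the new model.compile(compile_arg='all') call in the example: the Model class itself is not part of this diff, so the exact behaviour of compile_arg is an assumption here, but the pattern it exposes is the usual Theano one of building and compiling the functions once, before the training loop, rather than lazily on first use. A minimal standalone Theano sketch of that pattern (names are illustrative, not yadll's):

import numpy as np
import theano
import theano.tensor as T

# Build the symbolic graph, then compile it into a callable exactly once.
x = T.matrix('x')
w = theano.shared(np.zeros((3, 2), dtype=theano.config.floatX), name='w')
predict = theano.function([x], T.nnet.softmax(T.dot(x, w)))  # compilation happens here

batch = np.ones((1, 3), dtype=theano.config.floatX)
print(predict(batch))  # later calls reuse the compiled function, no recompilation
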
107 changes: 77 additions & 30 deletions yadll/layers.py
@@ -117,16 +117,6 @@ def get_output(self, **kwargs):
raise NotImplementedError

def to_conf(self):
# conf = self.__dict__.copy()
# for key in ['params', 'reguls']:
# conf.pop(key, None)
# if conf['input_layer']:
# conf['input_layer'] = conf['input_layer'].name
# if 'activation' in conf:
# conf['activation'] = conf['activation'].__name__
# if 'hyperparameters' in conf:
# conf['hp'] = conf.pop('hyperparameters').to_conf()
# conf['type'] = self.__class__.__name__
conf = {'type': self.__class__.__name__,
'id': self.id,
'name': self.name,
@@ -139,12 +129,6 @@ def to_conf(self):
conf['input_layer'] = self.input_layer.name
return conf

# def from_conf(self, conf):
# self.__dict__.update(conf)
# if 'hp' in conf:
# for k, v in conf['hp'].iteritems():
# self.hp(k, v)


class InputLayer(Layer):
"""
@@ -564,6 +548,16 @@ def to_conf(self):
conf['activation'] = activation_to_conf(self.activation)
return conf

def __getstate__(self):
if hasattr(self.activation, '__call__'):
dic = self.__dict__.copy()
dic['activation'] = activation_to_conf(self.activation)
return dic

def __setstate__(self, dic):
self.__dict__.update(dic)
self.activation = get_activation(self.activation)


class AutoEncoder(UnsupervisedLayer):
"""
@@ -746,20 +740,21 @@ class BatchNormalization(Layer):
"""
n_instances = 0

def __init__(self, incoming, axis=-2, alpha=0.1, epsilon=1e-5, beta=True, **kwargs):
def __init__(self, incoming, axis=-2, alpha=0.1, epsilon=1e-5, has_beta=True, **kwargs):
super(BatchNormalization, self).__init__(incoming, **kwargs)
self.axis = axis
self.alpha = alpha
self.epsilon = epsilon
self.has_beta = has_beta
self.beta = self.gamma = self.mean = self.var = None
if self.input_shape is not None:
self.init_params(self.input_shape, beta=beta)
self.init_params(self.input_shape, has_beta=has_beta)

def init_params(self, input_shape, beta):
def init_params(self, input_shape, has_beta):
self.gamma = initializer(constant, shape=input_shape, value=1, name='gamma')
self.params.append(self.gamma)
self.beta = initializer(constant, shape=(input_shape[1],), value=0, name='beta')
if beta:
if has_beta:
self.beta = initializer(constant, shape=(input_shape[1],), value=0, name='beta')
self.params.append(self.beta)
self.mean = initializer(constant, shape=input_shape, value=0, name='mean')
self.var = initializer(constant, shape=input_shape, value=1, name='var')
@@ -774,16 +769,18 @@ def get_output(self, stochastic=True, **kwargs):
else:
mean = self.mean
var = self.var
x_hat = (x - mean) / T.sqrt(var + self.epsilon) # normalize
y = self.gamma * x_hat + self.beta # scale and shift
x_hat = (x - mean) / T.sqrt(var + self.epsilon) # normalize
y = self.gamma * x_hat # scale
if self.has_beta:
y += self.beta # shift
return y

def to_conf(self):
conf = super(BatchNormalization, self).to_conf()
conf['axis'] = self.axis
conf['alpha'] = self.alpha
conf['epsilon'] = self.epsilon
conf['beta'] = self.beta
conf['has_beta'] = self.has_beta
return conf
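
For reference, the training-time computation in get_output above is standard batch normalization with an optional learned shift, which is exactly what the new has_beta flag toggles. A minimal numpy sketch of that branch (the running mean/variance path used when stochastic is False is left out):

import numpy as np

def batch_norm(x, gamma, beta=None, epsilon=1e-5):
    # normalize over the batch axis, then scale; shift only if beta is given
    mean = x.mean(axis=0, keepdims=True)
    var = x.var(axis=0, keepdims=True)
    x_hat = (x - mean) / np.sqrt(var + epsilon)
    y = gamma * x_hat
    if beta is not None:
        y = y + beta  # the has_beta=True case
    return y

x = np.random.randn(8, 4)
y = batch_norm(x, gamma=np.ones(4))  # has_beta=False: scale only, as in BNLSTM below
print(y.mean(axis=0).round(3), y.std(axis=0).round(3))  # ~0 and ~1 per feature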


@@ -876,8 +873,24 @@ def one_step(x_t, h_tm1, *args):

def to_conf(self):
conf = super(RNN, self).to_conf()
conf['n_out'] = self.n_out
conf['activation'] = activation_to_conf(self.activation)
conf['last_only'] = self.last_only
conf['grad_clipping'] = self.grad_clipping
conf['go_backwards'] = self.go_backwards
conf['allow_gc'] = self.allow_gc
return conf

def __getstate__(self):
if hasattr(self.activation, '__call__'):
dic = self.__dict__.copy()
dic['activation'] = activation_to_conf(self.activation)
return dic

def __setstate__(self, dic):
self.__dict__.update(dic)
self.activation = get_activation(self.activation)


class LSTM(Layer):
r"""
@@ -1047,8 +1060,26 @@ def one_step(x_t, h_tm1, c_tm1, *args):

def to_conf(self):
conf = super(LSTM, self).to_conf()
conf['n_units'] = self.n_units
conf['peepholes'] = self.peepholes
conf['tied_i_f'] = self.tied
conf['activation'] = activation_to_conf(self.activation)
conf['last_only'] = self.last_only
conf['grad_clipping'] = self.grad_clipping
conf['go_backwards'] = self.go_backwards
conf['allow_gc'] = self.allow_gc
return conf

def __getstate__(self):
if hasattr(self.activation, '__call__'):
dic = self.__dict__.copy()
dic['activation'] = activation_to_conf(self.activation)
return dic

def __setstate__(self, dic):
self.__dict__.update(dic)
self.activation = get_activation(self.activation)


class GRU(Layer):
r"""
@@ -1160,8 +1191,24 @@ def one_step(x_t, h_tm1, *args):

def to_conf(self):
conf = super(GRU, self).to_conf()
conf['n_units'] = self.n_units
conf['activation'] = activation_to_conf(self.activation)
conf['last_only'] = self.last_only
conf['grad_clipping'] = self.grad_clipping
conf['go_backwards'] = self.go_backwards
conf['allow_gc'] = self.allow_gc
return conf

def __getstate__(self):
if hasattr(self.activation, '__call__'):
dic = self.__dict__.copy()
dic['activation'] = activation_to_conf(self.activation)
return dic

def __setstate__(self, dic):
self.__dict__.update(dic)
self.activation = get_activation(self.activation)


class BNLSTM(LSTM):
r"""
@@ -1202,16 +1249,16 @@ def __init__(self, incoming, n_units, activation=tanh, last_only=True, grad_clip
grad_clipping=grad_clipping, go_backwards=go_backwards,
allow_gc=allow_gc, **kwargs)
# Batch Normalise the input
self.bn_x = BatchNormalization(None, nested=True)
self.bn_x.init_params(input_shape=(self.input_shape[1], self.input_shape[0], n_units), beta=False)
self.bn_x = BatchNormalization(None)
self.bn_x.init_params(input_shape=(self.input_shape[1], self.input_shape[0], n_units), has_beta=False)
self.params.extend(self.bn_x.params)
# Batch Normalise the hidden state
self.bn_h = BatchNormalization(None, nested=True)
self.bn_h.init_params(input_shape=(self.input_shape[1], self.input_shape[0], n_units), beta=False)
self.bn_h = BatchNormalization(None)
self.bn_h.init_params(input_shape=(self.input_shape[1], self.input_shape[0], n_units), has_beta=False)
self.params.extend(self.bn_h.params)
# Batch Normalise the cell state
self.bn_c = BatchNormalization(None, nested=True)
self.bn_c.init_params(input_shape=(self.input_shape[1], self.input_shape[0], n_units), beta=False)
self.bn_c = BatchNormalization(None)
self.bn_c.init_params(input_shape=(self.input_shape[1], self.input_shape[0], n_units), has_beta=False)
self.params.extend(self.bn_c.params)

def get_output(self, **kwargs):
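On the BNLSTM changes: its three internal BatchNormalization helpers (for the input projection, the hidden state and the cell state) are now created with has_beta=False, so they learn a scale but no shift; the usual motivation is that the LSTM pre-activations already carry their own biases, making an extra learned shift redundant. This is the recurrent batch-normalization scheme of normalizing those three streams separately. A rough numpy sketch of one such step, as an illustration of the idea only (yadll's actual BNLSTM.get_output is collapsed in this diff and may differ in detail):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def bn(z, gamma, epsilon=1e-5):
    # scale-only batch normalization over the batch axis (the has_beta=False case)
    return gamma * (z - z.mean(axis=0, keepdims=True)) / np.sqrt(z.var(axis=0, keepdims=True) + epsilon)

def bnlstm_step(x_t, h_prev, c_prev, W_x, W_h, b, gamma_x, gamma_h, gamma_c):
    # normalize the input and recurrent projections separately, add the gate biases,
    # then normalize the new cell state before it feeds the output gate
    z = bn(x_t @ W_x, gamma_x) + bn(h_prev @ W_h, gamma_h) + b
    i, f, o, g = np.split(z, 4, axis=1)
    c_t = sigmoid(f) * c_prev + sigmoid(i) * np.tanh(g)
    h_t = sigmoid(o) * np.tanh(bn(c_t, gamma_c))
    return h_t, c_t

rng = np.random.default_rng(0)
batch, n_in, n_units = 4, 3, 16
x_t = rng.standard_normal((batch, n_in))
h = c = np.zeros((batch, n_units))
W_x = 0.1 * rng.standard_normal((n_in, 4 * n_units))
W_h = 0.1 * rng.standard_normal((n_units, 4 * n_units))
b = np.zeros(4 * n_units)
h, c = bnlstm_step(x_t, h, c, W_x, W_h, b,
                   np.ones(4 * n_units), np.ones(4 * n_units), np.ones(n_units))
print(h.shape, c.shape)  # (4, 16) (4, 16)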
