Merge pull request #27 from ragavvenkatesan/dev
batch norm post activations
Ragav Venkatesan committed Feb 27, 2017
2 parents a61687f + 505f9ab commit 733e931
Showing 9 changed files with 664 additions and 29 deletions.
16 changes: 16 additions & 0 deletions docs/source/yann/layers/batch_norm.rst
@@ -0,0 +1,16 @@
.. _batch_norm:

:mod:`batch_norm` - Batch normalization layer classes
======================================================

The file ``yann.layers.batch_norm.py`` contains the definitions of the batch norm layers. Batch
norm can by default be applied to convolutional and fully connected layers by supplying the
argument ``batch_norm = True`` in the layer arguments. This in-built method, however, applies
batch norm prior to the layer activation, so the layer computes :math:`f(\mathrm{BN}(Wx + b))`
for an activation :math:`f`. Some architectures, including ResNet, involve batch norm after the
activations of the layer, computing :math:`\mathrm{BN}(f(Wx + b))` instead. Therefore there is a
need for an independent batch norm layer that simply applies batch norm to some outputs. The
layers in this module do exactly that.

There are four classes in this file: two for one-dimensional inputs and two for two-dimensional inputs.
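
For instance, a layer can be batch normed after its activation as follows. This is a minimal
sketch based on the ``lenet_maxout_batchnorm_after_activation`` tutorial in this commit; it
assumes a ``yann.network.network`` object ``net`` to which an ``input`` layer has already been
added, and the layer ids are illustrative::

    # Switch off the in-built (pre-activation) batch norm in the layer itself.
    net.add_layer ( type = "conv_pool",
                    origin = "input",
                    id = "conv_pool_1",
                    num_neurons = 40,
                    filter_size = (5,5),
                    pool_size = (2,2),
                    activation = 'relu',
                    batch_norm = False )

    # The independent layer then normalizes the post-activation outputs.
    net.add_layer ( type = 'batch_norm',
                    origin = 'conv_pool_1',
                    id = 'batch_norm_after_cp_1' )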

.. automodule:: yann.layers.batch_norm
:members:
5 changes: 5 additions & 0 deletions docs/source/yann/layers/index.rst
@@ -18,6 +18,10 @@ that are accessible in ``yann``. It contains various layers including:
* :mod:`flatten.flatten_layer`
* :mod:`flatten.unflatten_layer`
* :mod:`random.random_layer`
* :mod:`batch_norm.batch_norm_layer_2d` and
:mod:`batch_norm.dropout_batch_norm_layer_2d`
* :mod:`batch_norm.batch_norm_layer_1d` and
:mod:`batch_norm.dropout_batch_norm_layer_1d`

All of these classes inherit from the :mod:`layer` class, which is abstract.

@@ -36,5 +40,6 @@ Specific layers that can be used are
output
random
transform
batch_norm


182 changes: 173 additions & 9 deletions pantry/tutorials/lenet.py
@@ -142,17 +142,16 @@ def lenet5 ( dataset= None, verbose = 1 ):
verbose = verbose)

net.test(verbose = verbose)

from yann.utils.pickle import pickle
pickle(net, 'network.pkl')

# Advanced version of the CNN
def lenet_maxout ( dataset= None, verbose = 1 ):
# Advanced versions of the CNN
def lenet_maxout_batchnorm_before_activation ( dataset= None, verbose = 1 ):
"""
This is a version with nesterov momentum and rmsprop instead of the typical sgd.
This also has maxout activations for convolutional layers, dropouts on the last
convolutional layer and the other dropout layers, and this also applies batch norm
to all the layers. So we just spice things up and add a bit of steroids to
to all the layers. The batch norm is applied by using the ``batch_norm = True`` parameter
in all layers. This batch norm is applied before the activation, as in the original
version of the paper. So we just spice things up and add a bit of steroids to
:func:`lenet5`. This also introduces a visualizer module usage.
Args:
@@ -210,7 +209,7 @@ def lenet_maxout ( dataset= None, verbose = 1 ):
filter_size = (5,5),
pool_size = (2,2),
activation = ('maxout', 'maxout', 2),
# batch_norm = True,
batch_norm = True,
regularize = True,
verbose = verbose
)
Expand All @@ -222,7 +221,7 @@ def lenet_maxout ( dataset= None, verbose = 1 ):
filter_size = (3,3),
pool_size = (2,2),
activation = ('maxout', 'maxout', 2),
# batch_norm = True,
batch_norm = True,
regularize = True,
verbose = verbose
)
@@ -289,6 +288,170 @@ def lenet_maxout ( dataset= None, verbose = 1 ):

net.test(verbose = verbose)

def lenet_maxout_batchnorm_after_activation ( dataset= None, verbose = 1 ):
"""
This is a version with nesterov momentum and rmsprop instead of the typical sgd.
This also has maxout activations for convolutional layers, dropouts on the last
convolutional layer and the other dropout layers and this also applies batch norm
to all the layers. The difference, though, is that we use the independent ``batch_norm``
layer to apply batch norm after the activation of the previous layer.
So we just spice things up and add a bit of steroids to
:func:`lenet5`. This also introduces a visualizer module usage.
Args:
dataset: Supply a dataset.
verbose: Similar to the rest of the toolbox.
"""
optimizer_params = {
"momentum_type" : 'nesterov',
"momentum_params" : (0.75, 0.95, 30),
"optimizer_type" : 'rmsprop',
"id" : "main"
}

dataset_params = {
"dataset" : dataset,
"svm" : False,
"n_classes" : 10,
"id" : 'data'
}

visualizer_params = {
"root" : 'lenet_bn_after',
"frequency" : 1,
"sample_size": 32,
"rgb_filters": True,
"debug_functions" : False,
"debug_layers": False, # Since we are on steroids this time, print everything.
"id" : 'main'
}

net = network( borrow = True,
verbose = verbose )

net.add_module ( type = 'optimizer',
params = optimizer_params,
verbose = verbose )

net.add_module ( type = 'datastream',
params = dataset_params,
verbose = verbose )

net.add_module ( type = 'visualizer',
params = visualizer_params,
verbose = verbose )

net.add_layer ( type = "input",
id = "input",
verbose = verbose,
origin = 'data' )

net.add_layer ( type = "conv_pool",
origin = "input",
id = "conv_pool_1",
num_neurons = 40,
filter_size = (5,5),
pool_size = (2,2),
activation = ('maxout', 'maxout', 2),
batch_norm = False,
regularize = True,
verbose = verbose
)

net.add_layer ( type = 'batch_norm',
origin = 'conv_pool_1',
id = 'batch_norm_after_cp_1',
)

net.add_layer ( type = "convolution",
origin = "batch_norm_after_cp_1",
id = "conv_pool_2",
num_neurons = 100,
filter_size = (3,3),
pool_size = (2,2),
activation = ('maxout', 'maxout', 2),
batch_norm = False,
regularize = True,
verbose = verbose
)

net.add_layer ( type = 'batch_norm',
origin = 'conv_pool_2',
id = 'batch_norm_after_cp_2',
)

net.add_layer ( type = "dot_product",
origin = "batch_norm_after_cp_2",
id = "dot_product_1",
num_neurons = 1250,
activation = 'relu',
dropout_rate = 0.5,
batch_norm = False,
regularize = True,
verbose = verbose
)

net.add_layer ( type = 'batch_norm',
origin = 'dot_product_1',
id = 'batch_norm_after_dp_1',
)

net.add_layer ( type = "dot_product",
origin = "batch_norm_after_dp_1",
id = "dot_product_2",
num_neurons = 1250,
activation = 'relu',
dropout_rate = 0.5,
regularize = True,
batch_norm = False,
verbose = verbose
)

net.add_layer ( type = 'batch_norm',
origin = 'dot_product_2',
id = 'batch_norm_after_dp_2',
)

net.add_layer ( type = "classifier",
id = "softmax",
origin = "batch_norm_after_dp_2",
num_classes = 10,
regularize = True,
activation = 'softmax',
verbose = verbose
)

net.add_layer ( type = "objective",
id = "obj",
origin = "softmax",
objective = "nll",
regularization = (0.0001, 0.0001),
datastream_origin = 'data',
verbose = verbose
)

learning_rates = (0.05, 0.001, 0.0001)

net.cook( optimizer = 'main',
objective_layer = 'obj',
datastream = 'data',
classifier = 'softmax',
verbose = verbose
)
draw_network(net.graph, filename = 'lenet.png')
net.pretty_print()

net.train( epochs = (40, 40),
validate_after_epochs = 1,
visualize_after_epochs = 1,
training_accuracy = True,
learning_rates = learning_rates,
show_progress = True,
early_terminate = True,
verbose = verbose)

net.test(verbose = verbose)

## Boiler Plate ##
if __name__ == '__main__':
import sys
@@ -312,4 +475,5 @@ def lenet_maxout ( dataset= None, verbose = 1 ):
dataset = data.dataset_location()

lenet5 ( dataset, verbose = 2 )
# lenet_maxout (dataset, verbose = 3)
# lenet_maxout_batchnorm_before_activation (dataset, verbose = 2)
# lenet_maxout_batchnorm_after_activation (dataset, verbose = 2)
5 changes: 2 additions & 3 deletions requirements.txt
@@ -1,8 +1,7 @@
numpy>=1.11
theano>=0.8.0
theano==0.9.0rc1
scipy>=0.18
progressbar>=2
skdata>=0.0.4
pillow>=4.0
matplotlib

matplotlib
3 changes: 1 addition & 2 deletions requirements_full.txt
@@ -5,5 +5,4 @@ pydot-ng
pydotplus
pygraphviz
graphviz
networkx
matplotlib
networkx
9 changes: 9 additions & 0 deletions yann/layers/abstract.py
@@ -76,6 +76,15 @@ def print_layer(self, prefix = " ", nest = True, last = True, verbose = 2):
print(prefix_entry + "=================------------------")
print(prefix_entry + " type: " + self.type)
print(prefix_entry + " output shape: " + str(self.output_shape))
if self.batch_norm is True:
print(prefix_entry + " batch norm is ON")
elif self.type == 'dot_product' or \
self.type == 'hidden' or \
self.type == 'mlp' or \
self.type == 'fully_connected' or \
self.type == 'conv_pool' or \
self.type == 'convolution':
print(prefix_entry + " batch norm is OFF")
print(prefix_entry + "-----------------------------------")

if nest is False:
