Commit 9c3cf24: Bug fixes with merge layer.
ragav committed Jan 24, 2017 (1 parent: a674983)
Showing 5 changed files with 73 additions and 41 deletions.
pantry/tutorials/lenet.py (30 changes: 15 additions & 15 deletions)
@@ -15,8 +15,8 @@ def lenet5 ( dataset= None, verbose = 1 ):
     """
     optimizer_params = {
         "momentum_type"   : 'polyak',
-        "momentum_params" : (0.5, 0.95, 30),
-        "regularization"  : (0.00, 0.0001),
+        "momentum_params" : (0.65, 0.95, 30),
+        "regularization"  : (0.00, 0.001),
         "optimizer_type"  : 'adagrad',
         "id"              : "main"
     }
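
A hedged reading of the tuples tuned above, inferred from the values rather than from yann's documentation: momentum_params looks like (start, ceiling, saturation epoch), and regularization like (l1 coefficient, l2 coefficient).

    # Assumed interpretation, for orientation only (not confirmed by this commit):
    momentum_start, momentum_ceiling, saturate_epoch = (0.65, 0.95, 30)
    l1_coeff, l2_coeff = (0.00, 0.001)
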
@@ -29,11 +29,11 @@ def lenet5 ( dataset= None, verbose = 1 ):
     }

     visualizer_params = {
-        "root"       : '.',
+        "root"       : 'lenet5',
         "frequency"  : 1,
-        "sample_size": 32,
-        "rgb_filters": False,
-        "debug_functions" : True,
+        "sample_size": 144,
+        "rgb_filters": True,
+        "debug_functions" : False,
         "debug_layers": False, # Since we are on steroids this time, print everything.
         "id"         : 'main'
     }
@@ -61,7 +61,7 @@ def lenet5 ( dataset= None, verbose = 1 ):
                     verbose = verbose,
                     datastream_origin = 'data', # if you didn't add a dataset module, now is
                                                 # the time.
-                    mean_subtract = True )
+                    mean_subtract = False )

     # add first convolutional layer
     net.add_layer ( type = "conv_pool",
@@ -117,17 +117,17 @@ def lenet5 ( dataset= None, verbose = 1 ):
                     verbose = verbose
                     )

-    learning_rates = (0.05, 0.01, 0.001, 0.0001)
+    learning_rates = (0.05, 0.01, 0.001)
     net.pretty_print()
-    draw_network(net.graph, filename = 'lenet.png')
+    #draw_network(net.graph, filename = 'lenet.png')
     net.cook( optimizer = 'main',
               objective_layer = 'obj',
               datastream = 'data',
               classifier = 'softmax',
               verbose = verbose
               )

-    net.train( epochs = (20, 20, 20 ),
+    net.train( epochs = (40, 40 ),
               validate_after_epochs = 1,
               training_accuracy = True,
               learning_rates = learning_rates,
@@ -165,12 +165,12 @@ def lenet_maxout ( dataset= None, verbose = 1 ):
     }

     visualizer_params = {
-        "root"       : '.',
+        "root"       : 'lenet_on_steroids',
         "frequency"  : 1,
         "sample_size": 32,
         "rgb_filters": True,
         "debug_functions" : True,
-        "debug_layers": False, # Since we are on steroids this time, print everything.
+        "debug_layers": True, # Since we are on steroids this time, print everything.
         "id"         : 'main'
     }

@@ -263,10 +263,10 @@ def lenet_maxout ( dataset= None, verbose = 1 ):
               classifier = 'softmax',
               verbose = verbose
               )
-    draw_network(net.graph, filename = 'lenet.png')
+    #draw_network(net.graph, filename = 'lenet.png')
     net.pretty_print()

-    net.train( epochs = (20, 20, 10, 5),
+    net.train( epochs = (40, 40, 20, 10),
               validate_after_epochs = 1,
               visualize_after_epochs = 1,
               training_accuracy = True,
@@ -298,6 +298,6 @@ def lenet_maxout ( dataset= None, verbose = 1 ):
     dataset = data.dataset_location()

     lenet5 ( dataset, verbose = 2 )
-    #lenet_maxout (dataset, verbose = 2)
+    lenet_maxout (dataset, verbose = 2)


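The two lenet5 edits above move in lockstep: learning_rates keeps exactly one more entry than epochs has eras. Read together with the decay_learning_rate(learning_rates[0]) call later in this commit (yann/special/gan.py), a plausible interpretation, offered as an assumption rather than documented behaviour, is that the first entry is an annealing term and each remaining entry is one era's learning rate.

    # Sketch of the assumed pairing (illustrative, not from yann's docs):
    learning_rates = (0.05, 0.01, 0.001)   # (anneal, rate for era 1, rate for era 2)
    epochs = (40, 40)                      # two eras of 40 epochs each
    assert len(learning_rates) == len(epochs) + 1
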
yann/core/operators.py (9 changes: 8 additions & 1 deletion)
@@ -1,4 +1,4 @@
-def copy_params (source, destination, borrow = True):
+def copy_params (source, destination, borrow = True, verbose = 2):
     """
     Internal function that copies parameters maintaining theano shared nature.
@@ -9,5 +9,12 @@ def copy_params (source, destination, borrow = True):
     Notes:
         Was using deep copy to do this. This seems faster. But can I use ``theano.clone``?
     """
+    if verbose >= 3:
+        print "... Copying parameters"
+
     for src, dst in zip(source, destination):
+
+        if verbose >= 3:
+            print "... source shape: " + str(src.get_value(borrow = True).shape)
+            print "... destination shape: " + str(dst.get_value(borrow = True).shape)
         dst.set_value ( src.get_value (borrow = borrow))
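
A minimal usage sketch of the widened signature, assuming nothing beyond the diff above plus standard theano shared-variable calls; the arrays are illustrative:

    import numpy
    import theano
    from yann.core.operators import copy_params

    source      = [theano.shared(numpy.ones ((3, 2), dtype = theano.config.floatX))]
    destination = [theano.shared(numpy.zeros((3, 2), dtype = theano.config.floatX))]

    copy_params(source = source, destination = destination, borrow = True, verbose = 3)
    assert (destination[0].get_value() == 1).all()   # values copied, shared variables intact
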
yann/layers/merge.py (11 changes: 9 additions & 2 deletions)
@@ -1,6 +1,7 @@
 from abstract import layer
 from yann.core.errors import rmse, l1, cross_entropy
+import numpy
 from theano import tensor as T

 class merge_layer (layer):
     """
@@ -24,6 +25,8 @@ def __init__ ( self,
                    error = 'rmse',
                    verbose = 2):

+        super(merge_layer,self).__init__(id = id, type = 'merge', verbose = verbose)
+
         if type == 'error':
             if verbose >= 3:
                 print "... Creating the merge layer"
@@ -61,8 +64,12 @@ def __init__ ( self,
             self.output_shape = input_shape[0]

         elif type == 'concatenate':
-            self.output = T.concatenate([x[0],x[1]], axis = 1)
-
+            self.output = T.concatenate([x[0],x[1]], axis = 1)
+            if len(input_shape[0]) == 2:
+                self.output_shape = (input_shape[0][0], input_shape[0][1] + input_shape[1][1])
+            elif len(input_shape[0]) == 4:
+                self.output_shape = (input_shape[0][0], input_shape[0][1] + input_shape[1][1],
+                                     input_shape[0][2], input_shape[0][3])

     def loss(self, type = None):
         """
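The shape arithmetic of the new concatenate branch, restated with plain tuples (illustrative shapes, not taken from the commit):

    shape_a = (500, 800)   # (batch, features) of the first input layer
    shape_b = (500, 200)   # (batch, features) of the second input layer
    merged = (shape_a[0], shape_a[1] + shape_b[1])   # axis-1 concat -> (500, 1000)
    # for 4D (batch, channels, height, width) inputs, channels add up the same way
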
yann/network.py (38 changes: 24 additions & 14 deletions)
@@ -401,7 +401,7 @@ def add_layer(self, type, verbose = 2, **kwargs):
         if verbose >= 3:
             print "... Layer " + id + " is created and its learnability is " + \
                   str(self.layers[id].active)
-    def add_module (self, type, params, verbose = 2):
+    def add_module (self, type, params = None, verbose = 2):
         """
         Use this function to add a module to the net.
@@ -1281,8 +1281,8 @@ def _add_merge_layer(self, id, options, verbose = 2):

         for lyr in origin:
             self.dropout_layers[id].origin.append(lyr)
-            self.dropout_layers[lyr].destination.append(id)
             self.layers[id].origin.append(lyr)
+            self.dropout_layers[lyr].destination.append(id)
             self.layers[lyr].destination.append(id)

     def _add_random_layer(self, id, options, verbose = 2):
@@ -1751,6 +1751,7 @@ def _cache_data (self, type = 'train', batch = 0, verbose = 2):



+
     def _cook_visualizer(self, verbose = 2):
         """
         This is an internal function that cooks a visualizer
@@ -1793,6 +1794,16 @@ def _cook_visualizer(self, verbose = 2):
             self.cooked_visualizer.visualize_images(imgs = imgs, verbose = verbose)
         self.visualize_after_epochs = 1

+    def _cook_resultor (self, resultor = None, verbose = 2):
+        """
+        This is an internal function that cooks a resultor
+        Args:
+            verbose: as always
+        """
+        if verbose > 3:
+            print "... Resultor is cooked"
+
     def visualize_activities( self, epoch = 0, verbose = 2):
         """
         This method will save down all layer activities for the correct epoch.
Expand Down Expand Up @@ -1828,15 +1839,7 @@ def visualize(self, epoch = 0, verbose =2 ):
self.visualize_activities(epoch = epoch, verbose = verbose)
self.visualize_filters(epoch = epoch, verbose = verbose)

def _cook_resultor (verbose = 2):
"""
This is an internal function that cooks a resultor

Args:
verbose: as always
"""
if verbose > 3:
print "... Resultor is cooked"

def cook(self, verbose = 2, **kwargs):
"""
@@ -2288,8 +2291,15 @@ def train(self, verbose = 2, **kwargs):
         early_termination = False
         iteration = 0
         era = 0
-        total_epochs = sum(epochs)
-        change_era = epochs[era]
+        if isinstance(epochs, int):
+            total_epochs = epochs
+            change_era = epochs + 1
+        elif len(epochs) > 1:
+            total_epochs = sum(epochs)
+            change_era = epochs[era]
+        else:
+            total_epochs = epochs[0]
+            change_era = epochs[0] + 1
         final_era = False

         # main loop
@@ -2376,9 +2386,9 @@ def train(self, verbose = 2, **kwargs):
                                 verbose = verbose )
                 self.visualize ( epoch = epoch_counter , verbose = verbose)
             if best is True:
-                copy_params(source = self.params, destination = nan_insurance,
+                copy_params(source = self.active_params, destination = nan_insurance,
                             borrow = self.borrow)
-                copy_params(source = self.params, destination = self.best_params,
+                copy_params(source = self.active_params, destination = self.best_params,
                             borrow = self.borrow)
                 # self.resultor.save_network()
                 # self.resultor.something() # this function is dummy now. But resultor should use
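A standalone restatement of the era bookkeeping added to train above, making the three branches concrete (a sketch; the change_era convention is read straight from the diff):

    def era_plan(epochs):
        # mirrors the added branch: returns (total_epochs, change_era)
        if isinstance(epochs, int):
            return epochs, epochs + 1
        elif len(epochs) > 1:
            return sum(epochs), epochs[0]
        else:
            return epochs[0], epochs[0] + 1

    assert era_plan(40) == (40, 41)         # int: one era, change-over never fires
    assert era_plan((40, 40)) == (80, 40)   # two eras: switch rates after epoch 40
    assert era_plan((40,)) == (40, 41)      # a 1-tuple behaves like the int case
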
yann/special/gan.py (26 changes: 17 additions & 9 deletions)
@@ -572,17 +572,23 @@ def train ( self, verbose, **kwargs):
         early_termination = False
         iteration = 0
         era = 0
-        total_epochs = sum(epochs)
-        change_era = epochs[era]
+
+        if isinstance(epochs, int):
+            total_epochs = epochs
+            change_era = epochs + 1
+        elif len(epochs) > 1:
+            total_epochs = sum(epochs)
+            change_era = epochs[era]
+        else:
+            total_epochs = epochs[0]
+            change_era = epochs[0] + 1
+
         final_era = False

         # main loop
         while (epoch_counter < total_epochs) and (not early_termination):
             nan_flag = False
             # check if it's time for a new era.
-            if epoch_counter == 10:
-                import pdb
-                pdb.set_trace()

             if (epoch_counter == change_era):
                 # if final_era, while loop would have terminated.
@@ -682,10 +688,12 @@ def train ( self, verbose, **kwargs):
             self.visualize ( epoch = epoch_counter , verbose = verbose)

             if best is True:
-                copy_params(source = self.params, destination = nan_insurance,
-                            borrow = self.borrow)
-                copy_params(source = self.params, destination = self.best_params,
-                            borrow = self.borrow)
+                copy_params(source = self.active_params, destination = nan_insurance,
+                            borrow = self.borrow,
+                            verbose = verbose)
+                copy_params(source = self.active_params, destination = self.best_params,
+                            borrow = self.borrow,
+                            verbose = verbose)

             self.decay_learning_rate(learning_rates[0])