Visualizer setup.
Ragav Venkatesan committed Jan 23, 2017
1 parent cdfc187 commit 2dc937a
Showing 8 changed files with 87 additions and 49 deletions.
16 changes: 12 additions & 4 deletions pantry/tutorials/autoencoder.py
@@ -31,7 +31,14 @@ def autoencoder ( dataset= None, verbose = 1 ):
"id" : 'main'
}

# initialize the network
optimizer_params = {
"momentum_type" : 'nesterov',
"momentum_params" : (0.9, 0.95, 30),
"regularization" : (0.0001, 0.0001),
"optimizer_type" : 'rmsprop',
"id" : "main"
}
net = network( borrow = True,
verbose = verbose )

@@ -43,7 +50,9 @@ def autoencoder ( dataset= None, verbose = 1 ):
params = visualizer_params,
verbose = verbose
)

net.add_module ( type = 'optimizer',
params = optimizer_params,
verbose = verbose )
# add an input layer
net.add_layer ( type = "input",
id = "input",
@@ -102,7 +111,6 @@ def autoencoder ( dataset= None, verbose = 1 ):
)

learning_rates = (0, 0.1, 0.01)

net.cook( objective_layer = 'obj',
datastream = 'data',
generator = 'merge',
@@ -120,7 +128,7 @@ def autoencoder ( dataset= None, verbose = 1 ):
show_progress = True,
early_terminate = True,
verbose = verbose)


if __name__ == '__main__':
import sys
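The hunk above wires an optimizer module into the autoencoder tutorial before the network is cooked. As a reading aid (not part of the commit), here is a commented sketch of the same configuration pattern; the field interpretations are my reading of the yann documentation and worth verifying there:

    # Hedged sketch of a yann optimizer configuration (field meanings are assumptions).
    optimizer_params = {
        "momentum_type"   : 'nesterov',        # 'false', 'polyak' or 'nesterov'
        "momentum_params" : (0.9, 0.95, 30),   # (start momentum, end momentum, epochs to reach end)
        "regularization"  : (0.0001, 0.0001),  # (L1 coefficient, L2 coefficient)
        "optimizer_type"  : 'rmsprop',         # e.g. 'sgd' or 'rmsprop'
        "id"              : 'main'
    }
    # Attached with net.add_module(type = 'optimizer', params = optimizer_params, verbose = verbose)
    # and paired at cook/train time with learning_rates = (0, 0.1, 0.01), which I read as
    # (annealing, era-1 rate, era-2 rate).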
21 changes: 9 additions & 12 deletions pantry/tutorials/gan.py
@@ -18,7 +18,7 @@ def simple_gan ( dataset= None, verbose = 1 ):
verbose: Similar to the rest of the dataset.
"""
optimizer_params = {
"momentum_type" : 'polyak',
"momentum_type" : 'nesterov',
"momentum_params" : (0.65, 0.95, 30),
"regularization" : (0.000, 0.000),
"optimizer_type" : 'rmsprop',
@@ -57,7 +57,7 @@ def simple_gan ( dataset= None, verbose = 1 ):
#z - latent space created by random gaussian layer
net.add_layer(type = 'random',
id = 'z',
num_neurons = (500,256),
num_neurons = (500,64),
distribution = 'normal',
mu = 0,
sigma = 1,
@@ -77,17 +77,16 @@ def simple_gan ( dataset= None, verbose = 1 ):
id = "G(z)",
num_neurons = 784,
activation = 'relu',
dropout_rate = 0,
verbose = verbose
)

#D(x) - Contains params theta_d - 784 X 256 - first layer of D, creates features 1 X 256.
net.add_layer ( type = "dot_product",
id = "D(x)",
origin = "x",
num_neurons = 1024,
dropout_rate = 0,
num_neurons = 512,
activation = ('maxout','maxout',2),
#activation = 'relu',
verbose = verbose
)

@@ -96,11 +95,11 @@ def simple_gan ( dataset= None, verbose = 1 ):
net.add_layer ( type = "dot_product",
id = "D(G(z))",
origin = "G(z)",
dropout_rate = 0,
num_neurons = 1024,
num_neurons = 512,
input_params = net.dropout_layers["D(x)"].params, # must be the same params,
# this way it remains the same network.
activation = ('maxout','maxout',2),
#activation = 'relu',
verbose = verbose
)

@@ -110,7 +109,6 @@ def simple_gan ( dataset= None, verbose = 1 ):
origin = "D(G(z))",
num_neurons = 1,
activation = 'sigmoid',
dropout_rate = 0,
verbose = verbose
)

@@ -121,7 +119,6 @@ def simple_gan ( dataset= None, verbose = 1 ):
num_neurons = 1,
input_params = net.dropout_layers["fake"].params, # Again share their parameters
activation = 'sigmoid',
dropout_rate = 0,
verbose = verbose
)
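The input_params arguments above (D(G(z)) reusing the parameters of D(x), and "real" reusing those of "fake") are what make the two discriminator paths a single network evaluated on both real and generated data. A minimal Theano-level sketch of that kind of sharing, with made-up shapes and names rather than yann internals:

    import numpy
    import theano
    import theano.tensor as T

    rng = numpy.random.RandomState(1)
    x_real = T.matrix('x_real')    # real images, batch x 784
    x_fake = T.matrix('x_fake')    # generated images, batch x 784

    # one set of shared discriminator weights ...
    W = theano.shared(rng.randn(784, 512).astype(theano.config.floatX), name = 'W')
    b = theano.shared(numpy.zeros(512, dtype = theano.config.floatX), name = 'b')

    # ... applied to both streams, so gradients from real and fake batches
    # accumulate into the same parameters
    h_real = T.nnet.relu(T.dot(x_real, W) + b)
    h_fake = T.nnet.relu(T.dot(x_fake, W) + b)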

@@ -177,10 +174,10 @@ def simple_gan ( dataset= None, verbose = 1 ):
softmax_layer = "softmax",
verbose = verbose )

learning_rates = (0.05, 0.001, 0.0001)
learning_rates = (0.05, 0.01)

net.train( epochs = (50, 50),
k = 5, # refer to Ian Goodfellow's paper Algorithm 1.
net.train( epochs = (10),
k = 30, # refer to Ian Goodfellow's paper Algorithm 1.
validate_after_epochs = 1,
training_accuracy = True,
show_progress = True,
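The k argument in net.train above refers to Algorithm 1 of Goodfellow et al. (2014); as the training loop later in this commit shows, the discriminator is trained on every minibatch and the generator once every k minibatches. A hedged sketch of that schedule with hypothetical step functions, not yann's API:

    def train_gan_epoch(minibatches, k, discriminator_step, generator_step):
        # discriminator_step / generator_step are assumed callables returning a scalar cost
        d_costs, g_costs = [], []
        for i, batch in enumerate(minibatches):
            d_costs.append(discriminator_step(batch))   # every minibatch
            if i % k == 0:                               # one generator step per k minibatches
                g_costs.append(generator_step(batch))
        return d_costs, g_costs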
7 changes: 2 additions & 5 deletions pantry/tutorials/lenet.py
@@ -118,10 +118,8 @@ def lenet5 ( dataset= None, verbose = 1 ):
)

learning_rates = (0.05, 0.01, 0.001, 0.0001)

net.pretty_print()
draw_network(net.graph, filename = 'lenet.png')

net.cook( optimizer = 'main',
objective_layer = 'obj',
datastream = 'data',
Expand All @@ -138,7 +136,6 @@ def lenet5 ( dataset= None, verbose = 1 ):
verbose = verbose)

net.test(verbose = verbose)

# Advanced version of the CNN
def lenet_maxout ( dataset= None, verbose = 1 ):
"""
@@ -202,7 +199,7 @@ def lenet_maxout ( dataset= None, verbose = 1 ):
net.add_layer ( type = "conv_pool",
origin = "input",
id = "conv_pool_1",
num_neurons = 30,
num_neurons = 20,
filter_size = (5,5),
pool_size = (2,2),
activation = ('maxout', 'maxout', 2),
@@ -213,7 +210,7 @@ def lenet_maxout ( dataset= None, verbose = 1 ):
net.add_layer ( type = "conv_pool",
origin = "conv_pool_1",
id = "conv_pool_2",
num_neurons = 60,
num_neurons = 50,
filter_size = (3,3),
pool_size = (2,2),
activation = ('maxout', 'maxout', 2),
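Both conv_pool layers above ask for activation = ('maxout', 'maxout', 2). My reading of yann's activation tuple is (activation, maxout flavour, maxout size): groups of 2 feature maps are collapsed by an element-wise max, halving the channel count. A small numpy illustration of that reduction (an assumption about the semantics, not yann code):

    import numpy

    def maxout_by_pairs(feature_maps):
        # feature_maps: (batch, channels, height, width) with an even channel count
        b, c, h, w = feature_maps.shape
        return feature_maps.reshape(b, c // 2, 2, h, w).max(axis = 2)

    x = numpy.random.randn(4, 20, 12, 12)
    print(maxout_by_pairs(x).shape)    # (4, 10, 12, 12)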
4 changes: 2 additions & 2 deletions yann/core/errors.py
@@ -11,8 +11,8 @@ def cross_entropy ( a , b ):
Returns:
theano shared variable: Computational graph with the error.
"""
#return T.mean(T.nnet.categorical_crossentropy(a.flatten(2),b.flatten(2)))
return T.mean(- T.sum(a * T.log(b) + (1 - a) * T.log(1 - b), axis=1))
return T.mean(T.nnet.categorical_crossentropy(a.flatten(2),b.flatten(2)))
# return T.mean(- T.sum(a * T.log(b) + (1 - a) * T.log(1 - b), axis=1))

def l1 ( a, b ):
"""
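The change above replaces the hand-written binary cross-entropy with Theano's categorical cross-entropy over flattened inputs. For reference, a sketch of both expressions; note that T.nnet.categorical_crossentropy(coding_dist, true_dist) takes the predicted distribution first, so which of a and b plays which role differs between the two lines and is worth checking against the call sites:

    import theano.tensor as T

    def binary_cross_entropy(target, prediction):
        # element-wise binary cross-entropy, summed over features, averaged over the batch
        return T.mean(-T.sum(target * T.log(prediction)
                             + (1 - target) * T.log(1 - prediction), axis = 1))

    def categorical_cross_entropy(prediction, target):
        # Theano built-in: predicted distribution first, true distribution second
        return T.mean(T.nnet.categorical_crossentropy(prediction.flatten(2),
                                                      target.flatten(2)))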
25 changes: 18 additions & 7 deletions yann/layers/abstract.py
@@ -2,12 +2,8 @@
Todo:
* LSTM / GRN layers
* A concatenate layer
* A Merge layer that is going to sum / average two layer activations.
* An Embed layer that is going to create a new embedding space for two layer's activations to
project on to the same space and minimize its distances.
* An error layer that produces the error between two layers. (use errors.py in core.)
- This can be used to generate images back such as in the case of auto-encoders.
"""

import theano
@@ -16,6 +12,7 @@
# The above import is an experimental code. Not sure if it works perfectly, but I have no doubt
# yet.
from yann.core import activations
import numpy

class layer(object):
"""
@@ -51,7 +48,7 @@ def __init__(self, id, type, verbose = 2):
if verbose >= 3:
print "... Initializing a new layer " + self.id + " of type " + self.type

def print_layer(self, prefix = " ", nest = True, last = True):
def print_layer(self, prefix = " ", nest = True, last = True, verbose = 2):
"""
Print information about the layer
@@ -88,7 +85,7 @@ def print_layer(self, prefix = " ", nest = True, last = True):

return prefix

def _graph_attributes(self):
def _graph_attributes(self, verbose = 2):
"""
This is an internal function that returns attributes as a dictionary so that I can add
it to the networkx graph output.
@@ -113,7 +110,21 @@ def _graph_attributes(self):
out["type"] = self.type
return out

def _dropout(rng, params, dropout_rate):
def get_params (self ,verbose = 2):
"""
This method returns the parameters of the layer in a numpy ndarray format.
Notes:
This is a slow method, because we are taking the values out of the GPU. Ordinarily, I should
have used get_value( borrow = True ), but I can't do this because some parameters are
theano.tensor.var.TensorVariable, which need to be run through eval.
"""
out = []
for p in self.params:
out.append(numpy.asarray(p.eval(),dtype = theano.config.floatX))
return out

def _dropout(rng, params, dropout_rate, verbose = 2):
"""
dropout thanks to misha denil
https://github.com/mdenil/dropout
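The new get_params method in abstract.py returns host-side numpy copies via eval() because, as its docstring notes, not every parameter is a shared variable with a get_value method. A toy illustration of the distinction (plain Theano, not yann layers):

    import numpy
    import theano

    W = theano.shared(numpy.ones((3, 2), dtype = theano.config.floatX), name = 'W')

    w_fast = W.get_value(borrow = True)    # shared variable: direct access to its storage

    W_t = W.T                              # a derived TensorVariable has no get_value()
    w_slow = numpy.asarray(W_t.eval(), dtype = theano.config.floatX)

    print(w_fast.shape)                    # (3, 2)
    print(w_slow.shape)                    # (2, 3)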
32 changes: 24 additions & 8 deletions yann/modules/visualizer.py
@@ -320,8 +320,17 @@ def visualize_activities(self, layer_activities, epoch, index = 0, verbose = 2):
imgs = activity(index)
if len(imgs.shape) == 2:
if not os.path.exists(loc + '/layer_' + id):
os.makedirs(loc + '/layer_' + id)
self.visualize_images(imgs, loc=loc + '/layer_' + id, verbose =verbose)
os.makedirs(loc + '/layer_' + id)
if not os.path.exists(loc + '/layer_' + id + '/straight'):
os.makedirs(loc + '/layer_' + id + '/straight')
self.visualize_images( imgs = imgs,
loc = loc + '/layer_' + id + '/straight',
verbose = verbose )
if not os.path.exists(loc + '/layer_' + id + '/transposed'):
os.makedirs(loc + '/layer_' + id + '/transposed')
self.visualize_images( imgs = imgs.transpose(),
loc = loc + '/layer_' + id + '/transposed',
verbose = verbose )
elif len(imgs.shape) == 4:
imgs = imgs.transpose(0,2,3,1)
if not os.path.exists(loc + '/layer_' + id):
@@ -344,10 +353,11 @@ def visualize_filters(self, layers, epoch, index = 0, verbose = 2):
if not os.path.exists(loc):
os.makedirs(loc)
for id, layer in layers.iteritems():
if layer.active is True:
if layer.params is not None:
if verbose >= 3:
print "... saving down visualization of layer " + id
imgs = layer.w.get_value(borrow = True)
print "... saving down visualization of layer " + id

imgs = layer.get_params()[0]
if len(imgs.shape) == 4:
if not os.path.exists(loc + '/layer_' + id):
os.makedirs(loc + '/layer_' + id)
Expand All @@ -358,10 +368,16 @@ def visualize_filters(self, layers, epoch, index = 0, verbose = 2):
elif len(imgs.shape) == 2:
if not os.path.exists(loc + '/layer_' + id):
os.makedirs(loc + '/layer_' + id)
size = imgs.shape[1]
if not os.path.exists(loc + '/layer_' + id + '/straight'):
os.makedirs(loc + '/layer_' + id + '/straight')
self.visualize_images( imgs = imgs,
loc = loc + '/layer_' + id ,
verbose = verbose )
loc = loc + '/layer_' + id + '/straight',
verbose = verbose )
if not os.path.exists(loc + '/layer_' + id + '/transposed'):
os.makedirs(loc + '/layer_' + id + '/transposed')
self.visualize_images( imgs = imgs.transpose(),
loc = loc + '/layer_' + id + '/transposed',
verbose = verbose )


if __name__ == '__main__':
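visualize_activities and visualize_filters now write every 2-D array twice, into a 'straight' and a 'transposed' subdirectory. The likely payoff of the transposed copy is for dot-product layers, whose weight matrix is (inputs x units): transposing gives one row per hidden unit, and each row can be reshaped into the input geometry and viewed as a filter. A hedged sketch of that reshaping with assumed MNIST-style sizes, not the visualizer's actual tiling code:

    import numpy

    W = numpy.random.randn(784, 256)              # (inputs, units) weight matrix

    # straight: 784 rows of length 256, one row per input pixel
    # transposed: 256 rows of length 784, one 28 x 28 'filter' per hidden unit
    filters = W.transpose().reshape(256, 28, 28)
    print(filters.shape)                           # (256, 28, 28)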
16 changes: 9 additions & 7 deletions yann/network.py
@@ -825,9 +825,8 @@ def _add_unflatten_layer( self, id, options, verbose = 2):
from yann.layers.flatten import unflatten_layer as flt
self.dropout_layers[id] = flt(input = dropout_input, id = id, shape = shape,
input_shape = input_shape)
self.layers[id] = flt(input = input, id = id, shape = shape,
input_shape = input_shape)

self.layers[id] = flt(input = input, id = id, shape = shape, input_shape = input_shape)

self.dropout_layers[id].origin.append(origin)
self.dropout_layers[origin].destination.append(id)
self.layers[id].origin.append(origin)
@@ -1862,7 +1861,8 @@ def cook(self, verbose = 2, **kwargs):
self.network_type = 'generator'

if not 'params' in kwargs.keys():
params = None
if self.network_type == 'generator' or 'classifier':
params = self.active_params
else:
params = params
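One thing to flag in the new conditional: in Python, an expression of the form x == 'generator' or 'classifier' is always truthy, because the non-empty string 'classifier' short-circuits the or. If the intent is a membership test, the usual spelling is x in ('generator', 'classifier'). A two-line demonstration:

    network_type = 'autoencoder'
    print(network_type == 'generator' or 'classifier')    # prints 'classifier' (truthy)
    print(network_type in ('generator', 'classifier'))    # prints False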

@@ -1917,7 +1917,10 @@ def cook(self, verbose = 2, **kwargs):
self.dropout_cost = self.dropout_layers[objective_layer].output

self._cook_datastream(verbose = verbose)
self._cook_optimizer(params = params, verbose = verbose )
self._cook_optimizer(params = params,
optimizer = self.cooked_optimizer,
objective = self.dropout_cost,
verbose = verbose )

self._initialize_test (classifier = classifier,
generator = generator,
Expand All @@ -1936,8 +1939,7 @@ def cook(self, verbose = 2, **kwargs):
self.best_params = []
# Let's bother only about learnable params. This avoids the problem when weights are
# shared
if params is None:
params = self.active_params

for param in params:
self.best_params.append(theano.shared(param.get_value(borrow = self.borrow)))

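The cook changes above default params to self.active_params and then copy each parameter into best_params so the best-so-far weights can be restored after training. A minimal sketch of that snapshot/restore pattern with plain Theano shared variables (illustrative names, not yann's attributes):

    import numpy
    import theano

    params = [theano.shared(numpy.zeros(5, dtype = theano.config.floatX)) for _ in range(3)]

    # snapshot: independent copies of the current values
    best_params = [theano.shared(p.get_value(borrow = False)) for p in params]

    # when validation improves, refresh the snapshot ...
    for best, live in zip(best_params, params):
        best.set_value(live.get_value(borrow = False))

    # ... and to roll back, copy the snapshot into the live parameters
    for best, live in zip(best_params, params):
        live.set_value(best.get_value(borrow = False))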
15 changes: 11 additions & 4 deletions yann/special/gan.py
@@ -4,6 +4,11 @@
Goodfellow, Ian, Jean Pouget-Abadie, Mehdi Mirza, Bing Xu, David Warde-Farley, Sherjil Ozair,
Aaron Courville, and Yoshua Bengio. "Generative adversarial nets." In Advances in Neural Information
Processing Systems, pp. 2672-2680. 2014.
TODO:
There seems to be something wrong with the fine-tuning update. Code crashes after a call to
_new_era. This needs debugging and fixing.
"""
import time
import numpy
@@ -343,9 +348,7 @@ def cook( self,
# shared
self.active_params = self.classifier_active_params + self.discriminator_active_params + \
self.generator_active_params
if params is None:
params = self.active_params
for param in params:
for param in self.active_params:
self.best_params.append(theano.shared(param.get_value(borrow = self.borrow)))

self.gen_cost = []
@@ -577,6 +580,10 @@ def train ( self, verbose, **kwargs):
while (epoch_counter < total_epochs) and (not early_termination):
nan_flag = False
# check if its time for a new era.
if epoch_counter == 10:
import pdb
pdb.set_trace()

if (epoch_counter == change_era):
# if final_era, while loop would have terminated.
era = era + 1
@@ -634,7 +641,6 @@ def train ( self, verbose, **kwargs):
if minibatch % k == 0:
gen_cost = self.mini_batch_train_gen (minibatch, epoch_counter)


if numpy.isnan(gen_cost) or \
numpy.isnan(softmax_cost) or \
numpy.isnan(fake_cost) or \
@@ -645,6 +651,7 @@ def train ( self, verbose, **kwargs):
if verbose >= 2:
print ".. NAN! Slowing learning rate by 10 times and restarting epoch."
break

self.fake_cost = self.fake_cost + [fake_cost]
self.real_cost = self.real_cost + [real_cost]
self.softmax_cost = self.softmax_cost + [softmax_cost]
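Two things happen in the training-loop hunks above: a temporary pdb.set_trace hook fires at epoch 10 (matching the new TODO about the fine-tuning crash), and whenever any cost turns NaN the epoch is abandoned, the learning rate is cut by ten and the epoch restarts. A hedged sketch of that recovery pattern with a hypothetical train_step helper:

    import numpy

    def run_epoch(minibatches, train_step, learning_rate):
        # returns the (possibly reduced) learning rate and whether the epoch must be restarted
        for batch in minibatches:
            cost = train_step(batch, learning_rate)
            if numpy.isnan(cost):
                return learning_rate / 10.0, True     # slow down and restart the epoch
        return learning_rate, False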
