In [None]:
#Copyright (c) 2014, Roland Memisevic
#All rights reserved.
#
#memisevr[at]iro[dot]umontreal[dot]ca
#
#Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
#
#    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
#    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
#
#            THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os
os.environ["THEANO_FLAGS"] = "device=gpu0"

%matplotlib inline
import numpy
import theano
from theano.tensor.shared_randomstreams import RandomStreams

import theano.tensor as T
import numpy as np
class Zae(object):
    """Zero-bias autoencoder with tied weights.

    The hidden pre-activation is ``inputs . W`` (no hidden bias). A hidden
    unit passes its pre-activation through unchanged when it exceeds
    ``selectionthreshold`` and outputs zero otherwise. The reconstruction
    uses the transposed weights ``W.T`` plus a visible bias ``bvis``.

    Parameters
    ----------
    numvis : number of visible units (length of the visible bias).
    numhid : number of hidden units. Only stored; the actual shape of ``W``
        is taken from ``init_features``.
    vistype : ``'binary'`` (sigmoid outputs, cross-entropy cost) or
        ``'real'`` (linear outputs, half squared-error cost).
    init_features : array used as the initial value of ``W``; it is cast to
        ``theano.config.floatX``.
    selectionthreshold : initial value of the shared threshold variable.

    Compiled functions exposed on the instance, each taking an input matrix:
    ``cost``, ``grad``, ``prehiddens`` and ``hiddens``.

    Raises
    ------
    ValueError : if ``vistype`` is neither ``'binary'`` nor ``'real'``.
    """

    _VISTYPES = ('binary', 'real')

    def __init__(self, numvis, numhid, vistype, init_features, selectionthreshold=1.0):
        # Fail early with a clear message; otherwise neither cost branch runs
        # and the failure surfaces later as a NameError on `costpercase`.
        if vistype not in self._VISTYPES:
            raise ValueError("vistype must be 'binary' or 'real', got %r" % (vistype,))

        self.numvis = numvis
        self.numhid  = numhid
        self.vistype = vistype
        self.selectionthreshold = theano.shared(value=selectionthreshold, name='selectionthreshold')
        self.W_init = init_features.astype(theano.config.floatX)
        self.W = theano.shared(value = self.W_init, name='W')
        self.bvis = theano.shared(value=numpy.zeros(numvis, dtype=theano.config.floatX), name='bvis')
        self.inputs = T.matrix(name = 'inputs')
        self.rng = RandomStreams()

        # No input corruption is applied: the encoder sees the clean inputs.
        self.inputs_noise = self.inputs

        # The threshold is deliberately not part of the trainable parameters.
        self.params = [self.W, self.bvis]

        # Encoder: thresholded linear units (the comparison acts as a 0/1 gate).
        self._prehiddens = T.dot(self.inputs_noise, self.W)
        self._hiddens = (self._prehiddens > self.selectionthreshold) * self._prehiddens

        # Decoder and per-example cost, summed over the visible dimensions.
        if self.vistype == 'binary':
            self._outputs = T.nnet.sigmoid(T.dot(self._hiddens, self.W.T) + self.bvis)
            costpercase = -T.sum(self.inputs*T.log(self._outputs) + (1-self.inputs)*T.log(1-self._outputs), axis=1)
        else:  # 'real'
            self._outputs = T.dot(self._hiddens, self.W.T) + self.bvis
            costpercase = T.sum(0.5 * ((self.inputs - self._outputs)**2), axis=1)

        self._cost = T.mean(costpercase)
        self._grads = T.grad(self._cost, self.params)

        self.cost = theano.function([self.inputs], self._cost)
        # Reuse the symbolic gradients instead of building the graph twice.
        self.grad = theano.function([self.inputs], self._grads)
        self.prehiddens = theano.function([self.inputs], self._prehiddens)
        self.hiddens = theano.function([self.inputs], self._hiddens)

    def updateparams(self, newparams):
        """Overwrite all parameters from the flat vector ``newparams``.

        ``newparams`` is consumed in the order of ``self.params``; each
        parameter takes as many consecutive entries as it has elements,
        reshaped to the parameter's shape. Entries beyond the total
        parameter count are ignored.
        """
        offset = 0
        for p in self.params:
            # Borrow the stored array so reading the shape does not copy it,
            # then write the new values into that same buffer.
            buf = p.get_value(borrow=True)
            # int(): numpy.prod of an empty shape is a float, which cannot
            # be used as a slice bound.
            size = int(numpy.prod(buf.shape))
            buf[...] = newparams[offset:offset + size].reshape(buf.shape)
            p.set_value(buf, borrow=True)
            offset += size

    def get_params(self):
        """Return all parameters concatenated into one flat array."""
        return numpy.concatenate([p.get_value(borrow=True).ravel() for p in self.params])

    def save(self, filename):
        """Write the flat parameter vector to ``filename`` with ``numpy.save``."""
        numpy.save(filename, self.get_params())

    def load(self, filename):
        """Load a flat parameter vector written by ``save`` and apply it.

        ``numpy.save`` appends ``.npy`` to names lacking that extension, so
        if ``filename`` cannot be opened as given, ``filename + '.npy'`` is
        tried before giving up. This lets the same name be passed to both
        ``save`` and ``load``.
        """
        try:
            flat = numpy.load(filename)
        except IOError:
            if not isinstance(filename, str) or filename.endswith('.npy'):
                raise
            flat = numpy.load(filename + '.npy')
        self.updateparams(flat)


In [None]:
#Copyright (c) 2014, Roland Memisevic
#All rights reserved.
#
#memisevr[at]iro[dot]umontreal[dot]ca
#
#Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
#
#    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
#    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
#
#            THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import pylab
import numpy
import numpy.random
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams


class GraddescentMinibatch(object):
    """Minibatch gradient descent with momentum for a Theano model.

    The model must expose ``params`` (shared variables), ``_grads`` (their
    symbolic gradients, in the same order), ``_cost`` (symbolic cost) and
    ``inputs`` (symbolic input matrix). ``data`` is a shared variable whose
    rows are the training examples.

    Parameters
    ----------
    model : object providing the attributes listed above.
    data : shared variable holding the training set; sliced along axis 0.
    batchsize : number of rows per minibatch.
    learningrate : step size applied to the gradients.
    momentum : factor applied to the previous increment.
    rng : optional ``numpy.random.RandomState``; a generator seeded with 1
        is created when omitted. It is only stored, batches are visited in
        order.
    verbose : when true, ``step`` prints the epoch number and mean cost.
    """

    def __init__(self, model, data, batchsize, learningrate, momentum=0.9, rng=None, verbose=True):
        self.model         = model
        self.data          = data
        self.learningrate  = learningrate
        self.verbose       = verbose
        self.batchsize     = batchsize
        # Floor division keeps this an int on Python 3 as well (a float here
        # would break range() in step). A trailing partial batch is dropped.
        # borrow=True: only the shape is needed, so avoid copying the data.
        self.numbatches    = self.data.get_value(borrow=True).shape[0] // batchsize
        self.momentum      = momentum
        if rng is None:
            self.rng = numpy.random.RandomState(1)
        else:
            self.rng = rng

        self.epochcount = 0
        self.index = T.lscalar()
        # One zero-initialised increment (velocity) buffer per parameter.
        self.incs = {
            p: theano.shared(value=numpy.zeros(p.get_value(borrow=True).shape,
                                               dtype=theano.config.floatX),
                             name='inc_'+p.name)
            for p in self.model.params
        }
        self.inc_updates = {}
        self.updates = {}
        # Dummy scalar input/output so the parameter update can be compiled
        # as a function that does nothing but apply `updates`.
        self.n = T.scalar('n')
        self.noop = 0.0 * self.n
        self.set_learningrate(self.learningrate)

    def set_learningrate(self, learningrate):
        """Set the learning rate and rebuild the compiled update functions.

        NOTE: the learning rate and momentum are baked into the update
        expressions, so every call compiles both Theano functions again.
        """
        self.learningrate = learningrate
        for _param, _grad in zip(self.model.params, self.model._grads):
            self.inc_updates[self.incs[_param]] = self.momentum * self.incs[_param] - self.learningrate * _grad
            self.updates[_param] = _param + self.incs[_param]

        # Computes the cost on minibatch `index` and refreshes the increments.
        self._updateincs = theano.function([self.index], self.model._cost,
                                     updates = self.inc_updates,
                givens = {self.model.inputs:self.data[self.index*self.batchsize:(self.index+1)*self.batchsize]})
        # Adds the current increments to the parameters.
        self._trainmodel = theano.function([self.n], self.noop, updates = self.updates)

    def step(self):
        """Run one epoch over all minibatches, in order.

        For each batch the increments are updated first and then applied to
        the parameters. Returns the mean of the per-batch costs (0.0 when
        there are no batches); the same value is printed when ``verbose``.
        """
        cost = 0.0
        stepcount = 0.0
        for batch_index in range(self.numbatches):
            stepcount += 1.0
            # Running mean of the batch costs seen so far in this epoch.
            cost = (1.0-1.0/stepcount)*cost + (1.0/stepcount)*self._updateincs(batch_index)
            self._trainmodel(0)

        self.epochcount += 1
        if self.verbose:
            # Parenthesised single argument: valid as both a Python 2 print
            # statement and a Python 3 print call.
            print('epoch: %d, cost: %f' % (self.epochcount, cost))
        return cost



In [None]:
from lasagnekit.datasets.mnist import MNIST
from lasagnekit.misc.plot_weights import grid_plot
# Create the MNIST dataset wrapper from lasagnekit and load it.
# The training cell further down reads the examples from `data.X`
# (presumably populated by load() as flattened 28x28 images - confirm).
data = MNIST()
data.load()


In [None]:
#Copyright (c) 2014, Roland Memisevic
#All rights reserved.
#
#memisevr[at]iro[dot]umontreal[dot]ca
#
#Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
#
#    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
#    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
#
#            THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os
os.environ["THEANO_FLAGS"] = "device=gpu0"
import numpy 
import numpy.random
import pylab
#from dispims_color import dispims_color
import theano
from theano.tensor.shared_randomstreams import RandomStreams
import matplotlib.pyplot as plt
rng = numpy.random.RandomState(1)
theano_rng = RandomStreams(1)
SMALL = 0.001
patchsize = 12
numfeatures = 500
# 28 * 28 pixels, matching the (28, 28) reshape used when plotting filters.
numvisible = 784


# INSTANTIATE THE ZERO-BIAS AUTOENCODER
# Draw the initial weights from the seeded generator so runs are
# reproducible, and size them from numvisible/numfeatures instead of
# repeating the literals.
model = Zae(numvis=numvisible, numhid=numfeatures, vistype="binary",
            init_features=rng.uniform(-0.05, 0.05, size=(numvisible, numfeatures)),
            selectionthreshold=0.2)

# Cast to floatX so the data slices match the dtype of the model's
# T.matrix input (no copy is made when data.X already has that dtype).
trainpatches_theano = theano.shared(numpy.asarray(data.X, dtype=theano.config.floatX))

# DO SOME STEPS WITH SMALL LEARNING RATE TO MAKE SURE THE INITIALIZATION IS IN A REASONABLE RANGE
trainer = GraddescentMinibatch(model, trainpatches_theano, 100, learningrate=0.0001, momentum=0.9)
for _ in range(3):
    trainer.step()

# TRAIN THE MODEL FOR REAL, AND SHOW FILTERS
trainer = GraddescentMinibatch(model, trainpatches_theano, 100, learningrate=0.01, momentum=0.9)

for epoch in range(50):
    trainer.step()
    # Every 10th epoch (except the first): decay the learning rate by 20%
    # and plot the current filters.
    if epoch % 10 == 0 and epoch > 0:
        trainer.set_learningrate(trainer.learningrate*0.8)

        plt.clf()
        # Each column of W is one filter; show the filters as 28x28 images.
        grid_plot(model.W.get_value().T.reshape((numfeatures, 28, 28)),
                  imshow_options={"cmap": "gray"}, nbrows=10, nbcols=10)

        plt.show()
