In [627]:
import _pickle as cPickle
import gzip 
import numpy as np
import sys 

# Load the dataset
# NOTE(security): unpickling can execute arbitrary code, so only open an
# mnist.pkl.gz that comes from a trusted source.
# The context manager closes the archive even if unpickling raises.
with gzip.open('mnist.pkl.gz', 'rb') as f:
    # encoding='latin1' is presumably needed because the archive was pickled
    # under Python 2 -- TODO confirm against the data source.
    train_set, valid_set, test_set = cPickle.load(f,encoding='latin1')

# Each split is indexed as (images, labels) below; show the training labels.
print(train_set[1])

[5 0 4 ..., 8 4 8]


IndexError: tuple index out of range

In [1249]:
def sigmoid(x):
    """Logistic function 1 / (1 + e^(-x)).

    Evaluated with NumPy, so ``x`` may be a scalar or an array (applied
    element by element).
    """
    denominator = 1+np.exp(-x)
    return 1/denominator

def sigmoidPrime(x):
    """Derivative of the logistic function, e^x / (1 + e^x)^2.

    The derivative is an even function of ``x``, so it is evaluated at -|x|.
    That keeps the exponential in (0, 1] and avoids the ``inf/inf = nan``
    the direct formula produces once ``np.exp(x)`` overflows.

    x: scalar or array of pre-activation values.
    Returns the derivative with the same shape as ``x``.
    """
    decay = np.exp(-np.abs(x))
    return decay/((1+decay)**2)

def relu(x):
    """Rectified linear unit for a single number: 0 for negative x, otherwise x itself."""
    return 0 if x<0 else x

def reluPrime(x):
    """Slope of the rectified linear unit for a single number: 0 for negative x, otherwise 1."""
    if not x<0:
        return 1
    return 0

def elementWise(f, x):
    """Apply the single-value function ``f`` to every element of ``x``.

    The result is returned in a NEW container and ``x`` is left untouched.
    Writing the results back into ``x`` (the previous behaviour) made the
    returned value alias the argument, so a caller that kept both the input
    and the output ended up holding the same data twice, and the results
    were truncated whenever ``x`` was an integer array.

    f: callable taking one element and returning its mapped value.
    x: sequence or 1-D NumPy array.
    Returns a NumPy array when ``x`` is one, otherwise a list.
    """
    mapped = [f(item) for item in x]
    if isinstance(x, np.ndarray):
        return np.array(mapped)
    return mapped

def softMax(x):
    """Normalised exponentials of ``x``: exp(x_i) / sum_j exp(x_j).

    The largest score is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps ``np.exp`` from overflowing
    (which previously produced ``nan`` for large scores).

    x: 1-D array-like of scores. For higher-rank input the normalisation runs
       along the first axis, as the built-in ``sum`` used before did.
    Returns an array of the same shape whose entries sum to 1 along axis 0;
    an empty input yields an empty array.
    """
    scores = np.asarray(x, dtype=float)
    if scores.size == 0:
        return scores
    z = np.exp(scores - np.max(scores, axis=0))
    return z/np.sum(z, axis=0)

def softMaxPrime(x):
    """Derivative of each softmax output with respect to its own input.

    With z_i = exp(x_i) and c = sum_j z_j this returns (c - z_i) * z_i / c^2,
    i.e. s_i * (1 - s_i) for the softmax output s_i. Cross terms (the effect
    of x_j on s_i for j != i) are not included.

    The largest score is subtracted before exponentiating, which cancels in
    the ratio but prevents ``np.exp`` from overflowing to ``inf``/``nan``.

    x: 1-D array-like of scores.
    Returns a new array of the same shape; an empty input yields an empty array.
    """
    scores = np.asarray(x, dtype=float)
    if scores.size == 0:
        return scores
    z = np.exp(scores - np.max(scores, axis=0))
    c = np.sum(z, axis=0)
    return (c-z)*z/(c**2)

def display(x, label, act, width=28):
    """Print a flattened image as ASCII art, followed by its label.

    Every pixel becomes exactly one character ("x" when its value is at
    least ``act``, a space otherwise), so the columns stay aligned, and every
    row -- including the last one, which used to be dropped -- is printed.

    x:     flat sequence of pixel values, row after row.
    label: printed on its own line after the image.
    act:   threshold at or above which a pixel is drawn as "x".
    width: number of pixels per row (28 by default).

    NOTE(review): in IPython/Jupyter this name shadows the built-in rich
    ``display`` helper for the rest of the session.
    """
    for start in range(0, len(x), width):
        row = x[start:start + width]
        print("".join("x" if pixel >= act else " " for pixel in row))
    print(label)
            
def logLoss(x, target):
    """Mean binary cross-entropy between predictions ``x`` and ``target``.

    Computes -(1/n) * sum_i [ t_i*log(x_i) + (1 - t_i)*log(1 - x_i) ] with
    n = len(x).

    Predictions are clipped into [eps, 1 - eps] first, so an output of
    exactly 0 or 1 gives a large finite loss instead of ``inf`` or ``nan``
    (``0 * log(0)`` evaluates to ``nan``).

    x:      sequence of predicted probabilities.
    target: sequence of desired values, at least as long as ``x``.
    """
    eps = 1e-15
    probs = np.clip(np.asarray(x, dtype=float), eps, 1 - eps)
    # only the first len(x) targets are used, as in the element-wise loop
    goal = np.asarray(target, dtype=float)[:len(probs)]
    total = np.sum(goal*np.log(probs) + (1-goal)*np.log(1-probs))
    return (-1.0/len(x))*total
    
def logLossPrime(x, target):
    """Gradient of ``logLoss`` with respect to each prediction.

    Entry i is -(1/n) * ( t_i/x_i - (1 - t_i)/(1 - x_i) ) with n = len(x).

    Predictions are clipped into [eps, 1 - eps] first, so an output of
    exactly 0 or 1 no longer divides by zero.

    x:      sequence of predicted probabilities.
    target: sequence of desired values, at least as long as ``x``.
    Returns a float array with one gradient entry per prediction.
    """
    eps = 1e-15
    probs = np.clip(np.asarray(x, dtype=float), eps, 1 - eps)
    if len(probs) == 0:
        return np.zeros(0)
    # only the first len(x) targets are used, as in the element-wise loop
    goal = np.asarray(target, dtype=float)[:len(probs)]
    return (-1.0/len(probs))*(goal/probs - (1-goal)/(1-probs))
    
class Neural_Network:
    """Fully connected feed-forward network trained by per-sample backpropagation.

    Layout of the instance state (layer 0 is the input layer):
      shape          list with the number of neurons in every layer
      weights[i]     matrix of weights going INTO layer i (weights[0] is None);
                     weights[i][j][k] links neuron k of layer i-1 to neuron j
                     of layer i
      bias[i]        vector of biases on layer i (bias[0] is None)
      activations[i] activations of layer i from the latest forwardProp call
      zs[i-1]        pre-activation values of layer i from the latest
                     forwardProp call (there is no entry for the input layer)
    """
    defaultSize = 16 #width of each hidden layer when no explicit shape is given
    inputChecks = True #this will change whether inputs that match the MNIST format are given
    #can be turned off to allow for debugging on smaller examples

    def __init__(self, activation, activationPrime, shape = None, layers = 2):
        """Create a network with random weights and biases.

        activation:      function of one value used by every layer
        activationPrime: derivative of ``activation`` (function of one value)
        shape:           neurons per layer; when omitted, a 784 - ... - 10
                         network with ``layers`` layers is built whose hidden
                         layers have ``defaultSize`` neurons
        layers:          number of layers, only used when ``shape`` is omitted

        Raises NameError for fewer than two layers or, while ``inputChecks``
        is on, for a shape that does not go from 28*28 inputs to 10 outputs.
        """
        if layers<2:
            raise NameError("two few layers")
        #the default has to be filled in before the shape is inspected
        if shape is None:
            shape = [28**2] + [Neural_Network.defaultSize for i in range(layers-2)] + [10]
        if Neural_Network.inputChecks and (shape[0] != 28**2 or shape[len(shape)-1] != 10):
            raise NameError("improper input or output layer ")
        self.activation = activation
        self.activationPrime = activationPrime
        self.shape = list(shape)
        self.weights = Neural_Network.constructWeights(self.shape)
        #weights[i] is the weights going into layer i
        self.bias = Neural_Network.constructBias(self.shape)
        #bias[i] is the bias on layer i
        self.activations = []
        self.zs = []

    #returns a list of matrices of weights. weights[i] is the set of weights going into ith layer
    #weights[i][j][k] represents the weight going from the kth neuron in layer i-1 to jth neuron in layer i
    @staticmethod
    def constructWeights(shape):
        """Random weight matrices drawn uniformly from [-1, 1); index 0 is None."""
        weights = [None]
        for i in range(len(shape)-1):
            weights.append(np.random.uniform(-1,1,shape[i]*shape[i+1]).reshape((shape[i+1],shape[i])))
        return weights

    #returns a list of vectors of biases. bias[i] is the set of biases on the ith layer
    #bias[i][j] is the bias in the ith layer on the jth neuron
    @staticmethod
    def constructBias(shape):
        """Random bias vectors drawn uniformly from [-1, 1); index 0 is None."""
        bias = [None]
        for i in range(1,len(shape)):
            bias.append(np.random.uniform(-1,1,shape[i]))
        return bias

    @staticmethod
    def _zeroWeights(shape):
        """Zero-filled list with the same layout as constructWeights(shape)."""
        return [None] + [np.zeros((shape[i+1], shape[i])) for i in range(len(shape)-1)]

    @staticmethod
    def _zeroBias(shape):
        """Zero-filled list with the same layout as constructBias(shape)."""
        return [None] + [np.zeros(shape[i]) for i in range(1, len(shape))]

    @staticmethod
    def _applyElementwise(f, values):
        """Apply the one-value function f to each entry; returns a NEW float array."""
        return np.array([f(v) for v in values], dtype=float)

    def _oneHot(self, label):
        """Target vector for the output layer with a 1 at position ``label``."""
        target = np.zeros(self.shape[-1])
        target[label] = 1
        return target

    #performs forward propagation on the input X with the current weights and biases
    #using activation function from the constructor returns the activations on the last layer
    #updates the zs and activations attributes
    def forwardProp(self, X):
        """Run X through the network and return the last layer's activations.

        Raises NameError when ``inputChecks`` is on and X does not have 28*28
        entries.
        """
        if Neural_Network.inputChecks and len(X) != 28**2:
            raise NameError("improper input")
        prevAct = X
        act = []
        self.activations = [prevAct]
        self.zs = []
        for i in range(1,len(self.shape)): #each layer
            z = np.dot(self.weights[i], prevAct) + self.bias[i]
            #the activation goes into a separate array so that zs keeps the
            #pre-activation values backProp needs
            act = Neural_Network._applyElementwise(self.activation, z)
            self.activations.append(act)
            self.zs.append(z)
            prevAct = act
        return act

    def costFunctionCrossEntropy(self, data):
        """Summed cross-entropy of softMax(network output) over data = (images, labels)."""
        images = data[0]
        labels = data[1]
        if len(images) != len(labels):
            raise NameError("improper input")
        cost = 0
        for i in range(len(images)):
            target = self._oneHot(labels[i])
            cost += Neural_Network.crossEntropy(softMax(self.forwardProp(images[i])),target)
        return cost

    @staticmethod
    def crossEntropy(output, target):
        """Cross-entropy -sum_i target[i]*log(output[i])."""
        cost = 0
        for i in range(len(target)):
            cost -= target[i]*np.log(output[i])
        return cost

    def costFunctionEuclid(self, data):
        """Summed squared distance between softMax(network output) and the one-hot target."""
        images = data[0]
        labels = data[1]
        if Neural_Network.inputChecks and len(images) != len(labels):
            raise NameError("improper input")
        cost = 0
        for i in range(len(images)):
            target = self._oneHot(labels[i])
            out = softMax(self.forwardProp(images[i]))
            #square each difference before summing: the plain differences of
            #two vectors that both sum to one always cancel to zero
            cost += np.sum((out-target)**2)
        return cost

    def costFunctionLogLoss(self, data):
        """Summed logLoss of the raw network output over data = (images, labels)."""
        images = data[0]
        labels = data[1]
        if Neural_Network.inputChecks and len(images) != len(labels):
            raise NameError("improper input")
        cost = 0
        for i in range(len(images)):
            out = self.forwardProp(images[i])
            cost += logLoss(out, self._oneHot(labels[i]))
        return cost

    @staticmethod
    def classification(outputActivations):
        """Index of the largest activation (the last one on ties), or -1 for empty input."""
        maximum = float("-inf") #so that outputs below -1 are still classified
        index = -1
        for i in range(len(outputActivations)):
            if outputActivations[i] >= maximum:
                index = i
                maximum = outputActivations[i]
        return index

    def randomInitialization(self):
        """Replace all weights and biases with fresh random values."""
        self.weights = Neural_Network.constructWeights(self.shape)
        self.bias = Neural_Network.constructBias(self.shape)

    @staticmethod
    def improvement(prev, new, target):
        """True when ``new`` is at least as close to ``target`` as ``prev`` (squared distance)."""
        target = np.asarray(target, dtype=float)
        newError = np.sum((target - np.asarray(new, dtype=float))**2)
        prevError = np.sum((target - np.asarray(prev, dtype=float))**2)
        return bool(newError <= prevError)

    def test(self, data):
        """Sanity check of backProp on the first 20 samples of data.

        For each sample a single gradient step is taken and the output is
        checked to have moved towards the target. The network is re-randomised
        after every sample, so any trained weights are lost.
        """
        print("test")
        correct = 0
        wrong = 0
        for i in range(20):
            target = self._oneHot(data[1][i])
            prevAct = self.forwardProp(data[0][i])
            grad = self.backProp(target)
            self.applyGradient(grad, .1)
            #the target must still be set while the two outputs are compared
            if Neural_Network.improvement(prevAct, self.forwardProp(data[0][i]), target):
                correct +=1
            else:
                wrong +=1
            self.randomInitialization()
        print("correct: ", correct)
        print("wrong: ", wrong)

    #using backProp this will perform gradient descent from the current initialization of the weights
    #and biases on the given data. data in form tuple of array of images and array of labels
    #will stop when the change in cost between steps is less than epsilon
    def gradientDescent(self, data, epsilon, learningRate):
        return self.stochasticGradientDescent(data, len(data[0]), epsilon, learningRate)

    #will apply the gradient with stepSize where the gradient is in form
    #given by the backProp function
    def applyGradient(self, gradient, stepSize):
        for i in range(1, len(gradient[0])): #for each matrix in the weight update,
            #first value is None for convenience in indexing
            self.weights[i] -= stepSize * gradient[0][i]
        for i in range(1, len(gradient[1])): #for each vector in the bias update
            self.bias[i] -= stepSize * gradient[1][i]

    def validation(self, data):
        """Print how many samples of data = (images, labels) are classified correctly and wrongly."""
        images = data[0]
        labels = data[1]
        if len(images) != len(labels):
            raise NameError("improper input")
        correct = 0
        wrong = 0
        for i in range(len(images)):
            if (Neural_Network.classification(self.forwardProp(images[i]))) == labels[i]:
                correct += 1
            else:
                wrong += 1
        print("correct: ", correct)
        print("wrong: ", wrong)

    @staticmethod
    def randomBatch(images, labels, batchSize):
        """Draw ``batchSize`` distinct samples at random as (imageBatch, labelBatch).

        A batch size larger than the data set is reduced to the data set size,
        because sampling without replacement cannot return more.
        """
        size = min(batchSize, len(images))
        indices = np.random.choice(len(images), size, replace = False)
        imageBatch = []
        labelBatch = []
        for i in range(len(indices)):
            imageBatch.append(images[indices[i]])
            labelBatch.append(labels[indices[i]])
        return (imageBatch, labelBatch)

    #using backProp this will perform gradient descent from the current initialization of the weights
    #and biases on the given data, with the given batch size
    def stochasticGradientDescent(self, data, batchSize, epsilon, learningRate):
        """Train in rounds of five mini-batch steps until the cost stops improving.

        After every round the log-loss cost over all of ``data`` is computed
        and printed together with the validation counts. Training stops as
        soon as the cost did not drop by at least ``epsilon`` compared with
        the previous round.
        """
        images = data[0]
        labels = data[1]
        prevCost = float("inf")
        while True:
            for step in range(5):
                print(step)
                imageSet, labelSet = Neural_Network.randomBatch(images, labels, batchSize)
                gradients = []
                for image, label in zip(imageSet, labelSet):
                    self.forwardProp(image) #fills the caches backProp reads
                    gradients.append(self.backProp(self._oneHot(label)))
                self.applyGradient(self.averageGradient(gradients), learningRate)
            cost = self.costFunctionLogLoss(data)
            self.validation(data)
            print(cost)
            #written as "not >=" so that a nan cost also ends the loop
            if not (prevCost - cost >= epsilon):
                break
            prevCost = cost

    #given a list of tuples of the gradient in the form given by backProp
    def averageGradient(self, gradients):
        """Element-wise mean of the given gradients, in backProp's format.

        Raises ValueError for an empty list.
        """
        if len(gradients) == 0:
            raise ValueError("no gradients to average")
        #the accumulators start at zero so that the result is the true mean
        averageGrad = (Neural_Network._zeroWeights(self.shape), Neural_Network._zeroBias(self.shape))
        for i in range(len(averageGrad)): #weights then biases
            for j in range(len(gradients)): #grad from each sample
                for k in range(1, len(gradients[j][i])): #grad for each matrix
                    averageGrad[i][k] += gradients[j][i][k]
            for k in range(1,len(averageGrad[i])):#done at end to prevent rounding small numbers to zero
                averageGrad[i][k] /= len(gradients) #if overflow is a problem then divide at each step
        return averageGrad

    #this will return the gradient of the cost on the single target
    #the form is a tuple with the weights and then the biases
    #in the same format as the weights and biases attributes for the Neural_Network class
    def backProp(self, target, lossPrime = None):
        """Gradient of the loss for the sample last passed to forwardProp.

        target:    desired output vector
        lossPrime: optional function (output, target) -> gradient of the loss
                   with respect to the output activations; defaults to the
                   module-level logLossPrime

        Must be called after forwardProp, whose cached activations and zs it
        reads. Raises NameError when ``inputChecks`` is on and ``target`` does
        not have 10 entries.
        """
        if Neural_Network.inputChecks and len(target) != 10:
            raise NameError("improper input")
        if lossPrime is None:
            lossPrime = logLossPrime
        lastLayer = len(self.shape)-1
        weightGrad = Neural_Network._zeroWeights(self.shape) #kept in list of matrix form
        biasGrad = Neural_Network._zeroBias(self.shape) #kept in list of vector form
        #gradient of the loss with respect to the activations of the current layer
        actGrad = lossPrime(self.activations[lastLayer], target)
        for i in range(lastLayer, 0, -1): #every layer that has weights, last to first
            #zs has no entry for the input layer, so zs[i-1] belongs to layer i
            delta = Neural_Network._applyElementwise(self.activationPrime, self.zs[i-1]) * actGrad
            biasGrad[i] = delta
            #weight j,k scales the activation of neuron k in the previous layer
            weightGrad[i] = np.outer(delta, self.activations[i-1])
            #pass the gradient on to the activations of layer i-1
            actGrad = np.dot(self.weights[i].T, delta)
        return (weightGrad, biasGrad)

In [1263]:
# Keep the MNIST format checks enabled (28*28 inputs, 10 outputs) and build a
# sigmoid network with two hidden layers of 8 neurons each.
Neural_Network.inputChecks = True
a = Neural_Network(sigmoid, sigmoidPrime, shape=[28**2, 8, 8, 10])

In [1264]:
# Forward pass on the first training image; the cell shows the activations of
# the output layer.
a.forwardProp(train_set[0][0])

array([ 0.71090105,  0.48020259,  0.36616042,  0.51390308,  0.44072154,
        0.3560126 ,  0.511882  ,  0.49526543,  0.5496971 ,  0.67491579])

In [1266]:
# Train on the training split: batchSize = 2000, epsilon = 0, learningRate = 0.5.
a.stochasticGradientDescent(train_set, 2000, 0, 0.5)

0


KeyboardInterrupt: 

In [1258]:
# Inspect training sample 39: its label, the raw output activations, the
# activations rescaled to sum to one, and the class the network picks.
# The forward pass is evaluated once and reused for all three printouts.
sample_output = a.forwardProp(train_set[0][39])
print(train_set[1][39])
print(sample_output)
print(sample_output/sum(sample_output))
print(Neural_Network.classification(sample_output))

6
[ 0.19473711  0.10362538  0.18675223  0.177155    0.13337837  0.1735821
  0.1677258   0.1956107   0.13970612  0.10809757]
[ 0.12322245  0.06557031  0.11816991  0.11209714  0.0843969   0.10983634
  0.10613069  0.12377522  0.08840088  0.06840015]
7


In [1259]:
# For each of the first 100 training images, print the largest output
# activation next to the mean output activation, followed by a blank line.
for i in range(100):
    out = a.forwardProp(train_set[0][i])
    print("max", max(out), "mean", np.mean(out))
    print()

max 0.209606395907 mean 0.165720237187

max 0.211897986483 mean 0.165926005147

max 0.200760577901 mean 0.148790578021

max 0.222265083679 mean 0.178058103785

max 0.202868356238 mean 0.164999853832

max 0.219064038745 mean 0.177485227495

max 0.228872610102 mean 0.170549522648

max 0.201728441173 mean 0.162691786696

max 0.209206726755 mean 0.15799747965

max 0.187287013279 mean 0.145770764122

max 0.233170572858 mean 0.189571256659

max 0.260396896273 mean 0.181724946789

max 0.234279376586 mean 0.166940012199

max 0.215835768643 mean 0.165287165502

max 0.228810126651 mean 0.162833296449

max 0.208536108784 mean 0.148583232869

max 0.221358946557 mean 0.17057841095

max 0.234290075471 mean 0.186349396344

max 0.174236041092 mean 0.141690758922

max 0.199570217522 mean 0.166938359855

max 0.223299153026 mean 0.165671981377

max 0.216158032076 mean 0.168713028994

max 0.193139491243 mean 0.153845007375

max 0.215498963467 mean 0.177476232856

max 0.23030137239 mean 0.177145362664

max

In [1211]:
# Sampling without replacement cannot return more items than the population
# holds. The error is caught and printed so the notebook still runs top to
# bottom. NumPy is imported as `np` in this notebook; the bare name `numpy`
# is never defined.
try:
    np.random.choice(5, 6, replace = False)
except ValueError as err:
    print(err)

ValueError: Cannot take a larger sample than population when 'replace=False'

In [1261]:
# Print how many images of the validation split are classified correctly and wrongly.
a.validation(valid_set)

correct:  1341
wrong:  8659


In [1262]:
# Print how many images of the test split are classified correctly and wrongly.
a.validation(test_set)

correct:  1394
wrong:  8606
