In [6]:
import json
import random
import sys
import pickle
import gzip
import numpy as np

In [7]:
def load_data(path=r'C:\Users\Kul Garima\.jupyter\DeepLearningPython35-master\mnist.pkl.gz'):
    """Load the gzipped, pickled dataset and return its three splits.

    Parameters
    ----------
    path : str, optional
        Location of the ``.pkl.gz`` file. The default is the original
        machine-specific path, kept only for backward compatibility; pass an
        explicit path when running anywhere else.

    Returns
    -------
    tuple
        ``(training_data, validation_data, test_data)`` exactly as stored in
        the pickle (the pickle must unpack into three objects).

    Notes
    -----
    ``pickle.load`` can execute arbitrary code while unpickling, so only open
    files that come from a trusted source.
    """
    # The context manager closes the file even if unpickling raises.
    with gzip.open(path, 'rb') as f:
        training_data, validation_data, test_data = pickle.load(f, encoding='latin1')
    return (training_data, validation_data, test_data)


def load_data_wrapper():
    """Return the dataset splits as lists of ``(input, label)`` pairs.

    Every input is reshaped to a ``(784, 1)`` array. Training labels are
    passed through ``vectorized_result``; validation and test labels are
    paired as they come out of ``load_data``.

    Returns
    -------
    tuple
        ``(training_data, validation_data, test_data)``, each a list of pairs.
    """
    raw_train, raw_valid, raw_test = load_data()

    def as_columns(samples):
        # One (784, 1) array per sample.
        return [np.reshape(sample, (784, 1)) for sample in samples]

    train_columns = as_columns(raw_train[0])
    train_targets = [vectorized_result(label) for label in raw_train[1]]
    training_data = list(zip(train_columns, train_targets))

    validation_data = list(zip(as_columns(raw_valid[0]), raw_valid[1]))
    test_data = list(zip(as_columns(raw_test[0]), raw_test[1]))

    return (training_data, validation_data, test_data)




In [8]:
class QuadraticCost(object):
    """Quadratic (squared-error) cost.

    Both functions are static methods so they can be called either on the
    class itself (``QuadraticCost.fn(a, y)``) or on an instance.
    """

    @staticmethod
    def fn(a, y):
        """Return ``0.5 * ||a - y||^2`` for output ``a`` and target ``y``."""
        return 0.5*np.linalg.norm(a-y)**2

    @staticmethod
    def delta(z, a, y):
        """Return ``(a - y) * sigmoid_prime(z)``, the output-layer error term
        used by backpropagation."""
        return (a-y) * sigmoid_prime(z)


class CrossEntropyCost(object):
    """Cross-entropy cost.

    Both functions are static methods so they can be called either on the
    class itself (``CrossEntropyCost.fn(a, y)``) or on an instance.
    """

    @staticmethod
    def fn(a, y):
        """Return ``sum(-y*log(a) - (1-y)*log(1-a))`` for output ``a`` and
        target ``y``.

        ``np.nan_to_num`` replaces the NaN produced by ``0 * log(0)`` terms
        with 0 (and infinities with large finite numbers) before summing.
        """
        return np.sum(np.nan_to_num(-y*np.log(a)-(1-y)*np.log(1-a)))

    @staticmethod
    def delta(z, a, y):
        """Return the output-layer error term ``a - y``.

        ``z`` is not used; it is accepted so the signature matches
        ``QuadraticCost.delta``.
        """
        return (a-y)



class Network(object):
    """Fully connected feedforward network with sigmoid activations, trained
    by mini-batch stochastic gradient descent with L2 weight decay."""

    def __init__(self, sizes, cost=CrossEntropyCost):
        """Create a network.

        Parameters
        ----------
        sizes : sequence of int
            Number of units in each layer, input layer first.
        cost : object
            Must expose ``fn(a, y)`` and ``delta(z, a, y)``; used by
            ``total_cost`` and ``backprop`` respectively.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.default_weight_initializer()
        self.cost = cost

    def default_weight_initializer(self):
        """Draw biases from a standard normal and weights from a standard
        normal scaled by ``1/sqrt(x)``, where ``x`` is the size of the layer
        feeding into the weights. No parameters are created for the input
        layer."""
        self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
        self.weights = [np.random.randn(y, x)/np.sqrt(x)
                        for x, y in zip(self.sizes[:-1], self.sizes[1:])]

    def large_weight_initializer(self):
        """Draw biases and weights from an unscaled standard normal."""
        self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(self.sizes[:-1], self.sizes[1:])]

    def feedforward(self, a):
        """Return the network output for input ``a`` by applying
        ``sigmoid(w . a + b)`` layer by layer."""
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a)+b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            lmbda=0.0,
            evaluation_data=None,
            monitor_evaluation_cost=False,
            monitor_evaluation_accuracy=False,
            monitor_training_cost=False,
            monitor_training_accuracy=False):
        """Train with mini-batch stochastic gradient descent.

        Parameters
        ----------
        training_data : list of (x, y) pairs
            Shuffled IN PLACE at the start of every epoch.
        epochs : int
            Number of passes over ``training_data``.
        mini_batch_size : int
            Number of pairs per mini-batch.
        eta : float
            Learning rate.
        lmbda : float, optional
            L2 regularization parameter.
        evaluation_data : list of (x, label) pairs, optional
            Required (and non-empty) when either evaluation monitor is on.
        monitor_* : bool, optional
            When True, the corresponding quantity is computed, printed and
            recorded after every epoch.

        Returns
        -------
        tuple of four lists
            ``(evaluation_cost, evaluation_accuracy, training_cost,
            training_accuracy)``, one entry per epoch for each monitored
            quantity and empty for the others.

        Raises
        ------
        ValueError
            If an evaluation monitor is requested but ``evaluation_data`` is
            missing or empty. Raised before any training happens.
        """
        wants_evaluation = monitor_evaluation_cost or monitor_evaluation_accuracy
        has_evaluation_data = (evaluation_data is not None
                               and len(evaluation_data) > 0)
        if wants_evaluation and not has_evaluation_data:
            # Fail fast instead of crashing after the first full epoch.
            raise ValueError(
                "evaluation_data must be a non-empty sequence when "
                "monitor_evaluation_cost or monitor_evaluation_accuracy is set")

        # Always bound, so the reporting code below can never hit a NameError.
        n_data = len(evaluation_data) if evaluation_data is not None else 0
        n = len(training_data)

        evaluation_cost, evaluation_accuracy = [], []
        training_cost, training_accuracy = [], []

        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [training_data[k:k+mini_batch_size]
                            for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta, lmbda, n)
            print("Epoch %s training complete" % j)
            if monitor_training_cost:
                cost = self.total_cost(training_data, lmbda)
                training_cost.append(cost)
                print("Cost on training data: {}".format(cost))
            if monitor_training_accuracy:
                # Training labels are vectors, hence convert=True.
                accuracy = self.accuracy(training_data, convert=True)
                training_accuracy.append(accuracy)
                print("Accuracy on training data: {} / {}".format(
                    accuracy, n))
            if monitor_evaluation_cost:
                # Evaluation labels are plain class indices, hence convert=True.
                cost = self.total_cost(evaluation_data, lmbda, convert=True)
                evaluation_cost.append(cost)
                print("Cost on evaluation data: {}".format(cost))
            if monitor_evaluation_accuracy:
                # Computed once and reused for both the record and the report.
                accuracy = self.accuracy(evaluation_data)
                evaluation_accuracy.append(accuracy)
                print("Accuracy on evaluation data: {} / {}".format(
                    accuracy, n_data))
        return evaluation_cost, evaluation_accuracy, \
            training_cost, training_accuracy

    def update_mini_batch(self, mini_batch, eta, lmbda, n):
        """Apply one gradient-descent step computed from ``mini_batch``.

        Parameters
        ----------
        mini_batch : list of (x, y) pairs
        eta : float
            Learning rate.
        lmbda : float
            L2 regularization parameter.
        n : int
            Size of the full training set; scales the weight-decay term.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

        # Weights are first shrunk by the decay factor (1 - eta*lmbda/n), then
        # moved against the averaged gradient. Biases are not decayed.
        self.weights = [(1-eta*(lmbda/n))*w-(eta/len(mini_batch))*nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*nb
                       for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """Return ``(nabla_b, nabla_w)``, the gradient of the cost for the
        single example ``(x, y)``.

        Both are lists of arrays with the same shapes as ``self.biases`` and
        ``self.weights``.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # Forward pass: remember every weighted input z and every activation.
        activation = x
        activations = [x]
        zs = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation)+b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)

        # Backward pass: the output-layer error comes from the cost object.
        delta = (self.cost).delta(zs[-1], activations[-1], y)
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())

        # l counts layers from the end: l = 2 is the second-to-last layer.
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        return (nabla_b, nabla_w)

    def accuracy(self, data, convert=False):
        """Return how many pairs in ``data`` the network classifies correctly.

        The prediction is the index of the largest output activation. With
        ``convert=True`` the label ``y`` is a vector and its argmax is used;
        otherwise ``y`` is compared directly as the class index.
        """
        if convert:
            results = [(np.argmax(self.feedforward(x)), np.argmax(y))
                       for (x, y) in data]
        else:
            results = [(np.argmax(self.feedforward(x)), y)
                       for (x, y) in data]
        return sum(int(x == y) for (x, y) in results)

    def total_cost(self, data, lmbda, convert=False):
        """Return the average cost over ``data`` plus the L2 penalty
        ``0.5 * (lmbda / len(data)) * sum ||w||^2``.

        With ``convert=True`` each label is first passed through
        ``vectorized_result``.
        """
        cost = 0.0
        for x, y in data:
            a = self.feedforward(x)
            if convert:
                y = vectorized_result(y)
            cost += self.cost.fn(a, y)/len(data)
        cost += 0.5*(lmbda/len(data))*sum(
            np.linalg.norm(w)**2 for w in self.weights)
        return cost

    def save(self, filename):
        """Write sizes, weights, biases and the cost's name to ``filename``
        as JSON."""
        data = {"sizes": self.sizes,
                "weights": [w.tolist() for w in self.weights],
                "biases": [b.tolist() for b in self.biases],
                "cost": str(self.cost.__name__)}
        # The context manager closes the file even if serialization fails.
        with open(filename, "w") as f:
            json.dump(data, f)


def load(filename):
    """Rebuild a ``Network`` from a JSON file with the keys ``"sizes"``,
    ``"weights"``, ``"biases"`` and ``"cost"``.

    The ``"cost"`` entry is looked up by name as an attribute of the current
    module, so it must name a cost class defined there; an unknown name
    raises ``AttributeError``.

    Returns
    -------
    Network
        A network whose weights and biases are the stored arrays.
    """
    # The context manager closes the file even if parsing fails.
    with open(filename, "r") as f:
        data = json.load(f)
    cost = getattr(sys.modules[__name__], data["cost"])
    net = Network(data["sizes"], cost=cost)
    # Overwrite the randomly initialized parameters with the stored ones.
    net.weights = [np.array(w) for w in data["weights"]]
    net.biases = [np.array(b) for b in data["biases"]]
    return net


def vectorized_result(j, num_classes=10):
    """Return a ``(num_classes, 1)`` column of zeros with 1.0 at position ``j``.

    Parameters
    ----------
    j : int
        Index to set to 1.0.
    num_classes : int, optional
        Length of the returned column; defaults to 10, the previously
        hard-coded value.
    """
    e = np.zeros((num_classes, 1))
    e[j] = 1.0
    return e

def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))``, element-wise.

    It is evaluated as ``exp(-log(1 + exp(-z)))`` with ``np.logaddexp``, so
    no intermediate value overflows for large ``|z|``. The direct formula
    produces ``exp`` overflow warnings for strongly negative ``z``. Results
    agree with the direct formula up to floating-point rounding.
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z))
    return np.exp(-np.logaddexp(0.0, -z))

def sigmoid_prime(z):
    """Return the derivative of the sigmoid at ``z``: ``s * (1 - s)`` with
    ``s = sigmoid(z)``."""
    # Evaluate the sigmoid once and reuse it rather than computing it twice.
    s = sigmoid(z)
    return s*(1-s)

In [9]:
# Load the three dataset splits as lists of (input, label) pairs. Per
# load_data_wrapper, inputs are (784, 1) arrays and only the training labels
# are passed through vectorized_result.
training_data, validation_data, test_data = load_data_wrapper()


In [5]:
# Layer sizes 784 -> 30 -> 10; the cost is left at the constructor default.
# NOTE(review): this cell's execution count (In [5]) is lower than that of the
# cell defining Network (In [8]) — confirm the notebook runs top to bottom on
# a fresh kernel.
net = Network([784, 30, 10])


In [17]:
# 30 epochs, mini-batch size 10, eta = 10.0, lmbda = 5.0.
# Every monitor flag is off, so only the per-epoch completion line is printed
# and the four returned history lists are empty.
net.SGD(training_data , 30, 10, 10.0,
            lmbda = 5.0,
            evaluation_data=None,
            monitor_evaluation_cost=False,
            monitor_evaluation_accuracy=False,
            monitor_training_cost=False,
            monitor_training_accuracy=False)

Epoch 0 training complete
Epoch 1 training complete
Epoch 2 training complete
Epoch 3 training complete
Epoch 4 training complete
Epoch 5 training complete
Epoch 6 training complete
Epoch 7 training complete
Epoch 8 training complete
Epoch 9 training complete
Epoch 10 training complete
Epoch 11 training complete
Epoch 12 training complete
Epoch 13 training complete
Epoch 14 training complete
Epoch 15 training complete
Epoch 16 training complete
Epoch 17 training complete
Epoch 18 training complete
Epoch 19 training complete
Epoch 20 training complete
Epoch 21 training complete
Epoch 22 training complete
Epoch 23 training complete
Epoch 24 training complete
Epoch 25 training complete
Epoch 26 training complete
Epoch 27 training complete
Epoch 28 training complete
Epoch 29 training complete


([], [], [], [])

In [18]:
# Same schedule with lmbda = 1000.0 and all four monitors on, evaluated on the
# validation split.
# With these values the weight-decay factor used in update_mini_batch,
# 1 - eta*(lmbda/n), is 1 - 10*(1000/50000) = 0.8 per mini-batch.
# NOTE(review): the recorded accuracy stays near 10% of the data (chance level
# for 10 classes) — presumably the regularization is too strong; confirm.
# NOTE(review): `net` is not re-created in this cell, so training presumably
# continues from the state left by an earlier cell — confirm.
net.SGD(training_data , 30, 10, 10.0,
            lmbda = 1000.0,
            evaluation_data= validation_data,
            monitor_evaluation_cost=True,
            monitor_evaluation_accuracy=True,
            monitor_training_cost=True,
            monitor_training_accuracy=True)

Epoch 0 training complete
Cost on training data: 3.7058409771261513
Accuracy on training data: 4859 / 50000
Cost on evaluation data: 3.7041225214976614
Accuracy on evaluation data: 983 / 10000
Epoch 1 training complete
Cost on training data: 3.670538127053678
Accuracy on training data: 4988 / 50000
Cost on evaluation data: 3.69608972078041
Accuracy on evaluation data: 961 / 10000
Epoch 2 training complete
Cost on training data: 4.120771925999175
Accuracy on training data: 5678 / 50000
Cost on evaluation data: 4.1362586612425085
Accuracy on evaluation data: 1064 / 10000
Epoch 3 training complete
Cost on training data: 3.665311029941012
Accuracy on training data: 5101 / 50000
Cost on evaluation data: 3.6694862999290128
Accuracy on evaluation data: 1030 / 10000
Epoch 4 training complete
Cost on training data: 3.4974348815603915
Accuracy on training data: 4988 / 50000
Cost on evaluation data: 3.492381188620574
Accuracy on evaluation data: 961 / 10000
Epoch 5 training complete
Cost on train

([3.7041225214976614,
  3.69608972078041,
  4.1362586612425085,
  3.6694862999290128,
  3.492381188620574,
  3.4553823184598063,
  3.7638899940265285,
  3.539800206724652,
  3.624769710821914,
  3.8969801803204236,
  3.7439224874808845,
  3.4508174498823063,
  3.671398044910423,
  4.348408884675333,
  3.9825290340463146,
  3.5239122390590936,
  3.863750173480171,
  3.790568524690881,
  3.551201729808517,
  3.5472149908514417,
  4.129725801548539,
  3.8182428943510454,
  3.746249162854825,
  3.911560599323201,
  3.590290376250039,
  3.497496592918906,
  3.5935177408763868,
  3.737046054822728,
  4.21778974471592,
  3.793227851126395],
 [983,
  961,
  1064,
  1030,
  961,
  967,
  915,
  1090,
  1064,
  1064,
  961,
  967,
  991,
  983,
  967,
  967,
  1030,
  961,
  1009,
  961,
  990,
  1009,
  967,
  990,
  1090,
  1090,
  991,
  1064,
  961,
  1009],
 [3.7058409771261513,
  3.670538127053678,
  4.120771925999175,
  3.665311029941012,
  3.4974348815603915,
  3.462937399639172,
  3.758

In [6]:
# Network with no hidden layer: 784 inputs connected directly to 10 outputs.
# NOTE(review): the execution count (In [6]) repeats the one on the imports
# cell, which suggests a different kernel session — confirm run order.
net = Network([784, 10])

In [7]:
# Train on the first 1000 training pairs; evaluate on the first 100 validation
# pairs.
# Here n = 1000, so the weight-decay factor in update_mini_batch,
# 1 - eta*(lmbda/n), is 1 - 10*(1000/1000) = -9: every mini-batch update
# multiplies the weights by -9 before the gradient step. The recorded output
# shows the cost going to inf and then nan while accuracy stays constant.
# NOTE(review): lower eta and/or lmbda so that eta*lmbda/n stays well below 1.
net.SGD(training_data[:1000] , 30, 10, 10.0,
            lmbda = 1000.0,
            evaluation_data= validation_data[:100],
            monitor_evaluation_cost=True,
            monitor_evaluation_accuracy=True,
            monitor_training_cost=True,
            monitor_training_accuracy=True)

  del sys.path[0]
  del sys.path[0]
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Epoch 0 training complete
Cost on training data: inf
Accuracy on training data: 97 / 1000
Cost on evaluation data: inf
Accuracy on evaluation data: 10 / 100
Epoch 1 training complete
Cost on training data: inf
Accuracy on training data: 97 / 1000
Cost on evaluation data: inf
Accuracy on evaluation data: 10 / 100
Epoch 2 training complete
Cost on training data: inf
Accuracy on training data: 97 / 1000
Cost on evaluation data: inf
Accuracy on evaluation data: 10 / 100
Epoch 3 training complete
Cost on training data: nan
Accuracy on training data: 97 / 1000




Cost on evaluation data: nan
Accuracy on evaluation data: 10 / 100
Epoch 4 training complete
Cost on training data: nan
Accuracy on training data: 97 / 1000
Cost on evaluation data: nan
Accuracy on evaluation data: 10 / 100
Epoch 5 training complete
Cost on training data: nan
Accuracy on training data: 97 / 1000
Cost on evaluation data: nan
Accuracy on evaluation data: 10 / 100
Epoch 6 training complete
Cost on training data: nan
Accuracy on training data: 97 / 1000
Cost on evaluation data: nan
Accuracy on evaluation data: 10 / 100
Epoch 7 training complete
Cost on training data: nan
Accuracy on training data: 97 / 1000
Cost on evaluation data: nan
Accuracy on evaluation data: 10 / 100
Epoch 8 training complete
Cost on training data: nan
Accuracy on training data: 97 / 1000
Cost on evaluation data: nan
Accuracy on evaluation data: 10 / 100
Epoch 9 training complete
Cost on training data: nan
Accuracy on training data: 97 / 1000
Cost on evaluation data: nan
Accuracy on evaluation data: 

([inf,
  inf,
  inf,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan],
 [10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10],
 [inf,
  inf,
  inf,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan],
 [97,
  97,
  97,
  97,
  97,
  97,
  97,
  97,
  97,
  97,
  97,
  97,
  97,
  97,
  97,
  97,
  97,
  97,
  97,
  97,
  97,
  97,
  97,
  97,
  97,
  97,
  97,
  97,
  97,
  97])

In [11]:
# Deeper network: four hidden layers of 30 units (784 -> 30 -> 30 -> 30 -> 30 -> 10).
net=Network([784, 30, 30, 30, 30, 10])

In [12]:
# 30 epochs, mini-batch size 10, eta = 0.1, lmbda = 5.0, all monitors on,
# evaluated on the validation split.
# The weight-decay factor 1 - eta*(lmbda/n) is 1 - 0.1*(5/50000) = 0.99999.
# In the recorded output, validation accuracy rises from 1358 to above 9600
# out of 10000.
net.SGD(training_data, 30, 10, 0.1, lmbda=5.0,
        evaluation_data= validation_data,
            monitor_evaluation_cost=True,
            monitor_evaluation_accuracy=True,
            monitor_training_cost=True,
            monitor_training_accuracy=True)

Epoch 0 training complete
Cost on training data: 3.1914391962703026
Accuracy on training data: 7045 / 50000
Cost on evaluation data: 3.220550444355957
Accuracy on evaluation data: 1358 / 10000
Epoch 1 training complete
Cost on training data: 1.9664736336015889
Accuracy on training data: 25739 / 50000
Cost on evaluation data: 2.0719999971131013
Accuracy on evaluation data: 5206 / 10000
Epoch 2 training complete
Cost on training data: 0.9946772191561473
Accuracy on training data: 43322 / 50000
Cost on evaluation data: 1.220167426194576
Accuracy on evaluation data: 8713 / 10000
Epoch 3 training complete
Cost on training data: 0.6540916048660534
Accuracy on training data: 46082 / 50000
Cost on evaluation data: 1.0079528829260798
Accuracy on evaluation data: 9226 / 10000
Epoch 4 training complete
Cost on training data: 0.5563059908524913
Accuracy on training data: 46751 / 50000
Cost on evaluation data: 0.9818013019900832
Accuracy on evaluation data: 9341 / 10000
Epoch 5 training complete
Co

([3.220550444355957,
  2.0719999971131013,
  1.220167426194576,
  1.0079528829260798,
  0.9818013019900832,
  0.9389622529238528,
  0.945316914386068,
  0.905283338574934,
  0.9174788155704166,
  1.0816239485011028,
  0.9143214955871672,
  0.9081332484185214,
  0.918047207734018,
  0.9167082221463845,
  0.9394062152482897,
  0.9245368266140865,
  0.933868564396663,
  0.9520759833041184,
  0.9256543817147835,
  0.9318915621446743,
  0.9418773514956058,
  0.9592607547908456,
  0.9414737762831411,
  0.9710271893172757,
  0.9443633288180344,
  0.9378602011487556,
  0.9416278199859581,
  0.9409596754967604,
  0.952601368193745,
  0.9582542208805043],
 [1358,
  5206,
  8713,
  9226,
  9341,
  9444,
  9477,
  9558,
  9559,
  9284,
  9577,
  9613,
  9592,
  9613,
  9594,
  9625,
  9615,
  9609,
  9636,
  9641,
  9624,
  9601,
  9632,
  9566,
  9622,
  9641,
  9652,
  9654,
  9626,
  9621],
 [3.1914391962703026,
  1.9664736336015889,
  0.9946772191561473,
  0.6540916048660534,
  0.5563059908524