In [7]:
import numpy as np
import random
import pickle
import gzip
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_openml


def cost_grad(a , y):
    return (a-y)

def sigmoid(z):
    return 1.0/(1.0 + np.exp(-z))

def sig_diff(z):
    return sigmoid(z)*(1-sigmoid(z))


class Network():
    def __init__(self , sizes):
        self.sizes = sizes
        self.num_layers = len(sizes)
        self.biases = [np.random.randn(y , 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y,x) for y,x in zip(sizes[1:] , sizes[:-1])]
        
    def feedforward(self , a):
        for b,w in zip(self.biases, self.weights):
            a = np.matmul(w , a) + b
        return a
    def SGD(self, training_data, epochs, mini_batch_size, eta, lmbda = 5.0,evaluation_data=None):
        if evaluation_data: n_data = len(evaluation_data)
        n = len(training_data)
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [training_data[k:mini_batch_size + k] for k in range(0, n , mini_batch_size)]
            for mini_batch in mini_batches:
                self.update(mini_batch , lmbda , eta , n)   
      
        accuracy = self.accuracy(training_data)
        return accuracy
    
    def update(self ,mini_batch, lmbda ,eta, n ):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x,y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x,y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

        self.weights = [(1-eta*lmbda/n)*w - (eta/len(mini_batch))*nw for w,nw in zip(self.weights, delta_nabla_w)]
        self.biases = [b - (eta/len(mini_batch))*nb for b,nb in zip(self.biases,delta_nabla_b)]

    def accuracy(self, data):
        results = [(np.argmax(self.feedforward(x)), y) for (x, y) in data]
        return sum(int(x == y) for (x, y) in results)*100/len(data)

        
    def backprop(self, x , y ):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        zs = []
        activations = [x]
        for b,w in zip(self.biases , self.weights):
            print(x)
            z = np.matmul(w,x) + b
            zs.append(z)
            acti = sigmoid(z)
            activations.append(acti)
            
        delta = np.multiply(cost_grad(zs[-1] , y)  , sig_diff(zs[-1]))
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        for i in range(2 , self.num_layers):
            delta = np.multiply(np.matmul(nabla_w[-l+1] , delta) , sig_diff(zs[-l]))
            nabla_b = delta
            nabla_w = np.dot(delta , activations[-l-1].transpose())
        return (nabla_b , nabla_w)

    def default_weight_initializer(self):
        self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
        self.weights = [np.random.randn(y, x)/np.sqrt(x) for x, y in zip(self.sizes[:-1], self.sizes[1:])]

In [8]:
mnist = fetch_openml('mnist_784')
x_train, x_test, y_train, y_test = train_test_split(mnist.data, mnist.target, test_size=0.20)

training_data = [[x,y] for x,y in zip(x_train,y_train)]
test_data = [ [x,y] for x,y in zip(x_test,y_test)]
net = Network([784, 30, 10])
net.default_weight_initializer()
print(net.SGD(training_data, 30, 10, 0.1, lmbda = 5.0,evaluation_data=test_data))

AttributeError: 'str' object has no attribute 'shape'