In [2]:
import numpy as np
import tensorflow.keras.datasets as datasets
import matplotlib.pyplot as plt



In [3]:
mnist = datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

print(x_train.shape)

(60000, 28, 28)


In [4]:
n = x_train.shape[0]
x_train = x_train.reshape(n, -1)
x_train = x_train / 255

print(x_train.shape)

(60000, 784)


In [5]:
n = x_test.shape[0]
x_test = x_test.reshape(n, -1)

print(x_test.shape)

(10000, 784)


In [6]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [7]:
def sigmoid_derivative(x):
    return sigmoid(x) * (1 - sigmoid(x))

In [8]:
def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0, keepdims=True)

In [9]:
def one_hot(y):
    n = y.shape[0]
    k = np.max(y) + 1
    y_one_hot = np.zeros((n, k))
    y_one_hot[np.arange(n), y] = 1
    return y_one_hot

In [10]:
class NN:
    def __init__(self, x, y, alpha=0.01, epochs=1000, batch_size=100, architecture=(10, 10)):
        self.Z1 = None
        self.a1 = None
        self.Z2 = None
        self.a2 = None
        self.x = x
        self.y = y
        self.alpha = alpha
        self.epochs = epochs
        self.batch_size = batch_size
        self.first_layer = architecture[0]
        self.second_layer = architecture[1]
        self.W1 = np.random.randn(self.x.shape[1], self.first_layer)
        self.b1 = np.zeros((1, self.first_layer))
        self.W2 = np.random.randn(self.first_layer, self.second_layer)
        self.b2 = np.zeros((1, self.second_layer))


    def feedforward(self, x):
        self.Z1 = np.dot(x, self.W1) + self.b1
        self.a1 = sigmoid(self.Z1)
        self.Z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = softmax(self.Z2)

    def backprop(self, x, y):
        n = x.shape[0]
        one_hot_y = one_hot(y)
        dZ2 = self.a2 - one_hot_y
        dW2 = np.dot(self.a1.T, dZ2) / n
        db2 = np.sum(dZ2, axis=0, keepdims=True) / n
        dZ1 = np.dot(dZ2, self.W2.T) * sigmoid_derivative(self.Z1)

        dW1 = np.dot(x.T, dZ1) / n
        db1 = np.sum(dZ1, axis=0, keepdims=True) / n

        self.W1 -= self.alpha * dW1
        self.b1 -= self.alpha * db1
        self.W2 -= self.alpha * dW2
        self.b2 -= self.alpha * db2

    def SGD(self):
        for i in range(self.epochs):
            # for j in range(0, self.x.shape[0], self.batch_size):
            #     x_batch = self.x[j:j + self.batch_size]
            #     y_batch = self.y[j:j + self.batch_size]
            #     self.feedforward(x_batch)
            #     self.backprop(x_batch, y_batch)
            self.feedforward(self.x)
            self.backprop(self.x, self.y)
            if i % 10 == 0:
                print('Iteration:', i, 'Loss:', -np.sum(one_hot(self.y) * np.log(self.a2)) / self.x.shape[0])

In [11]:
def print_image(x):
    plt.imshow(x.reshape(28, 28), cmap='gray')

In [14]:
nn = NN(x_train, y_train, alpha=0.2, epochs=1000, batch_size=100, architecture=(10, 10))

In [None]:
nn.SGD()

Iteration: 0 Loss: 11.956416167074055
Iteration: 10 Loss: 11.771853439444488
Iteration: 20 Loss: 11.621802478969272
Iteration: 30 Loss: 11.507983697055838
Iteration: 40 Loss: 11.42138731601776
Iteration: 50 Loss: 11.358250303342835
Iteration: 60 Loss: 11.312503301761678
Iteration: 70 Loss: 11.27578676137372
Iteration: 80 Loss: 11.238341916128554
Iteration: 90 Loss: 11.19459870118278
Iteration: 100 Loss: 11.14512354203809
Iteration: 110 Loss: 11.092675134888664
Iteration: 120 Loss: 11.042079516643165
Iteration: 130 Loss: 10.99523656129692
Iteration: 140 Loss: 10.95607297602539
Iteration: 150 Loss: 10.928383753262064
Iteration: 160 Loss: 10.911711630728917
Iteration: 170 Loss: 10.903045220210817
Iteration: 180 Loss: 10.900656683782609
Iteration: 190 Loss: 10.902286506686753
Iteration: 200 Loss: 10.905890176035438
Iteration: 210 Loss: 10.912220854591894
Iteration: 220 Loss: 10.921387609621021
Iteration: 230 Loss: 10.932302662204139
Iteration: 240 Loss: 10.9438903790999
Iteration: 250 Loss

In [22]:
n1 = NN(x_train, y_train, alpha=0.1, epochs=100, batch_size=100, architecture=(10, 10))


In [23]:
n1.SGD()

  return 1 / (1 + np.exp(-x))


Iteration: 0 Loss: 11.628764477445845
Iteration: 10 Loss: 11.585489462782652
Iteration: 20 Loss: 11.542479781308197
Iteration: 30 Loss: 11.505439647585586
Iteration: 40 Loss: 11.472054275090372
Iteration: 50 Loss: 11.442968270578856
Iteration: 60 Loss: 11.417334758621255
Iteration: 70 Loss: 11.39100204474672
Iteration: 80 Loss: 11.366374601374188
Iteration: 90 Loss: 11.339299036285894
Iteration: 99 Loss: 11.315979120330585


In [12]:
nn.feedforward(x_train)
# print_image(x_train[0])