In [13]:
import numpy as np
import pandas as pd

In [14]:
df_train = pd.read_csv('mnist_train.csv')
df_test = pd.read_csv('mnist_test.csv')

In [15]:
X_train, y_train = np.array(df_train.drop(columns=['label'])), np.array(df_train['label'])
X_test, y_test = np.array(df_test.drop(columns=['label'])), np.array(df_test['label'])

In [16]:
print(f'Shape of X_train: {X_train.shape}\tShape of y_train: {y_train.shape}')
print(f'Shape of X_test:  {X_test.shape}\tShape of y_test:  {y_test.shape}')

Shape of X_train: (60000, 784)	Shape of y_train: (60000,)
Shape of X_test:  (10000, 784)	Shape of y_test:  (10000,)


In [17]:
import numpy as np


def relu(z: np.ndarray, derv: bool=False) -> np.ndarray:
    if derv: return np.where(z > 0, 1, 0) 
    return np.maximum(z, 0)


def softmax(z: np.ndarray, derv: bool=False) -> np.ndarray:
    if derv:
        return np.array([(np.exp(z[i]) * (np.sum(np.exp(z)) - 1)) / np.sum(np.exp(z))**2 for i in range(len(z))]) 
    return np.array([np.exp(z[i])/np.sum(np.exp(z)) for i in range(len(z))]) 


def one_hot(y: np.ndarray, num_classes: int=10) -> np.ndarray:
    y = y.reshape(-1)
    y_hot = np.eye(num_classes)[y]
    return y_hot


class BobNet:
    def __init__(self, n_in: int, n_hidden: int, n_out: int) -> None:
        self.w1 = np.random.rand(n_in, n_hidden)
        self.b1 = np.random.rand(n_hidden)
        self.w2 = np.random.rand(n_hidden, n_out)
        self.b2 = np.random.rand(n_out)

    def fit(self,
            X: np.ndarray,
            y: np.ndarray,
            lr: float=0.1,
            epochs: int=100, 
            batch_size: int=16,
            verbose: bool=True) -> None:
        N = X.shape[0] 

        # Simple implementation of stochastic gradient descent. 
        for epoch in range(epochs):
            # Shuffle dataset
            indices = np.random.permutation(N)
            X, y = X[indices], y[indices]    
            y_hot = one_hot(y)

            # Iterate over mini-batches 
            for i in range(0, N, batch_size):
                X_batch, y_batch = X[i:i+batch_size], y[i:i+batch_size]
                y_batch_hot = one_hot(y_batch, num_classes=self.w2.shape[1])


                # Forward pass
                z1 = np.dot(X_batch, self.w1) + self.b1                     # N x 512
                h1 = relu(z1)                                               # N x 512
                z2 = np.dot(h1, self.w2) + self.b2                          # N x 10
                h2 = softmax(z2)                                            # N x 10

                # Backpropagation

                # dh2 = -(h2 - y_batch_hot)/batch_size                         # N x 10
                # dz2 = dh2 * softmax(z2, derv=True)                          # N x 10
                dz2 = (h2 - y_batch_hot)/batch_size 
                dw2 = np.dot(h1.T, dz2)                                     # 512 x 10
                db2 = np.sum(dz2, axis=0)

                dh1 = np.dot(dz2, self.w2.T)                                # N x 512
                dz1 = dh1 * relu(z1, derv=True)                             # N x 512
                dw1 = np.dot(X_batch.T, dh1)                                # 784 x 512
                db1 = np.sum(dz1, axis=0)

                self.w2 = self.w2 - lr * dw2
                self.b2 = self.b2 - lr * db2
                self.w1 = self.w1 - lr * dw1
                self.b1 = self.b1 - lr * db1

            #if verbose:
            #    y_hat = self.predict(X)
                # y_hat_hot = one_hot(y_hat)
                # cross_entropy_loss = - np.sum(y_hot * np.log(y_hat))
            #    pred = np.argmax(y_hat, 1)
            #    acc = (pred == y).mean()
             #   print(f'Acc: {acc}')


    def predict(self, x: np.ndarray) -> np.ndarray:
        x = np.dot(x, self.w1) + self.b1
        x = relu(x)
        x = np.dot(x, self.w2) + self.b2
        return softmax(x)

In [18]:
# Create the neural network, train and predict
bobnet = BobNet(n_in=784, n_hidden=512, n_out=10)
bobnet.fit(X_train, y_train, lr=0.00000000001, epochs=1)
y_hat = bobnet.predict(X_test)
y_hat = np.argmax(y_hat, 1)
accuracy = (y_hat == y_test).mean()

  return np.array([np.exp(z[i])/np.sum(np.exp(z)) for i in range(len(z))])
  return np.array([np.exp(z[i])/np.sum(np.exp(z)) for i in range(len(z))])


In [19]:
print(accuracy)

0.098


In [20]:
y = np.array([[0.2, 0.4, 0.2, 0.2], [0.1, 0.1, 0.1, 0.6]])
print(np.argmax(y, 1))

[1 3]
