In [8]:
import numpy as np
import pandas as pd

In [9]:
np.random.seed(42)

In [10]:
df_train = pd.read_csv('mnist_train.csv')
df_test = pd.read_csv('mnist_test.csv')

In [11]:
X_train, y_train = np.array(df_train.drop(columns=['label']))/255, np.array(df_train['label'])
X_test, y_test = np.array(df_test.drop(columns=['label']))/255, np.array(df_test['label'])

In [12]:
print(f'Shape of X_train: {X_train.shape}\tShape of y_train: {y_train.shape}')
print(f'Shape of X_test:  {X_test.shape}\tShape of y_test:  {y_test.shape}')

Shape of X_train: (60000, 784)	Shape of y_train: (60000,)
Shape of X_test:  (10000, 784)	Shape of y_test:  (10000,)


In [13]:
import numpy as np


def relu(z: np.ndarray, derv: bool=False) -> np.ndarray:
    if derv: return np.where(z > 0, 1, 0) 
    return np.maximum(z, 0)


def softmax(z: np.ndarray, derv: bool=False) -> np.ndarray:
    return np.exp(z) / np.sum(np.exp(z), axis=1, keepdims=True)


def one_hot(y: np.ndarray, num_classes: int=10) -> np.ndarray:
    y_hot = np.zeros((len(y), num_classes))
    y_hot[range(y.shape[0]), y] = 1
    return y_hot


class BobNet:
    def __init__(self, n_in: int, n_hidden: int, n_out: int) -> None:
        self.w1 = np.random.rand(n_in, n_hidden)-0.5
        self.b1 = np.random.rand(n_hidden)-0.5
        self.w2 = np.random.rand(n_hidden, n_out)-0.5
        self.b2 = np.random.rand(n_out)-0.5

    def fit(self,
            X: np.ndarray,
            y: np.ndarray,
            lr: float=0.1,
            epochs: int=100, 
            batch_size: int=32,
            verbose: bool=True) -> None:
        N = X.shape[0] 

        # Simple implementation of Stochastic Gradient Descent
        for epoch in range(epochs):
            # Shuffle dataset
            indices = np.random.permutation(N)
            X, y = X[indices], y[indices]    
            y_hot = one_hot(y)

            for i in range(0, N, batch_size):
                X_batch, y_batch = X[i:i+batch_size], y[i:i+batch_size]
                y_batch_hot = one_hot(y_batch, num_classes=self.w2.shape[1])

                # Forward pass
                z1 = np.dot(X_batch, self.w1) + self.b1                     # N x 512
                h1 = relu(z1)                                               # N x 512
                z2 = np.dot(h1, self.w2) + self.b2                          # N x 10
                h2 = softmax(z2)                                            # N x 10

                # Backpropagation
                dz2 = (h2 - y_batch_hot)                                    # N x 10
                dw2 = np.dot(h1.T, dz2)                                     # 512 x 10
                db2 = np.sum(dz2, axis=0)

                dh1 = np.dot(dz2, self.w2.T)                                # N x 512
                dz1 = dh1 * relu(z1, derv=True)                             # N x 512
                dw1 = np.dot(X_batch.T, dh1)                                # 784 x 512
                db1 = np.sum(dz1, axis=0)

                # Update Parameters
                self.w2 = self.w2 - lr * dw2
                self.b2 = self.b2 - lr * db2
                self.w1 = self.w1 - lr * dw1
                self.b1 = self.b1 - lr * db1

            if verbose and epoch % 10 == 0:
                y_hat = self.forward(X)
                cross_entropy = - np.sum(y_hot * np.log(y_hat))/N
                y_hat = np.argmax(y_hat, 1) 
                accuracy = (y_hat == y).mean()
                print(f'Epoch: {epoch}\tCross-Entropy: {cross_entropy:.2f}\tAccuracy: {accuracy:.2f}')

    def forward(self, x: np.ndarray) -> np.ndarray:
        x = np.dot(x, self.w1) + self.b1
        x = relu(x)
        x = np.dot(x, self.w2) + self.b2
        x = softmax(x)
        return x

    def predict(self, x: np.ndarray) -> np.ndarray:
        out = self.forward(x)
        out = np.argmax(out, 1)
        return out

In [14]:
# Create the neural network, train and predict
bobnet = BobNet(n_in=784, n_hidden=10, n_out=10)
bobnet.fit(X_train, y_train, lr=0.0001, epochs=200)
y_hat = bobnet.predict(X_test)
accuracy = (y_hat == y_test).mean()

Epoch: 0	Cross-Entropy: 1.30	Accuracy: 0.57
Epoch: 10	Cross-Entropy: 0.55	Accuracy: 0.83
Epoch: 20	Cross-Entropy: 0.50	Accuracy: 0.85
Epoch: 30	Cross-Entropy: 0.50	Accuracy: 0.85
Epoch: 40	Cross-Entropy: 0.53	Accuracy: 0.84
Epoch: 50	Cross-Entropy: 0.57	Accuracy: 0.83
Epoch: 60	Cross-Entropy: 0.61	Accuracy: 0.81
Epoch: 70	Cross-Entropy: 0.64	Accuracy: 0.80
Epoch: 80	Cross-Entropy: 0.66	Accuracy: 0.79
Epoch: 90	Cross-Entropy: 0.67	Accuracy: 0.79
Epoch: 100	Cross-Entropy: 0.67	Accuracy: 0.79
Epoch: 110	Cross-Entropy: 0.68	Accuracy: 0.78
Epoch: 120	Cross-Entropy: 0.70	Accuracy: 0.78
Epoch: 130	Cross-Entropy: 0.69	Accuracy: 0.78
Epoch: 140	Cross-Entropy: 0.70	Accuracy: 0.78
Epoch: 150	Cross-Entropy: 0.71	Accuracy: 0.78
Epoch: 160	Cross-Entropy: 0.71	Accuracy: 0.78
Epoch: 170	Cross-Entropy: 0.71	Accuracy: 0.78
Epoch: 180	Cross-Entropy: 0.72	Accuracy: 0.78


KeyboardInterrupt: 

In [None]:
print(accuracy)

0.8585


In [None]:
y = np.array([[0.2, 0.4, 0.2, 0.2], [0.1, 0.1, 0.1, 0.6]])
print(np.argmax(y, 1))

print(bobnet.w2)


[1 3]
[[-2.44113167e-01  1.34625325e-01  3.48583676e-01 -2.62225624e-01
  -7.87520094e-02 -7.36219330e-02  1.43504740e-01 -2.72862822e-01
  -4.13564424e-01 -3.02647020e-01]
 [ 1.20758518e-01 -5.97291748e-03 -7.23513346e-01 -1.11871907e-01
  -2.26745463e-01  3.27218130e-01 -2.39835122e-01 -4.30476003e-01
  -2.70892891e-01 -3.53388176e-01]
 [ 5.78631512e-01 -5.26739668e-01  4.27947488e-01  1.46993633e-01
  -1.10748298e-01  2.71737582e-01  4.22454769e-01 -5.93473867e-01
   1.52633047e-01 -2.54043501e-01]
 [-8.43559549e-01  2.24533681e-01 -2.36529355e-01  4.93259903e-01
  -2.08561809e-01 -1.57497257e-03 -3.69926905e-01 -2.69516748e-01
   3.06979344e-01  1.01811652e-01]
 [-6.32251938e-01  1.83496072e-01  4.39026330e-01  2.05828447e-01
   5.43390536e-01 -3.36553512e-01  3.21046980e-01  3.30465993e-01
  -7.72450124e-01  2.93759467e-01]
 [ 3.13139416e-01 -1.19851188e-01  2.36400602e-01  1.99092593e-01
  -7.94919824e-01  2.84762436e-01 -5.57377602e-01  5.82158998e-01
  -3.98869653e-01 -2.446546

In [None]:
#y = np.array([0, 5, 2, 6, 2, 4, 2, 9])

y = y_train

preds = np.zeros((len(y), 10), dtype=np.float32)
preds[range(y.shape[0]), y] = 1

a = np.argmax(y, axis=0)

print(preds)
print(a)

y_hot = np.eye(10)[y]
print(y_hot)

[[0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]]
4
[[0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]]
