In [150]:
from pathlib import Path
import numpy as np

### Load data

In [151]:
img_size = 784
img_shape = (28, 28)

train_data = np.loadtxt(Path('D:/Development/Data/datasets/csv/mnist_train_small.csv'), delimiter=',')
test_data = np.loadtxt(Path('D:/Development/Data/datasets/csv/mnist_test.csv'), delimiter=',')

### Create dataset

In [152]:
def onehot(n: int, i: int) -> np.ndarray:
    v = np.zeros(n)
    v[i] = 1.0
    return v

In [153]:
x_train = train_data[:,1:] / 255.0
y_train = np.array([onehot(10, int(i)) for i in train_data[:,0]])

x_test = test_data[:,1:] / 255.0
y_test = np.array([onehot(10, int(i)) for i in test_data[:,0]])

### Define functions and model

In [None]:
def softmax(x: np.ndarray, axis=0, keepdims=True) -> np.ndarray:
    y = np.exp(x)
    return y / np.sum(y, axis=axis, keepdims=keepdims)

def cross_entropy(p: np.ndarray, q: np.ndarray) -> float:
    return -np.sum(p * np.log(q))

In [None]:
class Perceptron:
    def __init__(self, n_in: int, n_out: int) -> None:
        self.W: np.ndarray = np.random.uniform(-0.01, 0.01, (n_in, n_out))
        self.b: np.ndarray = np.zeros(n_out)
    
    def __call__(self, x: np.ndarray) -> np.ndarray:
        if x.ndim == 1:
            x = x.reshape((1, x.size))
        p = softmax(x @ self.W + self.b, axis=1)
        return p.ravel() if len(p) == 1 else p

    def save(self, fp: Path) -> None:
        np.savez(fp, w=self.W, b=self.b)

    def load(self, fp: Path) -> None:
        params = np.load(fp)
        self.W, self.b = params['w'], params['b']

def loss(model: Perceptron, X: np.ndarray, P: np.ndarray) -> float:
    return np.mean([cross_entropy(p,q) for (p,q) in zip(P,model(X))])

def accuracy(model: Perceptron, X: np.ndarray, P: np.ndarray) -> float:
    fn = lambda a, b: 1 if np.argmax(a) == np.argmax(b) else 0
    return np.mean([fn(p,q) for (p,q) in zip(P,model(X))])

### SGD Implementation

In [None]:
def train(model: Perceptron, X: np.ndarray, Y: np.ndarray, lr, batch_size, max_epoch) -> None:
    n_samples = len(x_test)
    n_batches = n_samples // batch_size

    for epoch in range(max_epoch):
        idxs = np.random.permutation(n_samples)
        batches = np.array_split(idxs, n_batches)

        for batch in batches:
            dLdy = model(X[batch]) - Y[batch]
            dW = np.mean(np.einsum('ij,ik->ijk', X[batch], dLdy), axis=0)
            db = np.mean(dLdy, axis=0)
            
            model.W -= lr * dW
            model.b -= lr * db

### Create, train and test model

In [None]:
n_in = 784
n_out = 10

learning_rate = 0.1
batch_size = 64
max_epoch = 4

model = Perceptron(n_in, n_out)

print(f"untrained loss: {round(loss(model, x_test, y_test), 3)}")
train(model, x_train, y_train, learning_rate, batch_size, max_epoch)
print(f"trained loss: {round(loss(model, x_test, y_test), 3)}")

model_accuracy = accuracy(model, x_test, y_test)
print(f"test set accuracy: {round(model_accuracy * 100, 2)}%")

untrained loss: 2.307
trained loss: 0.337
test set accuracy: 90.76%


### Save, load and test model

In [158]:
fp = Path('D:/Development/Data/tmp/parameters.npz')
model.save(fp)

In [159]:
model = Perceptron(n_in, n_out)
model.load(fp)

In [160]:
model_loss = loss(model, x_test, y_test)
model_accuracy = accuracy(model, x_test, y_test)

print(f"loss: {round(model_loss, 3)}")
print(f"accuracy: {round(model_accuracy * 100, 2)}%")

loss: 0.366
accuracy: 90.22%
