In [1]:
import os
import numpy as np
import random
import urllib.request
from npgpt.tensor import Tensor, Linear, SequentialModel, Model, Sigmoid, Tanh, ReLU, NewGELU, Softmax, LogSoftmax, cross_entropy
from npgpt.optim import SGD, Adam

In [3]:
# Download the dataset if it's not already there.
# The file is fetched under a temporary name and only moved to 'mnist.npz'
# once the transfer finished, so an interrupted download can never leave a
# truncated 'mnist.npz' that the isfile() guard would accept on the next run.
if not os.path.isfile('mnist.npz'):
    url = "https://s3.amazonaws.com/img-datasets/mnist.npz"
    tmp_path = "mnist.npz.part"
    try:
        urllib.request.urlretrieve(url, tmp_path)
        os.replace(tmp_path, "mnist.npz")
    finally:
        # After a successful os.replace the temp file is gone; this only
        # cleans up the partial file left behind by a failed download.
        if os.path.isfile(tmp_path):
            os.remove(tmp_path)

In [4]:
# Load the train/test arrays from the downloaded archive.
# SECURITY: allow_pickle was True; unpickling a file fetched from the network
# can execute arbitrary code. The arrays read here are plain numeric arrays
# (x_train reports dtype uint8 and the labels are used as integer indices),
# so pickle support is not needed and is disabled explicitly.
with np.load("mnist.npz", allow_pickle=False) as f:
    x_train, y_train = f['x_train'], f['y_train']
    x_test, y_test = f['x_test'], f['y_test']

In [5]:
# Dimensions of the full training image array.
np.shape(x_train)

(60000, 28, 28)

In [6]:
# Shape of a single training sample (everything after the leading sample axis).
x_train.shape[1:]

(28, 28)

In [7]:
# Element type of the raw training samples (an array's rows share its dtype).
x_train.dtype

dtype('uint8')

In [8]:
# Number of training samples (length of the leading axis).
x_train.shape[0]

60000

In [9]:
# Three stacked Linear layers: 784 -> 256 -> 128 -> 10.
# 784 matches a flattened 28x28 sample; 10 matches the width of the one-hot
# targets built in the training cell. The last layer is given nonlin=None
# (presumably "no activation", i.e. raw scores — confirm against npgpt.tensor).
layer_specs = [
    (784, 256, NewGELU),
    (256, 128, NewGELU),
    (128, 10, None),
]
model = SequentialModel(
    layers=[
        Linear(fan_in, fan_out, nonlin=activation)
        for fan_in, fan_out, activation in layer_specs
    ]
)

In [10]:
# Adam over the model's tensors with a 1e-3 learning rate.
# Alternative kept for reference:
#   optimizer = SGD(model.get_trainable_tensors(), learning_rate=1e-2)
# NOTE(review): the SGD variant reads get_trainable_tensors() while Adam is
# handed get_tensors() — confirm which accessor is intended here.
model_tensors = model.get_tensors()
optimizer = Adam(model_tensors, learning_rate=1e-3)

In [11]:
# Train for a fixed number of epochs using mini-batches drawn in a fresh
# random order every epoch.
num_epochs = 3
batch_size = 64
one_hot_rows = np.eye(10)  # row k is the one-hot target vector for label k

for _ in range(num_epochs):
    # Fix: the permutation used to be computed and then ignored — batches were
    # taken as contiguous slices of x_train / y_train, so every epoch saw the
    # samples in the same stored order. The batches are now drawn through it.
    shuffle_indices = np.random.permutation(len(x_train))

    for bc in range(0, len(x_train), batch_size):
        # The final batch is shorter when len(x_train) is not a multiple of
        # batch_size; slicing handles that without special-casing.
        batch_indices = shuffle_indices[bc: bc + batch_size]

        # Convert pixels to float32, scale by 1/255 and flatten each sample
        # into a single row so the batch is 2-D: (batch, features).
        x = np.float32(x_train[batch_indices])
        x /= 255.
        x = x.reshape((x.shape[0], -1))
        X = Tensor(x, nograd=True)

        out = model(X)

        # One-hot encode the integer labels of the same shuffled batch.
        y = one_hot_rows[y_train[batch_indices]]
        correct = Tensor(y, nograd=True)

        LOSS = cross_entropy(out, correct)

        # Clear gradients from the previous step before backpropagating.
        model.zero_grad()
        LOSS.backward()
        optimizer.step()

In [12]:
# Measure classification accuracy on the test split, one sample at a time.
x_test_a = x_test
correct = 0
for image, label in zip(x_test_a, y_test):
    # Same preprocessing as the training cell: flatten, float32, scale by 1/255,
    # then add a leading batch axis of size 1.
    x = np.float32(image.flatten())
    x /= 255.
    x = x.reshape((1, -1))
    # Consistency fix: inputs are wrapped with nograd=True exactly as in the
    # training cell; evaluation never calls backward(), so the input does not
    # need to take part in gradient tracking.
    X = Tensor(x, nograd=True)

    out = model(X)

    # The predicted class is the index of the largest output value.
    if np.argmax(out.data) == label:
        correct += 1

print(correct)
print(correct/len(x_test_a))

9680
0.968
