In [1]:
import numpy as np
import cupy as cp
from tensor import Tensor
import nn
from tqdm import trange
from datasets import load_dataset

In [2]:
# Load the 'mnist' dataset through `datasets.load_dataset`; its 'train' and
# 'test' splits are indexed further down in this cell.
mnist = load_dataset('mnist')

def convert_to_np(example):
    """Store the example's image as a NumPy array under 'np_image'.

    The mapping is modified in place and the same object is returned, which
    is the shape of callback passed to `mnist.map` below.
    """
    image_array = np.asarray(example['image'])
    example['np_image'] = image_array
    return example
# Run convert_to_np over the dataset so the 'np_image' column read below exists.
mnist = mnist.map(convert_to_np)

# Materialise images and labels as host-side NumPy arrays (not CuPy); batches
# are moved to the GPU later, when they are wrapped in a Tensor.
X_train = np.asarray(mnist['train']['np_image'])
Y_train = np.asarray(mnist['train']['label'])
X_test = np.asarray(mnist['test']['np_image'])
Y_test = np.asarray(mnist['test']['label'])

In [None]:

# Optimisation hyper-parameters: SGD step size and mini-batch size.
lr = 0.01
BS = 64

# Two-layer MLP over flattened 28x28 inputs with 10 log-softmax outputs,
# constructed and moved to the "cuda" device in a single expression.
model = nn.Sequential(
    nn.Linear(28*28, 128),
    nn.ReLU(),
    nn.Linear(128, 10),
    nn.LogSoftmax(),
).to("cuda")

In [7]:
# Mini-batch SGD: 1000 steps, recording the loss and batch accuracy of each step.
losses, accuracies = [], []
for i in (t := trange(1000)):
  # Sample BS training indices uniformly at random (with replacement).
  samp = np.random.randint(0, X_train.shape[0], size=(BS))
  x = Tensor(X_train[samp].reshape((-1, 28*28))).to('cuda')
  Y = Y_train[samp]

  # Target matrix: -1 in each sample's label column, 0 elsewhere, so that
  # mean(output * y) acts as a negative log-likelihood on the log-softmax output.
  # NOTE(review): the mean appears to run over all BS*10 entries rather than
  # BS rows, which would make the value 1/10 of the per-sample NLL — confirm
  # this scaling is intended.
  y = np.zeros((BS, 10), dtype=np.float32)
  y[np.arange(BS), Y] = -1.0
  y = Tensor(y).to('cuda')

  output = model.forward(x)
  x = output.mul(y).mean()  # from here on `x` holds the loss tensor
  x.backward()

  loss = x.data
  # argmax is evaluated on the device array; .get() copies the result to host.
  cat = np.argmax(output.data, axis=1).get()
  accuracy = (cat == Y).mean()
  losses.append(loss)
  accuracies.append(accuracy)
  t.set_description(f"loss {loss} accuracy {accuracy}")

  # SGD
  model.step(lr=lr)


loss [0.0053428] accuracy 1.0: 100%|██████████| 1000/1000 [00:02<00:00, 464.42it/s]     


In [None]:
# Evaluate the trained model on the held-out test split, one mini-batch at a
# time instead of one sample per forward pass.
# Test results are accumulated in their own counter so they are not appended
# to the per-step training `accuracies` list, and the final accuracy is shown.
test_correct = 0
for start in range(0, len(X_test), BS):
    stop = start + BS
    # Same flattening and device placement as the training batches.
    test_batch = Tensor(X_test[start:stop].reshape((-1, 28*28))).to('cuda')
    output = model.forward(test_batch)
    # argmax is evaluated on the device array; .get() copies predictions to
    # host memory so they can be compared with the NumPy labels.
    cat = np.argmax(output.data, axis=1).get()
    test_correct += int((cat == Y_test[start:stop]).sum())

test_accuracy = test_correct / len(X_test)
print(f"test set accuracy is {test_accuracy}")

WIP:
 - nn.Sequential

In [None]:
# Inspect the raw array stored under the first layer's 'LinearW' parameter
# (presumably the weight matrix of nn.Linear(784, 128) — confirm in nn.Linear).
model.layers[0].params['LinearW'].data

In [None]:
# Use the test set to evaluate the model
def forward(x):
  """Run `model` on `x` and return its output as a host NumPy array.

  Delegates to `model` (the nn.Sequential network) rather than to separate
  `l1`/`l2` layer objects, which the visible notebook never defines and which
  therefore raise NameError on a fresh kernel.

  Args:
    x: Tensor of flattened images, one row per sample.

  Returns:
    The model output copied back to host memory, one row of class scores per
    sample. The scores are the outputs of the final nn.LogSoftmax layer;
    argmax along axis 1 gives the predicted class.
  """
  # `model` was moved to "cuda", so the input is placed there as well.
  output = model.forward(x.to('cuda'))
  # .get() copies the device array to host memory for the NumPy-based caller.
  return output.data.get()


def numpy_eval():
  """Classify the whole test split via `forward` and return the accuracy.

  Prints the shape of the flattened test inputs, then returns the fraction of
  test samples whose argmax prediction equals the label in `Y_test`.
  """
  test_inputs = Tensor(X_test.reshape((-1, 28*28)))
  print(test_inputs.data.shape)
  scores = forward(test_inputs)
  predicted = np.argmax(scores, axis=1)
  return (Y_test == predicted).mean()

print(f"test set accuracy is {numpy_eval()}")


In [None]:
from datasets import load_dataset
import numpy as np

# Reload MNIST from scratch; this rebinds `mnist`, replacing the mapped
# dataset built earlier in the notebook.
mnist = load_dataset('mnist')

def convert_to_np(example):
    """Add a flattened image and a -1-hot label vector to the example.

    Writes two keys into `example` and returns the same object:
      'np_image': the image reshaped to (-1, 784)
      'np_label': float32 vector of length 10, -1.0 at the label index
    Note that this redefines the `convert_to_np` from the first data cell.
    """
    pixels = np.array(example['image'])
    example['np_image'] = pixels.reshape((-1, 28*28))

    one_hot = np.zeros((10), np.float32)
    one_hot[example['label']] = -1.0
    example['np_label'] = one_hot
    return example
# Run convert_to_np over the dataset so examples carry the 'np_image' and
# 'np_label' entries it writes.
mnist = mnist.map(convert_to_np)

In [None]:
import nn
from tensor import Tensor
import numpy as np

# Same MLP architecture as the trained model, rebuilt here without moving it
# to a device. Note that this rebinds `model`.
model = nn.Sequential(
    nn.Linear(28*28, 128),
    nn.ReLU(),
    nn.Linear(128, 10),
    nn.LogSoftmax(),
)

In [None]:
# Random standard-normal batch (64 rows of 784 features) wrapped in a Tensor.
x = Tensor(np.random.randn(64, 784))

In [15]:
# First training image reshaped to (1, 1, 28, 28) for the conv2d check.
img = Tensor(X_train[0].reshape((1, 1, 28, 28)))
# 3x3 kernel of standard-normal random weights (not an actual Gaussian blur).
kernel = Tensor(np.random.randn(1, 1, 3, 3))
# NOTE(review): the two trailing arguments are presumably stride and padding
# (the torch cross-check uses padding=1, stride=1) — confirm their order in
# Tensor.conv2d.
out = img.conv2d(kernel, 1, 1)

In [14]:
# Reduce the conv2d output to its mean, backpropagate from that value, and
# display the gradient stored on `kernel`.
mean = out.mean()
mean.backward()
kernel.grad

array([[[[35.10841837, 35.10841837, 35.10841837],
         [35.10841837, 35.10841837, 35.10841837],
         [35.10841837, 35.10841837, 35.10841837]]]])

In [28]:
import torch

# Cross-check against PyTorch: same image and kernel data, padding=1 and
# stride=1, then display d(mean)/d(kernel) for comparison with `kernel.grad`.
tensorimg = torch.tensor(img.data, dtype=torch.float32, requires_grad=True)
tensorkernel = torch.tensor(kernel.data, dtype=torch.float32, requires_grad=True)
# Use a torch-specific name so the custom-Tensor `out` from the conv2d cell is
# not overwritten; re-running the gradient cell then still operates on it.
torch_out = torch.conv2d(tensorimg, tensorkernel, padding=1, stride=1)
torch_out.mean().backward()
tensorkernel.grad

tensor([[[[35.1084, 35.1084, 35.1084],
          [35.1084, 35.1084, 35.1084],
          [35.1084, 35.1084, 35.1084]]]])