In [None]:
import numpy as np
import cupy as cp
from tensor import Tensor
import nn
from tqdm import trange
from datasets import load_dataset

In [None]:

# Load the 'mnist' dataset through `datasets.load_dataset`; the 'train' and
# 'test' splits of the result are indexed later in this cell.
mnist = load_dataset('mnist')

def convert_to_np(example):
    """Attach a NumPy view/copy of the example's image under 'np_image'.

    Args:
        example: Mapping with an 'image' entry that `np.asarray` accepts.

    Returns:
        The same `example` object, mutated in place with the new 'np_image' key.
    """
    image_array = np.asarray(example['image'])
    example['np_image'] = image_array
    return example
# Run convert_to_np over the dataset so each example gains an 'np_image' entry.
mnist = mnist.map(convert_to_np)

# Materialise images and labels of both splits as host-side NumPy arrays
# (they are moved to the GPU batch-by-batch later, not here).
X_train = np.asarray(mnist['train']['np_image'])
Y_train = np.asarray(mnist['train']['label'])
X_test = np.asarray(mnist['test']['np_image'])
Y_test = np.asarray(mnist['test']['label'])

# Hyperparameters: learning rate passed to model.step and mini-batch size.
lr, BS = 0.01, 64

# 784 -> 128 -> 10 network ending in LogSoftmax, placed on the "cuda" device.
model = nn.Sequential(
    nn.Linear(784, 128), nn.ReLU(), nn.Linear(128, 10), nn.LogSoftmax()
).to("cuda")

In [None]:
# Training loop: 1000 steps on random mini-batches; the per-step loss value and
# batch accuracy are collected in `losses` / `accuracies`.
losses, accuracies = [], []
progress = trange(1000)
for step in progress:
  # Draw BS random training indices (independent draws, so repeats are possible).
  batch_idx = np.random.randint(0, X_train.shape[0], size=BS)
  labels = Y_train[batch_idx]
  inputs = Tensor(X_train[batch_idx].reshape(-1, 28 * 28)).to('cuda')

  # Target mask: -1.0 at each sample's true class, 0 elsewhere. Multiplying it
  # with the model output and averaging over all entries gives the loss.
  targets = np.zeros((len(batch_idx), 10), np.float32)
  targets[np.arange(targets.shape[0]), labels] = -1.0
  targets = Tensor(targets).to('cuda')

  output = model.forward(inputs)
  loss_tensor = output.mul(targets).mean()
  loss_tensor.backward()

  loss = loss_tensor.data
  # .get() copies the predicted classes back to the host for comparison with
  # the NumPy labels.
  cat = np.argmax(output.data, axis=1).get()
  accuracy = (cat == labels).mean()
  losses.append(loss)
  accuracies.append(accuracy)
  progress.set_description(f"loss {loss} accuracy {accuracy}")

  # SGD
  model.step(lr=lr)


In [None]:
# Evaluate on the test set, one example per forward pass.
#
# `accuracies` is reset here on purpose: it otherwise still holds the per-step
# training accuracies, and appending test results to that list would make the
# mean reported afterwards a blend of training and test accuracy. Resetting
# also makes this cell safe to re-run.
accuracies = []
for i in range(len(X_test)):
    # Flatten the single image to shape (1, 784) and move it to the GPU.
    sample = Tensor(np.array(X_test[i]).reshape((-1, 28*28))).to('cuda')
    output = model.forward(sample)
    # Bring the prediction back to the host with .get() (as the training loop
    # does) so the list holds plain NumPy scalars, not device arrays.
    cat = np.argmax(output.data, axis=1).get()
    accuracy = (cat == Y_test[i]).mean()
    accuracies.append(accuracy)

In [None]:
import cupy as cp

# Average the recorded accuracies on the host WITHOUT rebinding `accuracies`.
# Turning the list into an array in place would break any re-run of a cell
# that calls accuracies.append(...). float() accepts both NumPy scalars and
# 0-d device arrays, so the entries may be of either kind.
mean_accuracy = float(np.mean([float(a) for a in accuracies]))
print(f"Accuracy: {mean_accuracy}")

In [None]:
# Display the data of the 'LinearW' parameter of the model's first layer.
model.layers[0].params['LinearW'].data

In [None]:
# Use the test set to evaluate the model
def forward(x):
  """Apply l1, clamp negatives to zero, apply l2, and return the raw data.

  NOTE(review): `l1` and `l2` are not defined anywhere in the visible
  notebook, so this only works if they already exist in the kernel —
  confirm where they are supposed to come from.

  Args:
    x: Input passed to `l1`; presumably a Tensor — TODO confirm.

  Returns:
    The `.data` attribute of the result of `l2`.
  """
  hidden = l1(x)
  # ReLU applied directly on the underlying array.
  hidden.data = np.maximum(hidden.data, 0)
  return l2(hidden).data


def numpy_eval():
  """Compute accuracy over the whole test set in one forward pass.

  Flattens `X_test` to rows of 784 values, prints the resulting data shape,
  runs `forward`, and compares the arg-max class per row with `Y_test`.

  Returns:
    The mean of the element-wise comparison between labels and predictions.
  """
  flat_images = Tensor(X_test.reshape((-1, 28*28)))
  print(flat_images.data.shape)
  scores = forward(flat_images)
  predicted = np.argmax(scores, axis=1)
  return (Y_test == predicted).mean()

# Run the whole-test-set evaluation and print the accuracy it returns.
print(f"test set accuracy is {numpy_eval()}")


In [None]:
from datasets import load_dataset
import numpy as np

# Load the 'mnist' dataset again through `datasets.load_dataset`; this rebinds
# `mnist`, replacing whatever an earlier cell stored under that name.
mnist = load_dataset('mnist')

def convert_to_np(example):
    """Add flattened-image and target-vector entries to a dataset example.

    Writes two keys into `example`:
      * 'np_image': the image as a NumPy array reshaped to rows of 28*28 values.
      * 'np_label': a float32 vector of length 10 that is 0 everywhere except
        -1.0 at index `example['label']`.

    Args:
        example: Mapping with an 'image' entry (convertible by `np.array`, with
            a multiple of 784 elements) and an integer 'label' entry.

    Returns:
        The same `example` object, mutated in place.
    """
    pixels = np.array(example['image'])
    example['np_image'] = pixels.reshape((-1, 28*28))

    target = np.zeros(10, dtype=np.float32)
    target[example['label']] = -1.0
    example['np_label'] = target
    return example
# Run convert_to_np over the dataset via `map`, so each example gains the
# 'np_image' and 'np_label' entries; the result rebinds `mnist`.
mnist = mnist.map(convert_to_np)

In [None]:
import nn
from tensor import Tensor
import numpy as np

# 784 -> 128 -> 10 network ending in LogSoftmax. Unlike the earlier training
# cell, no .to("cuda") is applied here.
model = nn.Sequential(nn.Linear(784, 128), nn.ReLU(), nn.Linear(128, 10), nn.LogSoftmax())

In [None]:
# Random standard-normal batch of 64 rows x 784 features, wrapped in a Tensor.
x = Tensor(np.random.randn(64, 784))

In [None]:
# Forward pass on the random batch, then a call to model.step with argument 1.
# NOTE(review): no backward() is called between forward() and step() in this
# cell — confirm what step() does when no gradients have been computed.
y = model.forward(x)
model.step(1)
# Display the first layer's 'LinearW' data after the step.
model.layers[0].params['LinearW'].data

In [None]:
from tensor import Tensor, Dot
import numpy as np
# Gradient check for the custom Tensor: take the mean of a.mse(b), run
# backward from it, and print the gradient that lands on `a`.
a = Tensor(np.array([[1, 2, 3], [4, 5, 6]]))
b = Tensor(np.array([[6, 5, 4], [3, 2, 1]]))

c = a.mse(b).mean()
c.backward()
print(a.grad)

In [None]:
import torch

# PyTorch reference: mean-reduced MSE between two 2x3 float32 tensors, with the
# gradient taken with respect to `a` only (b does not require grad).
a = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.float32).requires_grad_()
b = torch.tensor([[6, 5, 4], [3, 2, 1]], dtype=torch.float32)

c = torch.nn.functional.mse_loss(a, b, reduction="mean")
print(c)
c.backward()

# For a mean over 6 elements the gradient is 2 * (a - b) / 6.
print(a.grad)

In [None]:
import torch
# Cross-check the custom Tensor.conv2d against torch.nn.functional.conv2d on a
# single-channel 28x28 input with one 3x3 kernel.
# (Alternative multi-channel setup that was tried: x of shape (1, 3, 32, 32)
# with w of shape (3, 1, 5, 5).)
x_init = np.random.randn(1, 1, 28, 28).astype(np.float32)
w_init = np.random.randn(1, 1, 3, 3).astype(np.float32)

# Upper-case names are the custom Tensors, lower-case names the torch twins
# built from the same arrays.
X = Tensor(x_init)
x = torch.tensor(x_init, requires_grad=True, dtype=torch.float32)
Y = Tensor(w_init)
y = torch.tensor(w_init, requires_grad=True, dtype=torch.float32)

# Same convolution on both sides, then backward from the mean of the output.
Z = X.conv2d(Y, 1, 1)
Z.mean().backward()
z = torch.nn.functional.conv2d(x, y, bias=None, stride=1, padding=1)
z.mean().backward()

# Forward outputs and weight gradients must agree within 1e-6.
# NOTE(review): the input gradient (X.grad vs x.grad) is not compared here.
np.testing.assert_allclose(Z.data, z.detach().numpy(), atol=1e-6)
np.testing.assert_allclose(Y.grad, y.grad, atol=1e-6)
z.shape