## Device configuration

In [8]:
from tinygrad import Device

# Show which backend tinygrad picked as its default device.
print(Device.DEFAULT)

METAL


## Model

In [9]:
from tinygrad import Tensor, nn

class Model:
  """Small convolutional classifier: two conv/ReLU/max-pool stages and a linear head."""

  def __init__(self):
    """Create the two 3x3 convolutions (1 -> 32 -> 64 channels) and the 10-way output layer."""
    self.l1 = nn.Conv2d(1, 32, kernel_size=(3,3))
    self.l2 = nn.Conv2d(32, 64, kernel_size=(3,3))
    # 1600 input features; for 28x28 inputs this is 64 channels x 5 x 5 after both stages.
    self.l3 = nn.Linear(1600, 10)

  def __call__(self, x:Tensor) -> Tensor:
    """Run the forward pass and return the output of the final linear layer."""
    pooled = x
    # Both convolutional stages share the same shape: conv -> ReLU -> 2x2 max-pool.
    for conv in (self.l1, self.l2):
      pooled = conv(pooled).relu().max_pool2d((2,2))
    # Flatten everything after the batch axis before the linear head.
    features = pooled.flatten(1)
    return self.l3(features.dropout(0.5))

## Get the MNIST dataset

In [10]:
from tinygrad.nn.datasets import mnist

# mnist() returns four values, unpacked here as the train/test images and labels.
X_train, Y_train, X_test, Y_test = mnist()
# Sanity check: show the shape and dtype of the training images and labels.
print(X_train.shape, X_train.dtype, Y_train.shape, Y_train.dtype)

(60000, 1, 28, 28) dtypes.uchar (60000,) dtypes.uchar


## Use the model

In [11]:
# Baseline: accuracy of a freshly constructed model on the whole test set.
model = Model()
predictions = model(X_test).argmax(axis=1)  # highest-scoring class per test image
acc = (predictions == Y_test).mean()
print(acc.item())

0.08919999748468399


In [12]:
from prettytable import PrettyTable
import numpy as np

def count_parameters(model, layer_names=("l1", "l2", "l3")):
    """Print a per-parameter size table for ``model`` and return the total count.

    Args:
        model: Object exposing every name in ``layer_names`` as an attribute.
            Each such layer is inspected for ``weight`` and ``bias`` attributes
            that carry a ``.shape``.
        layer_names: Attribute names of the layers to include, in table order.
            Defaults to the three layers of this notebook's ``Model``.

    Returns:
        The sum of the element counts of every parameter that was found.
    """
    table = PrettyTable(["Modules", "Parameters"])
    total_params = 0
    for name in layer_names:
        layer = getattr(model, name)
        for param_name in ("weight", "bias"):
            param = getattr(layer, param_name, None)
            if param is None:
                # Layers created with bias=False expose bias as None; nothing to count.
                continue
            n_params = np.prod(param.shape)
            table.add_row([f"{name}.{param_name}", n_params])
            total_params += n_params
    print(table)
    print(f"Total Trainable Params: {total_params}\n")
    return total_params

# Print the parameter table; the returned total is also shown as the cell's value.
count_parameters(model)

+-----------+------------+
|  Modules  | Parameters |
+-----------+------------+
| l1.weight |    288     |
|  l1.bias  |     32     |
| l2.weight |   18432    |
|  l2.bias  |     64     |
| l3.weight |   16000    |
|  l3.bias  |     10     |
+-----------+------------+
Total Trainable Params: 34826



np.int64(34826)

### Use the PyTorch weights

In [13]:
# SECURITY: allow_pickle=True makes np.load unpickle Python objects, which can run
# arbitrary code. Only load a weights file that comes from a trusted source.
# .item() unwraps the single stored object, which is then indexed by "<layer>.<param>" keys.
loaded_weights = np.load('pytorch_weights.npy', allow_pickle=True).item()

# Replace every layer parameter with its counterpart from the loaded weights.
for layer_name in ('l1', 'l2', 'l3'):
  layer = getattr(model, layer_name)
  for param_name in ('weight', 'bias'):
    key = f"{layer_name}.{param_name}"
    new_param = Tensor(loaded_weights[key])
    expected_shape = getattr(layer, param_name).shape
    # Fail here with a clear message rather than later inside the forward pass.
    if new_param.shape != expected_shape:
      raise ValueError(f"{key}: loaded shape {new_param.shape} does not match model shape {expected_shape}")
    setattr(layer, param_name, new_param)

## Final probabilities

In [14]:
# Slice (rather than index) so the single test image keeps its leading batch axis.
test_image = X_test[:1]
logits = model(test_image)
# Turn the raw scores into probabilities and bring them back as a NumPy array.
tinygrad_probs = logits.softmax().numpy()
print("tinygrad probabilities:", tinygrad_probs)

tinygrad probabilities: [[2.8544218e-07 9.3154862e-09 2.1578304e-05 3.9734609e-06 3.3519236e-09
  2.2360809e-09 2.8835455e-16 9.9997044e-01 3.5189873e-06 2.5945394e-07]]
