In [1]:
from tensor import Tensor
import numpy as np

import torch


In [2]:
class Module:
    def __init__(self):
        pass

    def zero_grad(self):
        for p in self.parameters():
            p.grad = np.zeros_like(p.data)

    def params(self):
        return []

In [3]:
class Linear(Module):
    def __init__(self, in_features, out_features, bias=True):
        self.bias = bias
        self.w = Tensor.normal(shape=(in_features, out_features))
        self.b = Tensor.zeros((1, out_features)) if self.bias else None

    def __call__(self, x):
        x = x if isinstance(x, Tensor) else Tensor(x)
        if self.bias:
            out = self.w @ x + self.b
        else:
            out = self.w @ x

        return out

    def params(self):
        return [self.w, self.b]

In [4]:
# Madgrad linear forward
layer = Linear(5, 6)
data = Tensor.normal(shape=(1, 6))
out = layer(data.transpose())
print(f'Madgrad: {out}')
# PyTorch linear forward
layer_t = torch.nn.Linear(5, 6)
data_t = torch.tensor(data.numpy())
out_t = layer(data_t.transpose(0, 1))
print(f'PyTorch: {out_t}')

Madgrad: Tensor(data=[[ 0.22353754  0.22353754  0.22353754  0.22353754  0.22353754  0.22353754]
 [ 2.54002636  2.54002636  2.54002636  2.54002636  2.54002636  2.54002636]
 [-0.74357802 -0.74357802 -0.74357802 -0.74357802 -0.74357802 -0.74357802]
 [ 0.07711063  0.07711063  0.07711063  0.07711063  0.07711063  0.07711063]
 [-0.62669134 -0.62669134 -0.62669134 -0.62669134 -0.62669134 -0.62669134]])
PyTorch: Tensor(data=[[ 0.22353754  0.22353754  0.22353754  0.22353754  0.22353754  0.22353754]
 [ 2.54002636  2.54002636  2.54002636  2.54002636  2.54002636  2.54002636]
 [-0.74357802 -0.74357802 -0.74357802 -0.74357802 -0.74357802 -0.74357802]
 [ 0.07711063  0.07711063  0.07711063  0.07711063  0.07711063  0.07711063]
 [-0.62669134 -0.62669134 -0.62669134 -0.62669134 -0.62669134 -0.62669134]])


In [None]:
class MLP(Module):
    def __init__(self, layer_sizes):
        assert isinstance(layer_sizes, tuple) or isinstance(layer_sizes, list), 'layer_sizes must be a list or a tuple'
        self.layers = [Linear(layer_sizes[i], layer_sizes[i+1]) for i in range(len(layer_sizes)-1)]

    def __call__(self, x):
        for i, layer in enumerate(self.layers):
            x = layer(x).relu()
        return x