In [1]:
import numpy as np

In [2]:
class Tensor(np.ndarray):
    '''Tensor implementation
       extend numpy.ndarray to support gradient recording.
    '''
    def __new__(cls, input_array, requires_grad=False, dtype=None, op=None, in_nodes=None):
        t = np.asarray(input_array,dtype=dtype).view(cls)
        t.requires_grad = requires_grad
        t.op = op
        t.in_nodes = in_nodes
        t.outdegree = 0
        t.is_root = op is None
        t._grad = np.zeros(t.shape, dtype=dtype)

        return t
    
    @property
    def grad(self):
        if self.requires_grad:
            return self._grad
    
    
    
    def __array_finalize__(self, obj):
        if obj is None:
            return
        
        self.requires_grad = False
        self.op = None
        self.in_nodes= None
        self.outdegree = 0
        self.is_root=False
        self._grad = np.zeros(self.shape, dtype=self.dtype)
        
        
    def backward(self):
        grads = self.op.backward(self.in_nodes, self._grad)
        for i, node in enumerate(self.in_nodes):
            node._grad += grads[i]
            node.outdegree-=1
            if not node.is_root and node.outdegree == 0:
                node.backward()
            
    def zeros_grad(self):
        self._grad = np.zeros(self.shape, dtype=self.dtype)

In [13]:
class Operator:
    def __call__(self, *args, **kwargs):
        for arg in args:
            assert isinstance(arg, Tensor)
            arg.outdegree+=1
        
        out = self.forward(*args, **kwargs)
        out.op = self
        out.in_nodes=args
        return out
    def forward(self):
        raise NotImplementedError
    def backward(self):
        raise NotImplementedError
    
class Add(Operator):
    def forward(self, x, y):
        return x + y
    
    def backward(self, in_nodes, grad):
        x, y = in_nodes
        return [1*grad, grad.mean(axis=0)]
    
class MatMul(Operator):
    def forward(self, x, y):
        return np.matmul(x, y)
    
    def backward(self, in_nodes, grad):
        x, y = in_nodes
        return [np.matmul(grad, y.T), np.matmul(x.T, grad)]
    
class Relu(Operator):
    def forward(self, x):
        pass
    def backward(self, in_nodes):
        pass
    
class Sigmoid(Operator):
    def forward(self, x):
        pass
    
    def backward(self, in_nodes):
        pass

In [14]:
class Module:
    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)
    
class Linear(Module):
    def __init__(self, d_in, d_out, bias=True):
        self.d_in = d_in
        self.d_out = d_out
        self.bias = bias
        self.w = Tensor(np.random.randn(d_in, d_out), requires_grad=True)
        if bias:
            self.b = Tensor(np.zeros(d_out), requires_grad=True)
    
    def forward(self, x):
        z = MatMul()(x, self.w)
        if self.bias:
            z = Add()(z, self.b)
        return z    

In [15]:
a = Tensor([1,2,3], requires_grad=True)
b = Tensor([2,2,2], requires_grad=True)

In [16]:
c = Tensor([[1,1,1],[2,2,2]])
l1 = Linear(3,4,bias=True)

In [17]:
z = l1(c)
z._grad+=1

In [18]:
z.backward()

In [19]:
l1.b._grad

array([1., 1., 1., 1.])

In [20]:
l1.w._grad

array([[3., 3., 3., 3.],
       [3., 3., 3., 3.],
       [3., 3., 3., 3.]])