In [1]:
from typing import Optional

import numpy as np

In [2]:
class Tensor:
    """Minimal reverse-mode autodiff node wrapping a NumPy array.

    A tensor keeps its value in ``data`` and an accumulated gradient in
    ``grad`` (allocated with the same shape as ``data``). Tensors returned by
    the op methods are non-leaf nodes: they list their operands in ``prev`` and
    carry a ``backward_fn`` closure that adds this node's gradient contribution
    into each operand's ``grad``.
    """

    def __init__(self, data, is_leaf=True, backward_fn=None):
        """Create a tensor.

        Args:
            data: The value to wrap. It must expose ``.shape`` (a NumPy array
                or NumPy scalar), because ``zero_grad`` reads it.
            is_leaf: True for user-created inputs, False for op results.
            backward_fn: Callable taking ``dy`` (this node's gradient) and
                accumulating into the operands' ``grad``. Required when
                ``is_leaf`` is False.

        Raises:
            ValueError: If a non-leaf tensor is created without ``backward_fn``.
        """
        if not is_leaf and backward_fn is None:
            raise ValueError("non leaf nodes requires backward_fn")

        self.is_leaf = is_leaf
        self.prev = []
        self.backward_fn = backward_fn
        self.data = data
        self.zero_grad()

    def __repr__(self):
        return f'Tensor(data:{self.data}, grad:{self.grad})\n'

    # grad stuff

    def zero_grad(self):
        """Reset ``grad`` to a float array of zeros shaped like ``data``."""
        self.grad = np.zeros(self.data.shape)

    def calculate_grad(self):
        """Push this node's ``grad`` into its operands via ``backward_fn``."""
        self.backward_fn(dy=self.grad)

    def backward(self):
        """Run back-propagation starting from this tensor.

        The gradient of this tensor is seeded with ones. Leaf gradients are
        accumulated across calls (use ``zero_grad`` on leaves to reset them).
        """
        topological_sorted = self._tsort()

        # Intermediate grads are scratch space for one backward pass. Clearing
        # them first keeps a repeated backward() from re-propagating stale
        # values into the leaves.
        for tensor in topological_sorted:
            tensor.zero_grad()

        self.grad = np.ones(self.data.shape)
        for tensor in reversed(topological_sorted):
            tensor.calculate_grad()

    def _tsort(self):
        """Return the non-leaf tensors reachable from ``self``, operands first.

        Leaves are left out because they have no ``backward_fn`` to call.
        """
        tensors_seen = set()
        topological_sorted = []

        def helper(tensor):
            if tensor in tensors_seen or tensor.is_leaf:
                return
            tensors_seen.add(tensor)
            for prev_tensor in tensor.prev:
                helper(prev_tensor)
            # Post-order: a node is appended only after all of its operands.
            topological_sorted.append(tensor)

        helper(self)
        return topological_sorted

    # ops

    def dot(self, other):
        """Return ``np.dot(self, other)`` as a new non-leaf tensor.

        Raises:
            ValueError: If ``other`` is not a Tensor.
        """
        self.check(other)

        def b_fn(dy):
            if np.isscalar(dy):
                dy = np.ones(1) * dy
            self.grad += np.dot(dy, other.data.T)
            other.grad += np.dot(self.data.T, dy)

        res = Tensor(np.dot(self.data, other.data), is_leaf=False, backward_fn=b_fn)
        res.prev.extend([self, other])
        return res

    def minus(self, other):
        """Return ``self - other`` as a new non-leaf tensor.

        Raises:
            ValueError: If ``other`` is not a Tensor.
        """
        self.check(other)

        def b_fn(dy):
            # d(a - b)/da = +1 and d(a - b)/db = -1.
            self.grad += dy
            other.grad -= dy

        res = Tensor(self.data - other.data, is_leaf=False, backward_fn=b_fn)
        res.prev.extend([self, other])
        return res

    def multiply(self, other):
        """Return the element-wise product as a new non-leaf tensor.

        Raises:
            ValueError: If ``other`` is not a Tensor.
        """
        self.check(other)

        def b_fn(dy):
            if np.isscalar(dy):
                dy = np.ones(1) * dy
            self.grad += np.multiply(dy, other.data)
            other.grad += np.multiply(dy, self.data)

        res = Tensor(np.multiply(self.data, other.data), is_leaf=False, backward_fn=b_fn)
        res.prev.extend([self, other])
        return res

    def plus(self, other):
        """Return ``self + other`` as a new non-leaf tensor.

        Raises:
            ValueError: If ``other`` is not a Tensor.
        """
        self.check(other)

        def b_fn(dy):
            self.grad += dy
            other.grad += dy

        res = Tensor(self.data + other.data, is_leaf=False, backward_fn=b_fn)
        res.prev.extend([self, other])
        return res

    def plus_bias(self, bias):
        """Return ``self + bias`` where the bias gradient is reduced over axis 0.

        NOTE(review): presumably ``bias`` is broadcast along the first axis of
        ``self`` (one row per sample) - confirm against callers.

        Raises:
            ValueError: If ``bias`` is not a Tensor.
        """
        self.check(bias)

        def b_fn(dy):
            bias.grad += dy.sum(axis=0)
            self.grad += dy

        res = Tensor(self.data + bias.data, is_leaf=False, backward_fn=b_fn)
        res.prev.extend([self, bias])
        return res

    def relu(self):
        """Return ``max(self, 0)`` element-wise as a new non-leaf tensor."""
        def b_fn(dy=1):
            # Gradient flows only through entries that were strictly positive.
            positive = self.data > 0
            self.grad[positive] += dy[positive]

        res = Tensor(np.maximum(self.data, 0), is_leaf=False, backward_fn=b_fn)
        res.prev.append(self)
        return res

    def scalar_mul(self, scalar):
        """Return ``self * scalar`` as a new non-leaf tensor.

        Raises:
            ValueError: If ``scalar`` is not an ``int`` or ``float``.
        """
        if not isinstance(scalar, (int, float)):
            raise ValueError('scalar needs to be one of (int, float)')

        def b_fn(dy=1):
            self.grad += dy * scalar

        res = Tensor(self.data * scalar, is_leaf=False, backward_fn=b_fn)
        res.prev.append(self)
        return res

    def sum(self):
        """Return the sum of all entries as a new non-leaf (scalar) tensor."""
        def b_fn(dy=1):
            # Every entry contributes with weight 1, so dy is spread uniformly.
            self.grad += np.ones(self.data.shape) * dy

        res = Tensor(np.sum(self.data), is_leaf=False, backward_fn=b_fn)
        res.prev.append(self)
        return res

    def transpose(self):
        """Return ``self.data.T`` as a new non-leaf tensor."""
        def b_fn(dy):
            self.grad += dy.T

        res = Tensor(self.data.T, is_leaf=False, backward_fn=b_fn)
        res.prev.append(self)
        return res

    # helper stuff

    def check(self, *others):
        """Validate that every argument is a Tensor.

        Raises:
            ValueError: If any argument is not a Tensor instance.
        """
        for other in others:
            if not isinstance(other, Tensor):
                raise ValueError("other needs to be a Tensor instance")

    def shape(self):
        """Return the shape of the wrapped ``data``."""
        return self.data.shape

In [3]:
# Smoke test: gradients of relu(l1 . l2) with respect to both leaf operands.
l1 = Tensor(np.arange(-4,4).reshape(2,4))
l2 = Tensor(np.arange(-2,2).reshape(4,1))
n1 = l1.dot(l2)
n2 = n1.relu()
# n3 is only constructed here; the backward pass below starts from n2.
n3 = n2.sum()
n2.backward()

print(l1.grad)
print(l2.grad)

# Check the gradients instead of comparing against comments by eye.
# Both entries of n1 are positive, so relu passes the all-ones seed through:
# l1.grad is ones(2,1) . l2.T and l2.grad is l1.T . ones(2,1).
np.testing.assert_allclose(l1.grad, [[-2, -1, 0, 1], [-2, -1, 0, 1]])
np.testing.assert_allclose(l2.grad, [[-4], [-2], [0], [2]])

[[-2. -1.  0.  1.]
 [-2. -1.  0.  1.]]
[[-4.]
 [-2.]
 [ 0.]
 [ 2.]]


In [4]:
# Rebind l1 and l2 to two leaf tensors wrapping identical (2, 4) integer
# arrays; they are the operands of the element-wise multiply in the next cell.
l1 = Tensor(np.arange(-4,4).reshape(2,4))
l2 = Tensor(np.arange(-4,4).reshape(2,4))
# Echo both tensors: each shows its data and a zero-initialised grad.
l1, l2

(Tensor(data:[[-4 -3 -2 -1]
  [ 0  1  2  3]], grad:[[0. 0. 0. 0.]
  [0. 0. 0. 0.]]),
 Tensor(data:[[-4 -3 -2 -1]
  [ 0  1  2  3]], grad:[[0. 0. 0. 0.]
  [0. 0. 0. 0.]]))

In [5]:
# Element-wise product node; its backward_fn adds dy * (the other operand's
# data) into each operand's grad.
n1 = l1.multiply(l2)
n1

Tensor(data:[[16  9  4  1]
 [ 0  1  4  9]], grad:[[0. 0. 0. 0.]
 [0. 0. 0. 0.]])

In [6]:
# Seed n1.grad with ones and propagate into the leaf operands l1 and l2.
n1.backward()

In [7]:
# With an all-ones seed the multiply rule gives l1.grad == l2.data (as floats).
l1.grad

array([[-4., -3., -2., -1.],
       [ 0.,  1.,  2.,  3.]])

In [8]:
# Symmetrically, l2.grad == l1.data (as floats).
l2.grad

array([[-4., -3., -2., -1.],
       [ 0.,  1.,  2.,  3.]])