In [31]:
import numpy as np 
import torch

In [62]:
# Draw a random 2x3x3 tensor and display it next to its element-wise exponential.
a = torch.randn(2, 3, 3)
(a, a.exp())

(tensor([[[ 1.5505,  2.2295,  0.3588],
          [ 1.6500,  0.5570,  0.0745],
          [-0.3352,  0.6276, -2.1292]],
 
         [[ 0.4529, -2.0799, -1.2591],
          [ 0.6653, -0.4961,  0.5000],
          [-0.7080, -1.0767, -1.5270]]]),
 tensor([[[4.7136, 9.2949, 1.4317],
          [5.2071, 1.7455, 1.0774],
          [0.7152, 1.8732, 0.1189]],
 
         [[1.5729, 0.1249, 0.2839],
          [1.9451, 0.6089, 1.6487],
          [0.4926, 0.3407, 0.2172]]]))

In [33]:
# Element-wise exponential of whatever `a` currently refers to in the kernel.
# NOTE(review): the output recorded for this cell is 2x3, while the In [62] cell creates
# `a` as 2x3x3 -- presumably this cell was run against an earlier `a`; re-run after a
# kernel restart to confirm.
torch.exp(a)

tensor([[1.0055, 1.7234, 1.0141],
        [8.2085, 0.6610, 0.4452]])

In [2]:
import numpy as np 

class Tensor:
    """Minimal reverse-mode autodiff tensor backed by a NumPy array.

    Every operation returns a new ``Tensor`` that remembers its operands
    (``_prev``) and a closure (``_backward``) that pushes ``out.grad`` back to
    those operands. ``backward()`` runs the closures in reverse topological
    order.

    Attributes:
        data: the underlying ``numpy.ndarray``.
        shape: ``data.shape``.
        grad: accumulated gradient; the int ``0`` until a gradient arrives,
            afterwards an array (or NumPy scalar) shaped like ``data``.
        grad_fn: name of the operation that produced this tensor, or ``None``.
    """

    def __init__(self, data, _children=(), grad_fn=None):
        """Wrap ``data`` (ndarray, NumPy scalar or list) as a graph node.

        Args:
            data: values to wrap; lists and NumPy scalars are converted with
                ``np.asarray`` so that ``.shape`` is always available.
            _children: tensors this node was computed from.
            grad_fn: label of the producing operation, shown in ``repr``.
        """
        # np.generic is accepted because NumPy arithmetic on 0-d arrays returns scalars.
        assert isinstance(data, (np.ndarray, np.generic, list)), f"Data must be of type numpy.ndarray or list, not {type(data)}"
        self.data = np.asarray(data)
        self.shape = self.data.shape
        self._prev = set(_children)
        self.grad = 0
        self._backward = lambda: None
        self.grad_fn = grad_fn

    def __repr__(self):
        data_str = np.array2string(self.data, separator=', ', prefix='tensor(', suffix=')', precision=4) # makes formatting nicer
        return 'tensor(' + data_str + ', grad_fn=' + str(self.grad_fn) + ')'

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _unbroadcast(grad, shape):
        """Sum ``grad`` over the axes NumPy broadcasting added or stretched.

        Returns an array of exactly ``shape`` so it can be accumulated into an
        operand whose data has that shape.
        """
        grad = np.asarray(grad)
        shape = tuple(shape)
        if grad.shape == shape:
            return grad
        # Leading axes that the operand did not have at all.
        extra = grad.ndim - len(shape)
        if extra > 0:
            grad = grad.sum(axis=tuple(range(extra)))
        # Axes where the operand had size 1 but the result did not.
        stretched = tuple(
            axis
            for axis, (g_dim, s_dim) in enumerate(zip(np.shape(grad), shape))
            if s_dim == 1 and g_dim != 1
        )
        if stretched:
            grad = grad.sum(axis=stretched, keepdims=True)
        return np.asarray(grad).reshape(shape)

    @staticmethod
    def _wrap_operand(other):
        """Return ``other`` as a Tensor, wrapping Python/NumPy scalars."""
        assert isinstance(other, (Tensor, int, float, np.number)), f"Expected other to be of type Tensor, int, or float, not {type(other)}"
        return other if isinstance(other, Tensor) else Tensor(np.array(other))

    def _accumulate(self, grad):
        """Add ``grad`` (reduced to ``self.shape``) to ``self.grad``.

        The addition is out-of-place so ``self.grad`` never aliases the
        gradient array of another node.
        """
        self.grad = self.grad + Tensor._unbroadcast(grad, self.shape)

    # --------------------------------------------------------------- operations

    def __add__(self, other):
        """Element-wise ``self + other`` with NumPy broadcasting."""
        other = Tensor._wrap_operand(other)
        out = Tensor(self.data + other.data, _children=(self, other), grad_fn='AddBackward')

        def _backward():
            self._accumulate(out.grad)
            other._accumulate(out.grad)
        out._backward = _backward
        return out

    def __mul__(self, other):
        """Element-wise ``self * other`` with NumPy broadcasting."""
        other = Tensor._wrap_operand(other)
        out = Tensor(self.data * other.data, _children=(self, other), grad_fn='MulBackward')

        def _backward():
            self._accumulate(other.data * out.grad)
            other._accumulate(self.data * out.grad)
        out._backward = _backward
        return out

    def __matmul__(self, other):
        """Matrix product ``self @ other`` following ``numpy.matmul`` rules.

        Batch dimensions may broadcast and either operand may be 1-D; the
        backward pass reduces each gradient back to its operand's shape.
        """
        assert isinstance(other, Tensor), f"Expected other to be of type Tensor, not {type(other)}"
        out = Tensor(self.data @ other.data, _children=(self, other), grad_fn='MatmulBackward')

        def _backward():
            lhs, rhs, upstream = self.data, other.data, out.grad
            # matmul treats a 1-D left operand as a row and a 1-D right operand
            # as a column, then drops that axis from the result; undo that here.
            lhs_mat = lhs[np.newaxis, :] if lhs.ndim == 1 else lhs
            rhs_mat = rhs[:, np.newaxis] if rhs.ndim == 1 else rhs
            if rhs.ndim == 1:
                upstream = np.expand_dims(upstream, -1)
            if lhs.ndim == 1:
                upstream = np.expand_dims(upstream, -2)
            grad_lhs = upstream @ np.swapaxes(rhs_mat, -1, -2)
            grad_rhs = np.swapaxes(lhs_mat, -1, -2) @ upstream
            self._accumulate(Tensor._unbroadcast(grad_lhs, lhs_mat.shape).reshape(lhs.shape))
            other._accumulate(Tensor._unbroadcast(grad_rhs, rhs_mat.shape).reshape(rhs.shape))
        out._backward = _backward
        return out

    def __pow__(self, other):
        """Element-wise power with a constant int/float exponent."""
        assert isinstance(other, (int, float)), f"Exponent must be an integer or float, not {type(other)}"
        out = Tensor(self.data ** other, _children=(self,), grad_fn='PowBackward')

        def _backward():
            self._accumulate(other * self.data ** (other - 1) * out.grad)
        out._backward = _backward
        return out

    def exp(self):
        """Element-wise exponential."""
        out = Tensor(np.exp(self.data), _children=(self,), grad_fn='ExpBackward')

        def _backward():
            # d/dx exp(x) = exp(x), which is already stored in out.data.
            self._accumulate(out.data * out.grad)
        out._backward = _backward
        return out

    def sigmoid(self):
        """Element-wise logistic sigmoid."""
        out = Tensor(1 / (1 + np.exp(-self.data)), _children=(self,), grad_fn='SigmoidBackward')

        def _backward():
            self._accumulate(out.data * (1 - out.data) * out.grad)
        out._backward = _backward
        return out

    def tanh(self):
        """Element-wise hyperbolic tangent."""
        out = Tensor(np.tanh(self.data), _children=(self,), grad_fn='TanhBackward')

        def _backward():
            self._accumulate((1 - out.data ** 2) * out.grad)
        out._backward = _backward
        return out

    def relu(self):
        """Element-wise ``max(0, x)``."""
        out = Tensor(np.maximum(0, self.data), _children=(self,), grad_fn='ReluBackward')

        def _backward():
            # Gradient passes only where the input was strictly positive.
            self._accumulate((self.data > 0) * out.grad)
        out._backward = _backward
        return out

    # ------------------------------------------- operators derived from + and *

    def __sub__(self, other):
        return self + -1.0 * other

    def __truediv__(self, other):
        return self * (other**-1)

    def __neg__(self):
        return self * -1.0

    def __radd__(self, other):
        return self + other

    def __rsub__(self, other):
        return other + -1.0 * self

    def __rmul__(self, other):
        return self * other

    def __rtruediv__(self, other):
        return other * self**-1

    # ----------------------------------------------------------------- backward

    def backward(self):
        """Back-propagate from this node, seeding its gradient with ones.

        Leaf gradients accumulate across calls; gradients of intermediate
        nodes are reset first so a repeated call does not re-propagate stale
        values.
        """
        # Iterative post-order DFS: children are appended before their parent,
        # without recursion so long operation chains do not overflow the stack.
        topo = []
        visited = set()
        stack = [(self, False)]
        while stack:
            node, children_done = stack.pop()
            if children_done:
                topo.append(node)
                continue
            if node in visited:
                continue
            visited.add(node)
            stack.append((node, True))
            for child in node._prev:
                if child not in visited:
                    stack.append((child, False))

        # Only leaves (nodes without operands) keep gradients between calls.
        for node in topo:
            if node._prev:
                node.grad = 0

        # chain rule here
        self.grad = np.ones_like(self.data, dtype=np.float32) # set grad of this node to 1s
        for v in reversed(topo):
            v._backward()
        
    
        
    
    

In [14]:
# Reference result: batched matmul in PyTorch, back-propagating a gradient of ones.
ta = torch.randn((10, 3, 4, 6), requires_grad=True)
tb = torch.randn((10, 3, 6, 4), requires_grad=True)
tc = torch.matmul(ta, tb)
tc.backward(gradient=torch.ones_like(tc))

# Same computation with the NumPy-backed Tensor, fed the identical values.
a, b = (Tensor(t.detach().numpy()) for t in (ta, tb))
c = a @ b
c.backward()

In [15]:
# Does the hand-written gradient of the left operand agree with PyTorch's?
torch.from_numpy(a.grad.astype(np.float32)).allclose(ta.grad)

True

In [29]:
# A rank-2 left operand and a batched rank-3 right operand for a broadcasting matmul.
ta = torch.randn((3, 4), requires_grad=True)
tb = torch.randn((10, 4, 5), requires_grad=True)


In [30]:
# Shape of the broadcast matrix product of `ta` and `tb`.
torch.matmul(ta, tb).shape

torch.Size([10, 3, 5])

In [23]:
# Axes on which an operand has size 1 while the result `tc` has size > 1, i.e. axes
# along which that operand was stretched by broadcasting. Both operands are compared
# against the result shape (A was previously compared against `tb.shape`).
# NOTE(review): zip pairs dimensions from the left, so this assumes each operand has the
# same rank as `tc`, and that `tc` was computed from the current `ta`/`tb` -- confirm.
broadcasted_axes_A = tuple(
    axis
    for axis, (dim_a, dim_c) in enumerate(zip(ta.shape, tc.shape))
    if dim_a == 1 and dim_c > 1
)
broadcasted_axes_B = tuple(
    axis
    for axis, (dim_b, dim_c) in enumerate(zip(tb.shape, tc.shape))
    if dim_b == 1 and dim_c > 1
)

In [24]:
# Show the broadcast axes found for each operand.
(broadcasted_axes_A, broadcasted_axes_B)

((), ())