In [1]:
import numpy as np
import torch

### To Be Added

1. softmax forward and backward
2. ~~convert tensor to numpy array~~


In [2]:
class Tensor:
    def __init__(self, data, _children=()):
        self.data = data if isinstance(data, np.ndarray) else np.array(data)
        # Set data to floats to division can be done
        self.data = self.data.astype(float) 
        self._prev = set(_children)
        self.grad = np.zeros_like(self.data)
        self.shape = self.data.shape
        self.size = self.data.size
        self._backward = lambda: None
    
    def __add__(self, other):
        # Elementwise addition. Tensors must be the same size, or one of 
        # them must be a scalar 
        other = other if isinstance(other, Tensor) else Tensor(other)
        out = Tensor(self.data + other.data, (self, other))

        def _backward():
            print('\n+')
            self.grad += out.grad
            other.grad += out.grad
        out._backward = _backward

        return out
    
    def __sub__(self, other):    
        other = other if isinstance(other, Tensor) else Tensor(other)
        out = Tensor(self.data - other.data, (self, other))

        def _backward():
            print('\n-')
            self.grad += out.grad
            other.grad -= out.grad
        out._backward = _backward

        return out

    def __mul__(self, other):
        # Elementwise multiplication. Tensors must be the same size, or one of 
        # them must be a scalar 
        other = other if isinstance(other, Tensor) else Tensor(other)
        out = Tensor(self.data * other.data, (self, other))

        def _backward():
            print('\n*')
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward

        return out
    
    def __pow__(self, other):
        # The exponent must be a scalar
        assert isinstance(other, (int, float)), "Exponent must be a scalar (int/float)"
        out = Tensor(self.data ** other, (self, other))

        def _backward():
            print('\n**')
            self.grad += (other * self.data ** (other -1)) * out.grad
        out._backward = _backward

        return out
    
    def __matmul__(self, other):
        # Rows of self must match columns of other
        other = other if isinstance(other, Tensor) else Tensor(other)
        out = Tensor(self.data @ other.data, (self, other))

        def _backward():
            print('\nmatmul')
            """print(f'\nself data shape = {self.data.shape}')
            print(f'other data shape = {other.data.shape}')
            print(f'output data shape = {out.data.shape}')
            print(f'self grad shape = {out.grad.shape}@{other.data.T.shape}')
            print(f'other grad shape = {self.data.T.shape}@{out.grad.shape}')"""
            self.grad += out.grad @ np.transpose(other.data)
            other.grad += np.transpose(self.data) @ out.grad
        out._backward = _backward

        return out
    
    def relu(self):
        out = self.data * (self.data > 0)
        out = Tensor(self.data * (self.data > 0), (self,))

        def _backward():
            print('\nrelu')
            self.grad += (self.data > 0) * out.grad
        out._backward = _backward

        return out
    
    def transpose(self,):
        out = Tensor(np.transpose(self.data), (self,))
        
        def _backward():
            print('\ntranspose')
            self.grad += np.transpose(out.grad)
        out._backward = _backward
        
        return out
    
    def log(self):
        print(self.data)
        val = Tensor(np.log(self.data) + 1e-9, (self,))
        print(val)
        out = val

        def _backward():
            print('\nlog')
            self.grad += (1/self.data) * out.grad
        out._backward = _backward

        return out
    
    def exp(self):
        out = Tensor(np.exp(self.data), (self,))
        
        def _backward():
            print('\nexp')
            self.grad += np.exp(self.data) * out.grad
        out._backward = _backward

        return out
    
    def sigmoid(self):
        value = 1/(1 + np.exp(-self.data))
        out = Tensor(value, (self,))
        
        def _backward():
            print('\nsigmoid')
            exp = np.exp(-self.data)
            g = exp/((1+exp)**2)
            self.grad += g * out.grad
        out._backward = _backward

        return out
    
    def tanh(self):
        val = (np.exp(self.data) - np.exp(-self.data))/(np.exp(self.data) + np.exp(-self.data))
        out = Tensor(val, (self,))

        def _backward():
            print('\ntanh')
            self.grad += (1 - val**2) * out.grad
        out._backward = _backward

        return out
    
    def softmax(self, dim=1):
        exps = np.exp(self.data - np.max(self.data, axis=dim, keepdims=True))
        probs = exps / np.sum(exps, axis=dim, keepdims=True)
        out = Tensor(probs, (self,))

        def _backward():
            print('\nsoftmax')
            for i, (output, grad) in enumerate(zip(out.data, out.grad)):
                output = output.reshape(-1, 1)
                jacobian = np.diagflat(output) - output @ np.transpose(output)
                g = np.transpose(jacobian @ np.transpose(grad))
                self.grad[i] = g
        out._backward = _backward

        return out
    
    def reshape(self, *new_shape):
        old_shape = self.shape
        out = Tensor(self.data.reshape(*new_shape), (self,))

        def _backward():
            print('\reshape')
            self.grad += out.grad.reshape(old_shape)
        out._backward = _backward

        return out
    
    def backward(self):
        # https://github.com/karpathy/micrograd/blob/master/micrograd/engine.py
        # topological order all of the children in the graph
        
        topo = []
        visited = set()
        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)
        build_topo(self)

        # go one variable at a time and apply the chain rule to get its gradient
        self.grad = np.ones_like(self.data)
        for v in reversed(topo):
            v._backward() 
    
    def max(self):
        return np.max(self.data)
    
    def min(self):
        return np.min(self.data)
    
    def numpy(self):
        return np.array(self.data)
        
    @classmethod
    def zeros(cls, shape):
        assert isinstance(shape, int) or isinstance(shape, tuple), f'shape should be int or tuple insted of {type(shape)}'
        return cls(np.zeros(shape))

    @classmethod
    def ones(cls, shape):
        assert isinstance(shape, int) or isinstance(shape, tuple), f'shape should be int or tuple insted of {type(shape)}'
        return cls(np.ones(shape))
    
    @classmethod
    def normal(cls, mean=0.0, std=1.0, shape=None):
        assert isinstance(shape, int) or isinstance(shape, tuple), f'shape should be int or tuple insted of {type(shape)}'
        return cls(np.random.normal(mean, std, shape))
    
    @classmethod
    def eye(cls, N, M=None):
        return cls(np.eye(N, M))

    def __neg__(self):
        return self * -1
    
    def __radd__(self, other):
        return self + other
    
    def __rsub__(self, other):
        return other - self
    
    def __rmul__(self, other):
        return self * other
    
    def __truediv__(self, other):
        return self * other**-1
    
    def __rtruediv__(self, other):
        return other * self**-1
    
    def __repr__(self):
        return f'Tensor(data={self.data})'
    
    def __len__(self):
        return len(self.data)
    
    def __iter__(self):
        # Track the current element in the iterable
        self.current = 0
        return self
    
    def __next__(self):
        if self.current >= len(self.data):
            raise StopIteration
        current = self.data[self.current]
        self.current += 1
        return current
    
    def __getitem__(self, key):
        return self.data[key]
    
    def __setitem__(self, key, value):
        self.data[key] = value

## Test Basic Ops

In [3]:
a = Tensor([1, 2, 3])
b = Tensor([4, 5, 6])
c = a + b
print(c)
d = Tensor([5, 3.0, 2.7])
e = d/c
print(e)

Tensor(data=[5. 7. 9.])
Tensor(data=[1.         0.42857143 0.3       ])


In [4]:
a = Tensor([16.0, 2.0, 4.5])
b = Tensor([2.0, 4.0, 3.5])
c = a * b
print(c)

Tensor(data=[32.    8.   15.75])


In [5]:
a = Tensor([[2.0, 4.0, 5.0], 
           [1.0, 5.5, 2.4]])
b = 3.0
c = a ** b
print(c)

Tensor(data=[[  8.     64.    125.   ]
 [  1.    166.375  13.824]])


In [6]:
a = Tensor([[-2.0, 4.0, 5.0], 
           [1.0, -5.5, 2.4]])
a.relu()

Tensor(data=[[-0.   4.   5. ]
 [ 1.  -0.   2.4]])

In [7]:
a = Tensor([2.5])
b = Tensor([3.0])
c = a @ b
print(c)

Tensor(data=7.5)


In [8]:
a = Tensor([[-2.0, 4.0, 5.0], 
           [1.0, -5.5, 2.4]])
b = Tensor([[-2.0, 4.0], 
           [1.0, 2.4],
           [1.4, 9.0]])
c = a @ b
print(a.shape, b.shape, c.shape)

(2, 3) (3, 2) (2, 2)


In [9]:
a = Tensor([[2.0, 4.0, 5.0], 
           [1.0, 5.5, 2.4]])
a.log()

[[2.  4.  5. ]
 [1.  5.5 2.4]]
Tensor(data=[[6.93147182e-01 1.38629436e+00 1.60943791e+00]
 [1.00000000e-09 1.70474809e+00 8.75468738e-01]])


Tensor(data=[[6.93147182e-01 1.38629436e+00 1.60943791e+00]
 [1.00000000e-09 1.70474809e+00 8.75468738e-01]])

In [10]:
a = Tensor([[-2.0, 4.0, 5.0], 
           [1.0, -5.5, 2.4]])
print(np.exp(a.data))
print(a.exp())

[[1.35335283e-01 5.45981500e+01 1.48413159e+02]
 [2.71828183e+00 4.08677144e-03 1.10231764e+01]]
Tensor(data=[[1.35335283e-01 5.45981500e+01 1.48413159e+02]
 [2.71828183e+00 4.08677144e-03 1.10231764e+01]])


In [11]:
grads = Tensor.normal(shape=(2, 2))
data = Tensor.normal(shape=(2, 2))

In [12]:
data = data.reshape(-1, 1)

In [13]:
jacobian = np.diagflat(data) - data @ data.transpose()
jacobian

array([[ 0.20773543, -1.66052635, -0.52589174,  0.0077555 ],
       [-1.66052635, -3.18512408, -1.75406352,  0.02586776],
       [-0.52589174, -1.75406352,  0.18981374,  0.00819237],
       [ 0.0077555 ,  0.02586776,  0.00819237, -0.01111243]])

In [14]:
a_t = torch.tensor(a.numpy())
a_t.softmax(dim=0)

tensor([[4.7426e-02, 9.9993e-01, 9.3086e-01],
        [9.5257e-01, 7.4846e-05, 6.9138e-02]], dtype=torch.float64)

## Test Autodiff

In [28]:
x1 = Tensor([[-8.0, 1.0], [2.0, 1.5]])
w1 = Tensor([[3.2, 1.2], [1.0, 1.5]])
x1 = x1.transpose()
c1 = x1.sigmoid()
d1 = w1.tanh()
z1 = d1 * c1 - c1
z1i = z1.softmax(dim=1)
q1 = z1i.relu()
y1 = q1 @ x1
s1 = y1 @ Tensor([[1.2], [3.1]])
t1 = Tensor([[1.0, 3.2]]) @ s1
t1.backward()

print(x1.grad)
print(w1.grad)
print(z1.grad)
print(q1.grad)
print(y1.grad)
print(s1.grad)


matmul

matmul

matmul

relu

softmax

-

*

tanh

sigmoid

transpose
[[2.47108042 6.34344553]
 [2.91497526 6.5319404 ]]
[[-5.10957611e-06  6.17955604e-01]
 [-2.26668383e+00  1.09073046e+00]]
[[-2.30013379  2.30013379]
 [-7.38271341  7.38271341]]
[[ -3.4    5.85]
 [-10.88  18.72]]
[[1.2  3.1 ]
 [3.84 9.92]]
[[1. ]
 [3.2]]


In [27]:
import torch

x = torch.Tensor([[-8.0, 1.0], [2.0, 1.5]]); x.requires_grad = True
w = torch.Tensor([[3.2, 1.2], [1.0, 1.5]]); w.requires_grad = True
x = x.transpose(0,1) ; x.retain_grad()
c = x.sigmoid() ; c.retain_grad()
d = w.tanh(); c.retain_grad()
z = d * c - c; z.retain_grad()
zi = z.softmax(dim=1); zi.retain_grad()
q = zi.relu(); q.retain_grad()
y = q @ x; y.retain_grad()
s = y @ torch.Tensor([[1.2], [3.1]]); s.retain_grad()
t = torch.Tensor([[1.0, 3.2]]) @ s
t.backward()

print(x.grad)
print(w.grad)
print(z.grad)
print(q.grad)
print(y.grad)
print(s.grad)

tensor([[2.4711, 6.3434],
        [2.9150, 6.5319]])
tensor([[-5.1096e-06,  6.1796e-01],
        [-2.2667e+00,  1.0907e+00]])
tensor([[-2.3001,  2.3001],
        [-7.3827,  7.3827]])
tensor([[ -3.4000,   5.8500],
        [-10.8800,  18.7200]])
tensor([[1.2000, 3.1000],
        [3.8400, 9.9200]])
tensor([[1.0000],
        [3.2000]])


In [22]:
x

tensor([[-8.0000,  2.0000],
        [ 1.0000,  1.5000]], requires_grad=True)

In [23]:
x.transpose(0,1)

tensor([[-8.0000,  1.0000],
        [ 2.0000,  1.5000]], grad_fn=<TransposeBackward0>)

## Test Iterable

In [None]:
a = Tensor([[-2.0, 4.0, 5.0], 
           [1.0, -5.5, 2.4]])
for array in a:
    print(array)

In [None]:
a[0], a[1]

In [None]:
a[0] = np.array([3.8, 9.0, 2.3])

## Make tensors

In [None]:
a = Tensor.zeros((2, 3))
a

In [None]:
b = Tensor.ones((4, 2))
b

In [None]:
c = Tensor.normal(shape=(5, 4))
c

In [None]:
d = Tensor.eye(6)
d

In [None]:
e = Tensor.normal(shape=(4, 5))
print(e)
f = e.numpy()
print(f)