In [11]:
import numpy as np
import torch

In [12]:
import numpy as np

class Tensor:
    def __init__(self, data, _children=()):
        self.data = data if isinstance(data, np.ndarray) else np.array(data)
        self.grad = np.zeros_like(self.data)
        # Set data to floats to division can be done
        self.data = self.data.astype(np.float32) 
        self.grad = self.grad.astype(np.float32)
        self._prev = set(_children)
        self.shape = self.data.shape
        self.size = self.data.size
        self.dtype = self.data.dtype
        self._backward = lambda: None
    
    def __add__(self, other):
        other = other if isinstance(other, Tensor) else Tensor(other)
        out = Tensor(self.data + other.data, (self, other))

        def _backward():
            self.grad += out.grad
            other.grad += out.grad
        out._backward = _backward

        return out
    
    def __sub__(self, other):    
        other = other if isinstance(other, Tensor) else Tensor(other)
        out = Tensor(self.data - other.data, (self, other))

        def _backward():
            self.grad += out.grad
            other.grad -= out.grad
        out._backward = _backward

        return out

    def __mul__(self, other):
        other = other if isinstance(other, Tensor) else Tensor(other)
        out = Tensor(self.data * other.data, (self, other))

        def _backward():
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward

        return out
    
    def __pow__(self, other):
        assert isinstance(other, (int, float)), "Exponent must be a scalar (int/float)"
        out = Tensor(self.data ** other, (self,))

        def _backward():
            self.grad += (other * self.data ** (other -1)) * out.grad
        out._backward = _backward

        return out
    
    def __matmul__(self, other):
        other = other if isinstance(other, Tensor) else Tensor(other)
        out = Tensor(self.data @ other.data, (self, other))

        def _backward():
            self.grad += out.grad @ np.transpose(other.data)
            other.grad += np.transpose(self.data) @ out.grad
        out._backward = _backward

        return out
    
    def relu(self):
        out = self.data * (self.data > 0)
        out = Tensor(self.data * (self.data > 0), (self,))

        def _backward():
            self.grad += (self.data > 0) * out.grad
        out._backward = _backward

        return out
    
    def transpose(self,):
        out = Tensor(np.transpose(self.data), (self,))
        
        def _backward():
            self.grad += np.transpose(out.grad)
        out._backward = _backward
        
        return out
    
    def log(self):
        print(self.data)
        val = Tensor(np.log(self.data) + 1e-9, (self,))
        print(val)
        out = val

        def _backward():
            self.grad += (1/self.data) * out.grad
        out._backward = _backward

        return out
    
    def exp(self):
        out = Tensor(np.exp(self.data), (self,))
        
        def _backward():
            self.grad += np.exp(self.data) * out.grad
        out._backward = _backward

        return out
    
    def sigmoid(self):
        value = 1/(1 + np.exp(-self.data))
        out = Tensor(value, (self,))
        
        def _backward():
            exp = np.exp(-self.data)
            g = exp/((1+exp)**2)
            self.grad += g * out.grad
        out._backward = _backward

        return out
    
    def tanh(self):
        val = (np.exp(self.data) - np.exp(-self.data))/(np.exp(self.data) + np.exp(-self.data))
        out = Tensor(val, (self,))

        def _backward():
            self.grad += (1 - val**2) * out.grad
        out._backward = _backward

        return out
    
    def softmax(self, dim=1):
        exps = np.exp(self.data - np.max(self.data, axis=dim, keepdims=True))
        probs = exps / np.sum(exps, axis=dim, keepdims=True)
        out = Tensor(probs, (self,))

        def _backward():
            for i, (output, grad) in enumerate(zip(out.data, out.grad)):
                output = output.reshape(-1, 1)
                jacobian = np.diagflat(output) - output @ np.transpose(output)
                g = np.transpose(jacobian @ np.transpose(grad))
                self.grad[i] += g
        out._backward = _backward

        return out
    
    def reshape(self, *new_shape):
        old_shape = self.shape
        out = Tensor(self.data.reshape(*new_shape), (self,))

        def _backward():
            self.grad += out.grad.reshape(old_shape)
        out._backward = _backward

        return out
    
    def sum(self):
        out = Tensor(self.data.sum(), (self,))

        def _backward():
            self.grad += np.ones_like(self.data) * out.grad
        out._backward = _backward

        return out
        
    def backward(self):
        # https://github.com/karpathy/micrograd/blob/master/micrograd/engine.py
        # topological order all of the children in the graph
        
        topo = []
        visited = set()
        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)
        build_topo(self)

        # go one variable at a time and apply the chain rule to get its gradient
        self.grad = np.ones_like(self.data)
        for v in reversed(topo):
            v._backward() 
    
    def max(self):
        return np.max(self.data)
    
    def min(self):
        return np.min(self.data)
    
    def numpy(self):
        return np.array(self.data)
        
    @classmethod
    def zeros(cls, shape):
        assert isinstance(shape, int) or isinstance(shape, tuple), f'shape should be int or tuple insted of {type(shape)}'
        return cls(np.zeros(shape))

    @classmethod
    def ones(cls, shape):
        assert isinstance(shape, int) or isinstance(shape, tuple), f'shape should be int or tuple insted of {type(shape)}'
        return cls(np.ones(shape))
    
    @classmethod
    def normal(cls, mean=0.0, std=1.0, shape=None):
        assert isinstance(shape, int) or isinstance(shape, tuple), f'shape should be int or tuple insted of {type(shape)}'
        return cls(np.random.normal(mean, std, shape))
    
    @classmethod
    def randn(cls, *args):
        return cls(np.random.randn(*args))
    
    @classmethod
    def eye(cls, N, M=None):
        return cls(np.eye(N, M))

    def __neg__(self):
        return self * -1
    
    def __radd__(self, other):
        return self + other
    
    def __rsub__(self, other):
        return other - self
    
    def __rmul__(self, other):
        return self * other
    
    def __truediv__(self, other):
        return self * other**-1
    
    def __rtruediv__(self, other):
        return other * self**-1
    
    def __repr__(self):
        return f'Tensor(data={self.data}, dtype={self.data.dtype})'
    
    def __len__(self):
        return len(self.data)
    
    def __iter__(self):
        # Track the current element in the iterable
        self.current = 0
        return self
    
    def __next__(self):
        if self.current >= len(self.data):
            raise StopIteration
        current = self.data[self.current]
        self.current += 1
        return current
    
    def __getitem__(self, key):
        return self.data[key]
    
    def __setitem__(self, key, value):
        self.data[key] = value

## Test Basic Ops

In [13]:
a = Tensor([1, 2, 3])
b = Tensor([4, 5, 6])
c = a + b
print(c)
d = Tensor([5, 3.0, 2.7])
e = d/c
print(e)

Tensor(data=[5. 7. 9.], dtype=float32)
Tensor(data=[1.         0.42857146 0.3       ], dtype=float32)


In [14]:
a = Tensor([16.0, 2.0, 4.5])
b = Tensor([2.0, 4.0, 3.5])
c = a * b
print(c)

Tensor(data=[32.    8.   15.75], dtype=float32)


In [15]:
a = Tensor([[2.0, 4.0, 5.0], 
           [1.0, 5.5, 2.4]])
b = 3.0
c = a ** b
print(c)

Tensor(data=[[  8.        64.       125.      ]
 [  1.       166.375     13.824001]], dtype=float32)


In [16]:
a = Tensor([[-2.0, 4.0, 5.0], 
           [1.0, -5.5, 2.4]])
a.relu()

Tensor(data=[[-0.   4.   5. ]
 [ 1.  -0.   2.4]], dtype=float32)

In [17]:
a = Tensor([2.5])
b = Tensor([3.0])
c = a @ b
print(c)

Tensor(data=7.5, dtype=float32)


In [18]:
a = Tensor([[-2.0, 4.0, 5.0], 
           [1.0, -5.5, 2.4]])
b = Tensor([[-2.0, 4.0], 
           [1.0, 2.4],
           [1.4, 9.0]])
c = a @ b
print(a.shape, b.shape, c.shape)

(2, 3) (3, 2) (2, 2)


In [19]:
a = Tensor([[2.0, 4.0, 5.0], 
           [1.0, 5.5, 2.4]])
a.log()

[[2.  4.  5. ]
 [1.  5.5 2.4]]
Tensor(data=[[6.9314718e-01 1.3862944e+00 1.6094379e+00]
 [9.9999997e-10 1.7047480e+00 8.7546879e-01]], dtype=float32)


Tensor(data=[[6.9314718e-01 1.3862944e+00 1.6094379e+00]
 [9.9999997e-10 1.7047480e+00 8.7546879e-01]], dtype=float32)

In [20]:
a = Tensor([[-2.0, 4.0, 5.0], 
           [1.0, -5.5, 2.4]])
print(np.exp(a.data))
print(a.exp())

[[1.3533528e-01 5.4598148e+01 1.4841316e+02]
 [2.7182817e+00 4.0867715e-03 1.1023177e+01]]
Tensor(data=[[1.3533528e-01 5.4598148e+01 1.4841316e+02]
 [2.7182817e+00 4.0867715e-03 1.1023177e+01]], dtype=float32)


In [21]:
grads = Tensor.normal(shape=(2, 2))
data = Tensor.normal(shape=(2, 2))

In [22]:
data = data.reshape(-1, 1)

In [23]:
jacobian = np.diagflat(data) - data @ data.transpose()
jacobian

array([[ 0.19950941,  0.31803176, -0.17093538,  0.10663676],
       [ 0.31803176, -2.4897647 ,  0.7172879 , -0.4474747 ],
       [-0.17093538,  0.7172879 ,  0.23538119,  0.24050823],
       [ 0.10663676, -0.4474747 ,  0.24050823, -0.53738827]],
      dtype=float32)

In [24]:
a_t = torch.tensor(a.numpy())
a_t.softmax(dim=0)

tensor([[4.7426e-02, 9.9993e-01, 9.3086e-01],
        [9.5257e-01, 7.4846e-05, 6.9138e-02]])

## Test Autodiff

In [25]:
x1 = Tensor([[-8.0, 2.0], [1.0, 1.5]])
w1 = Tensor([[3.2, 1.2], [1.0, 1.5]])
x1 = x1.transpose()
c1 = x1.sigmoid()
d1 = w1.tanh()
z1 = d1 * c1 - c1
z1i = z1.softmax(dim=1)
q1 = z1i.relu().log()
y1 = q1 @ x1
s1 = y1 @ Tensor([[1.2], [3.1]])
t1 = Tensor([[1.0, 3.2]]) @ s1
t1.backward()

print(x1.grad)
print(w1.grad)
print(z1.grad)
print(q1.grad)
print(y1.grad)
print(s1.grad)

[[0.5303644  0.46963564]
 [0.4669386  0.5330614 ]]
Tensor(data=[[-0.634191   -0.7557981 ]
 [-0.76155746 -0.6291187 ]], dtype=float32)
[[-3.6854024 -9.742768 ]
 [-2.7815564 -8.889711 ]]
[[-1.5087178e-05  1.5144639e+00]
 [-7.9981709e+00  3.1944327e+00]]
[[ -6.7917013   6.791701 ]
 [-21.621813   21.621813 ]]
[[ -6.5000005   7.05     ]
 [-20.800001   22.560001 ]]
[[1.2       3.1      ]
 [3.8400002 9.92     ]]
[[1. ]
 [3.2]]


In [26]:
x = torch.Tensor([[-8.0, 2.0], [1.0, 1.5]]); x.requires_grad = True
w = torch.Tensor([[3.2, 1.2], [1.0, 1.5]]); w.requires_grad = True
x = x.transpose(0,1) ; x.retain_grad()
c = x.sigmoid() ; c.retain_grad()
d = w.tanh(); c.retain_grad()
z = d * c - c; z.retain_grad()
zi = z.softmax(dim=1); zi.retain_grad()
q = zi.relu().log(); q.retain_grad()
y = q @ x; y.retain_grad()
s = y @ torch.Tensor([[1.2], [3.1]]); s.retain_grad()
t = torch.Tensor([[1.0, 3.2]]) @ s
t.backward()

print(x.grad)
print(w.grad)
print(z.grad)
print(q.grad)
print(y.grad)
print(s.grad)

tensor([[-3.6854, -9.7428],
        [-2.7816, -8.8897]])
tensor([[-1.5087e-05,  1.5145e+00],
        [-7.9982e+00,  3.1944e+00]])
tensor([[ -6.7917,   6.7917],
        [-21.6218,  21.6218]])
tensor([[ -6.5000,   7.0500],
        [-20.8000,  22.5600]])
tensor([[1.2000, 3.1000],
        [3.8400, 9.9200]])
tensor([[1.0000],
        [3.2000]])


In [27]:
x

tensor([[-8.0000,  1.0000],
        [ 2.0000,  1.5000]], grad_fn=<TransposeBackward0>)

In [28]:
x.transpose(0,1)

tensor([[-8.0000,  2.0000],
        [ 1.0000,  1.5000]], grad_fn=<TransposeBackward0>)

## Test Iterable

In [29]:
a = Tensor([[-2.0, 4.0, 5.0], 
           [1.0, -5.5, 2.4]])
for array in a:
    print(array)

[-2.  4.  5.]
[ 1.  -5.5  2.4]


In [30]:
a[0], a[1]

(array([-2.,  4.,  5.], dtype=float32),
 array([ 1. , -5.5,  2.4], dtype=float32))

In [31]:
a[0] = np.array([3.8, 9.0, 2.3])

## Make tensors

In [32]:
a = Tensor.zeros((2, 3))
a

Tensor(data=[[0. 0. 0.]
 [0. 0. 0.]], dtype=float32)

In [33]:
b = Tensor.ones((4, 2))
b

Tensor(data=[[1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]], dtype=float32)

In [34]:
c = Tensor.normal(shape=(5, 4))
c

Tensor(data=[[ 2.2703073  -0.87986165  0.8481479  -0.62249815]
 [ 0.01475679 -0.6145898   0.28788444 -0.10494109]
 [-1.1931285   0.6689222   0.17278725 -0.4485536 ]
 [-0.06168244 -1.334231   -0.56350183  0.6163185 ]
 [-0.9226992  -1.2011671  -1.6699742  -0.5620623 ]], dtype=float32)

In [35]:
d = Tensor.eye(6)
d

Tensor(data=[[1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 1.]], dtype=float32)

In [36]:
e = Tensor.normal(shape=(4, 5))
print(e)
f = e.numpy()
print(f)

Tensor(data=[[-6.1813986e-01 -2.7583241e-01  1.5022295e+00 -1.8896624e-03
  -7.9983167e-02]
 [ 2.1573989e+00 -2.7955315e-01  1.5221264e+00 -2.6515085e-01
  -3.8024883e-03]
 [ 7.4349099e-01  5.0601006e-01 -3.0673323e-02  1.1141809e+00
   5.1435101e-01]
 [ 1.0036160e-01 -6.8598026e-01  1.7330390e+00 -8.7002379e-01
   7.1909678e-01]], dtype=float32)
[[-6.1813986e-01 -2.7583241e-01  1.5022295e+00 -1.8896624e-03
  -7.9983167e-02]
 [ 2.1573989e+00 -2.7955315e-01  1.5221264e+00 -2.6515085e-01
  -3.8024883e-03]
 [ 7.4349099e-01  5.0601006e-01 -3.0673323e-02  1.1141809e+00
   5.1435101e-01]
 [ 1.0036160e-01 -6.8598026e-01  1.7330390e+00 -8.7002379e-01
   7.1909678e-01]]


In [37]:
g = Tensor.randn(2, 3)
g

Tensor(data=[[-1.4515268   0.18671152  0.23241463]
 [-0.4515628   1.1693501   0.00795554]], dtype=float32)