In [1]:
from __future__ import annotations
import math
import numpy as np

In [17]:
class Value:
    """A node in a small reverse-mode automatic-differentiation graph.

    Each instance wraps a payload (``data``), the gradient accumulated for it
    (``grad``), the operand nodes it was computed from (``_prev``), the label
    of the operation that produced it (``_op``) and a closure (``_backward``)
    that pushes ``grad`` one step back onto those operands.

    Scalar arithmetic (+, -, *, /, **, exp, tanh) is differentiable. ``@``
    only supports the forward pass; its backward step raises
    ``NotImplementedError``.
    """

    def __init__(self, data: float | np.ndarray, _children=(), _op=''):
        """Create a node.

        Args:
            data: The wrapped value (a scalar, or a nested list / array for matmul).
            _children: Operand nodes this value was computed from (internal).
            _op: Label of the producing operation, e.g. ``'+'`` (internal).
        """
        self.data = data
        self.grad = 0.0
        self._prev = set(_children)
        # Keep the operation label so the graph can be inspected/debugged;
        # previously the argument was accepted but discarded.
        self._op = _op
        # Leaves have nothing to propagate, hence the no-op default.
        self._backward = lambda: None

    def __repr__(self):
        return f'Value(data={self.data}, grad={self.grad})'

    def __getitem__(self, idx):
        """Return ``self.data[idx]`` for a non-negative, in-range row index.

        Raises:
            IndexError: If ``idx`` is negative or ``>= len(self.data)``.
        """
        # Valid rows are 0 .. len-1, so the upper bound must be exclusive.
        if idx < 0 or idx >= len(self.data):
            raise IndexError(f"Row index {idx} out of range; must be in [0, {len(self.data)-1}]")

        return self.data[idx]

    def __add__(self, other):
        """Return ``self + other``; plain numbers are wrapped in a ``Value``."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), '+')

        def _backward():
            # d(a+b)/da = d(a+b)/db = 1; accumulate because a node may feed
            # several downstream operations.
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad
        out._backward = _backward
        return out

    def __mul__(self, other):
        """Return ``self * other``; plain numbers are wrapped in a ``Value``."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), '*')

        def _backward():
            # Product rule: each operand receives the other operand's value.
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward
        return out

    def exp(self):
        """Return ``e ** self`` (uses ``math.exp``, so ``data`` must be a scalar)."""
        x = self.data
        out = Value(math.exp(x), (self,), 'exp')

        def _backward():
            # d/dx exp(x) = exp(x), which is exactly out.data.
            self.grad += out.data * out.grad
        out._backward = _backward
        return out

    def tanh(self):
        """Return ``tanh(self)`` (uses ``math.tanh``, so ``data`` must be a scalar)."""
        y = math.tanh(self.data)
        out = Value(y, (self,), 'tanh')

        def _backward():
            # d/dx tanh(x) = 1 - tanh(x)^2
            self.grad += (1 - y*y) * out.grad
        out._backward = _backward
        return out

    def __pow__(self, other):
        """Return ``self ** other`` for a constant ``int``/``float`` exponent.

        The exponent is not part of the graph, so no gradient flows into it.
        """
        assert isinstance(other, (int, float)), 'Only accepting int, float'
        out = Value(self.data ** other, (self, ), '**')

        def _backward():
            # Power rule: d/dx x^n = n * x^(n-1)
            self.grad += other * (self.data) ** (other - 1) * out.grad

        out._backward = _backward
        return out

    def __matmul__(self, other):
        """Return the matrix product ``self @ other`` (forward pass only).

        Both operands' ``data`` are indexed as ``M[i][j]`` (list of lists or a
        2-D array). The result's ``data`` is a nested list of floats.

        Raises:
            ValueError: If the inner dimensions do not match.
        """
        other = other if isinstance(other, Value) else Value(other)

        A = self.data      # assume list of lists or 2D array
        B = other.data

        # infer shapes from the outer length and the first row
        n_rows_A = len(A)
        n_cols_A = len(A[0])
        n_rows_B = len(B)
        n_cols_B = len(B[0])

        if n_cols_A != n_rows_B:
            raise ValueError(
                f"matmul shape mismatch: {n_rows_A}x{n_cols_A} @ {n_rows_B}x{n_cols_B}"
            )

        # allocate result (n_rows_A x n_cols_B)
        res = [[0.0 for _ in range(n_cols_B)] for _ in range(n_rows_A)]

        # Plain left-to-right accumulation is kept on purpose so the result
        # does not depend on how the built-in sum() adds floats.
        for i in range(n_rows_A):
            for j in range(n_cols_B):
                total = 0.0
                for k in range(n_cols_A):
                    total += A[i][k] * B[k][j]
                res[i][j] = total

        out = Value(res, (self, other), '@')

        def _backward():
            # dL/dA = dL/dOut @ B^T
            # dL/dB = A^T @ dL/dOut
            # but here out.grad will be matrix-shaped
            raise NotImplementedError("backward for matmul not implemented yet")

        out._backward = _backward
        return out

    def __neg__(self):
        return self * -1

    def __radd__(self, other): # other + self
        return self + other

    def __sub__(self, other):
        return self + (-other)

    def __rsub__(self, other):
        return other + (-self)

    def __rmul__(self, other): # other * self
        return self * other

    def __truediv__(self, other): # self / other
        return self * other**-1

    def __rtruediv__(self, other): # other / self
        return other * self**-1

    def backward(self):
        """Back-propagate from this node through every node it depends on.

        Sets ``self.grad`` to 1.0 and then runs each node's ``_backward`` in
        reverse topological order. Gradients are accumulated with ``+=`` and
        are not reset here, so calling this twice on the same graph adds the
        contributions twice.
        """
        topo = []
        visited = {self}
        # Iterative post-order DFS (explicit stack instead of recursion) so
        # long operation chains do not hit Python's recursion limit.
        stack = [(self, iter(self._prev))]
        while stack:
            node, pending = stack[-1]
            for child in pending:
                if child not in visited:
                    visited.add(child)
                    stack.append((child, iter(child._prev)))
                    break
            else:
                # All operands of `node` are already emitted.
                topo.append(node)
                stack.pop()

        self.grad = 1.0
        for x in reversed(topo):
            x._backward()
                
    

# Testing Value class

In [18]:
# Build a tiny expression graph by hand: f = a * b + c.
a = Value(2)
c = Value(3)
b = Value(4)

d = a * b  # Value.__mul__: data 2 * 4 = 8, operands {a, b}
f = d + c  # Value.__add__: data 8 + 3 = 11, operands {d, c}
f  # last expression of the cell -> repr is displayed; grad is still the initial 0.0

Value(data=11, grad=0.0)

In [19]:
# Seed the output gradient by hand (df/df = 1) so the per-node _backward
# closures can be stepped through manually in the following cells.
f.grad = 1

In [20]:
# Run only f's local backward step (the closure created in __add__):
# it adds 1.0 * f.grad to both d.grad and c.grad.
f._backward()
d

Value(data=8, grad=1.0)

In [21]:
# Run d's local backward step (the closure created in __mul__):
# a.grad += b.data * d.grad and b.grad += a.data * d.grad.
d._backward()
d  # d's own grad is not modified by its _backward

Value(data=8, grad=1.0)

In [22]:
# c is a leaf, so its _backward is the default no-op (`lambda: None`).
c._backward()

In [23]:
# NOTE(review): d._backward() was already called in an earlier cell. The
# closure accumulates with `+=`, so this second call adds the same
# contribution again: a.grad and b.grad end up doubled (8.0 and 4.0 in the
# output below instead of the true derivatives 4.0 and 2.0).
d._backward()

In [24]:
# a and b are leaves: both calls hit the default no-op `lambda: None`.
a._backward()
b._backward()

In [25]:
# Display every node to inspect the grads set by the manual _backward calls above.
a,b,c,d,f # checking that the grads were populated

(Value(data=2, grad=8.0),
 Value(data=4, grad=4.0),
 Value(data=3, grad=1.0),
 Value(data=8, grad=1.0),
 Value(data=11, grad=1))

In [26]:
# _prev is the set of operand nodes d was computed from (a and b for d = a * b).
d._prev

{Value(data=2, grad=8.0), Value(data=4, grad=4.0)}

In [27]:
# Power check: rebind c to a fresh Value and compute x = c ** 4 = 81.
c = Value(3)
x = c ** 4 
x

Value(data=81, grad=0.0)

In [28]:
# Manual backprop through the power node: dx/dc = 4 * c**3 = 4 * 27 = 108.
x.grad = 1      # seed the output gradient
x._backward()   # closure from __pow__: c.grad += 4 * 3**3 * x.grad
c._backward()   # c is a leaf: no-op
c

Value(data=3, grad=108.0)

In [29]:
# x has a single operand node, c: the exponent is a plain number and is not part of the graph.
x._prev

{Value(data=3, grad=108.0)}

In [30]:
# Same expression as the manual walkthrough, rebuilt from fresh Values so
# every grad starts at 0.0, this time driven by Value.backward().
a = Value(2)
b = Value(4)
c = Value(3)

d = a * b      # 8
f = d + c      # 11

f.backward()   # builds topo, sets f.grad=1, backprops

print(a, b, c) # grads: df/da = 4, df/db = 2, df/dc = 1

Value(data=2, grad=4.0) Value(data=4, grad=2.0) Value(data=3, grad=1.0)


In [31]:
# Forward-only matmul check: a 2x3 matrix times a 3x2 matrix gives a 2x2
# nested list of floats. Backward through '@' is not implemented in Value
# (its _backward raises NotImplementedError), so only .data is meaningful here.
mat1 = Value([[1, 2, 3], [4, 5, 6]]) 
mat2 = Value([[1, 2], [3, 4], [5, 6]])

mat1@mat2

Value(data=[[22.0, 28.0], [49.0, 64.0]], grad=0.0)

# making sure scalar autograd works

In [42]:
# Scalar check on f = tanh(a * b + c).
# With a = 2, b = 3, c = -7 the pre-activation is 2 * 3 - 7 = -1, so f = tanh(-1).

a = Value(2.0)
b = Value(3.0)
c = Value(-7.0)

f = (a * b + c).tanh()

In [43]:
# Full reverse pass from f: sets f.grad = 1.0 and runs every node's _backward
# in reverse topological order, accumulating grads on a, b and c.
f.backward()

In [None]:
# training a tiny model on this 

# weights and bias
W = Value(0.1)
b = Value(0.4)

# dataset 