In [None]:
import math


class Value:
    """A scalar that remembers how it was computed so gradients can be backpropagated.

    Every arithmetic operation returns a new ``Value`` that keeps references to its
    operands and a closure that knows how to push the result's gradient back to them.

    Attributes:
        data: The wrapped number.
        grad: Derivative of the node ``backward()`` was called on w.r.t. this node.
            Gradients accumulate; they are never reset automatically.
        label: Optional human-readable name for the node.
        _prev: Set of operand nodes this value was computed from.
        _op: Symbol of the operation that produced this node ('' for leaves).
        _exp: Text of the expression that produced this node ('' for leaves). It is
            stored with a leading underscore so it does not hide the ``exp()`` method.
    """

    def __init__(self, data, _children=(), _op='', _exp='', label=''):
        self.data = data
        self.grad = 0.0
        # Propagates this node's grad to its operands; leaves have nothing to do.
        self._backward = lambda: None
        self._prev = set(_children)
        self._op = _op
        self._exp = _exp
        self.label = label

    def __repr__(self):
        """Return a short debugging representation such as ``Value(data=3)``."""
        return f"Value(data={self.data})"

    def __add__(self, other):
        """Return ``self + other``; plain numbers are wrapped automatically."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), '+', f"{self} + {other}")

        def _backward():
            # += rather than =: a node can feed several consumers (or appear twice
            # in one expression, as in a + a) and every path must contribute.
            self.grad += out.grad
            other.grad += out.grad

        # Store the function itself, not its result; it runs later in backward().
        out._backward = _backward
        return out

    def __radd__(self, other):
        """Return ``other + self``; needed for ``sum()``, which starts from the int 0."""
        return self + other

    def __sub__(self, other):
        """Return ``self - other``, expressed as ``self + (-other)``."""
        return self + (-other)

    def __rsub__(self, other):
        """Return ``other - self`` for a plain-number left operand."""
        other = other if isinstance(other, Value) else Value(other)
        return other + (-self)

    def __mul__(self, other):
        """Return ``self * other``; plain numbers are wrapped automatically."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), '*', f"{self} * {other}")

        def _backward():
            # Chain rule: local derivative times the gradient flowing into out.
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad

        out._backward = _backward
        return out

    def __rmul__(self, other):
        """Return ``other * self`` (e.g. ``2 * Value(3.0)``) by swapping the operands."""
        return self * other

    def __truediv__(self, other):
        """Return ``self / other`` as ``self * other**-1``.

        Reusing ``*`` and ``**`` means their backward rules handle division too.
        """
        other = other if isinstance(other, Value) else Value(other)
        return self * other**-1

    def __rtruediv__(self, other):
        """Return ``other / self`` for a plain-number left operand."""
        other = other if isinstance(other, Value) else Value(other)
        return other * self**-1

    def __neg__(self):
        """Return ``-self``."""
        out = Value(self.data * -1, (self,), '-', f"-{self}")

        def _backward():
            self.grad += -1 * out.grad

        out._backward = _backward
        return out

    def __pow__(self, other):
        """Return ``self ** other``; the exponent may be a number or a ``Value``."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data ** other.data, (self, other), '**', f"{self} ^ {other}")

        def _backward():
            base, exponent = self.data, other.data
            # d(a**b)/da = b * a**(b - 1)
            self.grad += exponent * base ** (exponent - 1) * out.grad
            # d(a**b)/db = a**b * ln(a), which only exists for a positive base.
            # For other bases the exponent receives no gradient.
            if base > 0:
                other.grad += out.data * math.log(base) * out.grad

        out._backward = _backward
        return out

    def __rpow__(self, other):
        """Return ``other ** self`` for a plain-number base."""
        return Value(other) ** self

    def tanh(self):
        """Return ``tanh(self)`` as a new node, e.g. ``Value(3).tanh()``."""
        # math.tanh saturates to +/-1 for large inputs; building it from
        # exp(2x) raises OverflowError instead.
        t = math.tanh(self.data)
        out = Value(t, (self,), 'tanh', f"tanh({self})")

        def _backward():
            self.grad += (1 - t**2) * out.grad

        out._backward = _backward
        return out

    def exp(self):
        """Return ``e ** self`` as a new node."""
        out = Value(math.exp(self.data), (self,), 'exp', f"exp({self})")

        def _backward():
            # d(e**x)/dx = e**x, which is exactly out.data.
            self.grad += out.data * out.grad

        out._backward = _backward
        return out

    def backward(self):
        """Backpropagate from this node, accumulating ``grad`` on every ancestor.

        Call it on the output of the computation (e.g. the loss). Gradients are
        added to whatever ``grad`` already holds, so reset them between runs.
        """
        # Post-order walk with an explicit stack, so that long chains of
        # operations do not hit Python's recursion limit.
        topo = []
        visited = {self}
        stack = [(self, iter(self._prev))]
        while stack:
            node, pending = stack[-1]
            for child in pending:
                if child not in visited:
                    visited.add(child)
                    stack.append((child, iter(child._prev)))
                    break
            else:
                # Every operand of node has been emitted already.
                topo.append(node)
                stack.pop()

        # Derivative of the output with respect to itself.
        self.grad = 1.0

        # topo lists operands before results, so walk it from the output backwards.
        for node in reversed(topo):
            node._backward()
            
# Banner printed after the class definition above has been executed.
print("--------DONE--------")

In [None]:
# Build a few nodes to exercise +, binary - with a plain number, and unary -.
a = Value(3, label='a')
b = Value(4, label='b')
d = a + b; d.label='d'  # __add__ takes no label, so it is attached afterwards
e = a - 3  # __sub__ evaluates this as a + (-3)
f = -a  # __neg__

In [None]:
# Show e through Value.__repr__ (its data is 3 - 3).
e

In [None]:
# The raw number wrapped by e.
e.data

In [None]:
# The set of operand nodes recorded for d, alongside d's `exp` attribute.
d._prev, d.exp

In [None]:
# !pip install graphviz

In [None]:
# from graphviz import Digraph

# def trace(root):
#     nodes, edges = set(), set()
#     def build(v):
#         if v not in nodes:
#             nodes.add(v)
#             for child in v._prev:
#                 edges.add((child, v))
#                 build(child)
#     build(root)
#     return nodes, edges

# def draw_dot(root, format='svg', rankdir='LR'):
#     """
#     format: png | svg | ...
#     rankdir: TB (top to bottom graph) | LR (left to right)
#     """
#     assert rankdir in ['LR', 'TB']
#     nodes, edges = trace(root)
#     dot = Digraph(format=format, graph_attr={'rankdir': rankdir}) #, node_attr={'rankdir': 'TB'})
    
#     for n in nodes:
#         dot.node(name=str(id(n)), label = "{ data %.4f | grad %.4f }" % (n.data, n.grad), shape='record')
#         if n._op:
#             dot.node(name=str(id(n)) + n._op, label=n._op)
#             dot.edge(str(id(n)) + n._op, str(id(n)))
    
#     for n1, n2 in edges:
#         dot.edge(str(id(n1)), str(id(n2)) + n2._op)
    
#     return dot

In [None]:
# draw_dot(d)

In [None]:
import random, math

class Neuron:
    """A single tanh unit: one weight per input plus a bias, all stored as Value nodes."""

    def __init__(self, nin):
        # Draw the nin weights first and the bias last, each uniformly from [-1, 1].
        weights = []
        for _ in range(nin):
            weights.append(Value(random.uniform(-1,1)))
        self.w = weights
        self.b = Value(random.uniform(-1,1))

    def __call__(self, x):
        """Return tanh(b + w . x) for the input sequence x, so a neuron is used as n(x)."""
        # Start from the bias and add one weighted input at a time.
        # zip stops at the shorter of self.w and x.
        pre_activation = self.b
        for weight, feature in zip(self.w, x):
            pre_activation = pre_activation + weight * feature
        return pre_activation.tanh()

In [None]:
# Smoke test: one neuron with two inputs.
x = [2.0, 3.0]
n = Neuron(2)
n(x)  # weights come from random.uniform, so the value is not fixed

In [None]:
class Layer:
    """A group of nout neurons that all read the same nin input features."""

    def __init__(self, nin, nout):
        # Every neuron sees every input feature, hence Neuron(nin) repeated nout times.
        units = []
        for _ in range(nout):
            units.append(Neuron(nin))
        self.neurons = units

    def __call__(self, x):
        """Evaluate every neuron on x.

        Returns the list of outputs, or the bare output when the layer has exactly one neuron.
        """
        results = []
        for neuron in self.neurons:
            results.append(neuron(x))
        if len(results) != 1:
            return results
        return results[0]

In [None]:
# Smoke test: a layer of three neurons over two inputs returns a list of three outputs.
x = [2.0, 3.0]
n = Layer(2, 3)
n(x)

In [None]:
class MLP:
    """A multi-layer perceptron: a stack of Layer objects applied one after another.

    Args:
        nin: Number of input features.
        nout: Sequence with the number of neurons in each layer, in order.
    """

    def __init__(self, nin, nout):
        # Layer i maps sizes[i] inputs to sizes[i + 1] outputs.
        sizes = [nin, *nout]
        self.layers = [Layer(n_in, n_out) for n_in, n_out in zip(sizes, sizes[1:])]

    def __call__(self, x):
        """Run x through every layer in order and return the last layer's output."""
        inputs = x
        out = x
        for layer in self.layers:
            # Each layer consumes the previous layer's output, not the original x.
            out = layer(inputs)
            # A one-neuron layer returns a bare value; re-wrap it so the next
            # layer still receives a sequence.
            inputs = out if isinstance(out, list) else [out]
        return out

In [None]:
# 3 input features, two layers of 4 neurons, and a single output neuron.
x = [2.0, 3.0, -1]
n = MLP(3, [4, 4, 1])  # n is rebound here and is the model used by the cells below
n(x)

In [None]:
# Trying out on a tiny dataset
# Four samples with three features each.
xs = [
    [2, 3, -1],
    [3, -1, 0.5],
    [0.5, 1, 1],
    [1, 1, -1],
]

# Target for each sample, in the same order as xs.
ys = [1, -1, -1, 1]

# Forward pass of n over every sample.
ypred = [n(x) for x in xs]
ypred

In [None]:
# Sum of squared errors between predictions and targets.
# sum() is seeded with a Value so the first addition is Value + Value rather than int 0 + Value.
loss = sum(((ypredi - ysi)**2 for ysi, ypredi in zip(ys, ypred)), Value(0.0))
loss

In [None]:
# Backpropagate from the loss, accumulating .grad on every node it was computed from.
loss.backward()

In [None]:
# First weight of the first neuron in the first layer (Layer stores its units in `neurons`).
n.layers[0].neurons[0].w[0]