In [2]:
import math
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
from graphviz import Digraph
def trace(root):
    """Collect every node and edge of the expression graph that ends at ``root``.

    Walks backwards from ``root`` through each node's ``_prev`` collection.
    The walk uses an explicit stack instead of recursion, so long chains of
    operations do not run into Python's recursion limit.

    Args:
        root: The final node of the graph. Every reachable node must be
            hashable and expose an iterable ``_prev`` of its input nodes.

    Returns:
        A ``(nodes, edges)`` tuple of sets. ``nodes`` holds every node
        reachable from ``root`` (``root`` included); ``edges`` holds one
        ``(child, parent)`` pair for each input ``child`` of each ``parent``.
    """
    nodes, edges = set(), set()
    pending = [root]
    while pending:
        v = pending.pop()
        if v in nodes:
            continue  # already expanded via another path through the graph
        nodes.add(v)
        for child in v._prev:
            edges.add((child, v))  # edge runs from an input to the node it feeds
            pending.append(child)
    return nodes, edges

def draw_dot(root):
    """Build a Graphviz digraph of the expression graph that ends at ``root``.

    Every value becomes a rectangular "record" node showing its label, data
    and grad. Every value that was produced by an operation also gets a
    separate node for that operation, placed between the value and its inputs.

    Args:
        root: Final node of the graph; nodes must provide ``label``, ``data``,
            ``grad``, ``_op`` and ``_prev``.

    Returns:
        The populated ``Digraph`` (SVG output, laid out left to right).
    """
    dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'})  # LR = left to right
    nodes, edges = trace(root)
    for n in nodes:
        uid = str(id(n))
        # Rectangular record node for the value itself.
        dot.node(name=uid, label="{ %s | data %.4f | grad %.4f }" % (n.label, n.data, n.grad), shape='record')
        # Only values produced by an operation get an op node. A whitespace-only
        # _op (e.g. a ' ' default) is truthy but still denotes a leaf, so a bare
        # truthiness check would attach a blank op node to every input value.
        if n._op and n._op.strip():
            # Op node (default shape) feeding the value it produced.
            dot.node(name=uid + n._op, label=n._op)
            dot.edge(uid + n._op, uid)

    for n1, n2 in edges:
        # Connect each input n1 to the op node that produced n2.
        dot.edge(str(id(n1)), str(id(n2)) + n2._op)

    return dot

In [4]:
class Value:
    """A scalar that records how it was computed so gradients can be backpropagated.

    Each arithmetic operation returns a new ``Value`` that remembers its input
    values (``_prev``), the operation that produced it (``_op``) and a closure
    (``_backward``) that adds this node's gradient contribution to its inputs.

    Attributes:
        data: The numeric value.
        grad: Derivative of the node ``backward()`` was called on with respect
            to this value. Starts at 0.0 and is accumulated with ``+=``.
        label: Optional display name.
    """

    def __init__(self, data, _children=(), _op='', label=''):
        self.data = data
        self.grad = 0.0
        self._backward = lambda: None  # leaves have nothing to propagate
        self._prev = set(_children)
        # Empty for leaf values, so `if value._op:` distinguishes leaves from results.
        self._op = _op
        self.label = label

    def __repr__(self):
        return f"Value(data={self.data})"

    def __neg__(self):
        return self * -1

    def __add__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        output = Value(self.data + other.data, (self, other), '+')

        def _backward():
            # d(a + b)/da = d(a + b)/db = 1
            self.grad += 1.0 * output.grad
            other.grad += 1.0 * output.grad

        output._backward = _backward
        return output

    def __radd__(self, other):
        # number + Value; also lets the built-in sum() start from its default 0.
        return self + other

    def __sub__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        output = Value(self.data - other.data, (self, other), '-')

        def _backward():
            # d(a - b)/da = 1, d(a - b)/db = -1
            self.grad += 1.0 * output.grad
            other.grad -= 1.0 * output.grad

        output._backward = _backward
        return output

    def __rsub__(self, other):
        # number - Value
        return Value(other) - self

    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        output = Value(self.data * other.data, (self, other), '*')

        def _backward():
            # d(a * b)/da = b, d(a * b)/db = a
            self.grad += other.data * output.grad
            other.grad += self.data * output.grad

        output._backward = _backward
        return output

    def __rmul__(self, other):
        # number * Value
        return self * other

    def exp(self):
        """Return e ** self as a new Value."""
        x = self.data
        output = Value(math.exp(x), (self,), 'exp')

        def _backward():
            # d(e^x)/dx = e^x, which is the output's own data
            self.grad += output.data * output.grad

        output._backward = _backward
        return output

    def __pow__(self, other):
        assert isinstance(other, (int, float)), "only supporting int/float powers for now"
        output = Value(self.data ** other, (self,), f'**{other}')

        def _backward():
            # d(x^k)/dx = k * x^(k-1). For k == 0 the derivative is 0; skipping
            # it also avoids evaluating 0 ** -1 when the base is zero.
            if other != 0:
                self.grad += other * (self.data ** (other - 1)) * output.grad

        output._backward = _backward
        return output

    def __truediv__(self, other):
        # a / b is expressed as a * b ** -1
        return self * other ** -1

    def __rtruediv__(self, other):
        # number / Value
        return Value(other) * self ** -1

    def tanh(self):
        """Return tanh(self) as a new Value."""
        # math.tanh saturates to +/-1.0 for large |x|; the hand-written
        # (e^2x - 1) / (e^2x + 1) form overflows in math.exp instead.
        t = math.tanh(self.data)
        output = Value(t, (self,), 'tanh')

        def _backward():
            # d(tanh x)/dx = 1 - tanh(x)^2
            self.grad += (1 - t**2) * output.grad

        output._backward = _backward
        return output

    def backward(self):
        """Backpropagate from this value through the whole graph behind it.

        Sets ``self.grad`` to 1.0 and then calls ``_backward`` on every node
        reachable through ``_prev``, in reverse topological order. Gradients
        are accumulated with ``+=`` and are not reset here, so a repeated call
        adds on top of whatever ``grad`` values are already stored.
        """
        # Iterative post-order DFS (explicit stack) so deep graphs do not hit
        # Python's recursion limit. A node is appended to `topo` only after
        # all of its inputs have been appended.
        topo = []
        visited = {self}
        stack = [(self, iter(self._prev))]
        while stack:
            node, children = stack[-1]
            for child in children:
                if child not in visited:
                    visited.add(child)
                    stack.append((child, iter(child._prev)))
                    break
            else:
                topo.append(node)
                stack.pop()

        self.grad = 1.0
        for node in reversed(topo):
            node._backward()

In [5]:
#forwarding a single neuron
import random
from math import tanh

class Neuron:
    """A single neuron computing ``tanh(w . x + b)``."""

    def __init__(self, ninputs):
        """Create ``ninputs`` weights and one bias, each drawn uniformly from [-1, 1]."""
        weights = []
        for _ in range(ninputs):
            weights.append(Value(random.uniform(-1, 1)))
        self.w = weights
        self.b = Value(random.uniform(-1, 1))

    def __call__(self, x):
        """Return the neuron's activation for the input sequence ``x``."""
        # Start from the bias and add each weight * input product in order.
        act = self.b
        for wi, xi in zip(self.w, x):
            act = act + wi * xi
        return act.tanh()

    def parameters(self):
        """Return the weights followed by the bias as one flat list.

        Gathering them in one list lets a caller update every parameter from
        its gradient in a single loop.
        """
        return [*self.w, self.b]
# A layer is a set of neurons that are each evaluated independently on the same input.
class Layer:
    """A group of neurons that all receive the same input."""

    def __init__(self, ninputs, noutputs):
        """Create ``noutputs`` neurons, each taking ``ninputs`` inputs."""
        neurons = []
        for _ in range(noutputs):
            neurons.append(Neuron(ninputs))
        self.neurons = neurons

    def __call__(self, x):
        """Evaluate every neuron on ``x``.

        Returns the bare output when the layer has exactly one neuron,
        otherwise the list of outputs.
        """
        outputs = []
        for neuron in self.neurons:
            outputs.append(neuron(x))
        if len(outputs) == 1:
            return outputs[0]
        return outputs

    def parameters(self):
        """Return the parameters of every neuron as one flat list, in neuron order."""
        params = []
        for neuron in self.neurons:
            params.extend(neuron.parameters())
        return params
        

# Multilayer perceptron (MLP): a sequence of layers, each feeding its output into the next.
class MLP:
    """A multilayer perceptron: layers applied one after another."""

    def __init__(self, ninputs, noutputs):
        """Build the layer stack.

        Args:
            ninputs: Number of inputs to the first layer.
            noutputs: Iterable with the size of each layer, in order. Any
                iterable works (list, tuple, ...), not only a list.
        """
        # sz[i] is the input width of layer i and sz[i + 1] its output width.
        sz = [ninputs, *noutputs]
        self.layers = [Layer(n_in, n_out) for n_in, n_out in zip(sz, sz[1:])]

    def __call__(self, x):
        """Feed ``x`` through every layer in order and return the final output."""
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        """Return the parameters of every layer as one flat list, in layer order."""
        return [p for layer in self.layers for p in layer.parameters()]


In [6]:
x = [2.0,3.0,-1.0] # one sample with three input values
n = MLP(3,[4,4,1]) # 3 inputs, then layers of 4, 4 and 1 neurons
n(x) # forward pass: calling the MLP object runs MLP.__call__

Value(data=-0.02367516693743202)

In [7]:
n.parameters() # every weight and bias in the MLP, as one flat list

[Value(data=-0.13145273673971247),
 Value(data=0.11113637027629375),
 Value(data=-0.6263733112821854),
 Value(data=-0.432306321521825),
 Value(data=0.8010869480117573),
 Value(data=-0.598469805898328),
 Value(data=0.259650372152217),
 Value(data=0.1384111248218869),
 Value(data=-0.8450837674967488),
 Value(data=0.14157382653023443),
 Value(data=0.8277179786030611),
 Value(data=0.4968600204640634),
 Value(data=0.17607716773849003),
 Value(data=-0.5418946173350732),
 Value(data=-0.2830898788202485),
 Value(data=-0.9722227728451576),
 Value(data=0.06730357598350767),
 Value(data=0.09327335741604936),
 Value(data=0.011429195370570744),
 Value(data=-0.6971687877900941),
 Value(data=-0.29572050188791565),
 Value(data=-0.49283880799179536),
 Value(data=-0.26492100272027064),
 Value(data=-0.30237208864249143),
 Value(data=-0.0003756788007343026),
 Value(data=0.1695036390276281),
 Value(data=-0.18213258220614303),
 Value(data=-0.6639055949089332),
 Value(data=-0.9431445419370017),
 Value(data=0

In [8]:
# Tiny training set: four samples with three inputs each, and one target per sample.
xs = [
    [2.0,3.0,-1.0],
    [3.0,-1.0,0.5],
    [0.5,1.0,1.0],
    [1.0,1.0,-1.0]
] # 4 example inputs
ys = [1.0,-1.0,-1.0,1.0] # desired targets, aligned with xs (e.g. 1.0 for [2.0,3.0,-1.0])

In [9]:
# Forward pass over every sample with the network as initialised.
y_prediction = list(map(n, xs))
y_prediction  # at this point these are far from the targets in ys

[Value(data=-0.02367516693743202),
 Value(data=-0.08996538830748282),
 Value(data=0.3359775423109274),
 Value(data=0.028472846789608944)]

In [10]:
# Squared error of each prediction against its target; tuning the weights should shrink these.
loss = [(pred - target) ** 2 for pred, target in zip(y_prediction, ys)]
loss  # the further a prediction is from its target, the larger its loss

[Value(data=1.0479108474043792),
 Value(data=0.8281629944783506),
 Value(data=1.784835993559146),
 Value(data=0.9438650094250867)]

In [11]:
# Total loss: the per-sample squared errors summed, starting from an explicit
# Value(0.0) so that every addition is Value + Value.
loss = sum(
    ((pred - Value(target)) ** 2 for target, pred in zip(ys, y_prediction)),
    Value(0.0),
)
print("Total loss:", loss.data)


Total loss: 4.604774844866963


In [12]:
# Goal: drive the loss toward 0. First backpropagate from the total loss so that
# every Value that contributed to it (all weights and biases included) gets its .grad.
loss.backward()

In [13]:
n.layers[0].neurons[0].w[0].grad # populated by loss.backward() (backpropagation)
# In the recorded run this gradient is negative, so slightly increasing this weight makes the loss go down.

-0.5055550609599222

In [14]:
n.layers[0].neurons[0].w[0].data # weight value before the gradient-descent update

-0.13145273673971247

In [15]:
for p in n.parameters():
    # Gradient descent: p.grad points in the direction that increases the loss,
    # so step a small amount (0.01 * grad) the opposite way to decrease it.
    # Example: a negative grad means raising p.data lowers the loss, and
    # subtracting a negative number does raise p.data.
    p.data -= 0.01 * p.grad
    

In [16]:
n.layers[0].neurons[0].w[0].data # the same weight after one gradient-descent step


-0.12639718613011325

In [17]:
# Repeat the forward pass with the updated parameters and recompute the total loss.
y_prediction = list(map(n, xs))
loss = sum(
    ((pred - Value(target)) ** 2 for target, pred in zip(ys, y_prediction)),
    Value(0.0),
)
print("Total loss:", loss.data)  # loss after the update
# A lower loss means the predictions are closer to the targets.

Total loss: 4.373114425039914


In [18]:
# Predictions after the single update step, for comparison with the targets in ys.
y_prediction

[Value(data=0.023606972849536632),
 Value(data=-0.0765536778736573),
 Value(data=0.30274093642829325),
 Value(data=0.06732426627625142)]