<a href="https://colab.research.google.com/github/martinpius/PYTORCH/blob/main/MicroGradEngine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from timeit import default_timer as timer
t1 = timer() # reference point for the elapsed-time report printed by this cell

# Libraries used throughout the notebook. They are imported outside of the
# try-block below so that a failed Drive mount can never leave them undefined
# (previously a non-Colab run swallowed the error and then crashed on `torch`).
import random, math
import numpy as np
import matplotlib.pyplot as plt
import torch

try:
  from google.colab import drive
  drive.mount("/content/drive/", force_remount = True)
  print(f">>>> You are in CoLaB with torch version: {torch.__version__}")
except ImportError:
  # google.colab is only importable inside Colab: nothing to mount elsewhere
  print(f">>>> Not in CoLaB (Drive mount skipped), torch version: {torch.__version__}")
except Exception as e:
  # Any other failure comes from the mount itself: report it and carry on
  print(f">>>> {type(e)}: {e}\n>>>> Please correct {type(e)} and reload")
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f">>>> Available device: {device}")
def mytimer(t: "float | None" = None)->str:
  """
  Format a duration given in seconds as a human readable string.
  --------------------
  parameters:
  t: duration in seconds; when omitted the current reading of `timer()`
     is taken at call time (not once at definition time)
  returns:
  a string of the form "hrs: H mins: MM secs: SS.ss"
  """
  if t is None:
    t = timer()
  # Round first so that a value such as 59.999 rolls over to the next minute
  # instead of being printed as "60.00" seconds
  total = round(t, 2)
  hours, rest = divmod(total, 60 * 60)
  mins, secs = divmod(rest, 60)
  return f"hrs: {int(hours)} mins: {int(mins):>02} secs: {secs:>05.2f}"
# Report the wall-clock time elapsed since t1 was recorded at the top of this cell
print(f">>>> Time elapsed:\t{mytimer(timer() - t1)}")

Mounted at /content/drive/
>>>> You are in CoLaB with torch version: 2.0.1+cu118
>>>> Available device: cpu
>>>> Time elapsed:	hrs: 0 mins: 00 secs: 37.00


* We implement the Micrograd engine for backpropagation at an elementary (scalar) level.
* This notebook reproduces the work of Andrej Karpathy: https://github.com/karpathy/micrograd. Many thanks to him for this great contribution.


In [2]:
class Micro:
  """
  Scalar node of a tiny reverse-mode autodiff (micrograd-style) engine.

  Every instance stores a number (`data`), the gradient accumulated for it
  (`grad`), the operand nodes it was computed from (`_children`) and a
  closure (`_backward`) that pushes its own gradient on to those operands.
  --------------------
  parameters:
  data: the scalar value held by the node
  _children: iterable with the operand nodes that produced this node
  _op: short name of the operation that produced this node
  label: free-form tag attached to the node
  """
  def __init__(self,
               data,
               _children = (),
               _op = " ",
               label = " "):

    self.grad = 0.0
    self._backward = lambda: None # leaf nodes have nothing to propagate
    self.data = data
    self._children = set(_children)
    self._op = _op
    self.label = label

  def __add__(self, other):
    """
    Addition node: d(out)/d(self) = d(out)/d(other) = 1.
    Plain numbers are wrapped into a Micro first.
    """
    other = other if isinstance(other, Micro) else Micro(other)
    out = Micro(self.data + other.data, (self, other), "+", "plus")

    def _backward():
      # "+=" so that a node used several times accumulates all contributions
      self.grad += 1.0 * out.grad
      other.grad += 1.0 * out.grad
    out._backward = _backward
    return out

  def __mul__(self, other):
    """
    Multiplication node: each operand receives the other operand's value
    times the upstream gradient. Plain numbers are wrapped into a Micro first.
    """
    other = other if isinstance(other, Micro) else Micro(other)
    out = Micro(self.data * other.data, (self, other), "*", "mul")

    def _backward():
      self.grad += other.data * out.grad
      other.grad += self.data * out.grad
    out._backward = _backward
    return out

  def __pow__(self, other):
    """
    Power node with a constant (int or float) exponent:
    d(x ** k)/dx = k * x ** (k - 1).
    """
    assert isinstance(other, (float, int))
    out = Micro(self.data ** other, (self,), "pow", "power")

    def _backward():
      self.grad += other * (self.data **(other - 1)) * out.grad

    out._backward = _backward
    return out

  def tanh(self):
    """
    tanh activation node: d(tanh(x))/dx = 1 - tanh(x) ** 2.
    """
    # math.tanh saturates to +/-1 for large |x|, whereas the hand-written
    # (e^{2x} - 1) / (e^{2x} + 1) raises OverflowError once e^{2x} overflows
    t = math.tanh(self.data)
    out = Micro(t, (self,), "tanh", "activation")

    def _backward():
      self.grad += (1 - t**2) * out.grad
    out._backward = _backward
    return out


  def sigmoid(self):
    """
    Logistic activation node: d(s(x))/dx = s(x) * (1 - s(x)).
    """
    x = self.data
    # Only ever exponentiate a non-positive number so math.exp cannot overflow
    if x >= 0:
      s = 1 / (1 + math.exp(-x))
    else:
      exp = math.exp(x)
      s = exp / (1 + exp)
    out = Micro(s, (self,), "sigmoid", "activation")

    def _backward():
      self.grad += s * (1 - s) * out.grad
    out._backward = _backward
    return out

  def relu(self):
    """
    ReLU activation node: passes the gradient through only where the
    output is strictly positive.
    """
    r = 0 if self.data < 0 else self.data
    out = Micro(r, (self,), "ReLU", "activation")

    def _backward():
      self.grad += (r > 0) * out.grad
    out._backward = _backward
    return out

  def backward(self):
    """
    Run backpropagation from this node through the whole graph below it.

    The nodes are ordered topologically (operands before results), this
    node's gradient is seeded with 1.0, and every node's `_backward` is then
    called in reverse order so a node is processed before its operands.
    """
    topo = [] # nodes in post-order: every operand precedes its result
    visited = {self} # nodes that were already pushed on the stack
    # Iterative depth-first search with an explicit stack, so that deep
    # graphs (long chains of operations) cannot hit Python's recursion limit
    stack = [(self, iter(self._children))]
    while stack:
      node, children = stack[-1]
      for child in children:
        if child not in visited:
          visited.add(child)
          stack.append((child, iter(child._children)))
          break
      else:
        # every operand of `node` has been emitted already
        topo.append(node)
        stack.pop()

    self.grad = 1.0 # initialize the grad of the terminal node to 1 i.e dc/dc == 1
    for node in reversed(topo):
      node._backward()

  def __neg__(self):
    return self * (-1)

  def __sub__(self, other):
    return self + (-other)

  def __rsub__(self, other):
    # other - self, where `other` is a plain number: start from the Micro
    # operand so that Micro.__add__ does the wrapping
    return (-self) + other

  def __rmul__(self, other):
    return self * other

  def __truediv__(self, other):
    return self * other ** (-1)

  def __rtruediv__(self, other):
    return other * self **(-1)

  def __radd__(self, other):
    # `other + self` would dispatch straight back to __radd__ and recurse
    # forever, because `other` is not a Micro here
    return self + other

  def __repr__(self):
    return f"Micro(data = {self.data}, grad = {self.grad})"
    


In [3]:
# Testing our micrograd function for a single neuron
# One tanh neuron, o = tanh(x1*w1 + x2*w2 + b), followed by a backward pass
# Inputs
x1 = Micro(2.0, label ="x1")
x2 = Micro(0.0, label = "x2")
# Weights
w1 = Micro(-3.0, label = "w1")
w2 = Micro(4.0, label = "w2")

# Weighted inputs (labels are set afterwards because every operation
# stamps its own default label on the node it creates)
x1w1 = w1 * x1
x1w1.label = "x1w1"
x2w2 = w2 * x2
x2w2.label = "x2w2"
x1w1x2w2 = x1w1 + x2w2
x1w1x2w2.label = "x1w1x2w2"

# Bias and pre-activation
b = Micro(6.78689948, label = "bias")
n = x1w1x2w2 + b
n.label = "neuron"

# Activation, then backpropagate from the output
o = n.tanh()
o.label = "output"
o.backward()

In [10]:
# An addition node hands the incoming gradient unchanged to both of its operands,
# so every node on the additive path below the tanh input holds the same gradient
assert n.grad == x1w1x2w2.grad == b.grad == x1w1.grad == x2w2.grad

In [11]:
# w2 is multiplied by x2, whose value is 0.0, so the product rule gives w2 a zero gradient
assert w2.grad == 0.0

* Now we implement a simple neural network from scratch on top of this engine


In [19]:
class Module:
  """
  Parent class inherited by the model classes. It resets the gradients
  of all parameters to zero before the next backpropagation step.
  """
  def zero_grad(self):
    """Set the gradient of every parameter back to 0.0."""
    for p in self.parameters():
      # float zero, consistent with the initial value Micro gives to `grad`
      p.grad = 0.0

  def parameters(self):
    """Return the list of trainable parameters (none for the base class)."""
    return []

class Neuron(Module):
  """
  A single neuron: weighted sum of the inputs plus a bias, optionally
  passed through a ReLU.
  --------------------
  parameters:
  n_in: input dim (one weight is created per input)
  act: when true the output goes through ReLU, otherwise it stays linear
  """
  def __init__(self, n_in, act = True):
    weights = []
    for _ in range(n_in):
      weights.append(Micro(random.uniform(-1,1)))
    self.w = weights # randomly initialized weights
    self.b = Micro(random.uniform(-1,1)) # randomly initialized bias
    self.act = act

  def __call__(self, x):
    # Start from the bias and add the weighted inputs one after the other
    total = self.b
    for wi, xi in zip(self.w, x):
      total = total + wi*xi
    if self.act:
      return total.relu()
    return total

  def parameters(self):
    # bias first, then the weights
    return [self.b, *self.w]

  def __repr__(self):
    kind = 'ReLU' if self.act else 'Linear'
    return f"{kind}Neuron({len(self.w)})"

class Layer(Module):
  """
  A group of neurons that all receive the same input
  -----------------------------
  parameters: n_in---> inputs dim, n_out ---> number of neurons,
  **kwargs ---> forwarded unchanged to every Neuron
  """
  def __init__(self, n_in, n_out, **kwargs):
    units = []
    for _ in range(n_out):
      units.append(Neuron(n_in, **kwargs))
    self.neurons = units

  def __call__(self, x):
    activations = [neuron(x) for neuron in self.neurons]
    # A layer with exactly one neuron returns the bare value, not a list
    if len(activations) == 1:
      return activations[0]
    return activations

  def parameters(self):
    # Flatten the per-neuron parameter lists, keeping the neuron order
    params = []
    for neuron in self.neurons:
      params.extend(neuron.parameters())
    return params

  def __repr__(self):
    members = ', '.join(str(n) for n in self.neurons)
    return f"Layer of[{members}]"

class MLP(Module):
  """
  Multi-layer perceptron built as a chain of Layer objects
  -------------------------
  parameters
  n_in : input dim
  n_out: list with the number of neurons of each layer, in order
  **kwargs: accepted but not used by this class
  """
  def __init__(self, n_in, n_out, **kwargs):
    widths = [n_in] + n_out # input dim followed by the size of every layer
    last = len(n_out) - 1
    chain = []
    for i in range(len(n_out)):
      # every layer gets an activation except the last one, which stays linear
      chain.append(Layer(widths[i], widths[i + 1], act = i != last))
    self.layers = chain

  def __call__(self, x):
    # Feed the output of each layer into the next one
    out = x
    for layer in self.layers:
      out = layer(out)
    return out

  def parameters(self):
    params = []
    for layer in self.layers:
      params.extend(layer.parameters())
    return params

  def __repr__(self):
    members = ', '.join(str(l) for l in self.layers)
    return f"MLP of[{members}]"



In [20]:
# Testing our model class:

In [25]:
# Initialize the network with 5 layers
# Build the network: a 4-dim input followed by layers of 6, 9, 3 and 1 neurons
n_in = 4
n_out = [6, 9, 3, 1]
mlp = MLP(n_in, n_out)

# Toy data for a simple binary problem: five 4-dim samples and their +1 / -1 labels
samples = [
    [1.0, 2.0, 1.8, 2.0],
    [3.8, 4.2, 2.8, 3.0],
    [3.1, 2.8, 3.6, 1.0],
    [1.1, 1.8, 2.7, 1.1],
    [1.0, 1.4, 0.8, 1.6],
]
labs = [1, -1, -1, 1, 1]

# Forward pass only: one prediction per sample
preds = list(map(mlp, samples))
display(">>>> Predictions are:")
preds

'>>>> Predictions are:'

[Micro(data = 0.22956359855460248, grad = 0.0),
 Micro(data = 0.23261255990481733, grad = 0.0),
 Micro(data = 0.23901272972494236, grad = 0.0),
 Micro(data = 0.23404434337308302, grad = 0.0),
 Micro(data = 0.09064504437412427, grad = 0.0)]

* In the next notebook we will train our network using Micrograd :-)