## MicroGrad by Andrej Karpathy
[Video](https://www.youtube.com/watch?v=VMj-3S1tku0), [Source](https://github.com/karpathy/micrograd)

Translated from Python to JavaScript by Joshua Moore

Tuesday, February 4th, 2025

The `Value` class represents a single number used in a calculation. It also tracks that number's gradient: how much the value influences the final output of the calculation.

Several mathematical operations are defined on `Value`. To add another operation, also define a `_backward` function on the value it returns; that function is what passes the gradient back to the operation's inputs.

In [1]:
Value = class Value {
  /**
   * A scalar that remembers how it was computed so that gradients can be
   * propagated back through the expression that produced it.
   *
   * @param {number} data - The numeric value.
   * @param {Iterable<Value>} [children=[]] - The operands this value was computed from.
   * @param {string} [op=''] - Name of the operation that produced this value.
   * @param {string} [label=''] - Optional human-readable label.
   */
  constructor(data, children=[], op='', label=''){
    this.data = data;
    this.prev = new Set(children);
    this.op = op;
    this.label = label;
    this.grad = 0;
    // Leaf values have nothing to propagate to; operations overwrite this.
    this._backward = () => {};
  }

  /** @returns {string} Label, data and grad, the numbers rounded to three decimals. */
  toString(){
    return `Value ${this.label + ' '}(data=${this.data.toFixed(3)} grad=${this.grad.toFixed(3)})`;
  }

  /**
   * @param {Value|number} other - Plain numbers are wrapped in a Value.
   * @returns {Value} this + other
   */
  add(other){
    other = other instanceof Value ? other : new Value(other);
    const out = new Value(this.data + other.data, [this, other], '+');
    out._backward = () => {
      // Gradients accumulate (+=) because one value can feed several operations.
      this.grad += out.grad;
      other.grad += out.grad;
    };

    return out;
  }

  /**
   * @param {Value|number} other - Plain numbers are wrapped in a Value.
   * @returns {Value} this * other
   */
  mul(other){
    other = other instanceof Value ? other : new Value(other);
    const out = new Value(this.data * other.data, [this, other], '*');
    out._backward = () => {
      this.grad += other.data * out.grad;
      other.grad += this.data * out.grad;
    };

    return out;
  }

  /**
   * @param {number} other - A constant exponent (not a Value).
   * @returns {Value} this ** other
   */
  pow(other){
    // console.assert takes the condition first and the message second; it
    // logs when the condition is falsy and never throws.
    console.assert(
      typeof other === 'number' || other instanceof Number,
      `Argument other must be a Number.`
    );
    const out = new Value(this.data**other, [this], `**${other}`);
    out._backward = () => {
      this.grad += (other * this.data**(other-1)) * out.grad;
    };

    return out;
  }

  /** @returns {Value} 0 for negative data, otherwise the data unchanged. */
  relu(){
    const out = new Value(this.data < 0 ? 0 : this.data, [this], 'ReLu');
    out._backward = () => {
      this.grad += (out.data > 0 ? 1 : 0) * out.grad;
    };

    return out;
  }

  /**
   * Sets this value's grad to 1 and runs every `_backward` in reverse
   * topological order, so each node's grad is complete before it is
   * passed on to its operands.
   *
   * @returns {Value[]} The topological ordering (operands before results).
   */
  backward(){
    const topo = [];
    const visited = new Set();
    const buildTopo = (v) => {
      if(!visited.has(v)){
        visited.add(v);
        for(const child of v.prev){
          buildTopo(child);
        }
        topo.push(v);
      }
    };
    buildTopo(this);

    this.grad = 1.0;
    for(let i = topo.length - 1; i >= 0; i--){
      topo[i]._backward();
    }

    return topo;
  }

  /** @returns {Value} -this */
  neg(){
    return this.mul(-1);
  }

  /**
   * @param {Value|number} other - Plain numbers are wrapped in a Value.
   * @returns {Value} this - other
   */
  sub(other){
    other = other instanceof Value ? other : new Value(other);
    return this.add(other.neg());
  }

  /**
   * @param {Value|number} other - Plain numbers are wrapped in a Value.
   * @returns {Value} this / other, computed as this * other**-1
   */
  div(other){
    // Wrap plain numbers, as add/mul/sub do; numbers have no pow() method.
    other = other instanceof Value ? other : new Value(other);
    return this.mul(other.pow(-1));
  }

  /** @returns {Value} Hyperbolic tangent of this value. */
  tanh(){
    // Math.tanh saturates at +/-1 for large inputs, where the
    // (e^2x - 1)/(e^2x + 1) form evaluates Infinity/Infinity = NaN.
    const t = Math.tanh(this.data);
    const out = new Value(t, [this], 'tanh');
    out._backward = () => {
      this.grad += (1 - t**2) * out.grad;
    };

    return out;
  }

  /** @returns {Value} e raised to this value. */
  exp(){
    const x = this.data;
    const out = new Value(Math.exp(x), [this], 'exp');
    out._backward = () => {
      // d/dx e^x = e^x, which is out.data.
      this.grad += out.data * out.grad;
    };

    return out;
  }

  /**
   * Collects every node and edge of the expression graph rooted at this value.
   *
   * @returns {[Set<Value>, Set<[Value, Value]>]} `[nodes, edges]`, where each
   *   edge is a `[child, parent]` pair.
   */
  trace(){
    const nodes = new Set();
    const edges = new Set();
    const build = (v) => {
      // Expand each node once; edge arrays are distinct objects, so the Set
      // cannot de-duplicate them if a shared node were walked again.
      if(nodes.has(v)){
        return;
      }
      nodes.add(v);
      for(const child of v.prev){
        edges.add([child, v]);
        build(child);
      }
    };
    build(this);
    return [nodes, edges];
  }
}





A Neuron holds one weight per input plus a bias, each stored as a `Value`.

In [1]:
Neuron = class Neuron {
  /**
   * A single neuron: one randomly initialised weight per input plus a bias.
   *
   * @param {number} nin - Number of inputs, and therefore of weights.
   * @param {boolean} [nonlin=true] - Stored on the instance; only read by toString().
   */
  constructor(nin, nonlin=true){
    // Weights start uniformly in [-1, 1); the bias starts at 0.
    this.w = Array.from({ length: nin }, () => new Value((Math.random() * 2) - 1));
    this.b = new Value(0);
    this.nonlin = nonlin;
  }

  /**
   * Computes tanh(sum(w[i] * x[i]) + b).
   *
   * @param {Array<Value|number>} x - One input per weight.
   * @returns {Value} The neuron's activation.
   */
  call(x){
    // Seed the sum with the bias so it becomes part of the expression graph
    // and receives a gradient during backward().
    const act = this.w.reduce((acc, wi, i) => acc.add(wi.mul(x[i])), this.b);

    // NOTE: tanh is applied whatever `nonlin` is, and the label produced by
    // toString() does not reflect that.
    return act.tanh();
  }

  /** @returns {Value[]} All weights followed by the bias. */
  parameters(){
    return this.w.concat([this.b]);
  }

  /** @returns {string} A label built from `nonlin` and the number of weights. */
  toString(){
    return `${this.nonlin ? 'ReLu' : 'Linear'} Neuron(${this.w.length})`;
  }
}





A layer is a collection of Neurons.

In [1]:
Layer = class Layer {
  /**
   * Creates `nout` neurons, each built with `nin` and `nonlin`.
   */
  constructor(nin, nout, nonlin){
    const slots = Array(nout).fill(0);
    this.neurons = slots.map(() => new Neuron(nin, nonlin));
  }

  /**
   * Passes `x` to every neuron. A layer with exactly one neuron returns that
   * neuron's output directly; otherwise an array of outputs is returned.
   */
  call(x){
    const results = [];
    for(const neuron of this.neurons){
      results.push(neuron.call(x));
    }

    if(results.length === 1){
      return results[0];
    }
    return results;
  }

  /**
   * Returns the parameters of all neurons as one flat array, in neuron order.
   */
  parameters(){
    return this.neurons.flatMap((neuron) => [...neuron.parameters()]);
  }

  /**
   * Describes the layer by listing each neuron's own description.
   */
  toString(){
    const described = [];
    for(const neuron of this.neurons){
      described.push(neuron.toString());
    }
    const listing = described.join(', ');
    return `Layer of [{${listing}}]`;
  }
}





A multilayer perceptron consists of a number of layers.

In [1]:
MLP = class MLP {
  /**
   * Chains one Layer per entry of `nouts`. Layer i takes sizes[i] inputs and
   * produces sizes[i+1] outputs, where sizes is `nin` followed by `nouts`.
   * Every layer except the last is constructed with nonlin = true.
   */
  constructor(nin, nouts){
    const sizes = [nin].concat(...nouts);
    const lastIndex = nouts.length - 1;
    this.layers = Array(nouts.length).fill(0).map((_, index) => {
      const isHidden = index < lastIndex;
      return new Layer(sizes[index], sizes[index + 1], isHidden);
    });
  }

  /**
   * Feeds `x` through the layers in order, each layer receiving the previous
   * layer's output, and returns the final result.
   */
  call(x){
    return this.layers.reduce((signal, layer) => layer.call(signal), x);
  }

  /**
   * Returns the parameters of all layers as one flat array, in layer order.
   */
  parameters(){
    return this.layers.flatMap((layer) => [...layer.parameters()]);
  }

  /**
   * Describes the network by listing each layer's own description.
   */
  toString(){
    const described = [];
    for(const layer of this.layers){
      described.push(layer.toString());
    }
    const listing = described.join(', ');
    return `MLP of [{${listing}}]`;
  }
}





In [2]:
// Demo: train a 3-4-4-1 MLP on four samples with plain gradient descent.
let x = [2.0, 3.0, -1.0];
let mlp = new MLP(3, [4, 4, 1]);
let rounds = 1000;
// size of each gradient-descent step
const learningRate = 0.01;
mlp.call(x);

// inputs
let xs = [
  [2.0, 3.0, -1.0],
  [3.0, -1.0, 0.5],
  [0.5, 1.0, 1.0],
  [1.0, 1.0, -1.0]
];

// correct (training) outputs
let ys = [1.0, -1.0, -1.0, 1.0];

// Runs the MLP on every input and returns [predictions, loss], where loss is
// the sum of squared differences between each prediction and its target.
const forward = () => {
  const preds = xs.map((input) => mlp.call(input));
  const total = ys
    .map((ygt, idx) => preds[idx].sub(ygt).pow(2))
    .reduce((acc, term) => acc.add(term), new Value(0));
  return [preds, total];
};

console.log(`target values:<br>`, JSON.stringify(ys));

// prediction by the MLP and its loss before any optimization
let [predictions, loss] = forward();

// calculate the gradients of all expressions' nodes
loss.backward();

// log values to output so we can see
console.log(`<br>prediction before optimization:<br>`, JSON.stringify(predictions.map(y => parseFloat(y.data.toFixed(3))), null, 2));
console.log(`<br>starting loss:<br> ${loss.data.toFixed(3)}`);

// optimization
// Nudge every parameter against its gradient, scaled by a small constant
// that controls the size of each step.
for(let step = 0; step < rounds; step++){
  // mlp predict for each input, and the resulting loss
  [predictions, loss] = forward();

  // Reset the gradients: the parameter Values are reused on every pass and
  // each _backward adds to .grad with +=, so without this the gradients of
  // earlier passes would pile up in the new ones.
  const params = mlp.parameters();
  for(const p of params){
    p.grad = 0.0;
  }

  // calculate the mlp's .grads
  loss.backward();

  // Move each parameter by -learningRate * p.grad.
  // - negative, because p.grad points in the direction that increases the loss
  // - learningRate is small so a step neither over-shoots nor takes too long
  for(const p of params){
    p.data += -learningRate * p.grad;
  }
}

console.log(`<br>rounds of optimization:<br>${rounds}`);
console.log(`<br>predictions after optimization:<br>`, JSON.stringify(predictions.map(y => parseFloat(y.data.toFixed(3))), null, 2));
console.log(`<br>final loss:<br> ${loss.data.toFixed(3)}`);

target values:<br> [1,-1,-1,1]<br>prediction before optimization:<br> [
  -0.005,
  -0.098,
  -0.302,
  0.146
]<br>starting loss:<br> 3.041<br>rounds of optimization:<br>1000<br>predictions after optimization:<br> [
  0.971,
  -0.977,
  -0.971,
  0.974
]<br>final loss:<br> 0.003

