## Welcome to the micrograd visualization experiment
This notebook demonstrates a graph visualization of a small Multilayer Perceptron's loss function at the website [verticesandedges.net](https://verticesandedges.net). Each vertex is a term, and although labels are disabled for this simulation, they contain the gradient of each term, or in other words, the influence of the term on the final result. 

The following code (the Value, Neuron, Layer and MLP classes) comes from @karpathy's [micrograd](https://github.com/karpathy/micrograd). I wrote the visualization based on a [2007 description](https://arxiv.org/abs/0712.1549) of a force-directed graph visualization by Canadian CS Professor Dr. Todd Veldhuizen.

I translated the micrograd code from Python to Javascript, and substituted my own graph visualization for the graphviz calls. 

To run this simulation, minimize the notebook (so you can see the middle of the page) and click Run All. Once the script has added all the vertices, there'll be a swarm of edges. Give that a little time to settle (if they all disappear, start over; if you changed the Constants, refresh the page to reset them). Then carefully increase first Repulsion, then Attraction, by a factor of ten: select the lower slider of each property and slide it one step to the right. The top slider is the base, the second the exponent. 

We use global variables to create classes that can be reused across cells.

In [2]:
Value = class Value {
  /**
   * A scalar node in a computation graph. Every arithmetic method returns a
   * new Value that remembers its operands and knows how to push its gradient
   * back into them.
   *
   * @param {number} data - The scalar held by this node.
   * @param {Iterable<Value>} [children=[]] - Operands this node was computed from.
   * @param {string} [op=''] - Name of the operation that produced this node.
   * @param {string} [label=''] - Optional human-readable name.
   */
  constructor(data, children=[], op='', label=''){
    this.id = `${Value.idPrefix}${++Value.idCount}`;
    this.data = data;
    this.prev = new Set(children);
    this.op = op;
    this.label = label;
    this.grad = 0;
    // Leaf nodes have nothing to propagate to; operations overwrite this.
    this._backward = () => {};
  }

  /** @returns {string} Label, data and grad, the numbers fixed to 3 decimals. */
  toString(){
    return `Value ${this.label + ' '}(data=${this.data.toFixed(3)} grad=${this.grad.toFixed(3)})`;
  }

  /**
   * @param {Value|number} other - Right operand; plain numbers are wrapped.
   * @returns {Value} this + other
   */
  add(other){
    other = other instanceof Value ? other : new Value(other);
    const out = new Value(this.data + other.data, [this, other], '+');
    out._backward = () => {
      // Gradients accumulate (+=) because a node may feed several consumers.
      this.grad += out.grad;
      other.grad += out.grad;
    };

    return out;
  }

  /**
   * @param {Value|number} other - Right operand; plain numbers are wrapped.
   * @returns {Value} this * other
   */
  mul(other){
    other = other instanceof Value ? other : new Value(other);
    const out = new Value(this.data * other.data, [this, other], '*');
    out._backward = () => {
      this.grad += other.data * out.grad;
      other.grad += this.data * out.grad;
    };

    return out;
  }

  /**
   * @param {number} other - Constant exponent (not a Value; it gets no gradient).
   * @returns {Value} this ** other
   */
  pow(other){
    // console.assert takes (condition, message). Primitive numbers are not
    // `instanceof Number`, so the check has to use typeof.
    console.assert(typeof other === 'number', `Argument other must be a Number.`);
    const out = new Value(this.data**other, [this], `**${other}`);
    out._backward = () => {
      this.grad += (other * this.data**(other-1)) * out.grad;
    };

    return out;
  }

  /** @returns {Value} max(0, this) */
  relu(){
    const out = new Value(this.data < 0 ? 0 : this.data, [this], 'ReLu');
    out._backward = () => {
      this.grad += (out.data > 0 ? 1 : 0) * out.grad;
    };

    return out;
  }

  /**
   * Sets this node's grad to 1 and runs every reachable node's _backward in
   * reverse topological order. Existing grads are NOT cleared first; callers
   * that reuse nodes across passes must zero them themselves.
   *
   * @returns {Value[]} The visited nodes in topological order (operands
   *   before the nodes computed from them; this node last).
   */
  backward(){
    const topo = [];
    const visited = new Set();
    const buildTopo = (v) => {
      if(!visited.has(v)){
        visited.add(v);
        for(const child of v.prev){
          buildTopo(child);
        }
        topo.push(v);
      }
    };
    buildTopo(this);

    this.grad = 1.0;
    for(let i = topo.length - 1; i >= 0; i--){
      topo[i]._backward();
    }

    return topo;
  }

  /** @returns {Value} -this */
  neg(){
    return this.mul(-1);
  }

  /**
   * @param {Value|number} other - Right operand; plain numbers are wrapped.
   * @returns {Value} this - other
   */
  sub(other){
    other = other instanceof Value ? other : new Value(other);
    return this.add(other.neg());
  }

  /**
   * @param {Value|number} other - Divisor; plain numbers are wrapped, like
   *   in add/mul/sub.
   * @returns {Value} this / other, expressed as this * other ** -1
   */
  div(other){
    other = other instanceof Value ? other : new Value(other);
    return this.mul(other.pow(-1));
  }

  /** @returns {Value} tanh(this) */
  tanh(){
    // Math.tanh saturates to +/-1 for large |x|. The hand-written form
    // (e^2x - 1) / (e^2x + 1) overflows to Infinity / Infinity = NaN there.
    const t = Math.tanh(this.data);
    const out = new Value(t, [this], 'tanh');
    out._backward = () => {
      this.grad += (1 - t**2) * out.grad;
    };

    return out;
  }

  /** @returns {Value} e ** this */
  exp(){
    const x = this.data;
    const out = new Value(Math.exp(x), [this], 'exp');
    out._backward = () => {
      // d/dx e^x = e^x, which is out.data.
      this.grad += out.data * out.grad;
    };

    return out;
  }

  // Every instance gets the id `${idPrefix}${n}` from this shared counter.
  static idPrefix = 'v'
  static idCount = 0;
}





In [0]:
Neuron = class Neuron {
  /**
   * A single unit holding `nin` weights and one bias.
   *
   * @param {number} nin - Number of inputs (one weight is created per input).
   * @param {boolean} [nonlin=true] - Stored and reported by toString().
   *   NOTE(review): call() applies tanh regardless of this flag, and
   *   toString() reports 'ReLu'/'Linear' — confirm which is intended.
   */
  constructor(nin, nonlin=true){
    // Weights start uniformly in [-1, 1); the bias starts at 0.
    this.w = Array(nin).fill(0).map(() => new Value((Math.random() * 2) - 1));
    this.b = new Value(0);
    this.nonlin = nonlin;
  }

  /**
   * Computes tanh(b + sum_i w[i] * x[i]).
   *
   * @param {Array<Value|number>} x - One entry per weight.
   * @returns {Value} The activation.
   */
  call(x){
    // Seeding the sum with the bias puts it in the graph, so backward()
    // reaches it and it is trained along with the weights.
    const act = this.w.reduce(
      (acc, wi, i) => acc.add(wi.mul(x[i])),
      this.b
    );

    return act.tanh();
  }

  /** @returns {Value[]} All weights followed by the bias. */
  parameters(){
    return [...this.w, this.b];
  }

  toString(){
    return `${this.nonlin ? 'ReLu' : 'Linear'} Neuron(${this.w.length})`;
  }
}





In [0]:
/**
 * Walks the graph reachable from `root` through each node's `prev` set.
 *
 * @param {{prev: Iterable<object>}} root - Node to start from.
 * @returns {{nodes: object[], edges: Array<[object, object]>}} Every node in
 *   first-visit order, and one [child, parent] pair per operand link.
 */
trace = (root) => {
  const seen = new Set();
  const nodes = [];
  const edges = [];

  const visit = (v) => {
    if (seen.has(v)) {
      return;
    }
    seen.add(v);
    nodes.push(v);
    for (const child of v.prev) {
      // Record the link before descending, so edges come out parent-first.
      edges.push([child, v]);
      visit(child);
    }
  };

  visit(root);
  return { nodes, edges };
};

/*
x = new Value(1.0);
y = x.mul(2).add(1).relu();
y.backward();
return trace(y)
*/





This is the draw function. It makes use of a built-in variable called `graph`, which this website provides. It is not standard JavaScript, so be aware that these notebooks only work as-is on this site. 

In [0]:
/**
 * Resolves (with no value) once `delay` milliseconds have passed.
 *
 * @param {number} delay - Timeout handed to setTimeout.
 * @returns {Promise<undefined>}
 */
const wait = async (delay) => {
  await new Promise((done) => {
    setTimeout(done, delay);
  });
};

/**
 * Adds the computation graph below `root` to the site-provided `graph`
 * object, one vertex at a time.
 *
 * For every node returned by trace(root) a vertex with the node's id is
 * added. Nodes with a non-empty `op` also get an extra vertex with id
 * `${id}${op}` and an arrow from that vertex to the node. Finally one arrow
 * is added per [child, parent] edge.
 *
 * @param {object} root - Node whose graph is drawn (passed to trace()).
 * @param {boolean} [labels=false] - Attach a text label to each vertex.
 * @param {number} [delay=10] - Milliseconds waited before each vertex is added.
 * @returns {Promise<void>}
 */
draw = async (root, labels = false, delay = 10) => {
  const labelOptions = {
    fontSize: '24px',
    backgroundColor: "transparent"
  };
  const { nodes, edges } = trace(root);

  for(const n of nodes){
    const valueVertex = {
      id: `${n.id}`,
      size: 0.25
    };
    if(labels){
      valueVertex.label = {
        text: `{ data ${n.data.toFixed(2)} | grad ${n.grad.toFixed(2)} }`,
        ...labelOptions
      };
    }

    await wait(delay);
    graph.addVertex(valueVertex);

    if(n.op){
      const opId = `${n.id}${n.op}`;
      const opVertex = {
        id: opId,
        size: 0.25
      };
      if(labels){
        opVertex.label = {
          text: opId,
          ...labelOptions
        };
      }

      await wait(delay);
      graph.addVertex(opVertex);
      graph.addEdge(opId, n.id, { arrow: true });
    }
  }

  // Declare the loop bindings: a bare `[n, m]` pattern assigns to undeclared
  // names, which leaks globals in sloppy mode and throws in strict mode.
  // NOTE(review): operands are linked to the result vertex itself, not to its
  // op vertex, so op vertices only ever have the one edge added above —
  // confirm this is the intended layout.
  for(const [child, parent] of edges){
    graph.addEdge(child.id, parent.id, { arrow: true });
  }
};

/*
x = new Value(1.0);
y = x.mul(new Value(2)).add(new Value(1)).relu();
y.backward();
draw(y);
*/

/*
n = new Neuron(2);
x = [new Value(1.0), new Value(-2.0)]
y = n.call(x)
y.backward()
draw(y);
*/





In [0]:
Layer = class Layer {
  /**
   * A group of `nout` neurons that all receive the same input.
   *
   * @param {number} nin - Inputs per neuron.
   * @param {number} nout - Number of neurons.
   * @param {boolean} nonlin - Forwarded unchanged to each Neuron.
   */
  constructor(nin, nout, nonlin){
    this.neurons = [...Array(nout)].map(() => new Neuron(nin, nonlin));
  }

  /**
   * Feeds `x` to every neuron.
   *
   * @param {Array} x - Input handed to each neuron's call().
   * @returns {*} The single output when the layer has exactly one neuron,
   *   otherwise the array of outputs.
   */
  call(x){
    const outs = [];
    for(const neuron of this.neurons){
      outs.push(neuron.call(x));
    }

    if(outs.length === 1){
      return outs[0];
    }
    return outs;
  }

  /** @returns {Array} Every neuron's parameters, concatenated in neuron order. */
  parameters(){
    return this.neurons.flatMap((neuron) => neuron.parameters());
  }

  toString(){
    const described = this.neurons.map((neuron) => neuron.toString());
    return `Layer of [{${described.join(', ')}}]`;
  }
}

/*
const l = new Layer(new Value(2), new Value(2));
const x = new Value(3);
const y = l.call(x);
draw(y)
*/





In [0]:
MLP = class MLP {
  /**
   * A stack of layers applied one after another.
   *
   * @param {number} nin - Size of the network input.
   * @param {number[]} nouts - Output size of each layer, in order. Every
   *   layer except the last is constructed with nonlin = true.
   */
  constructor(nin, nouts){
    const sizes = [nin].concat(...nouts);
    const lastIndex = nouts.length - 1;
    // Layer i maps sizes[i] inputs to sizes[i + 1] outputs.
    this.layers = Array.from(
      { length: nouts.length },
      (_, i) => new Layer(sizes[i], sizes[i + 1], i < lastIndex)
    );
  }

  /**
   * @param {*} x - Input for the first layer.
   * @returns {*} Whatever the last layer's call() returns.
   */
  call(x){
    return this.layers.reduce((signal, layer) => layer.call(signal), x);
  }

  /** @returns {Array} Every layer's parameters, concatenated in layer order. */
  parameters(){
    return this.layers.flatMap((layer) => layer.parameters());
  }

  toString(){
    const described = this.layers.map((layer) => layer.toString());
    return `MLP of [{${described.join(', ')}}]`;
  }
}





Here is where it all comes together. We train a small MLP to imitate some function, calculate its loss, and visualize the gradient graph of this loss. 

In [0]:
let x = [2.0, 3.0, -1.0]
let mlp = new MLP(3, [4, 4, 1]);
// Number of gradient-descent steps; drives the loop below and the report.
let rounds = 1000;
// Step size: small enough not to over-step, large enough not to take forever.
const learningRate = 0.01;
// Single forward pass as a smoke test; the result is not used.
mlp.call(x)

// inputs
let xs =[
  [2.0, 3.0, -1.0],
  [3.0, -1.0, 0.5],
  [0.5, 1.0, 1.0],
  [1.0, 1.0, -1.0]
];

// correct (training) outputs
let ys = [1.0, -1.0, -1.0, 1.0];

// Sum of squared differences between the targets `ys` and the predictions.
const squaredErrorLoss = (preds) => ys
  .map((ygt, idx) => preds[idx].sub(ygt).pow(2))
  .reduce((acc, l) => acc.add(l), new Value(0));

console.log(`target values:<br>`, JSON.stringify(ys));

// prediction by the MLP
let predictions = xs.map((input) => mlp.call(input));

// essentially beefed up difference between correct outputs and mlp outputs
let loss = squaredErrorLoss(predictions);

// calculate the gradients of all expressions' nodes
loss.backward()
// draw(loss)


// log values to output so we can see
console.log(`<br>prediction before optimization:<br>`, JSON.stringify(predictions.map(y => parseFloat(y.data.toFixed(3))), null, 2));
console.log(`<br>starting loss:<br> ${loss.data.toFixed(3)}`);

// optimization
// adjust the mlp's parameters against their gradient, scaled by
// learningRate to control the size of the variation steps.
for(let round = 0; round < rounds; round++){
  // mlp predict for each input
  predictions = xs.map((input) => mlp.call(input));
  loss = squaredErrorLoss(predictions);

  // Reset the parameters' gradients. backward() accumulates with `+=`, and
  // the parameter Values are reused every round, so without this the
  // gradients from all earlier rounds would be added in. (The intermediate
  // nodes are rebuilt by each forward pass and start at grad 0.)
  const params = mlp.parameters();
  for(const p of params){
    p.grad = 0.0;
  }

  // calculate the mlp's .grads
  loss.backward();

  // Move each parameter a small step against its gradient:
  // minus, because we want the loss to go down; p.grad says how strongly
  // (and in which direction) the loss reacts to this parameter.
  for(const p of params){
    p.data -= learningRate * p.grad;
  }

  // console.info(`${round} ${loss.data}`)
}

// NOTE: `predictions` and `loss` below come from the last forward pass, i.e.
// they were computed just before the final parameter update was applied.
console.log(`<br>rounds of optimization:<br>${rounds}`)
console.log(`<br>predictions after optimization:<br>`, JSON.stringify(predictions.map(y => parseFloat(y.data.toFixed(3))), null, 2));
console.log(`<br>final loss:<br> ${loss.data.toFixed(3)}`);

draw(loss)



