In [1]:
//default_exp nn

# nn

> A few modules that can be used to build neural nets.

In [1]:
/**
Imports we need in nn.module.js
*/
import {shape,transpose,dotProduct,randn,zeros,argmax} from './src/util.module.js';
import {matrixSum,matrixSubtract1d,matrixSubtract2d,matrixMultiply1d,matrixMultiply2d} from './src/util.module.js';
import {head,tail,parseCsv,IRIS_CLASS_MAP,IrisRowHandler,shuffle,split,batches} from './src/data.module.js';

In [2]:
// Imports we need for testing
import {testEq} from './src/testutil.module.js'

If you want to use this JS module in an HTML page, you'll probably need something like:
```
<script type="module">
  import {shape,transpose,dotProduct,randn,zeros,matrixSum,matrixSubtract2d,matrixMultiply2d} from './src/util.js';
  import {Sigmoid, BinaryCrossEntropyLoss, Linear} from './src/nn.js'
</script>
```
See: demo.html

In [3]:
/**
 * Fraction of rows whose prediction matches the target.
 *
 * A row that is an array is compared through the imported `argmax` helper
 * (prediction and target must yield the same result); any other row value is
 * compared directly using loose equality.
 *
 * @param {Array} yPred2d - predictions, one entry per row
 * @param {Array} yTrue2d - targets, one entry per row
 * @returns {number} matching rows divided by `yPred2d.length` (NaN when empty)
 * @throws {Error} when the two inputs differ in length
 */
function accuracy(yPred2d,yTrue2d) {
    if (yPred2d.length != yTrue2d.length) {
        throw new Error(`Expected yPred2d.length ${yPred2d.length} to equal yTrue2d.length ${yTrue2d.length}`);
    }
    // Decide whether a single prediction row agrees with its target row.
    const isHit=(predRow,trueRow) => (
        Array.isArray(predRow)
            ? argmax(predRow) == argmax(trueRow)
            : predRow == trueRow
    );
    const hits=yPred2d.filter((predRow,row) => isHit(predRow,yTrue2d[row])).length;
    return hits/yPred2d.length;
}

In [4]:
// Every row agrees with its target on argmax, so accuracy is perfect.
let yPred=[
    [0,.1],[0,.9],[0,.33],
    [0,1],[0,1],[0,1],
    [0,1],[0,1],[1,0]
];
let yTrue=[
    [0,1],[0,1],[0,1],
    [0,1],[0,1],[0,1],
    [0,1],[0,1],[1,0]
];
testEq(1.0, accuracy(yPred,yTrue));
// Flip the winner of the first row only: 8 of the 9 rows remain correct.
yPred[0][0]=.8;
testEq(8/9, accuracy(yPred,yTrue));
// accuracy doesn't care if we're not within 0 and 1
yPred.forEach(row => { row[0]=1.1; });
testEq(1/9, accuracy(yPred,yTrue));

In [5]:
/**
 * Binary cross entropy loss, averaged over every element of every row.
 *
 * Predictions are treated as probabilities. Before they are used they are
 * clamped to `[eps, 1-eps]`, so a saturated prediction (exactly 0 or 1) gives a
 * large but finite loss and gradient instead of `Infinity` (which turns into
 * `NaN` as soon as it is multiplied by a zero local gradient upstream).
 */
class BinaryCrossEntropyLoss {
    /**
     * @param {number} [eps=1e-12] - clamp margin applied to predictions;
     *     pass 0 to disable clamping entirely
     */
    constructor(eps=1e-12) {
        this.eps=eps;
    }
    /**
     * Keep a prediction inside `[eps, 1-eps]`.
     * @param {number} yPred - a single predicted probability
     * @returns {number} the clamped prediction
     */
    _clamp(yPred) {
        return Math.min(Math.max(yPred,this.eps),1-this.eps);
    }
    /**
     * Mean loss of one row.
     * @param {number[]} yPred1d - predictions for one row
     * @param {number[]} yTrue1d - targets for one row
     * @returns {number} negated mean log-likelihood of the row
     */
    _forward1d(yPred1d,yTrue1d) {
        const logLikelihoods=yPred1d.map((yPred,i) => {
            const p=this._clamp(yPred);
            // Loose equality kept so anything loosely equal to 1 still counts as positive.
            return Math.log((yTrue1d[i]==1.) ? p : 1-p);
        });
        return -logLikelihoods.reduce((a,b) => a+b) / logLikelihoods.length;
    }
    /**
     * Mean loss over all rows; stores both arguments for `backward`.
     * @param {number[][]} yPred2d - predictions, one row per sample
     * @param {number[][]} yTrue2d - targets, same layout as `yPred2d`
     * @returns {number} mean of the per-row losses
     */
    forward(yPred2d,yTrue2d) {
        this.yPred2d=yPred2d;
        this.yTrue2d=yTrue2d;
        const lossValue1d=yPred2d.map((yPred1d,i) => this._forward1d(yPred1d,yTrue2d[i]));
        return lossValue1d.reduce((a,b) => a+b) / lossValue1d.length;
    }
    /**
     * Per-element gradient for one row: `-1/p` for a positive target,
     * `1/(1-p)` otherwise, using the clamped prediction `p`.
     * @param {number[]} yPred1d - predictions for one row
     * @param {number[]} yTrue1d - targets for one row
     * @returns {number[]} gradients, one per prediction
     */
    _backward1d(yPred1d,yTrue1d) {
        return yPred1d.map((yPred,i) => {
            const p=this._clamp(yPred);
            return (yTrue1d[i]==1.) ? -1/p : 1/(1-p);
        });
    }
    /**
     * Gradient of the loss with respect to the predictions given to the last
     * `forward` call. Values are not divided by the row length or batch size.
     * @returns {number[][]} gradients, also stored on `this.grad`
     */
    backward() {
        const yTrue2d=this.yTrue2d;
        this.grad=this.yPred2d.map((yPred1d,i) => this._backward1d(yPred1d,yTrue2d[i]));
        return this.grad;
    }
}

In [6]:
/**
 * Element-wise logistic sigmoid layer.
 */
class Sigmoid {
    /**
     * Apply `1/(1+e^-x)` to every element and keep the outputs for `backward`.
     * @param {number[][]} x2d - rows of inputs
     * @returns {number[][]} activations, also stored on `this.results`
     */
    forward(x2d) {
        const squash=(x) => 1./(1.+Math.pow(Math.E, -x));
        const activations=x2d.map((row) => row.map((x) => squash(x)));
        this.results=activations;
        return activations;
    }
    /**
     * Chain the incoming gradients through the sigmoid.
     * @param {number[][]} gradients - upstream gradients, same layout as the last output
     * @returns {number[][]} gradients w.r.t. the layer input, also stored on `this.grad`
     */
    backward(gradients) {
        // local derivative of the sigmoid expressed through its own output
        const localGrad=(s) => s * (1.-s);
        const chained=this.results.map((row,r) => {
            return row.map((s,c) => localGrad(s) * gradients[r][c]);
        });
        this.grad=chained;
        return chained;
    }
}

In [7]:
/**
 * Rectified linear unit layer: negative inputs become 0, positive inputs pass
 * through unchanged.
 */
class ReLU {
    /**
     * Apply `max(0, x)` element-wise and record which inputs were positive.
     *
     * The mask is built directly from the input rows, so it always has the same
     * layout as `x2d` and no state is mutated from inside a `map` callback.
     *
     * @param {number[][]} x2d - rows of inputs
     * @returns {number[][]} activations, same layout as `x2d`
     */
    forward(x2d) {
        // 1 where the input was strictly positive, 0 everywhere else
        this.gradMask=x2d.map(x1d => x1d.map(x => (x>0 ? 1 : 0)));
        return x2d.map(x1d => x1d.map(x => Math.max(0,x)));
    }
    /**
     * Element-wise product of two 2d arrays with the same layout.
     * @param {number[][]} a2d - left operand; determines the output layout
     * @param {number[][]} b2d - right operand
     * @returns {number[][]} element-wise product
     */
    matrixMultiply(a2d, b2d) {
        return a2d.map((a1d,rowIndex) => a1d.map((a,colIndex) => a*b2d[rowIndex][colIndex]));
    }
    /**
     * Let the upstream gradient through only where the last input was positive.
     * @param {number[][]} gradient - upstream gradients, same layout as the last input
     * @returns {number[][]} gradients w.r.t. the layer input
     */
    backward(gradient) {
        return this.matrixMultiply(this.gradMask,gradient);
    }
}

In [8]:
// Sanity check for ReLU: feeding `data` back in as the upstream gradient
// multiplies it by the 0/1 mask recorded in forward, which testEq expects to
// equal forward's own output (positive entries kept, the rest zeroed).
let relu = new ReLU();
let data = [
  [ -0.3132450550822199, 0.06746248970796562, 0.7502210053477679 ],
  [ 0.32586239499711434, 0.276573231917191, 0.4718188033994297 ],
  [ 0.3375259522729109, -1.4738907605515226, -0.11109898767917284 ],
  [ -0.6095143988686595, 1.094470501593892, -0.4982351760328258 ],
  [ 0.28664244098736347, -0.35879217465991975, -0.754257906608068 ]
];
testEq(relu.forward(data),relu.backward(data));

In [9]:
/**
 * Fully connected layer: `forward(x)` returns `x . weights + bias`.
 */
class Linear {
    /**
     * @param {number} inputDim - number of input features
     * @param {number} [numHidden=1] - number of output units
     * @param {boolean} [bias=true] - when false, `update` never changes the bias
     */
    constructor(inputDim,numHidden=1,bias=true) {
        this.inputDim=inputDim;
        this.numHidden=numHidden;
        // Kaiming Init: values from randn scaled by sqrt(2/inputDim)
        const initial=randn(inputDim,numHidden);
        this.weights=matrixMultiply2d(initial, Math.sqrt(2.0/inputDim));
        this.bias=zeros(numHidden);
        this.updateBias=bias;
    }
    /**
     * @param x - layer input, shape(bs,inputDim); kept for backward/update
     * @returns the input projected through the weights, plus the bias
     */
    forward(x) {
        this.x=x;
        const projected=dotProduct(x,this.weights);
        return matrixSum(projected, this.bias);
    }
    /**
     * Compute gradients for weights, bias and input from the upstream gradient.
     * @param gradient - upstream gradient, shape(bs,numHidden)
     * @returns gradient w.r.t. the layer input, also stored on `this.xGradient`
     */
    backward(gradient) {
        // weightsGradient/biasGradient need to be the same shape as weights/bias
        const sumOf=(values) => values.reduce((total,value) => total+value);
        const xT=transpose(this.x);
        this.weightsGradient=dotProduct(xT, gradient);
        // equivalent of gradient.sum(axis=0): one total per output column
        const gradientColumns=transpose(gradient);
        this.biasGradient=gradientColumns.map((column) => sumOf(column));
        const weightsT=transpose(this.weights);
        this.xGradient=dotProduct(gradient,weightsT);
        return this.xGradient;
    }
    /**
     * Take one gradient-descent step using the gradients from `backward`.
     * @param {number} lr - learning rate before batch-size scaling
     */
    update(lr) {
        // gradient calculations in backward don't account for batch size, so we do it here
        const perSampleLr=lr/this.x.length;
        const weightStep=matrixMultiply2d(this.weightsGradient,perSampleLr);
        this.weights=matrixSubtract2d(this.weights,weightStep);
        if (!this.updateBias) {
            return;
        }
        const biasStep=matrixMultiply1d(this.biasGradient,perSampleLr);
        this.bias=matrixSubtract1d(this.bias,biasStep);
    }
}

In [10]:
/**
 * Small training harness: holds a model (an array of layers), a loss function
 * and a shuffled train/validation split of the data.
 */
class Learner {
    /**
     * @param {Array} model - layers, applied in order by `forward`
     * @param lossFn - object with `forward(preds, targets)`, `backward()` and a `grad` property
     * @param data - passed through `shuffle` then `split`; the result is read as
     *     `[[xTrain,xValid],[yTrain,yValid]]`
     * @param {Function[]} [metrics=[accuracy]] - each is called as `metric(preds, yValid)`
     */
    constructor(model, lossFn, data, metrics=[accuracy]) {
        this.model=model;
        this.lossFn=lossFn;
        this.metrics=metrics;
        // unpack into locals first, then copy onto the instance
        const [[xTrain,xValid],[yTrain,yValid]]=split(shuffle(data));
        this.xTrain=xTrain;
        this.xValid=xValid;
        this.yTrain=yTrain;
        this.yValid=yValid;
    }
    /**
     * Feed `x` through every layer, first to last.
     * @returns output of the last layer
     */
    forward(x) {
        return this.model.reduce((activations,layer) => layer.forward(activations), x);
    }
    /**
     * Feed `gradients` back through every layer, last to first.
     * @returns gradient returned by the first layer
     */
    backward(gradients) {
        return this.model.reduceRight((upstream,layer) => layer.backward(upstream), gradients);
    }
    /**
     * Call `update(lr)` on every layer that has an `update` method.
     */
    step(lr) {
        this.model.forEach((layer) => {
            if (typeof layer.update!='function') {
                return; // layers without parameters have nothing to update
            }
            layer.update(lr);
        });
    }
    /**
     * Log the loss and metrics on the validation split.
     * @param {number} epoch - label printed with the results
     */
    validate(epoch) {
        const preds=this.forward(this.xValid);
        const lossValue=this.lossFn.forward(preds,this.yValid);
        const metricValues=this.metrics.map((metric) => metric(preds,this.yValid));
        console.log('epoch',epoch,'valid loss',lossValue,'metrics',metricValues);
    }
    /**
     * Train for `epochs` passes over the training split, validating before the
     * first epoch and after each one.
     * @param {number} epochs - number of passes
     * @param {number} [lr=0.1] - learning rate handed to `step`
     * @param {number} [bs=64] - NOTE(review): accepted but not forwarded to
     *     `batches`, so it currently has no effect here; confirm against the
     *     `batches` signature before wiring it through
     */
    fit(epochs, lr=0.1, bs=64) {
        this.validate(-1); // Note: we use epoch -1 to indicate before training
        for (let epoch=0; epoch<epochs; epoch++) {
            batches([this.xTrain,this.yTrain]).forEach(([xb,yb]) => {
                const preds=this.forward(xb);
                // forward is still required: the loss keeps preds/targets for backward
                this.lossFn.forward(preds,yb);
                this.lossFn.backward();
                this.backward(this.lossFn.grad);
                this.step(lr);
            });
            this.validate(epoch);
        }
    }
    /**
     * Run the model on `x` and pair each prediction with its label.
     * @param x - inputs
     * @param [y] - optional targets; when given, each row also gets the target label
     * @param {Function} [yToLabelFn] - maps a prediction or target to a label (identity by default)
     * @returns {Array} rows of `[pred, predLabel]` or `[pred, predLabel, targetLabel]`
     */
    predict(x,y,yToLabelFn=(a=>a)) {
        return this.forward(x).map((pred,rowIndex) => (
            y!=null
                ? [pred,yToLabelFn(pred),yToLabelFn(y[rowIndex])]
                : [pred,yToLabelFn(pred)]
        ));
    }
}

## Train a linear model to classify iris flowers

In [11]:
// Load the raw iris file as a string and parse it with the iris-specific row handler.
let stringData=require('fs').readFileSync('data/iris.data').toString();
let data=parseCsv(stringData, new IrisRowHandler()).result;
let lossFn=new BinaryCrossEntropyLoss();
// Single linear layer (4 inputs -> 3 outputs) followed by a sigmoid.
let model=[new Linear(4,3), new Sigmoid()];
let learn=new Learner(model, lossFn, data);
// 25 epochs with fit's default learning rate.
learn.fit(25);

epoch -1 valid loss 0.9305320052015166 metrics [ 0.3 ]
epoch 0 valid loss 0.8124346028126798 metrics [ 0.3 ]
epoch 1 valid loss 0.7201653916418457 metrics [ 0.3 ]
epoch 2 valid loss 0.6488545395140407 metrics [ 0.6666666666666666 ]
epoch 3 valid loss 0.5936857959188477 metrics [ 0.7333333333333333 ]
epoch 4 valid loss 0.5507305180157367 metrics [ 0.7666666666666667 ]
epoch 5 valid loss 0.5166975406367534 metrics [ 0.7666666666666667 ]
epoch 6 valid loss 0.4893861900067906 metrics [ 0.8 ]
epoch 7 valid loss 0.4670614675736316 metrics [ 0.8 ]
epoch 8 valid loss 0.4487808232578822 metrics [ 0.8 ]
epoch 9 valid loss 0.43325662260526904 metrics [ 0.8 ]
epoch 10 valid loss 0.4198407402419589 metrics [ 0.8333333333333334 ]
epoch 11 valid loss 0.4080527400753461 metrics [ 0.8666666666666667 ]
epoch 12 valid loss 0.398031094815362 metrics [ 0.8666666666666667 ]
epoch 13 valid loss 0.38909398161089465 metrics [ 0.8333333333333334 ]
epoch 14 valid loss 0.3811830351559472 metrics [ 0.8333333333333

## Train a neural net to classify iris flowers

In [12]:
// Same data and loss object as the previous cell, but with a 50-unit hidden
// layer and a ReLU in front of the output layer.
let model=[new Linear(4,50), new ReLU(), new Linear(50,3), new Sigmoid()];
let learn=new Learner(model, lossFn, data);
learn.fit(25);

epoch -1 valid loss 1.434770098015484 metrics [ 0.1 ]
epoch 0 valid loss 0.48049650975911656 metrics [ 0.6333333333333333 ]
epoch 1 valid loss 0.36288155473821376 metrics [ 0.6666666666666666 ]
epoch 2 valid loss 0.3217096761722764 metrics [ 0.8 ]
epoch 3 valid loss 0.30127967820682816 metrics [ 0.8 ]
epoch 4 valid loss 0.28929890515538015 metrics [ 0.8 ]
epoch 5 valid loss 0.2794691917450497 metrics [ 0.8 ]
epoch 6 valid loss 0.2712307964657556 metrics [ 0.7666666666666667 ]
epoch 7 valid loss 0.2653983446935495 metrics [ 0.7666666666666667 ]
epoch 8 valid loss 0.2608898715238113 metrics [ 0.7333333333333333 ]
epoch 9 valid loss 0.2562780325421997 metrics [ 0.7666666666666667 ]
epoch 10 valid loss 0.2528522471042441 metrics [ 0.7666666666666667 ]
epoch 11 valid loss 0.2493386459593269 metrics [ 0.7666666666666667 ]
epoch 12 valid loss 0.24650837829184813 metrics [ 0.7666666666666667 ]
epoch 13 valid loss 0.24323462295653867 metrics [ 0.7666666666666667 ]
epoch 14 valid loss 0.23882386

### Look at some predictions 

We use the lambda ```(y=>`${argmax(y)}: ${IRIS_CLASS_MAP[argmax(y)]}`)``` to convert predictions like `[0.000, 0.183, 0.843]` to readable labels.

In [13]:
// head(learn.predict(learn.xValid, learn.yValid)); run this to see "raw" targets
// Each row is [raw prediction, predicted label, target label]; the labelling
// function is applied to both the prediction and the target.
head(learn.predict(learn.xValid, learn.yValid, (y=>`${argmax(y)}: ${IRIS_CLASS_MAP[argmax(y)]}`)));

0 [
  [ 0.9854556163505043, 0.02841877927060306, 0.010853559374306819 ],
  '0: Iris-setosa',
  '0: Iris-setosa'
]
1 [
  [ 0.019228847712054718, 0.27706131337284035, 0.6547377938382565 ],
  '2: Iris-virginica',
  '1: Iris-versicolor'
]
2 [
  [ 0.9779458599906009, 0.026660450552892605, 0.001094613996604906 ],
  '0: Iris-setosa',
  '0: Iris-setosa'
]
3 [
  [ 0.045925734976965386, 0.9069343060086859, 0.01387515538599218 ],
  '1: Iris-versicolor',
  '1: Iris-versicolor'
]
4 [
  [ 0.08275706615474566, 0.4092522495956165, 0.6154607041282848 ],
  '2: Iris-virginica',
  '1: Iris-versicolor'
]
5 [
  [ 0.0382533287641072, 0.8109695581387877, 0.12731161483251058 ],
  '1: Iris-versicolor',
  '1: Iris-versicolor'
]
6 [
  [ 0.06380478511374656, 0.7380918427043541, 0.1137130775279813 ],
  '1: Iris-versicolor',
  '1: Iris-versicolor'
]
7 [
  [ 0.02802124453797384, 0.2961622737098596, 0.5534831892176728 ],
  '2: Iris-virginica',
  '1: Iris-versicolor'
]
8 [
  [ 0.0591675464874481, 0.39425041257170207, 0

Show how we could train a linear layer without `Learner`. This is not a proper training loop; each epoch we just:
- forward pass
- print training loss
- backward pass
- update

In [15]:
// Re-parse the iris data; the parsed result holds inputs at index 0 and targets at index 1.
let data=parseCsv(stringData, new IrisRowHandler()).result;
let x=data[0],y=data[1];
console.log('shape(x)',shape(x), 'shape(y)',shape(y));
let loss_fn=new BinaryCrossEntropyLoss()
let sig=new Sigmoid()
let lin=new Linear(4,3);
// Full-batch training on all rows at once: no shuffling, batching or validation.
for (let epoch = 0; epoch < 10; epoch++) {
    // forward pass: linear -> sigmoid -> loss
    let y_pred=sig.forward(lin.forward(x));
    let loss_value=loss_fn.forward(y_pred,y);
    console.log('epoch',epoch,'loss_value',loss_value);
    // backward pass in reverse order, each layer consuming the previous `grad`
    loss_fn.backward();
    sig.backward(loss_fn.grad);
    lin.backward(sig.grad);
    // update divides the learning rate by the number of rows in x
    lin.update(.1);
}

shape(x) [ 150, 4 ] shape(y) [ 150, 3 ]
epoch 0 loss_value 0.9458359423667567
epoch 1 loss_value 0.8865889120022508
epoch 2 loss_value 0.8332766329108048
epoch 3 loss_value 0.7855779844740713
epoch 4 loss_value 0.7430796777385983
epoch 5 loss_value 0.7053164053827152
epoch 6 loss_value 0.6718067785928921
epoch 7 loss_value 0.642080117607136
epoch 8 loss_value 0.6156935701091388
epoch 9 loss_value 0.5922413269342163


## Can we teach a linear layer to convert one hot encoded integers to their bitwise representations?

In [16]:
// Inputs: row i is the one-hot encoding of the integer i (0..9), i.e. a 10x10 identity matrix.
let x=[
    [1,0,0,0,0,0,0,0,0,0],
    [0,1,0,0,0,0,0,0,0,0],
    [0,0,1,0,0,0,0,0,0,0],
    [0,0,0,1,0,0,0,0,0,0],
    [0,0,0,0,1,0,0,0,0,0],
    [0,0,0,0,0,1,0,0,0,0],
    [0,0,0,0,0,0,1,0,0,0],
    [0,0,0,0,0,0,0,1,0,0],
    [0,0,0,0,0,0,0,0,1,0],
    [0,0,0,0,0,0,0,0,0,1]
];
// Targets: row i holds the 4-bit binary representation of i, least significant bit first.
let y=[
    [0,0,0,0],
    [1,0,0,0],
    [0,1,0,0],
    [1,1,0,0],
    [0,0,1,0],
    [1,0,1,0],
    [0,1,1,0],
    [1,1,1,0],
    [0,0,0,1],
    [1,0,0,1]
];

`x` is an identity matrix, so the dot product `x.y` is just `y`:

In [17]:
// Multiplying by the identity matrix x must leave y unchanged.
testEq(y,dotProduct(x,y))

so ... will `y` make the perfect weights (if bias is zero)?

In [18]:
let loss_fn=new BinaryCrossEntropyLoss()
let sig=new Sigmoid()
// Third argument false: the bias stays at its initial zeros, only the weights are trained.
let linearNoBias=new Linear(10,4,false);
let y_pred=null;
for (let epoch = 0; epoch<10; epoch++) {
    y_pred=sig.forward(linearNoBias.forward(x));
    const loss_value=loss_fn.forward(y_pred,y);
    // With 10 epochs this only logs the final one (epoch 9).
    if (epoch%10==9) {
        console.log('epoch',epoch,'loss_value',loss_value);
    }
    loss_fn.backward();
    sig.backward(loss_fn.grad);
    linearNoBias.backward(sig.grad);
    // update divides lr by the number of rows in x (10), so the step size used is 5
    linearNoBias.update(50);
}
// Predictions from the last forward pass (computed before the final update).
console.log(y_pred)

epoch 9 loss_value 0.018203853392163315
[
  [
    0.017765963419412955,
    0.018513679227884435,
    0.017972795056249616,
    0.018451380406563567
  ],
  [
    0.9817597923021617,
    0.01840876795577198,
    0.0177376152776771,
    0.017657065226376167
  ],
  [
    0.01814707486791065,
    0.9824483316853887,
    0.017426972862388104,
    0.018359622438672642
  ],
  [
    0.9824907176660984,
    0.9823106256683668,
    0.018383556123528686,
    0.018259000082134776
  ],
  [
    0.018018125511020982,
    0.01807740740719039,
    0.9819268646207838,
    0.01823975151390869
  ],
  [
    0.9819336676106034,
    0.01775467963127004,
    0.981584019217094,
    0.018213722369912238
  ],
  [
    0.01818387729428256,
    0.981947332342349,
    0.9822774018887132,
    0.01754689572683346
  ],
  [
    0.9818915608577578,
    0.9815547758995751,
    0.9823236931874945,
    0.0183591363387856
  ],
  [
    0.01837812312151495,
    0.01801695612215326,
    0.018493833748674117,
    0.9821619872125

Dump our linear layer to the output so we can look at the learned weights.

In [19]:
// Bare expression so the notebook prints the layer, including its learned weights.
linearNoBias

Linear {
  inputDim: 10,
  numHidden: 4,
  weights: [
    [
      -4.101374964189836,
      -4.063126596122098,
      -4.0906237580240115,
      -4.066249264060397
    ],
    [
      4.076919334707077,
      -4.068391730374297,
      -4.102859001712197,
      -4.10708979487589
    ],
    [
      -4.081667517315954,
      4.1126570892627505,
      -4.119290362707814,
      -4.0708693185405505
    ],
    [
      4.114906148313115,
      4.105390310343357,
      -4.069661849891186,
      -4.075964412786913
    ],
    [
      -4.088285253915077,
      -4.085236566941334,
      4.085455904016719,
      -4.076942528558559
    ],
    [
      4.085805302587522,
      -4.101965370015323,
      4.068028708358435,
      -4.078266970274971
    ],
    [
      -4.079788091016202,
      4.0865075456255475,
      4.103646190962427,
      -4.112910037614187
    ],
    [
      4.083644990579904,
      4.066558460383619,
      4.106077269872562,
      -4.070893860156097
    ],
    [
      -4.069935799261

In [19]:
export {Sigmoid,BinaryCrossEntropyLoss,ReLU,Linear,Learner}