In [1]:
//default_exp nn

# nn

> A few modules that can be used to build neural nets.

In [2]:
/**
Imports we need in nn.module.js
*/
import {shape,transpose,dotProduct,randn,zeros,argmax,mean} from './src/util.module.js';
import {matrixSum1d,matrixSum2d,matrixSubtract1d,matrixSubtract2d,matrixMultiply1d,matrixMultiply2d} from './src/util.module.js';
import {head,tail,parseCsv,IRIS_CLASS_MAP,IrisRowHandler,shuffle,split,batches} from './src/data.module.js';

In [3]:
// Imports we need for testing
import {testEq} from './src/testutil.module.js'

In [4]:
/**
 * Fraction of rows whose prediction matches its target.
 *
 * A row of `yPred2d` that is an array is compared to the matching row of
 * `yTrue2d` by `argmax`; any other row value is compared to its target
 * directly with loose equality.
 *
 * @param {Array} yPred2d - predictions, one entry per row
 * @param {Array} yTrue2d - targets, same length as `yPred2d`
 * @returns {number} number of matching rows divided by `yPred2d.length`
 * @throws {Error} when the two inputs have different lengths
 */
function accuracy(yPred2d,yTrue2d) {
    const rowCount=yPred2d.length;
    if (rowCount != yTrue2d.length) {
        throw Error(`Expected yPred2d.length ${yPred2d.length} to equal yTrue2d.length ${yTrue2d.length}`)
    }
    // Fold over the rows, adding one for every row that matches its target.
    const hits=yPred2d.reduce((count, predicted, rowIndex) => {
        const expected=yTrue2d[rowIndex];
        const isHit=Array.isArray(predicted)
            ? argmax(predicted) == argmax(expected)
            : predicted == expected;
        return isHit ? count+1 : count;
    }, 0);
    return hits/rowCount;
}

In [5]:
// Every row's argmax agrees with its target, so accuracy is 1.
let yPred=[[0,.1],[0,.9],[0,.33],[0,1],[0,1],[0,1],[0,1],[0,1],[1,0]];
let yTrue=[[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[1,0]];
testEq(1.0, accuracy(yPred,yTrue));
// Push the first row's argmax to the wrong column: 8 of 9 rows still match.
yPred[0][0]=.8;
testEq(8/9, accuracy(yPred,yTrue));
// accuracy doesn't care if we're not within 0 and 1
yPred.forEach(row => { row[0]=1.1; });
testEq(1/9, accuracy(yPred,yTrue));

In [6]:
/**
 * Mean squared error loss.
 *
 * `forward` stores the element-wise difference between predictions and
 * targets in `this.error`; `backward` turns that stored difference into the
 * gradient and keeps it in `this.grad`.
 */
class MSE {
    /**
     * @param {number[][]} yPred2d - predictions
     * @param {number[][]} yTrue2d - targets
     * @returns {*} result of `mean` applied to the squared differences
     */
    forward(yPred2d,yTrue2d) {
        const residuals=matrixSubtract2d(yPred2d,yTrue2d);
        this.error=residuals;
        const squared=residuals.map(row => row.map(value => value**2));
        return mean(squared);
    }
    /**
     * Must be called after `forward`.
     *
     * @returns {number[][]} stored error scaled by 2 / (number of rows)
     */
    backward() {
        const scale=2/this.error.length;
        this.grad=matrixMultiply2d(this.error, scale);
        return this.grad;
    }
}

In [7]:
// Check MSE.forward on a fixed 3x4 example; the loss is compared after rounding to 3 decimals.
let mse=new MSE();
let mseValue=mse.forward(
    // predictions
    [[-0.1684, -1.0158, -1.3667,  1.4327],
    [ 0.0245, -0.6284, -2.5182,  2.2007],
    [-1.8774, -0.0352, -0.5946,  0.4272]],
    // targets
    [[-0.3516,  0.5787,  0.8858,  0.9198],
    [ 0.1892, -0.6473,  2.1278,  0.1345],
    [ 2.2919, -0.9939, -0.3137, -0.4314]]);
testEq(4.409, Math.round(mseValue*1000)/1000);

In [8]:
/**
 * Binary cross entropy loss.
 *
 * Every element is treated as an independent binary prediction: a target
 * equal to 1 is a positive, any other target is a negative. The loss of a row
 * is the mean of `-log(p)` over its elements, where `p` is the probability
 * the prediction assigns to the true class; the batch loss is the mean of the
 * row losses.
 *
 * `p` is floored at `eps` so that a saturated, wrong prediction (exactly 0 or
 * 1) produces a large finite loss/gradient instead of `Infinity`, which would
 * turn into `NaN` as soon as it is multiplied by a zero sigmoid gradient.
 */
class BinaryCrossEntropyLoss {
    /**
     * @param {number} [eps=1e-12] - smallest probability used in `log` and as a divisor
     */
    constructor(eps=1e-12) {
        this.eps=eps;
    }
    /**
     * Probability assigned to the true class, floored at `eps`.
     * Values that are already >= `eps` (and NaN) pass through unchanged.
     */
    _trueClassProbability(yPred,yTrue) {
        const probability=(yTrue==1.) ? yPred : 1-yPred;
        return Math.max(probability, this.eps);
    }
    /**
     * @param {number[]} yPred1d - predictions for one row
     * @param {number[]} yTrue1d - targets for one row
     * @returns {number} mean negative log likelihood of the row
     */
    _forward1d(yPred1d,yTrue1d) {
        const logLikelihoods=yPred1d.map(
            (yPred,i) => Math.log(this._trueClassProbability(yPred,yTrue1d[i])));
        return -logLikelihoods.reduce((a,b) => a+b) / logLikelihoods.length;
    }
    /**
     * Stores both inputs for `backward`.
     *
     * @param {number[][]} yPred2d - predictions
     * @param {number[][]} yTrue2d - targets
     * @returns {number} mean of the per-row losses
     */
    forward(yPred2d,yTrue2d) {
        this.yPred2d=yPred2d;
        this.yTrue2d=yTrue2d;
        const lossValue1d=yPred2d.map((yPred1d,i) => this._forward1d(yPred1d,yTrue2d[i]));
        return lossValue1d.reduce((a,b) => a+b) / lossValue1d.length;
    }
    /**
     * @param {number[]} yPred1d - predictions for one row
     * @param {number[]} yTrue1d - targets for one row
     * @returns {number[]} `-1/p` for positives and `1/(1-p)` for negatives
     */
    _backward1d(yPred1d,yTrue1d) {
        return yPred1d.map((yPred,i) => {
            const isPositive=(yTrue1d[i]==1.);
            const divisor=this._trueClassProbability(yPred,yTrue1d[i]);
            return isPositive ? -1/divisor : 1/divisor;
        });
    }
    /**
     * Must be called after `forward`. The result is not divided by the number
     * of rows or columns.
     *
     * @returns {number[][]} gradient with the same shape as the predictions, also kept in `this.grad`
     */
    backward() {
        const yTrue2d=this.yTrue2d;
        this.grad=this.yPred2d.map((yPred1d,i) => this._backward1d(yPred1d,yTrue2d[i]));
        return this.grad;
    }
}

In [9]:
/**
 * Element-wise sigmoid activation, `1 / (1 + e^-x)`.
 */
class Sigmoid {
    /**
     * Stores the activations in `this.results` for use by `backward`.
     *
     * @param {number[][]} x2d - inputs
     * @returns {number[][]} sigmoid of every element
     */
    forward(x2d) {
        // Math.exp is the dedicated exponential; it avoids raising the rounded constant Math.E to a power
        this.results=x2d.map(x1d => x1d.map(x => 1/(1+Math.exp(-x))));
        return this.results;
    }
    /**
     * Must be called after `forward`.
     *
     * @param {number[][]} gradients - incoming gradients, same shape as the stored activations
     * @returns {number[][]} gradient with respect to the input, also kept in `this.grad`
     */
    backward(gradients) {
        // `s * (1.-s)` calculates sigmoid grad, then we chain gradients passed in
        this.grad=this.results.map((result,i) => result.map((s,j) => s * (1.-s) * gradients[i][j]));
        return this.grad;
    }
}

In [10]:
/**
 * Rectified linear unit: passes positive inputs through and replaces the
 * rest with 0.
 */
class ReLU {
    /**
     * Builds `this.gradMask` (1 where the input is > 0, otherwise whatever
     * `zeros` filled in) for use by `backward`.
     *
     * @param {number[][]} x2d - inputs
     * @returns {number[][]} `Math.max(0, x)` for every element
     */
    forward(x2d) {
        const mask=zeros(...shape(x2d));
        this.gradMask=mask;
        const activateRow=(row,r) => row.map((value,c) => {
            if (value>0) {
                mask[r][c]=1;
            }
            return Math.max(0,value);
        });
        return x2d.map(activateRow);
    }
    /**
     * Element-wise product of two matrices (not a dot product).
     *
     * @param {number[][]} a2d
     * @param {number[][]} b2d - indexed by the rows and columns of `a2d`
     * @returns {number[][]}
     */
    matrixMultiply(a2d, b2d) {
        return a2d.map((aRow,r) => {
            const bRow=b2d[r];
            return aRow.map((a,c) => a*bRow[c]);
        });
    }
    /**
     * Must be called after `forward`.
     *
     * @param {number[][]} gradient - incoming gradient
     * @returns {number[][]} incoming gradient multiplied element-wise by the stored mask
     */
    backward(gradient) {
        return this.matrixMultiply(this.gradMask,gradient);
    }
}

In [11]:
// forward zeroes the non-positive inputs and backward zeroes the gradient at those same
// positions, so feeding the same matrix to both must produce the same result.
let relu = new ReLU();
let data = [
  [ -0.3132450550822199, 0.06746248970796562, 0.7502210053477679 ],
  [ 0.32586239499711434, 0.276573231917191, 0.4718188033994297 ],
  [ 0.3375259522729109, -1.4738907605515226, -0.11109898767917284 ],
  [ -0.6095143988686595, 1.094470501593892, -0.4982351760328258 ],
  [ 0.28664244098736347, -0.35879217465991975, -0.754257906608068 ]
];
testEq(relu.forward(data),relu.backward(data));

In [13]:
/**
 * Fully connected layer: `x . weights + bias`.
 */
class Linear {
    /**
     * @param {number} inputDim - number of input features
     * @param {number} [numHidden=1] - number of output features
     * @param {boolean} [bias=true] - when false, `update` leaves the (zero-initialised) bias untouched
     */
    constructor(inputDim,numHidden=1,bias=true) {
        // Kaiming Init: scale the random weights by sqrt(2 / inputDim)
        const kaimingScale=Math.sqrt(2.0/inputDim);
        this.inputDim=inputDim;
        this.numHidden=numHidden;
        this.weights=matrixMultiply2d(randn(inputDim,numHidden), kaimingScale);
        this.bias=zeros(numHidden);
        this.updateBias=bias;
    }
    /**
     * Keeps `x` for use by `backward` and `update`.
     *
     * @param {number[][]} x - shape(bs,inputDim)
     * @returns {number[][]} `x . weights` with the bias added
     */
    forward(x) {
        this.x=x;
        const projected=dotProduct(x,this.weights);
        return matrixSum2d(projected, this.bias);
    }
    /**
     * Computes `weightsGradient` and `biasGradient` (same shapes as weights
     * and bias) and the gradient with respect to the input.
     *
     * @param {number[][]} gradient - shape(bs,numHidden)
     * @returns {number[][]} gradient with respect to `x`, also kept in `this.xGradient`
     */
    backward(gradient) {
        const sumColumn=(column) => column.reduce((total,value) => total+value);
        this.weightsGradient=dotProduct(transpose(this.x), gradient);
        // one sum per output column, i.e. gradient.sum(axis=0)
        this.biasGradient=transpose(gradient).map((column) => sumColumn(column));
        this.xGradient=dotProduct(gradient,transpose(this.weights));
        return this.xGradient;
    }
    /**
     * Gradient descent step using the gradients stored by `backward`.
     *
     * @param {number} lr - learning rate
     */
    update(lr) {
        // gradient calculations in backward don't account for batch size, so we do it here
        const stepSize=lr/this.x.length;
        this.weights=matrixSubtract2d(this.weights,matrixMultiply2d(this.weightsGradient,stepSize));
        if (!this.updateBias) {
            return;
        }
        this.bias=matrixSubtract1d(this.bias,matrixMultiply1d(this.biasGradient,stepSize));
    }
}

In [14]:
/**
 * Minimal training harness: holds a model (a list of layers), a loss
 * function and a train/validation split of the data, and runs the
 * forward / backward / update loop.
 */
class Learner {
    /**
     * @param {Array} model - layers applied in order; each needs `forward` and `backward`, `update(lr)` is optional
     * @param {Object} lossFn - needs `forward(preds, targets)` and `backward()`, and must expose its gradient as `grad`
     * @param {*} data - passed through `shuffle` and then `split` to get the train/validation sets
     * @param {Function[]} [metrics=[accuracy]] - each called as `metric(preds, targets)` during validation
     */
    constructor(model, lossFn, data, metrics=[accuracy]) {
        this.model=model;
        this.lossFn=lossFn;
        this.metrics=metrics;
        // split returns [[xTrain,xValid],[yTrain,yValid]]
        const [[xTrain,xValid],[yTrain,yValid]]=split(shuffle(data));
        this.xTrain=xTrain;
        this.xValid=xValid;
        this.yTrain=yTrain;
        this.yValid=yValid;
    }
    /**
     * @param {*} x - input for the first layer
     * @returns {*} output of the last layer
     */
    forward(x) {
        return this.model.reduce((activations,layer) => layer.forward(activations), x);
    }
    /**
     * Runs the layers' `backward` methods from last to first.
     *
     * @param {*} gradients - gradient of the loss with respect to the model output
     * @returns {*} gradient returned by the first layer
     */
    backward(gradients) {
        return this.model.reduceRight((upstream,layer) => layer.backward(upstream), gradients);
    }
    /**
     * Calls `update(lr)` on every layer that has one.
     *
     * @param {number} lr - learning rate
     */
    step(lr) {
        this.model.forEach(layer => {
            if (typeof layer.update=='function') {
                layer.update(lr);
            }
        });
    }
    /**
     * Logs the loss and metrics on the validation set.
     *
     * @param {number} epoch - only used as a label in the log line
     */
    validate(epoch) {
        const preds=this.forward(this.xValid);
        const lossValue=this.lossFn.forward(preds,this.yValid);
        const metricValues=this.metrics.map(metric=>metric(preds,this.yValid));
        console.log('epoch',epoch,'valid loss',lossValue,'metrics',metricValues);
    }
    /**
     * Validates once before training, then validates again after every epoch.
     *
     * @param {number} epochs - number of passes over the training set
     * @param {number} [lr=0.1] - learning rate
     * @param {number} [bs=64] - batch size
     */
    fit(epochs, lr=0.1, bs=64) {
        this.validate(-1); // Note: we use epoch -1 to indicate before training
        for (let epoch=0; epoch<epochs; epoch++) {
            // `bs` used to be accepted but never forwarded, so it had no effect.
            // NOTE(review): assumes `batches` takes the batch size as its second argument - confirm in data.module.js
            batches([this.xTrain,this.yTrain], bs).forEach(([xb,yb]) => {
                const preds=this.forward(xb);
                // forward is called for its side effect: the loss keeps what backward needs
                this.lossFn.forward(preds,yb);
                this.lossFn.backward();
                this.backward(this.lossFn.grad);
                this.step(lr);
            });
            this.validate(epoch);
        }
    }
    /**
     * @param {*} x - model input
     * @param {Array} [y] - optional targets, one per row of the model output
     * @param {Function} [yToLabelFn] - maps a prediction or target to a label; identity by default
     * @returns {Array[]} one `[pred, label(pred)]` row per prediction, plus `label(y[row])` when `y` is given
     */
    predict(x,y,yToLabelFn=(a=>a)) {
        const preds=this.forward(x);
        return preds.map((pred,rowIndex) => {
            const row=[pred,yToLabelFn(pred)];
            if (y!=null) {
                row.push(yToLabelFn(y[rowIndex]));
            }
            return row;
        });
    }
}

## Train a linear model to classify iris flowers

In [15]:
// Read the iris CSV and parse it with the iris row handler.
let stringData=require('fs').readFileSync('data/iris.data').toString();
let data=parseCsv(stringData, new IrisRowHandler()).result;
let lossFn=new BinaryCrossEntropyLoss();
// One linear layer (4 inputs, 3 outputs) followed by a sigmoid.
let model=[new Linear(4,3), new Sigmoid()];
let learn=new Learner(model, lossFn, data);
// 25 epochs with the default learning rate; validation loss/metrics are logged after every epoch.
learn.fit(25);

epoch -1 valid loss 1.0341568967724117 metrics [ 0.16666666666666666 ]
epoch 0 valid loss 0.9325867196558995 metrics [ 0.26666666666666666 ]
epoch 1 valid loss 0.8488194357846887 metrics [ 0.4 ]
epoch 2 valid loss 0.7817729477892316 metrics [ 0.4 ]
epoch 3 valid loss 0.728478108209485 metrics [ 0.4666666666666667 ]
epoch 4 valid loss 0.6859980236457647 metrics [ 0.5333333333333333 ]
epoch 5 valid loss 0.6515921136936502 metrics [ 0.5666666666666667 ]
epoch 6 valid loss 0.6230460849101055 metrics [ 0.5666666666666667 ]
epoch 7 valid loss 0.5988652143439598 metrics [ 0.5666666666666667 ]
epoch 8 valid loss 0.5783371292634836 metrics [ 0.5666666666666667 ]
epoch 9 valid loss 0.5602518645192137 metrics [ 0.5666666666666667 ]
epoch 10 valid loss 0.5440782744796268 metrics [ 0.5333333333333333 ]
epoch 11 valid loss 0.5300051597341906 metrics [ 0.5333333333333333 ]
epoch 12 valid loss 0.5173244743277592 metrics [ 0.5333333333333333 ]
epoch 13 valid loss 0.5058613223723426 metrics [ 0.53333333

## Train a neural net to classify iris flowers

In [16]:
// Two linear layers (4 -> 50 -> 3) with a ReLU in between and a sigmoid on the output.
// Reuses `lossFn` and `data` from the cell above.
let model=[new Linear(4,50), new ReLU(), new Linear(50,3), new Sigmoid()];
let learn=new Learner(model, lossFn, data);
learn.fit(25);

epoch -1 valid loss 0.6938991457298941 metrics [ 0.5333333333333333 ]
epoch 0 valid loss 0.4548354829721551 metrics [ 0.6 ]
epoch 1 valid loss 0.4113382233575783 metrics [ 0.6 ]
epoch 2 valid loss 0.3888552847512898 metrics [ 0.6 ]
epoch 3 valid loss 0.37224323134484205 metrics [ 0.6333333333333333 ]
epoch 4 valid loss 0.3612227646399962 metrics [ 0.6666666666666666 ]
epoch 5 valid loss 0.35046984479274285 metrics [ 0.7333333333333333 ]
epoch 6 valid loss 0.3433615379548912 metrics [ 0.8 ]
epoch 7 valid loss 0.33715814528346794 metrics [ 0.8 ]
epoch 8 valid loss 0.3325923508198957 metrics [ 0.8 ]
epoch 9 valid loss 0.327426555998033 metrics [ 0.8333333333333334 ]
epoch 10 valid loss 0.32464915066354466 metrics [ 0.8333333333333334 ]
epoch 11 valid loss 0.3213880318868554 metrics [ 0.8333333333333334 ]
epoch 12 valid loss 0.31672560480290624 metrics [ 0.8333333333333334 ]
epoch 13 valid loss 0.3136953289175437 metrics [ 0.8333333333333334 ]
epoch 14 valid loss 0.3113740841669949 metrics

### Look at some predictions 

We use the lambda ```(y=>`${argmax(y)}: ${IRIS_CLASS_MAP[argmax(y)]}`)``` to convert predictions like `[0.000, 0.183, 0.843]` to readable labels.

In [17]:
// Each row returned by predict is [raw prediction, predicted label, target label].
// head(learn.predict(learn.xValid, learn.yValid)); run this to see "raw" targets
head(learn.predict(learn.xValid, learn.yValid, (y=>`${argmax(y)}: ${IRIS_CLASS_MAP[argmax(y)]}`)));

0 [
  [ 0.00042137207983839636, 0.017669550460275074, 0.9914352029755926 ],
  '2: Iris-virginica',
  '2: Iris-virginica'
]
1 [
  [ 0.00018883711773105428, 0.02191564158350578, 0.9777823816219321 ],
  '2: Iris-virginica',
  '2: Iris-virginica'
]
2 [
  [ 0.07104406164042015, 0.37001067882527106, 0.6448975168487258 ],
  '2: Iris-virginica',
  '1: Iris-versicolor'
]
3 [
  [ 0.011128677461420395, 0.1269044016431082, 0.8871811141637539 ],
  '2: Iris-virginica',
  '2: Iris-virginica'
]
4 [
  [ 0.10571851114688964, 0.3467647745317223, 0.670171687621607 ],
  '2: Iris-virginica',
  '1: Iris-versicolor'
]
5 [
  [ 0.0016197337102044492, 0.5041511824528401, 0.5278003553540569 ],
  '2: Iris-virginica',
  '2: Iris-virginica'
]
6 [
  [ 0.07005075078998631, 0.8993697087030239, 0.05962436096195428 ],
  '1: Iris-versicolor',
  '1: Iris-versicolor'
]
7 [
  [ 0.9791550239754409, 0.032118187482808314, 0.0027552402826122287 ],
  '0: Iris-setosa',
  '0: Iris-setosa'
]
8 [
  [ 0.9928228437723307, 0.00448519731

Show how we could train a linear layer without `Learner`. This is not a proper training loop; in each epoch we just:
- forward pass
- print training loss
- backward pass
- update

In [18]:
// Re-parse the iris data; no shuffle or split here, so every epoch sees all rows as one batch.
let data=parseCsv(stringData, new IrisRowHandler()).result;
let x=data[0],y=data[1];
console.log('shape(x)',shape(x), 'shape(y)',shape(y));
let loss_fn=new BinaryCrossEntropyLoss()
let sig=new Sigmoid()
let lin=new Linear(4,3);
for (let epoch = 0; epoch < 10; epoch++) {
    // forward pass: linear -> sigmoid -> loss
    let y_pred=sig.forward(lin.forward(x));
    let loss_value=loss_fn.forward(y_pred,y);
    console.log('epoch',epoch,'loss_value',loss_value);
    // backward pass in reverse order; each step reads the gradient stored by the one before it
    loss_fn.backward();
    sig.backward(loss_fn.grad);
    lin.backward(sig.grad);
    lin.update(.1);
}

shape(x) [ 150, 4 ] shape(y) [ 150, 3 ]
epoch 0 loss_value 1.235995062659249
epoch 1 loss_value 1.167000635815671
epoch 2 loss_value 1.1022427808821882
epoch 3 loss_value 1.0417496749771475
epoch 4 loss_value 0.9855245150120745
epoch 5 loss_value 0.9335343276077428
epoch 6 loss_value 0.88570082139485
epoch 7 loss_value 0.8418954603815388
epoch 8 loss_value 0.801940145908543
epoch 9 loss_value 0.7656135346876503


## Can we teach a linear layer to convert one hot encoded integers to their bitwise representations?

In [19]:
// Row i is the one hot encoding of the integer i (0 to 9).
let x=[
    [1,0,0,0,0,0,0,0,0,0],
    [0,1,0,0,0,0,0,0,0,0],
    [0,0,1,0,0,0,0,0,0,0],
    [0,0,0,1,0,0,0,0,0,0],
    [0,0,0,0,1,0,0,0,0,0],
    [0,0,0,0,0,1,0,0,0,0],
    [0,0,0,0,0,0,1,0,0,0],
    [0,0,0,0,0,0,0,1,0,0],
    [0,0,0,0,0,0,0,0,1,0],
    [0,0,0,0,0,0,0,0,0,1]
];
// Row i is the 4 bit binary representation of i, least significant bit first.
let y=[
    [0,0,0,0],
    [1,0,0,0],
    [0,1,0,0],
    [1,1,0,0],
    [0,0,1,0],
    [1,0,1,0],
    [0,1,1,0],
    [1,1,1,0],
    [0,0,0,1],
    [1,0,0,1]
];

`x` is an identity matrix, so the dot product of `x` and `y` is just `y`:

In [20]:
// Multiplying by the identity matrix leaves y unchanged.
testEq(y,dotProduct(x,y))

so ... will `y` make the perfect weights (if bias is zero)?

In [21]:
let loss_fn=new BinaryCrossEntropyLoss()
let sig=new Sigmoid()
// Third argument false: update() will leave the bias at its initial zeros.
let linearNoBias=new Linear(10,4,false);
let y_pred=null;
for (let epoch = 0; epoch<10; epoch++) {
    y_pred=sig.forward(linearNoBias.forward(x));
    const loss_value=loss_fn.forward(y_pred,y);
    // log every 10th epoch; with 10 epochs that is only the last one
    if (epoch%10==9) {
        console.log('epoch',epoch,'loss_value',loss_value);
    }
    loss_fn.backward();
    sig.backward(loss_fn.grad);
    linearNoBias.backward(sig.grad);
    // update() divides the learning rate by the number of rows in x (10 here)
    linearNoBias.update(50);
}
// predictions from the last forward pass (made before the final update)
console.log(y_pred)

epoch 9 loss_value 0.018094021889458724
[
  [
    0.018003756966588333,
    0.01744292046471256,
    0.017576158167113467,
    0.017510800864165305
  ],
  [
    0.9823215411087681,
    0.017611248370096998,
    0.018103027132388053,
    0.018160334882053136
  ],
  [
    0.01846134199326819,
    0.981558985051603,
    0.018484160238985112,
    0.01830457666021364
  ],
  [
    0.9818571524211799,
    0.982085208270608,
    0.01803809698397853,
    0.0181332948198287
  ],
  [
    0.017762493391384426,
    0.017698571958530255,
    0.9819465497109459,
    0.017993788229084998
  ],
  [
    0.9817155964133643,
    0.01756600897335701,
    0.9821342878462279,
    0.017711880265266072
  ],
  [
    0.01810492666896741,
    0.9823106987960304,
    0.9824927634946891,
    0.017490392216672222
  ],
  [
    0.981875781222325,
    0.9818544496917284,
    0.9819212146062137,
    0.017908656227585435
  ],
  [
    0.017818463366004865,
    0.018475146664228004,
    0.018088154276081053,
    0.982026417

Dump our linear layer to the output so we can look at the learned weights.

In [22]:
// Bare expression as the last statement of the cell, so the notebook prints the layer.
linearNoBias

Linear {
  inputDim: 10,
  numHidden: 4,
  weights: [
    [
      -4.089025810865415,
      -4.118439178298224,
      -4.111360282819613,
      -4.114825471975158
    ],
    [
      4.10596409752842,
      -4.109505539453307,
      -4.083922353727718,
      -4.080989881978654
    ],
    [
      -4.065749185774642,
      4.0667699262966694,
      -4.064604791066378,
      -4.073652864712373
    ],
    [
      4.081883659449666,
      4.0936253081682406,
      -4.087256976845267,
      -4.082372293849601
    ],
    [
      -4.101556484789402,
      -4.104907118229748,
      4.086467310028912,
      -4.089539975160333
    ],
    [
      4.074675234214243,
      -4.111897475316033,
      4.096173255355881,
      -4.104208451746711
    ],
    [
      -4.08382499313203,
      4.105394153146257,
      4.11501485079797,
      -4.115910369718556
    ],
    [
      4.082836800650856,
      4.081745462098862,
      4.085165829419428,
      -4.093943419175744
    ],
    [
      -4.098633284827972,

In [19]:
export {Sigmoid,MSE,BinaryCrossEntropyLoss,ReLU,Linear,Learner}