In [1]:
//default_exp nn

# nn

> A few modules that can be used to build neural nets.

In [2]:
/**
Imports we need in nn.module.js
*/
import {exp,shape,transpose,dotProduct,randn,uniform,zeros,argmax,mean,round} from './src/util.module.js';
import {matrixSum1d,matrixSum2d,matrixSubtract1d,matrixSubtract2d,matrixMultiply1d,matrixMultiply2d} from './src/util.module.js';
import {head,tail,parseCsv,IRIS_CLASS_MAP,IrisRowHandler,shuffle,split,batches} from './src/data.module.js';

In [3]:
// Imports we need for testing
import {testEq} from './src/testutil.module.js'

In [4]:
/**
Returns the fraction of rows in `yPred2d` whose highest-scoring column (argmax) matches the target class.
`yTrue` can be either 2d (one-hot encoded targets, compared via argmax) or 1d (array of class IDs).
Throws an Error if the row counts differ or, for 2d targets, if the column counts differ.
*/
function accuracy(yPred2d,yTrue) {
    const predDims=shape(yPred2d);
    const trueDims=shape(yTrue);
    const targetsAreOneHot=(trueDims.length == 2);
    if (predDims[0] != trueDims[0]) {
        throw new Error(`Expected yPred2d.length ${predDims[0]} to equal yTrue.length ${trueDims[0]}`);
    }
    if (targetsAreOneHot && predDims[1] != trueDims[1]) {
        throw new Error(`Expected shape(yPred2d)[1] ${predDims[1]} to equal shape(yTrue)[1] ${trueDims[1]}`);
    }
    let hits=0;
    for (const [rowIndex,predRow] of yPred2d.entries()) {
        const predictedClass=argmax(predRow);
        const targetClass=targetsAreOneHot ? argmax(yTrue[rowIndex]) : yTrue[rowIndex];
        // loose comparison on purpose: 1d targets are compared exactly as the caller supplied them
        hits+=(predictedClass == targetClass) ? 1 : 0;
    }
    return hits/predDims[0];
}

In [5]:
// Every row puts its largest score on the target column, so accuracy is 1.
let yPred=[[0,.1],[0,.9],[0,.33],[0,1],[0,1],[0,1],[0,1],[0,1],[1,0]];
let yTrue=[[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[1,0]];
testEq(1.0, accuracy(yPred,yTrue));
// Make class 0 win in the first row: one miss out of nine.
yPred[0][0]=.8;
testEq(8/9, accuracy(yPred,yTrue));
// Make class 0 win in every row: only the last row (target class 0) is still right.
yPred.forEach(row => { row[0]=1.1; }); // accuracy doesn't care if we're not within 0 and 1
testEq(1/9, accuracy(yPred,yTrue));
// test with class IDs
testEq(1/9, accuracy(yPred,[1,1,1,1,1,1,1,1,0]));

In [6]:
/**
Mean squared error loss.
`forward` stores the element-wise difference `yPred2d - yTrue2d` in `this.error` and returns the `mean` of its squares.
`backward` must be called after `forward`; it stores and returns `this.error` scaled by `2 / (number of rows)`.
*/
class MSE {
    forward(yPred2d,yTrue2d) {
        const residuals=matrixSubtract2d(yPred2d,yTrue2d);
        this.error=residuals; // kept for backward
        const squaredResiduals=residuals.map(row => row.map(residual => residual**2));
        return mean(squaredResiduals);
    }
    backward() {
        const scale=2/this.error.length;
        const grad=matrixMultiply2d(this.error, scale);
        this.grad=grad;
        return grad;
    }
}

In [7]:
// Check MSE.forward on a fixed 3x4 prediction/target pair.
// The result is rounded to 3 decimal places before comparing with 4.409.
let mse=new MSE();
let mseValue=mse.forward(
    [[-0.1684, -1.0158, -1.3667,  1.4327],
    [ 0.0245, -0.6284, -2.5182,  2.2007],
    [-1.8774, -0.0352, -0.5946,  0.4272]],
    [[-0.3516,  0.5787,  0.8858,  0.9198],
    [ 0.1892, -0.6473,  2.1278,  0.1345],
    [ 2.2919, -0.9939, -0.3137, -0.4314]]);
testEq(4.409, Math.round(mseValue*1000)/1000);

## cross entropy: negative log likelihood of log softmax

The following is taken from https://github.com/fastai/course-v3/blob/master/nbs/dl2/03_minibatch_training.ipynb
```
def logsumexp(x):
    m = x.max(-1)[0]
    return m + (x-m[:,None]).exp().sum(-1).log()

def log_softmax(x): return x - x.logsumexp(-1,keepdim=True)

def nll(input, target): return -input[range(target.shape[0]), target].mean()
```

In [8]:
/**
Takes a 2d array and returns a 1d array holding, for each row, log(sum(exp(row))).
The row maximum is subtracted before exponentiating and added back afterwards.
*/
function logsumexp(x) {
    const rowMax=x.map(row => Math.max(...row));
    const logOfShiftedSums=x.map((row,rowIndex) => {
        const shiftedExps=row.map(value => exp(value-rowMax[rowIndex])); // (x-m[:,None]).exp()
        const total=shiftedExps.reduce((acc,term) => acc+term);          // .sum(-1)
        return Math.log(total);                                          // .log()
    });
    return matrixSum1d(rowMax, logOfShiftedSums);                        // return m + ...
}

In [9]:
// 3x2 fixture; logsumexp is compared after rounding to 4 decimal places.
let testData=[[1.6392130817141863, 0.12928212984246149],
              [0.000843200027605633, -0.12680858189363003],
              [-0.9898354893794594, -1.5028466126461082]]
testEq([1.8388, 0.6322, -0.5207], round(logsumexp(testData),4));

In [10]:
/**
Takes a 2d array and returns a 2d array of the same shape holding the log softmax of each element,
computed as the element minus the `logsumexp` of its row.
*/
function log_softmax(x) {
    const rowNormalizers=logsumexp(x);
    return x.map((row,rowIndex) => {
        const normalizer=rowNormalizers[rowIndex];
        return row.map(value => value-normalizer);
    });
}

In [11]:
// Expected log softmax of the `testData` fixture (each element minus its row's logsumexp).
testEq([[-0.1996089581238054, -1.7095399099955302],
        [-0.6313567803288485, -0.7590085622500842],
        [-0.4691846265662769, -0.9821957498329257]], log_softmax(testData));

In [12]:
/**
Negative log likelihood.
Takes a 2d input (log softmax predictions) and a 1d array of target class IDs;
picks `input[i][target[i]]` from every row and returns the negated `mean` of the picked values.
*/
function nll(input, target) {
    const pickedLogProbs=input.map((logProbs,rowIndex) => {
        const classId=target[rowIndex];
        return logProbs[classId];
    });
    return -mean(pickedLogProbs);
}

In [13]:
// nll is applied to the raw `testData` values here (not to log_softmax output), always picking column 0.
testEq(-0.2167, round(nll(testData,[0,0,0]),4));

`CrossEntropyLoss` uses an approach borrowed from https://beckernick.github.io/logistic-regression-from-scratch/

In [14]:
/**
Cross entropy with softmax.
yTrue1d is an array of target class IDs - not a 2d array of 1 hot encoded targets.

`forward(yPred2d,yTrue1d)` takes raw scores (one row per sample), caches their softmax in `this.yPred2d`
and returns the mean negative log probability of the target classes.
`backward()` must be called after `forward`; it stores and returns `softmax - oneHot(target)` per row
(not divided by the batch size).
*/
class CrossEntropyLoss {
    /** Softmax of a 1d array; the max is subtracted before exponentiating for numerical stability. */
    softmax1d(a) {
        const maxValue=Math.max(...a); // normalize values for numerical stability (log sum exp)
        const temp=a.map(e => exp(e-maxValue));
        const sum=temp.reduce((total,e) => total+e);
        return temp.map(e => e/sum);
    }

    forward(yPred2d,yTrue1d) {
        this.yPred2d=yPred2d.map(yPred1d => this.softmax1d(yPred1d)); // cached for backward
        this.yTrue1d=yTrue1d;
        // Compute log(softmax) as `x[target] - max - log(sum(exp(x - max)))` rather than `Math.log(softmax)`.
        // The softmax of the target can underflow to exactly 0, which made the loss Infinity;
        // the sum below always contains exp(0)=1, so its log stays finite.
        const logProbs=yPred2d.map((yPred1d,i) => {
            const maxValue=Math.max(...yPred1d);
            const sumExp=yPred1d.reduce((total,e) => total+exp(e-maxValue), 0);
            return yPred1d[yTrue1d[i]]-maxValue-Math.log(sumExp);
        });
        return -logProbs.reduce((a,b) => a+b) / logProbs.length;
    }

    backward() {
        const yTrue1d=this.yTrue1d;
        this.grad=this.yPred2d.map(yPred1d => [...yPred1d]); // copy preds so the cached softmax is not modified
        this.grad.forEach((yPred1d,i) => { yPred1d[yTrue1d[i]]-=1; });
        return this.grad;
    }
}

In [15]:
// show that both ways of calculating cross entropy loss give the same values
// (both sides are rounded to 4 decimal places; every row uses target class 0)
let lossFn = new CrossEntropyLoss()
testEq(round(nll(log_softmax(testData),[0,0,0]),4), round(lossFn.forward(testData,[0,0,0]),4));

In [16]:
/**
Binary cross entropy over 2d predictions and 2d targets of the same shape.
Predictions are expected to be probabilities; an element of the target equal to 1 is treated as the
positive class, anything else as the negative class.

`eps` (default 1e-12): predictions are clamped to `[eps, 1-eps]` before taking logs or reciprocals, so a
prediction of exactly 0 or 1 yields a large finite loss/gradient instead of Infinity.
Predictions already inside that range are used unchanged.

`forward(yPred2d,yTrue2d)` returns the mean over rows of the per-row mean loss.
`backward()` must be called after `forward`; it stores and returns the element-wise derivative of
-log(p) (target 1) or -log(1-p) (otherwise) with respect to the prediction.
*/
class BinaryCrossEntropyLoss {
    constructor(eps=1e-12) {
        this.eps=eps;
    }
    // keep a probability away from exactly 0 and 1 (NaN passes through unchanged)
    _clamp(yPred) {
        return Math.min(Math.max(yPred,this.eps),1-this.eps);
    }
    _forward1d(yPred1d,yTrue1d) {
        const temp=yPred1d.map((yPred,i) => {
            const p=this._clamp(yPred);
            return Math.log((yTrue1d[i]==1.) ? p : 1-p);
        });
        return -temp.reduce((a,b) => a+b) / temp.length;
    }
    forward(yPred2d,yTrue2d) {
        this.yPred2d=yPred2d;
        this.yTrue2d=yTrue2d;
        const lossValue1d=yPred2d.map((yPred1d,i) => this._forward1d(yPred1d,yTrue2d[i]));
        return lossValue1d.reduce((a,b) => a+b) / lossValue1d.length;
    }
    _backward1d(yPred1d,yTrue1d) {
        return yPred1d.map((yPred,i) => {
            const p=this._clamp(yPred);
            return (yTrue1d[i]==1.) ? -1/p : 1/(1-p);
        });
    }
    backward() {
        const yTrue2d=this.yTrue2d;
        this.grad=this.yPred2d.map((yPred1d,i) => this._backward1d(yPred1d,yTrue2d[i]));
        return this.grad;
    }
}

In [17]:
/**
Element-wise sigmoid activation for 2d input.
`forward(x2d)` stores and returns `1/(1+exp(-x))` for every element.
`backward(gradients)` must be called after `forward`; it multiplies the sigmoid derivative `s*(1-s)`
of each stored output by the matching element of `gradients` and stores/returns the result.
*/
class Sigmoid {
    forward(x2d) {
        const activations=x2d.map(row => row.map(value => {
            const denominator=1.+exp(-value);
            return 1./denominator;
        }));
        this.results=activations;
        return activations;
    }
    backward(gradients) {
        const chained=this.results.map((row,rowIndex) => row.map((s,colIndex) => {
            const localSlope=s * (1.-s); // derivative of sigmoid expressed via its output
            return localSlope * gradients[rowIndex][colIndex]; // chain with the incoming gradient
        }));
        this.grad=chained;
        return chained;
    }
}

In [18]:
/**
Element-wise rectified linear unit for 2d input.
`forward(x2d)` returns `Math.max(0,x)` for every element and records in `this.gradMask`
(same shape as the input) a 1 wherever the input was greater than 0 and a 0 elsewhere.
`backward(gradient)` must be called after `forward`; it returns `matrixMultiply2d(gradMask,gradient)`.
*/
class ReLU {
    forward(x2d) {
        const mask=zeros(...shape(x2d));
        this.gradMask=mask;
        return x2d.map((row,rowIndex) => row.map((value,colIndex) => {
            if (value>0) {
                mask[rowIndex][colIndex]=1; // gradient only flows where the input was positive
            }
            return Math.max(0,value);
        }));
    }
    backward(gradient) {
        const masked=matrixMultiply2d(this.gradMask,gradient);
        return masked;
    }
}

In [19]:
// backward multiplies the mask recorded by forward with its argument, so feeding the
// forward input back in as the "gradient" is expected to reproduce the forward output.
let relu = new ReLU();
let data = [
  [ -0.3132450550822199, 0.06746248970796562, 0.7502210053477679 ],
  [ 0.32586239499711434, 0.276573231917191, 0.4718188033994297 ],
  [ 0.3375259522729109, -1.4738907605515226, -0.11109898767917284 ],
  [ -0.6095143988686595, 1.094470501593892, -0.4982351760328258 ],
  [ 0.28664244098736347, -0.35879217465991975, -0.754257906608068 ]
];
testEq(relu.forward(data),relu.backward(data));

In [20]:
/**
Applies a linear transformation to `x`: `x . weights + bias`.

constructor(inputDim,numHidden=1,bias=true): `weights` has shape (inputDim,numHidden) and is Kaiming
initialised; `bias` starts as `zeros(numHidden)`; passing `bias=false` stops `update` from changing the bias.
forward(x): remembers `x` and returns the transformed batch.
backward(gradient): stores `weightsGradient`, `biasGradient` and `xGradient`, and returns `xGradient`.
update(lr): gradient descent step using the stored gradients, with `lr` divided by the batch size.
*/
class Linear {
    constructor(inputDim,numHidden=1,bias=true) {
        this.inputDim=inputDim;
        this.numHidden=numHidden;
        // Kaiming Init: scale the `randn` output by sqrt(2/inputDim)
        const kaimingScale=Math.sqrt(2.0/inputDim);
        this.weights=matrixMultiply2d(randn(inputDim,numHidden), kaimingScale);
        this.bias=zeros(numHidden)
        this.updateBias=bias;
    }
    forward(x) {
        this.x=x; // shape(bs,inputDim)
        const projected=dotProduct(x,this.weights);
        return matrixSum2d(projected, this.bias);
    }
    backward(gradient) { // gradient shape(bs,numHidden)
        // weightsGradient/biasGradient need to be the same shape as weights/bias
        const xTransposed=transpose(this.x);
        this.weightsGradient=dotProduct(xTransposed, gradient);
        // column sums of gradient, i.e. gradient.sum(axis=0)
        const gradientColumns=transpose(gradient);
        this.biasGradient=gradientColumns.map(column => column.reduce((total,g) => total+g));
        const weightsTransposed=transpose(this.weights);
        this.xGradient=dotProduct(gradient,weightsTransposed);
        return this.xGradient;
    }
    update(lr) {
        // gradient calculations in backward don't account for batch size, so we do it here
        const perSampleLr=lr/this.x.length; // TODO: change gradient calc to account for batch size - all XxxLoss classes
        const weightsStep=matrixMultiply2d(this.weightsGradient,perSampleLr);
        this.weights=matrixSubtract2d(this.weights,weightsStep);
        if (!this.updateBias) {
            return;
        }
        const biasStep=matrixMultiply1d(this.biasGradient,perSampleLr);
        this.bias=matrixSubtract1d(this.bias,biasStep);
    }
}

In [21]:
/**
Using
- `Embedding` when `x` is an array of IDs or
- `Linear` when `x` is a one-hot encoded matrix
should give the same results - but `Embedding` should be faster.

constructor: same arguments as `Linear`; the weights created by `Linear` are replaced with
`uniform(inputDim,numHidden,-1,1)`.
forward(x): looks up one weights row per ID in `x` and adds the bias.
backward(gradient): stores `weightsGradient` (same shape as `weights`), `biasGradient` and `xGradient`,
and returns `xGradient`.
update(lr): inherited from `Linear` (the override that used to live here was an exact copy).
*/
class Embedding extends Linear {
    constructor(inputDim,numHidden=1,bias=true) {
        super(inputDim,numHidden,bias);
        this.weights=uniform(inputDim,numHidden,-1,1);
    }
    forward(x) {
        this.x=x;
        return matrixSum2d(x.map(i=>this.weights[i]), this.bias);
    }
    backward(gradient) { // gradient shape(bs,numHidden)
        this.weightsGradient=zeros(this.inputDim,this.numHidden);
        // Scatter-add: each batch row adds its gradient to the weights row it looked up.
        // One pass over the batch (in row order, so sums accumulate in the same order as before)
        // instead of scanning the whole batch once per weights row.
        this.x.forEach((rawId,rowIndex) => {
            const id=Number(rawId);
            // IDs that are missing or outside [0,inputDim) never contributed to the gradient; keep skipping them
            if (rawId!=null && Number.isInteger(id) && id>=0 && id<this.inputDim) {
                this.weightsGradient[id]=matrixSum1d(this.weightsGradient[id],gradient[rowIndex]);
            }
        });
        // column sums of gradient
        this.biasGradient=transpose(gradient).map(col => col.reduce((a,b) => a+b));
        this.xGradient=dotProduct(gradient,transpose(this.weights));
        return this.xGradient;
    }
}

The following shows that `Linear` and `Embedding` apply the same transformation and calculate the same gradients.

In [22]:
var inputDim=3;
var numHidden=5
let linear=new Linear(inputDim, numHidden);
let embedding=new Embedding(inputDim, numHidden);
// give both layers the same weights (row-by-row copy, so the layers don't share arrays)
linear.weights=embedding.weights.map(weightsRow => [...weightsRow]);

// the same batch expressed as IDs (for Embedding) and as one-hot rows (for Linear)
let embedding_in=[0,1,2,1];
let bs=embedding_in.length;
let linear_in=zeros(bs, inputDim);
embedding_in.forEach((classId,rowIndex) => { linear_in[rowIndex][classId]=1; });
testEq(linear.forward(linear_in),embedding.forward(embedding_in));

// push the same random gradient back through both layers and compare everything they store
let gradient=randn(bs,numHidden);
linear.backward(gradient);
embedding.backward(gradient);
testEq(linear.weightsGradient, embedding.weightsGradient);
testEq(linear.biasGradient, embedding.biasGradient);
testEq(linear.xGradient, embedding.xGradient);

In [23]:
/**
Small training loop around a sequential model.

constructor(model, lossFn, data, metrics=[accuracy])
- `model`: array of layers; each has `forward(x)` and `backward(gradient)`, and optionally `update(lr)`.
- `lossFn`: has `forward(preds,targets)` returning the loss value and `backward()` which sets `lossFn.grad`.
- `data`: passed through `shuffle` then `split`; the result is read as
  `[[xTrain,xValid],[yTrain,yValid]]`.
- `metrics`: functions called as `metric(preds,yValid)` during validation.

fit(epochs, lr=0.1, bs=64): validates once before training (logged as epoch -1), then for every epoch
trains on each batch and validates again.
predict(x,y,yToLabelFn): returns one row per sample: `[pred, yToLabelFn(pred)]`, plus
`yToLabelFn(y[i])` when `y` is given.
*/
class Learner {
    constructor(model, lossFn, data, metrics=[accuracy]) {
        this.model=model;
        this.lossFn=lossFn;
        this.metrics=metrics;
        const [[xTrain,xValid],[yTrain,yValid]]=split(shuffle(data));
        this.xTrain=xTrain;
        this.xValid=xValid;
        this.yTrain=yTrain;
        this.yValid=yValid;
    }
    // run x through every layer, first to last
    forward(x) {
        for (let i=0; i<this.model.length; i++) {
            x=this.model[i].forward(x);
        }
        return x;
    }
    // run gradients through every layer, last to first
    backward(gradients) {
        for (let i=this.model.length-1; i>=0; i--) {
            gradients=this.model[i].backward(gradients);
        }
        return gradients;
    }
    // update every layer that has parameters (i.e. defines `update`)
    step(lr) {
        this.model.forEach(m => {
            if (typeof m.update==='function') {
                m.update(lr);
            }
        });
    }
    // log loss and metrics on the validation set
    validate(epoch) {
        const preds=this.forward(this.xValid);
        const lossValue=this.lossFn.forward(preds,this.yValid);
        const metricValues=this.metrics.map(metric=>metric(preds,this.yValid));
        console.log('epoch',epoch,'valid loss',lossValue,'metrics',metricValues);
    }
    fit(epochs, lr=0.1, bs=64) {
        this.validate(-1); // Note: we use epoch -1 to indicate before training
        for (let epoch=0; epoch<epochs; epoch++) {
            // `bs` used to be accepted but never used; it is now forwarded to `batches`.
            // NOTE(review): assumes `batches` takes the batch size as its second argument - confirm in data.module.js
            batches([this.xTrain,this.yTrain], bs).forEach(batch => {
                const [xb,yb]=batch;
                const preds=this.forward(xb);
                this.lossFn.forward(preds,yb); // called for its side effect: the loss caches what backward needs
                this.lossFn.backward();
                this.backward(this.lossFn.grad);
                this.step(lr);
            });
            this.validate(epoch);
        }
    }
    predict(x,y,yToLabelFn=(a=>a)) {
        const preds=this.forward(x);
        return preds.map((pred,rowIndex) => {
            const row=[pred,yToLabelFn(pred)];
            if (y!=null) {
                row.push(yToLabelFn(y[rowIndex]));
            }
            return row;
        });
    }
}

## Train a linear model to classify iris flowers

Note: we use `BinaryCrossEntropyLoss` here just as an example. README.md and index.ipynb show how to train with `CrossEntropyLoss`.

In [24]:
// Read the iris CSV from disk and parse it with the iris row handler.
let stringData=require('fs').readFileSync('data/iris.data').toString();
let data=parseCsv(stringData, new IrisRowHandler()).result;
let lossFn=new BinaryCrossEntropyLoss();
// 4 inputs -> 3 outputs, squashed by a sigmoid
let model=[new Linear(4,3), new Sigmoid()];
// Learner shuffles and splits `data` itself
let learn=new Learner(model, lossFn, data);
learn.fit(25);

epoch -1 valid loss 0.9109090304204185 metrics [ 0.26666666666666666 ]
epoch 0 valid loss 0.8099330011050551 metrics [ 0.26666666666666666 ]
epoch 1 valid loss 0.7254364984214408 metrics [ 0.3 ]
epoch 2 valid loss 0.6569570399645729 metrics [ 0.5 ]
epoch 3 valid loss 0.6031691548791043 metrics [ 0.5 ]
epoch 4 valid loss 0.5614477140559698 metrics [ 0.6 ]
epoch 5 valid loss 0.529146473484039 metrics [ 0.7 ]
epoch 6 valid loss 0.5043085122388226 metrics [ 0.7 ]
epoch 7 valid loss 0.4849431608039424 metrics [ 0.7333333333333333 ]
epoch 8 valid loss 0.46965031567839827 metrics [ 0.7333333333333333 ]
epoch 9 valid loss 0.457001658109672 metrics [ 0.7333333333333333 ]
epoch 10 valid loss 0.4459796482305391 metrics [ 0.7333333333333333 ]
epoch 11 valid loss 0.4372260391400424 metrics [ 0.7333333333333333 ]
epoch 12 valid loss 0.42944545438344767 metrics [ 0.7333333333333333 ]
epoch 13 valid loss 0.42270065210260477 metrics [ 0.7333333333333333 ]
epoch 14 valid loss 0.4165616651757987 metrics 

## Train a neural net to classify iris flowers

In [25]:
// Same `lossFn` and `data` as the previous cell, now with a 50-unit hidden layer and ReLU.
let model=[new Linear(4,50), new ReLU(), new Linear(50,3), new Sigmoid()];
let learn=new Learner(model, lossFn, data);
learn.fit(25);

epoch -1 valid loss 0.6983069062925652 metrics [ 0.4666666666666667 ]
epoch 0 valid loss 0.4096526196336277 metrics [ 0.7 ]
epoch 1 valid loss 0.37116439964170433 metrics [ 0.7333333333333333 ]
epoch 2 valid loss 0.34691951302918655 metrics [ 0.7333333333333333 ]
epoch 3 valid loss 0.3301522572207321 metrics [ 0.8 ]
epoch 4 valid loss 0.31664335621564643 metrics [ 0.8 ]
epoch 5 valid loss 0.3056892179970104 metrics [ 0.8 ]
epoch 6 valid loss 0.2966833605517909 metrics [ 0.8 ]
epoch 7 valid loss 0.2911649176860406 metrics [ 0.8 ]
epoch 8 valid loss 0.2848935842362169 metrics [ 0.8 ]
epoch 9 valid loss 0.279592437884924 metrics [ 0.8 ]
epoch 10 valid loss 0.27462301274116585 metrics [ 0.8 ]
epoch 11 valid loss 0.270675603950377 metrics [ 0.8 ]
epoch 12 valid loss 0.2651677596225735 metrics [ 0.8 ]
epoch 13 valid loss 0.2616001690962012 metrics [ 0.8 ]
epoch 14 valid loss 0.2569000128382096 metrics [ 0.8 ]
epoch 15 valid loss 0.2547780795245656 metrics [ 0.8 ]
epoch 16 valid loss 0.251335

### Look at some predictions 

We use the lambda ```(y=>`${argmax(y)}: ${IRIS_CLASS_MAP[argmax(y)]}`)``` to convert predictions like `[0.000, 0.183, 0.843]` to readable labels.

In [26]:
// head(learn.predict(learn.xValid, learn.yValid)); run this to see "raw" targets
// Each row returned by predict is [prediction, label of the prediction, label of the target].
head(learn.predict(learn.xValid, learn.yValid, (y=>`${argmax(y)}: ${IRIS_CLASS_MAP[argmax(y)]}`)));

0 [
  [ 0.06423957178856468, 0.18659442021358044, 0.809785959867297 ],
  '2: Iris-virginica',
  '1: Iris-versicolor'
]
1 [
  [ 0.9984373014569424, 0.005217967220236772, 0.002201774761163568 ],
  '0: Iris-setosa',
  '0: Iris-setosa'
]
2 [
  [ 0.9771091594700118, 0.024259395882655306, 0.00273577528669128 ],
  '0: Iris-setosa',
  '0: Iris-setosa'
]
3 [
  [ 0.0350655771226941, 0.8436276427873515, 0.05686654528132866 ],
  '1: Iris-versicolor',
  '1: Iris-versicolor'
]
4 [
  [ 0.997157938906728, 0.009618056835112947, 0.0035352570500683917 ],
  '0: Iris-setosa',
  '0: Iris-setosa'
]
5 [
  [ 0.01690222876081816, 0.4580457037148286, 0.6482480428373324 ],
  '2: Iris-virginica',
  '1: Iris-versicolor'
]
6 [
  [ 0.0022571328639988033, 0.7361375740654459, 0.2820925870802532 ],
  '1: Iris-versicolor',
  '1: Iris-versicolor'
]
7 [
  [ 0.010447129598920581, 0.2404578763927446, 0.7768037213041088 ],
  '2: Iris-virginica',
  '1: Iris-versicolor'
]
8 [
  [ 0.0008017431765991797, 0.013955143715761774, 0.9

Show how we could train a linear layer without `Learner` - this is not a proper training loop, we just:
- forward pass
- print training loss
- backward pass
- update

In [27]:
let data=parseCsv(stringData, new IrisRowHandler()).result;
let x=data[0];
let y=data[1];
console.log('shape(x)',shape(x), 'shape(y)',shape(y));
let loss_fn=new BinaryCrossEntropyLoss();
let sig=new Sigmoid();
let lin=new Linear(4,3);
for (let epoch = 0; epoch < 10; epoch++) {
    // forward pass on the whole data set (no batching, no validation split)
    const linearOut=lin.forward(x);
    const y_pred=sig.forward(linearOut);
    const loss_value=loss_fn.forward(y_pred,y);
    console.log('epoch',epoch,'loss_value',loss_value);
    // backward pass: each backward() returns the gradient it stores, so the calls can be chained
    const lossGrad=loss_fn.backward();
    const sigGrad=sig.backward(lossGrad);
    lin.backward(sigGrad);
    lin.update(.1);
}

shape(x) [ 150, 4 ] shape(y) [ 150, 3 ]
epoch 0 loss_value 0.9479798045352674
epoch 1 loss_value 0.8989487627957021
epoch 2 loss_value 0.8549446624067516
epoch 3 loss_value 0.8154406177269469
epoch 4 loss_value 0.7799195599908708
epoch 5 loss_value 0.747899259317327
epoch 6 loss_value 0.7189454569830287
epoch 7 loss_value 0.6926760714352166
epoch 8 loss_value 0.6687597778393528
epoch 9 loss_value 0.6469116154839275


## Can we teach a linear layer to convert one hot encoded integers to their bitwise representations?

In [28]:
// x: one row per integer 0..9, one-hot encoded (10x10 identity matrix)
let x=[
    [1,0,0,0,0,0,0,0,0,0],
    [0,1,0,0,0,0,0,0,0,0],
    [0,0,1,0,0,0,0,0,0,0],
    [0,0,0,1,0,0,0,0,0,0],
    [0,0,0,0,1,0,0,0,0,0],
    [0,0,0,0,0,1,0,0,0,0],
    [0,0,0,0,0,0,1,0,0,0],
    [0,0,0,0,0,0,0,1,0,0],
    [0,0,0,0,0,0,0,0,1,0],
    [0,0,0,0,0,0,0,0,0,1]
];
// y: the 4-bit binary representation of the same integers, least significant bit first
let y=[
    [0,0,0,0],
    [1,0,0,0],
    [0,1,0,0],
    [1,1,0,0],
    [0,0,1,0],
    [1,0,1,0],
    [0,1,1,0],
    [1,1,1,0],
    [0,0,0,1],
    [1,0,0,1]
];

`x` is an identity matrix, so ... `x.y` is `y`

In [29]:
// multiplying by the identity matrix `x` leaves `y` unchanged
testEq(y,dotProduct(x,y))

so ... will `y` make the perfect weights (if bias is zero)?

In [30]:
let loss_fn=new BinaryCrossEntropyLoss()
let sig=new Sigmoid()
// third constructor argument `false`: update() leaves the bias untouched
let linearNoBias=new Linear(10,4,false);
// declared outside the loop so the last predictions can be printed afterwards
let y_pred=null;
for (let epoch = 0; epoch<10; epoch++) {
    y_pred=sig.forward(linearNoBias.forward(x));
    const loss_value=loss_fn.forward(y_pred,y);
    // only log every 10th epoch (with 10 epochs: just the last one)
    if (epoch%10==9) {
        console.log('epoch',epoch,'loss_value',loss_value);
    }
    loss_fn.backward();
    sig.backward(loss_fn.grad);
    linearNoBias.backward(sig.grad);
    linearNoBias.update(50);
}
console.log(y_pred)

epoch 9 loss_value 0.01826801023638013
[
  [
    0.018241077102381677,
    0.017694911550413337,
    0.017702128848892752,
    0.01848791808616651
  ],
  [
    0.9815339066824532,
    0.017844643319954757,
    0.018298590220030598,
    0.017782599726117673
  ],
  [
    0.018236548558390426,
    0.9823281232641801,
    0.017986104399842672,
    0.017856259579140676
  ],
  [
    0.9822958847767344,
    0.9817832453226071,
    0.018417317680826203,
    0.018288915576708416
  ],
  [
    0.018023018565035534,
    0.018497624593485647,
    0.982326444561379,
    0.018102274095561973
  ],
  [
    0.9816681441318499,
    0.017799987666008377,
    0.9816486269148199,
    0.018436066482178944
  ],
  [
    0.01767397321248599,
    0.9818995388186126,
    0.9820452630496264,
    0.018207926784196908
  ],
  [
    0.9814915726192901,
    0.9822346042798649,
    0.9817955775220609,
    0.018368012355228863
  ],
  [
    0.01816660421112255,
    0.017910072882694757,
    0.017588670199200736,
    0.981

dump our linear layer to output - so we can look at the learned weights.

In [31]:
// bare expression so the notebook displays the layer (weights, bias, stored gradients)
linearNoBias

Linear {
  inputDim: 10,
  numHidden: 4,
  weights: [
    [
      -4.076875133127986,
      -4.105099383336382,
      -4.104720331418261,
      -4.0644164714002535
    ],
    [
      4.065510768670068,
      -4.097269347731533,
      -4.0739561301344,
      -4.100505231976311
    ],
    [
      -4.077105394681426,
      4.106310282984099,
      -4.089936498577871,
      -4.096664847256857
    ],
    [
      4.104616036367389,
      4.078112572327417,
      -4.067961439055957,
      -4.074446461425296
    ],
    [
      -4.0880332103261905,
      -4.063930233372736,
      4.106221979206913,
      -4.083960953616492
    ],
    [
      4.072272287508215,
      -4.09959714011578,
      4.071285896786014,
      -4.067018601629332
    ],
    [
      -4.106200004705336,
      4.084053888817579,
      4.091557107686295,
      -4.078562144929315
    ],
    [
      4.0633894019047645,
      4.101404658491118,
      4.078740671686395,
      -4.070445849105483
    ],
    [
      -4.080669681885070

In [32]:
export {accuracy,Sigmoid,MSE,BinaryCrossEntropyLoss,CrossEntropyLoss,ReLU,Linear,Embedding,Learner}