# Kaggle: Tabular Playground Series - May 2021

If you'd like to run this notebook, please download the dataset from https://www.kaggle.com/c/tabular-playground-series-may-2021 and extract it to `data/kaggle`.

In [1]:
import {round,flatten,exp,shape,transpose,dotProduct,randn,uniform,full,zeros,mean,reshape,argmax,
        matrixSum1d,matrixSum2d,matrixSubtract1d,matrixSubtract2d,matrixMultiply1d,matrixMultiply2d,
        normalize,identity,meanAndStandardDeviation,} from './src/util.module.js';
import {head,tail,parseCsv} from './src/data.module.js';
import {BinaryCrossEntropyLoss,CrossEntropyLoss,Linear,Sigmoid,ReLU,Learner} from './src/nn.module.js';

In [2]:
// Load the whole training CSV into memory as a single UTF-8 decoded string.
let stringData=require('fs').readFileSync('data/kaggle/train.csv','utf8');

In [3]:
// Lookup table for the four target classes. For each zero-based class id it holds:
//   <id>                  -> label string, e.g. 'Class_1'
//   '<label>-onehot'      -> the matching row of identity(4)
//   '<label>-classid'     -> the zero-based class id
const CLASS_MAP = {};
const _identity=identity(4);
for (let classId=0; classId<4; classId++) {
    const label=`Class_${classId+1}`;
    CLASS_MAP[classId]=label;
    CLASS_MAP[`${label}-onehot`]=_identity[classId];
    CLASS_MAP[`${label}-classid`]=classId;
}
CLASS_MAP

{
  '0': 'Class_1',
  '1': 'Class_2',
  '2': 'Class_3',
  '3': 'Class_4',
  'Class_1-onehot': [ 1, 0, 0, 0 ],
  'Class_1-classid': 0,
  'Class_2-onehot': [ 0, 1, 0, 0 ],
  'Class_2-classid': 1,
  'Class_3-onehot': [ 0, 0, 1, 0 ],
  'Class_3-classid': 2,
  'Class_4-onehot': [ 0, 0, 0, 1 ],
  'Class_4-classid': 3
}


In [4]:
/**
 * Row callback object handed to `parseCsv`: collects feature vectors and
 * encoded targets from raw CSV lines.
 *
 * `result[0]` accumulates, per data row, the values of columns 1-50 parsed as
 * floats (column 0 is not used). `result[1]` accumulates the `CLASS_MAP` entry
 * looked up from the label in column 51.
 */
class DataRowHandler {
    /**
     * @param {string} [targetType] Suffix used for the CLASS_MAP lookup
     *     (`'onehot'` or `'classid'`); defaults to `'onehot'` when null/undefined.
     */
    constructor(targetType) {
        this.targetType = (targetType == null) ? 'onehot' : targetType;
        this.result = [[], []];
    }

    /**
     * Parses one raw line and appends its features and target to `result`.
     *
     * @param {string} row One comma-separated line of the CSV.
     * @param {number} i Zero-based line index; line 0 is treated as the header and skipped.
     */
    handleRow(row, i) {
        // Number() keeps the header check working even if the caller passes the index as a string.
        if (Number(i) === 0) return;
        // A blank line (e.g. from a trailing newline) would otherwise add an
        // empty feature row and an undefined target.
        if (row.trim() === '') return;
        const cells = row.split(',');
        this.result[0].push(cells.slice(1, 51).map(cell => parseFloat(cell)));
        // Trim so a CRLF line ending ("Class_2\r") still matches its CLASS_MAP key.
        const label = String(cells[51]).trim();
        this.result[1].push(CLASS_MAP[`${label}-${this.targetType}`]);
    }
}

In [14]:
// Experiment 1: one-hot encoded targets, BinaryCrossEntropyLoss, and a Sigmoid output layer.
// The third parseCsv argument (5000) is presumably a cap on the rows read — confirm in data.module.js.
let data=parseCsv(stringData, new DataRowHandler('onehot'),5000).result;
// data[0] holds the feature rows collected by DataRowHandler; only those are passed through normalize.
data[0]=normalize(data[0]);
let lossFn=new BinaryCrossEntropyLoss();
// Presumably Linear(inputs, outputs): 50 features -> 100 hidden units -> 4 classes — confirm in nn.module.js.
let model=[new Linear(50,100), new ReLU(), new Linear(100,4), new Sigmoid()];
let learn=new Learner(model, lossFn, data);
// The log printed below lists epochs -1 through 9; 3e-3 is presumably the learning rate — confirm in nn.module.js.
learn.fit(10,3e-3);

epoch -1 valid loss 0.7344175807653834 metrics [ 0.273 ]
epoch 0 valid loss 0.5982401853821188 metrics [ 0.441 ]
epoch 1 valid loss 0.5733174043004285 metrics [ 0.494 ]
epoch 2 valid loss 0.5630355804535359 metrics [ 0.505 ]
epoch 3 valid loss 0.5566022618778866 metrics [ 0.509 ]
epoch 4 valid loss 0.5517827919197905 metrics [ 0.513 ]
epoch 5 valid loss 0.5478848960719098 metrics [ 0.515 ]
epoch 6 valid loss 0.5443343993744512 metrics [ 0.518 ]
epoch 7 valid loss 0.5411434317167338 metrics [ 0.519 ]
epoch 8 valid loss 0.5384417522267655 metrics [ 0.518 ]
epoch 9 valid loss 0.5354448920240993 metrics [ 0.517 ]


In [16]:
/**
 * Formats a target or prediction as "<class id>: <class name>".
 *
 * @param {number|Array} y A class id, or an array that is first reduced to a
 *     class id with `argmax` (presumably the index of its largest entry).
 * @returns {string} The id followed by its CLASS_MAP label, e.g. '1: Class_2'.
 */
function yToLabelFn(y) {
    const classId = Array.isArray(y) ? argmax(y) : y;
    return `${classId}: ${CLASS_MAP[classId]}`;
}
// For each row, learn.predict gives us [preds, predicted label, actual label].
// yToLabelFn is passed in, presumably to format both the prediction and the target as label strings.
let preds=learn.predict(learn.xValid, learn.yValid, yToLabelFn);
// Print the first 3 entries (indices 0-2 in the output below).
head(preds,3);

0 [
  [
    0.04738580760739074,
    0.6159061619317402,
    0.08083266355933032,
    0.12762368804514845
  ],
  '1: Class_2',
  '0: Class_1'
]
1 [
  [
    0.23486176662583835,
    0.3384535467359777,
    0.32449213160460993,
    0.3307962614825894
  ],
  '1: Class_2',
  '3: Class_4'
]
2 [
  [
    0.024346154631035996,
    0.876295510092895,
    0.060946196514458546,
    0.08674747970519878
  ],
  '1: Class_2',
  '0: Class_1'
]


In [17]:
// Experiment 2: integer class-id targets with CrossEntropyLoss; the model ends in a Linear layer (no Sigmoid).
// The third parseCsv argument (5000) is presumably a cap on the rows read — confirm in data.module.js.
let data=parseCsv(stringData, new DataRowHandler('classid'),5000).result;
// data[0] holds the feature rows collected by DataRowHandler; only those are passed through normalize.
data[0]=normalize(data[0]);
let lossFn=new CrossEntropyLoss();
// Presumably Linear(inputs, outputs): 50 features -> 100 hidden units -> 4 classes — confirm in nn.module.js.
let model=[new Linear(50,100), new ReLU(), new Linear(100,4)];
let learn=new Learner(model, lossFn, data);
// The log printed below lists epochs -1 through 9; 3e-3 is presumably the learning rate — confirm in nn.module.js.
learn.fit(10,3e-3);

epoch -1 valid loss 1.3396573086502428 metrics [ 0.506 ]
epoch 0 valid loss 1.2989734661898056 metrics [ 0.505 ]
epoch 1 valid loss 1.2735455834101406 metrics [ 0.512 ]
epoch 2 valid loss 1.2556609071949523 metrics [ 0.517 ]
epoch 3 valid loss 1.2417171477137985 metrics [ 0.521 ]
epoch 4 valid loss 1.2310551727225894 metrics [ 0.528 ]
epoch 5 valid loss 1.2230947066738047 metrics [ 0.533 ]
epoch 6 valid loss 1.2162349781734534 metrics [ 0.538 ]
epoch 7 valid loss 1.2094647160869438 metrics [ 0.541 ]
epoch 8 valid loss 1.204161103061065 metrics [ 0.542 ]
epoch 9 valid loss 1.199396474937912 metrics [ 0.542 ]


In [18]:
// For each row, learn.predict gives us [preds, predicted label, actual label].
// The model built for this run has no Sigmoid layer, so the preds printed below are
// the raw outputs of the last Linear layer (they can be negative).
let preds=learn.predict(learn.xValid, learn.yValid, yToLabelFn);
// Print the first 3 entries (indices 0-2 in the output below).
head(preds,3);

0 [
  [
    -0.32459711743356645,
    2.2448262942067467,
    0.4535846080218559,
    -0.451170345664062
  ],
  '1: Class_2',
  '1: Class_2'
]
1 [
  [
    -0.9707653114763204,
    1.5153784425638128,
    -0.19168877249443797,
    -1.50457828106656
  ],
  '1: Class_2',
  '0: Class_1'
]
2 [
  [
    -0.9498879280753949,
    0.35540389919180854,
    -1.1942133276787985,
    -1.1649341005165545
  ],
  '1: Class_2',
  '1: Class_2'
]
