In [1]:
//default_exp data

In [20]:
import {shape,zeros} from './src/util';

In [8]:
/**
 * Print the first `rows` entries of `data` to the console, one per line,
 * each preceded by its index.
 *
 * @param {Array} data - Indexable collection with a `length` to preview.
 * @param {number} [rows=10] - Maximum number of entries to print; capped at `data.length`.
 */
function head(data,rows=10) {
    const count = Math.min(rows, data.length);
    let index = 0;
    while (index < count) {
        console.log(index, data[index]);
        index += 1;
    }
}

/**
 * Print the last `rows` entries of `data` to the console, one per line,
 * each preceded by its negative offset from the end (-1 is the last entry).
 *
 * @param {Array} data - Indexable collection with a `length` to preview.
 * @param {number} [rows=10] - Maximum number of entries to print; capped at `data.length`.
 */
function tail(data,rows=10) {
    const size = data.length;
    const count = Math.min(rows, size);
    // Walk from the oldest printed entry towards the end of the collection.
    for (let offset = count; offset > 0; offset--) {
        console.log(-offset, data[size - offset]);
    }
}

In [9]:
/**
Read simple csv files.
*/
const fs = require('fs');

/**
 * Default row handler: splits every line on commas and collects the
 * resulting string arrays in `result`, in the order they were handled.
 */
class RowHandler {
    constructor() {
        // One entry per handled row; each entry is an array of field strings.
        this.result = [];
    }

    /**
     * Split one raw line into fields and append them to `result`.
     *
     * @param {string} row - Raw, comma-separated line.
     * @param {number} [i] - Accepted for interface compatibility; not used here.
     */
    handleRow(row,i) {
        const fields = row.split(',');
        this.result.push(fields);
    }
}

/**
 * Read a simple CSV file synchronously and pass every non-empty line to a row handler.
 *
 * @param {string} path - Path of the file to read.
 * @param {{handleRow: Function}} [rowHandler] - Object whose `handleRow(row, i)` is called
 *     with each non-empty line and its line index; defaults to a new `RowHandler`.
 * @param {number} [rowLimit] - Number of lines (blank ones included) to scan from the top
 *     of the file; defaults to, and is capped at, the number of lines in the file.
 * @returns {object} The row handler that was used, so callers can read what it collected.
 */
function readCsv(path, rowHandler, rowLimit) {
    if (rowHandler == null) {
        rowHandler = new RowHandler();
    }
    const fileData = fs.readFileSync(path).toString();
    // Accept Unix (\n) and Windows (\r\n) line endings so no stray '\r' sticks to the last field.
    const rows = fileData.split(/\r?\n/);
    // Never scan past the end of the file: a larger limit would hand `undefined` to the handler.
    const lineCount = rowLimit == null ? rows.length : Math.min(rowLimit, rows.length);
    for (let i = 0; i < lineCount; i++) {
        const row = rows[i];
        if (row !== '') {
            rowHandler.handleRow(row, i);
        }
    }
    return rowHandler;
}

We can use `IrisRowHandler` &darr; and `readCsv` &uarr; to prepare [iris.data](https://archive.ics.uci.edu/ml/datasets/iris) for learning.

In [13]:
/**
 * One-hot encoded targets for the iris dataset, keyed by the class label
 * found in the last column of each row.
 */
const IRIS_CLASS_MAP = {
    'Iris-setosa': [1, 0, 0],
    'Iris-versicolor': [0, 1, 0],
    'Iris-virginica': [0, 0, 1]
};

/**
 * Row handler for the iris dataset: turns each comma-separated line into
 * numeric, normalized input features and a one-hot encoded target.
 *
 * `result[0]` collects the feature rows and `result[1]` the matching targets.
 */
class IrisRowHandler {
    constructor() {
        this.result = [[], []];
    }

    /**
     * Convert one raw line and append its features and target to `result`.
     *
     * @param {string} row - Raw line: four numeric fields followed by the class label.
     */
    handleRow(row) {
        const [features, targets] = this.result;
        const fields = row.split(',');
        // [offset, scale] per feature column; presumably each column's mean and
        // standard deviation over the dataset - TODO confirm.
        const scaling = [
            [5.843333333, 0.828066128],
            [3.054, 0.433594311],
            [3.758666667, 1.76442042],
            [1.198666667, 0.763160742]
        ];
        features.push(
            scaling.map(([offset, scale], column) => (parseFloat(fields[column]) - offset) / scale)
        );
        // Labels missing from IRIS_CLASS_MAP yield `undefined` as the target.
        targets.push(IRIS_CLASS_MAP[fields[4]]);
    }
}

In [14]:
// Read the iris file with the default RowHandler (every field stays a string)
// and preview the first and last five rows.
let data=readCsv('data/iris.data').result
head(data,5)
tail(data,5)

0 [ '5.1', '3.5', '1.4', '0.2', 'Iris-setosa' ]
1 [ '4.9', '3.0', '1.4', '0.2', 'Iris-setosa' ]
2 [ '4.7', '3.2', '1.3', '0.2', 'Iris-setosa' ]
3 [ '4.6', '3.1', '1.5', '0.2', 'Iris-setosa' ]
4 [ '5.0', '3.6', '1.4', '0.2', 'Iris-setosa' ]
-5 [ '6.7', '3.0', '5.2', '2.3', 'Iris-virginica' ]
-4 [ '6.3', '2.5', '5.0', '1.9', 'Iris-virginica' ]
-3 [ '6.5', '3.0', '5.2', '2.0', 'Iris-virginica' ]
-2 [ '6.2', '3.4', '5.4', '2.3', 'Iris-virginica' ]
-1 [ '5.9', '3.0', '5.1', '1.8', 'Iris-virginica' ]


In [15]:
// Read only the first 3 lines of the iris file through IrisRowHandler:
// data[0] holds the normalized feature rows, data[1] the one-hot targets.
let data=readCsv('data/iris.data', new IrisRowHandler(), 3).result
head(data[0])
head(data[1])
tail(data[0])
tail(data[1])

0 [
  -0.8976738787702239,
  1.0286112817564161,
  -1.3367940204410014,
  -1.3085928193617695
]
1 [
  -1.1392004830319542,
  -0.12454037940548492,
  -1.3367940204410014,
  -1.3085928193617695
]
2 [
  -1.3807270872936854,
  0.3367202850592759,
  -1.3934698551040348,
  -1.3085928193617695
]
0 [ 1, 0, 0 ]
1 [ 1, 0, 0 ]
2 [ 1, 0, 0 ]
-3 [
  -0.8976738787702239,
  1.0286112817564161,
  -1.3367940204410014,
  -1.3085928193617695
]
-2 [
  -1.1392004830319542,
  -0.12454037940548492,
  -1.3367940204410014,
  -1.3085928193617695
]
-1 [
  -1.3807270872936854,
  0.3367202850592759,
  -1.3934698551040348,
  -1.3085928193617695
]
-3 [ 1, 0, 0 ]
-2 [ 1, 0, 0 ]
-1 [ 1, 0, 0 ]


In [26]:
// `zeros` and `shape` come from ./src/util (presumably zeros(10,2) builds a 10x2 array - TODO confirm).
// Slicing out indices 1..3 keeps three rows, which `shape` reports as [ 3, 2 ].
data=zeros(10,2)
shape(data.slice(1,4))

[ 3, 2 ]


The following `shuffle` function is adapted from Mike Bostock's Fisher–Yates shuffle (https://bost.ocks.org/mike/shuffle/), modified to apply the same random permutation to several arrays at once so that their elements stay aligned.

In [159]:
/**
 * Shuffle several arrays in place using one shared random permutation
 * (Fisher–Yates), so elements at the same index stay paired across arrays.
 *
 * The length of the first array decides how many positions are shuffled;
 * the other arrays are expected to have the same length.
 *
 * @param {Array<Array>} arrays - Arrays to shuffle together; an empty list is a no-op.
 */
function shuffle(arrays) {
    // Nothing to shuffle; also avoids reading `.length` of a missing first array.
    if (arrays.length === 0) {
        return;
    }
    let remaining = arrays[0].length;
    // While there remain elements to shuffle…
    while (remaining > 0) {
        // Pick one of the remaining elements…
        const pick = Math.floor(Math.random() * remaining);
        remaining -= 1;
        // …and swap it with the last unshuffled position in every array.
        for (const array of arrays) {
            const held = array[remaining];
            array[remaining] = array[pick];
            array[pick] = held;
        }
    }
}

In [160]:
/**
 * Minimal equality assertion for notebook tests.
 *
 * When `expected` is an array, both values are compared through their JSON
 * text; otherwise they are compared with strict equality.
 *
 * @param {*} expected - Value the test expects.
 * @param {*} actual - Value the code produced.
 * @throws {Error} When the two values do not match.
 */
function testEq(expected,actual) {
    const compareAsJson = Array.isArray(expected);
    const want = compareAsJson ? JSON.stringify(expected) : expected;
    const got = compareAsJson ? JSON.stringify(actual) : actual;
    if (want === got) {
        return;
    }
    throw Error(`Expected ${want} but found ${got}`);
}

In [161]:
// Check that shuffle applies the same permutation to every array:
// b starts as a*10 element-wise, so after shuffling both together b/10 must still equal a.
let a=[],b=[];
for(let i=0; i<1000; i++) {
    a.push(i);
    b.push(i*10);
}
// Call `shuffle` (the function defined above); `shuffle2` is not defined in this file.
shuffle([a,b])
b=b.map(x=>x/10)
testEq(a,b)

In [5]:
export {head,tail,readCsv,IRIS_CLASS_MAP,RowHandler,IrisRowHandler}