In [3]:
import * as Utils from 'causal-net.utils';
import * as Log from 'causal-net.log';
import * as Preprocessing from 'causal-net.preprocessing';
import { causalNetCore } from 'causal-net.core';
import { causalNetSGDOptimizer } from 'causal-net.optimizers';
import { causalNetModels } from "causal-net.models";
import * as Sampling from 'causal-net.sampling';
import * as Representation from 'causal-net.representation';
import * as fs from 'fs';

var R = causalNetCore.CoreFunction;
var T = causalNetCore.CoreTensor;
var { Loss } = causalNetModels.skipGram();
var { Stream } = Utils;
var { termLogger } = Log;
// var { indexDBStorage } = Storage;
var { nlpPreprocessing } = Preprocessing;
var { causalNetSampling } = Sampling; 
var { CSV, Fetch } = Utils;
var { SentenceEncode, causalNetWordVec } = Representation;
var optimizers = causalNetSGDOptimizer;

'use strict'

In [4]:
var inputData = JSON.parse(fs.readFileSync('./sentenceEmbedd.json'));
inputData.length;

11314

In [5]:
var data;
(async ()=>{
    data = await new Promise((resolve, reject)=>{
        let csv = CSV();
        let samples = [], labels = [];
        csv.on('end', ()=>resolve({samples, labels}));
        csv.on('data',(row)=>{
            //console.log(row);
            samples.push(row.sample);
            labels.push(row.label);
        });
        fs.createReadStream('../datasets/20newsgroups/dataset.csv').pipe(csv);
    });
// console.log([data.samples.length, data.labels.length]);
})();

In [34]:
// var label2int = R.compose(R.fromPairs, R.addIndex(R.map)((label, idx)=>[label, idx]), R.uniq)(data.labels);
// var int2label = R.compose(R.fromPairs, R.addIndex(R.map)((label, idx)=>[idx, label]), R.uniq)(data.labels);
// var labels = data.labels.map(l=>label2int[l]);
// var preprocessingData = inputData.map((d,i)=>[Array.from(d),labels[i]]);

'use strict'

In [7]:
import TSNE from 'tsne-js';
 
var model = new TSNE({
  dim: 2,
  perplexity: 30.0,
  earlyExaggeration: 4.0,
  learningRate: 100.0,
  nIter: 1000,
  metric: 'euclidean'
});
model.on('progressStatus', (d)=>{
    console.log(d);
});

TSNE {
  _events: [Object: null prototype] { progressStatus: [Function] },
  _eventsCount: 1,
  _maxListeners: undefined,
  dim: 2,
  perplexity: 30,
  earlyExaggeration: 4,
  learningRate: 100,
  nIter: 1000,
  metric: 'euclidean',
  barneshut: false,
  inputData: null,
  outputEmbedding: null }

In [8]:
// inputData is a nested array which can be converted into an ndarray
// alternatively, it can be an array of coordinates (second argument should be specified as 'sparse')
model.init({
  data: inputData,
  type: 'dense'
});

In [9]:
// `error`,  `iter`: final error and iteration number
// note: computation-heavy action happens here
var [error, iter] = model.run();
console.log({error, iter}); 

Calculating pairwise distances
Calculating joint probabilities
Early exaggeration with momentum 0.5
Early exaggeration with momentum 0.8
Final optimization with momentum 0.8
Optimization end
{ error: 5.613068619616683, iter: 102 }


In [10]:
// // rerun without re-calculating pairwise distances, etc.
// var [error, iter] = model.rerun();
// console.log({error, iter}); 
// // `output` is unpacked ndarray (regular nested javascript array)
// var output = model.getOutput();
// console.log({output}); 
// `outputScaled` is `output` scaled to a range of [-1, 1]
var outputScaled = model.getOutputScaled();
console.log({outputScaled});

{ outputScaled:
   [ [ 0.25597409654565, -0.04056887670985123 ],
     [ -0.05170156736476558, 0.021286714931931697 ],
     [ 0.04074567747699668, 0.0017369462049392926 ],
     [ 0.03883620736981353, -0.012300239449875629 ],
     [ -0.10315145459907042, 0.04521744831830672 ],
     [ 0.02091048832052565, -0.0214023737118922 ],
     [ 0.06722319221594364, -0.0039790907828985 ],
     [ 0.001535915598196999, -0.006288594860118439 ],
     [ 0.10769397074769917, -0.02260179076712454 ],
     [ -0.0015684676864494585, 0.01460974179179793 ],
     [ 0.12249858965836893, -0.008939513834409697 ],
     [ -0.6055676018267737, 0.12346136715526167 ],
     [ 0.05813495499312825, -0.01888958558272283 ],
     [ -0.04240332709795782, -0.033010083531536706 ],
     [ 0.5159978098090463, -0.024533688544269357 ],
     [ -0.14790985156646058, 0.12864818351547014 ],
     [ -0.00284655309039005, 0.018886547708904158 ],
     [ 0.14037592111509473, -0.005898270524707901 ],
     [ 0.029533009010782704, -0.0114812093

In [12]:
fs.writeFileSync('./outputScaled.json',JSON.stringify(outputScaled))

In [14]:
var data = outputScaled.map((v,i)=>[...v, data.labels[i]]);
// var data = [[1,1,'A1'],
//             [1,2,'b'],
//             [3,1,'b'],
//             [4,4,'b'],
//             [1,5,'1'],
//             [4,0,'b'],
//             [1,1,'c'],]

TypeError: Cannot read property '0' of undefined

In [15]:
data[0]

[ 0.25597409654565, -0.04056887670985123, 'rec.autos' ]

In [16]:
import { default as genBasePlot } from './plot.base';
import { default as export2png } from './plot.export2png';
import * as d3 from 'd3';
var { SVG, xMap, yMap, plot, margin, width, height } = genBasePlot();

xMap.domain(d3.extent(data, ([d0, d1])=>d0));
yMap.domain(d3.extent(data, ([d0, d1])=>d1));
var xAxis = d3.axisBottom(xMap);
var yAxis = d3.axisLeft(yMap);
var color = d3.scaleOrdinal(d3.schemeCategory10);
SVG.append("g").attr("class", "x axis")
            .attr("transform", "translate(0," + height + ")").call(xAxis)
            .append("text").attr("class", "label").attr("x", width)
            .attr("y", 0).style("text-anchor", "end").text("Sepal Width (cm)");
SVG.append("g").attr("class", "y axis").call(yAxis)
            .append("text").attr("class", "label").attr("transform", "rotate(-90)")
            .attr("y", 6).attr("dy", ".71em").style("text-anchor", "end").text("Sepal Length (cm)");
SVG.selectAll(".dot").data(data).enter().append("circle")
      .attr("class", "dot").attr("r", 3.5).attr("cx", (d)=>xMap(d[0]))
      .attr("cy", (d)=>yMap(d[1])).style("fill", (d)=>color(d[2]));
var legend = SVG.selectAll(".legend").data(color.domain()).enter()
                .append("g").attr("class", "legend")
                .attr("transform", (d,i)=>"translate(0," + i * 20 + ")");

legend.append("rect").attr("x", width - 18).attr("width", 18)
    .attr("height", 18).style("fill", color);

legend.append("text").attr("x", width - 24).attr("y", 9)
    .attr("dy", ".35em").style("text-anchor", "end").text(d=>d);

plot.style = `body {font: 10px sans-serif;}
            .axis path, .axis line {
                fill: none; stroke: #000; shape-rendering: crispEdges; }
            .dot { stroke: #000; }`;
export2png('./test.png', plot);

<img src="./test.png?123152322221">

In [None]:
var data = d3.range(1000).map(d3.randomBates(10));
data = data.map(d=>d+1);
var formatCount = d3.format(",.0f");
var margin = {top: 10, right: 30, bottom: 30, left: 30},
    width = 960 - margin.left - margin.right,
    height = 500 - margin.top - margin.bottom;
var x = d3.scaleLinear().rangeRound([0, width]);
    
var bins = d3.histogram().domain(x.domain()).thresholds(x.ticks(20))(data);
var y = d3.scaleLinear()
            .domain([0, d3.max(bins, function(d) { return d.length; })])
            .range([height, 0]);

var svgWidth = width + margin.left + margin.right
var svgHeight = height + margin.top + margin.bottom
    
var svg = d3n.createSVG(svgWidth, svgHeight).append("g")
              .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

var bar = svg.selectAll(".bar").data(bins).enter().append("g").attr("class", "bar")
          .attr("transform", function(d) { return "translate(" + x(d.x0) + "," + y(d.length) + ")"; });

bar.append("rect")
      .attr("x", 1)
      .attr("width", x(bins[0].x1) - x(bins[0].x0) - 1)
      .attr("height", function(d) { return height - y(d.length); });

bar.append("text")
      .attr("dy", ".75em")
      .attr("y", 6)
      .attr("x", (x(bins[0].x1) - x(bins[0].x0)) / 2)
      .attr("text-anchor", "middle")
      .text(function(d) { return formatCount(d.length); });

svg.append("g")
      .attr("class", "axis axis--x")
      .attr("transform", "translate(0," + height + ")")
      .call(d3.axisBottom(x));
    
output('../pie-canvas2', d3n).catch( err => console.error({err}) );

<img src="../pie-canvas2.png">

<img src="../datasets/MNIST_dataset/data-chunk-90.png?1213">

In [None]:
var model;
(async ()=>{
    model = await SentenceEncode.load();
})();

In [None]:
// // Load the model.
// var fetch = Fetch.fetch;
// var sentenceEmbed = [];
// (async ()=>{
//     let counter = 0;
//     for(let doc of data.samples){
//         console.log(counter);
//         counter += 1;
//         let docTensor = await model.embed([doc]);
//         let docVec = await docTensor.data();
//         sentenceEmbed.push(docVec);
//         docTensor.dispose();
//     }
// })();

In [26]:
var uniqlabels = R.uniq(labels)
var targets = labels.map(l=>Array.from(T.oneHot(l, 20).dataSync()))

'use strict'

In [39]:
var preprocessingData = inputData.map((o,i)=>[o, targets[i]])

'use strict'

In [40]:
var [trainSet, testSet] = R.splitAt(10314, preprocessingData)
console.log([trainSet.length, testSet.length])

[ 10314, 1000 ]


In [41]:
trainSet[0]

[ [ 0.028369115665555,
    0.03054637648165226,
    0.06017296761274338,
    -0.007304872386157513,
    -0.0015674697933718562,
    0.09297604858875275,
    0.05786536633968353,
    -0.038533542305231094,
    0.07912255078554153,
    0.056372079998254776,
    0.07710565626621246,
    0.014738659374415874,
    0.019380733370780945,
    0.09697484970092773,
    -0.059149663895368576,
    -0.025681007653474808,
    -0.05750536546111107,
    -0.005214315373450518,
    -0.010037926025688648,
    0.057757627218961716,
    -0.057805612683296204,
    -0.06593387573957443,
    -0.081797294318676,
    -0.005358944647014141,
    0.03587067499756813,
    0.0014632497914135456,
    0.02528754435479641,
    -0.00278215273283422,
    -0.020434193313121796,
    -0.049661602824926376,
    0.006134781055152416,
    0.043485455214977264,
    0.03838784620165825,
    -0.02073526382446289,
    0.020247383043169975,
    0.07384887337684631,
    -0.037138067185878754,
    0.03690420463681221,
    -0.05072337

In [48]:
// import CausalNet from 'causal-net';

'use strict'

In [42]:
var _NetConfig = {
    HyperParameters: {SampleSize:400},
    Classes: 2,
    Pipeline:[
        {   Name:'dense', Type: 'Tensor', 
            Parameters: { Weight: [512, 2], Bias: [2]  },
            Flow: [ { Op: 'dot', Parameter: 'Weight', Args: [] },
                    { Op: 'add', Parameter: 'Bias',  Args: [] } ] 
        },
        {   Name:'PipeOutput', Type: 'Tensor', 
            Flow: [ { Op: 'reshape', Args: [['$SampleSize', -1]] } ] 
        } 
    ] };
var parameters = {};
var causalNet = new cau(_NetConfig, parameters);

ReferenceError: SimpleNet is not defined

In [None]:
(async ()=>{
    const DoBatchTrainSampleGenerator = (epochIdx)=>([{idx:0, batchSize:400, data: [inputs, targets]}]);
    let logTrain = await causalNet.train(DoBatchTrainSampleGenerator, 20);
    termLogger.log(logTrain);
    const DoBatchTestSampleGenerator = ()=>([{idx:0, batchSize:4, data: [inputs, targets]}]);
    let testResult = await causalNet.test(DoBatchTestSampleGenerator);
    termLogger.log({testResult});
    await causalNet.saveParams('save_model.model');
    await causalNet.loadParams('save_model.model');
    testResult = await causalNet.test(DoBatchTestSampleGenerator);
    termLogger.log({testResult});
    testResult = await causalNet.ensembleTest(DoBatchTestSampleGenerator, ['save_model.model']);
    termLogger.log({testResult});
    testResult = await causalNet.ensembleTest(DoBatchTestSampleGenerator, ['save_model.model']);
    termLogger.log({testResult});
})().catch(err=>{
    console.error({err});
});

In [None]:
(async ()=>{
    const DoBatchTrainSampleGenerator = (epochIdx)=>([{idx:0, batchSize:4, data: [inputs, targets]}]);
    let logTrain = await causalNet.train(DoBatchTrainSampleGenerator, 20);
    termLogger.log(logTrain);
    const DoBatchTestSampleGenerator = ()=>([{idx:0, batchSize:4, data: [inputs, targets]}]);
    let testResult = await causalNet.test(DoBatchTestSampleGenerator);
    termLogger.log({testResult});
    await causalNet.saveParams('save_model.model');
    await causalNet.loadParams('save_model.model');
    testResult = await causalNet.test(DoBatchTestSampleGenerator);
    termLogger.log({testResult});
    testResult = await causalNet.ensembleTest(DoBatchTestSampleGenerator, ['save_model.model']);
    termLogger.log({testResult});
    testResult = await causalNet.ensembleTest(DoBatchTestSampleGenerator, ['save_model.model']);
    termLogger.log({testResult});
})().catch(err=>{
    console.error({err});
});