In [2]:
import * as Utils from 'causal-net.utils';
import * as Log from 'causal-net.log';
import * as Preprocessing from 'causal-net.preprocessing';
import * as Memory from 'causal-net.memory';
import { causalNetCore } from 'causal-net.core';
import { causalNetSGDOptimizer } from 'causal-net.optimizers';
import { causalNetModels } from "causal-net.models";
import * as Sampling from 'causal-net.sampling';
import * as fs from 'fs';
var R = causalNetCore.CoreFunction;
var T = causalNetCore.CoreTensor;
var { Loss } = causalNetModels.skipGram();
var { Stream } = Utils;
var { termLogger } = Log;
// import * as Storage from 'causal-net.storage';
// var { indexDBStorage } = Storage;
var { nlpPreprocessing } = Preprocessing;
var { causalNetMemory } = Memory;
var { causalNetSampling } = Sampling; 
var optimizers = causalNetSGDOptimizer;

'use strict'

In [3]:
var docs = ['He is the king', 'The king is royal', 'She is the royal queen'];

'use strict'

In [4]:
var corpus = '' + fs.readFileSync('../datasets/text8/text8.txt');
var docs = corpus.split('\n');

'use strict'

In [5]:
var docTokens = [];
for(let raw of docs){
    docTokens.push(nlpPreprocessing.tokenize(raw));
}
var rawVocab = R.compose(R.flatten)(docTokens);
//advance processing
var rawToken = nlpPreprocessing.wordFreqCount(rawVocab);
var pairs = R.toPairs(rawToken);
var sortedPairs = R.compose(R.filter(([w,f])=>f>0),R.sortBy(([w,f])=>-f))(pairs);
var [choosePairs, filterPairs] = R.splitAt(10000, sortedPairs);  
termLogger.log({'keep length': choosePairs.length, 'discard': filterPairs.length});
var vocab = R.map(([v,f]) => v, choosePairs);
var vocabCounts = R.map(([v,f]) => f, choosePairs);
var countTotal = R.sum(vocabCounts);
var wFracs = R.map((count)=>count / countTotal, vocabCounts);
var vocabProbs = R.map((frac)=>Math.pow(frac, 3/4), wFracs); 
// console.log({vocabProbs, wFracs});
var word2int = R.compose(R.fromPairs, R.addIndex(R.map)((w,i)=>[w,i]))(vocab);
var int2word = R.compose(R.fromPairs, R.addIndex(R.map)((w,i)=>[i,w]))(vocab);
console.log([vocab, word2int, int2word]);

{ 'keep length': 10000, discard: 243853 }
[ [ 'the',
    'of',
    'and',
    'one',
    'in',
    'a',
    'to',
    'zero',
    'nine',
    'two',
    'is',
    'as',
    'eight',
    'for',
    's',
    'five',
    'three',
    'was',
    'by',
    'that',
    'four',
    'six',
    'seven',
    'with',
    'on',
    'are',
    'it',
    'from',
    'or',
    'his',
    'an',
    'be',
    'this',
    'which',
    'at',
    'he',
    'also',
    'not',
    'have',
    'were',
    'has',
    'but',
    'other',
    'their',
    'its',
    'first',
    'they',
    'some',
    'had',
    'all',
    'more',
    'most',
    'can',
    'been',
    'such',
    'many',
    'who',
    'new',
    'used',
    'there',
    'after',
    'when',
    'into',
    'american',
    'time',
    'these',
    'only',
    'see',
    'may',
    'than',
    'world',
    'i',
    'b',
    'would',
    'd',
    'no',
    'however',
    'between',
    'about',
    'over',
    'years',
    'states',
    'people

In [6]:
function* skipGramContext(docTokens, windowSize, word2int){
    for(let tokens of docTokens){
        for(let index of R.range(0, tokens.length)){
            let target = tokens[index];
            let targetId = word2int[target];
            if(targetId !== undefined){
                for(let wid of R.range(-windowSize, windowSize)){
                    let context = tokens[index + wid];
                    let contextId = word2int[context];
                    if(  context !== undefined && contextId && target !== context ){
                        yield [target, context, targetId, contextId];
                    }
                }    
            }
        }    
    }
};

'use strict'

In [6]:
// var trainTargets = [], trainContexts = [], trainNegContexts = [];
// var idx = 0;
// for(let [target, context, targetId, contextId] of skipGramContext(docTokens, 5, word2int)){
//     console.log({idx});
//     var nonContext = causalNetSampling.negSampling(3, [targetId, contextId], vocabProbs);    
//     idx += 1;
//     console.log([target, context, targetId, contextId, nonContext]);
//     trainTargets.push(targetId);
//     trainContexts.push(contextId);
//     trainNegContexts.push(nonContext);
// }
// console.log(vocab.length);
// console.log(trainNegContexts.length);

'use strict'

In [11]:
function *batchSampleGenerator(batchSize=5, contextWindownSize=2, negSize=10){
    var trainTargets = [], trainContexts = [], trainNegContexts = [], idx = 0;
    const SampleGen = skipGramContext(docTokens, contextWindownSize, word2int);
    var newSample = true; 
    while(newSample){
        var {value, done} = SampleGen.next();
        newSample = !done;
        if(done){
            yield [trainTargets, trainContexts, trainNegContexts];
            trainTargets = [];
            trainContexts = [];
            trainNegContexts = [];
        }
        else{
            let [target, context, targetId, contextId] = value;
            let nonContextIds = causalNetSampling.negSampling(negSize, [targetId, contextId], vocabProbs); 
            trainTargets.push(targetId);
            trainContexts.push(contextId);
            trainNegContexts.push(nonContextIds);
            if(trainTargets.length >= batchSize){
                yield [trainTargets, trainContexts, trainNegContexts];
                trainTargets = [];
                trainContexts = [];
                trainNegContexts = [];
            }
        }
        
    }
}

'use strict'

In [8]:
var VocabLen = vocab.length, EmbeddingSize = 50;
var UVecs = T.variable(T.randomNormal([VocabLen + 1, EmbeddingSize]));
var VVecs = T.variable(T.randomNormal([VocabLen + 1, EmbeddingSize]));

'use strict'

In [9]:
async function checkTopMatch(words, embedding, k=3){
    function normalize(vecs){
        let meanTs = vecs.mean(1, true);
        let stdTs = vecs.sub(meanTs).pow(2).mean(1, true).pow(0.5);
        return vecs.sub(meanTs).div(stdTs);    
    }
    function getMatchScore(slotIdxs, normVecs){
        let cTs = normVecs.gather(slotIdxs);
        let similarityScore = normVecs.dot(cTs.transpose());
        return similarityScore;
    }
    var norms = normalize(embedding);
    for(let w of words){
        let wid = word2int[w];
        let matchScores = getMatchScore([wid], norms);
        let {values, indices} = matchScores.transpose().topk(k);
        let idxs = await indices.data();
        let [targetW, ...similarWs] = Array.from(idxs).map(i=>int2word[i]);
        console.log(`[${targetW}\t] is similar to: ${similarWs.join(', ')}`)
    }
}
var embed = UVecs.split([VocabLen, 1], 0)[0];
checkTopMatch(vocab.slice(0,10), embed, 7);

[the	] is similar to: asked, illness, unlimited, delphi, distinct, come
[of	] is similar to: revealed, hoped, pollution, leaders, exploits, preceding
[and	] is similar to: microwave, riots, advanced, posthumously, printers, powerpc
[one	] is similar to: fans, distribute, newspapers, morality, deliver, floating
[in	] is similar to: marie, bruno, interests, acquired, environment, gis
[a	] is similar to: absolute, associates, operator, hadrian, decline, interior
[to	] is similar to: forgotten, fifth, champion, more, hull, estonian
[zero	] is similar to: combat, rolls, yeast, currencies, way, costly
[nine	] is similar to: residence, cruel, talent, uranium, expect, capitol
[two	] is similar to: racist, rabbis, pointed, colonists, arabs, habits


In [21]:
var adam = optimizers.adam({learningRate: 0.2});
function trainLabel(targetId, contextId){
    return adam.fit(()=>{
        let [UEmbed, Ubias] = UVecs.split([VocabLen, 1], 0);
        let [VEmbed, Vbias] = VVecs.split([VocabLen, 1], 0);
        let enc = T.oneHot(targetId, VocabLen).matMul(UEmbed).add(Ubias);
        let dec = enc.add(Vbias).matMul(VEmbed.transpose());
        let logProb = T.oneHot(contextId, VocabLen).mul(dec.sub(dec.logSumExp(1, true))).neg().mean();
        return logProb;
    }, [UVecs, VVecs]);
}

async function training(){
    var [embed, ] = UVecs.split([VocabLen, 1], 0);
    await checkTopMatch(vocab.slice(0,10), embed, 7);

    var progress = {start: new Date()}
    for(let epoch of R.range(0,150)){
        var trainGenerator = batchSampleGenerator(512, 2);
        for(let [trainTargets, trainContexts, trainNegContexts] of trainGenerator){
            progress.elapse = new Date() - progress.start;
            progress.epoch = epoch;
            console.log(progress);
            trainLabel(trainTargets, trainContexts).print();
        }
        var [embed, ] = UVecs.split([VocabLen, 1], 0);
        await checkTopMatch(vocab.slice(0,10), embed, 7);
    }
}
training();

[is	] is similar to: the, royal, king, queen, she, he
[the	] is similar to: is, she, royal, king, he, queen
[king	] is similar to: is, she, the, royal, he, queen
[royal	] is similar to: is, the, king, she, queen, he
[he	] is similar to: queen, she, king, is, the, royal
[she	] is similar to: king, the, is, he, royal, queen
[queen	] is similar to: is, he, royal, she, the, king
{ start: 2019-02-26T04:46:50.623Z, elapse: 0, epoch: 0 }
Tensor
    0.2029091864824295
[is	] is similar to: the, royal, king, queen, she, he
[the	] is similar to: is, royal, she, king, he, queen
[king	] is similar to: is, she, the, royal, he, queen
[royal	] is similar to: is, the, king, she, queen, he
[he	] is similar to: queen, she, king, is, the, royal
[she	] is similar to: king, the, is, he, royal, queen
[queen	] is similar to: is, he, royal, she, the, king
{ start: 2019-02-26T04:46:50.623Z, elapse: 11, epoch: 1 }
Tensor
    0.21030327677726746
[is	] is similar to: the, royal, king, queen, she, he
[the	] is simi

[she	] is similar to: king, the, is, he, royal, queen
[queen	] is similar to: is, he, royal, she, the, king
{ start: 2019-02-26T04:46:50.623Z, elapse: 197, epoch: 17 }
Tensor
    0.20347407460212708
[is	] is similar to: the, royal, king, she, queen, he
[the	] is similar to: is, royal, she, king, he, queen
[king	] is similar to: is, she, the, royal, he, queen
[royal	] is similar to: is, the, king, she, queen, he
[he	] is similar to: queen, she, is, king, the, royal
[she	] is similar to: king, the, is, he, royal, queen
[queen	] is similar to: is, he, royal, she, the, king
{ start: 2019-02-26T04:46:50.623Z, elapse: 206, epoch: 18 }
Tensor
    0.20253558456897736
[is	] is similar to: the, royal, king, she, queen, he
[the	] is similar to: is, royal, she, king, he, queen
[king	] is similar to: is, she, the, royal, he, queen
[royal	] is similar to: is, the, king, she, queen, he
[he	] is similar to: queen, she, king, is, the, royal
[she	] is similar to: king, the, is, he, royal, queen
[queen	]

[the	] is similar to: is, royal, she, king, he, queen
[king	] is similar to: is, she, the, royal, he, queen
[royal	] is similar to: is, the, king, she, queen, he
[he	] is similar to: queen, she, is, king, the, royal
[she	] is similar to: king, the, is, he, royal, queen
[queen	] is similar to: is, he, royal, she, the, king
{ start: 2019-02-26T04:46:50.623Z, elapse: 391, epoch: 35 }
Tensor
    0.2031298130750656
[is	] is similar to: the, royal, king, she, queen, he
[the	] is similar to: is, royal, she, king, he, queen
[king	] is similar to: is, she, the, royal, he, queen
[royal	] is similar to: is, the, king, she, queen, he
[he	] is similar to: queen, she, is, king, the, royal
[she	] is similar to: king, the, is, he, royal, queen
[queen	] is similar to: is, he, royal, she, the, king
{ start: 2019-02-26T04:46:50.623Z, elapse: 400, epoch: 36 }
Tensor
    0.2033103108406067
[is	] is similar to: the, royal, king, she, queen, he
[the	] is similar to: is, royal, she, king, he, queen
[king	] is

[queen	] is similar to: is, he, royal, she, the, king
{ start: 2019-02-26T04:46:50.623Z, elapse: 564, epoch: 52 }
Tensor
    0.20208130776882172
[is	] is similar to: the, royal, king, she, queen, he
[the	] is similar to: is, royal, she, king, he, queen
[king	] is similar to: is, she, the, royal, he, queen
[royal	] is similar to: is, the, king, she, queen, he
[he	] is similar to: queen, she, is, king, the, royal
[she	] is similar to: king, the, is, he, royal, queen
[queen	] is similar to: is, he, royal, she, the, king
{ start: 2019-02-26T04:46:50.623Z, elapse: 573, epoch: 53 }
Tensor
    0.20123820006847382
[is	] is similar to: the, royal, king, she, queen, he
[the	] is similar to: is, royal, she, king, he, queen
[king	] is similar to: is, she, the, royal, he, queen
[royal	] is similar to: is, the, king, she, queen, he
[he	] is similar to: queen, she, is, king, the, royal
[she	] is similar to: king, the, is, he, royal, queen
[queen	] is similar to: is, he, royal, she, the, king
{ start:

[king	] is similar to: is, she, the, royal, he, queen
[royal	] is similar to: is, the, king, she, queen, he
[he	] is similar to: queen, she, king, is, the, royal
[she	] is similar to: king, the, is, he, royal, queen
[queen	] is similar to: is, he, royal, she, the, king
{ start: 2019-02-26T04:46:50.623Z, elapse: 741, epoch: 70 }
Tensor
    0.2004726678133011
[is	] is similar to: the, royal, king, she, queen, he
[the	] is similar to: is, royal, she, king, he, queen
[king	] is similar to: is, she, the, royal, he, queen
[royal	] is similar to: is, the, king, she, queen, he
[he	] is similar to: queen, she, king, is, the, royal
[she	] is similar to: king, the, is, he, royal, queen
[queen	] is similar to: is, he, royal, she, the, king
{ start: 2019-02-26T04:46:50.623Z, elapse: 750, epoch: 71 }
Tensor
    0.19976216554641724
[is	] is similar to: the, royal, king, she, queen, he
[the	] is similar to: is, royal, she, king, he, queen
[king	] is similar to: is, she, the, royal, he, queen
[royal	] 

{ start: 2019-02-26T04:46:50.623Z, elapse: 912, epoch: 87 }
Tensor
    0.20203642547130585
[is	] is similar to: the, royal, king, she, queen, he
[the	] is similar to: is, royal, she, king, he, queen
[king	] is similar to: is, she, the, royal, he, queen
[royal	] is similar to: is, the, king, she, queen, he
[he	] is similar to: queen, she, king, is, the, royal
[she	] is similar to: king, is, the, he, royal, queen
[queen	] is similar to: is, he, royal, she, the, king
{ start: 2019-02-26T04:46:50.623Z, elapse: 923, epoch: 88 }
Tensor
    0.20188501477241516
[is	] is similar to: the, royal, king, she, queen, he
[the	] is similar to: is, royal, she, king, he, queen
[king	] is similar to: is, she, the, royal, he, queen
[royal	] is similar to: is, the, king, she, queen, he
[he	] is similar to: queen, she, king, is, the, royal
[she	] is similar to: king, the, is, he, royal, queen
[queen	] is similar to: is, he, royal, she, the, king
{ start: 2019-02-26T04:46:50.623Z, elapse: 935, epoch: 89 }
Te

[royal	] is similar to: is, the, king, she, queen, he
[he	] is similar to: queen, she, king, is, the, royal
[she	] is similar to: king, is, the, he, royal, queen
[queen	] is similar to: is, he, royal, she, the, king
{ start: 2019-02-26T04:46:50.623Z, elapse: 1090, epoch: 105 }
Tensor
    0.20118078589439392
[is	] is similar to: the, royal, king, she, queen, he
[the	] is similar to: is, royal, she, king, he, queen
[king	] is similar to: is, she, the, royal, he, queen
[royal	] is similar to: is, the, king, she, queen, he
[he	] is similar to: queen, she, king, is, the, royal
[she	] is similar to: king, is, the, he, royal, queen
[queen	] is similar to: is, he, royal, she, king, the
{ start: 2019-02-26T04:46:50.623Z, elapse: 1100, epoch: 106 }
Tensor
    0.2018074095249176
[is	] is similar to: royal, the, king, she, queen, he
[the	] is similar to: is, royal, she, king, he, queen
[king	] is similar to: is, she, the, royal, he, queen
[royal	] is similar to: is, the, king, she, queen, he
[he	]

{ start: 2019-02-26T04:46:50.623Z, elapse: 1246, epoch: 122 }
Tensor
    0.20133009552955627
[is	] is similar to: royal, the, king, she, queen, he
[the	] is similar to: is, royal, she, king, he, queen
[king	] is similar to: is, she, the, royal, he, queen
[royal	] is similar to: is, the, king, she, queen, he
[he	] is similar to: queen, she, is, king, the, royal
[she	] is similar to: king, is, the, he, royal, queen
[queen	] is similar to: is, he, royal, she, the, king
{ start: 2019-02-26T04:46:50.623Z, elapse: 1256, epoch: 123 }
Tensor
    0.20101988315582275
[is	] is similar to: the, royal, king, she, queen, he
[the	] is similar to: is, royal, she, king, he, queen
[king	] is similar to: is, she, the, royal, he, queen
[royal	] is similar to: is, the, king, she, queen, he
[he	] is similar to: queen, she, king, is, the, royal
[she	] is similar to: king, is, the, he, royal, queen
[queen	] is similar to: is, he, royal, she, the, king
{ start: 2019-02-26T04:46:50.623Z, elapse: 1264, epoch: 12

[king	] is similar to: is, she, the, royal, he, queen
[royal	] is similar to: is, the, king, she, queen, he
[he	] is similar to: queen, she, is, king, the, royal
[she	] is similar to: king, is, the, he, royal, queen
[queen	] is similar to: is, he, royal, she, the, king
{ start: 2019-02-26T04:46:50.623Z, elapse: 1404, epoch: 140 }
Tensor
    0.20067940652370453
[is	] is similar to: the, royal, king, she, queen, he
[the	] is similar to: is, royal, she, king, he, queen
[king	] is similar to: is, she, the, royal, he, queen
[royal	] is similar to: is, the, king, she, queen, he
[he	] is similar to: queen, she, is, king, the, royal
[she	] is similar to: king, is, the, he, royal, queen
[queen	] is similar to: is, he, royal, she, the, king
{ start: 2019-02-26T04:46:50.623Z, elapse: 1413, epoch: 141 }
Tensor
    0.2003471851348877
[is	] is similar to: the, royal, king, she, queen, he
[the	] is similar to: is, royal, she, king, he, queen
[king	] is similar to: is, she, the, royal, he, queen
[roya

In [None]:
var adam = optimizers.adam({learningRate: 0.1});
function trainLabel(targetId, contextId, negContextId){
    console.log(negContextId.length);
    return adam.fit(()=>{
        let [UEmbed, Ubias] = UVecs.split([VocabLen, 1], 0);
        let [VEmbed, Vbias] = VVecs.split([VocabLen, 1], 0);
        let posU = UEmbed.gather(targetId).reshape([-1, 1, EmbeddingSize]);
        let posV = VEmbed.gather(contextId).reshape([-1, EmbeddingSize, 1]); 
        let pos = posU.matMul(posV).logSigmoid().mean();
        for(let bId of R.range(0, targetId.length)){
            let negU = UEmbed.gather(R.repeat(targetId[bId], negContextId[bId].length))
                                .reshape([-1, 1, EmbeddingSize]);
            let negV = VEmbed.gather(negContextId[bId])
                                .reshape([-1, EmbeddingSize, 1]);
            pos.add(negU.matMul(negV).neg().logSigmoid().sum());
        }
        let logProb = pos.neg().mean();
        return logProb;
    }, [UVecs, VVecs]);
}

async function checkTopMatch(words, embedding, k=3){
    function normalize(vecs){
        let meanTs = vecs.mean(1, true);
        let stdTs = vecs.sub(meanTs).pow(2).mean(1, true).pow(0.5);
        return vecs.sub(meanTs).div(stdTs);    
    }
    function getMatchScore(slotIdxs, normVecs){
        let cTs = normVecs.gather(slotIdxs);
        let similarityScore = normVecs.dot(cTs.transpose());
        return similarityScore;
    }
    var norms = normalize(embedding);
    for(let w of words){
        let wid = word2int[w];
        let matchScores = getMatchScore([wid], norms);
        let {values, indices} = matchScores.transpose().topk(k);
        let idxs = await indices.data();
        let [targetW, ...similarWs] = Array.from(idxs).map(i=>int2word[i]);
        console.log(`[${targetW}\t] is similar to: ${similarWs.join(', ')}`)
    }
}
async function training(){
    var embed = UVecs.split([VocabLen, 1], 0)[0];
    await checkTopMatch(vocab.slice(0,10), embed, 7);

    var progress = {start: new Date()}
    for(let epoch of R.range(0,5)){
        var trainGenerator = batchSampleGenerator(512);
        let iter = 0;
        for(let [trainTargets, trainContexts, trainNegContexts] of trainGenerator){
            progress.elapse = new Date() - progress.start;
            progress.epoch = epoch;
            console.log(progress);
            
            trainLabel(trainTargets, trainContexts, trainNegContexts).print();
            if(iter > 1000){
                console.log('...................................');
                var embed = UVecs.split([VocabLen, 1], 0)[0];
                await checkTopMatch(vocab.slice(0,10), embed, 7);
                console.log('...................................');
                iter = 0;
            }
            else{
                iter += 1;
            }
            
        }
    }
}
training();

[the	] is similar to: asked, illness, unlimited, delphi, distinct, come
[of	] is similar to: revealed, hoped, pollution, leaders, exploits, preceding
[and	] is similar to: microwave, riots, advanced, posthumously, printers, powerpc
[one	] is similar to: fans, distribute, newspapers, morality, deliver, floating
[in	] is similar to: marie, bruno, interests, acquired, environment, gis
[a	] is similar to: absolute, associates, operator, hadrian, decline, interior
[to	] is similar to: forgotten, fifth, champion, more, hull, estonian
[zero	] is similar to: combat, rolls, yeast, currencies, way, costly
[nine	] is similar to: residence, cruel, talent, uranium, expect, capitol
[two	] is similar to: racist, rabbis, pointed, colonists, arabs, habits
{ start: 2019-02-26T09:57:16.200Z, elapse: 3625, epoch: 0 }
512
Tensor
    2.893545150756836
{ start: 2019-02-26T09:57:16.200Z, elapse: 7494, epoch: 0 }
512
Tensor
    2.818140745162964
{ start: 2019-02-26T09:57:16.200Z, elapse: 11011, epoch: 0 }
512


512
Tensor
    1.9766855239868164
{ start: 2019-02-26T09:57:16.200Z, elapse: 310910, epoch: 0 }
512
Tensor
    1.9631015062332153
{ start: 2019-02-26T09:57:16.200Z, elapse: 314955, epoch: 0 }
512
Tensor
    2.159541130065918
{ start: 2019-02-26T09:57:16.200Z, elapse: 319558, epoch: 0 }
512
Tensor
    2.1734371185302734
{ start: 2019-02-26T09:57:16.200Z, elapse: 323270, epoch: 0 }
512
Tensor
    2.4008240699768066
{ start: 2019-02-26T09:57:16.200Z, elapse: 327179, epoch: 0 }
512
Tensor
    2.2832486629486084
{ start: 2019-02-26T09:57:16.200Z, elapse: 331195, epoch: 0 }
512
Tensor
    2.2939767837524414
{ start: 2019-02-26T09:57:16.200Z, elapse: 335081, epoch: 0 }
512
Tensor
    1.9780689477920532
{ start: 2019-02-26T09:57:16.200Z, elapse: 338945, epoch: 0 }
512
Tensor
    2.5139622688293457
{ start: 2019-02-26T09:57:16.200Z, elapse: 342876, epoch: 0 }
512
Tensor
    1.8987400531768799
{ start: 2019-02-26T09:57:16.200Z, elapse: 346723, epoch: 0 }
512
Tensor
    2.0353641510009766
{ start

512
Tensor
    1.8119126558303833
{ start: 2019-02-26T09:57:16.200Z, elapse: 649996, epoch: 0 }
512
Tensor
    1.9234521389007568
{ start: 2019-02-26T09:57:16.200Z, elapse: 653865, epoch: 0 }
512
Tensor
    2.1876511573791504
{ start: 2019-02-26T09:57:16.200Z, elapse: 657773, epoch: 0 }
512
Tensor
    2.117180347442627
{ start: 2019-02-26T09:57:16.200Z, elapse: 661750, epoch: 0 }
512
Tensor
    2.1104235649108887
{ start: 2019-02-26T09:57:16.200Z, elapse: 665792, epoch: 0 }
512
Tensor
    1.8700942993164062
{ start: 2019-02-26T09:57:16.200Z, elapse: 669784, epoch: 0 }
512
Tensor
    1.9299185276031494
{ start: 2019-02-26T09:57:16.200Z, elapse: 674553, epoch: 0 }
512
Tensor
    1.863479495048523
{ start: 2019-02-26T09:57:16.200Z, elapse: 678458, epoch: 0 }
512
Tensor
    1.7751708030700684
{ start: 2019-02-26T09:57:16.200Z, elapse: 682190, epoch: 0 }
512
Tensor
    1.5821433067321777
{ start: 2019-02-26T09:57:16.200Z, elapse: 686309, epoch: 0 }
512
Tensor
    1.6822844743728638
{ start:

512
Tensor
    1.7269577980041504
{ start: 2019-02-26T09:57:16.200Z, elapse: 987953, epoch: 0 }
512
Tensor
    1.3435148000717163
{ start: 2019-02-26T09:57:16.200Z, elapse: 991746, epoch: 0 }
512
Tensor
    1.1871514320373535
{ start: 2019-02-26T09:57:16.200Z, elapse: 995708, epoch: 0 }
512
Tensor
    1.5136260986328125
{ start: 2019-02-26T09:57:16.200Z, elapse: 999541, epoch: 0 }
512
Tensor
    1.5091907978057861
{ start: 2019-02-26T09:57:16.200Z, elapse: 1003422, epoch: 0 }
512
Tensor
    1.7142094373703003
{ start: 2019-02-26T09:57:16.200Z, elapse: 1007201, epoch: 0 }
512
Tensor
    1.3237249851226807
{ start: 2019-02-26T09:57:16.200Z, elapse: 1011055, epoch: 0 }
512
Tensor
    1.687830924987793
{ start: 2019-02-26T09:57:16.200Z, elapse: 1015026, epoch: 0 }
512
Tensor
    1.8313279151916504
{ start: 2019-02-26T09:57:16.200Z, elapse: 1018791, epoch: 0 }
512
Tensor
    1.709868311882019
{ start: 2019-02-26T09:57:16.200Z, elapse: 1022822, epoch: 0 }
512
Tensor
    1.838752031326294
{ s

512
Tensor
    1.3968088626861572
{ start: 2019-02-26T09:57:16.200Z, elapse: 1321004, epoch: 0 }
512
Tensor
    1.8897881507873535
{ start: 2019-02-26T09:57:16.200Z, elapse: 1324726, epoch: 0 }
512
Tensor
    1.8716766834259033
{ start: 2019-02-26T09:57:16.200Z, elapse: 1328518, epoch: 0 }
512
Tensor
    1.7502305507659912
{ start: 2019-02-26T09:57:16.200Z, elapse: 1332416, epoch: 0 }
512
Tensor
    1.7899110317230225
{ start: 2019-02-26T09:57:16.200Z, elapse: 1336164, epoch: 0 }
512
Tensor
    1.7643587589263916
{ start: 2019-02-26T09:57:16.200Z, elapse: 1340048, epoch: 0 }
512
Tensor
    1.7214457988739014
{ start: 2019-02-26T09:57:16.200Z, elapse: 1343949, epoch: 0 }
512
Tensor
    1.7289460897445679
{ start: 2019-02-26T09:57:16.200Z, elapse: 1348297, epoch: 0 }
512
Tensor
    1.6998273134231567
{ start: 2019-02-26T09:57:16.200Z, elapse: 1352175, epoch: 0 }
512
Tensor
    2.050999164581299
{ start: 2019-02-26T09:57:16.200Z, elapse: 1356007, epoch: 0 }
512
Tensor
    1.51693940162658

512
Tensor
    1.6236701011657715
{ start: 2019-02-26T09:57:16.200Z, elapse: 1653033, epoch: 0 }
512
Tensor
    1.7111425399780273
{ start: 2019-02-26T09:57:16.200Z, elapse: 1656849, epoch: 0 }
512
Tensor
    1.4704885482788086
{ start: 2019-02-26T09:57:16.200Z, elapse: 1660747, epoch: 0 }
512
Tensor
    1.547931432723999
{ start: 2019-02-26T09:57:16.200Z, elapse: 1664763, epoch: 0 }
512
Tensor
    1.759482502937317
{ start: 2019-02-26T09:57:16.200Z, elapse: 1668645, epoch: 0 }
512
Tensor
    1.4486098289489746
{ start: 2019-02-26T09:57:16.200Z, elapse: 1672475, epoch: 0 }
512
Tensor
    1.4501326084136963
{ start: 2019-02-26T09:57:16.200Z, elapse: 1676475, epoch: 0 }
512
Tensor
    1.6878268718719482
{ start: 2019-02-26T09:57:16.200Z, elapse: 1680326, epoch: 0 }
512
Tensor
    1.4856774806976318
{ start: 2019-02-26T09:57:16.200Z, elapse: 1684651, epoch: 0 }
512
Tensor
    1.4045357704162598
{ start: 2019-02-26T09:57:16.200Z, elapse: 1688683, epoch: 0 }
512
Tensor
    1.564612984657287

512
Tensor
    1.1707526445388794
{ start: 2019-02-26T09:57:16.200Z, elapse: 1985176, epoch: 0 }
512
Tensor
    1.2945773601531982
{ start: 2019-02-26T09:57:16.200Z, elapse: 1989025, epoch: 0 }
512
Tensor
    1.2904833555221558
{ start: 2019-02-26T09:57:16.200Z, elapse: 1992918, epoch: 0 }
512
Tensor
    1.719342827796936
{ start: 2019-02-26T09:57:16.200Z, elapse: 1996762, epoch: 0 }
512
Tensor
    1.358259677886963
{ start: 2019-02-26T09:57:16.200Z, elapse: 2000634, epoch: 0 }
512
Tensor
    1.371938705444336
{ start: 2019-02-26T09:57:16.200Z, elapse: 2004525, epoch: 0 }
512
Tensor
    1.3903348445892334
{ start: 2019-02-26T09:57:16.200Z, elapse: 2008391, epoch: 0 }
512
Tensor
    1.3897302150726318
{ start: 2019-02-26T09:57:16.200Z, elapse: 2012347, epoch: 0 }
512
Tensor
    1.22048020362854
{ start: 2019-02-26T09:57:16.200Z, elapse: 2016282, epoch: 0 }
512
Tensor
    1.325504183769226
{ start: 2019-02-26T09:57:16.200Z, elapse: 2020119, epoch: 0 }
512
Tensor
    1.0895127058029175
{ 

512
Tensor
    1.3987360000610352
{ start: 2019-02-26T09:57:16.200Z, elapse: 2320279, epoch: 0 }
512
Tensor
    1.5602954626083374
{ start: 2019-02-26T09:57:16.200Z, elapse: 2324307, epoch: 0 }
512
Tensor
    1.641430377960205
{ start: 2019-02-26T09:57:16.200Z, elapse: 2328486, epoch: 0 }
512
Tensor
    1.635344386100769
{ start: 2019-02-26T09:57:16.200Z, elapse: 2332713, epoch: 0 }
512
Tensor
    1.3692049980163574
{ start: 2019-02-26T09:57:16.200Z, elapse: 2336910, epoch: 0 }
512
Tensor
    1.4097073078155518
{ start: 2019-02-26T09:57:16.200Z, elapse: 2341037, epoch: 0 }
512
Tensor
    1.5354294776916504
{ start: 2019-02-26T09:57:16.200Z, elapse: 2345515, epoch: 0 }
512
Tensor
    1.4176506996154785
{ start: 2019-02-26T09:57:16.200Z, elapse: 2349455, epoch: 0 }
512
Tensor
    1.6047172546386719
{ start: 2019-02-26T09:57:16.200Z, elapse: 2353403, epoch: 0 }
512
Tensor
    1.4820232391357422
{ start: 2019-02-26T09:57:16.200Z, elapse: 2357380, epoch: 0 }
512
Tensor
    1.350428700447082

512
Tensor
    1.2824931144714355
{ start: 2019-02-26T09:57:16.200Z, elapse: 2663792, epoch: 0 }
512
Tensor
    1.5303211212158203
{ start: 2019-02-26T09:57:16.200Z, elapse: 2667729, epoch: 0 }
512
Tensor
    1.225423812866211
{ start: 2019-02-26T09:57:16.200Z, elapse: 2671580, epoch: 0 }
512
Tensor
    1.640089511871338
{ start: 2019-02-26T09:57:16.200Z, elapse: 2675530, epoch: 0 }
512
Tensor
    1.1609368324279785
{ start: 2019-02-26T09:57:16.200Z, elapse: 2679449, epoch: 0 }
512
Tensor
    1.0181822776794434
{ start: 2019-02-26T09:57:16.200Z, elapse: 2683619, epoch: 0 }
512
Tensor
    1.2399911880493164
{ start: 2019-02-26T09:57:16.200Z, elapse: 2687695, epoch: 0 }
512
Tensor
    1.1503684520721436
{ start: 2019-02-26T09:57:16.200Z, elapse: 2691486, epoch: 0 }
512
Tensor
    1.2668161392211914
{ start: 2019-02-26T09:57:16.200Z, elapse: 2695174, epoch: 0 }
512
Tensor
    1.2133500576019287
{ start: 2019-02-26T09:57:16.200Z, elapse: 2699662, epoch: 0 }
512
Tensor
    1.432405710220337

512
Tensor
    1.5903923511505127
{ start: 2019-02-26T09:57:16.200Z, elapse: 3006275, epoch: 0 }
512
Tensor
    1.4110379219055176
{ start: 2019-02-26T09:57:16.200Z, elapse: 3010329, epoch: 0 }
512
Tensor
    1.788332462310791
{ start: 2019-02-26T09:57:16.200Z, elapse: 3014162, epoch: 0 }
512
Tensor
    1.5133179426193237
{ start: 2019-02-26T09:57:16.200Z, elapse: 3018112, epoch: 0 }
512
Tensor
    1.824033498764038
{ start: 2019-02-26T09:57:16.200Z, elapse: 3022097, epoch: 0 }
512
Tensor
    1.6916232109069824
{ start: 2019-02-26T09:57:16.200Z, elapse: 3026091, epoch: 0 }
512
Tensor
    1.3173294067382812
{ start: 2019-02-26T09:57:16.200Z, elapse: 3030116, epoch: 0 }
512
Tensor
    1.6689339876174927
{ start: 2019-02-26T09:57:16.200Z, elapse: 3034211, epoch: 0 }
512
Tensor
    1.1180436611175537
{ start: 2019-02-26T09:57:16.200Z, elapse: 3038765, epoch: 0 }
512
Tensor
    1.4213919639587402
{ start: 2019-02-26T09:57:16.200Z, elapse: 3042807, epoch: 0 }
512
Tensor
    1.599950551986694

512
Tensor
    1.2853552103042603
{ start: 2019-02-26T09:57:16.200Z, elapse: 3346363, epoch: 0 }
512
Tensor
    1.6092710494995117
{ start: 2019-02-26T09:57:16.200Z, elapse: 3350308, epoch: 0 }
512
Tensor
    1.387014389038086
{ start: 2019-02-26T09:57:16.200Z, elapse: 3354344, epoch: 0 }
512
Tensor
    1.2893941402435303
{ start: 2019-02-26T09:57:16.200Z, elapse: 3358345, epoch: 0 }
512
Tensor
    1.3999898433685303
{ start: 2019-02-26T09:57:16.200Z, elapse: 3362331, epoch: 0 }
512
Tensor
    1.3739639520645142
{ start: 2019-02-26T09:57:16.200Z, elapse: 3366409, epoch: 0 }
512
Tensor
    1.6596460342407227
{ start: 2019-02-26T09:57:16.200Z, elapse: 3370892, epoch: 0 }
512
Tensor
    1.2915414571762085
{ start: 2019-02-26T09:57:16.200Z, elapse: 3374775, epoch: 0 }
512
Tensor
    1.6200273036956787
{ start: 2019-02-26T09:57:16.200Z, elapse: 3378754, epoch: 0 }
512
Tensor
    1.7805895805358887
{ start: 2019-02-26T09:57:16.200Z, elapse: 3383070, epoch: 0 }
512
Tensor
    1.71632122993469

512
Tensor
    1.4081186056137085
{ start: 2019-02-26T09:57:16.200Z, elapse: 3684845, epoch: 0 }
512
Tensor
    1.449614405632019
{ start: 2019-02-26T09:57:16.200Z, elapse: 3688849, epoch: 0 }
512
Tensor
    1.1456695795059204
{ start: 2019-02-26T09:57:16.200Z, elapse: 3692959, epoch: 0 }
512
Tensor
    1.0282909870147705
{ start: 2019-02-26T09:57:16.200Z, elapse: 3697477, epoch: 0 }
512
Tensor
    0.750444769859314
{ start: 2019-02-26T09:57:16.200Z, elapse: 3701477, epoch: 0 }
512
Tensor
    1.1494735479354858
{ start: 2019-02-26T09:57:16.200Z, elapse: 3705650, epoch: 0 }
512
Tensor
    1.0407297611236572
{ start: 2019-02-26T09:57:16.200Z, elapse: 3709861, epoch: 0 }
512
Tensor
    1.1957881450653076
{ start: 2019-02-26T09:57:16.200Z, elapse: 3713960, epoch: 0 }
512
Tensor
    1.3223800659179688
{ start: 2019-02-26T09:57:16.200Z, elapse: 3718014, epoch: 0 }
512
Tensor
    1.276688814163208
{ start: 2019-02-26T09:57:16.200Z, elapse: 3722006, epoch: 0 }
512
Tensor
    1.1868619918823242

In [None]:
function WordCoOccurentTraining(W, Wpos, Wneg, Vectors){
    var nce = ()=>{
        let Wpost = Wpos.map(w=>Vectors[w]);
        let Wnegt = Wneg.map(w=>Vectors[w]);
        let Wt = [...Wpost, ...WnegT].map(()=>Vectors[W]);
            return ((w, pos, neg)=>{
                let [PosLen, NegLen] = [pos.length, neg.length];
                var Wa = T.concat(Wt);
                let [Ws, Size] = Wa.shape;
                console.log(Wa.shape, PosLen , NegLen,Size);
                Wa = Wa.reshape([PosLen + NegLen,1,Size]);
//                 Wa.print();
                var Wb = T.concat([ T.concat(pos), T.concat(neg) ]);
//                 Wb.print();
                Wb = Wb.reshape([ PosLen + NegLen, Size, 1]);
                var label = T.concat([T.ones([PosLen]), T.ones([NegLen]).neg()]);
                return Loss(Wa.matMul(Wb).reshape([PosLen + NegLen]), label).neg();
                })(Wt, Wpost, Wnegt);
            };
    return adam.fit(nce);
};

In [None]:
(async ()=>{
    var vector = {0: T.variable(T.tensor([1,2,3,4,5]).reshape([1, 5])),
                  1: T.variable(T.tensor([1,2,3,4,5]).reshape([1, 5])),
                  2: T.variable(T.tensor([1,2,3,4,5]).reshape([1, 5]))}
    var W =    [0, 0, 0, 0, 0];
    var Wc1 =  [1, 1, 1]; 
    var Wnc1 = [2, 2];
    WordCoOccurentTraining(vids, vector).print();
    for(let v of Object.keys(vector)){
        vector[v].print();   
    }
//     const CheckResult = async ()=>{
//             let targetWords = [0,1,2,3,4,5];
//             let topKTensor = await memory.getTopKSimilar(targetWords, 10);
//             topKTensor.print();    
//     }
//     await CheckResult();     
})();    

In [None]:
var memory;
(async ()=>{
    memory = causalNetMemory;
    let initTensor = await memory.initMemory([15, 2]);
})();

In [None]:
var Vocab;
(async ()=>{
    Vocab = {
        words: {'a':0,'b':1,'c':2},
        iwords: {'0':'a','1':'b','2':'c'},
        wCounts: [1,2,3],
        indexToWord: function(idxs){
            idxs = Array.from(idxs);//clone avoid bufferArray issues
            return idxs.map((idx)=>this.iwords[idx]);
        },
        wordToIndex: function(ws){
            return ws.map((w)=>this.words[w]);
        },
        countToProb: function(){
            let countTotal = R.sum(this.wCounts);
            let wFracs = R.map((count)=>count / countTotal, this.wCounts);
            this.wProbs = R.map((frac)=>Math.sqrt(frac / 0.001 + 1)*(0.001 / frac), wFracs); 
            return this.wProbs;
        },
        samplingNegIndexs: function(positives, size){
            return causalNetSampling.negSampling(size, positives, this.wProbs);    
        }
    };
    termLogger.log(Vocab.indexToWord([1,2]));
    termLogger.log(Vocab.wordToIndex(['a','b']));
    termLogger.log(Vocab.countToProb());
    termLogger.log(Vocab.samplingNegIndexs([1,2,3], 6));
})();

In [None]:
(async ()=>{
    let initTensor = await memory.initMemory([10, 5]);
})();

In [None]:
(async ()=>{  
    for(let epoch in R.range(0, 501)){
        termLogger.log({epoch});
        let uids = R.range(0,5);
        let nuid = R.reverse(R.range(0,5));
        let nvid = R.range(5,10);
        uids = [...uids, ...uids];
        let vecId = new Set([...uids, ...nuid, ...nvid]);
        let vectors = {};
        for(let v of vecId){
            vectors[v] = await memory.readSlots([v]);
        }
        WordCoOccurentTraining(uids, nuid, nvid, vectors).print();
        for(let v of vecId){
            await memory.writeSlots([v], vectors[v]);
        }
        uids = R.range(5,10);
        nuid = R.reverse(R.range(5,10));
        nvid = R.range(0,5);
        uids = [...uids, ...uids];
        vecId = new Set([...uids, ...nuid, ...nvid]);
        vectors = {};
        for(let v of vecId){
            vectors[v] = await memory.readSlots([v]);
        }
        WordCoOccurentTraining(uids, nuid, nvid, vectors).print();
        for(let v of vecId){
            await memory.writeSlots([v], vectors[v]);
        }
   
    }
    const CheckResult = async ()=>{
        let targetWords = [0,1,4,3,2];
        let norm = await memory.normalize();
        //  norm.print();
        let matchScore = await memory.getMatchScore(targetWords);
        //  matchScore.print();
        let topKTensor = await memory.getTopKSimilar(targetWords, 5);
        topKTensor.print();    
    };
    await CheckResult();   
})();

In [None]:
var remainingChars = '', wordFreqCount = {}, lineIndex = 0;
function tranformFn(chunkData, chunkEncoding, afterTransformFn){
    let sampleText = remainingChars + chunkData;
    let sampleLines = sampleText.split('\n');
    let transformedData = [], counter = 0;
    for(let line of sampleLines){
        counter += 1;
        if(counter === sampleLines.length){//last line
            remainingChars = line;
        }
        else{
            if(line.length > 0){
                let tokens = nlpPreprocessing.tokenize(line);
                wordFreqCount = nlpPreprocessing.WordFreqCount(tokens, wordFreqCount);
                lineIndex += 1;
                //console.log({line, lineIndex, tokens});
                transformedData.push({lineIndex, tokens});
            }
        }
    }
    afterTransformFn(null, transformedData);
};

In [None]:
function writeTokens(transformedData, chunkEncoding, afterWriteFn){
    const WriteTokensToFile = async (transformedData)=>{
        for(let {lineIndex, tokens} of transformedData){
            console.log({lineIndex});
            await indexDBStorage.writeFile(`/corpus/line_${lineIndex}`, JSON.stringify(tokens));
        }
    }
    WriteTokensToFile(transformedData).then(()=>{
        afterWriteFn();
    });
}

In [None]:
var allTokens = [], tokenMatrix = null, Words=[], wordCounts=[];
(async ()=>{
    var corpusReader = fs.createReadStream('../datasets/text8/text8.txt');
    let writer = Stream.makeWritable(writeTokens);
    let transformer = Stream.makeTransform(tranformFn);
    let deletedFiles = await indexDBStorage.deleteFileByPrefix('/corpus/');
    termLogger.log({deletedFiles});
    const DataProgress = (dataBuffer)=> termLogger.log({'data length': dataBuffer.length});
    const CorpusStreamer = Stream.makePipeline([corpusReader, transformer, writer], DataProgress);
    let result = await CorpusStreamer;

    const SortByFreq = R.sortBy(([w,f])=>-f);
    let vocabFreqPairs = R.filter(([w,f])=>f>0)(SortByFreq(R.toPairs(wordFreqCount)));
    let [choosePairs, filterPairs] = R.splitAt(10000, vocabFreqPairs);  
    termLogger.log({'keep length': choosePairs.length, 'discard': filterPairs.length})
    const GetVocab = ([v,f]) => v;
    const GetFreq = ([v,f]) => f;
    Words = R.map(GetVocab, choosePairs);
    wordCounts = R.map(GetFreq, choosePairs);
})();
termLogger.log([Words.length, wordCounts.length]);
var wordMapper = R.compose( R.fromPairs, R.addIndex(R.map)((v,i)=>[v, i]) )(Words);
var iwordMapper = R.compose( R.fromPairs, R.addIndex(R.map)((v,i)=>[i, v]) )(Words);

In [None]:
var wordMapper = R.compose( R.fromPairs, R.addIndex(R.map)((v,i)=>[v, i]) )(Words);
var iwordMapper = R.compose( R.fromPairs, R.addIndex(R.map)((v,i)=>[i, v]) )(Words);
termLogger.log(Object.keys(wordMapper).length);
termLogger.log(Object.keys(iwordMapper).length);

In [None]:
var Vocab = {
    words: wordMapper,
    iwords: iwordMapper,
    wCounts: wordCounts,
    indexToWord: function(idxs){
        idxs = Array.from(idxs);//clone avoid bufferArray issues
        return idxs.map((idx)=>this.iwords[idx]);
    },
    wordToIndex: function(ws){
        return ws.map((w)=>this.words[w]);
    },
    countToProb: function(){
        let countTotal = R.sum(this.wCounts);
        console.log(countTotal);
        let wFracs = R.map((count)=>count / countTotal, this.wCounts);
        //rebalancing for rare words
        this.wProbs = R.map((frac)=>Math.sqrt(frac / 0.001 + 1)*(0.001 / frac), wFracs); 
        return this.wProbs;
    },
    samplingNegIndexs: function(positives, size){
        return causalNetSampling.negSampling(size, positives, this.wProbs);    
    }
};
var _prob = Vocab.countToProb();
_prob

In [None]:
termLogger.log(Vocab.indexToWord([0, 1, 2]));
termLogger.log(Vocab.wordToIndex(['a','the111']));
termLogger.log(Vocab.samplingNegIndexs([1,2,3], 6));

In [None]:
var MatrixInit = (s)=>R.map(()=>R.map(()=>0)(R.range(0,s)))(R.range(0,s))
var CooccurenceMatrixReducer = R.reduce((total, token)=>{
    let [w, ctx] = token;
    let r = w, c = ctx;
    try{
       total[r][c] += 1; 
    }
    catch(e){
        console.error(token)
    }
    return total;
});

In [None]:
function getTokenContext(tokens, windownSize, batchSize=2){
    function context({tokenContext, cooccurents}, token, index){
        let leftSizeContext = [], rightSizeContext = [], occur = new Set([token]);
        for(let w_idx of R.range(1, windownSize+1)){
            if(index - w_idx >= 0){
                occur.add(tokens[index - w_idx]);
            }
            if(index + w_idx < tokens.length){
                occur.add(tokens[index + w_idx]);
            }
        }
        tokenContext.push(token);
        cooccurents.push(Array.from(occur));
        return {tokenContext, cooccurents};
    }
    let { tokenContext, cooccurents } = R.addIndex(R.reduce)(context,
             { tokenContext:[], cooccurents:[] }, tokens);    
    
    return {tokenContext, cooccurents};
}
var tokens = [9,2,3,45,5,6];
var {tokenContext, cooccurents } = getTokenContext(tokens, 3);
console.log(R.zip(tokenContext, cooccurents));
var tokens = [9,2,3,45,5,6];
var {tokenContext, cooccurents } = getTokenContext(tokens, 3);
console.log(R.zip(tokenContext, cooccurents));

In [None]:
(async ()=>{
    var VocabLen = Vocab.wCounts.length;
    console.log(VocabLen);
//     var tokenMatrix = MatrixInit(VocabLen, VocabLen);
    await memory.initMemory([VocabLen, 100]);
})();       

In [None]:
(async ()=>{    
    let uTensor = await memory.readSlots([0]);
    let nuTensor = await memory.readSlots([1, 2, 3, 4, 5, 6]);
    let nvTensor = await memory.readSlots([8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]);
    WordCoOccurentTraining(uTensor, nuTensor, nvTensor);
    await memory.writeSlots([0], uTensor);
    await memory.writeSlots([1,2], nuTensor);
    await memory.writeSlots([8,9], nvTensor);
    const CheckResult = async ()=>{
        let targetWords = [0,1,2,3,4,5];
        let topKTensor = await memory.getTopKSimilar(targetWords, 10);
        topKTensor.print();
        let topks = R.splitEvery(10, Array.from(await topKTensor.data()));
        for(let [w, sim] of R.zip(targetWords, topks)){
            let [tw, ...sws] = Vocab.indexToWord(sim);
            termLogger.log(`[${tw}] is similar to ${sws.join(',')}`);
        }    
    }
    await CheckResult();
})();     

In [None]:
(async ()=>{
    let listFiles = await indexDBStorage.getFileList('/corpus/');
    let getlist = listFiles.slice(0,1);//take all
    let startTime = new Date();
    for(let epoch in R.range(0, 100)){
        for(let lfile of getlist){
            termLogger.log({lfile, elapse: new Date() - startTime});
            let rawtokens = await indexDBStorage.readFile(lfile);
            let tokens = JSON.parse(rawtokens);
            const tokenIdxs = Vocab.wordToIndex(tokens);
            
            const FilterUndefined = R.filter((v)=>v!==undefined);
            let seletedTokens = FilterUndefined(tokenIdxs);
            let loss = [];
            var {tokenContext, cooccurents} = getTokenContext(seletedTokens, 2);
//             console.log(R.zip(tokenContext, cooccurents), seletedTokens);
            for(let [w, posWs] of R.zip(tokenContext, cooccurents)){            
                
                let negWs = Vocab.samplingNegIndexs(posWs, 10);
//                 console.log({w, posWs, negWs});
                let uTensor = await memory.readSlots([w]);
                let nuTensor = await memory.readSlots(posWs);
                let nvTensor = await memory.readSlots(negWs);
                
                let l = await WordCoOccurentTraining(uTensor, nuTensor, nvTensor).data();
                loss = [...loss, ...l];
                await memory.writeSlots([w], uTensor);
                await memory.writeSlots(posWs, nuTensor);
                await memory.writeSlots(negWs, nvTensor);
            }
            console.log({epoch, lfile, loss: R.mean(loss)});
        }
        const CheckResult = async ()=>{
            let targetWords = [0,1,2,3,4,5];
            let topKTensor = await memory.getTopKSimilar(targetWords, 10);
            topKTensor.print();
            let topks = R.splitEvery(10, Array.from(await topKTensor.data()));
            for(let [w, sim] of R.zip(targetWords, topks)){
                let [tw, ...sws] = Vocab.indexToWord(sim);
                termLogger.log(`[${tw}] is similar to ${sws.join(',')}`);
            }    
        }
        await CheckResult();    
    }
    // tokenMatrix = CooccurenceMatrixReducer(tokenMatrix)(tokenContexts);
})();

In [None]:
fs.writeFileSync('./cooccurent.matrix.json', JSON.stringify(tokenMatrix));
fs.writeFileSync('./tokenMapper.json', JSON.stringify(mapper));