Simply put: predicts the next word the user will write.
git clone git@github.com:syzer/distributedNgram.git && cd $_
npm install
npm install --save-dev
The file nGram.js offers a more compact version of the code:
npm start
var jsSpark = require('js-spark')({workers: 16});
var task = jsSpark.jsSpark;
var q = jsSpark.q;

task([20, 30, 40, 50])
    // this part is executed on the client side
    .map(function addOne(num) {
        return num + 1;
    })
    .reduce(function sumUp(sum, num) {
        return sum + num;
    })
    .run()
    .then(function (data) {
        // this part is executed back on the server
        console.log('i finished calculating', data);
    })
    // always terminate the chain with a rejection handler,
    // otherwise worker errors are silently swallowed
    .catch(function (err) {
        console.error('calculation failed', err);
    });
npm test
git clone https://github.com/syzer/distributedNgram.git
load:
-
dracula
-
lodash
-
load helpers
(gist)
make function prepare()
// remove special characters
// Normalize raw text for n-gram processing: lowercase everything,
// strip punctuation and special characters (including curly quotes),
// and collapse any resulting runs of whitespace into single spaces.
// @param  {string} str - raw input text
// @returns {string} cleaned, lowercase, space-separated words
function prepare(str) {
    return String(str)
        .toLowerCase()
        .replace(/[^a-z0-9\s]/g, '') // drop everything but letters, digits, whitespace
        .replace(/\s+/g, ' ')        // collapse whitespace runs left by removals
        .trim();
}
prepare('“Listen to them, the children of the night. What music they make!”')
//=>"listen to them the children of the night what music they make"
(gist)
make bigramText()
bigramText("to listen to them the children of the night what music they make");
//=>{to: {listen: 1, them:1} , listen:{to:1}, the:{children:1}}...
// Build a bigram frequency table from a whitespace-separated string.
// For every adjacent word pair (w1, w2) it increments result[w1][w2].
// bigramText('to listen to them')
// //=> { to: { listen: 1, them: 1 }, listen: { to: 1 } }
// @param  {string} str - prepared (lowercased, punctuation-free) text
// @returns {Object.<string, Object.<string, number>>} bigram counts
// Note: the original stub referenced undefined `arr`/`bigramArray`
// and would throw a ReferenceError when called.
function bigramText(str) {
    var words = str.split(/\s+/).filter(Boolean); // filter drops '' from empty input
    var grams = {};
    for (var i = 0; i < words.length - 1; i++) {
        var first = words[i];
        var second = words[i + 1];
        if (!grams[first]) {
            grams[first] = {};
        }
        grams[first][second] = (grams[first][second] || 0) + 1;
    }
    return grams;
}
(gist)
function mergeSmall()
-
create 2 tasks ch01, and ch02
-
use tasks to bigram those chapters
-
reduce response with _.merge
(gist)
function mergeBig(texts)
-
load [ch1, ch2, ch3] or texts
-
make distinct tasks to bigram this text
-
reduce with _.mergeObjectsInArr
-
cache result
-
return result
(gist)
function predict(word)
-
load appropriate key/word from cache
-
calc total hits
-
sort all hits in order,
may use helper function objToSortedArr(obj)
- calc frequency/probability of next word
(gist)
function train(fileName, splitter)
-
load file
-
prepare
-
use splitter(string) to create separate tasks
-
calculate tasks on clients using mergeBig()
[ ] git checkout [ ] js-spark adventure