Skip to content

Commit

Permalink
adding CLI, upped version to 0.1.6
Browse files Browse the repository at this point in the history
  • Loading branch information
moos committed May 30, 2012
1 parent e4236ab commit 58d95dd
Show file tree
Hide file tree
Showing 3 changed files with 284 additions and 3 deletions.
104 changes: 103 additions & 1 deletion README.md
Expand Up @@ -211,12 +211,114 @@ To override, pass an options hash to the constructor. With the `profile` option,
// true 'fast' 29
```

## Fast Index
### Fast Index

Version 0.1.4 introduces `fastIndex` option. This uses a secondary index on the index files and is much faster. It is on by default. Secondary index files are generated at install time and placed in the same directory as WNdb.path. Details can be found in tools/stat.js.

See blog article [Optimizing WordPos](http://blog.42at.com/optimizing-wordpos).

## CLI

Version 0.1.6 introduces the command-line interface (./bin/wordpos-cli.js), available as 'wordpos' when using npm install.

```bash
$ wordpos get The angry bear chased the frightened little squirrel
# Noun 4:
bear
chased
little
squirrel

# Adjective 3:
angry
frightened
little

# Verb 1:
bear

# Adverb 1:
little
```
Just the nouns, brief output:
```bash
$ wordpos get --noun -b The angry bear chased the frightened little squirrel
bear chased little squirrel
```
Just the counts: (nouns, adjectives, verbs, adverbs, total parsed words)
```bash
$ wordpos get -c The angry bear chased the frightened little squirrel
4 3 1 1 7
```
Just the adjective count: (nouns, adjectives, verbs, adverbs, total parsed words)
```bash
$ wordpos get --adj -c The angry bear chased the frightened little squirrel
0 3 0 0 7
```

Get definitions:
```bash
$ wordpos def git
git
n: a person who is deemed to be despicable or contemptible; "only a rotter would do that"; "kill the rat"; "throw the bum out"; "you cowardly little pukes!"; "the British call a contemptible person a `git'"
```
Get full result object:
```bash
$ wordpos def git -f
{ git:
[ { synsetOffset: 10539715,
lexFilenum: 18,
pos: 'n',
wCnt: 0,
lemma: 'rotter',
synonyms: [],
lexId: '0',
ptrs: [],
gloss: 'a person who is deemed to be despicable or contemptible; "only a rotter would do that
"; "kill the rat"; "throw the bum out"; "you cowardly little pukes!"; "the British call a contemptib
le person a `git\'" ' } ] }
```
As JSON:
```bash
$ wordpos def git -j
{"git":[{"synsetOffset":10539715,"lexFilenum":18,"pos":"n","wCnt":0,"lemma":"rotter","synonyms":[],"
lexId":"0","ptrs":[],"gloss":"a person who is deemed to be despicable or contemptible; \"only a rotter
would do that\"; \"kill the rat\"; \"throw the bum out\"; \"you cowardly little pukes!\"; \"the British
call a contemptible person a `git'\" "}]}
```
Usage:
```bash
$ wordpos
Usage: wordpos-cli.js [options] <command> [word ... | -i <file> | <stdin>]
Commands:
get
get list of words for particular POS
def
lookup definitions
parse
show parsed words, deduped and less stopwords
Options:
-h, --help output usage information
-V, --version output the version number
-n, --noun Get nouns
-a, --adj Get adjectives
-v, --verb Get verbs
-r, --adv Get adverbs
-c, --count count only (noun, adj, verb, adv, total parsed words)
-b, --brief brief output (all on one line, no headers)
-f, --full full results object
-j, --json full results object as JSON
-i, --file <file> input file
-s, --stopwords include stopwords
```
## Benchmark
node wordpos-bench.js
Expand Down
178 changes: 178 additions & 0 deletions bin/wordpos-cli.js
@@ -0,0 +1,178 @@
#!/usr/bin/env node
/**
* wordpos-cli.js
*
* command-line interface to wordpos
*
* Usage:
* wordpos [options] <get|parse|def> <stdin|words*>
*
* Copyright (c) 2012 mooster@42at.com
* https://github.com/moos/wordpos
*
* Released under MIT license
*/

// Dependencies and shared CLI state.
//   POS    - maps each part-of-speech option name (program.noun, program.adj, ...)
//            to the label used to build wordpos method names in run() and as
//            the key of the collected results.
//   nWords - number of parsed words; assigned by run(), read by output().
var program = require('commander'),
  _ = require('underscore')._,
  POS = {noun:'Noun', adj:'Adjective', verb:'Verb', adv:'Adverb'},
  nWords;

// Global options. The version is read from package.json so that `-V` always
// reports the installed package version instead of a hard-coded string that
// has to be bumped by hand.
program
  .version(require('../package.json').version)
  .usage('[options] <command> [word ... | -i <file> | <stdin>]')

  // part-of-speech selection (all four are used when none is given, see optToFn)
  .option('-n, --noun', 'Get nouns')
  .option('-a, --adj', 'Get adjectives')
  .option('-v, --verb', 'Get verbs')
  .option('-r, --adv', 'Get adverbs')

  // output format and input source
  .option('-c, --count', 'count only (noun, adj, verb, adv, total parsed words)')
  .option('-b, --brief', 'brief output (all on one line, no headers)')
  .option('-f, --full', 'full results object')
  .option('-j, --json', 'full results object as JSON')
  .option('-i, --file <file>', 'input file')
  .option('-s, --stopwords', 'include stopwords')
  ;

// Register the three sub-commands. Each action ends up in exec(), which reads
// the command's name from the last argument it receives.

var getCommand = program.command('get');
getCommand.description('get list of words for particular POS');
getCommand.action(exec);

var defCommand = program.command('def');
defCommand.description('lookup definitions');
defCommand.action(function () {
  // `def` is exposed to the user, but run() composes method names from the
  // command name (cmd + POS label), so rename it to 'lookup' before delegating.
  var command = arguments[arguments.length - 1];
  command.name = 'lookup';
  exec.apply(this, arguments);
});

var parseCommand = program.command('parse');
parseCommand.description('show parsed words, deduped and less stopwords');
parseCommand.action(exec);

// Remaining dependencies and CLI state. The names are hoisted, but the values
// are only assigned when this statement runs, so it has to stay ahead of
// program.parse() below.
var
WordPos = require('../src/wordpos'),
fs = require('fs'),
util = require('util'),
results = {}, // NOTE(review): not referenced elsewhere in the visible code; run() declares its own local `results`
cmd = null; // name of the command being executed; assigned by exec()


// Parse the command line. The command actions registered above all end up in
// exec(), which assigns `cmd` before it returns.
// NOTE(review): the check below presumably relies on commander invoking the
// matching action synchronously from parse() -- confirm.
program.parse(process.argv);
// No command was executed: print the usage text instead.
if (!cmd) console.log(program.helpInformation());


/**
 * Action handler shared by all commands: records which command was invoked
 * (module-level `cmd`) and feeds the input text to run().
 *
 * Input source, in order of precedence:
 *   1. the file given with -i/--file,
 *   2. the words given on the command line (joined with single spaces),
 *   3. stdin.
 *
 * Arguments are the command-line words followed by the command object; the
 * command object's `name` becomes `cmd`.
 *
 * If the input file cannot be read, the error is written to stderr and the
 * process exit code is set to 1, so that callers and pipelines can tell the
 * failure apart from regular (stdout) output.
 */
function exec(/* args, ..., program.command */){
  // everything but the trailing command object is user input
  var args = Array.prototype.slice.call(arguments, 0, -1);
  cmd = arguments[arguments.length - 1].name;

  if (program.file) {
    fs.readFile(program.file, 'utf8', function(err, data){
      if (err) {
        console.error(err);
        // exitCode (rather than process.exit) lets pending output flush
        process.exitCode = 1;
        return;
      }
      run(data);
    });
  } else if (args.length){
    run(args.join(' '));
  } else {
    read_stdin(run);
  }
}

/**
 * Read all of stdin as utf8 text and pass the accumulated string to
 * `callback` once the stream emits 'end'.
 *
 * @param {function(string)} callback  receives everything read from stdin
 */
function read_stdin(callback) {
  var stdin = process.stdin,
    text = '';

  stdin.resume();
  stdin.setEncoding('utf8');

  stdin.on('data', function (chunk) {
    var first = chunk.charCodeAt(0);

    // A chunk starting with char code 4 (EOT, Ctrl-D) or 26 (SUB, Ctrl-Z) is
    // treated as end of input: the chunk is dropped, 'end' is emitted by hand
    // and, if a listener handled it, the stream is paused.
    if (first === 4 || first === 26) {
      if (stdin.emit('end')) {
        stdin.pause();
      }
      return;
    }

    text += chunk;
  });

  stdin.on('end', function () {
    callback(text);
  });
}

/**
 * Translate the part-of-speech flags given on the command line into their
 * POS labels ('Noun', 'Adjective', ...), in the order they appear in POS.
 *
 * @return {Array.<string>} labels of the selected POS, or all labels when no
 *         POS flag was given
 */
function optToFn() {
  var options = Object.keys(POS),
    chosen = options.filter(function (opt) { return program[opt]; });

  // default to all if no POS given
  return (chosen.length ? chosen : options).map(function (opt) {
    return POS[opt];
  });
}


/**
 * Parse `data` into words and execute the current command (`cmd`) on them,
 * handing the collected results to output() exactly once.
 *
 *   parse   outputs the parsed word list directly; no lookups are made.
 *   get     calls wordpos.get<POS>s(words, cb) once per selected POS;
 *           results are keyed by POS label.
 *   lookup  calls wordpos.lookup<POS>(word, cb) for every word and every
 *           selected POS; results are keyed by word, with the per-POS
 *           results concatenated.
 *
 * Side effect: stores the number of parsed words in the module-level
 * `nWords` (read by output() for the --count format).
 *
 * @param {string} data  raw input text
 */
function run(data) {
  var
    // NOTE(review): -s means "include stopwords", so the WordPos `stopwords`
    // option presumably enables stopword removal -- confirm against WordPos.
    opts = {stopwords: !program.stopwords},
    wordpos = new WordPos(opts),
    words = wordpos.parse(data),
    results = {},
    fns,
    plural,
    pending,
    collect;

  nWords = words.length;

  // parse needs no lookups, so bail out before any callback bookkeeping
  if (cmd == 'parse') return output({words: words});

  fns = optToFn();
  plural = (cmd == 'get' ? 's' : '');

  // number of callbacks to wait for: one per POS for get (whole word list at
  // once), one per word and POS for lookup
  pending = plural ? fns.length : words.length * fns.length;

  // With no words there is nothing to wait for; emit the (empty) result
  // directly rather than depending on a counter that no callback will reach.
  if (pending === 0) return output(results);

  collect = function(what, result, word){
    if (word) { // lookup
      results[word] = [].concat(results[word] || [], result);
    } else { // get
      results[what] = result;
    }
    pending -= 1;
    if (pending === 0) output(results);
  };

  // loop over desired POS
  fns.forEach(function(fn){
    var method = cmd + fn + plural,
      cb = function(result, word){ collect(fn, result, word); };

    if (cmd == 'get') {
      wordpos[method](words, cb);
    } else {
      words.forEach(function(word){
        wordpos[method](word, cb);
      });
    }
  });
}

/**
 * Print the results of the current command to stdout.
 *
 * With --count (except for lookup) only numbers are printed: for `get`, the
 * number of words found per POS in POS order followed by the total number of
 * parsed words; for any other command just the parsed word total. Otherwise
 * the results are rendered by sprint().
 *
 * @param {Object} results  collected results (see run)
 */
function output(results) {
  var countsOnly = program.count && cmd != 'lookup',
    text;

  if (!countsOnly) {
    text = sprint(results);
  } else if (cmd == 'get') {
    text = '';
    Object.keys(POS).forEach(function (opt) {
      var found = results[POS[opt]];
      text += ((found && found.length) || 0) + " ";
    });
    text += nWords;
  } else {
    // stays numeric, exactly like the former `false + nWords`
    text = 0 + nWords;
  }

  console.log(text);
}

/**
 * Render a results object as text.
 *
 *   --json   the whole object as JSON
 *   --full   the whole object via util.inspect (depth 10, colors on)
 *   lookup   per word: the word on its own line, one line per definition
 *            (pos and gloss), then a blank line
 *   other    per key: an optional "# <key> <count>:" header followed by the
 *            entries; --brief drops the header and joins with spaces
 *            instead of newlines
 *
 * Keys whose value is empty are omitted.
 *
 * @param {Object} results  collected results (see run)
 * @return {string} printable text
 */
function sprint(results) {
  if (program.json) {
    return util.format('%j', results);
  }
  if (program.full) {
    return util.inspect(results, false, 10, true);
  }

  var sep = program.brief ? ' ' : '\n',
    isLookup = (cmd === 'lookup'),
    text = '';

  // one line per definition: part of speech and gloss
  function print_def(defs) {
    var lines = '';
    defs.forEach(function (def) {
      lines += util.format(' %s: %s\n', def.pos, def.gloss);
    });
    return lines;
  }

  // word, its definitions, blank line
  function lookupEntry(word, defs) {
    return word + "\n" + print_def(defs) + "\n";
  }

  // optional header plus the entries, joined by the active separator
  function listEntry(label, items) {
    var header = program.brief ? '' : util.format('# %s %d:%s', label, items.length, sep);
    return util.format('%s%s%s\n', header, items.join(sep), sep);
  }

  Object.keys(results || {}).forEach(function (key) {
    var value = results[key];
    if (!value.length) return;
    text += isLookup ? lookupEntry(key, value) : listEntry(key, value);
  });

  return text;
}

5 changes: 3 additions & 2 deletions package.json
Expand Up @@ -3,11 +3,12 @@
"author": "Moos <mooster@42at.com>",
"keywords": ["natural", "language", "wordnet", "pos"],
"description": "wordpos is a set of part-of-speech utilities for Node.js using natural's WordNet module.",
"version": "0.1.5",
"version": "0.1.6",
"homepage": "https://github.com/moos/wordpos",
"engines": {
"node": ">=0.4.10"
"node": ">=0.6"
},
"bin": "./bin/wordpos-cli.js",
"dependencies": {
"natural": "latest",
"underscore": ">=1.3.1",
Expand Down

0 comments on commit 58d95dd

Please sign in to comment.