diff --git a/CHANGES.md b/CHANGES.md index b3741ee..48e0e5e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,12 @@ Release history =============== +Version 0.1.8 +------------- + + * Added an optional LRU cache + + Version 0.1.7 ------------- diff --git a/README.md b/README.md index 795916b..3cf2182 100644 --- a/README.md +++ b/README.md @@ -40,16 +40,31 @@ npm install wndb-with-exceptions --save API --- -### new WordNet([directory]) +### new WordNet([options | string]) -The constructor returns a new object to access a WordNet database at the specified -directory. If no directory is passed, the module uses `require` to locate -`wndb-with-exceptions`, so if you don't want to deploy your own WordNet, all you -need to do is add `wndb-with-exceptions` as an application dependency and not -pass a directory to the constructor. +The constructor returns a new object to access a WordNet database. The passed +options configure the interface. The following options are available: + + * __dataDir__ -- specifies the location of the WordNet directory. + + If this option isn't passed, the module uses `require` to locate + `wndb-with-exceptions`, so if you don't want to deploy your own WordNet, all you + need to do is add `wndb-with-exceptions` as an application dependency and not + pass a directory to the constructor. + The original WordNet data files can always be manually downloaded and installed + anywhere from http://wordnet.princeton.edu/wordnet/download. + + As a shortcut, if you pass a string directly to the constructor, it's interpreted + as a WordNet directory, and all other options take sensible defaults. + + * __cache__ -- adds an LRU cache to WordNet access. + + If the option is false, no cache is set; and if it is true, then a cache (using + `lru-cache` with a default size of 2000 items) is set. In addition, the cache can be + an object. 
If that object has a `get` method then it's used as a cache directly, and + if it doesn't, it's assumed to be a configuration object which will be used to + configure a new `lru-cache`. -The original WordNet data files can always be manually downloaded and installed -anywhere from http://wordnet.princeton.edu/wordnet/download. ### lookup(word, callback) diff --git a/lib/wordnet.js b/lib/wordnet.js index 4cca0bf..fb03cf5 100644 --- a/lib/wordnet.js +++ b/lib/wordnet.js @@ -1,4 +1,4 @@ -var DataFile, IndexFile, Promise, WordNet, async, fs, path, +var DataFile, IndexFile, LRU, Promise, WordNet, async, fs, path, __slice = [].slice; IndexFile = require('./index_file'); @@ -13,14 +13,25 @@ path = require('path'); fs = require('fs'); +LRU = require('lru-cache'); + require('es6-shim'); WordNet = (function() { var exceptions, forms, tokenDetach, unique, _forms, _loadExceptions, _validForms, _validFormsWithExceptions; - function WordNet(dataDir) { + function WordNet(options) { var WNdb, e; - if (!dataDir) { + if (typeof options === 'string') { + options = { + dataDir: options + }; + } else { + if (options == null) { + options = {}; + } + } + if (options.dataDir == null) { try { WNdb = require('wndb-with-exceptions'); } catch (_error) { @@ -28,17 +39,31 @@ WordNet = (function() { console.error("Please 'npm install wndb-with-exceptions' before using WordNet module or specify a dict directory."); throw e; } - dataDir = WNdb.path; + options.dataDir = WNdb.path; + } + if (!options.cache) { + this.cache = null; + } else { + if (options.cache === true) { + options.cache = { + max: 2000 + }; + } + if (typeof options.cache === 'object' && typeof options.cache.get === 'function') { + this.cache = options.cache; + } else { + this.cache = LRU(options.cache); + } } - this.path = dataDir; - this.nounIndex = new IndexFile(dataDir, 'noun'); - this.verbIndex = new IndexFile(dataDir, 'verb'); - this.adjIndex = new IndexFile(dataDir, 'adj'); - this.advIndex = new IndexFile(dataDir, 'adv'); - 
this.nounData = new DataFile(dataDir, 'noun'); - this.verbData = new DataFile(dataDir, 'verb'); - this.adjData = new DataFile(dataDir, 'adj'); - this.advData = new DataFile(dataDir, 'adv'); + this.path = options.dataDir; + this.nounIndex = new IndexFile(this.path, 'noun'); + this.verbIndex = new IndexFile(this.path, 'verb'); + this.adjIndex = new IndexFile(this.path, 'adj'); + this.advIndex = new IndexFile(this.path, 'adv'); + this.nounData = new DataFile(this.path, 'noun'); + this.verbData = new DataFile(this.path, 'verb'); + this.adjData = new DataFile(this.path, 'adj'); + this.advData = new DataFile(this.path, 'adv'); this.allFiles = [ { index: this.nounIndex, @@ -61,9 +86,21 @@ WordNet = (function() { } WordNet.prototype.get = function(synsetOffset, pos, callback) { - var dataFile; - dataFile = this.getDataFile(pos); - return dataFile.get(synsetOffset, callback); + var dataFile, hit, query, wordnet; + wordnet = this; + if (this.cache) { + query = "get:" + synsetOffset + ":" + pos; + if (hit = wordnet.cache.get(query)) { + return callback(hit); + } + } + dataFile = wordnet.getDataFile(pos); + return dataFile.get(synsetOffset, function(result) { + if (query) { + wordnet.cache.set(query, result); + } + return callback(result); + }); }; WordNet.prototype.getAsync = function(synsetOffset, pos) { @@ -77,14 +114,25 @@ WordNet = (function() { }; WordNet.prototype.lookup = function(input, callback) { - var lword, pos, selectedFiles, word, wordnet, _ref; + var hit, lword, pos, query, selectedFiles, word, wordnet, _ref; wordnet = this; _ref = input.split('#'), word = _ref[0], pos = _ref[1]; lword = word.toLowerCase().replace(/\s+/g, '_'); + if (this.cache) { + query = "lookup:" + input; + if (hit = wordnet.cache.get(query)) { + return callback(hit); + } + } selectedFiles = !pos ? 
wordnet.allFiles : wordnet.allFiles.filter(function(file) { return file.pos === pos; }); - return wordnet.lookupFromFiles(selectedFiles, [], lword, callback); + return wordnet.lookupFromFiles(selectedFiles, [], lword, function(results) { + if (query) { + wordnet.cache.set(query, results); + } + return callback(results); + }); }; WordNet.prototype.lookupAsync = function(input, callback) { @@ -98,9 +146,15 @@ WordNet = (function() { }; WordNet.prototype.findSense = function(input, callback) { - var lword, pos, selectedFiles, sense, senseNumber, word, wordnet, _ref; + var hit, lword, pos, query, selectedFiles, sense, senseNumber, word, wordnet, _ref; wordnet = this; _ref = input.split('#'), word = _ref[0], pos = _ref[1], senseNumber = _ref[2]; + if (this.cache) { + query = "findSense:" + input; + if (hit = wordnet.cache.get(query)) { + return callback(hit); + } + } sense = parseInt(senseNumber); if (Number.isNaN(sense)) { throw new Error("Sense number should be an integer"); @@ -112,7 +166,12 @@ WordNet = (function() { return file.pos === pos; }); return wordnet.lookupFromFiles(selectedFiles, [], lword, function(response) { - return callback(response[sense - 1]); + var result; + result = response[sense - 1]; + if (query) { + wordnet.cache.set(query, result); + } + return callback(result); }); }; @@ -127,9 +186,15 @@ WordNet = (function() { }; WordNet.prototype.querySense = function(input, callback) { - var pos, word, wordnet, _ref; + var hit, pos, query, word, wordnet, _ref; wordnet = this; _ref = input.split('#'), word = _ref[0], pos = _ref[1]; + if (this.cache) { + query = "querySense:" + input; + if (hit = wordnet.cache.get(query)) { + return callback(hit); + } + } return wordnet.lookup(input, function(results) { var i, sense, senseCounts, senses; senseCounts = {}; @@ -149,6 +214,9 @@ WordNet = (function() { } return _results; })(); + if (query) { + wordnet.cache.set(query, senses); + } return callback(senses); }); }; @@ -502,7 +570,20 @@ WordNet = (function() { }; 
WordNet.prototype.validForms = function(string, callback) { - return _validFormsWithExceptions(this, string, callback); + var hit, query, wordnet; + wordnet = this; + if (this.cache) { + query = "validForms:" + string; + if (hit = wordnet.cache.get(query)) { + return callback(hit); + } + } + return _validFormsWithExceptions(this, string, function(result) { + if (query) { + wordnet.cache.set(query, result); + } + return callback(result); + }); }; WordNet.prototype.validFormsAsync = function(string) { diff --git a/package.json b/package.json index ce82777..a58aa3d 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "node-wordnet", - "version": "0.1.7", + "version": "0.1.8", "description": "Node.js interface for Wordnet", "main": "lib/wordnet.js", "scripts": { @@ -31,6 +31,7 @@ "dependencies": { "async": "^0.9.0", "bluebird": "^2.6.0", - "es6-shim": "^0.22.1" + "es6-shim": "^0.22.1", + "lru-cache": "^2.5.0" } } diff --git a/src/wordnet.coffee b/src/wordnet.coffee index 76efc73..9f6013e 100644 --- a/src/wordnet.coffee +++ b/src/wordnet.coffee @@ -26,39 +26,63 @@ ## (5) - move to use wndb-with-exceptions instead of WNdb, to provide morphological exceptions ## (6) - significant improvements in testing -IndexFile = require('./index_file') -DataFile = require('./data_file') +IndexFile = require './index_file' +DataFile = require './data_file' -async = require('async') -Promise = require('bluebird') -path = require('path') -fs = require('fs') +async = require 'async' +Promise = require 'bluebird' +path = require 'path' +fs = require 'fs' + +LRU = require 'lru-cache' require('es6-shim') class WordNet - constructor: (dataDir) -> + constructor: (options) -> + + ## For compatibility, if the options are a string, it's just the Wordnet path + if typeof options == 'string' + options = {dataDir: options} + else + options ?= {} - if !dataDir + + if ! options.dataDir? 
try WNdb = require('wndb-with-exceptions') catch e console.error("Please 'npm install wndb-with-exceptions' before using WordNet module or specify a dict directory.") throw e - dataDir = WNdb.path + options.dataDir = WNdb.path + + + if ! options.cache + @cache = null + else + if options.cache == true + options.cache = { + max: 2000 + } + + if typeof options.cache == 'object' and typeof options.cache.get == 'function' + @cache = options.cache + else + @cache = LRU options.cache - @path = dataDir - @nounIndex = new IndexFile(dataDir, 'noun') - @verbIndex = new IndexFile(dataDir, 'verb') - @adjIndex = new IndexFile(dataDir, 'adj') - @advIndex = new IndexFile(dataDir, 'adv') + @path = options.dataDir - @nounData = new DataFile(dataDir, 'noun') - @verbData = new DataFile(dataDir, 'verb') - @adjData = new DataFile(dataDir, 'adj') - @advData = new DataFile(dataDir, 'adv') + @nounIndex = new IndexFile(@path, 'noun') + @verbIndex = new IndexFile(@path, 'verb') + @adjIndex = new IndexFile(@path, 'adj') + @advIndex = new IndexFile(@path, 'adv') + + @nounData = new DataFile(@path, 'noun') + @verbData = new DataFile(@path, 'verb') + @adjData = new DataFile(@path, 'adj') + @advData = new DataFile(@path, 'adv') @allFiles = [ {index: @nounIndex, data: @nounData, pos: 'n'} @@ -68,8 +92,16 @@ class WordNet ] get: (synsetOffset, pos, callback) -> - dataFile = @getDataFile(pos) - dataFile.get synsetOffset, callback + wordnet = @ + + if @cache + query = "get:#{synsetOffset}:#{pos}" + return callback(hit) if hit = wordnet.cache.get query + + dataFile = wordnet.getDataFile(pos) + dataFile.get synsetOffset, (result) -> + wordnet.cache.set query, result if query + callback(result) getAsync: (synsetOffset, pos) -> wordnet = @ @@ -82,8 +114,14 @@ class WordNet [word, pos] = input.split('#') lword = word.toLowerCase().replace(/\s+/g, '_') + if @cache + query = "lookup:#{input}" + return callback(hit) if hit = wordnet.cache.get query + selectedFiles = if ! 
pos then wordnet.allFiles else wordnet.allFiles.filter (file) -> file.pos == pos - wordnet.lookupFromFiles selectedFiles, [], lword, callback + wordnet.lookupFromFiles selectedFiles, [], lword, (results) -> + wordnet.cache.set query, results if query + callback(results) lookupAsync: (input, callback) -> wordnet = @ @@ -95,6 +133,10 @@ class WordNet wordnet = @ [word, pos, senseNumber] = input.split('#') + if @cache + query = "findSense:#{input}" + return callback(hit) if hit = wordnet.cache.get query + sense = parseInt(senseNumber) if Number.isNaN(sense) throw new Error("Sense number should be an integer") @@ -104,7 +146,9 @@ class WordNet lword = word.toLowerCase().replace(/\s+/g, '_') selectedFiles = wordnet.allFiles.filter (file) -> file.pos == pos wordnet.lookupFromFiles selectedFiles, [], lword, (response) -> - callback(response[sense - 1]) + result = response[sense - 1] + wordnet.cache.set query, result if query + callback(result) findSenseAsync: (input) -> wordnet = @ @@ -116,6 +160,10 @@ class WordNet wordnet = @ [word, pos] = input.split('#') + if @cache + query = "querySense:#{input}" + return callback(hit) if hit = wordnet.cache.get query + wordnet.lookup input, (results) -> senseCounts = {} senses = for sense, i in results @@ -124,6 +172,7 @@ class WordNet senseCounts[pos] ?= 1 word + "#" + pos + "#" + senseCounts[pos]++ + wordnet.cache.set query, senses if query callback(senses) querySenseAsync: (input) -> @@ -389,7 +438,15 @@ class WordNet validForms: (string, callback) -> - _validFormsWithExceptions @, string, callback + wordnet = @ + + if @cache + query = "validForms:#{string}" + return callback(hit) if hit = wordnet.cache.get query + + _validFormsWithExceptions @, string, (result) -> + wordnet.cache.set query, result if query + callback(result) validFormsAsync: (string) -> new Promise (resolve, reject) => diff --git a/test/wordnet_cache_test.coffee b/test/wordnet_cache_test.coffee new file mode 100644 index 0000000..5235259 --- /dev/null +++ 
b/test/wordnet_cache_test.coffee @@ -0,0 +1,94 @@ +chai = require('chai') +chai.use(require('chai-as-promised')) +should = chai.should() + +async = require('async') + +Wordnet = require('../lib/wordnet') + +describe 'wordnet with cache enabled', () -> + + wordnet = undefined + + beforeEach (done) -> + wordnet = new Wordnet({cache: true}) + done() + + describe 'get', () -> + it 'should succeed', (done) -> + wordnet.get 3827107, 'n', (results) -> + should.exist(results) + results.should.have.property('gloss', '(computer science) any computer that is hooked up to a computer network ') + done() + + it 'should return the exact same value for a second query', (done) -> + wordnet.get 3827107, 'n', (results) -> + should.exist(results) + wordnet.get 3827107, 'n', (results2) -> + (results == results2).should.be.true + done() + + + describe 'lookup', () -> + it 'should succeed for node', (done) -> + wordnet.lookup 'node', (results) -> + should.exist(results) + results.should.be.an.instanceOf(Array) + results[0].should.have.property('synsetOffset', 3827107) + done() + + it 'should return the exact same value for a second query', (done) -> + wordnet.lookup 'node', (results) -> + should.exist(results) + wordnet.lookup 'node', (results2) -> + (results == results2).should.be.true + done() + + + describe 'findSense', () -> + + it 'should succeed for lie#v#1', (done) -> + wordnet.findSense 'lie#v#1', (results) -> + should.exist(results) + results.should.have.property('lemma', 'lie_down') + results.should.have.property('pos', 'v') + done() + + it 'should return the exact same value for a second query', (done) -> + wordnet.findSense 'lie#v#1', (results) -> + should.exist(results) + wordnet.findSense 'lie#v#1', (results2) -> + (results == results2).should.be.true + done() + + + describe 'querySense', () -> + it 'should succeed for node', (done) -> + wordnet.querySense 'node', (results) -> + should.exist(results) + results.should.be.an.instanceOf(Array) + results.should.have.length(8) + 
done() + + it 'should return the exact same value for a second query', (done) -> + wordnet.querySense 'node', (results) -> + should.exist(results) + wordnet.querySense 'node', (results2) -> + (results == results2).should.be.true + done() + + + describe 'validForms', () -> + + it 'should succeed for axes#n', (done) -> + wordnet.validForms 'axes#n', (results) -> + should.exist(results) + results.should.eql(['ax#n', 'axis#n']) + done() + + it 'should return the exact same value for a second query', (done) -> + wordnet.validForms 'axes#n', (results) -> + should.exist(results) + wordnet.validForms 'axes#n', (results2) -> + (results == results2).should.be.true + done()