Skip to content

Commit

Permalink
feature: implement stemming - fix #9
Browse files Browse the repository at this point in the history
  • Loading branch information
sayanee committed Oct 21, 2014
1 parent 649bdb5 commit 85bd4b9
Show file tree
Hide file tree
Showing 8 changed files with 84 additions and 4 deletions.
3 changes: 2 additions & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
1. Fork this project and install the packages with `npm install`
- Create a new feature/patch branch
- Write tests in the `test` folder
- Code code code and amend `example.js` if needed
- Code code code
- Amend `example.js` and `readme.md` if needed
- Run `npm test` to check all linting and tests are passing or setup a git pre-commit hook to run `npm test`
- Write a [good commit message](https://github.com/angular/angular.js/blob/master/CONTRIBUTING.md#commit) in the format `<type>(<scope>): <subject>`
- Pull request using the new feature/patch branch
Expand Down
11 changes: 11 additions & 0 deletions example.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,14 @@ console.log(overlap(sentence1, sentence2, {
ignorePlurals: [ 'hitchhikings' ],
ignoreCommonWords: true
}));

// Demo: the `stemming` option combined with `ignoreCommonWords`.
// First echo the snippet being demonstrated, then run it and print the result.
console.log(colors.black('\nWith options stemming:'));
[
  'var sentence1 = \'A programming course in SmallTalk\';',
  'var sentence2 = \'Have you programmed in SmallTalk?\';',
  'overlap(sentence1, sentence2, { stemming: true, ignoreCommonWords: true })'
].forEach(function(snippetLine) {
  // Wrap console.log so only the text is printed (forEach also passes index/array).
  console.log(snippetLine);
});

sentence1 = 'A programming course in SmallTalk';
sentence2 = 'Have you programmed in SmallTalk?';
console.log(overlap(sentence1, sentence2, { stemming: true, ignoreCommonWords: true }));
6 changes: 6 additions & 0 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ module.exports = function(phrase1, phrase2, options) {
options.common = options.common || null;
options.depluralize = options.depluralize || false;
options.ignorePlurals = options.ignorePlurals || [];
options.stemming = options.stemming || false;

if (options.ignoreCase) {
phrase1 = phrase1.toLowerCase();
Expand All @@ -38,6 +39,11 @@ module.exports = function(phrase1, phrase2, options) {
compare2 = lib.removeCommonWords(compare2, options.common);
}

if (options.stemming) {
compare1 = lib.stem(compare1);
compare2 = lib.stem(compare2);
}

compare1.forEach(function(element) {
if (compare2.indexOf(element) > -1) {
answer.push(element);
Expand Down
9 changes: 8 additions & 1 deletion lib/lib.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
'use strict';

var pluralize = require('pluralize');
var pluralize = require('pluralize'),
natural = require('natural');

exports.sanitize = function sanitize(word) {
// matches common punctuations:
Expand Down Expand Up @@ -34,3 +35,9 @@ exports.depluralize = function depluralize(words, ignorePlurals) {
return pluralize.singular(element);
})
}

exports.stem = function stem(words) {
return words.map(function(element) {
return natural.PorterStemmer.stem(element);
})
}
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
}
},
"dependencies": {
"natural": "^0.1.28",
"pluralize": "~1.0.2"
}
}
13 changes: 13 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,19 @@ overlap(sentence1, sentence2, {
// [ 'hitchhikings', 'meetup' ]
```

### option: stemming

```js
var sentence1 = 'A programming course in SmallTalk';
var sentence2 = 'Have you programmed in SmallTalk?';

overlap(sentence1, sentence2, {
stemming: true,
ignoreCommonWords: true
});
// [ 'program', 'smalltalk' ]
```

Try out the examples in file `example.js` with the command `node example.js`

## Contribute
Expand Down
22 changes: 20 additions & 2 deletions test/indexSpec.js
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,8 @@
var sentence1 = 'Base and base',
sentence2 = 'Base in bases',
overlapList = overlap(sentence1, sentence2, {
depluralize: true,
common: [ 'base' ]
depluralize: true,
common: [ 'base' ]
});

expect(overlapList).to.include.members([ 'Base' ]);
Expand All @@ -182,6 +182,24 @@
overlapList = overlap(sentence1, sentence2);

expect(overlapList).to.have.length(0);
done();
});
});

// With `stemming` enabled, different inflections of a word in the two
// sentences are expected to be reported as one shared stem.
describe('With option stemming', function() {
  it('returns the word stem or root word', function(done) {
    var options = {
      stemming: true,
      ignoreCommonWords: true
    };
    var expectedStems = [ 'program', 'smalltalk' ];
    var overlapList = overlap(
      'A programming course in SmallTalk',
      'Have you programmed in SmallTalk?',
      options
    );

    expect(overlapList).to.include.members(expectedStems);

    done();
  });
});
Expand Down
23 changes: 23 additions & 0 deletions test/lib/libSpec.js
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,29 @@
});
});

// lib.stem should hand back an array containing the stem of each input word.
describe('Stem words', function() {

  it('returns an array with stem words', function(done) {
    var inflected = [ 'programming', 'swimming', 'eating', 'traveled' ];
    var expectedStems = [ 'program', 'swim', 'eat', 'travel' ];
    var answer = lib.stem(inflected);

    expect(answer).to.be.an('array');
    expect(answer).to.have.members(expectedStems);

    done();
  });
});

});

})();

0 comments on commit 85bd4b9

Please sign in to comment.