Skip to content

Commit

Permalink
add preprocessing pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
kbrsh committed May 21, 2017
1 parent 8b1ed02 commit a55060a
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 19 deletions.
39 changes: 30 additions & 9 deletions dist/wade.js
Expand Up @@ -57,6 +57,24 @@
return match;
}

/**
 * Pipeline step: converts a string to lower case.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The result of `str.toLowerCase()`.
 */
var lowercase = function(str) {
  var lowered = str.toLowerCase();
  return lowered;
}

// Words that removeStopWords drops from a string. Entries are all lower case.
var stopWords = ['about', 'after', 'all', 'also', 'am', 'an', 'and', 'another', 'any', 'are', 'as', 'at', 'be', 'because', 'been', 'before', 'being', 'between', 'both', 'but', 'by', 'came', 'can', 'come', 'could', 'did', 'do', 'each', 'for', 'from', 'get', 'got', 'has', 'had', 'he', 'have', 'her', 'here', 'him', 'himself', 'his', 'how', 'if', 'in', 'into', 'is', 'it', 'like', 'make', 'many', 'me', 'might', 'more', 'most', 'much', 'must', 'my', 'never', 'now', 'of', 'on', 'only', 'or', 'other', 'our', 'out', 'over', 'said', 'same', 'see', 'should', 'since', 'some', 'still', 'such', 'take', 'than', 'that', 'the', 'their', 'them', 'then', 'there', 'these', 'they', 'this', 'those', 'through', 'to', 'too', 'under', 'up', 'very', 'was', 'way', 'we', 'well', 'were', 'what', 'where', 'which', 'while', 'who', 'with', 'would', 'you', 'your', 'a', 'i'];

/**
 * Pipeline step: removes every stop word from a space-separated string.
 *
 * The string is split on single spaces and each token is compared against
 * `stopWords` with an exact, case-sensitive match, so input is expected to
 * be lower-cased already.
 *
 * @param {string} str - Space-separated text.
 * @returns {string} The remaining tokens joined with single spaces.
 */
var removeStopWords = function(str) {
  var words = str.split(" ");
  var kept = [];

  // Build a new array instead of splicing in place: removing an element
  // while walking forward shifts the next token into the current slot and
  // it would be skipped, leaving consecutive stop words behind.
  for(var i = 0; i < words.length; i++) {
    if(stopWords.indexOf(words[i]) === -1) {
      kept.push(words[i]);
    }
  }

  return kept.join(" ");
}

var Wade = function(data) {
var search = function(item) {
var data = search.data;
Expand All @@ -67,7 +85,7 @@
var results = [];

for(var i = 0; i < keywordsLength; i++) {
var keyword = keywords[i];
var keyword = Wade.process(keywords[i]);
var length = keyword.length;
lengths[i] = length;
tables[i] = createTable(keyword, length);
Expand Down Expand Up @@ -97,22 +115,25 @@
return results;
}

for(var i = 0; i < data.length; i++) {
data[i] = Wade.process(data[i]);
}

search.data = data;

return search;
}

Wade.normalize = function(item) {
return item.toLowerCase();
}
Wade.pipeline = [lowercase, removeStopWords];

Wade.normalizeAll = function(data) {
for(var i = 0; i < data.length; i++) {
var item = data[i];
data[i] = Wade.normalize(item);
Wade.process = function(item) {
var pipeline = Wade.pipeline;

for(var j = 0; j < pipeline.length; j++) {
item = pipeline[j](item);
}

return data;
return item;
}

Wade.version = "0.1.0";
Expand Down
2 changes: 1 addition & 1 deletion dist/wade.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 30 additions & 9 deletions src/index.js
Expand Up @@ -47,6 +47,24 @@ var contains = function(item, itemLength, table, data) {
return match;
}

/**
 * Pipeline step: converts a string to lower case.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The result of `str.toLowerCase()`.
 */
var lowercase = function(str) {
  var lowered = str.toLowerCase();
  return lowered;
}

// Words that removeStopWords drops from a string. Entries are all lower case.
var stopWords = ['about', 'after', 'all', 'also', 'am', 'an', 'and', 'another', 'any', 'are', 'as', 'at', 'be', 'because', 'been', 'before', 'being', 'between', 'both', 'but', 'by', 'came', 'can', 'come', 'could', 'did', 'do', 'each', 'for', 'from', 'get', 'got', 'has', 'had', 'he', 'have', 'her', 'here', 'him', 'himself', 'his', 'how', 'if', 'in', 'into', 'is', 'it', 'like', 'make', 'many', 'me', 'might', 'more', 'most', 'much', 'must', 'my', 'never', 'now', 'of', 'on', 'only', 'or', 'other', 'our', 'out', 'over', 'said', 'same', 'see', 'should', 'since', 'some', 'still', 'such', 'take', 'than', 'that', 'the', 'their', 'them', 'then', 'there', 'these', 'they', 'this', 'those', 'through', 'to', 'too', 'under', 'up', 'very', 'was', 'way', 'we', 'well', 'were', 'what', 'where', 'which', 'while', 'who', 'with', 'would', 'you', 'your', 'a', 'i'];

/**
 * Pipeline step: removes every stop word from a space-separated string.
 *
 * The string is split on single spaces and each token is compared against
 * `stopWords` with an exact, case-sensitive match, so input is expected to
 * be lower-cased already.
 *
 * @param {string} str - Space-separated text.
 * @returns {string} The remaining tokens joined with single spaces.
 */
var removeStopWords = function(str) {
  var words = str.split(" ");
  var kept = [];

  // Build a new array instead of splicing in place: removing an element
  // while walking forward shifts the next token into the current slot and
  // it would be skipped, leaving consecutive stop words behind.
  for(var i = 0; i < words.length; i++) {
    if(stopWords.indexOf(words[i]) === -1) {
      kept.push(words[i]);
    }
  }

  return kept.join(" ");
}

var Wade = function(data) {
var search = function(item) {
var data = search.data;
Expand All @@ -57,7 +75,7 @@ var Wade = function(data) {
var results = [];

for(var i = 0; i < keywordsLength; i++) {
var keyword = keywords[i];
var keyword = Wade.process(keywords[i]);
var length = keyword.length;
lengths[i] = length;
tables[i] = createTable(keyword, length);
Expand Down Expand Up @@ -87,22 +105,25 @@ var Wade = function(data) {
return results;
}

for(var i = 0; i < data.length; i++) {
data[i] = Wade.process(data[i]);
}

search.data = data;

return search;
}

Wade.normalize = function(item) {
return item.toLowerCase();
}
Wade.pipeline = [lowercase, removeStopWords];

Wade.normalizeAll = function(data) {
for(var i = 0; i < data.length; i++) {
var item = data[i];
data[i] = Wade.normalize(item);
Wade.process = function(item) {
var pipeline = Wade.pipeline;

for(var j = 0; j < pipeline.length; j++) {
item = pipeline[j](item);
}

return data;
return item;
}

Wade.version = "__VERSION__";

0 comments on commit a55060a

Please sign in to comment.