Skip to content

Commit

Permalink
Merge pull request Yoast#4 from Yoast/stories/DT/stopWords
Browse files Browse the repository at this point in the history
Stories/dt/stop words
  • Loading branch information
omarreiss committed May 11, 2015
2 parents 35c5c3e + 8b38530 commit fa251cb
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 26 deletions.
72 changes: 52 additions & 20 deletions js/analyzer.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,14 @@ Analyzer.prototype.init = function() {
if(typeof this.config.queue !== 'undefined' && this.config.queue.length !== 0){
this.queue = this.config.queue;
}else{
this.queue = ['keywordDensity', 'subheaderChecker'];
this.queue = ['keywordDensity', 'subheaderChecker', 'stopwordChecker'];
}
//if wordsToRemove or stopWords are not configured, fall back to the defaults
if(typeof this.config.wordsToRemove === 'undefined'){
this.config.wordsToRemove = [' a', ' in', ' an', ' on', ' for', ' the', ' and'];
if(typeof this.config.wordsToRemove == 'undefined'){
this.config.wordsToRemove = analyzerConfig.wordsToRemove;
}
if(typeof this.config.stopWords == 'undefined'){
this.config.stopWords = analyzerConfig.stopWords;
}
//set default variables
this.keywordRegex = new RegExp(this.config.keyword);
Expand All @@ -45,6 +48,7 @@ Analyzer.prototype.runQueue = function(){
this.runQueue();
}
};

/**
* clears current queue of functions, effectively stopping execution of the analyzer.
*/
Expand All @@ -53,7 +57,6 @@ Analyzer.prototype.abortQueue = function(){
this.queue = [];
};


/**
* checks the keyword density of given keyword against the cleantext stored in _store.
* @returns resultObject
Expand Down Expand Up @@ -83,6 +86,7 @@ Analyzer.prototype.keywordDensity = function(){
}
return result;
};

/**
* checks if keywords appear in subheaders of stored cleanTextSomeTags text.
* @returns resultObject
Expand All @@ -96,7 +100,7 @@ Analyzer.prototype.subheaderChecker = function() {
}else {
var foundInHeader = 0;
for (var i = 0; i < headers.length; i++) {
var formattedHeaders = this.stripKeywords(headers[i]);
var formattedHeaders = this.stringReplacer(headers[i], this.config.wordsToRemove);
if (formattedHeaders.match(new RegExp(this.config.keyword, 'g')) || headers[i].match(new RegExp(this.config.keyword, 'g'))) {
foundInHeader++;
}
Expand All @@ -113,25 +117,53 @@ Analyzer.prototype.subheaderChecker = function() {
return result;
};

/**
 * Checks whether the configured keyword contains any of the configured stop words.
 *
 * Reads this.config.keyword and this.config.stopWords and delegates the
 * matching to this.stringCounter.
 *
 * @returns {{name: string, result: {count: number, matches: Array}, rating: number}}
 *          result object named 'stopWords'; `matches` lists the stop words found
 *          in the keyword (empty array when there are none), `count` is their
 *          number and `rating` is currently fixed at 5.
 */
Analyzer.prototype.stopwordChecker = function(){
    // stringCounter returns null when nothing matched (String.prototype.match
    // semantics), so normalise to an empty array before reading .length.
    var matches = this.stringCounter(this.config.keyword, this.config.stopWords) || [];
    return {name: 'stopWords', result: {count: matches.length, matches: matches}, rating: 5};
};

/**helper functions*/

/**
* removes certain words from string
* @params textString
* @returns textString without keywords
*/
Analyzer.prototype.stripKeywords = function(textString){
//words to remove
var wordString = '';
for (var i = 0; i < this.config.wordsToRemove.length; i++){
if(wordString.length > 0){ wordString += '|'; }
wordString += '('+this.config.wordsToRemove[i]+')\\b';
* replaces every occurrence of the given strings in textString with the replacement.
* @param textString
* @param stringsToRemove []
* @param replacement (default == space)
* @returns {textString}
*/
Analyzer.prototype.stringReplacer = function(textString, stringsToRemove, replacement){
    // An omitted replacement means "substitute a single space".
    var substitute = (typeof replacement === 'undefined') ? ' ' : replacement;
    // Swap every occurrence of the listed strings for the substitute ...
    var replaced = textString.replace(this.regexStringBuilder(stringsToRemove), substitute);
    // ... and hand the result to the preprocessor's stripSpaces helper.
    return yst_pp.stripSpaces(replaced);
};

/**
 * Collects every occurrence of the given strings inside a text.
 * The search pattern is produced by regexStringBuilder.
 * @param textString the text to search in
 * @param stringsToMatch array of strings to look for
 * @returns the result of String.prototype.match for that pattern: an array of
 *          the matched substrings, or null when nothing matched
 */
Analyzer.prototype.stringCounter = function(textString, stringsToMatch){
    var found = textString.match(this.regexStringBuilder(stringsToMatch));
    return found;
};

/**
 * Builds a global regular expression that matches any of the given strings.
 *
 * Every entry becomes one alternative of the form `(entry)\b`, i.e. the entry
 * followed by a word boundary; the alternatives are joined with `|`.
 * Entries are inserted into the pattern as-is, so regex metacharacters in an
 * entry are interpreted as regex syntax.
 *
 * @param stringArray array of strings to match
 * @returns {RegExp} regex with the 'g' flag; for a missing or empty array a
 *          regex that never matches is returned
 */
Analyzer.prototype.regexStringBuilder = function(stringArray){
    // An empty pattern would match the empty string at every position, which
    // makes replace() insert the replacement between all characters. `(?!)`
    // is an empty negative lookahead and can never match.
    if(!stringArray || stringArray.length === 0){
        return new RegExp('(?!)', 'g');
    }
    var alternatives = [];
    for(var i = 0; i < stringArray.length; i++){
        alternatives.push('(' + stringArray[i] + ')\\b');
    }
    return new RegExp(alternatives.join('|'), 'g');
};

/**
* PreProcessor object definition. Creates _store object and calls init.
Expand Down
8 changes: 8 additions & 0 deletions js/config/config.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 1 addition & 3 deletions spec/headingtagsSpec.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 1 addition & 3 deletions spec/keywordDensitySpec.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
/**
* Created by danny on 3/23/15.
*/
require('../js/config/config.js');
require('../js/analyzer.js');

keywordArgs = {
Expand Down
1 change: 1 addition & 0 deletions spec/preprocessorSpec.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
require('../js/config/config.js');
require('../js/analyzer.js');

args = {
Expand Down
19 changes: 19 additions & 0 deletions spec/stopwordSpec.js

Large diffs are not rendered by default.

0 comments on commit fa251cb

Please sign in to comment.