/
proofreader.js
128 lines (110 loc) · 3.39 KB
/
proofreader.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
var cheerio = require('cheerio');
var fs = require('fs');
var writeGood = require('write-good');
var nodehun = require('nodehun');
var spellcheck = require('nodehun-sentences');
// Public entry point: requiring this module yields the Proofreader constructor.
module.exports = Proofreader;
/**
 * Creates a proofreader with no dictionary loaded, empty whitelist and
 * blacklist selectors, and no write-good settings.
 *
 * @constructor
 */
function Proofreader() {
    /**
     * Spellchecking dictionary; stays null until addDictionary() creates it.
     * @type nodehun
     * @private
     */
    this._dictionary = null;

    /**
     * Selector of the elements whose text gets proofread.
     * @type String
     * @private
     */
    this._whitelist = '';

    /**
     * Selector of the elements removed from the document before proofreading.
     * @type String
     * @private
     */
    this._blacklist = '';

    /**
     * Options object handed to write-good as its second argument.
     * @type Object
     * @private
     */
    this._writeGoodSettings = null;
}
/**
 * Registers a dictionary with the proofreader. Files are read synchronously.
 *
 * The very first call creates the underlying nodehun instance and therefore
 * needs both the dic (words) and the aff (grammar) file. Every later call only
 * appends words to that instance: just the dic file is read and affFilePath is
 * ignored.
 *
 * @param {String} dicFilePath path to the .dic (words) file
 * @param {String} affFilePath path to the .aff (grammar) file; used by the first call only
 */
Proofreader.prototype.addDictionary = function (dicFilePath, affFilePath) {
    var existing = this._dictionary;

    // A dictionary is already in place: extend it with the extra word list.
    if (existing) {
        existing.addDictionary(fs.readFileSync(dicFilePath));
        return;
    }

    // First dictionary: nodehun takes the affix data first, then the word list.
    var affData = fs.readFileSync(affFilePath);
    var dicData = fs.readFileSync(dicFilePath);
    this._dictionary = new nodehun(affData, dicData);
};
/**
 * Stores the selector of whitelisted elements, i.e. the elements whose text
 * proofread() processes.
 *
 * @param {String} value selector string
 * @throws {Error} when value is not a string
 */
Proofreader.prototype.setWhitelist = function (value) {
    if (typeof value === 'string') {
        this._whitelist = value;
        return;
    }

    throw new Error('Whitelist must be a string.');
};
/**
 * Stores the selector of blacklisted elements, i.e. the elements proofread()
 * removes from the document before any text is processed.
 *
 * @param {String} value selector string
 * @throws {Error} when value is not a string
 */
Proofreader.prototype.setBlacklist = function (value) {
    var isSelectorString = typeof value === 'string';

    if (!isSelectorString) {
        throw new Error('Blacklist must be a string.');
    }

    this._blacklist = value;
};
/**
 * Sets the write-good settings object that proofread() passes to write-good.
 * Passing undefined is allowed and stores undefined.
 *
 * @see https://github.com/btford/write-good#checks
 * @param {Object} [settings] write-good options
 * @throws {Error} when settings is neither undefined nor an object
 */
Proofreader.prototype.setWriteGoodSettings = function (settings) {
    if (settings !== undefined && typeof settings !== 'object') {
        // The message used to be copy-pasted from setBlacklist, which pointed
        // callers at the wrong setting and the wrong expected type.
        throw new Error('Write-good settings must be an object.');
    }
    this._writeGoodSettings = settings;
};
/**
 * Proofreads the provided HTML string and returns, via promise, one entry per
 * whitelisted element that contains text:
 *
 *     { text: String, suggestions: { writeGood: Array, spelling: Array } }
 *
 * Entries are listed in the order the whitelisted elements are visited,
 * regardless of the order in which the spellchecker callbacks fire.
 *
 * @param {String} html
 * @returns {Promise} resolves with the array of entries; rejects with the
 * error reported by the spellchecker, or with anything thrown while the
 * elements are being processed
 */
Proofreader.prototype.proofread = function (html) {
    var $ = cheerio.load(html);
    var dictionary = this._dictionary;
    var whitelist = this._whitelist;
    var blacklist = this._blacklist;
    var writeGoodSettings = this._writeGoodSettings;

    return new Promise(function (resolve, reject) {
        // One promise per processed element; each resolves with that element's entry.
        var pending = [];

        //Blacklisted elements are removed before text is processed
        if (blacklist) {
            $(blacklist).remove();
        }

        //Only whitelisted elements are processed
        $(whitelist).each(function () {
            var text = $(this).text();

            //remove linebreaks from text
            text = text.replace(/(\r\n|\n|\r)+/gm, " ");
            //replace ’ with '
            text = text.replace(/’/g, "'");
            //replace multiple spaces with a single one
            text = text.replace(/\s{2,}/g, ' ');
            //trim text
            text = text.trim();

            // Nothing to check; returning undefined keeps the iteration going.
            if (!text.length) {
                return;
            }

            var writeGoodSuggestions = writeGood(text, writeGoodSettings);

            pending.push(new Promise(function (done, fail) {
                spellcheck(dictionary, text, function (error, spellcheckerSuggestions) {
                    // A spellchecker failure must not be reported as "no typos".
                    if (error) {
                        fail(error);
                        return;
                    }

                    done({
                        text: text,
                        suggestions: {
                            writeGood: writeGoodSuggestions,
                            spelling: spellcheckerSuggestions || []
                        }
                    });
                });
            }));
        });

        // Promise.all keeps the entries in `pending` order and forwards the
        // first failure, so the returned promise always settles.
        Promise.all(pending).then(resolve, reject);
    });
};