/
utils.js
270 lines (226 loc) · 7 KB
/
utils.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
// Utilities
//
'use strict';
// Returns the `Object.prototype.toString` tag of a value,
// e.g. '[object String]' or '[object Array]'.
function _class(obj) {
  var toString = Object.prototype.toString;

  return toString.call(obj);
}
// Tells whether `obj` is reported as a string by `_class`
// (i.e. its tag is '[object String]').
function isString(obj) {
  var tag = _class(obj);

  return tag === '[object String]';
}
// Cached reference to the generic own-property check, so that `has` works
// for objects that shadow or do not inherit `hasOwnProperty`.
var _hasOwnProperty = Object.prototype.hasOwnProperty;

// Returns true when `key` is an own (not inherited) property of `object`.
function has(object, key) {
  var isOwn = _hasOwnProperty.call(object, key);

  return isOwn;
}
// Merge objects
//
// Copies the own enumerable properties of every source onto `obj`, in
// argument order (later sources overwrite earlier ones), and returns `obj`.
// Falsy sources (null, undefined, ...) are skipped; any other source that is
// not an object raises a TypeError.
//
function assign(obj /*from1, from2, from3, ...*/) {
  var sources = Array.prototype.slice.call(arguments, 1);

  sources.forEach(function (source) {
    if (!source) { return; }

    if (typeof source !== 'object') {
      // The separating space matters: without it the message reads
      // e.g. "foomust be object".
      throw new TypeError(source + ' must be object');
    }

    Object.keys(source).forEach(function (key) {
      obj[key] = source[key];
    });
  });

  return obj;
}
// Returns a new array in which the element of `src` at index `pos` is
// replaced by the contents of `newElements`. `src` itself is not modified.
// Useful for some operations with tokens.
function arrayReplaceAt(src, pos, newElements) {
  var head = src.slice(0, pos);
  var tail = src.slice(pos + 1);

  return [].concat(head, newElements, tail);
}
////////////////////////////////////////////////////////////////////////////////
// Decides whether the numeric code `c` is acceptable as the value of a
// numeric character reference. Returns false for surrogates, the listed
// "never used" codes, control codes, and anything above 0x10FFFF.
function isValidEntityCode(c) {
  /*eslint no-bitwise:0*/
  var rejected =
    // broken sequence
    (c >= 0xD800 && c <= 0xDFFF) ||
    // never used
    (c >= 0xFDD0 && c <= 0xFDEF) ||
    (c & 0xFFFF) === 0xFFFF ||
    (c & 0xFFFF) === 0xFFFE ||
    // control codes
    (c >= 0x00 && c <= 0x08) ||
    c === 0x0B ||
    (c >= 0x0E && c <= 0x1F) ||
    (c >= 0x7F && c <= 0x9F) ||
    // out of range
    c > 0x10FFFF;

  return !rejected;
}
// Converts a code point to a string. Values above 0xFFFF are encoded as a
// UTF-16 surrogate pair; everything else goes through String.fromCharCode.
function fromCodePoint(c) {
  /*eslint no-bitwise:0*/
  var offset, high, low;

  if (!(c > 0xffff)) {
    return String.fromCharCode(c);
  }

  offset = c - 0x10000;
  high = 0xd800 + (offset >> 10);
  low = 0xdc00 + (offset & 0x3ff);

  return String.fromCharCode(high, low);
}
// Backslash followed by a markdown ASCII punctuation character;
// group 1 is the escaped character.
var UNESCAPE_MD_RE = /\\([!"#$%&'()*+,\-.\/:;<=>?@[\\\]^_`{|}~])/g;

// Candidate entity of the form `&...;`; group 1 is the text between `&` and
// `;` (either a name or a `#`-prefixed numeric body).
var NAMED_ENTITY_RE = /&([a-z#][a-z0-9]{1,31});/gi;

// Validates a numeric entity body: `#` + 1-8 decimal digits, or `#x` + 1-8
// hex digits. Anchored at BOTH ends: NAMED_ENTITY_RE also lets through bodies
// such as `#65ab`, and without the trailing `$` those would pass this test
// and then be decoded from their numeric prefix by parseInt.
var DIGITAL_ENTITY_TEST_RE = /^#((?:x[a-f0-9]{1,8}|[0-9]{1,8}))$/i;

// Union of the two patterns above: group 1 is a backslash-escaped character,
// group 2 is an entity body.
var UNESCAPE_ALL_RE = new RegExp(UNESCAPE_MD_RE.source + '|' + NAMED_ENTITY_RE.source, 'gi');
var entities = require('./entities');
// Replacement callback for an entity match.
//
// - match: the full matched text (returned unchanged when nothing applies)
// - name:  the entity body, i.e. the text between `&` and `;`
//
// Known names are looked up in `entities`; numeric bodies (`#...`) are decoded
// when they pass DIGITAL_ENTITY_TEST_RE and isValidEntityCode.
function replaceEntityPattern(match, name) {
  var isHex, code;

  if (has(entities, name)) {
    return entities[name];
  }

  if (name.charCodeAt(0) !== 0x23/* # */ || !DIGITAL_ENTITY_TEST_RE.test(name)) {
    return match;
  }

  // `#x...` / `#X...` is hexadecimal, plain `#...` is decimal
  isHex = name[1].toLowerCase() === 'x';
  if (isHex) {
    code = parseInt(name.slice(2), 16);
  } else {
    code = parseInt(name.slice(1), 10);
  }

  return isValidEntityCode(code) ? fromCodePoint(code) : match;
}
// Runs every NAMED_ENTITY_RE match in `str` through replaceEntityPattern.
// Strings without `&` are returned as-is without running the regex.
function replaceEntities(str) {
  return str.indexOf('&') < 0 ?
    str
  :
    str.replace(NAMED_ENTITY_RE, replaceEntityPattern);
}
// Replaces every UNESCAPE_MD_RE match in `str` with its first capture group
// (the escaped character, without the backslash). Strings without a
// backslash are returned as-is.
function unescapeMd(str) {
  return str.indexOf('\\') < 0 ?
    str
  :
    str.replace(UNESCAPE_MD_RE, '$1');
}
// Handles backslash escapes and entities in a single pass over `str`, using
// UNESCAPE_ALL_RE. Strings containing neither `\` nor `&` are returned as-is.
function unescapeAll(str) {
  var nothingToDo = str.indexOf('\\') < 0 && str.indexOf('&') < 0;

  if (nothingToDo) {
    return str;
  }

  return str.replace(UNESCAPE_ALL_RE, function (match, escaped, entity) {
    // A backslash escape yields the bare character; otherwise the match is an
    // entity and is delegated to replaceEntityPattern.
    return escaped ? escaped : replaceEntityPattern(match, entity);
  });
}
////////////////////////////////////////////////////////////////////////////////
// Characters that must be escaped before text is placed into HTML.
// The first regex is a cheap presence check, the second (global) one is used
// for the actual replacement.
var HTML_ESCAPE_TEST_RE = /[&<>"]/;
var HTML_ESCAPE_REPLACE_RE = /[&<>"]/g;

// Maps each unsafe character to its HTML entity. The values must be the
// entity text, not the raw character; otherwise escaping is a no-op.
var HTML_REPLACEMENTS = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;'
};
// Replacement callback: looks up the substitute for a single unsafe
// character in HTML_REPLACEMENTS.
function replaceUnsafeChar(ch) {
  var replacement = HTML_REPLACEMENTS[ch];

  return replacement;
}
// Escapes the characters matched by HTML_ESCAPE_REPLACE_RE in `str` via
// replaceUnsafeChar. When HTML_ESCAPE_TEST_RE finds nothing to escape, the
// input is returned untouched.
function escapeHtml(str) {
  var needsEscaping = HTML_ESCAPE_TEST_RE.test(str);

  if (!needsEscaping) {
    return str;
  }

  return str.replace(HTML_ESCAPE_REPLACE_RE, replaceUnsafeChar);
}
////////////////////////////////////////////////////////////////////////////////
var encode = require('mdurl/encode');
// Incoming link can be partially encoded. Convert possible combinations to
// unified form by passing the url through `encode`.
//
// TODO: Rewrite it. Should use:
//
// - encodeURIComponent for query
// - encodeURI for path
// - (?) punycode for domain name (but encodeURI seems to work in real world)
//
function normalizeLink(url) {
  var normalized = encode(url);

  return normalized;
}
////////////////////////////////////////////////////////////////////////////////
// Characters that have a special meaning inside a regular expression.
var REGEXP_ESCAPE_RE = /[.?*+^$[\]\\(){}|-]/g;

// Backslash-escapes every regexp-special character in `str`, so the result
// can be embedded in a RegExp source and match `str` literally.
function escapeRE(str) {
  var escaped = str.replace(REGEXP_ESCAPE_RE, '\\$&');

  return escaped;
}
////////////////////////////////////////////////////////////////////////////////
// Zs (unicode class) || [\t\f\v\r\n]
//
// Takes a character code and reports whether it is one of the whitespace
// codes listed below.
function isWhiteSpace(code) {
  // U+2000..U+200A
  if (code >= 0x2000 && code <= 0x200A) { return true; }

  return code === 0x09 || // \t
         code === 0x0A || // \n
         code === 0x0B || // \v
         code === 0x0C || // \f
         code === 0x0D || // \r
         code === 0x20 ||
         code === 0xA0 ||
         code === 0x1680 ||
         code === 0x202F ||
         code === 0x205F ||
         code === 0x3000;
}
////////////////////////////////////////////////////////////////////////////////
/*eslint-disable max-len*/
var UNICODE_PUNCT_RE = require('uc.micro/categories/P/regex');
// Tests `char` against UNICODE_PUNCT_RE.
// Currently without astral characters support.
function isPunctChar(char) {
  var matched = UNICODE_PUNCT_RE.test(char);

  return matched;
}
// Markdown ASCII punctuation characters.
//
// !, ", #, $, %, &, ', (, ), *, +, ,, -, ., /, :, ;, <, =, >, ?, @, [, \, ], ^, _, `, {, |, }, or ~
// http://spec.commonmark.org/0.15/#ascii-punctuation-character
//
// Don't confuse with unicode punctuation !!! It lacks some chars in ascii range.
//
// Takes a character CODE (a number), not a one-character string.
//
function isMdAsciiPunct(ch) {
  // Only exact integer codes can match; anything else (strings, fractions,
  // NaN, ...) is never punctuation.
  if (typeof ch !== 'number' || Math.floor(ch) !== ch) {
    return false;
  }

  return (ch >= 0x21/* ! */ && ch <= 0x2F/* / */) ||
         (ch >= 0x3A/* : */ && ch <= 0x40/* @ */) ||
         (ch >= 0x5B/* [ */ && ch <= 0x60/* ` */) ||
         (ch >= 0x7B/* { */ && ch <= 0x7E/* ~ */);
}
// Helper to unify [reference labels].
//
// Trims the label, collapses every whitespace run to a single space and
// case-normalizes it, so that labels differing only in case or spacing
// produce the same key.
//
function normalizeReference(str) {
  var label = str.trim().replace(/\s+/g, ' ');

  // Lower-casing before upper-casing approximates Unicode case folding:
  // .toUpperCase() alone leaves some characters untouched (e.g. U+1E9E
  // capital sharp s, U+212A Kelvin sign), so labels that differ only in case
  // would end up with different keys.
  //
  // The final form is upper-case rather than lower-case to avoid a conflict
  // with Object.prototype members (most notably, `__proto__`).
  return label.toLowerCase().toUpperCase();
}
////////////////////////////////////////////////////////////////////////////////
// Public API: attach the helpers defined in this file to the CommonJS
// `exports` object.
exports.assign = assign;
exports.isString = isString;
exports.has = has;
exports.unescapeMd = unescapeMd;
exports.unescapeAll = unescapeAll;
exports.isValidEntityCode = isValidEntityCode;
exports.fromCodePoint = fromCodePoint;
exports.replaceEntities = replaceEntities;
exports.escapeHtml = escapeHtml;
exports.arrayReplaceAt = arrayReplaceAt;
exports.normalizeLink = normalizeLink;
exports.isWhiteSpace = isWhiteSpace;
exports.isMdAsciiPunct = isMdAsciiPunct;
exports.isPunctChar = isPunctChar;
exports.escapeRE = escapeRE;
exports.normalizeReference = normalizeReference;