-
-
Notifications
You must be signed in to change notification settings - Fork 5k
/
htmlUtils.js
88 lines (68 loc) · 2.04 KB
/
htmlUtils.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
const urlUtils = require('lib/urlUtils.js');
const Entities = require('html-entities').AllHtmlEntities;
const htmlentities = new Entities().encode;
// Patterns used to locate <img src=...> and <a href=...> tags in an HTML string.
//
// [\s\S] is used instead of . so that a tag spanning several lines still matches
// (. does not match newlines). See https://stackoverflow.com/a/16119722/561309
//
// Capture groups, identical for both patterns:
//   1 - everything between the tag name and the src/href attribute
//   2 - the attribute value, without its surrounding quotes
//   3 - everything between the closing quote and the terminating ">"
//
// Both patterns carry the "g" flag, so exec()/test() keep state in lastIndex
// between calls; the "i" flag makes tag and attribute names case-insensitive.
//
// NOTE(review): the opening and closing quote are matched independently
// (["'] on each side), so a value such as src="it's.png" is cut at the
// apostrophe - confirm whether callers need to handle that case.
const imageRegex = /<img([\s\S]*?)src=["']([\s\S]*?)["']([\s\S]*?)>/gi;
const anchorRegex = /<a([\s\S]*?)href=["']([\s\S]*?)["']([\s\S]*?)>/gi;
/**
 * Regex-based helpers for inspecting and rewriting `<img>` and `<a>` tags in
 * HTML strings.
 *
 * The methods rely on the module-level `imageRegex` / `anchorRegex` patterns,
 * whose capture groups are: (1) the text before the URL attribute, (2) the URL
 * itself, (3) the text after the attribute up to the closing `>`.
 */
class HtmlUtils {

	/**
	 * Concatenates the inner HTML of a document's head and body.
	 *
	 * @param {Object} doc - Object with optional `head` and `body` properties,
	 *   each exposing an `innerHTML` string.
	 * @returns {string} Head HTML followed by body HTML, separated by a newline.
	 *   A missing head or body is skipped.
	 */
	headAndBodyHtml(doc) {
		const output = [];
		if (doc.head) output.push(doc.head.innerHTML);
		if (doc.body) output.push(doc.body.innerHTML);
		return output.join('\n');
	}

	/**
	 * Collects the `src` value of every `<img>` tag found in the HTML.
	 *
	 * @param {string} html - HTML to scan. Falsy input yields an empty array.
	 * @returns {string[]} The `src` values, in document order, exactly as they
	 *   appear in the markup (no decoding is applied).
	 */
	extractImageUrls(html) {
		if (!html) return [];

		// imageRegex is a shared regex with the "g" flag, so exec() resumes from
		// its lastIndex. Reset it so that stale state (e.g. from a scan that was
		// interrupted by an exception) cannot make this call skip matches.
		imageRegex.lastIndex = 0;

		const output = [];
		let matches;
		while ((matches = imageRegex.exec(html))) {
			output.push(matches[2]);
		}
		return output;
	}

	/**
	 * Replaces the `src` of every `<img>` tag with the value returned by `callback`.
	 *
	 * @param {string} html - HTML to process. Falsy input yields `''`.
	 * @param {function(string): string} callback - Receives the current `src`
	 *   and returns the new one. The result is inserted verbatim (not escaped).
	 * @returns {string} The HTML with updated image sources.
	 */
	replaceImageUrls(html, callback) {
		return this.processImageTags(html, data => {
			return {
				type: 'replaceSource',
				src: callback(data.src),
			};
		});
	}

	/**
	 * Runs `callback` for every `<img>` tag and rewrites the tag according to
	 * the action object it returns:
	 *
	 * - falsy value: the tag is left exactly as it was written;
	 * - `{ type: 'replaceElement', html }`: the whole tag is replaced by `html`;
	 * - `{ type: 'replaceSource', src }`: only the `src` value is replaced
	 *   (inserted verbatim, not escaped);
	 * - `{ type: 'setAttributes', attrs }`: the `src` attribute is replaced by
	 *   the attributes rendered from `attrs`, so `attrs` must include `src`
	 *   if the image should keep one.
	 *
	 * @param {string} html - HTML to process. Falsy input yields `''`.
	 * @param {function({src: string}): (Object|null|undefined)} callback -
	 *   Called once per `<img>` tag with the tag's current `src`.
	 * @returns {string} The rewritten HTML.
	 * @throws {Error} If the callback returns an action with an unknown `type`.
	 */
	processImageTags(html, callback) {
		if (!html) return '';

		return html.replace(imageRegex, (v, before, src, after) => {
			const action = callback({ src: src });

			// No action requested: return the original match untouched.
			// Rebuilding the tag with double quotes would corrupt markup whose
			// src is single-quoted and contains a double quote (or vice versa).
			if (!action) return v;

			switch (action.type) {
			case 'replaceElement':
				return action.html;

			case 'replaceSource':
				return `<img${before}src="${action.src}"${after}>`;

			case 'setAttributes':
				return `<img${before}${this.attributesHtml(action.attrs)}${after}>`;

			default:
				throw new Error(`Invalid action: ${action.type}`);
			}
		});
	}

	/**
	 * Rewrites the `href` of every tag matched by `anchorRegex` by passing it
	 * through `urlUtils.prependBaseUrl`.
	 *
	 * @param {string} html - HTML to process. Falsy input yields `''`.
	 * @param {string} baseUrl - Base URL forwarded to `urlUtils.prependBaseUrl`.
	 * @returns {string} The HTML with rewritten links.
	 */
	prependBaseUrl(html, baseUrl) {
		if (!html) return '';

		return html.replace(anchorRegex, (v, before, href, after) => {
			const newHref = urlUtils.prependBaseUrl(href, baseUrl);
			return `<a${before}href="${newHref}"${after}>`;
		});
	}

	/**
	 * Renders an object as a string of HTML attributes (`name="value"` pairs
	 * separated by single spaces). Values are passed through `htmlentities`;
	 * names are emitted as-is.
	 *
	 * @param {Object} attr - Attribute name/value pairs. Only own enumerable
	 *   properties are rendered; `null`/`undefined` yields an empty string.
	 * @returns {string} The rendered attributes.
	 */
	attributesHtml(attr) {
		const output = [];

		for (const n in attr) {
			// Call hasOwnProperty through Object.prototype so that objects without
			// a prototype, or with a shadowing "hasOwnProperty" key, still work.
			if (!Object.prototype.hasOwnProperty.call(attr, n)) continue;
			output.push(`${n}="${htmlentities(attr[n])}"`);
		}

		return output.join(' ');
	}

}
// The module exports a single HtmlUtils instance rather than the class itself.
const htmlUtils = new HtmlUtils();
module.exports = htmlUtils;