This repository has been archived by the owner on Feb 24, 2022. It is now read-only.
/
parser.js
95 lines (77 loc) · 2.58 KB
/
parser.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
const {dom, rule, ruleset} = require('fathom-web');
/**
 * Builds an extractor function for a single metadata field.
 *
 * Each entry of `rules` is a `[selector, handler]` pair. The list is copied and
 * reversed before scoring, so the first entry receives the largest score
 * (`rules.length - 1`) and the last entry receives 0.
 *
 * @param {string} name - Flavor name attached to every scored node; also the
 *   key used to read the winning node's notes back.
 * @param {Iterable<Array>} rules - `[selector, handler]` pairs; `handler` is
 *   called with the matched node and its return value is stored as the notes.
 * @returns {function(*): (string|undefined)} Function that scores a document
 *   and returns the trimmed notes of the node reported by `max(name)`, or
 *   `undefined` when there is no such node or its notes are falsy.
 */
function buildRuleset(name, rules) {
  // Work on a copy so the caller's list is never reordered.
  const byAscendingPriority = Array.from(rules);
  byAscendingPriority.reverse();

  const fathomRules = [];
  byAscendingPriority.forEach((pair, priority) => {
    const [selector, extractNotes] = pair;
    const ranker = node => [{
      score: priority,
      flavor: name,
      notes: extractNotes(node),
    }];
    fathomRules.push(rule(dom(selector), ranker));
  });

  const scorer = ruleset(...fathomRules);

  return doc => {
    // NOTE(review): presumably `max` yields the highest-scoring node of this
    // flavor; confirm against the Fathom version in use.
    const best = scorer.score(doc).max(name);
    const notes = best ? best.flavors.get(name) : undefined;
    // Missing or empty notes are reported as "no value".
    return notes ? notes.trim() : undefined;
  };
}
// Page title extractor. Entries listed earlier receive a higher score from
// buildRuleset; the <title> element is the last entry.
const titleRules = buildRuleset('title', [
  ['meta[property="og:title"]', ({element}) => element.content],
  ['meta[property="twitter:title"]', ({element}) => element.content],
  ['meta[name="hdl"]', ({element}) => element.content],
  ['title', ({element}) => element.text],
]);
// Canonical URL extractor: the og:url meta content is listed first, then the
// href of <link rel="canonical">.
const canonicalUrlRules = buildRuleset('url', [
  ['meta[property="og:url"]', ({element}) => element.content],
  ['link[rel="canonical"]', ({element}) => element.href],
]);
// Site icon extractor. Every handler reads the href of a matching <link>;
// entries listed earlier receive a higher score from buildRuleset.
const iconRules = buildRuleset('icon', [
  ['link[rel="apple-touch-icon"]', ({element}) => element.href],
  ['link[rel="apple-touch-icon-precomposed"]', ({element}) => element.href],
  ['link[rel="icon"]', ({element}) => element.href],
  ['link[rel="fluid-icon"]', ({element}) => element.href],
  ['link[rel="shortcut icon"]', ({element}) => element.href],
  ['link[rel="Shortcut Icon"]', ({element}) => element.href],
  ['link[rel="mask-icon"]', ({element}) => element.href],
]);
// Preview image extractor: meta tags are listed first; the src of any <img>
// element is the last entry.
const imageRules = buildRuleset('image', [
  ['meta[property="og:image"]', ({element}) => element.content],
  ['meta[property="twitter:image"]', ({element}) => element.content],
  ['meta[name="thumbnail"]', ({element}) => element.content],
  ['img', ({element}) => element.src],
]);
// Description extractor: og:description is listed before the plain
// description meta tag.
const descriptionRules = buildRuleset('description', [
  ['meta[property="og:description"]', ({element}) => element.content],
  ['meta[name="description"]', ({element}) => element.content],
]);
// Content type extractor: reads the content of the og:type meta tag only.
const typeRules = buildRuleset('type', [
  ['meta[property="og:type"]', ({element}) => element.content],
]);
// Default rule map used by getMetadata when the caller passes no rules.
// Each key becomes a property name on the returned metadata object and each
// value is the extractor function built for that field. Key order here is the
// order in which getMetadata fills in the result.
const metadataRules = {
description: descriptionRules,
icon_url: iconRules,
image_url: imageRules,
title: titleRules,
type: typeRules,
url: canonicalUrlRules
};
/**
 * Runs a map of extractors against a document and collects the results.
 *
 * @param {*} doc - Document handed unchanged to every extractor.
 * @param {Object} [rules] - Map of field name to extractor function, or to a
 *   nested map of the same shape. Falls back to `metadataRules` when falsy.
 * @returns {Object} Object with the same keys as the rule map; function
 *   entries are replaced by their return value, other entries by the result
 *   of a recursive call.
 */
function getMetadata(doc, rules) {
  const activeRules = rules || metadataRules;
  const result = {};

  for (const field of Object.keys(activeRules)) {
    const entry = activeRules[field];
    if (typeof entry === 'function') {
      result[field] = entry(doc);
    } else {
      // Anything that is not a function is treated as a nested rule map.
      // Because of the `||` fallback above, a falsy entry recurses into the
      // default `metadataRules`.
      result[field] = getMetadata(doc, entry);
    }
  }

  return result;
}
// Public API: the default rule map and the function that applies a rule map
// to a document.
module.exports = {
metadataRules,
getMetadata
};