Skip to content
This repository has been archived by the owner on Feb 24, 2022. It is now read-only.

Commit

Permalink
Make parser extendable
Browse files Browse the repository at this point in the history
  • Loading branch information
k88hudson committed Jul 15, 2016
1 parent 82ac24e commit 3e0a015
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 100 deletions.
34 changes: 34 additions & 0 deletions lib/default-ruleset.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
module.exports = {
description: [
['meta[property="og:description"]', node => node.element.content],
['meta[name="description"]', node => node.element.content],
],
icon_url: [
['link[rel="apple-touch-icon"]', node => node.element.href],
['link[rel="apple-touch-icon-precomposed"]', node => node.element.href],
['link[rel="icon"]', node => node.element.href],
['link[rel="fluid-icon"]', node => node.element.href],
['link[rel="shortcut icon"]', node => node.element.href],
['link[rel="Shortcut Icon"]', node => node.element.href],
['link[rel="mask-icon"]', node => node.element.href],
],
image_url: [
['meta[property="og:image"]', node => node.element.content],
['meta[property="twitter:image"]', node => node.element.content],
['meta[name="thumbnail"]', node => node.element.content],
['img', node => node.element.src],
],
title: [
['meta[property="og:title"]', node => node.element.content],
['meta[property="twitter:title"]', node => node.element.content],
['meta[name="hdl"]', node => node.element.content],
['title', node => node.element.text],
],
type: [
['meta[property="og:type"]', node => node.element.content],
],
url: [
['meta[property="og:url"]', node => node.element.content],
['link[rel="canonical"]', node => node.element.href],
]
};
127 changes: 36 additions & 91 deletions parser.js
Original file line number Diff line number Diff line change
@@ -1,95 +1,40 @@
const {dom, rule, ruleset} = require('fathom-web');


function buildRuleset(name, rules) {
const reversedRules = Array.from(rules).reverse();
const builtRuleset = ruleset(...reversedRules.map(([query, handler], order) => rule(
dom(query),
node => [{
score: order,
flavor: name,
notes: handler(node),
}]
)));

return doc => {
const kb = builtRuleset.score(doc);
const maxNode = kb.max(name);
if (maxNode) {
const value = maxNode.flavors.get(name);
if (value) {
return value.trim();
const DEFAULT_RULESET = require('./lib/default-ruleset');

const MetadataParser = {
metadataRules: Object.assign({}, DEFAULT_RULESET),
buildRuleset(name, rules) {
const reversedRules = Array.from(rules).reverse();
const builtRuleset = ruleset(...reversedRules.map(([query, handler], order) => rule(
dom(query),
node => [{
score: order,
flavor: name,
notes: handler(node),
}]
)));

return doc => {
const kb = builtRuleset.score(doc);
const maxNode = kb.max(name);
if (maxNode) {
const value = maxNode.flavors.get(name);
if (value) {
return value.trim();
}
}
}
};
}


// Each constant below is the result of buildRuleset(flavorName, rules), where
// rules is a list of [selector, extractor] pairs.

// Page title rules.
const titleRules = buildRuleset('title', [
['meta[property="og:title"]', node => node.element.content],
['meta[property="twitter:title"]', node => node.element.content],
['meta[name="hdl"]', node => node.element.content],
['title', node => node.element.text],
]);

// Canonical URL rules (built under the 'url' flavor name).
const canonicalUrlRules = buildRuleset('url', [
['meta[property="og:url"]', node => node.element.content],
['link[rel="canonical"]', node => node.element.href],
]);


// Icon rules. Note the flavor name is 'icon' while the exported key below is icon_url.
const iconRules = buildRuleset('icon', [
['link[rel="apple-touch-icon"]', node => node.element.href],
['link[rel="apple-touch-icon-precomposed"]', node => node.element.href],
['link[rel="icon"]', node => node.element.href],
['link[rel="fluid-icon"]', node => node.element.href],
['link[rel="shortcut icon"]', node => node.element.href],
['link[rel="Shortcut Icon"]', node => node.element.href],
['link[rel="mask-icon"]', node => node.element.href],
]);

// Image rules. Note the flavor name is 'image' while the exported key below is image_url.
const imageRules = buildRuleset('image', [
['meta[property="og:image"]', node => node.element.content],
['meta[property="twitter:image"]', node => node.element.content],
['meta[name="thumbnail"]', node => node.element.content],
['img', node => node.element.src],
]);

// Description rules.
const descriptionRules = buildRuleset('description', [
['meta[property="og:description"]', node => node.element.content],
['meta[name="description"]', node => node.element.content],
]);

// Type rules (Open Graph og:type only).
const typeRules = buildRuleset('type', [
['meta[property="og:type"]', node => node.element.content],
]);


// Maps each output metadata key to its built rule; getMetadata iterates
// over these keys when no explicit rule set is passed.
const metadataRules = {
description: descriptionRules,
icon_url: iconRules,
image_url: imageRules,
title: titleRules,
type: typeRules,
url: canonicalUrlRules
};
},
getMetadata(doc) {
const metadata = {};

Object.keys(MetadataParser.metadataRules).forEach(metadataKey => {
const metadataRule = MetadataParser.buildRuleset(metadataKey, MetadataParser.metadataRules[metadataKey]);
metadata[metadataKey] = metadataRule(doc);
});

return metadata;
}
};


/**
 * Collects metadata from a document by applying every rule in a rule set.
 *
 * @param {Object} doc - Document passed unchanged to each rule function.
 * @param {Object} [rules] - Map of metadata key to either a rule function
 *   or a nested map of rules. When falsy, the module-level metadataRules
 *   is used instead.
 * @returns {Object} Object with the same keys as the rule set. A function
 *   rule contributes its return value; any other value is treated as a
 *   nested rule set and contributes a nested result object.
 */
function getMetadata(doc, rules) {
  const metadata = {};
  const ruleSet = rules || metadataRules;

  // forEach rather than map: the loop only fills `metadata`, so building
  // and discarding a mapped array would be misleading.
  Object.keys(ruleSet).forEach(metadataKey => {
    const metadataRule = ruleSet[metadataKey];
    metadata[metadataKey] = typeof metadataRule === 'function' ?
      metadataRule(doc) :
      getMetadata(doc, metadataRule);
  });

  return metadata;
}

module.exports = {
metadataRules,
getMetadata
};
module.exports = MetadataParser;
18 changes: 9 additions & 9 deletions tests/metadataRules.test.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Tests for parser.js
const {assert} = require('chai');
const {metadataRules} = require('../parser');
const {metadataRules, buildRuleset} = require('../parser');
const {stringToDom} = require('./test-utils');

function buildHTML(tag) {
Expand All @@ -13,11 +13,11 @@ function buildHTML(tag) {
`;
}

function ruleTest(testName, testRule, expected, testTag) {
function ruleTest(testName, testRuleName, expected, testTag) {
it(`finds ${testName}`, () => {
const html = buildHTML(testTag);
const doc = stringToDom(html);
const found = testRule(doc);
const found = buildRuleset(testRuleName, metadataRules[testRuleName])(doc);
assert.equal(found, expected, `Unable to find ${testName} in ${html}`);
});
}
Expand All @@ -33,7 +33,7 @@ describe('Title Rule Tests', function() {
['title', `<title>${pageTitle}</title>`],
];

ruleTests.map(([testName, testTag]) => ruleTest(testName, metadataRules.title, pageTitle, testTag));
ruleTests.map(([testName, testTag]) => ruleTest(testName, 'title', pageTitle, testTag));
});


Expand All @@ -45,7 +45,7 @@ describe('Canonical URL Rule Tests', function() {
['rel=canonical', `<link rel="canonical" href="${pageUrl}" />`],
];

ruleTests.map(([testName, testTag]) => ruleTest(testName, metadataRules.url, pageUrl, testTag));
ruleTests.map(([testName, testTag]) => ruleTest(testName, 'url', pageUrl, testTag));
});


Expand All @@ -62,7 +62,7 @@ describe('Icon Rule Tests', function() {
['mask-icon', `<link rel="mask-icon" href="${pageIcon}" />`],
];

ruleTests.map(([testName, testTag]) => ruleTest(testName, metadataRules.icon_url, pageIcon, testTag));
ruleTests.map(([testName, testTag]) => ruleTest(testName, 'icon_url', pageIcon, testTag));
});


Expand All @@ -76,7 +76,7 @@ describe('Image Rule Tests', function() {
['img', `<img src="${pageImage}" />`],
];

ruleTests.map(([testName, testTag]) => ruleTest(testName, metadataRules.image_url, pageImage, testTag));
ruleTests.map(([testName, testTag]) => ruleTest(testName, 'image_url', pageImage, testTag));
});


Expand All @@ -88,7 +88,7 @@ describe('Description Rule Tests', function() {
['description', `<meta name="description" content="${pageDescription}" />`],
];

ruleTests.map(([testName, testTag]) => ruleTest(testName, metadataRules.description, pageDescription, testTag));
ruleTests.map(([testName, testTag]) => ruleTest(testName, 'description', pageDescription, testTag));
});


Expand All @@ -99,5 +99,5 @@ describe('Type Rule Tests', function() {
['og:type', `<meta property="og:type" content="${pageType}" />`],
];

ruleTests.map(([testName, testTag]) => ruleTest(testName, metadataRules.type, pageType, testTag));
ruleTests.map(([testName, testTag]) => ruleTest(testName, 'type', pageType, testTag));
});

0 comments on commit 3e0a015

Please sign in to comment.