Skip to content
This repository has been archived by the owner on Feb 24, 2022. It is now read-only.

Commit

Permalink
Use absolute URLs #16
Browse files Browse the repository at this point in the history
Fixes #16
  • Loading branch information
jaredlockhart committed Aug 25, 2016
1 parent 0aab05b commit 208a7ce
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 44 deletions.
129 changes: 87 additions & 42 deletions parser.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
const urlparse = require('url');
const {dom, rule, ruleset} = require('fathom-web');

function buildRuleset(name, rules) {
/**
 * Resolve a possibly-relative URL against the URL of the page it came from.
 *
 * @param {string} base - Absolute URL of the page (e.g. `context.url`).
 * @param {string} relative - URL taken from the document; may be relative
 *   or already absolute.
 * @returns {string} The resolved absolute URL when `relative` has no host.
 *   `relative` is returned unchanged when it already names a host, or when
 *   either argument is not a string (there is nothing to resolve).
 */
function makeUrlAbsolute(base, relative) {
  // The extracted value can be null (an element without the attribute) and
  // the page URL can be absent (no context supplied). url.parse() and
  // url.resolve() throw on non-string input, so pass the value through
  // untouched instead of aborting the whole metadata extraction.
  if (typeof relative !== 'string' || typeof base !== 'string') {
    return relative;
  }

  const relativeParsed = urlparse.parse(relative);

  // A null host means the URL is relative to the page it appeared on.
  if (relativeParsed.host === null) {
    return urlparse.resolve(base, relative);
  }

  return relative;
}

function buildRuleset(name, rules, processors) {
const reversedRules = Array.from(rules).reverse();
const builtRuleset = ruleset(...reversedRules.map(([query, handler], order) => rule(
dom(query),
Expand All @@ -11,11 +22,19 @@ function buildRuleset(name, rules) {
}]
)));

return doc => {
return (doc, context) => {
const kb = builtRuleset.score(doc);
const maxNode = kb.max(name);

if (maxNode) {
const value = maxNode.flavors.get(name);
let value = maxNode.flavors.get(name);

if (processors) {
processors.forEach(processor => {
value = processor(value, context);
});
}

if (value) {
return value.trim();
}
Expand All @@ -24,53 +43,79 @@ function buildRuleset(name, rules) {
}

const metadataRules = {
description: [
['meta[property="og:description"]', node => node.element.getAttribute('content')],
['meta[name="description"]', node => node.element.getAttribute('content')],
],
icon_url: [
['link[rel="apple-touch-icon"]', node => node.element.getAttribute('href')],
['link[rel="apple-touch-icon-precomposed"]', node => node.element.getAttribute('href')],
['link[rel="icon"]', node => node.element.getAttribute('href')],
['link[rel="fluid-icon"]', node => node.element.getAttribute('href')],
['link[rel="shortcut icon"]', node => node.element.getAttribute('href')],
['link[rel="Shortcut Icon"]', node => node.element.getAttribute('href')],
['link[rel="mask-icon"]', node => node.element.getAttribute('href')],
],
image_url: [
['meta[property="og:image:secure_url"]', node => node.element.getAttribute('content')],
['meta[property="og:image:url"]', node => node.element.getAttribute('content')],
['meta[property="og:image"]', node => node.element.getAttribute('content')],
['meta[property="twitter:image"]', node => node.element.getAttribute('content')],
['meta[name="thumbnail"]', node => node.element.getAttribute('content')],
],
keywords: [
['meta[name="keywords"]', node => node.element.getAttribute('content')],
],
title: [
['meta[property="og:title"]', node => node.element.getAttribute('content')],
['meta[property="twitter:title"]', node => node.element.getAttribute('content')],
['meta[name="hdl"]', node => node.element.getAttribute('content')],
['title', node => node.element.text],
],
type: [
['meta[property="og:type"]', node => node.element.getAttribute('content')],
],
url: [
['meta[property="og:url"]', node => node.element.getAttribute('content')],
['link[rel="canonical"]', node => node.element.getAttribute('href')],
],
description: {
rules: [
['meta[property="og:description"]', node => node.element.getAttribute('content')],
['meta[name="description"]', node => node.element.getAttribute('content')],
],
},

icon_url: {
rules: [
['link[rel="apple-touch-icon"]', node => node.element.getAttribute('href')],
['link[rel="apple-touch-icon-precomposed"]', node => node.element.getAttribute('href')],
['link[rel="icon"]', node => node.element.getAttribute('href')],
['link[rel="fluid-icon"]', node => node.element.getAttribute('href')],
['link[rel="shortcut icon"]', node => node.element.getAttribute('href')],
['link[rel="Shortcut Icon"]', node => node.element.getAttribute('href')],
['link[rel="mask-icon"]', node => node.element.getAttribute('href')],
],
processors: [
(icon_url, context) => makeUrlAbsolute(context.url, icon_url)
]
},

image_url: {
rules: [
['meta[property="og:image:secure_url"]', node => node.element.getAttribute('content')],
['meta[property="og:image:url"]', node => node.element.getAttribute('content')],
['meta[property="og:image"]', node => node.element.getAttribute('content')],
['meta[property="twitter:image"]', node => node.element.getAttribute('content')],
['meta[name="thumbnail"]', node => node.element.getAttribute('content')],
],
processors: [
(image_url, context) => makeUrlAbsolute(context.url, image_url)
],
},

keywords: {
rules: [
['meta[name="keywords"]', node => node.element.getAttribute('content')],
],
},

title: {
rules: [
['meta[property="og:title"]', node => node.element.getAttribute('content')],
['meta[property="twitter:title"]', node => node.element.getAttribute('content')],
['meta[name="hdl"]', node => node.element.getAttribute('content')],
['title', node => node.element.text],
],
},

type: {
rules: [
['meta[property="og:type"]', node => node.element.getAttribute('content')],
],
},

url: {
rules: [
['meta[property="og:url"]', node => node.element.getAttribute('content')],
['link[rel="canonical"]', node => node.element.getAttribute('href')],
],
},
};

function getMetadata(doc, rules) {
function getMetadata(doc, rules, context = {}) {
const metadata = {};
const ruleSet = rules || metadataRules;

Object.keys(ruleSet).map(metadataKey => {
const metadataRule = ruleSet[metadataKey];

if(Array.isArray(metadataRule)) {
metadata[metadataKey] = buildRuleset(metadataKey, metadataRule)(doc);
if(Array.isArray(metadataRule.rules)) {
metadata[metadataKey] = buildRuleset(metadataKey, metadataRule.rules, metadataRule.processors)(doc, context);
} else {
metadata[metadataKey] = getMetadata(doc, metadataRule);
}
Expand Down
21 changes: 21 additions & 0 deletions tests/getMetadata.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,27 @@ describe('Get Metadata Tests', function() {
assert.equal(metadata.url, sampleUrl, `Unable to find ${sampleUrl} in ${sampleHtml}`);
});

it('uses absolute URLs when url parameter passed in through context', () => {
const relativeHtml = `
<html>
<head>
<meta property="og:description" content="${sampleDescription}" />
<link rel="icon" href="/favicon.ico" />
<meta property="og:image" content="/image.png" />
<meta property="og:title" content="${sampleTitle}" />
<meta property="og:type" content="${sampleType}" />
<meta property="og:url" content="${sampleUrl}" />
</head>
</html>
`;

const doc = stringToDom(relativeHtml);
const metadata = getMetadata(doc, metadataRules, {url: 'http://www.example.com/'});

assert.equal(metadata.icon_url, sampleIcon, `Unable to find ${sampleIcon} in ${relativeHtml}`);
assert.equal(metadata.image_url, sampleImageHTTP, `Unable to find ${sampleImageHTTP} in ${relativeHtml}`);
});

it('allows custom rules', () => {
const doc = stringToDom(sampleHtml);
const rules = {
Expand Down
10 changes: 8 additions & 2 deletions tests/metadataRules.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ function ruleTest(testName, testRule, expected, testTag) {
it(`finds ${testName}`, () => {
const html = buildHTML(testTag);
const doc = stringToDom(html);
const rule = buildRuleset(testName, testRule);
const found = rule(doc);
const rule = buildRuleset(testName, testRule.rules, testRule.processors);
const found = rule(doc, {
url: 'http://www.example.com/'
});
assert.equal(found, expected, `Unable to find ${testName} in ${html}`);
});
}
Expand Down Expand Up @@ -52,6 +54,7 @@ describe('Canonical URL Rule Tests', function() {

describe('Icon Rule Tests', function() {
const pageIcon = 'http://www.example.com/favicon.ico';
const relativeIcon = '/favicon.ico';

const ruleTests = [
['apple-touch-icon', `<link rel="apple-touch-icon" href="${pageIcon}" />`],
Expand All @@ -61,6 +64,7 @@ describe('Icon Rule Tests', function() {
['shortcut icon', `<link rel="shortcut icon" href="${pageIcon}" />`],
['Shortcut Icon', `<link rel="Shortcut Icon" href="${pageIcon}" />`],
['mask-icon', `<link rel="mask-icon" href="${pageIcon}" />`],
['relative icon', `<link rel="icon" href="${relativeIcon}" />`],
];

ruleTests.map(([testName, testTag]) => ruleTest(testName, metadataRules.icon_url, pageIcon, testTag));
Expand All @@ -69,13 +73,15 @@ describe('Icon Rule Tests', function() {

describe('Image Rule Tests', function() {
const pageImage = 'http://www.example.com/image.png';
const relativeImage = '/image.png';

const ruleTests = [
['og:image', `<meta property="og:image" content="${pageImage}" />`],
['og:image:url', `<meta property="og:image:url" content="${pageImage}" /> `],
['og:image:secure_url', `<meta property="og:image:secure_url" content="${pageImage}" /> `],
['twitter:image', `<meta property="twitter:image" content="${pageImage}" />`],
['thumbnail', `<meta name="thumbnail" content="${pageImage}" />`],
['relative image', `<meta name="thumbnail" content="${relativeImage}" />`],
];

ruleTests.map(([testName, testTag]) => ruleTest(testName, metadataRules.image_url, pageImage, testTag));
Expand Down

0 comments on commit 208a7ce

Please sign in to comment.