Skip to content
This repository has been archived by the owner on Feb 24, 2022. It is now read-only.

Commit

Permalink
Merge pull request #52 from JaredKerim-Mozilla/50
Browse files Browse the repository at this point in the history
Make rules extendable #50
  • Loading branch information
jaredlockhart committed Aug 24, 2016
2 parents 7e8b27f + 0cd91e5 commit 9943dc9
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 66 deletions.
53 changes: 46 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,29 +53,68 @@ This library includes many rules for a single desired piece of metadata which sh
This library provides rules to find the following forms of metadata in a page:

Field | Description
--- | ---
--- | ---
type | The type of content as defined by [opengraph](http://ogp.me/#types).
url | A canonical URL for the page.
title | A user displayable title for the page.
description | A user displayable description for the page.
icon_url | A URL which contains an icon for the page.
image_url | A URL which contains a preview image for the page.
keywords | The meta keywords for the page.

To use a single rule to find a particular piece of metadata within a page, simply pass that rule a [Document](https://developer.mozilla.org/en-US/docs/Web/API/Document) object and it will apply each possible selector for that rule until it finds a matching piece of information and return it.
To use a single rule to find a particular piece of metadata within a page, simply pass that rule and a [Document](https://developer.mozilla.org/en-US/docs/Web/API/Document) object to `getMetadata`. It will apply each possible selector for that rule until it finds a matching piece of information, and return an object containing that value under the key you gave the rule.

Example:

const {metadataRules} = require('page-metadata-parser');
const {getMetadata, metadataRules} = require('page-metadata-parser');

const pageTitle = metadataRules.title(document);
const {title: pageTitle} = getMetadata(doc, {title: metadataRules.title});


### Extending a single rule

To add your own additional custom parser to an existing rule, you can simply push it into that rule's array.

Example:


const {getMetadata, metadataRules} = require('page-metadata-parser');

const customDescriptionRules = metadataRules.description;

customDescriptionRules.push(
['meta[name="customDescription"]', node => node.element.content]
);

const {description: pageDescription} = getMetadata(doc, {description: customDescriptionRules});


### Using all rules

To parse all of the available metadata on a page using all of the rules provided in this library, simply call getMetadata on the [Document](https://developer.mozilla.org/en-US/docs/Web/API/Document).

const {getMetadata} = require('page-metadata-parser');
const {getMetadata, metadataRules} = require('page-metadata-parser');

const pageMetadata = getMetadata(doc, metadataRules);


### Nesting rules

You can nest rules into arbitrarily deep object structures which will mirror the structure of the returned metadata payload.

Example:

const {getMetadata, metadataRules} = require('page-metadata-parser');

const pageMetadata = getMetadata(document);
const nestedMetadataRules = {
images: {
preview: metadataRules.image_url,
icon: metadataRules.icon_url,
},
text: {
title: metadataRules.title,
description: metadataRules.description,
}
};

const pageTitle = pageMetadata.title;
const nestedMetadata = getMetadata(doc, nestedMetadataRules);
100 changes: 43 additions & 57 deletions parser.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
const {dom, rule, ruleset} = require('fathom-web');


function buildRuleset(name, rules) {
const reversedRules = Array.from(rules).reverse();
const builtRuleset = ruleset(...reversedRules.map(([query, handler], order) => rule(
Expand All @@ -24,77 +23,64 @@ function buildRuleset(name, rules) {
};
}


const titleRules = buildRuleset('title', [
['meta[property="og:title"]', node => node.element.content],
['meta[property="twitter:title"]', node => node.element.content],
['meta[name="hdl"]', node => node.element.content],
['title', node => node.element.text],
]);

const canonicalUrlRules = buildRuleset('url', [
['meta[property="og:url"]', node => node.element.content],
['link[rel="canonical"]', node => node.element.href],
]);

const keywordsRules = buildRuleset('keywords', [
['meta[name="keywords"]', node => node.element.content],
]);

const iconRules = buildRuleset('icon', [
['link[rel="apple-touch-icon"]', node => node.element.href],
['link[rel="apple-touch-icon-precomposed"]', node => node.element.href],
['link[rel="icon"]', node => node.element.href],
['link[rel="fluid-icon"]', node => node.element.href],
['link[rel="shortcut icon"]', node => node.element.href],
['link[rel="Shortcut Icon"]', node => node.element.href],
['link[rel="mask-icon"]', node => node.element.href],
]);

const imageRules = buildRuleset('image', [
['meta[property="og:image:secure_url"]', node => node.element.content],
['meta[property="og:image:url"]', node => node.element.content],
['meta[property="og:image"]', node => node.element.content],
['meta[property="twitter:image"]', node => node.element.content],
['meta[name="thumbnail"]', node => node.element.content],
]);

const descriptionRules = buildRuleset('description', [
['meta[property="og:description"]', node => node.element.content],
['meta[name="description"]', node => node.element.content],
]);

const typeRules = buildRuleset('type', [
['meta[property="og:type"]', node => node.element.content],
]);


const metadataRules = {
description: descriptionRules,
icon_url: iconRules,
image_url: imageRules,
keywords: keywordsRules,
title: titleRules,
type: typeRules,
url: canonicalUrlRules
description: [
['meta[property="og:description"]', node => node.element.content],
['meta[name="description"]', node => node.element.content],
],
icon_url: [
['link[rel="apple-touch-icon"]', node => node.element.href],
['link[rel="apple-touch-icon-precomposed"]', node => node.element.href],
['link[rel="icon"]', node => node.element.href],
['link[rel="fluid-icon"]', node => node.element.href],
['link[rel="shortcut icon"]', node => node.element.href],
['link[rel="Shortcut Icon"]', node => node.element.href],
['link[rel="mask-icon"]', node => node.element.href],
],
image_url: [
['meta[property="og:image:secure_url"]', node => node.element.content],
['meta[property="og:image:url"]', node => node.element.content],
['meta[property="og:image"]', node => node.element.content],
['meta[property="twitter:image"]', node => node.element.content],
['meta[name="thumbnail"]', node => node.element.content],
],
keywords: [
['meta[name="keywords"]', node => node.element.content],
],
title: [
['meta[property="og:title"]', node => node.element.content],
['meta[property="twitter:title"]', node => node.element.content],
['meta[name="hdl"]', node => node.element.content],
['title', node => node.element.text],
],
type: [
['meta[property="og:type"]', node => node.element.content],
],
url: [
['meta[property="og:url"]', node => node.element.content],
['link[rel="canonical"]', node => node.element.href],
],
};


function getMetadata(doc, rules) {
const metadata = {};
const ruleSet = rules || metadataRules;

Object.keys(ruleSet).map(metadataKey => {
const metadataRule = ruleSet[metadataKey];
metadata[metadataKey] = typeof metadataRule === 'function' ?
metadataRule(doc) :
getMetadata(doc, metadataRule);

if(Array.isArray(metadataRule)) {
metadata[metadataKey] = buildRuleset(metadataKey, metadataRule)(doc);
} else {
metadata[metadataKey] = getMetadata(doc, metadataRule);
}
});

return metadata;
}

module.exports = {
buildRuleset,
metadataRules,
getMetadata
};
5 changes: 3 additions & 2 deletions tests/metadataRules.test.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Tests for parser.js
const {assert} = require('chai');
const {metadataRules} = require('../parser');
const {buildRuleset, metadataRules} = require('../parser');
const {stringToDom} = require('./test-utils');

function buildHTML(tag) {
Expand All @@ -17,7 +17,8 @@ function ruleTest(testName, testRule, expected, testTag) {
it(`finds ${testName}`, () => {
const html = buildHTML(testTag);
const doc = stringToDom(html);
const found = testRule(doc);
const rule = buildRuleset(testName, testRule);
const found = rule(doc);
assert.equal(found, expected, `Unable to find ${testName} in ${html}`);
});
}
Expand Down

0 comments on commit 9943dc9

Please sign in to comment.