diff --git a/README.md b/README.md index f10fcbd..352eab6 100644 --- a/README.md +++ b/README.md @@ -53,29 +53,68 @@ This library includes many rules for a single desired piece of metadata which sh This library provides rules to find the following forms of metadata in a page: Field | Description ---- | --- +--- | --- type | The type of content as defined by [opengraph](http://ogp.me/#types). url | A canonical URL for the page. title | A user displayable title for the page. description | A user displayable description for the page. icon_url | A URL which contains an icon for the page. image_url | A URL which contains a preview image for the page. +keywords | The meta keywords for the page. -To use a single rule to find a particular piece of metadata within a page, simply pass that rule a [Document](https://developer.mozilla.org/en-US/docs/Web/API/Document) object and it will apply each possible selector for that rule until it finds a matching piece of information and return it. +To use a single rule to find a particular piece of metadata within a page, simply pass that rule and a [Document](https://developer.mozilla.org/en-US/docs/Web/API/Document) object to getMetadata and it will apply each possible selector for that rule until it finds a matching piece of information and return it. Example: - const {metadataRules} = require('page-metadata-parser'); + const {getMetadata, metadataRules} = require('page-metadata-parser'); - const pageTitle = metadataRules.title(document); + const pageTitle = getMetadata(doc, {title: metadataRules.title}); + + +### Extending a single rule + +To add your own additional custom parser to an existing rule, you can simply push it into that rule's array. 
+ +Example: + + + const {getMetadata, metadataRules} = require('page-metadata-parser'); + + const customDescriptionRules = metadataRules.description; + + customDescriptionRules.push( + ['meta[name="customDescription"]', node => node.element.content] + ); + + const pageDescription = getMetadata(doc, {description: customDescriptionRules}); ### Using all rules To parse all of the available metadata on a page using all of the rules provided in this library, simply call getMetadata on the [Document](https://developer.mozilla.org/en-US/docs/Web/API/Document). - const {getMetadata} = require('page-metadata-parser'); + const {getMetadata, metadataRules} = require('page-metadata-parser'); + + const pageMetadata = getMetadata(doc, metadataRules); + + +### Nesting rules + +You can nest rules into arbitrarily deep object structures which will mirror the structure of the returned metadata payload. + +Example: + + const {getMetadata, metadataRules} = require('page-metadata-parser'); - const pageMetadata = getMetadata(document); + const nestedMetadataRules = { + images: { + preview: metadataRules.image_url, + icon: metadataRules.icon_url, + }, + text: { + title: metadataRules.title, + description: metadataRules.description, + } + }; - const pageTitle = pageMetadata.title; + const nestedMetadata = getMetadata(doc, nestedMetadataRules); diff --git a/parser.js b/parser.js index b1e44af..f1987d0 100644 --- a/parser.js +++ b/parser.js @@ -1,6 +1,5 @@ const {dom, rule, ruleset} = require('fathom-web'); - function buildRuleset(name, rules) { const reversedRules = Array.from(rules).reverse(); const builtRuleset = ruleset(...reversedRules.map(([query, handler], order) => rule( @@ -24,77 +23,64 @@ function buildRuleset(name, rules) { }; } - -const titleRules = buildRuleset('title', [ - ['meta[property="og:title"]', node => node.element.content], - ['meta[property="twitter:title"]', node => node.element.content], - ['meta[name="hdl"]', node => node.element.content], - ['title', node => 
node.element.text], -]); - -const canonicalUrlRules = buildRuleset('url', [ - ['meta[property="og:url"]', node => node.element.content], - ['link[rel="canonical"]', node => node.element.href], -]); - -const keywordsRules = buildRuleset('keywords', [ - ['meta[name="keywords"]', node => node.element.content], -]); - -const iconRules = buildRuleset('icon', [ - ['link[rel="apple-touch-icon"]', node => node.element.href], - ['link[rel="apple-touch-icon-precomposed"]', node => node.element.href], - ['link[rel="icon"]', node => node.element.href], - ['link[rel="fluid-icon"]', node => node.element.href], - ['link[rel="shortcut icon"]', node => node.element.href], - ['link[rel="Shortcut Icon"]', node => node.element.href], - ['link[rel="mask-icon"]', node => node.element.href], -]); - -const imageRules = buildRuleset('image', [ - ['meta[property="og:image:secure_url"]', node => node.element.content], - ['meta[property="og:image:url"]', node => node.element.content], - ['meta[property="og:image"]', node => node.element.content], - ['meta[property="twitter:image"]', node => node.element.content], - ['meta[name="thumbnail"]', node => node.element.content], -]); - -const descriptionRules = buildRuleset('description', [ - ['meta[property="og:description"]', node => node.element.content], - ['meta[name="description"]', node => node.element.content], -]); - -const typeRules = buildRuleset('type', [ - ['meta[property="og:type"]', node => node.element.content], -]); - - const metadataRules = { - description: descriptionRules, - icon_url: iconRules, - image_url: imageRules, - keywords: keywordsRules, - title: titleRules, - type: typeRules, - url: canonicalUrlRules + description: [ + ['meta[property="og:description"]', node => node.element.content], + ['meta[name="description"]', node => node.element.content], + ], + icon_url: [ + ['link[rel="apple-touch-icon"]', node => node.element.href], + ['link[rel="apple-touch-icon-precomposed"]', node => node.element.href], + 
['link[rel="icon"]', node => node.element.href], + ['link[rel="fluid-icon"]', node => node.element.href], + ['link[rel="shortcut icon"]', node => node.element.href], + ['link[rel="Shortcut Icon"]', node => node.element.href], + ['link[rel="mask-icon"]', node => node.element.href], + ], + image_url: [ + ['meta[property="og:image:secure_url"]', node => node.element.content], + ['meta[property="og:image:url"]', node => node.element.content], + ['meta[property="og:image"]', node => node.element.content], + ['meta[property="twitter:image"]', node => node.element.content], + ['meta[name="thumbnail"]', node => node.element.content], + ], + keywords: [ + ['meta[name="keywords"]', node => node.element.content], + ], + title: [ + ['meta[property="og:title"]', node => node.element.content], + ['meta[property="twitter:title"]', node => node.element.content], + ['meta[name="hdl"]', node => node.element.content], + ['title', node => node.element.text], + ], + type: [ + ['meta[property="og:type"]', node => node.element.content], + ], + url: [ + ['meta[property="og:url"]', node => node.element.content], + ['link[rel="canonical"]', node => node.element.href], + ], }; - function getMetadata(doc, rules) { const metadata = {}; const ruleSet = rules || metadataRules; Object.keys(ruleSet).map(metadataKey => { const metadataRule = ruleSet[metadataKey]; - metadata[metadataKey] = typeof metadataRule === 'function' ? 
- metadataRule(doc) : - getMetadata(doc, metadataRule); + + if(Array.isArray(metadataRule)) { + metadata[metadataKey] = buildRuleset(metadataKey, metadataRule)(doc); + } else { + metadata[metadataKey] = getMetadata(doc, metadataRule); + } }); return metadata; } module.exports = { + buildRuleset, metadataRules, getMetadata }; diff --git a/tests/metadataRules.test.js b/tests/metadataRules.test.js index fcaf0c1..6488209 100644 --- a/tests/metadataRules.test.js +++ b/tests/metadataRules.test.js @@ -1,6 +1,6 @@ // Tests for parse.js const {assert} = require('chai'); -const {metadataRules} = require('../parser'); +const {buildRuleset, metadataRules} = require('../parser'); const {stringToDom} = require('./test-utils'); function buildHTML(tag) { @@ -17,7 +17,8 @@ function ruleTest(testName, testRule, expected, testTag) { it(`finds ${testName}`, () => { const html = buildHTML(testTag); const doc = stringToDom(html); - const found = testRule(doc); + const rule = buildRuleset(testName, testRule); + const found = rule(doc); assert.equal(found, expected, `Unable to find ${testName} in ${html}`); }); }