Skip to content

Commit 1ad56ae

Browse files
committed
Add logo support
1 parent 84c0588 commit 1ad56ae

File tree

146 files changed

+344
-151
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

146 files changed

+344
-151
lines changed

index.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
'use strict'
22

3-
const reduce = require('lodash.reduce')
3+
const {reduce} = require('lodash')
44
const {ensureAsync} = require('async')
55
const {promisify} = require('util')
66

package.json

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,9 @@
2222
"cheerio": "~1.0.0-rc.2",
2323
"chrono-node": "~1.3.4",
2424
"condense-whitespace": "~1.0.0",
25+
"is-relative-url": "~2.0.0",
2526
"isostring": "0.0.1",
26-
"lodash.flow": "~3.5.0",
27-
"lodash.isstring": "~4.0.1",
28-
"lodash.reduce": "~4.6.0",
27+
"lodash": "~4.17.4",
2928
"normalize-url": "~1.9.1",
3029
"req-all": "~1.0.0",
3130
"sanitize-html": "~1.14.1",

src/get-data/rules/author.js

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
'use strict'
22

33
const condenseWhitespace = require('condense-whitespace')
4-
const isString = require('lodash.isstring')
4+
const {isString, flow} = require('lodash')
55
const toTitle = require('to-title-case')
66
const urlRegex = require('url-regex')
7-
const flow = require('lodash.flow')
87

98
const REGEX_BY = /^[\s\n]*by[\s\n]*/im
109
const REGEX_STRICT = /^\S+\s+\S+/

src/get-data/rules/description.js

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
'use strict'
22

33
const condenseWhitespace = require('condense-whitespace')
4-
const isString = require('lodash.isstring')
54
const smartquotes = require('smartquotes')
6-
const flow = require('lodash.flow')
5+
const {isString, flow} = require('lodash')
76

87
const REGEX_LOCATION = /^[A-Z\s]+\s+[-]\s+/
98

src/get-data/rules/logo.js

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
'use strict'
2+
3+
const isRelativeUrl = require('is-relative-url')
4+
const sanetizeUrl = require('normalize-url')
5+
const {flow, chain, first, isString, concat, toNumber, split} = require('lodash')
6+
const {resolve: resolveUrl} = require('url')
7+
8+
const normalizeUrl = url => sanetizeUrl(url, {stripWWW: false})
9+
10+
/**
11+
* Wrap a rule with validation and formatting logic.
12+
*
13+
* @param {Function} rule
14+
* @return {Function} wrapped
15+
*/
16+
17+
const wrap = rule => (htmlDom, baseUrl) => {
18+
const value = rule(htmlDom)
19+
if (!isString(value)) return
20+
const url = isRelativeUrl(value) ? resolveUrl(baseUrl, value) : value
21+
return normalizeUrl(url)
22+
}
23+
24+
const getSize = flow([str => split(str, 'x'), first, toNumber])
25+
26+
const getDomNodeSizes = (domNodes, attr) =>
27+
chain(domNodes)
28+
.map(({attribs}) => ({
29+
size: getSize(attribs.sizes),
30+
link: attribs[attr]
31+
}))
32+
.value()
33+
34+
const getSizes = ($, collection) => chain(collection)
35+
.reduce((acc, {tag, attr}) => {
36+
const domNode = $(tag).get()
37+
const selectors = getDomNodeSizes(domNode, attr)
38+
return concat(acc, selectors)
39+
}, [])
40+
.sortBy(({size}) => -size)
41+
.value()
42+
43+
const sizeSelectors = [
44+
{tag: 'meta[property="og:logo"]', attr: 'content'},
45+
{tag: 'link[rel="apple-touch-icon"]', attr: 'href'},
46+
{tag: 'link[rel="apple-touch-icon-precomposed"]', attr: 'href'},
47+
{tag: 'meta[name="msapplication-TileImage"]', attr: 'content'},
48+
{tag: 'link[rel="icon"]', attr: 'href'},
49+
{tag: 'link[rel="shortcut icon"]', attr: 'href'}
50+
]
51+
52+
/**
53+
* Rules.
54+
*/
55+
56+
module.exports = [
57+
wrap($ => {
58+
const sizes = getSizes($, sizeSelectors)
59+
const size = chain(sizes).first().get('link').value()
60+
return size
61+
}),
62+
wrap($ => '/favicon.ico')
63+
]

src/get-data/rules/publisher.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
'use strict'
22

3-
const isString = require('lodash.isstring')
3+
const {isString} = require('lodash')
44
const condenseWhitespace = require('condense-whitespace')
55

66
const REGEX_RSS = /^(.*?)\s[-|]\satom$/i

src/get-data/rules/title.js

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
'use strict'
22

33
const condenseWhitespace = require('condense-whitespace')
4-
const isString = require('lodash.isstring')
4+
const {isString, flow} = require('lodash')
55
const smartquotes = require('smartquotes')
6-
const flow = require('lodash.flow')
76

87
const sanetize = flow([
98
condenseWhitespace,

src/get-data/rules/url.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
'use strict'
22

33
const sanetizeUrl = require('normalize-url')
4-
const isString = require('lodash.isstring')
4+
const {isString} = require('lodash')
55

66
const normalizeUrl = url => sanetizeUrl(url, {stripWWW: false})
77

src/html/index.js

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
'use strict'
22

33
const sanitizeHtml = require('sanitize-html')
4-
const flow = require('lodash.flow')
4+
const {flow} = require('lodash')
55
const cheerio = require('cheerio')
66

77
const sanitize = html => sanitizeHtml(html, {
@@ -15,6 +15,10 @@ const sanitize = html => sanitizeHtml(html, {
1515
a: (tagName, attribs) => {
1616
if (attribs.href) attribs.href = attribs.href.toLowerCase()
1717
return {tagName, attribs}
18+
},
19+
link: (tagName, attribs) => {
20+
if (attribs.rel) attribs.rel = attribs.rel.toLowerCase()
21+
return {tagName, attribs}
1822
}
1923
}
2024
})

test/web/anandtech/index.js

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@ const fs = require('fs')
99
const getMetaData = require('../../..')
1010
const readFile = promisify(fs.readFile)
1111

12-
it('arstechnica', async () => {
12+
it('anandtech', async () => {
1313
const html = await readFile(resolve(__dirname, 'input.html'))
1414
const json = await loadJSON(resolve(__dirname, 'output.json'))
15-
const metadata = await getMetaData({html})
15+
const {url} = json
16+
const metadata = await getMetaData({html, url})
1617
should(metadata).be.eql(json)
1718
})

0 commit comments

Comments
 (0)