Skip to content

Commit a807b70

Browse files
committed
Move helpers method into helpers package
1 parent 9f09429 commit a807b70

File tree

36 files changed

+2351
-1054
lines changed

36 files changed

+2351
-1054
lines changed

packages/metascraper-amazon/index.js

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,48 @@
11
'use strict'
22

3-
const { getUrl, getValue, titleize, isUrl } = require('@metascraper/helpers')
3+
const { url: urlFn, $filter, titleize } = require('@metascraper/helpers')
44
const { URL } = require('url')
55
const { chain } = require('lodash')
66

77
const REGEX_AMAZON_URL = /https?:\/\/(.*amazon\..*\/.*|.*amzn\..*\/.*|.*a\.co\/.*)/i
88
const isAmazonUrl = url => REGEX_AMAZON_URL.test(url)
99

1010
const SUFFIX_LANGUAGES = {
11-
'ca': 'en',
12-
'cn': 'zh',
11+
ca: 'en',
12+
cn: 'zh',
1313
'co.jp': 'ja',
1414
'co.uk': 'en',
1515
'com.mx': 'es',
16-
'com': 'en',
17-
'de': 'de',
18-
'es': 'es',
19-
'fr': 'fr',
20-
'in': 'en',
21-
'it': 'it'
16+
com: 'en',
17+
de: 'de',
18+
es: 'es',
19+
fr: 'fr',
20+
in: 'en',
21+
it: 'it'
2222
}
2323

24-
const getSuffix = host => chain(host)
25-
.replace('www.', '')
26-
.split('.')
27-
.tail()
28-
.join('.')
29-
.value()
24+
const getSuffix = host =>
25+
chain(host)
26+
.replace('www.', '')
27+
.split('.')
28+
.tail()
29+
.join('.')
30+
.value()
3031

31-
const getDomainLanguage = url => (
32-
SUFFIX_LANGUAGES[getSuffix(new URL(url).host)]
33-
)
32+
const getDomainLanguage = url => SUFFIX_LANGUAGES[getSuffix(new URL(url).host)]
3433

3534
const createWrap = fn => rule => ({ htmlDom, url }) => {
3635
const value = isAmazonUrl(url) && rule(htmlDom)
3736
return fn(url, value)
3837
}
3938

4039
const wrap = createWrap((url, value) => value)
41-
const wrapUrl = createWrap((url, value) => isUrl(value) && getUrl(url, value))
40+
const wrapUrl = createWrap((url, value) => urlFn(value, { url }))
4241

4342
module.exports = () => ({
44-
lang: [({ htmlDom: $, meta, url }) => isAmazonUrl(url) && getDomainLanguage(url)],
43+
lang: [
44+
({ htmlDom: $, meta, url }) => isAmazonUrl(url) && getDomainLanguage(url)
45+
],
4546
author: [
4647
wrap($ => titleize($('.contributorNameID').text())),
4748
wrap($ => titleize($('#bylineInfo').text())),
@@ -50,7 +51,7 @@ module.exports = () => ({
5051
title: [
5152
wrap($ => titleize($('#productTitle').text())),
5253
wrap($ => titleize($('#btAsinTitle').text())),
53-
wrap($ => titleize(getValue($, $('h1.a-size-large')))),
54+
wrap($ => titleize($filter($, $('h1.a-size-large')))),
5455
wrap($ => titleize($('#item_name').text()))
5556
],
5657
publisher: [wrap($ => 'Amazon')],

packages/metascraper-amazon/test/index.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ const fs = require('fs')
1010
const readFile = promisify(fs.readFile)
1111

1212
const metascraper = require('metascraper')([
13-
require('metascraper-amazon')(),
13+
require('..')(),
1414
require('metascraper-author')(),
1515
require('metascraper-date')(),
1616
require('metascraper-description')(),
@@ -30,8 +30,8 @@ describe('metascraper-amazon', () => {
3030
)
3131
const url =
3232
'https://www.amazon.co.uk/Vegetable-Perfection-tasty-recipes-shoots/dp/1849757097/ref=asap_bc?ie=UTF8'
33-
const meta = omit(await metascraper({ html, url }), ['date'])
34-
snapshot(meta)
33+
const metadata = omit(await metascraper({ html, url }), ['date'])
34+
snapshot(metadata)
3535
})
3636
})
3737

packages/metascraper-author/index.js

Lines changed: 11 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,9 @@
11
'use strict'
22

3-
const { getValue, isUrl, titleize } = require('@metascraper/helpers')
4-
const { isString } = require('lodash')
3+
const { $filter, author } = require('@metascraper/helpers')
54

65
const REGEX_STRICT = /^\S+\s+\S+/
76

8-
const validator = value => (
9-
isString(value) &&
10-
!isUrl(value, {relative: false}) &&
11-
titleize(value, {removeBy: true})
12-
)
13-
147
/**
158
* Wrap a rule with validation and formatting logic.
169
*
@@ -20,7 +13,7 @@ const validator = value => (
2013

2114
const wrap = rule => ({ htmlDom }) => {
2215
const value = rule(htmlDom)
23-
return validator(value)
16+
return author(value)
2417
}
2518

2619
/**
@@ -44,16 +37,14 @@ module.exports = () => ({
4437
wrap($ => $('meta[name="author"]').attr('content')),
4538
wrap($ => $('meta[property="author"]').attr('content')),
4639
wrap($ => $('meta[property="article:author"]').attr('content')),
47-
wrap($ => getValue($, $('[itemprop*="author"] [itemprop="name"]'))),
48-
wrap($ => getValue($, $('[itemprop*="author"]'))),
49-
wrap($ => getValue($, $('[rel="author"]'))),
50-
strict(wrap($ => getValue($, $('a[class*="author"]')))),
51-
strict(wrap($ => getValue($, $('[class*="author"] a')))),
52-
strict(wrap($ => getValue($, $('a[href*="/author/"]')))),
53-
wrap($ => getValue($, $('a[class*="screenname"]'))),
54-
strict(wrap($ => getValue($, $('[class*="author"]')))),
55-
strict(wrap($ => getValue($, $('[class*="byline"]'))))
40+
wrap($ => $filter($, $('[itemprop*="author"] [itemprop="name"]'))),
41+
wrap($ => $filter($, $('[itemprop*="author"]'))),
42+
wrap($ => $filter($, $('[rel="author"]'))),
43+
strict(wrap($ => $filter($, $('a[class*="author"]')))),
44+
strict(wrap($ => $filter($, $('[class*="author"] a')))),
45+
strict(wrap($ => $filter($, $('a[href*="/author/"]')))),
46+
wrap($ => $filter($, $('a[class*="screenname"]'))),
47+
strict(wrap($ => $filter($, $('[class*="author"]')))),
48+
strict(wrap($ => $filter($, $('[class*="byline"]'))))
5649
]
5750
})
58-
59-
module.exports.validator = validator

packages/metascraper-date/index.js

Lines changed: 2 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,6 @@
11
'use strict'
22

3-
const chrono = require('chrono-node')
4-
const isIso = require('isostring')
5-
6-
const validator = value => {
7-
if (!value) return false
8-
9-
// remove whitespace for easier parsing
10-
value = value.trim()
11-
12-
// convert isodates to restringify, because sometimes they are truncated
13-
if (isIso(value)) return new Date(value).toISOString()
14-
15-
// try to parse with the built-in date parser
16-
const native = new Date(value)
17-
if (!isNaN(native.getTime())) return native.toISOString()
18-
19-
// try to parse a complex date string
20-
const parsed = chrono.parseDate(value)
21-
if (parsed) return parsed.toISOString()
22-
}
3+
const { date } = require('@metascraper/helpers')
234

245
/**
256
* Wrap a rule with validation and formatting logic.
@@ -30,7 +11,7 @@ const validator = value => {
3011

3112
const wrap = rule => ({ htmlDom }) => {
3213
const value = rule(htmlDom)
33-
return validator(value)
14+
return date(value)
3415
}
3516

3617
/**
@@ -68,5 +49,3 @@ module.exports = () => ({
6849
wrap($ => $('[class*="time"]').text())
6950
]
7051
})
71-
72-
module.exports.validator = validator

packages/metascraper-date/package.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@
1616
"url": "https://github.com/microlinkhq/metascraper/issues"
1717
},
1818
"dependencies": {
19-
"chrono-node": "~1.3.5",
20-
"isostring": "0.0.1"
19+
"@metascraper/helpers": "^4.0.1"
2120
},
2221
"devDependencies": {
2322
"standard": "latest"

packages/metascraper-description/index.js

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,6 @@
11
'use strict'
22

3-
const { getValue, titleize } = require('@metascraper/helpers')
4-
const { isString } = require('lodash')
5-
6-
const REGEX_LOCATION = /^[A-Z\s]+\s+[-]\s+/
7-
8-
const removeLocation = value => value.replace(REGEX_LOCATION, '')
9-
10-
const validator = value => (
11-
isString(value) &&
12-
titleize(removeLocation(value), { capitalize: false })
13-
)
3+
const { $filter, description } = require('@metascraper/helpers')
144

155
/**
166
* Wrap a rule with validation and formatting logic.
@@ -21,7 +11,7 @@ const validator = value => (
2111

2212
const wrap = rule => ({ htmlDom }) => {
2313
const value = rule(htmlDom)
24-
return validator(value)
14+
return description(value)
2515
}
2616

2717
/**
@@ -35,9 +25,7 @@ module.exports = () => ({
3525
wrap($ => $('meta[name="description"]').attr('content')),
3626
wrap($ => $('meta[itemprop="description"]').attr('content')),
3727
wrap($ => $('#description').text()),
38-
wrap($ => getValue($, $('[class*="content"] > p'))),
39-
wrap($ => getValue($, $('[class*="content"] p')))
28+
wrap($ => $filter($, $('[class*="content"] > p'))),
29+
wrap($ => $filter($, $('[class*="content"] p')))
4030
]
4131
})
42-
43-
module.exports.validator = validator

packages/metascraper-description/package.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@
1616
"url": "https://github.com/microlinkhq/metascraper/issues"
1717
},
1818
"dependencies": {
19-
"@metascraper/helpers": "^4.0.1",
20-
"lodash": "~4.17.10"
19+
"@metascraper/helpers": "^4.0.1"
2120
},
2221
"devDependencies": {
2322
"standard": "latest"

0 commit comments

Comments
 (0)