Skip to content

Commit 329452b

Browse files
committed
Support load config
1 parent 71b83b3 commit 329452b

File tree

13 files changed

+290
-218
lines changed

13 files changed

+290
-218
lines changed

packages/metascraper-author/index.js

Lines changed: 51 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -61,63 +61,67 @@ const getFirst = ($, collection) =>
6161
* Rules.
6262
*/
6363

64-
module.exports = [
65-
wrap($ => $('meta[property="author"]').attr('content')),
66-
wrap($ => $('meta[property="article:author"]').attr('content')),
67-
wrap($ => $('meta[name="author"]').attr('content')),
68-
wrap($ => $('meta[name="sailthru.author"]').attr('content')),
69-
wrap($ =>
70-
$('[rel="author"]')
71-
.first()
72-
.text()
73-
),
74-
wrap($ =>
75-
$('[itemprop*="author"] [itemprop="name"]')
76-
.first()
77-
.text()
78-
),
79-
wrap($ =>
80-
$('[itemprop*="author"]')
81-
.first()
82-
.text()
83-
),
84-
wrap($ => $('meta[property="book:author"]').attr('content')),
85-
strict(
64+
module.exports = () => {
65+
const rules = [
66+
wrap($ => $('meta[property="author"]').attr('content')),
67+
wrap($ => $('meta[property="article:author"]').attr('content')),
68+
wrap($ => $('meta[name="author"]').attr('content')),
69+
wrap($ => $('meta[name="sailthru.author"]').attr('content')),
8670
wrap($ =>
87-
$('a[class*="author"]')
71+
$('[rel="author"]')
8872
.first()
8973
.text()
90-
)
91-
),
92-
strict(
74+
),
9375
wrap($ =>
94-
$('[class*="author"] a')
76+
$('[itemprop*="author"] [itemprop="name"]')
9577
.first()
9678
.text()
97-
)
98-
),
99-
strict(wrap($ => getFirst($, $('a[href*="/author/"]')))),
100-
wrap($ =>
101-
$('a[class*="screenname"]')
102-
.first()
103-
.text()
104-
),
105-
strict(
79+
),
10680
wrap($ =>
107-
$('[class*="author"]')
81+
$('[itemprop*="author"]')
10882
.first()
10983
.text()
110-
)
111-
),
112-
strict(
84+
),
85+
wrap($ => $('meta[property="book:author"]').attr('content')),
86+
strict(
87+
wrap($ =>
88+
$('a[class*="author"]')
89+
.first()
90+
.text()
91+
)
92+
),
93+
strict(
94+
wrap($ =>
95+
$('[class*="author"] a')
96+
.first()
97+
.text()
98+
)
99+
),
100+
strict(wrap($ => getFirst($, $('a[href*="/author/"]')))),
113101
wrap($ =>
114-
$('[class*="byline"]')
102+
$('a[class*="screenname"]')
115103
.first()
116104
.text()
117-
)
118-
),
119-
wrap($ => getFirst($, $('.fullname'))),
120-
wrap($ => $('[class*="user-info"]').text())
121-
]
105+
),
106+
strict(
107+
wrap($ =>
108+
$('[class*="author"]')
109+
.first()
110+
.text()
111+
)
112+
),
113+
strict(
114+
wrap($ =>
115+
$('[class*="byline"]')
116+
.first()
117+
.text()
118+
)
119+
),
120+
wrap($ => getFirst($, $('.fullname'))),
121+
wrap($ => $('[class*="user-info"]').text())
122+
]
123+
124+
rules.propName = 'author'
122125

123-
module.exports.propName = 'author'
126+
return rules
127+
}

packages/metascraper-clearbit-logo

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Subproject commit 1bfe0855bd7f2059cacf1a0ae7ba114018e11cfa

packages/metascraper-date/index.js

Lines changed: 35 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -33,35 +33,39 @@ const wrap = rule => ({ htmlDom }) => {
3333
* Rules.
3434
*/
3535

36-
module.exports = [
37-
wrap($ => $('meta[property="article:published_time"]').attr('content')),
38-
wrap($ => $('meta[name="dc.date"]').attr('content')),
39-
wrap($ => $('meta[name="dc.date.issued"]').attr('content')),
40-
wrap($ => $('meta[name="dc.date.created"]').attr('content')),
41-
wrap($ => $('meta[name="date"]').attr('content')),
42-
wrap($ => $('meta[name="dcterms.date"]').attr('content')),
43-
wrap($ => $('[itemprop="datePublished"]').attr('content')),
44-
wrap($ => $('time[itemprop*="pubdate"]').attr('datetime')),
45-
wrap($ => $('[property*="dc:date"]').attr('content')),
46-
wrap($ => $('[property*="dc:created"]').attr('content')),
47-
wrap($ => $('time[datetime][pubdate]').attr('datetime')),
48-
wrap($ => $('meta[name="sailthru.date"]').attr('content')),
49-
wrap($ => $('meta[property="book:release_date"]').attr('content')),
50-
wrap($ => $('time[datetime]').attr('datetime')),
51-
wrap($ => $('[class*="byline"]').text()),
52-
wrap($ => $('[class*="dateline"]').text()),
53-
wrap($ => $('[id*="date"]').text()),
54-
wrap($ => $('[class*="date"]').text()),
55-
wrap($ => $('[id*="publish"]').text()),
56-
wrap($ => $('[class*="publish"]').text()),
57-
wrap($ => $('[id*="post-timestamp"]').text()),
58-
wrap($ => $('[class*="post-timestamp"]').text()),
59-
wrap($ => $('[id*="post-meta"]').text()),
60-
wrap($ => $('[class*="post-meta"]').text()),
61-
wrap($ => $('[id*="metadata"]').text()),
62-
wrap($ => $('[class*="metadata"]').text()),
63-
wrap($ => $('[id*="time"]').text()),
64-
wrap($ => $('[class*="time"]').text())
65-
]
36+
module.exports = () => {
37+
const rules = [
38+
wrap($ => $('meta[property="article:published_time"]').attr('content')),
39+
wrap($ => $('meta[name="dc.date"]').attr('content')),
40+
wrap($ => $('meta[name="dc.date.issued"]').attr('content')),
41+
wrap($ => $('meta[name="dc.date.created"]').attr('content')),
42+
wrap($ => $('meta[name="date"]').attr('content')),
43+
wrap($ => $('meta[name="dcterms.date"]').attr('content')),
44+
wrap($ => $('[itemprop="datePublished"]').attr('content')),
45+
wrap($ => $('time[itemprop*="pubdate"]').attr('datetime')),
46+
wrap($ => $('[property*="dc:date"]').attr('content')),
47+
wrap($ => $('[property*="dc:created"]').attr('content')),
48+
wrap($ => $('time[datetime][pubdate]').attr('datetime')),
49+
wrap($ => $('meta[name="sailthru.date"]').attr('content')),
50+
wrap($ => $('meta[property="book:release_date"]').attr('content')),
51+
wrap($ => $('time[datetime]').attr('datetime')),
52+
wrap($ => $('[class*="byline"]').text()),
53+
wrap($ => $('[class*="dateline"]').text()),
54+
wrap($ => $('[id*="date"]').text()),
55+
wrap($ => $('[class*="date"]').text()),
56+
wrap($ => $('[id*="publish"]').text()),
57+
wrap($ => $('[class*="publish"]').text()),
58+
wrap($ => $('[id*="post-timestamp"]').text()),
59+
wrap($ => $('[class*="post-timestamp"]').text()),
60+
wrap($ => $('[id*="post-meta"]').text()),
61+
wrap($ => $('[class*="post-meta"]').text()),
62+
wrap($ => $('[id*="metadata"]').text()),
63+
wrap($ => $('[class*="metadata"]').text()),
64+
wrap($ => $('[id*="time"]').text()),
65+
wrap($ => $('[class*="time"]').text())
66+
]
6667

67-
module.exports.propName = 'date'
68+
rules.propName = 'date'
69+
70+
return rules
71+
}

packages/metascraper-description/index.js

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -35,22 +35,26 @@ const wrap = rule => ({ htmlDom }) => {
3535
* Rules.
3636
*/
3737

38-
module.exports = [
39-
wrap($ => $('meta[property="og:description"]').attr('content')),
40-
wrap($ => $('meta[name="twitter:description"]').attr('content')),
41-
wrap($ => $('meta[name="description"]').attr('content')),
42-
wrap($ => $('meta[name="sailthru.description"]').attr('content')),
43-
wrap($ => $('meta[itemprop="description"]').attr('content')),
44-
wrap($ =>
45-
$('[class*="content"] > p')
46-
.first()
47-
.text()
48-
),
49-
wrap($ =>
50-
$('[class*="content"] p')
51-
.first()
52-
.text()
53-
)
54-
]
55-
56-
module.exports.propName = 'description'
38+
module.exports = () => {
39+
const rules = [
40+
wrap($ => $('meta[property="og:description"]').attr('content')),
41+
wrap($ => $('meta[name="twitter:description"]').attr('content')),
42+
wrap($ => $('meta[name="description"]').attr('content')),
43+
wrap($ => $('meta[name="sailthru.description"]').attr('content')),
44+
wrap($ => $('meta[itemprop="description"]').attr('content')),
45+
wrap($ =>
46+
$('[class*="content"] > p')
47+
.first()
48+
.text()
49+
),
50+
wrap($ =>
51+
$('[class*="content"] p')
52+
.first()
53+
.text()
54+
)
55+
]
56+
57+
rules.propName = 'description'
58+
59+
return rules
60+
}

packages/metascraper-image/index.js

Lines changed: 29 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -19,29 +19,33 @@ const wrap = rule => ({ htmlDom, url: baseUrl }) => {
1919
* Rules.
2020
*/
2121

22-
module.exports = [
23-
wrap($ => $('meta[property="og:image:secure_url"]').attr('content')),
24-
wrap($ => $('meta[property="og:image:url"]').attr('content')),
25-
wrap($ => $('meta[property="og:image"]').attr('content')),
26-
wrap($ => $('meta[name="twitter:image"]').attr('content')),
27-
wrap($ => $('meta[property="twitter:image"]').attr('content')),
28-
wrap($ => $('meta[name="twitter:image:src"]').attr('content')),
29-
wrap($ => $('meta[property="twitter:image:src"]').attr('content')),
30-
wrap($ => $('meta[name="sailthru.image"]').attr('content')),
31-
wrap($ => $('meta[name="sailthru.image.full"]').attr('content')),
32-
wrap($ => $('meta[name="sailthru.image.thumb"]').attr('content')),
33-
wrap($ =>
34-
$('article img[src]')
35-
.first()
36-
.attr('src')
37-
),
38-
wrap($ =>
39-
$('#content img[src]')
40-
.first()
41-
.attr('src')
42-
),
43-
wrap($ => $('img[alt*="author"]').attr('src')),
44-
wrap($ => $('img[src]').attr('src'))
45-
]
22+
module.exports = () => {
23+
const rules = [
24+
wrap($ => $('meta[property="og:image:secure_url"]').attr('content')),
25+
wrap($ => $('meta[property="og:image:url"]').attr('content')),
26+
wrap($ => $('meta[property="og:image"]').attr('content')),
27+
wrap($ => $('meta[name="twitter:image"]').attr('content')),
28+
wrap($ => $('meta[property="twitter:image"]').attr('content')),
29+
wrap($ => $('meta[name="twitter:image:src"]').attr('content')),
30+
wrap($ => $('meta[property="twitter:image:src"]').attr('content')),
31+
wrap($ => $('meta[name="sailthru.image"]').attr('content')),
32+
wrap($ => $('meta[name="sailthru.image.full"]').attr('content')),
33+
wrap($ => $('meta[name="sailthru.image.thumb"]').attr('content')),
34+
wrap($ =>
35+
$('article img[src]')
36+
.first()
37+
.attr('src')
38+
),
39+
wrap($ =>
40+
$('#content img[src]')
41+
.first()
42+
.attr('src')
43+
),
44+
wrap($ => $('img[alt*="author"]').attr('src')),
45+
wrap($ => $('img[src]').attr('src'))
46+
]
4647

47-
module.exports.propName = 'image'
48+
rules.propName = 'image'
49+
50+
return rules
51+
}

packages/metascraper-logo/index.js

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -56,19 +56,23 @@ const wrap = rule => ({ htmlDom, url: baseUrl }) => {
5656
* Rules.
5757
*/
5858

59-
module.exports = [
60-
wrap($ => $('meta[property="og:logo"]').attr('content')),
61-
wrap($ => $('meta[itemprop="logo"]').attr('content')),
62-
wrap($ => $('img[itemprop="logo"]').attr('src')),
63-
wrap($ => {
64-
const sizes = getSizes($, sizeSelectors)
65-
const size = chain(sizes)
66-
.first()
67-
.get('link')
68-
.value()
69-
return size
70-
}),
71-
wrap($ => '/favicon.ico')
72-
]
59+
module.exports = () => {
60+
const rules = [
61+
wrap($ => $('meta[property="og:logo"]').attr('content')),
62+
wrap($ => $('meta[itemprop="logo"]').attr('content')),
63+
wrap($ => $('img[itemprop="logo"]').attr('src')),
64+
wrap($ => {
65+
const sizes = getSizes($, sizeSelectors)
66+
const size = chain(sizes)
67+
.first()
68+
.get('link')
69+
.value()
70+
return size
71+
}),
72+
wrap($ => '/favicon.ico')
73+
]
74+
75+
rules.propName = 'logo'
7376

74-
module.exports.propName = 'logo'
77+
return rules
78+
}

0 commit comments

Comments
 (0)