Skip to content

Commit 7e14b48

Browse files
committed
Improve author & description accuracy
1 parent 447280c commit 7e14b48

File tree

9 files changed

+6579
-28
lines changed

9 files changed

+6579
-28
lines changed

packages/metascraper-author/index.js

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,9 @@ module.exports = () => ({
5151
strict(wrap($ => getValue($, $('a[href*="/author/"]')))),
5252
wrap($ => getValue($, $('a[class*="screenname"]'))),
5353
strict(wrap($ => getValue($, $('[class*="author"]')))),
54-
strict(wrap($ => getValue($, $('[class*="byline"]')))),
55-
wrap($ => getValue($, $('.fullname'))),
56-
wrap($ => $('[class*="user-info"]').text())
54+
wrap($ => $('#owner-name').text()),
55+
wrap($ => $('#channel-title').text()),
56+
wrap($ => getValue($, $('[class*="user-info"]'))),
57+
strict(wrap($ => getValue($, $('[class*="byline"]'))))
5758
]
5859
})

packages/metascraper-description/index.js

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
'use strict'
22

3-
const { titleize } = require('@metascraper/helpers')
3+
const { getValue, titleize } = require('@metascraper/helpers')
44
const { isString } = require('lodash')
55

66
const REGEX_LOCATION = /^[A-Z\s]+\s+[-]\s+/
@@ -30,15 +30,8 @@ module.exports = () => ({
3030
wrap($ => $('meta[name="description"]').attr('content')),
3131
wrap($ => $('meta[name="sailthru.description"]').attr('content')),
3232
wrap($ => $('meta[itemprop="description"]').attr('content')),
33-
wrap($ =>
34-
$('[class*="content"] > p')
35-
.first()
36-
.text()
37-
),
38-
wrap($ =>
39-
$('[class*="content"] p')
40-
.first()
41-
.text()
42-
)
33+
wrap($ => $('#description').text()),
34+
wrap($ => getValue($, $('[class*="content"] > p'))),
35+
wrap($ => getValue($, $('[class*="content"] p')))
4336
]
4437
})

packages/metascraper/__snapshots__/index.js.snap-shot

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -505,7 +505,7 @@ exports['nytimes 1'] = {
505505
exports['pikabu 1'] = {
506506
"author": null,
507507
"date": null,
508-
"description": null,
508+
"description": "Вчера к нам приезжали друзья. К вечеру все засобирались домой, потопали каждый к своей машине. Через 5 минут позвонил один из товарищей и сказал, что его авто заперли. В общем, позвали его обратно домой, чай пить, а сами пока стали думать, что же тут делать. Вот, собственно, как выглядел паркинг",
509509
"image": "https://ads.adfox.ru/211055/getCode?p1=bvoqq&p2=v&pe=b&pfb=eyxrx&pfc=bpndz&pr=[RANDOM]",
510510
"logo": "https://cs.pikabu.ru/images/icon_ios144.png",
511511
"publisher": "Комментарий дня ТОП 50 Вот так камеры в Москве фиксируют штрафы. +1314 figurist; 14 часов назад # Годный интерфейс, как в каком-нибудь фильме про хацкеров (:",
@@ -850,18 +850,6 @@ exports['yahoo-news 1'] = {
850850
"lang": "en"
851851
}
852852

853-
exports['youtube 1'] = {
854-
"author": "Andrew JRT",
855-
"date": "2017-07-07T00:00:00.000Z",
856-
"description": "👍 Subscribe - http://bit.do/AndrewJRT 🐦 Twitter - https://twitter.com/andrew_jrt 🎮 Twitch - https://twitch.tv/andrewjrt ❤ Patreon - https://www.patreon.com/a...",
857-
"image": "https://i.ytimg.com/vi/GDRd-BFTYIg/maxresdefault.jpg",
858-
"logo": "https://www.youtube.com/yts/img/favicon_144-vflWmzoXw.png",
859-
"publisher": "YouTube",
860-
"title": "Overwatch - Huge Massive Fist Punch Man",
861-
"url": "https://www.youtube.com/watch?v=GDRd-BFTYIg",
862-
"lang": "en"
863-
}
864-
865853
exports['zdnet 1'] = {
866854
"author": "Natalie Gagliordi",
867855
"date": "2016-05-24T13:30:03.000Z",
@@ -886,3 +874,39 @@ exports['soundcloud 1'] = {
886874
"lang": "en"
887875
}
888876

877+
exports['youtube classic 1'] = {
878+
"author": "Andrew JRT",
879+
"date": "2017-07-07T00:00:00.000Z",
880+
"description": "👍 Subscribe - http://bit.do/AndrewJRT 🐦 Twitter - https://twitter.com/andrew_jrt 🎮 Twitch - https://twitch.tv/andrewjrt ❤ Patreon - https://www.patreon.com/a...",
881+
"image": "https://i.ytimg.com/vi/GDRd-BFTYIg/maxresdefault.jpg",
882+
"lang": "en",
883+
"logo": "https://www.youtube.com/yts/img/favicon_144-vflWmzoXw.png",
884+
"publisher": "YouTube",
885+
"title": "Overwatch - Huge Massive Fist Punch Man",
886+
"url": "https://www.youtube.com/watch?v=GDRd-BFTYIg"
887+
}
888+
889+
exports['youtube video 1'] = {
890+
"author": "ONE Media",
891+
"date": "2017-04-14T12:00:00.000Z",
892+
"description": "Star Wars 8 El Ultimo JEDI Trailer Espanol (Subtitulado) - 2017 © 2017 - Disney",
893+
"image": "https://i.ytimg.com/vi/hwMkbaS_M_c/mqdefault.jpg",
894+
"lang": null,
895+
"logo": "https://www.youtube.com/yts/img/favicon_144-vfliLAfaB.png",
896+
"publisher": "ES",
897+
"title": "Star Wars 8 El Ultimo JEDI Trailer Espanol (Subtitulado) - 2017",
898+
"url": "https://www.youtube.com/watch?v=hwMkbaS_M_c"
899+
}
900+
901+
exports['youtube channel 1'] = {
902+
"author": "ONE Media",
903+
"date": "2017-12-31T01:00:00.000Z",
904+
"description": "Thor 3, Star Wars, Valerian, Transformers 5, Baywatch, Despicable Me 3, Fast and Furious 8, Wonder Woman, Justice League... ALL The Must See Movie Trailer, F...",
905+
"image": "https://yt3.ggpht.com/-q3yclmvjVzk/AAAAAAAAAAI/AAAAAAAAAAA/jVaGJKJjUps/s200-c-k-no-mo-rj-c0xffffff/photo.jpg",
906+
"lang": null,
907+
"logo": "https://www.youtube.com/yts/img/favicon_144-vfliLAfaB.png",
908+
"publisher": "YouTube",
909+
"title": "ONE Media - YouTube",
910+
"url": "https://www.youtube.com/channel/UCzcRQ3vRNr6fJ1A9rqFn7QA"
911+
}
912+
packages/metascraper/test/integration/youtube-channel/index.js

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
'use strict'
2+
3+
const snapshot = require('snap-shot')
4+
const { promisify } = require('util')
5+
const { resolve } = require('path')
6+
7+
const fs = require('fs')
8+
9+
const metascraper = require('../../..')
10+
const readFile = promisify(fs.readFile)
11+
12+
const url = 'https://www.youtube.com/channel/UCzcRQ3vRNr6fJ1A9rqFn7QA'
13+
14+
it('youtube channel', async () => {
15+
const html = await readFile(resolve(__dirname, 'input.html'))
16+
const metadata = await metascraper({ html, url })
17+
snapshot(metadata)
18+
})

packages/metascraper/test/integration/youtube-channel/input.html

Lines changed: 3893 additions & 0 deletions
Large diffs are not rendered by default.

packages/metascraper/test/integration/youtube/index.js renamed to packages/metascraper/test/integration/youtube-classic/index.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ const readFile = promisify(fs.readFile)
1111

1212
const url = 'https://www.youtube.com/watch?v=GDRd-BFTYIg'
1313

14-
it('youtube', async () => {
14+
it('youtube classic', async () => {
1515
const html = await readFile(resolve(__dirname, 'input.html'))
1616
const metadata = await metascraper({ html, url })
1717
snapshot(metadata)
packages/metascraper/test/integration/youtube-video/index.js

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
'use strict'
2+
3+
const snapshot = require('snap-shot')
4+
const { promisify } = require('util')
5+
const { resolve } = require('path')
6+
7+
const fs = require('fs')
8+
9+
const metascraper = require('../../..')
10+
const readFile = promisify(fs.readFile)
11+
12+
const url = 'https://www.youtube.com/watch?v=hwMkbaS_M_c'
13+
14+
it('youtube video', async () => {
15+
const html = await readFile(resolve(__dirname, 'input.html'))
16+
const metadata = await metascraper({ html, url })
17+
snapshot(metadata)
18+
})

packages/metascraper/test/integration/youtube-video/input.html

Lines changed: 2604 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)