Skip to content

Commit 69f40c5

Browse files
committed
Extract more video fields
1 parent a67e147 commit 69f40c5

File tree

6 files changed

+1395
-26
lines changed

6 files changed

+1395
-26
lines changed

packages/metascraper-author/index.js

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,8 @@ const wrap = rule => ({ htmlDom }) => {
1616
const value = rule(htmlDom)
1717

1818
return isString(value) &&
19-
!isUrl(value, {relative: false}) &&
20-
titleize(value, {removeBy: true})
19+
!isUrl(value, {relative: false}) &&
20+
titleize(value, {removeBy: true})
2121
}
2222

2323
/**
Lines changed: 44 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,44 @@
1+
exports['twitter 1'] = {
2+
"author": "The Verge",
3+
"publisher": "TwitterCard",
4+
"date": "2018-02-11T12:00:00.000Z",
5+
"description": "“Is it bad to blow into game cartridges? https://t.co/Y3yAimrUnP”",
6+
"image": "https://pbs.twimg.com/media/DRg1OMRVwAEuwTK.jpg",
7+
"lang": "es",
8+
"logo": "https://abs.twimg.com/icons/apple-touch-icon-192x192.png",
9+
"url": "https://twitter.com/verge/status/957383241714970624"
10+
}
11+
12+
exports['facebook 1'] = {
13+
"author": "AFC Ajax",
14+
"publisher": "Facebook",
15+
"date": "2018-04-02T18:57:00.000Z",
16+
"description": null,
17+
"image": null,
18+
"lang": "es",
19+
"logo": "https://static.xx.fbcdn.net/rsrc.php/yp/r/1Dxu6XIjaTc.ico",
20+
"url": "https://www.facebook.com/afcajax/videos/1686831701364171"
21+
}
22+
23+
exports['youtube 1'] = {
24+
"author": "ONE Media",
25+
"publisher": "Youtube",
26+
"date": "2017-04-14T12:00:00.000Z",
27+
"description": "Star Wars 8 El Ultimo JEDI Trailer Espanol (Subtitulado) - 2017\n© 2017 - Disney",
28+
"image": "https://i.ytimg.com/vi/hwMkbaS_M_c/mqdefault.jpg",
29+
"lang": null,
30+
"logo": "https://www.youtube.com/yts/img/favicon_144-vfliLAfaB.png",
31+
"url": "https://www.youtube.com/watch?v=hwMkbaS_M_c"
32+
}
33+
34+
exports['vimeo 1'] = {
35+
"author": "pleid",
36+
"publisher": "Vimeo",
37+
"date": "2016-10-20T13:06:52.000Z",
38+
"description": "Converse has spent a good part of this year updating some of their classics. Our past is constantly catching up to us, but we rarely get to see the relationship…",
39+
"image": "https://i.vimeocdn.com/filter/overlay?src0=https://i.vimeocdn.com/video/598160082_1280x720.jpg&src1=https://f.vimeocdn.com/images_v6/share/play_icon_overlay.png",
40+
"lang": "en",
41+
"logo": "https://i.vimeocdn.com/favicon/main-touch_180",
42+
"url": "https://vimeo.com/188175573"
43+
}
44+

packages/metascraper-video-provider/index.js

Lines changed: 73 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,92 @@
11
'use strict'
22

3-
const { isUrl } = require('@metascraper/helpers')
3+
const { round, size, get, chain, find, isString } = require('lodash')
4+
const { isUrl, titleize } = require('@metascraper/helpers')
45
const youtubedl = require('youtube-dl')
56
const { promisify } = require('util')
67
const path = require('path')
78

8-
const getVideoInfo = promisify(youtubedl.getInfo)
9+
const getInfo = promisify(youtubedl.getInfo)
10+
11+
let cachedVideoInfoUrl
12+
let cachedVideoInfo
913

1014
/**
11-
* Get a Video source quality not too high
15+
* Get the video info.
16+
* Avoid making more than one request for the same URL.
1217
*/
13-
const getVideoUrl = ({formats}) => {
14-
const urls = formats
15-
.filter(item =>
16-
item.protocol === 'https' &&
17-
(item.ext === 'mp4' ||
18-
path.extname(item.url).startsWith('.mp4'))
19-
)
20-
.map(item => item.url)
21-
22-
const index = Math.round(urls.length / 2) - 1
23-
return urls[index]
24-
}
25-
26-
const getVideoProvider = async url => {
18+
const getVideoInfo = async url => {
19+
if (url === cachedVideoInfoUrl) return cachedVideoInfo
2720
try {
28-
const info = await getVideoInfo(url)
29-
const videoUrl = getVideoUrl(info)
30-
return isUrl(videoUrl) && videoUrl
21+
const info = await getInfo(url)
22+
cachedVideoInfoUrl = url
23+
cachedVideoInfo = info
24+
return info
3125
} catch (err) {
32-
return false
26+
return {}
3327
}
3428
}
3529

30+
const isMp4 = format => format.ext === 'mp4' || path.extname(format.url).startsWith('.mp4')
31+
const isHttp = format => format.protocol === 'https' || format.protocol === 'http'
32+
33+
/**
34+
* Get a video source of good enough quality
35+
* that the browser can consume.
36+
*/
37+
const getVideoUrl = formats => {
38+
const urls = chain(formats)
39+
.filter(format => isHttp(format) && isMp4(format))
40+
.map('url')
41+
.value()
42+
43+
const index = round(size(urls) / 2) - 1
44+
return get(urls, index)
45+
}
46+
47+
/**
48+
* Get a URL-like video source.
49+
*/
50+
const getVideoProvider = async ({url}) => {
51+
const { formats } = await getVideoInfo(url)
52+
const videoUrl = getVideoUrl(formats)
53+
return isUrl(videoUrl) && videoUrl
54+
}
55+
56+
/**
57+
* Get the Author of the video.
58+
*/
59+
const getVideoAuthor = async ({url}) => {
60+
const { uploader, creator, uploader_id: uploaderId } = await getVideoInfo(url)
61+
const author = find([creator, uploader, uploaderId], str => (
62+
isString(str) && !isUrl(str, {relative: false})
63+
))
64+
return author && titleize(author, {removeBy: true})
65+
}
66+
67+
const getVideoPublisher = async ({url}) => {
68+
const { extractor_key: extractorKey } = await getVideoInfo(url)
69+
return isString(extractorKey) && extractorKey
70+
}
71+
72+
const getVideoTitle = async ({url}) => {
73+
const { title: mainTitle, alt_title: secondaryTitle } = await getVideoInfo(url)
74+
const title = find([mainTitle, secondaryTitle], isString)
75+
return title && titleize(title)
76+
}
77+
78+
const getVideoDate = async ({url}) => {
79+
const { timestamp } = await getVideoInfo(url)
80+
return timestamp && new Date(timestamp * 1000).toISOString()
81+
}
82+
3683
module.exports = () => {
3784
return {
38-
video: ({url}) => getVideoProvider(url)
85+
video: getVideoProvider,
86+
author: getVideoAuthor,
87+
publisher: getVideoPublisher,
88+
title: getVideoTitle,
89+
date: getVideoDate
3990
}
4091
}
4192

packages/metascraper-video-provider/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,16 @@
1616
"url": "https://github.com/microlinkhq/metascraper/issues"
1717
},
1818
"dependencies": {
19+
"lodash": "~4.17.5",
1920
"@metascraper/helpers": "^3.9.3",
2021
"youtube-dl": "~1.12.2"
2122
},
2223
"devDependencies": {
23-
"lodash": "latest",
2424
"metascraper": "latest",
2525
"mocha": "latest",
2626
"nyc": "latest",
2727
"should": "latest",
28+
"snap-shot": "latest",
2829
"standard": "latest"
2930
},
3031
"engines": {

0 commit comments

Comments
 (0)