Skip to content

Commit 0a74da4

Browse files
committed
Use twdown for twitter videos
1 parent 815ba2b commit 0a74da4

File tree

4 files changed

+31
-28
lines changed

4 files changed

+31
-28
lines changed

packages/metascraper-video-provider/index.js

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,20 @@ const { overEvery, isEmpty, eq, has, round, size, get, chain, find, isString } =
44
const { isUrl, titleize } = require('@metascraper/helpers')
55
const youtubedl = require('youtube-dl')
66
const { promisify } = require('util')
7+
const twdown = require('twdown')
8+
const { URL } = require('url')
79
const path = require('path')
810

911
// Promise-returning version of the callback-style `youtubedl.getInfo`,
// so it can be awaited.
const getInfo = promisify(youtubedl.getInfo)
1012

13+
// Hostnames that `isTwitterUrl` matches against (exact hostname comparison).
const TWITTER_HOSTNAMES = ['twitter.com', 'mobile.twitter.com']
14+
15+
/**
 * Tell whether `url` points at one of the hostnames in `TWITTER_HOSTNAMES`.
 *
 * `new URL()` throws a TypeError for input it cannot parse (relative paths,
 * empty strings, `undefined`, ...). Such input is simply "not a Twitter URL",
 * so it yields `false` instead of propagating the exception.
 *
 * @param {string} url - Absolute URL to inspect.
 * @returns {boolean} `true` when the URL hostname is a known Twitter hostname.
 */
const isTwitterUrl = url => {
  let hostname
  try {
    hostname = new URL(url).hostname
  } catch (err) {
    // Unparsable input can never match a Twitter hostname.
    return false
  }
  return TWITTER_HOSTNAMES.includes(hostname)
}
16+
17+
// Local cache for successive calls
1118
let cachedVideoInfoUrl
1219
let cachedVideoInfo
1320

14-
/**
15-
* Get the video info.
16-
* Avoid do more one request for the same URL.
17-
*/
1821
const getVideoInfo = async url => {
1922
if (url === cachedVideoInfoUrl) return cachedVideoInfo
2023
cachedVideoInfoUrl = url
@@ -24,21 +27,21 @@ const getVideoInfo = async url => {
2427
} catch (err) {
2528
cachedVideoInfo = {}
2629
}
27-
2830
return cachedVideoInfo
2931
}
3032

31-
const isMp4 = format => eq(get(format, 'ext'), 'mp4') || path.extname(get(format, 'url')).startsWith('.mp4')
32-
const isHttp = format => eq(get(format, 'protocol'), 'http')
33-
const isHttps = format => eq(get(format, 'protocol'), 'https')
34-
const hasAudio = format => has(format, 'abr')
33+
/**
 * Tell whether a video entry is an MP4 source.
 *
 * An entry qualifies when its `ext` field is exactly 'mp4', or when the
 * extension of its `url` starts with '.mp4'. The prefix match is used because
 * `path.extname` keeps whatever follows the last dot, so a trailing query
 * string ('.mp4?tag=1') still matches.
 *
 * `path.extname` throws on non-string input, so entries without a string
 * `url` are reported as not MP4 rather than raising.
 *
 * @param {object} video - Entry exposing optional `ext` and `url` fields.
 * @returns {boolean} `true` when the entry looks like an MP4 source.
 */
const isMp4 = video => {
  const { ext, url } = video || {}
  if (ext === 'mp4') return true
  return typeof url === 'string' && path.extname(url).startsWith('.mp4')
}
35+
/**
 * Tell whether a video entry's `protocol` field equals 'http'.
 */
const isHttp = video => {
  const protocol = get(video, 'protocol')
  return eq(protocol, 'http')
}
36+
/**
 * Tell whether a video entry's `protocol` field equals 'https'.
 */
const isHttps = video => {
  const protocol = get(video, 'protocol')
  return eq(protocol, 'https')
}
37+
/**
 * Tell whether a video entry carries an `abr` field.
 * NOTE(review): `abr` is presumably the audio bitrate reported by the
 * extractor, used here as a proxy for "has an audio track" — confirm.
 */
const hasAudio = video => {
  return has(video, 'abr')
}
3538

3639
/**
3740
* Get a video source of good enough quality,
3841
* compatible with playback in the browser.
3942
*/
40-
const getVideoUrl = (formats, filters = []) => {
41-
const urls = chain(formats)
43+
const getVideoUrl = (videos, filters = []) => {
44+
const urls = chain(videos)
4245
.filter(overEvery(filters))
4346
.map('url')
4447
.value()
@@ -51,9 +54,13 @@ const getVideoUrl = (formats, filters = []) => {
5154
/**
5255
* Get a URL-like video source.
5356
*/
54-
const getVideoProvider = async ({ url }) => {
55-
const { formats } = await getVideoInfo(url)
56-
const videoUrl = getVideoUrl(formats, [isMp4, isHttps, hasAudio]) ||
57+
const getVideoProvider = getBrowserless => async ({ url }) => {
58+
const formats = !isTwitterUrl(url)
59+
? (await getVideoInfo(url)).formats
60+
: await twdown({ url, browserless: await getBrowserless() })
61+
62+
const videoUrl =
63+
getVideoUrl(formats, [isMp4, isHttps, hasAudio]) ||
5764
getVideoUrl(formats, [isMp4, isHttp, hasAudio]) ||
5865
getVideoUrl(formats, [isMp4, isHttps]) ||
5966
getVideoUrl(formats, [isMp4]) ||
@@ -90,9 +97,9 @@ const getVideoDate = async ({ url }) => {
9097
return timestamp && new Date(timestamp * 1000).toISOString()
9198
}
9299

93-
module.exports = () => {
100+
module.exports = ({ getBrowserless }) => {
94101
return {
95-
video: getVideoProvider,
102+
video: getVideoProvider(getBrowserless),
96103
author: getVideoAuthor,
97104
publisher: getVideoPublisher,
98105
title: getVideoTitle,

packages/metascraper-video-provider/package.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,14 @@
1919
"@metascraper/helpers": "^3.11.12",
2020
"json-future": "~2.1.2",
2121
"lodash": "~4.17.10",
22+
"twdown": "~1.0.1",
2223
"youtube-dl": "~1.12.2"
2324
},
2425
"devDependencies": {
26+
"browserless": "latest",
2527
"mocha": "latest",
2628
"nyc": "latest",
29+
"puppeteer": "latest",
2730
"should": "latest",
2831
"snap-shot": "latest",
2932
"standard": "latest"

packages/metascraper-video-provider/test/index.js

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
'use strict'
22

33
const { isUrl } = require('@metascraper/helpers')
4+
const browserless = require('browserless')()
45
const { isString } = require('lodash')
56
const snapshot = require('snap-shot')
67
const { promisify } = require('util')
@@ -10,16 +11,7 @@ const should = require('should')
1011
const fs = require('fs')
1112

1213
const metascraper = require('metascraper').load([
13-
require('metascraper-video-provider')({
14-
launchOpts: {
15-
args: [
16-
'--disable-gpu',
17-
'--single-process',
18-
'--no-zygote',
19-
'--no-sandbox'
20-
]
21-
}
22-
}),
14+
require('metascraper-video-provider')({ getBrowserless: () => browserless }),
2315
require('metascraper-author')(),
2416
require('metascraper-date')(),
2517
require('metascraper-description')(),
@@ -44,7 +36,7 @@ describe('metascraper-video-provider', () => {
4436
should(videoUrl).be.an.String()
4537
})
4638
})
47-
describe('providers', () => {
39+
describe('provider', () => {
4840
it('vimeo', async () => {
4941
const html = await readFile(resolve(__dirname, 'fixtures/vimeo.html'))
5042
const url = 'https://vimeo.com/188175573'
@@ -55,7 +47,7 @@ describe('metascraper-video-provider', () => {
5547
snapshot(meta)
5648
})
5749

58-
xit('twitter', async () => {
50+
it('twitter', async () => {
5951
const html = await readFile(resolve(__dirname, 'fixtures/twitter.html'))
6052
const url = 'https://twitter.com/verge/status/957383241714970624'
6153

packages/metascraper-video-provider/test/mocha.opts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@
44
--slow 300
55
--bail
66
--recursive
7+
--exit

0 commit comments

Comments
 (0)