Skip to content

Commit

Permalink
fix(audio,video): load twitter:player as iframe (#655)
Browse files Browse the repository at this point in the history
  • Loading branch information
Kikobeats authored Aug 15, 2023
1 parent 9e665d1 commit 1bda60a
Show file tree
Hide file tree
Showing 21 changed files with 1,153 additions and 266 deletions.
120 changes: 59 additions & 61 deletions packages/metascraper-audio/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,41 @@ const {
audio,
findRule,
has,
isMime,
$twitter,
loadIframe,
normalizeUrl,
toRule
} = require('@metascraper/helpers')

const memoize = require('@keyvhq/memoize')
const { find, chain, isEqual } = require('lodash')
const pReflect = require('p-reflect')
const got = require('got')

const toAudio = toRule(audio)

const withContentType = (url, contentType) =>
isMime(contentType, 'audio') ? url : false
const toAudioFromDom = toRule((domNodes, opts) => {
const values = chain(domNodes)
.map(domNode => ({
src: domNode?.attribs.src,
type: chain(domNode)
.get('attribs.type')
.split(';')
.get(0)
.split('/')
.get(1)
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types
.replace('mpeg', 'mp3')
.value()
}))
.uniqWith(isEqual)
.value()

let result
find(
values,
({ src, type }) => (result = audio(src, Object.assign({ type }, opts)))
)
return result
})

const audioRules = [
({ url, htmlDom: $ }) => {
Expand All @@ -35,45 +56,27 @@ const audioRules = [
})
: undefined
},
toAudio($ => {
const contentType =
$('meta[name="twitter:player:stream:content_type"]').attr('content') ||
$('meta[property="twitter:player:stream:content_type"]').attr('content')

const streamUrl =
$('meta[name="twitter:player:stream"]').attr('content') ||
$('meta[property="twitter:player:stream"]').attr('content')

return contentType ? withContentType(streamUrl, contentType) : streamUrl
}),
({ url, htmlDom: $ }) => {
const src = $twitter($, 'twitter:player:stream')
return src
? audio(src, {
url,
type: $twitter($, 'twitter:player:stream:content_type')
})
: undefined
},
toAudio($jsonld('contentUrl')),
toAudio($ => $('audio').attr('src')),
toAudio($ => $('audio > source').attr('src')),
toAudioFromDom($ => $('audio').get()),
toAudioFromDom($ => $('audio > source').get()),
({ htmlDom: $ }) => $filter($, $('a[href]'), el => audio(el.attr('href')))
]

const _getIframe = (url, $, { src }) =>
loadIframe(url, $.load(`<iframe src="${src}"></iframe>`))

const createGetPlayer = ({ gotOpts, keyvOpts }) => {
const getPlayer = async playerUrl => {
const { value: response } = await pReflect(got(playerUrl, gotOpts))
if (!response) return
const contentType = response.headers['content-type']
if (!contentType || !contentType.startsWith('text')) return
return response.body
}
return memoize(getPlayer, keyvOpts, {
value: value => (value === undefined ? null : value)
})
}

module.exports = ({ getIframe = _getIframe, gotOpts, keyvOpts } = {}) => {
const getPlayer = createGetPlayer({ gotOpts, keyvOpts })

module.exports = ({ getIframe = _getIframe } = {}) => {
return {
audio: [
...audioRules,
audio: audioRules.concat(
async ({ htmlDom: $, url }) => {
const iframe = $('iframe')
if (iframe.length === 0) return
Expand All @@ -92,33 +95,28 @@ module.exports = ({ getIframe = _getIframe, gotOpts, keyvOpts } = {}) => {
}
})

if (srcs.length === 0) return

const { value } = await pReflect(
Promise.any(
srcs.map(async src => {
const htmlDom = await getIframe(url, $, { src })
const result = await findRule(audioRules, { htmlDom, url })
if (!has(result)) throw TypeError('no result')
return result
})
)
)

return value
return srcs.length > 0
? pReflect(
Promise.any(
srcs.map(async src => {
const htmlDom = await getIframe(url, $, { src })
const result = await findRule(audioRules, { htmlDom, url })
if (!has(result)) throw TypeError('no result')
return result
})
)
).then(({ value }) => value)
: undefined
},
async ({ htmlDom: $, url }) => {
const playerUrl =
$('meta[name="twitter:player"]').attr('content') ||
$('meta[property="twitter:player"]').attr('content')
if (!playerUrl) return

const html = await getPlayer(normalizeUrl(url, playerUrl))
if (!html) return

const htmlDom = $.load(html)
return findRule(audioRules, { htmlDom, url })
const src = $twitter($, 'twitter:player')
return src
? findRule(audioRules, {
htmlDom: await getIframe(url, $, { src }),
url
})
: undefined
}
]
)
}
}
3 changes: 1 addition & 2 deletions packages/metascraper-audio/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,8 @@
"metascraper"
],
"dependencies": {
"@keyvhq/memoize": "~2.0.3",
"@metascraper/helpers": "^5.35.1",
"got": "~11.8.6",
"lodash": "~4.17.21",
"p-reflect": "~2.1.0"
},
"devDependencies": {
Expand Down
Loading

0 comments on commit 1bda60a

Please sign in to comment.