Skip to content

Commit

Permalink
Merge 7cc967a into 42dac0d
Browse files Browse the repository at this point in the history
  • Loading branch information
Kikobeats committed Aug 8, 2023
2 parents 42dac0d + 7cc967a commit 8b14ad2
Show file tree
Hide file tree
Showing 20 changed files with 993 additions and 749 deletions.
15 changes: 13 additions & 2 deletions packages/metascraper-audio/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,19 @@ const withContentType = (url, contentType) =>
isMime(contentType, 'audio') ? url : false

const audioRules = [
toAudio($ => $('meta[property="og:audio:secure_url"]').attr('content')),
toAudio($ => $('meta[property="og:audio"]').attr('content')),
({ url, htmlDom: $ }) => {
const src =
$('meta[property="og:audio:secure_url"]').attr('content') ||
$('meta[property="og:audio:url"]').attr('content') ||
$('meta[property="og:audio"]').attr('content')

return src
? audio(src, {
url,
type: $('meta[property="og:audio:type"]').attr('content')
})
: undefined
},
toAudio($ => {
const contentType =
$('meta[name="twitter:player:stream:content_type"]').attr('content') ||
Expand Down
78 changes: 2 additions & 76 deletions packages/metascraper-audio/test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,91 +15,17 @@ test('provide `keyvOpts`', async t => {

const metadataOne = await metascraper({
url,
html:
'<meta property="twitter:player" content="https://twitter-card-player.vercel.app/container/audio.html">'
html: '<meta property="twitter:player" content="https://twitter-card-player.vercel.app/container/audio.html">'
})

t.truthy(metadataOne.audio)
t.is(cache.size, 1)

const metadataTwo = await metascraper({
url,
html:
'<meta property="twitter:player" content="https://twitter-card-player.vercel.app/audio-fail.html">'
html: '<meta property="twitter:player" content="https://twitter-card-player.vercel.app/audio-fail.html">'
})

t.falsy(metadataTwo.audio)
t.is(cache.size, 2)
})

test('og:audio', async t => {
const html =
'<meta property="og:audio" content="https://browserless.js.org/static/demo.mp3">'
const url = 'https://browserless.js.org'
const metascraper = createMetascraper()
const metadata = await metascraper({ html, url })
t.snapshot(metadata)
})

test('og:audio:secure_url', async t => {
const html =
'<meta property="og:audio:secure_url" content="https://browserless.js.org/static/demo.mp3">'
const url = 'https://browserless.js.org'
const metascraper = createMetascraper()
const metadata = await metascraper({ html, url })
t.snapshot(metadata)
})

test('twitter:player', async t => {
const html =
'<meta property="twitter:player" content="https://twitter-card-player.vercel.app/container/audio.html">'
const url = 'https://twitter-card-player.vercel.app'
const metascraper = createMetascraper()
const metadata = await metascraper({ html, url })
t.snapshot(metadata)
})

test('twitter:player:stream', async t => {
const html =
'<meta property="twitter:player:stream" content="https://cdn.microlink.io/file-examples/sample.mp3">'
const url = 'https://twitter-card-player.vercel.app'
const metascraper = createMetascraper()
const metadata = await metascraper({ html, url })
t.snapshot(metadata)
})

test('audio:src', async t => {
const html = '<audio src="https://browserless.js.org/static/demo.mp3">'
const url = 'https://browserless.js.org'
const metascraper = createMetascraper()
const metadata = await metascraper({ html, url })
t.snapshot(metadata)
})

test('audio:source:src', async t => {
const html =
'<audio><source src="https://browserless.js.org/static/demo.mp3"></source></audio>'
const url = 'https://browserless.js.org'
const metascraper = createMetascraper()
const metadata = await metascraper({ html, url })
t.snapshot(metadata)
})

test('a:href', async t => {
const html =
'<a href="https://browserless.js.org/static/demo.mp3?some_param=this">Download</a>'
const url = 'https://browserless.js.org'
const metascraper = createMetascraper()
const metadata = await metascraper({ html, url })
t.snapshot(metadata)
})

test('jsld:contentUrl', async t => {
const html = `<script type="application/ld+json">
{"@context":"http://schema.org","@type":"AudioObject","@id":"https://example.com/audio.mp3","contentUrl":"https://example.com/audio.mp3"}
</script>`
const url = 'https://browserless.js.org'
const metascraper = createMetascraper()
const metadata = await metascraper({ html, url })
t.snapshot(metadata)
})
87 changes: 87 additions & 0 deletions packages/metascraper-audio/test/providers.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
'use strict'

const test = require('ava')

// Test helper: builds a metascraper instance whose only rule bundle is the
// module in the parent directory (`require('..')`), created with whatever
// arguments the test passes through.
const createMetascraper = (...args) => {
  const metascraper = require('metascraper')
  const rulesBundle = require('..')(...args)
  return metascraper([rulesBundle])
}

// Every Open Graph audio property, when it is the only tag on the page,
// must resolve to the audio URL it carries. One test is registered per
// property, titled with the property name.
for (const property of ['og:audio', 'og:audio:url', 'og:audio:secure_url']) {
  test(property, async t => {
    const audioUrl = 'https://cdn.microlink.io/file-examples/sample.mp3'
    const html = `<meta property="${property}" content="${audioUrl}">`
    const url = 'https://browserless.js.org'
    const metascraper = createMetascraper()
    const metadata = await metascraper({ html, url })
    t.is(metadata.audio, audioUrl)
  })
}

// Snapshot-based provider tests, table-driven: each row is
// [test title, page url, html fixture]. Titles and order are kept stable
// because the stored snapshots are keyed by test title.
for (const [title, url, html] of [
  [
    'twitter:player',
    'https://twitter-card-player.vercel.app',
    '<meta property="twitter:player" content="https://twitter-card-player.vercel.app/container/audio.html">'
  ],
  [
    'twitter:player:stream',
    'https://twitter-card-player.vercel.app',
    '<meta property="twitter:player:stream" content="https://cdn.microlink.io/file-examples/sample.mp3">'
  ],
  [
    'audio:src',
    'https://browserless.js.org',
    '<audio src="https://cdn.microlink.io/file-examples/sample.mp3">'
  ],
  [
    'audio:source:src',
    'https://browserless.js.org',
    '<audio><source src="https://cdn.microlink.io/file-examples/sample.mp3"></source></audio>'
  ],
  [
    'a:href',
    'https://browserless.js.org',
    '<a href="https://cdn.microlink.io/file-examples/sample.mp3?some_param=this">Download</a>'
  ],
  [
    'jsld:contentUrl',
    'https://browserless.js.org',
    `<script type="application/ld+json">
{"@context":"http://schema.org","@type":"AudioObject","@id":"https://example.com/audio.mp3","contentUrl":"https://example.com/audio.mp3"}
</script>`
  ]
]) {
  test(title, async t => {
    const metascraper = createMetascraper()
    const metadata = await metascraper({ html, url })
    t.snapshot(metadata)
  })
}
Binary file removed packages/metascraper-audio/test/snapshots/index.js.snap
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,25 +1,9 @@
# Snapshot report for `test/index.js`
# Snapshot report for `test/providers.js`

The actual snapshot is saved in `index.js.snap`.
The actual snapshot is saved in `providers.js.snap`.

Generated by [AVA](https://avajs.dev).

## og:audio

> Snapshot 1
{
audio: 'https://browserless.js.org/static/demo.mp3',
}

## og:audio:secure_url

> Snapshot 1
{
audio: 'https://browserless.js.org/static/demo.mp3',
}

## twitter:player

> Snapshot 1
Expand All @@ -41,23 +25,23 @@ Generated by [AVA](https://avajs.dev).
> Snapshot 1
{
audio: 'https://browserless.js.org/static/demo.mp3',
audio: 'https://cdn.microlink.io/file-examples/sample.mp3',
}

## audio:source:src

> Snapshot 1
{
audio: 'https://browserless.js.org/static/demo.mp3',
audio: 'https://cdn.microlink.io/file-examples/sample.mp3',
}

## a:href

> Snapshot 1
{
audio: 'https://browserless.js.org/static/demo.mp3?some_param=this',
audio: 'https://cdn.microlink.io/file-examples/sample.mp3?some_param=this',
}

## jsld:contentUrl
Expand Down
Binary file not shown.
57 changes: 33 additions & 24 deletions packages/metascraper-helpers/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,15 @@ const isAudioExtension = url => isMediaTypeExtension(url, AUDIO)

const isImageExtension = url => isMediaTypeExtension(url, IMAGE)

/**
 * Creates a predicate that tells whether the `type` option (a media type
 * such as `audio/mpeg`) ends with one of the given extensions.
 *
 * The value is normalized before matching: parameters after `;` are dropped
 * (`audio/ogg; codecs=opus` -> `audio/ogg`), surrounding whitespace is
 * trimmed and the comparison is case-insensitive, since media type names
 * are case-insensitive. A missing options object or a non-string `type`
 * (e.g. `null`) yields `false` instead of throwing.
 *
 * @param {string[]} extensions - Suffixes to look for at the end of the type.
 * @returns {(opts?: { type?: string }) => boolean} Predicate over the options.
 */
const isContentType = extensions => opts => {
  // `opts || {}` also covers `null`, which a destructuring default would not.
  const { type = '' } = opts || {}
  if (typeof type !== 'string') return false

  const [mediaType] = type.split(';')
  const normalized = mediaType.trim().toLowerCase()

  return extensions.some(extension =>
    normalized.endsWith(extension.toLowerCase())
  )
}

// Predicate over rule options: matches when `opts.type` ends with one of the
// keys of `videoExtensions`.
const isVideoContentType = isContentType(Object.keys(videoExtensions))

// Predicate over rule options: matches when `opts.type` ends with one of the
// keys of `audioExtensions`.
const isAudioContentType = isContentType(Object.keys(audioExtensions))

const extension = (str = '') => {
const url = urlObject(
str,
Expand Down Expand Up @@ -323,11 +332,7 @@ const jsonld = memoizeOne(
.map(function () {
try {
const el = $(this)
const json = JSON.parse(
$(el)
.contents()
.text()
)
const json = JSON.parse($(el).contents().text())

const { '@graph': graph, ...props } = json
if (!graph) return json
Expand Down Expand Up @@ -364,15 +369,16 @@ const image = (value, opts) => {

const logo = image

const video = (value, opts) => {
const media = (urlValidator, contentTypeValidator) => (value, opts) => {
const urlValue = url(value, opts)
return isVideoUrl(urlValue, opts) ? urlValue : undefined
return urlValidator(urlValue, opts) || contentTypeValidator(opts)
? urlValue
: undefined
}

const audio = (value, opts) => {
const urlValue = url(value, opts)
return isAudioUrl(urlValue, opts) ? urlValue : undefined
}
const video = media(isVideoUrl, isVideoContentType)

const audio = media(isAudioUrl, isAudioContentType)

const validator = {
audio,
Expand Down Expand Up @@ -403,19 +409,22 @@ const findRule = async (rules, args) => {
return value
}

const toRule = (mapper, opts) => rule => async ({ htmlDom, url }) => {
const value = await rule(htmlDom, url)
return mapper(value, { url, ...opts })
}

const composeRule = rule => ({ from, to = from, ...opts }) => async ({
htmlDom,
url
}) => {
const data = await rule(htmlDom, url)
const value = get(data, from)
return invoke(validator, to, value, { url, ...opts })
}
/**
 * Turns a value mapper into a rule decorator.
 *
 * `toRule(mapper, opts)(rule)` returns an async function that receives
 * `{ htmlDom, url }`, awaits `rule(htmlDom, url)` and hands the result to
 * `mapper` together with `{ url, ...opts }` (keys in `opts` take precedence
 * over `url`).
 *
 * @param {Function} mapper - Called as `mapper(value, options)`.
 * @param {object} [opts] - Extra options merged into the mapper options.
 * @returns {Function} Decorator taking a `rule(htmlDom, url)` function.
 */
const toRule = (mapper, opts) => rule => {
  return async ({ htmlDom, url }) => {
    const extracted = await rule(htmlDom, url)
    const mapperOptions = { url, ...opts }
    return mapper(extracted, mapperOptions)
  }
}

/**
 * Builds rules that read one property out of a shared extraction result.
 *
 * `composeRule(rule)({ from, to = from, ...opts })` returns an async function
 * that receives `{ htmlDom, url }`, awaits `rule(htmlDom, url)`, reads the
 * `from` path out of the result with `get`, and passes that value to the
 * `validator` method named `to` (via `invoke`) along with `{ url, ...opts }`.
 *
 * @param {Function} rule - Called as `rule(htmlDom, url)`.
 * @returns {Function} Factory taking `{ from, to, ...opts }`.
 */
const composeRule = rule => ({ from, to = from, ...opts }) => {
  return async ({ htmlDom, url }) => {
    const extracted = await rule(htmlDom, url)
    const picked = get(extracted, from)
    const validatorOptions = { url, ...opts }
    return invoke(validator, to, picked, validatorOptions)
  }
}

const has = value =>
value !== undefined && !Number.isNaN(value) && hasValues(value)
Expand Down
Loading

0 comments on commit 8b14ad2

Please sign in to comment.