Skip to content

Commit

Permalink
Merge 6e408f9 into 42dac0d
Browse files Browse the repository at this point in the history
  • Loading branch information
Kikobeats committed Aug 8, 2023
2 parents 42dac0d + 6e408f9 commit b7755fd
Show file tree
Hide file tree
Showing 15 changed files with 889 additions and 652 deletions.
15 changes: 13 additions & 2 deletions packages/metascraper-audio/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,19 @@ const withContentType = (url, contentType) =>
isMime(contentType, 'audio') ? url : false

const audioRules = [
toAudio($ => $('meta[property="og:audio:secure_url"]').attr('content')),
toAudio($ => $('meta[property="og:audio"]').attr('content')),
({ url, htmlDom: $ }) => {
const src =
$('meta[property="og:audio:secure_url"]').attr('content') ||
$('meta[property="og:audio:url"]').attr('content') ||
$('meta[property="og:audio"]').attr('content')

return src
? audio(src, {
url,
type: $('meta[property="og:audio:type"]').attr('content')
})
: undefined
},
toAudio($ => {
const contentType =
$('meta[name="twitter:player:stream:content_type"]').attr('content') ||
Expand Down
57 changes: 33 additions & 24 deletions packages/metascraper-helpers/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,15 @@ const isAudioExtension = url => isMediaTypeExtension(url, AUDIO)

// Delegates to `isMediaTypeExtension`, checking `url` against the `IMAGE` media type.
const isImageExtension = url => isMediaTypeExtension(url, IMAGE)

/**
 * Build a predicate that tells whether the `type` option (a MIME content
 * type such as `video/mp4`) ends with one of the given extensions.
 *
 * @param {string[]} extensions - Extensions to look for at the end of the type.
 * @returns {(opts?: { type?: string }) => boolean} Predicate over an options
 *   object; returns `false` when no usable `type` is present.
 */
const isContentType = extensions => opts => {
  const type = opts?.type
  // A missing attribute can surface as `undefined` or `null`; anything that
  // is not a string is treated as "no content type" instead of throwing.
  if (typeof type !== 'string') return false
  // MIME types are case-insensitive and may carry parameters
  // (`video/mp4; codecs="avc1"`), so compare only the bare, lowercased type.
  const [mime] = type.split(';')
  const normalized = mime.trim().toLowerCase()
  return extensions.some(extension =>
    normalized.endsWith(String(extension).toLowerCase())
  )
}

// Predicate over `{ type }`: matches when `type` ends with any key of `videoExtensions`.
const isVideoContentType = isContentType(Object.keys(videoExtensions))

// Predicate over `{ type }`: matches when `type` ends with any key of `audioExtensions`.
const isAudioContentType = isContentType(Object.keys(audioExtensions))

const extension = (str = '') => {
const url = urlObject(
str,
Expand Down Expand Up @@ -323,11 +332,7 @@ const jsonld = memoizeOne(
.map(function () {
try {
const el = $(this)
const json = JSON.parse(
$(el)
.contents()
.text()
)
const json = JSON.parse($(el).contents().text())

const { '@graph': graph, ...props } = json
if (!graph) return json
Expand Down Expand Up @@ -364,15 +369,16 @@ const image = (value, opts) => {

// `logo` is an alias of `image`: both names refer to the same function.
const logo = image

const video = (value, opts) => {
const media = (urlValidator, contentTypeValidator) => (value, opts) => {
const urlValue = url(value, opts)
return isVideoUrl(urlValue, opts) ? urlValue : undefined
return urlValidator(urlValue, opts) || contentTypeValidator(opts)
? urlValue
: undefined
}

const audio = (value, opts) => {
const urlValue = url(value, opts)
return isAudioUrl(urlValue, opts) ? urlValue : undefined
}
// Video validator: `media` wired with the video URL check and the video content-type check.
const video = media(isVideoUrl, isVideoContentType)

// Audio validator: `media` wired with the audio URL check and the audio content-type check.
const audio = media(isAudioUrl, isAudioContentType)

const validator = {
audio,
Expand Down Expand Up @@ -403,19 +409,22 @@ const findRule = async (rules, args) => {
return value
}

const toRule = (mapper, opts) => rule => async ({ htmlDom, url }) => {
const value = await rule(htmlDom, url)
return mapper(value, { url, ...opts })
}

const composeRule = rule => ({ from, to = from, ...opts }) => async ({
htmlDom,
url
}) => {
const data = await rule(htmlDom, url)
const value = get(data, from)
return invoke(validator, to, value, { url, ...opts })
}
/**
 * Turn a value mapper into a rule factory.
 *
 * `toRule(mapper, opts)(rule)` yields an async function that receives
 * `{ htmlDom, url }`, awaits `rule(htmlDom, url)` and hands the result to
 * `mapper` together with `{ url, ...opts }` (keys in `opts` win over `url`).
 *
 * @param {Function} mapper - Called as `mapper(value, options)`.
 * @param {object} [opts] - Extra options merged after `url`.
 * @returns {Function} Factory taking the extracting `rule`.
 */
const toRule = (mapper, opts) => rule => {
  return async ({ htmlDom, url }) => {
    const extracted = await rule(htmlDom, url)
    const mapperOptions = { url, ...opts }
    return mapper(extracted, mapperOptions)
  }
}

/**
 * Build rules that read one property out of the data returned by `rule`
 * and pass it through the matching entry of `validator`.
 *
 * `composeRule(rule)({ from, to, ...opts })` yields an async function over
 * `{ htmlDom, url }`: it awaits `rule(htmlDom, url)`, reads the `from` path
 * from the result and invokes `validator[to]` with that value and
 * `{ url, ...opts }`. `to` defaults to `from`.
 */
const composeRule = rule => {
  return ({ from, to = from, ...opts }) => {
    return async ({ htmlDom, url }) => {
      const data = await rule(htmlDom, url)
      const picked = get(data, from)
      const validatorOptions = { url, ...opts }
      return invoke(validator, to, picked, validatorOptions)
    }
  }
}

/**
 * Tell whether `value` counts as present: it must not be `undefined` or
 * `NaN`, and `hasValues(value)` decides the rest.
 */
const has = value => {
  if (value === undefined || Number.isNaN(value)) return false
  return hasValues(value)
}
Expand Down
50 changes: 49 additions & 1 deletion packages/metascraper-helpers/test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ const test = require('ava')
const {
$jsonld,
absoluteUrl,
audio,
author,
date,
description,
Expand All @@ -23,7 +24,8 @@ const {
lang,
normalizeUrl,
parseUrl,
url
url,
video
} = require('..')

const measure = fn => {
Expand Down Expand Up @@ -152,6 +154,52 @@ test('.isMime', t => {
t.true(isMime('audio/mp3', 'audio'))
})

test('.audio', t => {
  // The URL has no file extension, so it can only be accepted through the
  // `type` option.
  const fileUrl =
    'https://app.croct.dev/assets/workspace/customer-assets/9d97037d-64a2-4c25-b443-bfc2972f3c9e/a0442e2b-a384-4f2b-b443-9f34c2215e16'

  t.is(audio(fileUrl, { type: 'audio/wav' }), fileUrl)
  // Use a second, different content type (as the `.video` test does) rather
  // than repeating the `audio/wav` assertion.
  t.is(audio(fileUrl, { type: 'audio/mp3' }), fileUrl)
  // Without a content type there is nothing identifying the URL as audio.
  t.is(audio(fileUrl), undefined)
})

test('.video', t => {
  // The URL has no file extension, so it can only be accepted through the
  // `type` option.
  const fileUrl =
    'https://app.croct.dev/assets/workspace/customer-assets/9d97037d-64a2-4c25-b443-bfc2972f3c9e/a0442e2b-a384-4f2b-b443-9f34c2215e16'

  t.is(video(fileUrl, { type: 'video/mp4' }), fileUrl)
  t.is(video(fileUrl, { type: 'video/mpeg' }), fileUrl)
  // Without a content type there is nothing identifying the URL as video.
  t.is(video(fileUrl), undefined)
})

test('.isVideoUrl', t => {
t.false(isVideoUrl('demo.mp4'))
t.false(isVideoUrl('/demo.mp4'))
Expand Down
41 changes: 25 additions & 16 deletions packages/metascraper-media-provider/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,19 @@ const {

const createGetMedia = require('./get-media')

const isProtocol = value => ({ url }) => eq(protocolFn(url), value)
/**
 * Build a predicate over a format object: it passes when `protocolFn` applied
 * to the format's `url` equals `expected` (compared with `eq`).
 */
const isProtocol = expected => format =>
  eq(protocolFn(format.url), expected)

// Predicate for formats whose URL protocol is `https`.
const isHttps = isProtocol('https')

const isMIME = extension => ({ ext, url }) =>
ext ? eq(ext, extension) : eq(extensionFn(url), extension)
/**
 * Build a predicate telling whether a format entry corresponds to `extension`.
 *
 * When the entry has a usable `ext`, that value decides. When `ext` is
 * missing/empty or the placeholder `'unknown_video'`, the extension of `url`
 * is checked instead, and then the `format` text.
 *
 * @param {string} extension - Extension to match, e.g. `'mp4'`.
 * @returns {(entry: { ext?: string, url?: string, format?: string }) => boolean}
 */
const isMIME =
  extension =>
  ({ ext, url, format }) => {
    // Keep the URL fallback for entries without `ext`, not only for the
    // 'unknown_video' placeholder.
    if (ext && ext !== 'unknown_video') return eq(ext, extension)

    return (
      eq(extensionFn(url), extension) ||
      // `format` may be absent; guard before searching it.
      (typeof format === 'string' && format.includes(extension))
    )
  }

const isMp4 = isMIME('mp4')
const isMp3 = isMIME('mp3')
Expand Down Expand Up @@ -60,19 +67,21 @@ const hasVideo = format =>
/**
 * Tell whether the format's URL carries the query parameter `download=1`.
 * Throws (from `new URL`) when `url` cannot be parsed.
 */
const isDownloadable = ({ url }) => {
  const { searchParams } = new URL(url)
  const flag = searchParams.get('download')
  return flag === '1'
}

const getFormatUrls = ({ orderBy }) => (input, filters) => {
const formats = get(input, 'formats') ||
get(input, 'entries[0].formats') || [input]

const url = chain(formats)
.filter(overEvery(filters))
.orderBy(orderBy, 'asc')
.map('url')
.last()
.value()

return !isEmpty(url) ? url : undefined
}
/**
 * Build a URL picker for a list of media formats.
 *
 * The returned function takes `input` and an array of `filters`. Formats are
 * read from `input.formats`, else `input.entries[0].formats`, else `input`
 * itself is used as the only candidate. Candidates passing every filter are
 * sorted ascending by `orderBy`, and the `url` of the last one is returned
 * (`undefined` when that URL is empty).
 */
const getFormatUrls =
  ({ orderBy }) =>
  (input, filters) => {
    const candidates =
      get(input, 'formats') || get(input, 'entries[0].formats') || [input]
    const passesAllFilters = overEvery(filters)

    const selectedUrl = chain(candidates)
      .filter(passesAllFilters)
      .orderBy(orderBy, 'asc')
      .map('url')
      .last()
      .value()

    return isEmpty(selectedUrl) ? undefined : selectedUrl
  }

// Video URLs are ranked by the `tbr` field of each format.
const getVideoUrls = getFormatUrls({ orderBy: 'tbr' })

Expand Down
28 changes: 28 additions & 0 deletions packages/metascraper-media-provider/test/video/format.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
'use strict'

const test = require('ava')

const { metascraper } = require('../helpers')

test('from file url content type', async t => {
  // Extension-less URL: the video is expected to be the page URL itself.
  const url =
    'https://app.croct.dev/assets/workspace/customer-assets/9d97037d-64a2-4c25-b443-bfc2972f3c9e/a0442e2b-a384-4f2b-b443-9f34c2215e16'

  const { video } = await metascraper({ url })

  t.is(video, url)
})

test('from file url extension', async t => {
  // URL ending in `.mp4`: the video is expected to be the page URL itself.
  const url = 'https://cdn-microlink.vercel.app/file-examples/sample.mp4'

  const { video } = await metascraper({ url })

  t.is(video, url)
})
40 changes: 30 additions & 10 deletions packages/metascraper-video/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,29 +10,49 @@ const {
video
} = require('@metascraper/helpers')

const { chain, isEqual } = require('lodash')
const memoize = require('@keyvhq/memoize')
const pReflect = require('p-reflect')
const { chain } = require('lodash')
const got = require('got')

// `toRule` bound to `urlFn`: the value a rule extracts is passed through `urlFn`.
const toUrl = toRule(urlFn)

// `toRule` bound to `video`: the value a rule extracts is passed through `video`.
const toVideo = toRule(video)

const toVideoFromDom = toRule((domNodes, opts) => {
const videoUrl = chain(domNodes)
.map('attribs.src')
.uniq()
.orderBy(videoUrl => extension(videoUrl) === 'mp4', ['desc'])
.first()
const values = chain(domNodes)
.map(domNode => ({
src: domNode?.attribs.src,
type: domNode?.attribs.type
}))
.uniqWith(isEqual)
.orderBy(
({ src, type }) => extension(src) === 'mp4' || type?.includes('mp4'),
['desc']
)
.value()

return video(videoUrl, opts)
let result
values.find(
({ src, type }) => (result = video(src, Object.assign({ type }, opts)))
)
return result
})

const videoRules = [
toVideo($ => $('meta[property="og:video:secure_url"]').attr('content')),
toVideo($ => $('meta[property="og:video:url"]').attr('content')),
toVideo($ => $('meta[property="og:video"]').attr('content')),
({ url, htmlDom: $ }) => {
const src =
$('meta[property="og:video:secure_url"]').attr('content') ||
$('meta[property="og:video:url"]').attr('content') ||
$('meta[property="og:video"]').attr('content')

return src
? video(src, {
url,
type: $('meta[property="og:video:type"]').attr('content')
})
: undefined
},
toVideo($ => $('meta[name="twitter:player:stream"]').attr('content')),
toVideo($ => $('meta[property="twitter:player:stream"]').attr('content')),
toVideo($jsonld('contentUrl')),
Expand Down
1,021 changes: 534 additions & 487 deletions packages/metascraper-video/test/fixtures/providers/bluecadet.com.html

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions packages/metascraper-video/test/fixtures/video-type-relative.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<!DOCTYPE html>
<html>

<head>
<title>a0442e2b-a384-4f2b-b443-9f34c2215e16</title>
<meta property="og:site_name" content="croct.dev">
<meta name="date" content="2023-08-04T21:10:56.000Z">
<link rel="canonical" href="https://app.croct.dev/assets/workspace/customer-assets/9d97037d-64a2-4c25-b443-bfc2972f3c9e/a0442e2b-a384-4f2b-b443-9f34c2215e16">
</head>

<body><video>
<source src="video.mp4" type="video/mp4">
</video></body>

</html>
16 changes: 16 additions & 0 deletions packages/metascraper-video/test/fixtures/video-type.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<!DOCTYPE html>
<html>

<head>
<title>a0442e2b-a384-4f2b-b443-9f34c2215e16</title>
<meta property="og:site_name" content="croct.dev">
<meta name="date" content="2023-08-04T21:10:56.000Z">
<link rel="canonical" href="https://app.croct.dev/assets/workspace/customer-assets/9d97037d-64a2-4c25-b443-bfc2972f3c9e/a0442e2b-a384-4f2b-b443-9f34c2215e16">
</head>

<body><video>
<source autoplay="" src="https://app.croct.dev/assets/workspace/customer-assets/9d97037d-64a2-4c25-b443-bfc2972f3c9e/a0442e2b-a384-4f2b-b443-9f34c2215e16" type="video/mp4">
<source src="https://app.croct.dev/assets/workspace/customer-assets/9d97037d-64a2-4c25-b443-bfc2972f3c9e/a0442e2b-a384-4f2b-b443-9f34c2215e16" type="video/mp4">
</video></body>

</html>
Loading

0 comments on commit b7755fd

Please sign in to comment.