Skip to content

Commit

Permalink
fix(iframe): ensure oembed url is absolute (#342)
Browse files Browse the repository at this point in the history
* fix: ensure oembed url is absolute

* test: add missing settings

* test: update snapshot

* build: update dependencies
  • Loading branch information
Kikobeats committed Jan 10, 2021
1 parent b60bea9 commit 31d4a50
Show file tree
Hide file tree
Showing 15 changed files with 92 additions and 28 deletions.
2 changes: 1 addition & 1 deletion packages/metascraper-amazon/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
],
"dependencies": {
"@metascraper/helpers": "^5.16.9",
"tldts": "~5.6.80"
"tldts": "~5.6.81"
},
"devDependencies": {
"lodash": "latest",
Expand Down
2 changes: 1 addition & 1 deletion packages/metascraper-clearbit/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"async-memoize-one": "~1.0.1",
"got": "~11.8.1",
"lodash": "~4.17.20",
"tldts": "~5.6.80"
"tldts": "~5.6.81"
},
"devDependencies": {
"mocha": "latest",
Expand Down
7 changes: 3 additions & 4 deletions packages/metascraper-helpers/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -255,9 +255,8 @@ const isMime = (contentType, type) => {
return eq(type, get(EXTENSIONS, ext))
}

memoizeOne.EqualityHtmlDom = (newArgs, oldArgs) => {
return newArgs[0].html() === oldArgs[0].html()
}
memoizeOne.EqualityUrlAndHtmlDom = (newArgs, oldArgs) =>
newArgs[0] === oldArgs[0] && newArgs[1].html() === oldArgs[1].html()

const jsonld = memoizeOne(
$ =>
Expand All @@ -275,7 +274,7 @@ const jsonld = memoizeOne(
})
.get()
.filter(Boolean),
memoizeOne.EqualityHtmlDom
(newArgs, oldArgs) => newArgs[0].html() === oldArgs[0].html()
)

const $jsonld = propName => $ => {
Expand Down
9 changes: 8 additions & 1 deletion packages/metascraper-iframe/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,12 @@
"scripts": {
"test": "NODE_PATH=.. TZ=UTC NODE_ENV=test nyc --silent --no-clean --temp-dir ../../.nyc_output mocha test"
},
"license": "MIT"
"license": "MIT",
"mocha": {
"bail": true,
"recursive": true,
"reporter": "spec",
"slow": 300,
"timeout": "120000"
}
}
21 changes: 14 additions & 7 deletions packages/metascraper-iframe/src/from-html.js
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
'use strict'

const { memoizeOne } = require('@metascraper/helpers')
const { normalizeUrl, memoizeOne } = require('@metascraper/helpers')
const { forEach, get } = require('lodash')
const pReflect = require('p-reflect')
const got = require('got')

const jsonOembed = memoizeOne(
$ => $('link[type="application/json+oembed"]').attr('href'),
memoizeOne.EqualityHtmlDom
const getOembedUrl = memoizeOne(
(url, $) =>
normalizeUrl(
url,
$('link[type="application/json+oembed"]').attr('href') ||
$('link[type="text/xml+oembed"]').attr('href')
),
memoizeOne.EqualityUrlAndHtmlDom
)

const fromHTML = gotOpts => async ({ htmlDom, iframe }) => {
const oembedUrl = jsonOembed(htmlDom)
const fromHTML = gotOpts => async ({ htmlDom, url, iframe }) => {
const oembedUrl = getOembedUrl(url, htmlDom)
if (!oembedUrl) return null

const oembedUrlObj = new URL(oembedUrl)
forEach(iframe, (value, key) =>
oembedUrlObj.searchParams.append(key.toLowerCase(), value)
Expand All @@ -21,6 +27,7 @@ const fromHTML = gotOpts => async ({ htmlDom, iframe }) => {
return get(value, 'html', null)
}

fromHTML.test = $ => !!jsonOembed($)
fromHTML.test = (...args) => !!getOembedUrl(...args)

module.exports = fromHTML
module.exports.getOembedUrl = getOembedUrl
2 changes: 1 addition & 1 deletion packages/metascraper-iframe/src/from-provider.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,6 @@ const fromProvider = gotOpts => async ({ url, iframe }) => {
return get(value, 'html', null)
}

fromProvider.test = url => findProvider(url) !== undefined
fromProvider.test = url => !!findProvider(url)

module.exports = fromProvider
8 changes: 2 additions & 6 deletions packages/metascraper-iframe/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,9 @@ const { memoizeOne } = require('@metascraper/helpers')
const fromProvider = require('./from-provider')
const fromHTML = require('./from-html')

const htmlTest = fromHTML.test.bind(fromHTML)
const providerTest = fromProvider.test.bind(fromProvider)

const isValidUrl = memoizeOne(
(url, $) => htmlTest($) || providerTest(url),
(newArgs, oldArgs) =>
newArgs[0] === oldArgs[0] && newArgs[1].html() === oldArgs[1].html()
(url, $) => fromHTML.test(url, $) || fromProvider.test(url),
memoizeOne.EqualityUrlAndHtmlDom
)

const test = ({ url, htmlDom }) => isValidUrl(url, htmlDom)
Expand Down
48 changes: 48 additions & 0 deletions packages/metascraper-iframe/test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ const createMetascraper = require('metascraper')

const { test } = createMetascraperIframe

const { getOembedUrl } = require('../src/from-html')

const commonProviders = [
'https://www.youtube.com/watch?v=Gu8X7vM3Avw',
'https://youtu.be/Gu8X7vM3Avw',
Expand Down Expand Up @@ -39,6 +41,7 @@ describe('metascraper-iframe', () => {
should(isValid).be.true()
})
})

describe('get iframe', () => {
describe('from common providers', () => {
commonProviders.forEach(url => {
Expand All @@ -58,6 +61,7 @@ describe('metascraper-iframe', () => {
should(meta.iframe).be.not.null()
})
})

describe('opts', () => {
it('pass custom got options', async () => {
const cache = new Map()
Expand Down Expand Up @@ -92,4 +96,48 @@ describe('metascraper-iframe', () => {
).be.true()
})
})

describe('.getOembedUrl', () => {
  // Builds a minimal HTML page whose <head> carries a single oEmbed discovery
  // <link> of the given MIME type pointing at `href`, and loads it with cheerio.
  const loadWithOembedLink = (type, href) =>
    cheerio.load(`
      <!DOCTYPE html>
      <html lang="en">
        <head><link rel="alternate" type="${type}" href="${href}"></head>
        <body></body>
      </html>
    `)

  it('detect from `application/json+oembed`', () => {
    const url = 'https://example.com'
    // Deliberately different from `url`: if both were the same, the assertion
    // could not tell "href was read from the <link>" apart from "the page URL
    // was returned as-is".
    // NOTE(review): assumes normalizeUrl leaves a plain absolute https URL
    // unchanged, as the relative-path test below already relies on — confirm.
    const oembedUrl = 'https://example.com/wp-json/oembed.json'
    const $ = loadWithOembedLink('application/json+oembed', oembedUrl)

    should(getOembedUrl(url, $)).be.equal(oembedUrl)
  })

  it('detect oEmbed URL from `text/xml+oembed`', () => {
    const url = 'https://example.com/'
    // Distinct from `url` for the same reason as in the JSON case above.
    const oembedUrl = 'https://example.com/wp-json/oembed.xml'
    const $ = loadWithOembedLink('text/xml+oembed', oembedUrl)

    should(getOembedUrl(url, $)).be.equal(oembedUrl)
  })

  it('ensure output URL is absolute', () => {
    // A root-relative href must be resolved against the page URL.
    const oembedUrl = '/wp-json/oembed.js'
    const url = 'https://example.com'
    const $ = loadWithOembedLink('text/xml+oembed', oembedUrl)

    should(getOembedUrl(url, $)).be.equal(`${url}${oembedUrl}`)
  })
})
})
9 changes: 8 additions & 1 deletion packages/metascraper-logo-favicon/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,12 @@
"scripts": {
"test": "NODE_PATH=.. TZ=UTC NODE_ENV=test nyc --silent --no-clean --temp-dir ../../.nyc_output mocha test"
},
"license": "MIT"
"license": "MIT",
"mocha": {
"bail": true,
"recursive": true,
"reporter": "spec",
"slow": 300,
"timeout": "120000"
}
}
2 changes: 1 addition & 1 deletion packages/metascraper-media-provider/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
"p-reflect": "~2.1.0",
"p-retry": "~4.2.0",
"p-timeout": "~4.1.0",
"tldts": "~5.6.80",
"tldts": "~5.6.81",
"youtube-dl": "~3.0.2"
},
"devDependencies": {
Expand Down
2 changes: 1 addition & 1 deletion packages/metascraper-soundcloud/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
],
"dependencies": {
"@metascraper/helpers": "^5.16.9",
"tldts": "~5.6.80"
"tldts": "~5.6.81"
},
"devDependencies": {
"mocha": "latest",
Expand Down
2 changes: 1 addition & 1 deletion packages/metascraper-spotify/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
"@metascraper/helpers": "^5.16.9",
"async-memoize-one": "~1.0.1",
"spotify-url-info": "~2.2.0",
"tldts": "~5.6.80"
"tldts": "~5.6.81"
},
"devDependencies": {
"kind-of": "latest",
Expand Down
2 changes: 1 addition & 1 deletion packages/metascraper-telegram/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
"async-memoize-one": "~1.0.1",
"css-urls": "~2.0.11",
"jsdom": "~16.4.0",
"tldts": "~5.6.80"
"tldts": "~5.6.81"
},
"devDependencies": {
"mocha": "latest",
Expand Down
2 changes: 1 addition & 1 deletion packages/metascraper-uol/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
],
"dependencies": {
"@metascraper/helpers": "^5.16.9",
"tldts": "~5.6.80"
"tldts": "~5.6.81"
},
"devDependencies": {
"mocha": "latest",
Expand Down
2 changes: 1 addition & 1 deletion packages/metascraper/__snapshots__/index.js.snap-shot
Original file line number Diff line number Diff line change
Expand Up @@ -651,7 +651,7 @@ exports['pr-newswire 1'] = {

exports['reactpodcast 1'] = {
"author": null,
"date": "2021-01-08T00:00:00.000Z",
"date": "2021-01-10T00:00:00.000Z",
"description": "Tom Preston-Werner is building a full-stack framework for React and GraphQL developers.\nIn this episode we talk about RedwoodJS, a framework that’s bringing full-stack to Jamstack. Tom is a co-founder of GitHub, creator of Jekyll (the OG static site generator), TOML, and Semantic Versioning. If yo…",
"image": null,
"video": null,
Expand Down

0 comments on commit 31d4a50

Please sign in to comment.