Skip to content

Commit

Permalink
fix(logo): avoid data uri with no length
Browse files Browse the repository at this point in the history
Since the logo can be detected from HTML markup, there are cases where the detected data URI is empty. An empty data URI ('data:,') is considered a valid URL & URI, but it can't be considered a valid image.
  • Loading branch information
Kikobeats committed Feb 10, 2024
1 parent 8f3f9aa commit e4f7eed
Show file tree
Hide file tree
Showing 8 changed files with 47 additions and 23 deletions.
7 changes: 6 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,12 @@
"commitlint": {
"extends": [
"@commitlint/config-conventional"
]
],
"rules": {
"body-max-length": [
0
]
}
},
"nano-staged": {
"*.js": [
Expand Down
13 changes: 10 additions & 3 deletions packages/metascraper-helpers/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ const _normalizeUrl = require('normalize-url')
const smartquotes = require('smartquotes')
const { decodeHTML } = require('entities')
const iso6393 = require('iso-639-3/to-1')
const dataUri = require('data-uri-utils')
const hasValues = require('has-values')
const chrono = require('chrono-node')
const isIso = require('isostring')
Expand Down Expand Up @@ -375,11 +376,17 @@ const $jsonld = propName => $ => {

const image = (value, opts) => {
const urlValue = url(value, opts)
return urlValue !== undefined &&

const result =
urlValue !== undefined &&
!isAudioUrl(urlValue, opts) &&
!isVideoUrl(urlValue, opts)
? urlValue
: undefined
? urlValue
: undefined

if (!dataUri.test(result)) return result
const buffer = dataUri.toBuffer(dataUri.normalize(result))
return buffer.length ? result : undefined
}

const logo = image
Expand Down
1 change: 1 addition & 0 deletions packages/metascraper-helpers/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"audio-extensions": "0.0.0",
"chrono-node": "~2.7.4",
"condense-whitespace": "~2.0.0",
"data-uri-utils": "~1.0.7",
"entities": "~4.5.0",
"file-extension": "~4.0.5",
"has-values": "~2.0.1",
Expand Down
1 change: 1 addition & 0 deletions packages/metascraper-helpers/test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ test('.image', t => {
image({ '@id': 'https://www.milanocittastato.it/#/schema/logo/image/' }),
undefined
)
t.is(image('data:,'), undefined)
})

test('.isImageUrl', t => {
Expand Down
6 changes: 3 additions & 3 deletions packages/metascraper-logo-favicon/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ const {
parseUrl,
normalizeUrl,
toRule,
url: urlFn
logo: logoFn
} = require('@metascraper/helpers')

const SIZE_REGEX_BY_X = /\d+x\d+/

const toUrl = toRule(urlFn)
const toLogo = toRule(logoFn)

const toSize = (input, url) => {
if (isEmpty(input)) return
Expand Down Expand Up @@ -170,7 +170,7 @@ module.exports = ({
const rootFavicon = createRootFavicon({ getLogo, withRootFavicon })
return {
logo: [
toUrl($ => {
toLogo($ => {
const sizes = getSizes($, sizeSelectors)
const size = pickFn(sizes, pickBiggerSize)
return get(size, 'url')
Expand Down
8 changes: 8 additions & 0 deletions packages/metascraper-logo-favicon/test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -237,3 +237,11 @@ test('resolve logo using from google associated with the domain', async t => {
const metadata = await metascraper({ url })
t.true(metadata.logo.includes('gstatic'))
})

// Regression test: an icon <link> whose href is the empty data URI ('data:,')
// must not be reported as the page logo.
test('avoid data URI when data length is 0', async t => {
const url = 'https://www.adobe.com/'
// The only icon candidate in the markup is a zero-length data URI.
const html = '<link rel="icon" href="data:,">'
const metascraper = createMetascraper()
const metadata = await metascraper({ url, html })
// The logo is expected to be the site's /favicon.ico instead of the data URI
// (presumably via the root-favicon fallback rule — confirm against the rule set).
t.is(metadata.logo, 'https://www.adobe.com/favicon.ico')
})
30 changes: 15 additions & 15 deletions packages/metascraper-logo/src/index.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
'use strict'

const { $jsonld, url: urlFn, toRule } = require('@metascraper/helpers')
const { $jsonld, logo: logoFn, toRule } = require('@metascraper/helpers')
const { eq, get } = require('lodash')

const toLogoUrl = ($, propName) => {
Expand All @@ -13,26 +13,26 @@ const toLogoUrl = ($, propName) => {
module.exports = ({ filter } = {}) => {
const mapper = filter
? async value => {
const result = urlFn(value)
const result = logoFn(value)
return typeof result === 'string' ? await filter(result) : result
}
: urlFn
: logoFn

const toUrl = toRule(mapper)
const toLogo = toRule(mapper)

return {
logo: [
toUrl($ => $('meta[property="og:logo"]').attr('content')),
toUrl($ => $('meta[itemprop="logo"]').attr('content')),
toUrl($ => $('img[itemprop="logo"]').attr('src')),
toUrl($ => toLogoUrl($, 'brand.logo')),
toUrl($ => toLogoUrl($, 'organization.logo')),
toUrl($ => toLogoUrl($, 'place.logo')),
toUrl($ => toLogoUrl($, 'product.logo')),
toUrl($ => toLogoUrl($, 'service.logo')),
toUrl($ => toLogoUrl($, 'publisher.logo')),
toUrl($ => toLogoUrl($, 'logo.url')),
toUrl($ => toLogoUrl($, 'logo'))
toLogo($ => $('meta[property="og:logo"]').attr('content')),
toLogo($ => $('meta[itemprop="logo"]').attr('content')),
toLogo($ => $('img[itemprop="logo"]').attr('src')),
toLogo($ => toLogoUrl($, 'brand.logo')),
toLogo($ => toLogoUrl($, 'organization.logo')),
toLogo($ => toLogoUrl($, 'place.logo')),
toLogo($ => toLogoUrl($, 'product.logo')),
toLogo($ => toLogoUrl($, 'service.logo')),
toLogo($ => toLogoUrl($, 'publisher.logo')),
toLogo($ => toLogoUrl($, 'logo.url')),
toLogo($ => toLogoUrl($, 'logo'))
]
}
}
4 changes: 3 additions & 1 deletion packages/metascraper-telegram/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ const {
author,
date,
image,
logo,
memoizeOne,
parseUrl,
sanetizeUrl,
Expand All @@ -17,6 +18,7 @@ const got = require('got')

const toAuthor = toRule(author)
const toImage = toRule(image)
const toLogo = toRule(logo)
const toDate = toRule(date)

const TELEGRAM_DOMAINS = ['telegram.me', 't.me']
Expand Down Expand Up @@ -48,7 +50,7 @@ module.exports = ({ gotOpts, keyvOpts } = {}) => {

const rules = {
author: [toAuthor($ => $('meta[property="og:title"]').attr('content'))],
logo: [toImage($ => $('meta[property="og:image"]').attr('content'))],
logo: [toLogo($ => $('meta[property="og:image"]').attr('content'))],
image: [
toImage(
loadIframe(($iframe, url) => {
Expand Down

0 comments on commit e4f7eed

Please sign in to comment.