From 5a22fc33f68b73d990bc7027baad03e138b50a69 Mon Sep 17 00:00:00 2001 From: Kiko Beats Date: Thu, 9 Nov 2023 13:07:42 +0100 Subject: [PATCH] feat(logo-favicon): granular control (#667) --- package.json | 2 +- packages/metascraper-logo-favicon/README.md | 21 +++++ packages/metascraper-logo-favicon/index.js | 85 ++++++++++++------ .../metascraper-logo-favicon/test/favicon.js | 10 +++ .../metascraper-logo-favicon/test/get-logo.js | 17 ++++ .../metascraper-logo-favicon/test/google.js | 29 ++++++ .../test/root-favicon.js | 42 +++++++++ .../test/snapshots/index.js.md | 2 +- .../test/snapshots/index.js.snap | Bin 727 -> 725 bytes .../test/snapshots/index.js.md | 2 +- .../test/snapshots/index.js.snap | Bin 1387 -> 1389 bytes 11 files changed, 180 insertions(+), 30 deletions(-) create mode 100644 packages/metascraper-logo-favicon/test/favicon.js create mode 100644 packages/metascraper-logo-favicon/test/get-logo.js create mode 100644 packages/metascraper-logo-favicon/test/google.js create mode 100644 packages/metascraper-logo-favicon/test/root-favicon.js diff --git a/package.json b/package.json index 11f59d9a5..33a86647b 100644 --- a/package.json +++ b/package.json @@ -178,7 +178,7 @@ "standard-markdown" ], "package.json": [ - "finepack --sort-ignore-object-at ava" + "finepack" ] }, "simple-git-hooks": { diff --git a/packages/metascraper-logo-favicon/README.md b/packages/metascraper-logo-favicon/README.md index 17f802f9b..38d27ad6e 100644 --- a/packages/metascraper-logo-favicon/README.md +++ b/packages/metascraper-logo-favicon/README.md @@ -20,6 +20,27 @@ $ npm install metascraper-logo-favicon --save #### options +##### google + +Type: `boolean`
+Default: `true` + +It enables logo resolution using Google API. + +##### favicon + +Type: `boolean`
+Default: `true` + +It tries to resolve `favicon.ico` of the url. + +##### rootFavicon + +Type: `boolean`|`regexp`
+Default: `true` + +It tries to resolve `favicon.ico` of the url when the URL is a subdomain. + ##### pickFn Type: `function` diff --git a/packages/metascraper-logo-favicon/index.js b/packages/metascraper-logo-favicon/index.js index 2e2bdbe99..c183cdd5c 100644 --- a/packages/metascraper-logo-favicon/index.js +++ b/packages/metascraper-logo-favicon/index.js @@ -105,38 +105,69 @@ const pickBiggerSize = sizes => { pickBiggerSize.sortBySize = collection => orderBy(collection, ['size.priority'], ['desc']) -const createGetLogo = ({ gotOpts, keyvOpts }) => { - const getLogo = async url => { - const faviconUrl = logo('/favicon.ico', { url }) - if (!faviconUrl) return +const favicon = async (url, { gotOpts } = {}) => { + const faviconUrl = logo('/favicon.ico', { url }) + if (!faviconUrl) return undefined + const response = await reachableUrl(faviconUrl, gotOpts) + return reachableUrl.isReachable(response) && + response.headers['content-type']?.startsWith('image') + ? faviconUrl + : undefined +} - let response = await reachableUrl(faviconUrl, gotOpts) +const google = async (url, { gotOpts } = {}) => { + const response = await reachableUrl(google.url(url), gotOpts) + return reachableUrl.isReachable(response) ? response.url : undefined +} - if ( - reachableUrl.isReachable(response) && - response.headers['content-type']?.startsWith('image') - ) { - return faviconUrl - } +google.url = (url, size = 128) => + `https://www.google.com/s2/favicons?domain_url=${url}&sz=${size}` - response = await reachableUrl( - `https://www.google.com/s2/favicons?domain_url=${url}&sz=128`, - gotOpts +const createGetLogo = ({ withGoogle, withFavicon, gotOpts, keyvOpts }) => { + const getLogo = async url => { + const providers = [withFavicon && favicon, withGoogle && google].filter( + Boolean ) - return reachableUrl.isReachable(response) ? response.url : undefined + for (const provider of providers) { + const logoUrl = await provider(url, { gotOpts }) + if (logoUrl) return logoUrl + } } - return memoize(getLogo, keyvOpts, { + const fn = memoize(getLogo, keyvOpts, { value: value => (value === undefined ? null : value) }) + + return (...args) => + fn(...args).then(value => (value === null ? undefined : value)) } -const castNull = value => (value === null ? undefined : value) +const createRootFavicon = ({ getLogo, withRootFavicon = true } = {}) => { + if (withRootFavicon === false) return undefined + return ({ url }) => { + const urlObj = new URL(url) + const domain = parseUrl(url).domain + + if (withRootFavicon instanceof RegExp && withRootFavicon.test(domain)) { + return undefined + } -module.exports = ({ gotOpts, keyvOpts, pickFn = pickBiggerSize } = {}) => { - const getLogo = createGetLogo({ gotOpts, keyvOpts }) + urlObj.hostname = domain + return getLogo(normalizeUrl(urlObj)) + } +} +module.exports = ({ + google: withGoogle = true, + favicon: withFavicon = true, + rootFavicon: withRootFavicon = true, + gotOpts, + keyvOpts, + pickFn = pickBiggerSize +} = {}) => { + const getLogo = createGetLogo({ withGoogle, withFavicon, gotOpts, keyvOpts }) + const rootFavicon = createRootFavicon({ getLogo, withRootFavicon }) return { logo: [ toUrl($ => { @@ -144,13 +175,13 @@ module.exports = ({ gotOpts, keyvOpts, pickFn = pickBiggerSize } = {}) => { const size = pickFn(sizes, pickBiggerSize) return get(size, 'url') }), - async ({ url }) => castNull(await getLogo(normalizeUrl(url))), - async ({ url }) => { - const urlObj = new URL(url) - urlObj.hostname = parseUrl(url).domain - const result = await getLogo(normalizeUrl(urlObj)) - return castNull(result) - } - ] + ({ url }) => getLogo(normalizeUrl(url)), + rootFavicon + ].filter(Boolean) } } + +module.exports.favicon = favicon +module.exports.google = google +module.exports.createRootFavicon = createRootFavicon +module.exports.createGetLogo = createGetLogo diff --git a/packages/metascraper-logo-favicon/test/favicon.js b/packages/metascraper-logo-favicon/test/favicon.js new file mode 100644 index 000000000..f56b462b1 --- /dev/null +++ b/packages/metascraper-logo-favicon/test/favicon.js @@ -0,0 +1,10 @@ +'use strict' + +const test = require('ava') + +const { favicon } = require('..') + +test('with { contentType: \'image/vnd.microsoft.icon\' }', async t => { + const url = 'https://microlink.io/' + t.is(await favicon(url), 'https://microlink.io/favicon.ico') +}) diff --git a/packages/metascraper-logo-favicon/test/get-logo.js b/packages/metascraper-logo-favicon/test/get-logo.js new file mode 100644 index 000000000..8f88b6038 --- /dev/null +++ b/packages/metascraper-logo-favicon/test/get-logo.js @@ -0,0 +1,17 @@ +'use strict' + +const test = require('ava') + +const { createGetLogo } = require('..') + +test('serialize null correctly', async t => { + const cache = new Map() + const keyvOpts = { store: cache } + const getLogo = createGetLogo({ + keyvOpts, + withGoogle: false, + withFavicon: false + }) + t.is(await getLogo('https://example.com'), undefined) + t.is(JSON.parse(cache.get('https://example.com')).value, null) +}) diff --git a/packages/metascraper-logo-favicon/test/google.js b/packages/metascraper-logo-favicon/test/google.js new file mode 100644 index 000000000..da3b2a960 --- /dev/null +++ b/packages/metascraper-logo-favicon/test/google.js @@ -0,0 +1,29 @@ +'use strict' + +const test = require('ava') +const got = require('got') + +const { google } = require('..') + +test('return undefined under no logo', async t => { + const url = 'https://idontexist.lol' + t.is(await google(url), undefined) +}) + +test('return logo when URL is reachable', async t => { + const url = 'https://microlink.io/' + const logoUrl = await google(url) + t.true(typeof logoUrl === 'string') + + const fallbackUrl = google.url() + const [logo, fallback] = await Promise.all( + [logoUrl, fallbackUrl].map(url => + got(url, { + responseType: 'buffer', + resolveBodyOnly: true, + throwHttpErrors: false + }) + ) + ) + t.true(logo.length !== fallback.length) +}) diff --git a/packages/metascraper-logo-favicon/test/root-favicon.js b/packages/metascraper-logo-favicon/test/root-favicon.js new file mode 100644 index 000000000..fedf04989 --- /dev/null +++ b/packages/metascraper-logo-favicon/test/root-favicon.js @@ -0,0 +1,42 @@ +'use strict' + +const test = require('ava') +const got = require('got') + +const { createGetLogo, createRootFavicon, google } = require('..') + +test('enable it by default', async t => { + const getLogo = createGetLogo({ withGoogle: true, withFavicon: true }) + const rootFavicon = createRootFavicon({ getLogo }) + const url = 'https://geolocation-indol.vercel.app/' + const logoUrl = await rootFavicon({ url }) + const domainLogoUrl = google.url('https://vercel.app/') + + const [logo, domainLogo] = await Promise.all( + [logoUrl, domainLogoUrl].map(url => + got(url, { + responseType: 'buffer', + resolveBodyOnly: true, + throwHttpErrors: false + }) + ) + ) + + t.is(logo.length, domainLogo.length) +}) + +test('exclude certain subdomains', async t => { + const getLogo = createGetLogo({ withGoogle: true, withFavicon: true }) + const rootFavicon = createRootFavicon({ + getLogo, + withRootFavicon: /^vercel\.app/ + }) + const url = 'https://geolocation-indol.vercel.app/' + const logoUrl = await rootFavicon({ url }) + t.is(logoUrl, undefined) +}) + +test('disable it when \'{ withRootFavicon: false}\'', async t => { + const rootFavicon = createRootFavicon({ withRootFavicon: false }) + t.is(rootFavicon, undefined) +}) diff --git a/packages/metascraper-manifest/test/snapshots/index.js.md b/packages/metascraper-manifest/test/snapshots/index.js.md index f01812f93..3d6746c66 100644 --- a/packages/metascraper-manifest/test/snapshots/index.js.md +++ b/packages/metascraper-manifest/test/snapshots/index.js.md @@ -88,5 +88,5 @@ Generated by [AVA](https://avajs.dev). { description: 'Get breaking news, politics, trending music, world events, sports scores, and the latest global news stories as they unfold - all with less data.', lang: null, - publisher: 'Twitter', + publisher: 'X', } diff --git a/packages/metascraper-manifest/test/snapshots/index.js.snap b/packages/metascraper-manifest/test/snapshots/index.js.snap index d89a52807d9ccdf3614b71e23aa73cb300b4f7f7..4a9e4fea6f92cf577d090dfcd15aa62d63c7797d 100644 GIT binary patch literal 725 zcmV;`0xJDMRzVAEuf3fxg)udh=LA^NFctHRA_CF z?ai&d*3SCwQc%#*($R*epyeHS0!m71kQn>yB;@0A-3cs>cD=JZ^8EI{vp$Y|rGq|x z_zYHB1tT}qfIQPSky*@T#8Z*U2+;D^FZQ^sT-<;N65%V5n28({ZSnna7z!))zqFhs zr+nBAwy;a>V=|xY9oZ+cJ8W@{Ek5%PA+U^{bXQkb9k$}Id57JmIdgYe%V7&^Y&irB zjLfanSvA&LIX7coi7dX#rDJ`$>tc*WNz3B(?(6t7#0NMjE0jRdfbFbWQkLDRIL$`c!J3HMGSv$|-X;pOpyowrprEF56 zB5hI#|G(pNSc437dWlf(P0H@qEQR7XLJ^2yv3;6`CsPrFSL8~I&>M8NwjXpid)T5F1wb_wm~9j<%;GwDfWuvq5yE3tRYbh1!-UG5S7BMo$|?!{M+UqZL*L6{k8O z;uU+o85gS)h1KobsmoDG>_8Wg9EYcLn!>38rVT(wO&^O000000000BsmcMV*Fcinnz5W2IidOIkcz~_Ba7TMbD+V?gDuJM&c0x|_ zlDOx@k)LyS9T?bH*;vb8!OH)@U%<%73=%K7q%H0BdfN#iJ=yWIpYlHb`Z*_|r?lV4 z51+wGt6<~=8jz>j#xjk#40$3_83J1V`sE&%rHdO7ek?o%5>t_3tS!Dj2?Alo!Ix%Z zrBOWW23y*t_6eCs_Kxfm*&VjL!IqzUN8nq=PP^;t>kYQnV2cfQo8~OsWz7a#+F+{z zpl@VmrB2JS=GuiBi%O)?RW2Rr$*!AO?kR~en95%R=w1XMeVrl?C8&T4d&Z+;kr_Zp z?PU^Q|4kA!va**aU?f&ZlF;ac7di3V$V-VHvRl(K{V}&p>E%UiZHBw9+czRq!l7jW zZL1Ttcl4I$%Ol4cNPTDx!1$nABFo%jzg?PVv<#f$#cA(!Mr5rljb>HR{fjE9@fEU3 zgbKAuAo%}|&rt=^*U1E-+?$r&uXzf^tA!#E!D8zy4Ob>20yoc;7J)lxZ}%Q{wz{21 z4_rDHsdYm!C>1kaaVi`?rb7eHcDpkoyIwjpviznDQIYH;L)hrZT%oqrX^g(llhL!9(Qr6yMQDYUe#xmy zhJ_!P$e1TNN`I3NwV^u~2DIbk#Y`KE9DPkU?j|QV z+{VC_u*Gd0DeVbWhU93qkyL&K7Bh}{o`#wNIb0~k2|&yhAeJS$HTw=ufV)4Y`3I4Q J*1$yx002lgU3CBe diff --git a/packages/metascraper-spotify/test/snapshots/index.js.md b/packages/metascraper-spotify/test/snapshots/index.js.md index 4bfbdc0c6..47a73451d 100644 --- a/packages/metascraper-spotify/test/snapshots/index.js.md +++ b/packages/metascraper-spotify/test/snapshots/index.js.md @@ -317,7 +317,7 @@ Generated by [AVA](https://avajs.dev). author: 'Syntax - Tasty Web Development Treats', date: '2020-01-06T14:00:00.000Z', description: 'In this Hasty Treat, Scott and Wes talk about modules in Node — what are they, how they’re different from browser modules, and more! Sentry - Sponsor If you want to know what’s happening with your errors, track them with Sentry. Sentry is open-source error tracking that helps developers monitor and…', - image: 'https://i.scdn.co/image/ab6765630000ba8a6c0e28e162c68d0e67bc10d5', + image: 'https://i.scdn.co/image/ab6765630000ba8ab84c022aff150c336f73b395', lang: 'en', publisher: 'Spotify', title: 'Hasty Treat - Modules in Node', diff --git a/packages/metascraper-spotify/test/snapshots/index.js.snap b/packages/metascraper-spotify/test/snapshots/index.js.snap index 1bfde2d7444b9a6078d51161f902012ec97ea268..3d58d45e3086cef44eb4e52e1204b96661d2c5bc 100644 GIT binary patch literal 1389 zcmV-z1(NzfRzVW{XJMR zGU}nPULT7H00000000B+nagh5Mihoc-_rEzpj~uRbXvfRR+dTKDBDGmDpnFXh@?1D zU@JwD49TH57Rg~|hL#kdK#D#=foy`JXp==Bp^GkxpnZVu`T$-PeTp7QkrAg7<;Gc9 z01f|;L>|rz`Tgh2kbG<#E;HNW;S(aMbV)bwNEwJtRb>J3*F?Z1wR>wO^Hl5_p{EwM z_Hny$yW82mvd(C?RweqSz@Wm%fpC}<-#oS~EOAr$aXxV_(f?CkQ(mvaX9Ls){Q!Cj z`Umv3l1QXMP0-h%ca^0J%2LhPC#F=Cqp!=$%L(PJgmONiyaO?F?Lpr}vqi-q$Ar4_w@H`%5}~|Av0qJScYVRolWe zOYLovCH_+dpH-U2sbPLowE6h)t+OLTCt4r_y%{w)bcPI!~9SV647kI_Y zt{8eYuH7?KyFWb{+CP4Yxy}^V)o8Aw*YjPJ#e)6bu6 z*c{v;Z=U!LIWn`Kp5@Fw`5p3f>}NZ(Kbx4@11~&#D3WKw6jo_Rs|Nd`(F(5SjP3P$ zjC>}MPkREUH}7#)Wu+$BDb{XP0?VV`MkR(lld!ER@2|7cxR%8mp?T|)ymxJ{c~xu} zJFSZjZ85wsvbnsn?IY<>fy#s42feMLy2+$O*tcLf z0>Q0y8)1Wm5_!xDUD!l@RE3m9hYud2j)NhV6KIF@R*}OxgKviq9>F$?+BV_+6B&>< zIP1U-`>37OK_4FDah@YFdQryYWvV|zb|N5AO{CfM!;*R zL#5LXL-VME{VF!}f}Yp2z(zChIyTmGrk2TI+fL^-Gn>`z vLe|J`R~IeJka)6&$!A2BbYeHf!h2$u2@A-L%bCP10>r z?WPE|V|xmp#1w)*Q9^EB+5p{M1y z_Hn0itJmGWTwt_Us}kd4U{c}ZKsZc_Zywn;mbj_@xTu^_27l^n>g!ebY=C;8A3#q) z|A5|B6(t2~g1!d5tFD|^S8C=yv81X#`+Dx&IYoU-QO_#sI}o$*p1P>0%jeZ)9NLtP zf2|0~so#DfW(9}RVLUo$3C2vKL*B57us97QWqx$zGWBqKMr%th_S?}RXM@mmsc^`Q zu`dt7MaL|L+4k8?1sVAa+tqEcv19MjK*M$PqhC3YcVuNpVN5FtMxWjW8+To^hv{vl9cr~3DiH1Wwq7|~S5$4DU>8VB#1ar%IIt{F5iY45ViR+=?iTTbE-{}iF z$+6YZvU@GoIiPN@T&(Tvwfx=EMxgJ+)BD^`?}e$r0~hz*!II9{ziwQz4vO8o)sC>N zQfHeO&fWy(-$yb34fHqYP2ija-30Yd3-jZq_A4U(>eBmv^G;6X9SXuVheDpi1zxo> ztEQ2OYxf-0?oW@0_K#m;u5-n8Et;$7cl>@w6B2Vy?3nBvk^Ll^tYl1L*Y)A5#8~GD z>oL|;`ZbxWXPqmUJMxw%@8vI68zf#+&F7jbb?h{btMnYE^VS$I@l0YL;d>$E^z)}1 zHivh}o5#LGj?C;Qr#Z8ae}_C9`{~Z?&!%Sf&IbGuNF zk->=3UOJtkfhs#oEnEV0+ZtsKl`661F|#{dGo~S2K7cv~FIMcdza>uZRtE zr+vYpZN~-47i#~j@kYF=I~Cq|RtN@*_LDpcj|;)+hn=53(Fas8n`lP1QM*yj^js2_ zQeDfxEO_Ja=+;P>VAUl^d07$iuu72LGkI_}3)5e<}llic$tW zUVtBqit-_-4ElEQgmk}pR)?5}?IzEhZ(xz74|> z2yU%A2%9XF$YXZs!Y1mYDx@qry#D}o9SpIYKs%(rh8)%%emlJX5VqO0)gpXwB17^f zXI;2qAGfnM?89T6e1z%{#QSg^>H+fw<7m5u`Yc3U3?gGwsP=k^J4zkGTu+c$-_T=Dn9O4E7*&_~w z3mBtMC4|9v5AXjRnU!CR-9*>L$UNF`zlKdCZ{&;&u+dFiz=oxhbb+Ldv}F`*ofvu3 tO6hhkDxVjKKQoul)JM*ZOy&=s0}dCxaujStj=yXj{})XBi=Nsj0053+u(SXG