Skip to content

Commit

Permalink
perf: unify tldts
Browse files Browse the repository at this point in the history
  • Loading branch information
Kikobeats committed Nov 11, 2022
1 parent 24f318a commit 43422a9
Show file tree
Hide file tree
Showing 18 changed files with 73 additions and 61 deletions.
7 changes: 3 additions & 4 deletions packages/metascraper-amazon/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,14 @@
const {
$filter,
author,
toRule,
lang,
memoizeOne,
parseUrl,
title,
toRule,
url
} = require('@metascraper/helpers')

const { getPublicSuffix } = require('tldts')

const REGEX_AMAZON_URL = /https?:\/\/(.*amazon\..*\/.*|.*amzn\..*\/.*|.*a\.co\/.*)/i

const test = memoizeOne(url => REGEX_AMAZON_URL.test(url))
Expand All @@ -30,7 +29,7 @@ const SUFFIX_LANGUAGES = {
it: 'it'
}

const getDomainLanguage = url => SUFFIX_LANGUAGES[getPublicSuffix(url)]
const getDomainLanguage = url => SUFFIX_LANGUAGES[parseUrl(url).publicSuffix]

const toUrl = toRule(url)
const toAuthor = toRule(author)
Expand Down
3 changes: 1 addition & 2 deletions packages/metascraper-amazon/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@
"metascraper"
],
"dependencies": {
"@metascraper/helpers": "^5.31.1",
"tldts": "~5.7.92"
"@metascraper/helpers": "^5.31.1"
},
"devDependencies": {
"ava": "latest"
Expand Down
5 changes: 2 additions & 3 deletions packages/metascraper-clearbit/index.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
'use strict'

const { composeRule } = require('@metascraper/helpers')
const { composeRule, parseUrl } = require('@metascraper/helpers')
const { get, isString, isObject } = require('lodash')
const asyncMemoizeOne = require('async-memoize-one')
const { stringify } = require('querystring')
const memoize = require('@keyvhq/memoize')
const { getDomain } = require('tldts')
const got = require('got')

const ENDPOINT = 'https://autocomplete.clearbit.com/v1/companies/suggest'
Expand Down Expand Up @@ -42,7 +41,7 @@ const createClearbit = ({ gotOpts, keyvOpts, logoOpts } = {}) => {

module.exports = opts => {
const clearbit = createClearbit(opts)
const getClearbit = composeRule(($, url) => clearbit(getDomain(url)))
const getClearbit = composeRule(($, url) => clearbit(parseUrl(url).domain))

return {
logo: getClearbit({ from: 'logo' }),
Expand Down
3 changes: 1 addition & 2 deletions packages/metascraper-clearbit/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@
"@metascraper/helpers": "^5.31.1",
"async-memoize-one": "~1.1.2",
"got": "~11.8.5",
"lodash": "~4.17.21",
"tldts": "~5.7.92"
"lodash": "~4.17.21"
},
"devDependencies": {
"ava": "latest"
Expand Down
41 changes: 23 additions & 18 deletions packages/metascraper-helpers/index.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,24 @@
'use strict'

const memoizeOne = require('memoize-one').default || require('memoize-one')
const urlRegex = require('url-regex-safe')({ exact: true, parens: true })
const condenseWhitespace = require('condense-whitespace')
const { getExtension: mimeExtension } = require('mime')
const capitalize = require('microsoft-capitalize')
const { JSDOM, VirtualConsole } = require('jsdom')
const isRelativeUrl = require('is-relative-url')
const fileExtension = require('file-extension')
const _normalizeUrl = require('normalize-url')
const smartquotes = require('smartquotes')
const { decodeHTML } = require('entities')
const iso6393 = require('iso-639-3/to-1')
const hasValues = require('has-values')
const chrono = require('chrono-node')
const isIso = require('isostring')
const isUri = require('is-uri')
const { URL } = require('url')
const tldts = require('tldts')

const {
chain,
eq,
Expand All @@ -14,32 +33,17 @@ const {
isNumber,
isString,
lte,
memoize,
replace,
size,
toLower,
toString
} = require('lodash')

const memoizeOne = require('memoize-one').default || require('memoize-one')
const urlRegex = require('url-regex-safe')({ exact: true, parens: true })
const condenseWhitespace = require('condense-whitespace')
const { getExtension: mimeExtension } = require('mime')
const capitalize = require('microsoft-capitalize')
const { JSDOM, VirtualConsole } = require('jsdom')
const isRelativeUrl = require('is-relative-url')
const fileExtension = require('file-extension')
const _normalizeUrl = require('normalize-url')
const smartquotes = require('smartquotes')
const { decodeHTML } = require('entities')
const iso6393 = require('iso-639-3/to-1')
const hasValues = require('has-values')
const chrono = require('chrono-node')
const isIso = require('isostring')
const isUri = require('is-uri')
const { URL } = require('url')

const iso6393Values = Object.values(iso6393)

// Memoized `tldts.parse`, shared by the rule packages that need URL parts
// such as `domain`, `publicSuffix`, or `domainWithoutSuffix`.
// lodash `memoize` keys its cache on the first argument only (the URL) and
// has no eviction, so repeated calls with the same URL return the very same
// cached result object — callers should treat that object as read-only.
const parseUrl = memoize(tldts.parse)

const toTitle = str =>
capitalize(str, [
'CLI',
Expand Down Expand Up @@ -488,6 +492,7 @@ module.exports = {
memoizeOne,
mimeExtension,
normalizeUrl,
parseUrl,
protocol,
publisher,
sanetizeUrl,
Expand Down
14 changes: 14 additions & 0 deletions packages/metascraper-helpers/test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,23 @@ const {
isVideoUrl,
lang,
normalizeUrl,
parseUrl,
url
} = require('..')

test('.parseUrl', t => {
  const url = 'https://example.com'
  const parsed = parseUrl(url)

  // Sanity check: the parsed result exposes the registrable domain.
  t.is(parsed.domain, 'example.com')

  // This assertion ensures parseUrl memoizes the value. `tldts.parse` builds
  // a new result object per call, so receiving the same reference back for
  // the same URL can only come from the cache. Checking identity is
  // deterministic, unlike comparing wall-clock timings of two calls, which
  // can flip under timer resolution, JIT warm-up, or GC pauses.
  t.is(parseUrl(url), parsed)
})

test('.normalizeUrl', t => {
t.is(normalizeUrl('https://example.com', 'javascript:false'), undefined)
t.is(
Expand Down
8 changes: 5 additions & 3 deletions packages/metascraper-instagram/index.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
'use strict'

const { memoizeOne, composeRule } = require('@metascraper/helpers')
const { getDomainWithoutSuffix } = require('tldts')
const { composeRule, memoizeOne, parseUrl } = require('@metascraper/helpers')
const { JSDOM, VirtualConsole } = require('jsdom')
const { keys, first, get } = require('lodash')

const test = memoizeOne(url => getDomainWithoutSuffix(url) === 'instagram')
const test = memoizeOne(
url => parseUrl(url).domainWithoutSuffix === 'instagram'
)

const getPage = sharedData => first(keys(get(sharedData, 'entry_data')))

Expand Down Expand Up @@ -64,6 +65,7 @@ const extractData = memoizeOne((url, $) => {
}
}
})

const getData = composeRule(($, url) => extractData(url, $))

module.exports = () => {
Expand Down
3 changes: 1 addition & 2 deletions packages/metascraper-instagram/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@
"dependencies": {
"@metascraper/helpers": "^5.31.1",
"jsdom": "~20.0.0",
"lodash": "~4.17.21",
"tldts": "~5.7.92"
"lodash": "~4.17.21"
},
"devDependencies": {
"ava": "latest"
Expand Down
4 changes: 2 additions & 2 deletions packages/metascraper-logo-favicon/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
const { isEmpty, first, toNumber, chain, get, orderBy } = require('lodash')
const reachableUrl = require('reachable-url')
const memoize = require('@keyvhq/memoize')
const { getDomain } = require('tldts')

const {
logo,
parseUrl,
memoizeOne,
normalizeUrl,
toRule,
Expand Down Expand Up @@ -138,7 +138,7 @@ module.exports = ({ gotOpts, keyvOpts, pickFn = pickBiggerSize } = {}) => {
async ({ url }) => castNull(await getLogo(normalizeUrl(url))),
async ({ url }) => {
const urlObj = new URL(url)
urlObj.hostname = getDomain(url)
urlObj.hostname = parseUrl(url).domain
const result = await getLogo(normalizeUrl(urlObj))
return castNull(result)
}
Expand Down
3 changes: 1 addition & 2 deletions packages/metascraper-logo-favicon/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@
"@keyvhq/memoize": "~1.6.14",
"@metascraper/helpers": "^5.31.1",
"lodash": "~4.17.21",
"reachable-url": "~1.7.0",
"tldts": "~5.7.92"
"reachable-url": "~1.7.0"
},
"devDependencies": {
"ava": "latest"
Expand Down
1 change: 0 additions & 1 deletion packages/metascraper-media-provider/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
"p-reflect": "~2.1.0",
"p-retry": "~4.6.1",
"p-timeout": "~4.1.0",
"tldts": "~5.7.92",
"youtube-dl-exec": "~2.1.5"
},
"devDependencies": {
Expand Down
4 changes: 2 additions & 2 deletions packages/metascraper-media-provider/src/get-media/util.js
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
'use strict'

const { getDomainWithoutSuffix } = require('tldts')
const { parseUrl } = require('@metascraper/helpers')
const { chain } = require('lodash')

const TEN_MIN_MS = 10 * 60 * 1000

const isTweet = url => url.includes('/status/')

const isTweetUrl = url =>
isTweet(url) && getDomainWithoutSuffix(url) === 'twitter'
isTweet(url) && parseUrl(url).domainWithoutSuffix === 'twitter'

const getTweetId = url =>
chain(url)
Expand Down
11 changes: 6 additions & 5 deletions packages/metascraper-soundcloud/index.js
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
'use strict'

const { getDomainWithoutSuffix } = require('tldts')

const {
$filter,
author,
description,
toRule,
memoizeOne
memoizeOne,
parseUrl,
toRule
} = require('@metascraper/helpers')

const toDescription = toRule(description)
const toAuthor = toRule(author)

const test = memoizeOne(url => getDomainWithoutSuffix(url) === 'soundcloud')
const test = memoizeOne(
url => parseUrl(url).domainWithoutSuffix === 'soundcloud'
)

module.exports = () => {
const rules = {
Expand Down
3 changes: 1 addition & 2 deletions packages/metascraper-soundcloud/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@
"soundcloud"
],
"dependencies": {
"@metascraper/helpers": "^5.31.1",
"tldts": "~5.7.92"
"@metascraper/helpers": "^5.31.1"
},
"devDependencies": {
"ava": "latest"
Expand Down
4 changes: 2 additions & 2 deletions packages/metascraper-spotify/index.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
'use strict'

const asyncMemoizeOne = require('async-memoize-one')
const { getDomainWithoutSuffix } = require('tldts')
const memoize = require('@keyvhq/memoize')
const got = require('got')

Expand All @@ -12,6 +11,7 @@ const {
description,
memoizeOne,
normalizeUrl,
parseUrl,
sanetizeUrl,
toRule
} = require('@metascraper/helpers')
Expand Down Expand Up @@ -42,7 +42,7 @@ const createSpotify = ({ gotOpts, keyvOpts }) => {
)
}

const test = memoizeOne(url => getDomainWithoutSuffix(url) === 'spotify')
const test = memoizeOne(url => parseUrl(url).domainWithoutSuffix === 'spotify')

module.exports = ({ gotOpts, keyvOpts } = {}) => {
const spotify = createSpotify({ gotOpts, keyvOpts })
Expand Down
3 changes: 1 addition & 2 deletions packages/metascraper-spotify/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@
"@metascraper/helpers": "^5.31.1",
"async-memoize-one": "~1.1.2",
"got": "~11.8.5",
"spotify-url-info": "~3.1.8",
"tldts": "~5.7.92"
"spotify-url-info": "~3.1.8"
},
"devDependencies": {
"ava": "latest",
Expand Down
4 changes: 2 additions & 2 deletions packages/metascraper-telegram/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ const {
date,
image,
memoizeOne,
parseUrl,
sanetizeUrl,
toRule
} = require('@metascraper/helpers')

const memoize = require('@keyvhq/memoize')
const { getDomain } = require('tldts')
const pReflect = require('p-reflect')
const cssUrls = require('css-urls')
const got = require('got')
Expand All @@ -21,7 +21,7 @@ const toDate = toRule(date)

const TELEGRAM_DOMAINS = ['telegram.me', 't.me']

const test = memoizeOne(url => TELEGRAM_DOMAINS.includes(getDomain(url)))
const test = memoizeOne(url => TELEGRAM_DOMAINS.includes(parseUrl(url).domain))

const createGetIframe = gotOpts => async (url, $) => {
const iframe = $('iframe')
Expand Down
13 changes: 6 additions & 7 deletions packages/metascraper-uol/index.js
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
'use strict'

const { getDomain } = require('tldts')

const {
$jsonld,
$filter,
title,
$jsonld,
description,
toRule,
memoizeOne
memoizeOne,
parseUrl,
title,
toRule
} = require('@metascraper/helpers')

const ROOT_DOMAINS = ['uol.com.br', 'torcedores.com']

const test = memoizeOne(url =>
ROOT_DOMAINS.some(domain => getDomain(url) === domain)
ROOT_DOMAINS.some(domain => parseUrl(url).domain === domain)
)

const toTitle = toRule(title)
Expand Down

0 comments on commit 43422a9

Please sign in to comment.