Skip to content

Commit

Permalink
fix: add author size limitation (#154)
Browse files Browse the repository at this point in the history
Just for skipping falsy values. cc @wooorm
  • Loading branch information
Kikobeats committed Mar 16, 2019
1 parent 0ae78ab commit c1df0d3
Show file tree
Hide file tree
Showing 12 changed files with 928 additions and 627 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
"clean": "rm -rf node_modules && cd packages && eachdir rm -rf node_modules",
"dev": "concurrently \"gulp\" \"npm run server:dev\"",
"lint": "standard-markdown README.md && standard",
"postinstall": "lerna bootstrap && lerna link --force-local",
"postinstall": "lerna bootstrap --no-ci && lerna link --force-local",
"prerelease": "npm run update:check",
"pretest": "npm run lint",
"preversion": "lerna exec npx finepack && git-authors-cli && finepack",
Expand Down
8 changes: 6 additions & 2 deletions packages/metascraper-helpers/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ const {
flow,
chain,
isEmpty,
eq
eq,
lte,
size
} = require('lodash')

const langs = require('iso-639-3').map(({ iso6391 }) => iso6391)
Expand Down Expand Up @@ -61,6 +63,8 @@ const REGEX_LOCATION = /^[A-Z\s]+\s+[-—–]\s+/

const TRUNCATE_MAX_LENGTH = 300

const AUTHOR_MAX_LENGTH = 128

const removeLocation = value => replace(value, REGEX_LOCATION, '')

const isUrl = (url, { relative = false } = {}) =>
Expand Down Expand Up @@ -102,7 +106,7 @@ const $filter = ($, domNodes, fn = defaultFn) => {
}

const isAuthor = (str, opts = { relative: false }) =>
isString(str) && !isUrl(str, opts)
!isUrl(str, opts) && isString(str) && lte(size(str), AUTHOR_MAX_LENGTH)

const getAuthor = (str, opts = { removeBy: true }) => titleize(str, opts)

Expand Down
6 changes: 3 additions & 3 deletions packages/metascraper-helpers/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
],
"dependencies": {
"audio-extensions": "0.0.0",
"chrono-node": "~1.3.5",
"chrono-node": "~1.3.11",
"condense-whitespace": "~1.0.0",
"file-extension": "~4.0.5",
"image-extensions": "~1.1.0",
Expand All @@ -26,8 +26,8 @@
"iso-639-3": "~1.1.0",
"isostring": "0.0.1",
"lodash": "~4.17.11",
"mime-types": "~2.1.21",
"normalize-url": "~4.1.0",
"mime-types": "~2.1.22",
"normalize-url": "~4.2.0",
"smartquotes": "~2.3.1",
"title": "~3.4.1",
"truncate": "~2.0.1",
Expand Down
2 changes: 1 addition & 1 deletion packages/metascraper-media-provider/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
],
"dependencies": {
"@metascraper/helpers": "^4.10.1",
"@microlink/youtube-dl": "~1.13.0",
"@microlink/youtube-dl": "~1.13.1",
"got": "~9.6.0",
"lodash": "~4.17.11",
"memoize-one": "~5.0.0",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ const API_GUEST_ACTIVATE_EXPIRE = 10 * 60 * 1000 // 10 min

const { PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS } = process.env

let agent = PROXY_HOST
const agent = PROXY_HOST
? tunnel.httpsOverHttp({
proxy: {
host: PROXY_HOST,
Expand All @@ -43,7 +43,7 @@ const getGuestToken = async (url = '', opts = {}) => {
const { body } = await got.post(
'https://api.twitter.com/1.1/guest/activate.json',
{
headers: { Authorization: TWITTER_BEARER_TOKEN, Referer: url },
headers: { Authorization: TWITTER_BEARER_TOKEN },
json: true,
retry: 0,
agent,
Expand Down
2 changes: 1 addition & 1 deletion packages/metascraper-media-provider/test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ describe('metascraper-media-provider', () => {
;[
'https://twitter.com/verge/status/957383241714970624',
'https://twitter.com/telediario_tve/status/1036860275859775488',
'https://twitter.com/Mei_Gui8/status/1037374230785142785'
'https://twitter.com/futurism/status/882987478541533189'
].forEach(url => {
it(url, async () => {
const metadata = await metascraper({ url })
Expand Down
4 changes: 2 additions & 2 deletions packages/metascraper-youtube/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
],
"dependencies": {
"@metascraper/helpers": "^4.10.1",
"get-video-id": "~3.1.0",
"get-video-id": "~3.1.1",
"is-reachable": "~3.0.0",
"p-locate": "~3.0.0"
"p-locate": "~4.0.0"
},
"devDependencies": {
"mocha": "latest",
Expand Down
26 changes: 13 additions & 13 deletions packages/metascraper/__snapshots__/index.js.snap-shot
Original file line number Diff line number Diff line change
Expand Up @@ -206,19 +206,6 @@ exports['entrepreneur 1'] = {
"url": "https://www.entrepreneur.com/article/275188"
}

exports['et-tech 1'] = {
"author": "TV Mahalingam",
"date": "2016-05-25T00:00:00.000Z",
"description": "Dell speaks about the complexities of engineering the biggest acquisition in tech history, Dell’s future bets and why he continues to remain bullish..",
"image": "http://economictimes.indiatimes.com/photo/michael-dell-on-complexities-involved-in-pulling-off-the-biggest-tech-acquisition-dell-s-future-bets-more/52424992.cms",
"video": null,
"lang": null,
"logo": "http://b2bstatic.iimg.in/Themes/Release/images/responsive/tech-logo-square.jpg",
"publisher": "ETtech.com",
"title": "Michael Dell on complexities involved in pulling off the biggest tech acquisition, Dell’s future bets & more | ETtech",
"url": "http://tech.economictimes.indiatimes.com/news/corporate/michael-dell-biggest-tech-acquisition-future-bets/52424992"
}

exports['eweek 1'] = {
"author": "Darryl K. Taft",
"date": "2019-01-09T09:01:37.000Z",
Expand Down Expand Up @@ -994,3 +981,16 @@ exports['engadget 1'] = {
"url": "https://www.engadget.com/2019/01/07/all-github-users-keep-code-private/"
}

exports['learnnode 1'] = {
"author": null,
"date": null,
"description": "A premium training course to learn to build apps with Node.js, Express, MongoDB, and friends.",
"image": "https://learnnode.com/images/NODE/poster.jpg",
"video": "https://player.vimeo.com/external/216213305.sd.mp4?s=815e208b400abe120e9b860dad68762bcf4b828a&profile_id=164",
"lang": null,
"logo": "https://learnnode.com/images/NODE/favicon.png",
"publisher": "Learn Node",
"title": "Learn Node",
"url": "https://learnnode.com"
}

4 changes: 2 additions & 2 deletions packages/metascraper/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@
"cheerio": "~1.0.0-rc.2",
"cheerio-advanced-selectors": "~2.0.1",
"lodash": "~4.17.11",
"p-reduce": "~1.0.0",
"whoops": "~4.0.1"
"p-reduce": "~2.0.0",
"whoops": "~4.0.2"
},
"devDependencies": {
"clear-module": "latest",
Expand Down
Loading

0 comments on commit c1df0d3

Please sign in to comment.