1
+ 'use strict'
1
2
2
- let isUrl = require ( 'is-url' )
3
- let toTitle = require ( 'to-title-case' )
3
+ const condenseWhitespace = require ( 'condense-whitespace' )
4
+ const isString = require ( 'lodash.isstring' )
5
+ const toTitle = require ( 'to-title-case' )
6
+ const urlRegex = require ( 'url-regex' )
7
+ const flow = require ( 'lodash.flow' )
8
+
9
// Leading "by" credit, e.g. "By Jane Doe". The `\b` stops the match from
// eating the first letters of names that merely start with "By" ("Byron").
const REGEX_BY = /^\s*by\b\s*/im

// True when `url-regex` matches the value. A fresh regex is built on every
// call, so no `lastIndex` state is carried over between tests.
const isUrl = value => urlRegex().test(value)

// Strip a leading "by" credit from the value, leaving the rest untouched.
const removeBy = value => value.replace(REGEX_BY, '')
13
+
14
// Clean-up pipeline applied to every accepted value; steps run in order,
// each one receiving the previous step's output.
const sanetize = flow([
  // 1. trim extra whitespace
  condenseWhitespace,
  // 2. drop a leading "by" credit
  removeBy,
  // 3. title-case the result, since some sites use odd casing
  toTitle
])
4
22
5
23
/**
6
24
* Wrap a rule with validation and formatting logic.
@@ -9,26 +27,14 @@ let toTitle = require('to-title-case')
9
27
* @return {Function } wrapped
10
28
*/
11
29
12
const wrap = rule => $ => {
  const candidate = rule($)

  // Accept only string values that are not URLs and carry no "|" separator;
  // the checks short-circuit in this order.
  const isAcceptable =
    isString(candidate) && !isUrl(candidate) && !candidate.includes('|')

  // Rejected values yield `undefined`; accepted ones are cleaned up first.
  return isAcceptable ? sanetize(candidate) : undefined
}
33
39
34
40
/**
@@ -38,30 +44,28 @@ function wrap(rule) {
38
44
* @return {Function } stricter
39
45
*/
40
46
41
const strict = rule => $ => {
  const value = rule($)

  // Only real strings qualify. Without this guard `RegExp#test` would coerce
  // other values (e.g. an array) to a string and could let them through.
  if (typeof value !== 'string') return

  // The value must start with at least two whitespace-separated tokens.
  if (!/^\S+\s+\S+/.test(value)) return

  return value
}
49
53
50
54
/**
51
55
* Rules.
52
56
*/
53
57
54
58
module . exports = [
55
- wrap ( ( $ ) => $ ( 'meta[property="article:author"]' ) . attr ( 'content' ) ) ,
56
- wrap ( ( $ ) => $ ( 'meta[name="author"]' ) . attr ( 'content' ) ) ,
57
- wrap ( ( $ ) => $ ( 'meta[name="sailthru.author"]' ) . attr ( 'content' ) ) ,
58
- wrap ( ( $ ) => $ ( '[rel="author"]' ) . first ( ) . text ( ) ) ,
59
- wrap ( ( $ ) => $ ( '[itemprop*="author"] [itemprop="name"]' ) . first ( ) . text ( ) ) ,
60
- wrap ( ( $ ) => $ ( '[itemprop*="author"]' ) . first ( ) . text ( ) ) ,
61
- wrap ( ( $ ) => $ ( 'meta[property="book:author"]' ) . attr ( 'content' ) ) ,
62
- strict ( wrap ( ( $ ) => $ ( 'a[class*="author"]' ) . first ( ) . text ( ) ) ) ,
63
- strict ( wrap ( ( $ ) => $ ( '[class*="author"] a' ) . first ( ) . text ( ) ) ) ,
64
- strict ( wrap ( ( $ ) => $ ( '[class*="author"]' ) . first ( ) . text ( ) ) ) ,
65
- strict ( wrap ( ( $ ) => $ ( '[class*="byline"]' ) . text ( ) ) ) ,
66
- strict ( wrap ( ( $ ) => $ ( 'a[href*="/author/"]' ) . text ( ) ) ) ,
59
+ wrap ( $ => $ ( 'meta[property="article:author"]' ) . attr ( 'content' ) ) ,
60
+ wrap ( $ => $ ( 'meta[name="author"]' ) . attr ( 'content' ) ) ,
61
+ wrap ( $ => $ ( 'meta[name="sailthru.author"]' ) . attr ( 'content' ) ) ,
62
+ wrap ( $ => $ ( '[rel="author"]' ) . first ( ) . text ( ) ) ,
63
+ wrap ( $ => $ ( '[itemprop*="author"] [itemprop="name"]' ) . first ( ) . text ( ) ) ,
64
+ wrap ( $ => $ ( '[itemprop*="author"]' ) . first ( ) . text ( ) ) ,
65
+ wrap ( $ => $ ( 'meta[property="book:author"]' ) . attr ( 'content' ) ) ,
66
+ strict ( wrap ( $ => $ ( 'a[class*="author"]' ) . first ( ) . text ( ) ) ) ,
67
+ strict ( wrap ( $ => $ ( '[class*="author"] a' ) . first ( ) . text ( ) ) ) ,
68
+ strict ( wrap ( $ => $ ( '[class*="author"]' ) . first ( ) . text ( ) ) ) ,
69
+ strict ( wrap ( $ => $ ( '[class*="byline"]' ) . text ( ) ) ) ,
70
+ strict ( wrap ( $ => $ ( 'a[href*="/author/"]' ) . text ( ) ) )
67
71
]
0 commit comments