File tree Expand file tree Collapse file tree 4 files changed +28
-10
lines changed Expand file tree Collapse file tree 4 files changed +28
-10
lines changed Original file line number Diff line number Diff line change 1
1
'use strict'
2
2
3
- const rules = require ( 'req-all' ) ( './src/rules' )
4
3
const reduce = require ( 'lodash.reduce' )
5
- const cheerio = require ( 'cheerio' )
4
+
5
+ const rules = require ( 'req-all' ) ( './src/rules' )
6
+ const loadHtml = require ( './src/html' )
6
7
7
8
// A result is rejected only when it is null, undefined, or the empty string;
// every other value (including 0 and false) counts as valid.
const isValid = result => !(result === null || result === undefined || result === '')
8
9
@@ -18,9 +19,7 @@ const getValue = ($, conditions) => {
18
19
}
19
20
20
21
module . exports = rawHtml => {
21
- const html = cheerio . load ( rawHtml , {
22
- lowerCaseAttributeNames : true
23
- } )
22
+ const html = loadHtml ( rawHtml )
24
23
25
24
return reduce ( rules , ( acc , conditions , ruleName ) => {
26
25
const value = getValue ( html , conditions )
Original file line number Diff line number Diff line change 27
27
"lodash.reduce" : " ~4.6.0" ,
28
28
"normalize-url" : " ~1.9.1" ,
29
29
"req-all" : " ~1.0.0" ,
30
+ "sanitize-html" : " ~1.14.1" ,
30
31
"to-title-case" : " ~1.0.0" ,
31
32
"url-regex" : " ~4.1.1"
32
33
},
Original file line number Diff line number Diff line change
1
+ 'use strict'
2
+
3
+ const sanitizeHtml = require ( 'sanitize-html' )
4
+ const flow = require ( 'lodash.flow' )
5
+ const cheerio = require ( 'cheerio' )
6
+
7
/**
 * Passes markup through `sanitizeHtml` with tag and attribute filtering
 * switched off (`allowedTags` and `allowedAttributes` are both `false`), so
 * the only change this function requests is the <meta> transform below.
 *
 * @param html - Markup to process (presumably a raw HTML string; confirm
 *   against the caller).
 * @returns Whatever `sanitizeHtml` returns for these options.
 */
const sanitize = html => sanitizeHtml(html, {
  allowedTags: false,
  allowedAttributes: false,
  transformTags: {
    // Lowercase the `name` attribute of <meta> tags.
    meta: (tagName, attribs) => {
      // <meta> tags without a (non-empty) `name` are passed through untouched.
      if (!attribs.name) return { tagName, attribs }

      // Build a fresh attribute map instead of mutating the object handed in
      // by the library; only `name` differs from the input.
      const lowered = Object.assign({}, attribs, { name: attribs.name.toLowerCase() })
      return { tagName, attribs: lowered }
    }
  }
})
17
+
18
// Keep `load` bound to the cheerio module so it can be passed around as a
// plain function.
const load = cheerio.load.bind(cheerio)

// Steps applied in order by `flow`: sanitize the markup, then hand the result
// to cheerio.
const steps = [sanitize, load]

module.exports = flow(steps)
Original file line number Diff line number Diff line change @@ -36,16 +36,11 @@ const wrap = rule => $ => {
36
36
module . exports = [
37
37
wrap ( $ => $ ( 'meta[property="article:published_time"]' ) . attr ( 'content' ) ) ,
38
38
wrap ( $ => $ ( 'meta[name="dc.date"]' ) . attr ( 'content' ) ) ,
39
- wrap ( $ => $ ( 'meta[name="DC.date"]' ) . attr ( 'content' ) ) ,
40
39
wrap ( $ => $ ( 'meta[name="dc.date.issued"]' ) . attr ( 'content' ) ) ,
41
- wrap ( $ => $ ( 'meta[name="DC.date.issued"]' ) . attr ( 'content' ) ) ,
42
40
wrap ( $ => $ ( 'meta[name="dc.date.created"]' ) . attr ( 'content' ) ) ,
43
- wrap ( $ => $ ( 'meta[name="DC.date.created"]' ) . attr ( 'content' ) ) ,
44
- wrap ( $ => $ ( 'meta[name="DC.Date"]' ) . attr ( 'content' ) ) ,
45
41
wrap ( $ => $ ( 'meta[name="date"]' ) . attr ( 'content' ) ) ,
46
42
wrap ( $ => $ ( 'meta[name="dcterms.date"]' ) . attr ( 'content' ) ) ,
47
43
wrap ( $ => $ ( '[itemprop="datePublished"]' ) . attr ( 'content' ) ) ,
48
- wrap ( $ => $ ( 'time[itemprop*="pubDate"]' ) . attr ( 'datetime' ) ) ,
49
44
wrap ( $ => $ ( 'time[itemprop*="pubdate"]' ) . attr ( 'datetime' ) ) ,
50
45
wrap ( $ => $ ( '[property*="dc:date"]' ) . attr ( 'content' ) ) ,
51
46
wrap ( $ => $ ( '[property*="dc:created"]' ) . attr ( 'content' ) ) ,
You can’t perform that action at this time.
0 commit comments