Skip to content

Commit 818a19b

Browse files
committed
Add html sanitization
Remove unnecessary rules
1 parent 5bf8d93 commit 818a19b

File tree

4 files changed: 28 additions and 10 deletions.

index.js

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
'use strict'
22

3-
const rules = require('req-all')('./src/rules')
43
const reduce = require('lodash.reduce')
5-
const cheerio = require('cheerio')
4+
5+
const rules = require('req-all')('./src/rules')
6+
const loadHtml = require('./src/html')
67

78
const isValid = result => result !== null && result !== undefined && result !== ''
89

@@ -18,9 +19,7 @@ const getValue = ($, conditions) => {
1819
}
1920

2021
module.exports = rawHtml => {
21-
const html = cheerio.load(rawHtml, {
22-
lowerCaseAttributeNames: true
23-
})
22+
const html = loadHtml(rawHtml)
2423

2524
return reduce(rules, (acc, conditions, ruleName) => {
2625
const value = getValue(html, conditions)

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
"lodash.reduce": "~4.6.0",
2828
"normalize-url": "~1.9.1",
2929
"req-all": "~1.0.0",
30+
"sanitize-html": "~1.14.1",
3031
"to-title-case": "~1.0.0",
3132
"url-regex": "~4.1.1"
3233
},

src/html/index.js

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
'use strict'
2+
3+
const sanitizeHtml = require('sanitize-html')
4+
const flow = require('lodash.flow')
5+
const cheerio = require('cheerio')
6+
7+
// Pre-processes a raw HTML string with sanitize-html and returns its result.
// `allowedTags: false` / `allowedAttributes: false` presumably turn filtering
// off (sanitize-html documents `false` as "allow everything") — confirm
// against the pinned ~1.14.1 release. The only transformation configured here
// is the `meta` tag handler below.
// NOTE(review): only the `name` attribute value of `meta` tags is lower-cased;
// other attributes (e.g. `itemprop`) and other tags keep their original case.
const sanitize = html => sanitizeHtml(html, {
8+
allowedTags: false,
9+
allowedAttributes: false,
10+
transformTags: {
11+
meta: (tagName, attribs) => {
12+
// Lower-case the value of `name` when present (mutates `attribs` in place);
// presumably so selectors only need the lower-case spelling — verify against the rules.
if (attribs.name) attribs.name = attribs.name.toLowerCase()
13+
return {tagName, attribs}
14+
}
15+
}
16+
})
17+
18+
// `cheerio.load` bound to the cheerio module so it can be passed around as a
// plain function reference.
const load = cheerio.load.bind(cheerio)
19+
20+
// Module export: a single function built with lodash.flow from `sanitize`
// followed by `load`. Per the lodash docs, flow feeds each function's return
// value into the next, so the input is sanitized first and the sanitized
// markup is then handed to cheerio.load.
module.exports = flow([
21+
sanitize,
22+
load
23+
])

src/rules/date.js

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,16 +36,11 @@ const wrap = rule => $ => {
3636
module.exports = [
3737
wrap($ => $('meta[property="article:published_time"]').attr('content')),
3838
wrap($ => $('meta[name="dc.date"]').attr('content')),
39-
wrap($ => $('meta[name="DC.date"]').attr('content')),
4039
wrap($ => $('meta[name="dc.date.issued"]').attr('content')),
41-
wrap($ => $('meta[name="DC.date.issued"]').attr('content')),
4240
wrap($ => $('meta[name="dc.date.created"]').attr('content')),
43-
wrap($ => $('meta[name="DC.date.created"]').attr('content')),
44-
wrap($ => $('meta[name="DC.Date"]').attr('content')),
4541
wrap($ => $('meta[name="date"]').attr('content')),
4642
wrap($ => $('meta[name="dcterms.date"]').attr('content')),
4743
wrap($ => $('[itemprop="datePublished"]').attr('content')),
48-
wrap($ => $('time[itemprop*="pubDate"]').attr('datetime')),
4944
wrap($ => $('time[itemprop*="pubdate"]').attr('datetime')),
5045
wrap($ => $('[property*="dc:date"]').attr('content')),
5146
wrap($ => $('[property*="dc:created"]').attr('content')),

0 commit comments

Comments (0)