Skip to content

Commit 9159dd2

Browse files
committed
Avoid rewriting the HTML charset
1 parent 4c17485 commit 9159dd2

4 files changed

Lines changed: 9 additions & 78 deletions

File tree

index.js

Lines changed: 1 addition & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,6 @@ const jschardet = require('jschardet')
44
const isBuffer = require('is-buffer')
55
const iconv = require('iconv-lite')
66
const charset = require('charset')
7-
const he = require('he')
8-
9-
const REGEX_CHARSET = /charset=["]*([^>"\s]+)/i
107

118
const inferredEncoding = content => {
129
const charset = jschardet.detect(content)
@@ -26,13 +23,6 @@ module.exports = targetEncoding => {
2623
return (buffer, contentType) => {
2724
if (!isBuffer(buffer)) throw new TypeError('content should be a buffer.')
2825
const encoding = getEncoding(buffer, contentType)
29-
30-
const str = iconv
31-
.decode(buffer, encoding)
32-
.replace(REGEX_CHARSET, targetEncoding)
33-
.toString()
34-
35-
// Ensure to resolve Base64 entities
36-
return he.decode(str)
26+
return iconv.decode(buffer, encoding)
3727
}
3828
}

package.json

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -26,11 +26,9 @@
2626
],
2727
"dependencies": {
2828
"charset": "~1.0.1",
29-
"he": "~1.2.0",
3029
"iconv-lite": "~0.4.19",
3130
"is-buffer": "~2.0.0",
32-
"jschardet": "~1.6.0",
33-
"to-buffer": "~1.1.0"
31+
"jschardet": "~1.6.0"
3432
},
3533
"devDependencies": {
3634
"chai": "latest",

test/fixtures/base64.html

Lines changed: 0 additions & 20 deletions
This file was deleted.

test/index.js

Lines changed: 7 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -6,70 +6,33 @@ const fs = require('fs')
66

77
const toUTF8 = require('..')('utf-8')
88

9-
describe('Encoding Converter', function () {
10-
it('properly decodes Shift-JIS html documents', function () {
9+
describe('encoding conversion', function () {
10+
it('Shift-JIS', function () {
1111
const buffer = loadExample('51242_54045.html')
1212
const output = toUTF8(buffer, 'text/html')
1313

1414
expect(output).to.contain('或る日の小せん')
1515
})
1616

17-
it('properly decodes Windows-1250 html documents', function () {
17+
it('Windows-1250', function () {
1818
const buffer = loadExample('rp.pl.html')
1919
const output = toUTF8(buffer, 'windows-1250')
2020

2121
expect(output).to.contain('majątków')
2222
})
2323

24-
it('guesses encoding even without meta tags or content type', function () {
25-
const buffer = loadExample('shiftjis.no.meta.html')
26-
const output = toUTF8(buffer, 'text/html')
27-
28-
expect(output).to.contain('次常用國字標準字體表')
29-
})
30-
31-
it('works for documents which already are UTF-8', function () {
24+
it('UTF-8', function () {
3225
const buffer = loadExample('utf8.with.meta.html')
3326
const output = toUTF8(buffer, 'text/html')
3427

3528
expect(output).to.contain('日本語')
3629
})
3730

38-
it('Replace charset from the original buffer', function () {
39-
const buffer = loadExample('51242_54045.html')
31+
it('inferred', function () {
32+
const buffer = loadExample('shiftjis.no.meta.html')
4033
const output = toUTF8(buffer, 'text/html')
4134

42-
expect(output).to.contain(
43-
'<meta http-equiv="Content-Type" content="text/html;utf-8" />'
44-
)
45-
})
46-
47-
it.only('Decode Base64', function () {
48-
const buffer = loadExample('base64.html')
49-
const output = toUTF8(buffer)
50-
expect(output.trim()).to.deep.equal(
51-
`
52-
<html>
53-
54-
<head></head>
55-
56-
<body>
57-
<pre style="word-wrap: break-word; white-space: pre-wrap;"><!DOCTYPE html>
58-
<html lang="en">
59-
<head>
60-
<meta utf-8">
61-
<meta name="viewport" content="width=device-width, initial-scale=1.0">
62-
<meta http-equiv="X-UA-Compatible" content="ie=edge">
63-
<title>Document</title>
64-
</head>
65-
<body>
66-
<a href="https://httpbin-org.herokuapp.com/redirect/3"></a>
67-
</body>
68-
</html></pre>
69-
</body>
70-
71-
</html>`.trim()
72-
)
35+
expect(output).to.contain('次常用國字標準字體表')
7336
})
7437
})
7538

0 commit comments

Comments
 (0)