A new born

Kikobeats · Kikobeats · commit 29729ef97fa9 · 2017-11-22T14:40:10.000+01:00
diff --git a/README.md b/README.md
@@ -1,32 +1,38 @@
-# html-to-utf8
+# html-encode
+![Last version](https://img.shields.io/github/tag/Kikobeats/html-encode.svg?style=flat-square)
+[![Build Status](https://img.shields.io/travis/Kikobeats/html-encode/master.svg?style=flat-square)](https://travis-ci.org/Kikobeats/html-encode)
+[![Coverage Status](https://img.shields.io/coveralls/Kikobeats/html-encode.svg?style=flat-square)](https://coveralls.io/github/Kikobeats/html-encode)
+[![Dependency status](https://img.shields.io/david/Kikobeats/html-encode.svg?style=flat-square)](https://david-dm.org/Kikobeats/html-encode)
+[![Dev Dependencies Status](https://img.shields.io/david/dev/Kikobeats/html-encode.svg?style=flat-square)](https://david-dm.org/Kikobeats/html-encode#info=devDependencies)
+[![NPM Status](https://img.shields.io/npm/dm/html-encode.svg?style=flat-square)](https://www.npmjs.org/package/html-encode)
+[![Donate](https://img.shields.io/badge/donate-paypal-blue.svg?style=flat-square)](https://paypal.me/Kikobeats)
 
-A Node.js library for converting HTML documents of arbitrary encoding to UTF-8.
+> A Node.js library for converting HTML documents of arbitrary encoding into a target encoding (utf8, utf16, etc.).
 
-### Installation
+### Install
 
-```shell
-npm install html-to-utf8
+```bash
+$ npm install html-encode
 ```
 
 ### Usage
 
-```javascript
-var request = require('request')
-var toUTF8 = require('html-to-utf8')
-
-request({
-  url: 'http://www.rakuten.co.jp',
-  encoding: null // stop request from decoding response
-}, function (err, resp, buffer) {
-  if (err) {
-    console.error(err.stack)
-    return
-  }
-
-  var htmlInUTF8 = toUTF8(buffer, resp.headers['content-type'])
-})
+```js
+'use strict'
+
+const got = require('got')
+const toUTF8 = require('html-encode')('utf-8')
+const url = process.argv[2]
+
+;(async () => {
+  const { body: buffer, headers } = await got(url, { encoding: null })
+  const str = toUTF8(buffer, headers['content-type'])
+  console.log(str)
+})()
 ```
 
+See more at [examples](/examples).
+
 ### License
 
 The code is available under [MIT license](LICENSE).
diff --git a/examples/http.js b/examples/http.js
@@ -1,13 +1,13 @@
 'use strict'
 
-var got = require('got')
-var toUTF8 = require('..')
+const got = require('got')
+const toUTF8 = require('..')('utf-8')
 
 const url = process.argv[2]
 ;(async () => {
   try {
     const { body: buffer, headers } = await got(url, { encoding: null })
-    var str = toUTF8(buffer, headers['content-type'])
+    const str = toUTF8(buffer, headers['content-type'])
     console.log(str)
   } catch (err) {
     throw err
diff --git a/examples/stream.js b/examples/stream.js
@@ -1,7 +1,7 @@
 'use strict'
 
 var got = require('got')
-var toUTF8 = require('..')
+var toUTF8 = require('..')('utf-8')
 
 const url = process.argv[2]
 
diff --git a/index.js b/index.js
@@ -1,31 +1,28 @@
 'use strict'
 
-var jschardet = require('jschardet')
-var isBuffer = require('is-buffer')
-var iconv = require('iconv-lite')
-var charset = require('charset')
+const jschardet = require('jschardet')
+const isBuffer = require('is-buffer')
+const iconv = require('iconv-lite')
+const charset = require('charset')
 
-var charsetRegex = /charset=["]*([^>"\s]+)/i
+const REGEX_CHARSET = /charset=["]*([^>"\s]+)/i
 
-module.exports = function ensureUTF8 (buffer, contentType) {
-  if (!isBuffer(buffer)) throw new TypeError('content should be a buffer.')
-
-  var encoding = getEncoding(buffer, contentType)
-
-  return encoding === 'utf8'
-    ? buffer.toString('utf8')
-    : iconv.decode(buffer, encoding).replace(charsetRegex, 'utf-8')
+const inferredEncoding = content => {
+  const charset = jschardet.detect(content)
+  return charset && charset.encoding
 }
 
-function getEncoding (content, contentType) {
-  return (
+module.exports = targetEncoding => {
+  if (!targetEncoding) throw new TypeError('Need to provide a target encoding.')
+
+  const getEncoding = (content, contentType) =>
     charset({ 'content-type': contentType }, content) ||
     inferredEncoding(content) ||
-    'utf8'
-  )
-}
+    targetEncoding
 
-function inferredEncoding (content) {
-  var charset = jschardet.detect(content)
-  return charset && charset.encoding
+  return (buffer, contentType) => {
+    if (!isBuffer(buffer)) throw new TypeError('content should be a buffer.')
+    const encoding = getEncoding(buffer, contentType)
+    return iconv.decode(buffer, encoding).replace(REGEX_CHARSET, targetEncoding)
+  }
 }
diff --git a/package.json b/package.json
@@ -1,24 +1,28 @@
 {
   "name": "html-to-utf8",
-  "description": "Convert html documents of arbitrary encoding to UTF-8",
-  "homepage": "https://github.com/spect88/html-to-utf8#readme",
-  "version": "0.1.0",
+  "description": "A Node.js library for converting HTML documents of arbitrary encoding into a target encoding (utf8, utf16, etc.).",
+  "homepage": "https://documentup.com/Kikobeats/html-encode",
+  "version": "0.0.0",
   "main": "index.js",
   "author": {
-    "email": "spect88@gmail.com",
-    "name": "Tomasz Szczęśniak-Szlagowski"
+    "name": "Kiko Beats",
+    "url": "https://github.com/Kikobeats"
   },
   "repository": {
     "type": "git",
-    "url": "git+https://github.com/spect88/html-to-utf8.git"
+    "url": "git+https://github.com/kikobeats/html-encode.git"
   },
   "bugs": {
-    "url": "https://github.com/spect88/html-to-utf8/issues"
+    "url": "https://github.com/kikobeats/html-encode/issues"
   },
   "keywords": [
     "encoding",
+    "encode",
+    "arbitrary",
+    "generic",
     "html",
-    "utf8"
+    "utf8",
+    "utf16"
   ],
   "dependencies": {
     "charset": "~1.0.1",
diff --git a/test/index.js b/test/index.js
@@ -1,43 +1,43 @@
 'use strict'
 
-var expect = require('chai').expect
-var path = require('path')
-var fs = require('fs')
+const { expect } = require('chai')
+const path = require('path')
+const fs = require('fs')
 
-var toUTF8 = require('../index')
+const toUTF8 = require('../index')('utf-8')
 
 describe('Encoding Converter', function () {
   it('properly decodes Shift-JIS html documents', function () {
-    var buffer = loadExample('51242_54045.html')
-    var output = toUTF8(buffer, 'text/html')
+    const buffer = loadExample('51242_54045.html')
+    const output = toUTF8(buffer, 'text/html')
 
     expect(output).to.contain('或る日の小せん')
   })
 
   it('properly decodes Windows-1250 html documents', function () {
-    var buffer = loadExample('rp.pl.html')
-    var output = toUTF8(buffer, 'windows-1250')
+    const buffer = loadExample('rp.pl.html')
+    const output = toUTF8(buffer, 'windows-1250')
 
     expect(output).to.contain('majątków')
   })
 
   it('guesses encoding even without meta tags or content type', function () {
-    var buffer = loadExample('shiftjis.no.meta.html')
-    var output = toUTF8(buffer, 'text/html')
+    const buffer = loadExample('shiftjis.no.meta.html')
+    const output = toUTF8(buffer, 'text/html')
 
     expect(output).to.contain('次常用國字標準字體表')
   })
 
   it('works for documents which already are UTF-8', function () {
-    var buffer = loadExample('utf8.with.meta.html')
-    var output = toUTF8(buffer, 'text/html')
+    const buffer = loadExample('utf8.with.meta.html')
+    const output = toUTF8(buffer, 'text/html')
 
     expect(output).to.contain('日本語')
   })
 
   it('Replace charset from the original buffer', function () {
-    var buffer = loadExample('51242_54045.html')
-    var output = toUTF8(buffer, 'text/html')
+    const buffer = loadExample('51242_54045.html')
+    const output = toUTF8(buffer, 'text/html')
 
     expect(output).to.contain(
       '<meta http-equiv="Content-Type" content="text/html;utf-8" />'