diff --git a/.gitignore b/.gitignore
index c4b384e..6342324 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,8 @@
# Generated test data file (> 100 MB)
tests/data.json
-# JSON version of coverage report
-coverage/coverage.json
+# Coverage report
+coverage
# Installed npm modules
node_modules
diff --git a/.travis.yml b/.travis.yml
index 5a39c55..f3e2e1a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -16,4 +16,6 @@ before_script:
- "PREFIX=/usr/lib/jvm; if [ ! -d $PREFIX/java-6-openjdk ]; then for d in $PREFIX/java-6-openjdk-*; do if [ -e $d/jre/lib/rt.jar ]; then sudo ln -s $d $PREFIX/java-6-openjdk; break; fi; done; fi"
- "sudo apt-get install -qq python; python --version"
script:
- "grunt ci"
+ - "grunt ci"
+after_script:
+ - "grunt shell:cover-coveralls"
diff --git a/Gruntfile.js b/Gruntfile.js
index 9e827a2..2af2bf5 100644
--- a/Gruntfile.js
+++ b/Gruntfile.js
@@ -15,9 +15,12 @@ module.exports = function(grunt) {
}
}
},
- 'cover': {
+ 'cover-html': {
'command': 'istanbul cover --report "html" --verbose --dir "coverage" "tests/tests.js"; istanbul report --root "coverage" --format "html"'
},
+ 'cover-coveralls': {
+ 'command': 'istanbul cover --verbose --dir "coverage" "tests/tests.js" && cat coverage/lcov.info | coveralls; rm -rf coverage/lcov*'
+ },
'test-narwhal': {
'command': 'echo "Testing in Narwhal..."; export NARWHAL_OPTIMIZATION=-1; narwhal "tests/tests.js"'
},
@@ -49,7 +52,7 @@ module.exports = function(grunt) {
grunt.loadNpmTasks('grunt-shell');
- grunt.registerTask('cover', 'shell:cover');
+ grunt.registerTask('cover', 'shell:cover-html');
grunt.registerTask('ci', [
'shell:generate-test-data',
'shell:test-narwhal',
diff --git a/LICENSE-MIT.txt b/LICENSE-MIT.txt
index 97067e5..a41e0a7 100644
--- a/LICENSE-MIT.txt
+++ b/LICENSE-MIT.txt
@@ -1,4 +1,4 @@
-Copyright Mathias Bynens
+Copyright Mathias Bynens
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
diff --git a/README.md b/README.md
index ddee43d..edddcce 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,12 @@
# utf8.js [![Build status](https://travis-ci.org/mathiasbynens/utf8.js.svg?branch=master)](https://travis-ci.org/mathiasbynens/utf8.js) [![Dependency status](https://gemnasium.com/mathiasbynens/utf8.js.svg)](https://gemnasium.com/mathiasbynens/utf8.js)
-_utf8.js_ is a well-tested UTF-8 encoder/decoder written in JavaScript. Unlike many other JavaScript solutions, it is designed to be a _proper_ UTF-8 encoder/decoder: it can encode/decode any given Unicode code point, including astral symbols and unpaired surrogates. [Here’s an online demo.](http://mothereff.in/utf-8)
+_utf8.js_ is a well-tested UTF-8 encoder/decoder written in JavaScript. Unlike many other JavaScript solutions, it is designed to be a _proper_ UTF-8 encoder/decoder: it can encode/decode any Unicode scalar value, as per [the Encoding Standard](https://encoding.spec.whatwg.org/#utf-8). [Here’s an online demo.](https://mothereff.in/utf-8)
Feel free to fork if you see possible improvements!
## Installation
-Via [npm](http://npmjs.org/):
+Via [npm](https://www.npmjs.org/):
```bash
npm install utf8
@@ -30,7 +30,7 @@ In a browser:
```
-In [Narwhal](http://narwhaljs.org/), [Node.js](http://nodejs.org/), and [RingoJS ≥ v0.8.0](http://ringojs.org/):
+In [Narwhal](http://narwhaljs.org/), [Node.js](https://nodejs.org/), and [RingoJS ≥ v0.8.0](http://ringojs.org/):
```js
var utf8 = require('utf8');
@@ -62,7 +62,7 @@ require(
### `utf8.encode(string)`
-Encodes any given JavaScript string (`string`) as UTF-8, and returns the UTF-8-encoded version of the string.
+Encodes any given JavaScript string (`string`) as UTF-8, and returns the UTF-8-encoded version of the string. It throws an error if the input string contains a non-scalar value, i.e. a lone surrogate. (If you need to be able to encode non-scalar values as well, use [WTF-8](https://mths.be/wtf8) instead.)
```js
// U+00A9 COPYRIGHT SIGN; see http://codepoints.net/U+00A9
@@ -75,7 +75,7 @@ utf8.encode('\uD800\uDC01');
### `utf8.decode(byteString)`
-Decodes any given UTF-8-encoded string (`byteString`) as UTF-8, and returns the UTF-8-decoded version of the string. It throws an error when malformed UTF-8 is detected.
+Decodes any given UTF-8-encoded string (`byteString`) as UTF-8, and returns the UTF-8-decoded version of the string. It throws an error when malformed UTF-8 is detected, which includes an encoded non-scalar value, i.e. an encoded lone surrogate. (If you need to be able to decode encoded non-scalar values as well, use [WTF-8](https://mths.be/wtf8) instead.)
```js
utf8.decode('\xC2\xA9');
@@ -112,8 +112,8 @@ Long before utf8.js was created, the `utf8` module on npm was registered and use
| [![twitter/mathias](https://gravatar.com/avatar/24e08a9ea84deb17ae121074d0f17125?s=70)](https://twitter.com/mathias "Follow @mathias on Twitter") |
|---|
-| [Mathias Bynens](http://mathiasbynens.be/) |
+| [Mathias Bynens](https://mathiasbynens.be/) |
## License
-utf8.js is available under the [MIT](http://mths.be/mit) license.
+utf8.js is available under the [MIT](https://mths.be/mit) license.
diff --git a/package.json b/package.json
index e612cea..32e3115 100644
--- a/package.json
+++ b/package.json
@@ -2,7 +2,7 @@
"name": "utf8",
"version": "2.0.0",
"description": "A well-tested UTF-8 encoder/decoder written in JavaScript.",
- "homepage": "http://mths.be/utf8js",
+ "homepage": "https://mths.be/utf8js",
"main": "utf8.js",
"keywords": [
"charset",
@@ -10,39 +10,26 @@
"unicode",
"utf8"
],
- "licenses": [
- {
- "type": "MIT",
- "url": "http://mths.be/mit"
- },
- {
- "type": "GPL",
- "url": "http://mths.be/gpl"
- }
- ],
+ "license": "MIT",
"author": {
"name": "Mathias Bynens",
- "url": "http://mathiasbynens.be/"
+ "url": "https://mathiasbynens.be/"
},
"repository": {
"type": "git",
"url": "https://github.com/mathiasbynens/utf8.js.git"
},
- "bugs": {
- "url": "https://github.com/mathiasbynens/utf8.js/issues"
- },
- "directories": {
- "test": "tests"
- },
+ "bugs": "https://github.com/mathiasbynens/utf8.js/issues",
"scripts": {
"test": "node tests/tests.js"
},
"devDependencies": {
- "grunt": "~0.4.4",
- "grunt-shell": "~0.6.4",
- "istanbul": "~0.2.6",
- "qunit-clib": "~1.3.0",
+ "coveralls": "^2.11.1",
+ "grunt": "^0.4.5",
+ "grunt-shell": "^1.1.1",
+ "istanbul": "^0.3.5",
+ "qunit-extras": "^1.4.0",
"qunitjs": "~1.11.0",
- "requirejs": "~2.1.11"
+ "requirejs": "^2.1.11"
}
}
diff --git a/tests/generate-test-data.py b/tests/generate-test-data.py
index 096b012..08be0d2 100755
--- a/tests/generate-test-data.py
+++ b/tests/generate-test-data.py
@@ -3,7 +3,7 @@
import re
import json
-# http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
+# https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
# http://stackoverflow.com/a/13436167/96656
def unisymbol(codePoint):
if codePoint >= 0x0000 and codePoint <= 0xFFFF:
@@ -25,6 +25,9 @@ def writeFile(filename, contents):
data = []
for codePoint in range(0x000000, 0x10FFFF + 1):
+ # Skip non-scalar values.
+ if codePoint >= 0xD800 and codePoint <= 0xDFFF:
+ continue
symbol = unisymbol(codePoint)
# http://stackoverflow.com/a/17199950/96656
bytes = symbol.encode('utf8').decode('latin1')
diff --git a/tests/tests.js b/tests/tests.js
index cb01e10..5ebe513 100644
--- a/tests/tests.js
+++ b/tests/tests.js
@@ -1,22 +1,27 @@
-;(function(root) {
+(function(root) {
'use strict';
- /** Use a single `load` function */
- var load = typeof require == 'function' ? require : root.load;
+ var noop = Function.prototype;
+
+ var load = (typeof require == 'function' && !(root.define && define.amd)) ?
+ require :
+ (!root.document && root.java && root.load) || noop;
- /** The unit testing framework */
var QUnit = (function() {
- var noop = Function.prototype;
return root.QUnit || (
root.addEventListener || (root.addEventListener = noop),
root.setTimeout || (root.setTimeout = noop),
root.QUnit = load('../node_modules/qunitjs/qunit/qunit.js') || root.QUnit,
- (load('../node_modules/qunit-clib/qunit-clib.js') || { 'runInContext': noop }).runInContext(root),
addEventListener === noop && delete root.addEventListener,
root.QUnit
);
}());
+ var qe = load('../node_modules/qunit-extras/qunit-extras.js');
+ if (qe) {
+ qe.runInContext(root);
+ }
+
/** The `utf8` object to test */
var utf8 = root.utf8 || (root.utf8 = (
utf8 = load('../utf8.js') || root.utf8,
@@ -96,63 +101,75 @@
{
'codePoint': 0xD800,
'decoded': '\uD800',
- 'encoded': '\xED\xA0\x80'
+ 'encoded': '\xED\xA0\x80',
+ 'error': true
},
{
'description': 'High surrogate followed by another high surrogate',
'decoded': '\uD800\uD800',
- 'encoded': '\xED\xA0\x80\xED\xA0\x80'
+ 'encoded': '\xED\xA0\x80\xED\xA0\x80',
+ 'error': true
},
{
'description': 'High surrogate followed by a symbol that is not a surrogate',
'decoded': '\uD800A',
- 'encoded': '\xED\xA0\x80A'
+ 'encoded': '\xED\xA0\x80A',
+ 'error': true
},
{
'description': 'Unmatched high surrogate, followed by a surrogate pair, followed by an unmatched high surrogate',
'decoded': '\uD800\uD834\uDF06\uD800',
- 'encoded': '\xED\xA0\x80\xF0\x9D\x8C\x86\xED\xA0\x80'
+ 'encoded': '\xED\xA0\x80\xF0\x9D\x8C\x86\xED\xA0\x80',
+ 'error': true
},
{
'codePoint': 0xD9AF,
'decoded': '\uD9AF',
- 'encoded': '\xED\xA6\xAF'
+ 'encoded': '\xED\xA6\xAF',
+ 'error': true
},
{
'codePoint': 0xDBFF,
'decoded': '\uDBFF',
- 'encoded': '\xED\xAF\xBF'
+ 'encoded': '\xED\xAF\xBF',
+ 'error': true
},
// low surrogates: 0xDC00 to 0xDFFF
{
'codePoint': 0xDC00,
'decoded': '\uDC00',
- 'encoded': '\xED\xB0\x80'
+ 'encoded': '\xED\xB0\x80',
+ 'error': true
},
{
'description': 'Low surrogate followed by another low surrogate',
'decoded': '\uDC00\uDC00',
- 'encoded': '\xED\xB0\x80\xED\xB0\x80'
+ 'encoded': '\xED\xB0\x80\xED\xB0\x80',
+ 'error': true
},
{
'description': 'Low surrogate followed by a symbol that is not a surrogate',
'decoded': '\uDC00A',
- 'encoded': '\xED\xB0\x80A'
+ 'encoded': '\xED\xB0\x80A',
+ 'error': true
},
{
'description': 'Unmatched low surrogate, followed by a surrogate pair, followed by an unmatched low surrogate',
'decoded': '\uDC00\uD834\uDF06\uDC00',
- 'encoded': '\xED\xB0\x80\xF0\x9D\x8C\x86\xED\xB0\x80'
+ 'encoded': '\xED\xB0\x80\xF0\x9D\x8C\x86\xED\xB0\x80',
+ 'error': true
},
{
'codePoint': 0xDEEE,
'decoded': '\uDEEE',
- 'encoded': '\xED\xBB\xAE'
+ 'encoded': '\xED\xBB\xAE',
+ 'error': true
},
{
'codePoint': 0xDFFF,
'decoded': '\uDFFF',
- 'encoded': '\xED\xBF\xBF'
+ 'encoded': '\xED\xBF\xBF',
+ 'error': true
},
// 4-byte
@@ -188,16 +205,33 @@
forEach(data, function(object) {
var description = object.description || 'U+' + object.codePoint.toString(16).toUpperCase();
;
- equal(
- object.encoded,
- utf8.encode(object.decoded),
- 'Encoding: ' + description
- );
- equal(
- object.decoded,
- utf8.decode(object.encoded),
- 'Decoding: ' + description
- );
+ if (object.error) {
+ raises(
+ function() {
+ utf8.decode(object.encoded);
+ },
+ Error,
+ 'Error: non-scalar value detected'
+ );
+ raises(
+ function() {
+ utf8.encode(object.decoded);
+ },
+ Error,
+ 'Error: non-scalar value detected'
+ );
+ } else {
+ equal(
+ object.encoded,
+ utf8.encode(object.decoded),
+ 'Encoding: ' + description
+ );
+ equal(
+ object.decoded,
+ utf8.decode(object.encoded),
+ 'Decoding: ' + description
+ );
+ }
});
// Error handling
diff --git a/utf8.js b/utf8.js
index be74021..c138a38 100644
--- a/utf8.js
+++ b/utf8.js
@@ -1,4 +1,4 @@
-/*! http://mths.be/utf8js v2.0.0 by @mathias */
+/*! https://mths.be/utf8js v2.0.0 by @mathias */
;(function(root) {
// Detect free variables `exports`
@@ -19,7 +19,7 @@
var stringFromCharCode = String.fromCharCode;
- // Taken from http://mths.be/punycode
+ // Taken from https://mths.be/punycode
function ucs2decode(string) {
var output = [];
var counter = 0;
@@ -46,7 +46,7 @@
return output;
}
- // Taken from http://mths.be/punycode
+ // Taken from https://mths.be/punycode
function ucs2encode(array) {
var length = array.length;
var index = -1;
@@ -64,6 +64,14 @@
return output;
}
+ function checkScalarValue(codePoint) { // Guard: throws for a surrogate code point, otherwise returns nothing.
+ if (codePoint >= 0xD800 && codePoint <= 0xDFFF) { // inclusive range U+D800..U+DFFF, i.e. the surrogates
+ throw Error(
+ 'Lone surrogate U+' + codePoint.toString(16).toUpperCase() + // code point rendered as uppercase hex
+ ' is not a scalar value'
+ );
+ }
+ }
/*--------------------------------------------------------------------------*/
function createByte(codePoint, shift) {
@@ -79,6 +87,7 @@
symbol = stringFromCharCode(((codePoint >> 6) & 0x1F) | 0xC0);
}
else if ((codePoint & 0xFFFF0000) == 0) { // 3-byte sequence
+ checkScalarValue(codePoint);
symbol = stringFromCharCode(((codePoint >> 12) & 0x0F) | 0xE0);
symbol += createByte(codePoint, 6);
}
@@ -163,6 +172,7 @@
byte3 = readContinuationByte();
codePoint = ((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3;
if (codePoint >= 0x0800) {
+ checkScalarValue(codePoint);
return codePoint;
} else {
throw Error('Invalid continuation byte');