Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Bug 938321 - [email/backend] Implement utf-7 charset support using hand-rolled decoder. r=asuth
  • Loading branch information...
commit 4e862d63d7b0d11354f5507d392235736352704d 1 parent a781705
@mcav mcav authored
View
8 data/lib/js-shims/faux-encoding.js
@@ -4,7 +4,7 @@
* the API to be more sane.
**/
-define(function(require, exports, module) {
+define(['utf7', 'exports'], function(utf7, exports) {
// originally from https://github.com/andris9/encoding/blob/master/index.js
// (MIT licensed)
@@ -42,7 +42,11 @@ exports.convert = function(str, destEnc, sourceEnc, ignoredUseLite) {
if (destEnc === sourceEnc)
return new Buffer(str, 'utf-8');
-
+ else if (sourceEnc === 'utf-7' || sourceEnc === 'utf7') {
+ // Some versions of Outlook as recently as Outlook 11 produce
+ // utf-7-encoded body parts. See <https://bugzil.la/938321>.
+ return utf7.decode(str.toString());
+ }
// - decoding (Uint8Array => String)
else if (/^utf-8$/.test(destEnc)) {
var decoder;
View
124 data/lib/js-shims/utf7.js
@@ -0,0 +1,124 @@
+/**
+ * UTF-7 decoding via <https://github.com/kkaefer/utf7>
+ *
+ * Copyright (c) 2010-2011 Konstantin Käfer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+define(['exports'], function(exports) {
+
+// We don't currently use the encode method, but we're keeping it
+// around because IMAP uses it for folder names. If we ever let users
+// create/edit folder names, we'll need this.
+function encode(str) {
+ var b = new Buffer(str.length * 2, 'ascii');
+ for (var i = 0, bi = 0; i < str.length; i++) {
+ // Note that we can't simply convert a UTF-8 string to Base64 because
+ // UTF-8 uses a different encoding. In modified UTF-7, all characters
+ // are represented by their two byte Unicode ID.
+ var c = str.charCodeAt(i);
+ // Upper 8 bits shifted into lower 8 bits so that they fit into 1 byte.
+ b[bi++] = c >> 8;
+ // Lower 8 bits. Cut off the upper 8 bits so that they fit into 1 byte.
+ b[bi++] = c & 0xFF;
+ }
+ // Strip the trailing = padding here. Modified Base64 (IMAP) additionally
+ // uses , instead of /; imap.encode performs that substitution itself.
+ return b.toString('base64').replace(/=+$/, '');
+}
+
+function decode(str) {
+ // The base-64 encoded utf-16 gets converted into a buffer holding
+ // the utf-16 encoded bits; then we decode the utf-16 into a JS string.
+ return new Buffer(str, 'base64').toString('utf-16be');;
+}
+
+// Escape RegEx from http://simonwillison.net/2006/Jan/20/escape/
+function escape(chars) {
+ return chars.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&");
+}
+
+// Character classes defined by RFC 2152.
+var setD = "A-Za-z0-9" + escape("'(),-./:?");
+var setO = escape("!\"#$%&*;<=>@[]^_'{|}");
+var setW = escape(" \r\n\t");
+
+// Caches compiled regexes, keyed by the mask string passed to encode().
+var regexes = {};
+var regexAll = new RegExp("[^" + setW + setD + setO + "]+", 'g');
+
+exports.imap = {};
+
+// RFC 2152 UTF-7 encoding.
+exports.encode = function(str, mask) {
+ // Generate a RegExp object from the string of mask characters.
+ if (!mask) {
+ mask = '';
+ }
+ if (!regexes[mask]) {
+ regexes[mask] = new RegExp("[^" + setD + escape(mask) + "]+", 'g');
+ }
+
+ // We replace subsequent disallowed chars with their escape sequence.
+ return str.replace(regexes[mask], function(chunk) {
+ // + is represented by an empty sequence +-, otherwise call encode().
+ return '+' + (chunk === '+' ? '' : encode(chunk)) + '-';
+ });
+};
+
+// RFC 2152 UTF-7 encoding with all optionals.
+exports.encodeAll = function(str) {
+ // We replace subsequent disallowed chars with their escape sequence.
+ return str.replace(regexAll, function(chunk) {
+ // + is represented by an empty sequence +-, otherwise call encode().
+ return '+' + (chunk === '+' ? '' : encode(chunk)) + '-';
+ });
+};
+
+// RFC 3501, section 5.1.3 UTF-7 encoding.
+exports.imap.encode = function(str) {
+ // All printable ASCII chars except for & must be represented by themselves.
+ // We replace subsequent non-representable chars with their escape sequence.
+ return str.replace(/&/g, '&-').replace(/[^\x20-\x7e]+/g, function(chunk) {
+ // & is represented by an empty sequence &-, otherwise call encode().
+ chunk = (chunk === '&' ? '' : encode(chunk)).replace(/\//g, ',');
+ return '&' + chunk + '-';
+ });
+};
+
+// RFC 2152 UTF-7 decoding.
+exports.decode = function(str) {
+ return str.replace(/\+([A-Za-z0-9\/]*)-?/gi, function(_, chunk) {
+ // "+-" (an empty sequence) represents a literal "+".
+ if (chunk === '') return '+';
+ return decode(chunk);
+ });
+};
+
+// RFC 3501, section 5.1.3 UTF-7 decoding.
+exports.imap.decode = function(str) {
+ return str.replace(/&([^-]*)-/g, function(_, chunk) {
+ // &- represents &.
+ if (chunk === '') return '&';
+ return decode(chunk.replace(/,/g, '/'));
+ });
+};
+
+
+});
View
1  scripts/copy-to-gaia.js
@@ -44,6 +44,7 @@ buildOptions = {
'event-queue': 'data/lib/js-shims/event-queue',
'microtime': 'data/lib/js-shims/microtime',
'path': 'data/lib/js-shims/path',
+ 'utf7': 'data/lib/js-shims/utf7',
'wbxml': 'deps/activesync/wbxml/wbxml',
'activesync': 'deps/activesync',
View
1  test/gelam-require-map.js
@@ -30,6 +30,7 @@ require({
"event-queue": "data/lib/js-shims/event-queue",
"microtime": "data/lib/js-shims/microtime",
"path": "data/lib/js-shims/path",
+ "utf7": "data/lib/js-shims/utf7",
"wbxml": "deps/activesync/wbxml/wbxml",
"activesync": "deps/activesync",
View
1  test/unit/head.js
@@ -192,6 +192,7 @@ require({
"event-queue": "data/lib/js-shims/event-queue",
"microtime": "data/lib/js-shims/microtime",
"path": "data/lib/js-shims/path",
+ "utf7": "data/lib/js-shims/utf7",
"wbxml": "deps/activesync/wbxml/wbxml",
"activesync": "deps/activesync",
View
19 test/unit/test_mime.js
@@ -37,6 +37,7 @@ var rawSammySnake = '\u00dfnake, \u00dfammy',
var rawUnicodeName = 'Figui\u00e8re',
utf8UnicodeName = new Buffer('Figui\u00c3\u00a8re', 'binary'),
+ utf7UnicodeName = 'Figui+AOg-re',
qpUtf8UnicodeName = 'Figui=C3=A8re';
@@ -143,6 +144,14 @@ TD.commonCase('MIME hierarchies', function(T) {
new SyntheticPartLeaf(
utf8UnicodeName,
{ charset: 'utf-8', format: null, encoding: '8bit' }),
+ bpartUtf7Name =
+ new SyntheticPartLeaf(
+ utf7UnicodeName,
+ { charset: 'utf-7' }),
+ bpartUtf7HtmlName =
+ new SyntheticPartLeaf(
+ utf7UnicodeName,
+ { contentType: 'text/html', charset: 'utf-7' }),
// quoted-printable encoding utf-8
bpartQpUtf8Name =
new SyntheticPartLeaf(
@@ -433,6 +442,16 @@ TD.commonCase('MIME hierarchies', function(T) {
checkBody: rawUnicodeName,
},
{
+ name: 'text/plain utf7',
+ bodyPart: bpartUtf7Name,
+ checkBody: rawUnicodeName,
+ },
+ {
+ name: 'text/html utf7',
+ bodyPart: bpartUtf7HtmlName,
+ checkBody: rawUnicodeName,
+ },
+ {
name: 'text/plain qp utf8',
bodyPart: bpartQpUtf8Name,
checkBody: rawUnicodeName,
View
1  test/worker-bootstrap.js
@@ -78,6 +78,7 @@ require({
"event-queue": "data/lib/js-shims/event-queue",
"microtime": "data/lib/js-shims/microtime",
"path": "data/lib/js-shims/path",
+ "utf7": "data/lib/js-shims/utf7",
"wbxml": "deps/activesync/wbxml/wbxml",
"activesync": "deps/activesync",
Please sign in to comment.
Something went wrong with that request. Please try again.