From 10246c84819db14b32fccca040029b06449242a3 Mon Sep 17 00:00:00 2001 From: Kevin Locke Date: Mon, 30 Jun 2014 13:51:58 -0600 Subject: [PATCH] [PATCH v2] Add support for gzip content decoding Support decoding of the gzip Content-Encoding in responses using the zlib module to decode the response before piping it through the request object (and therefore before user-connected pipes or body parsing). Add the boolean option `gzip` to allow users to explicitly request decoding of supported response content and inclusion of appropriate content negotiation headers, if unspecified. This commit favors backwards-compatibility over the increased performance that transparent compression could provide, although it is hoped that a future backwards-incompatible version can make transparent compression the default. Some major tradeoffs of transparent compression are: - It may trigger changes in server behavior and performance (for better or worse) that are unexpected, due to either buggy servers or intermediate network hardware. - The compression is not fully transparent, as users who attach to the `data` event of the response (rather than the `data` event of `request`) will get gzipped data rather than uncompressed data. + It is likely a big win for most users (both current and future) who would otherwise be unaware of, or unable to spend the time to implement, content negotiation and decoding, especially given the prevalence of highly compressible text content (e.g. JSON and XML) and widespread server support for gzip.
Changes since v1: - Rename `decodeContent` option to `gzip` to match option name in 3.0 Signed-off-by: Kevin Locke --- README.md | 1 + request.js | 39 ++++++++++++++++--- tests/test-gzip.js | 95 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 130 insertions(+), 5 deletions(-) create mode 100644 tests/test-gzip.js diff --git a/README.md b/README.md index cff5e1fed..f31cc3567 100644 --- a/README.md +++ b/README.md @@ -258,6 +258,7 @@ The first argument can be either a `url` or an `options` object. The only requir * `aws` - `object` containing AWS signing information. Should have the properties `key`, `secret`. Also requires the property `bucket`, unless you’re specifying your `bucket` as part of the path, or the request doesn’t use a bucket (i.e. GET Services) * `httpSignature` - Options for the [HTTP Signature Scheme](https://github.com/joyent/node-http-signature/blob/master/http_signing.md) using [Joyent's library](https://github.com/joyent/node-http-signature). The `keyId` and `key` properties must be specified. See the docs for other options. * `localAddress` - Local interface to bind for network connections. +* `gzip` - If `true`, add an `Accept-Encoding` header to request compressed content encodings from the server (if not already present) and decode supported content encodings in the response. 
The callback argument gets 3 arguments: diff --git a/request.js b/request.js index 84c06c801..f3a2e1a86 100644 --- a/request.js +++ b/request.js @@ -8,6 +8,7 @@ var optional = require('./lib/optional') , qs = require('qs') , querystring = require('querystring') , crypto = require('crypto') + , zlib = require('zlib') , oauth = optional('oauth-sign') , hawk = optional('hawk') @@ -289,6 +290,10 @@ Request.prototype.init = function (options) { ) } + if (self.gzip && !self.hasHeader('accept-encoding')) { + self.setHeader('accept-encoding', 'gzip') + } + if (self.uri.auth && !self.hasHeader('authorization')) { var authPieces = self.uri.auth.split(':').map(function(item){ return querystring.unescape(item) }) self.auth(authPieces[0], authPieces.slice(1).join(':'), true) @@ -920,11 +925,31 @@ Request.prototype.onResponse = function (response) { if (!self._ended) self.response.emit('end') }) + var dataStream + if (self.gzip) { + var contentEncoding = response.headers["content-encoding"] || "identity" + contentEncoding = contentEncoding.trim().toLowerCase() + + if (contentEncoding === "gzip") { + dataStream = zlib.createGunzip() + response.pipe(dataStream) + } else { + // Since previous versions didn't check for Content-Encoding header, + // ignore any invalid values to preserve backwards-compatibility + if (contentEncoding !== "identity") { + debug("ignoring unrecognized Content-Encoding " + contentEncoding) + } + dataStream = response + } + } else { + dataStream = response + } + if (self.encoding) { if (self.dests.length !== 0) { console.error("Ignoring encoding parameter as this stream is being piped to another stream which makes the encoding option invalid.") } else { - response.setEncoding(self.encoding) + dataStream.setEncoding(self.encoding) } } @@ -934,15 +959,15 @@ Request.prototype.onResponse = function (response) { self.pipeDest(dest) }) - response.on("data", function (chunk) { + dataStream.on("data", function (chunk) { self._destdata = true self.emit("data", 
chunk) }) - response.on("end", function (chunk) { + dataStream.on("end", function (chunk) { self._ended = true self.emit("end", chunk) }) - response.on("close", function () {self.emit("close")}) + dataStream.on("close", function () {self.emit("close")}) if (self.callback) { var buffer = [] @@ -1037,7 +1062,11 @@ Request.prototype.pipeDest = function (dest) { } if (dest.setHeader && !dest.headersSent) { for (var i in response.headers) { - dest.setHeader(i, response.headers[i]) + // If the response content is being decoded, the Content-Encoding header + // of the response doesn't represent the piped content, so don't pass it. + if (!this.gzip || i !== 'content-encoding') { + dest.setHeader(i, response.headers[i]) + } } dest.statusCode = response.statusCode } diff --git a/tests/test-gzip.js b/tests/test-gzip.js new file mode 100644 index 000000000..73ec5328e --- /dev/null +++ b/tests/test-gzip.js @@ -0,0 +1,95 @@ +var request = require('../index') + , http = require('http') + , assert = require('assert') + , zlib = require('zlib') + +var testContent = 'Compressible response content.\n' + , testContentGzip + +var server = http.createServer(function (req, res) { + res.statusCode = 200 + res.setHeader('Content-Type', 'text/plain') + + if (/\bgzip\b/i.test(req.headers['accept-encoding'])) { + zlib.gzip(testContent, function (err, data) { + assert.ifError(err) + testContentGzip = data + res.setHeader('Content-Encoding', 'gzip') + res.end(data) + }) + } else { + res.end(testContent) + } +}) + +server.listen(6767, function (err) { + assert.ifError(err) + + var headers, options + + // Transparently supports gzip decoding to callbacks + options = { url: 'http://localhost:6767/foo', gzip: true } + request.get(options, function (err, res, body) { + assert.ifError(err) + assert.strictEqual(res.headers['content-encoding'], 'gzip') + assert.strictEqual(body, testContent) + }) + + + // Transparently supports gzip decoding to pipes + options = { url: 'http://localhost:6767/foo', 
gzip: true } + var chunks = [] + request.get(options) + .on('data', function (chunk) { chunks.push(chunk) }) + .on('end', function () { + assert.strictEqual(Buffer.concat(chunks).toString(), testContent) + }) + .on('error', function (err) { assert.ifError(err) }) + + + // Does not request gzip if user specifies Accepted-Encodings + headers = { 'Accept-Encoding': null } + options = { + url: 'http://localhost:6767/foo', + headers: headers, + gzip: true + } + request.get(options, function (err, res, body) { + assert.ifError(err) + assert.strictEqual(res.headers['content-encoding'], undefined) + assert.strictEqual(body, testContent) + }) + + + // Does not decode user-requested encoding by default + headers = { 'Accept-Encoding': 'gzip' } + options = { url: 'http://localhost:6767/foo', headers: headers } + request.get(options, function (err, res, body) { + assert.ifError(err) + assert.strictEqual(res.headers['content-encoding'], 'gzip') + assert.strictEqual(body, testContentGzip.toString()) + }) + + + // Supports character encoding with gzip encoding + headers = { 'Accept-Encoding': 'gzip' } + options = { + url: 'http://localhost:6767/foo', + headers: headers, + gzip: true, + encoding: "utf8" + } + var strings = [] + request.get(options) + .on('data', function (string) { + assert.strictEqual(typeof string, "string") + strings.push(string) + }) + .on('end', function () { + assert.strictEqual(strings.join(""), testContent) + + // Shutdown server after last test + server.close() + }) + .on('error', function (err) { assert.ifError(err) }) +})