3 changes: 3 additions & 0 deletions src/api.js
@@ -110,6 +110,9 @@ var api = new API({
runId: RUN_ID_PATTERN,
name: /^[\x20-\x7e]+$/, // Artifact names must be printable ASCII
},
errorCodes: {
ContentEncodingNotAvailable: 406,
},
context: [
'Task', // data.Task instance
'Artifact', // data.Artifact instance
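For context, this declared error code is what lets `res.reportError` in `artifacts.js` below answer with a 406. A minimal sketch of the pattern (illustrative, not the actual taskcluster-lib-api internals):

```js
// Sketch of the declared-error pattern, under the assumption that
// reportError resolves the symbolic name through the errorCodes map
// declared on the API and responds with the mapped HTTP status.
const errorCodes = {
  ContentEncodingNotAvailable: 406,
};

function reportError(res, code, message, details) {
  const status = errorCodes[code] || 500; // unknown codes fall back to 500
  res.status(status).json({code, message, details});
}
```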
62 changes: 40 additions & 22 deletions src/artifacts.js
@@ -504,6 +504,24 @@ var replyWithArtifact = async function(taskId, runId, name, req, res) {
key: artifact.details.key,
};

// Check if Content-Encoding is supported by the given Accept-Encoding header
if (artifact.details.contentEncoding && !req.acceptsEncodings(artifact.details.contentEncoding)) {
return res.reportError('ContentEncodingNotAvailable', [
'Artifact `{{name}}` is **not available** in an accepted `Content-Encoding`.',
'',
'The incoming request specifies accepted encodings using the `Accept-Encoding` header.',
'This header was set to `{{acceptEncoding}}`, but the artifact is not',
'available in any of these encodings.',
'',
'To download this artifact, the request must accept `Content-Encoding: {{contentEncoding}}`;',
'try setting `Accept-Encoding: {{contentEncoding}}` and handling the decoding yourself.',
].join('\n'), {
name,
acceptEncoding: req.headers['accept-encoding'],
contentEncoding: artifact.details.contentEncoding,
});
}

// TODO: We should consider doing a HEAD on all resources and verifying that
// the ETag they have matches the one that we received when creating the artifact.
// This is a bit of extra overhead, but it's one more check of consistency
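The check above relies on Express content negotiation: `req.acceptsEncodings(encoding)` returns the encoding when the client's `Accept-Encoding` header permits it, and `false` otherwise. A minimal sketch of the gate (route and names are illustrative):

```js
// Minimal sketch of the negotiation gate above, assuming an Express-style
// request: req.acceptsEncodings('gzip') returns 'gzip' when the client's
// Accept-Encoding header permits it, and false otherwise.
const express = require('express');
const zlib = require('zlib');

const app = express();
const storedBody = zlib.gzipSync('artifact bytes'); // stand-in for the stored artifact

app.get('/artifact/:name', (req, res) => {
  const storedEncoding = 'gzip'; // the encoding the artifact was uploaded with
  if (!req.acceptsEncodings(storedEncoding)) {
    // Mirrors the ContentEncodingNotAvailable (406) error above.
    return res.status(406).send('artifact only available with Content-Encoding: gzip');
  }
  res.set('Content-Encoding', storedEncoding);
  res.send(storedBody);
});
```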
@@ -661,28 +679,28 @@ api.declare({
name,
});

// Ensure that the run is running
if (run.state !== 'running') {
var allow = false;
if (run.state === 'exception') {
// If task was resolved exception, we'll allow artifacts to be uploaded
// up to 25 min past resolution. This allows us to report exception as
// soon as we know and then upload logs if possible.
// Useful because exception usually implies something badly wrong.
allow = new Date(run.resolved).getTime() > Date.now() - 25 * 60 * 1000;
}
if (!allow) {
return res.reportError('RequestConflict',
'Artifacts cannot be completed for a task after it is ' +
'resolved, unless it is resolved \'exception\', and even ' +
'in this case only up to 25 min past resolution. ' +
'This is to ensure that artifacts have been uploaded before ' +
'a task is \'completed\' and output is consumed by a ' +
'dependent task\n\nTask status: {{status}}', {
status: task.status(),
});
}
}

if (artifact.storageType !== 'blob') {
return res.reportError('InputError',
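The grace-window rule above boils down to a small predicate. A minimal sketch, assuming `run.resolved` is an ISO 8601 timestamp as elsewhere in the queue:

```js
// Sketch of the rule enforced above: uploads are accepted while the run is
// running, or for up to 25 minutes after a run resolved as 'exception'.
const GRACE_MS = 25 * 60 * 1000;

function uploadAllowed(run, now = Date.now()) {
  if (run.state === 'running') {
    return true;
  }
  if (run.state === 'exception') {
    return new Date(run.resolved).getTime() > now - GRACE_MS;
  }
  return false;
}

// A run that resolved 'exception' ten minutes ago still accepts uploads:
// uploadAllowed({state: 'exception', resolved: new Date(Date.now() - 10 * 60 * 1000).toJSON()}) === true
```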
31 changes: 20 additions & 11 deletions src/data.js
@@ -372,21 +372,30 @@ let Artifact = Entity.configure({
* storageType: error
* reason: Formalized reason for error artifact, see JSON schema.
* message: Human readable error message to return
*
* storageType: blob
* contentType
* contentEncoding
* contentSha256: SHA256 hash of the un-encoded artifact
* contentLength: Number of bytes of the un-encoded artifact
* transferSha256: SHA256 hash of the content-encoding encoded artifact
* transferLength: Number of bytes of the content-encoding encoded artifact
* partsHash: Rather than storing the potentially large list of sha256/size
* pairings for each part, we just need to store enough information
* to determine if the operation would result in an idempotency
* error
* contentEncoding: Content encoding, such as 'gzip',
* NOTE: This property is not present if contentEncoding is 'identity'.
Contributor: I thought if not present, we would store 'identity' there, am I wrong?

Contributor (author): If I look at the code, yes... maybe we have a default in the schema and I missed that, but then we have unnecessary special cases in the code.
* contentSha256: Hex encoded sha256 hash of the un-encoded artifact
* contentLength: Size of the un-encoded artifact as number of bytes
* transferLength: Size of the content-encoding encoded artifact as number of bytes,
* NOTE: This property is not present if contentEncoding is 'identity'.
* transferSha256: Hex encoded sha256 hash of the content-encoding encoded artifact,
* NOTE: This property is not present if contentEncoding is 'identity'.
* provider: Provider, currently 's3'
* region: Region, currently 'us-west-2'
* bucket: Bucket, currently configured public or private bucket
* key: Path to the artifact in S3
* uploadId: S3 multipart uploadId.
* NOTE: This property is not present when upload is completed.
* etag: S3 ETag value.
* NOTE: This property is not present until upload is completed.
* partsHash: Hash of (sha256, size) tuples for all parts.
* We store this to enable idempotency, when a request is retried.
*/
details: Entity.types.JSON,
expires: Entity.types.Date,
/**
* Present is a number field which represents an artifact being present and
* the upload being completed. The handling logic for the artifact's
* storage type will fully define what that means for a given storage type.
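The `partsHash` described above only needs to detect whether a retried request supplied the same parts, so a single digest over the (sha256, size) pairs suffices. A hypothetical computation (the exact serialization is illustrative, not necessarily what the queue stores):

```js
// Hypothetical partsHash: one digest over all (sha256, size) pairs. A
// retried createArtifact with identical parts produces the same value;
// any difference can be rejected as an idempotency error.
const crypto = require('crypto');

function computePartsHash(parts) {
  const h = crypto.createHash('sha256');
  for (const {sha256, size} of parts) {
    h.update(`${sha256}:${size};`);
  }
  return h.digest('hex');
}
```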
80 changes: 69 additions & 11 deletions test/artifact_test.js
@@ -23,6 +23,7 @@ suite('Artifacts', function() {
var urllib = require('url');
var http = require('http');
var https = require('https');
var zlib = require('zlib');

// Static URL from which ip-ranges from AWS services can be fetched
const AWS_IP_RANGES_URL = 'https://ip-ranges.amazonaws.com/ip-ranges.json';
@@ -176,7 +177,7 @@ suite('Artifacts', function() {

test('S3 single part complete flow', async () => {
let taskId = slugid.v4();

debug('### Creating task');
await helper.queue.createTask(taskId, taskDef);

@@ -212,11 +213,11 @@
let uploadOutcome = await client.runUpload(response.requests, uploadInfo);

response = await helper.queue.completeArtifact(taskId, 0, 'public/singlepart.dat', {
etags: uploadOutcome.etags,
});

let secondResponse = await helper.queue.completeArtifact(taskId, 0, 'public/singlepart.dat', {
etags: uploadOutcome.etags,
});
assume(response).deeply.equals(secondResponse);

@@ -227,18 +228,75 @@
debug('Fetching artifact from: %s', artifactUrl);
let artifact = await getWithoutRedirecting(artifactUrl);

let expectedUrl =
`https://test-bucket-for-any-garbage.s3-us-west-2.amazonaws.com/${taskId}/0/public/singlepart.dat`;
assume(artifact.headers).has.property('location', expectedUrl);

await verifyDownload(artifact.headers.location, bigfilehash, bigfilesize);

});

test('S3 single part complete flow, content-encoding: gzip', async () => {
const taskId = slugid.v4();
const data = crypto.randomBytes(12 * 1024 * 1024 + 21);
const gzipped = zlib.gzipSync(data);

debug('### Creating task');
await helper.queue.createTask(taskId, taskDef);

debug('### Claiming task');
await helper.queue.claimTask(taskId, 0, {
workerGroup: 'my-worker-group',
workerId: 'my-worker',
});

debug('### Create artifact');
const {
requests,
} = await helper.queue.createArtifact(taskId, 0, 'public/singlepart.dat', {
storageType: 'blob',
expires: taskcluster.fromNowJSON('1 day'),
contentType: 'binary/octet-stream',
contentEncoding: 'gzip',
contentLength: data.length,
contentSha256: crypto.createHash('sha256').update(data).digest('hex'),
transferLength: gzipped.length,
transferSha256: crypto.createHash('sha256').update(gzipped).digest('hex'),
});

debug('### Put first and only part of artifact');
const {method, url, headers} = requests[0];
const res = await request(method, url).set(headers).send(gzipped);
const etag = res.headers['etag'];

debug('### Complete artifact upload');
await helper.queue.completeArtifact(taskId, 0, 'public/singlepart.dat', {
etags: [etag],
});

const artifactUrl = helper.queue.buildUrl(
helper.queue.getArtifact,
taskId, 0, 'public/singlepart.dat',
);
debug('### Fetching artifact from: %s', artifactUrl);
const res2 = await request.get(artifactUrl).responseType('blob');
Contributor: There's already code for doing this:

      await verifyDownload(artifact.headers.location, bigfilehash, bigfilesize);

Please use it; it's what we do for all the other tests, and it handles many more checks than are duplicated here.

Contributor (author): I tried adding it, but that won't work because of the content-encoding.

debug('Downloaded artifact, statusCode: %s', res2.status);
const res2Hash = crypto.createHash('sha256').update(res2.body).digest('hex');
assert(res2Hash === crypto.createHash('sha256').update(data).digest('hex'));
assert(res2Hash === res2.headers['x-amz-meta-content-sha256']);
debug('Response headers: %j', res2.headers);

debug('### Downloading artifact with incorrect "Accept-Encoding"');
let e;
await request.get(artifactUrl).set('Accept-Encoding', 'identity').catch(err => e = err);
Contributor: Let's also test with no Accept-Encoding value set, since that's by far the more likely scenario in reality.

Contributor (author): identity is the same thing as setting no Accept-Encoding.

Contributor: But it should only be one extra line, to account for us doing something incorrectly and handling the two options differently.

assert(e, 'expected an error');
assert(e.status === 406, 'expected 406');
});
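As the threads above note, the shared `verifyDownload` helper assumes identity encoding, which is why this test duplicates its checks inline. A hypothetical gzip-aware variant (names are illustrative, not the repo's helper) would decode before comparing:

```js
// Hypothetical gzip-aware verification, mirroring the assertions in the
// test above: hash and length are checked against the un-encoded bytes.
const assert = require('assert');
const crypto = require('crypto');
const zlib = require('zlib');

function verifyGzipBody(encodedBody, expectedSha256, expectedLength) {
  const decoded = zlib.gunzipSync(encodedBody);
  const sha256 = crypto.createHash('sha256').update(decoded).digest('hex');
  assert.strictEqual(sha256, expectedSha256, 'content hash mismatch');
  assert.strictEqual(decoded.length, expectedLength, 'content length mismatch');
}
```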

test('S3 multi part complete flow', async () => {
let name = 'public/multipart.dat';
let taskId = slugid.v4();

debug('### Creating task');
await helper.queue.createTask(taskId, taskDef);

@@ -279,12 +337,12 @@
let uploadOutcome = await client.runUpload(response.requests, uploadInfo);

response = await helper.queue.completeArtifact(taskId, 0, name, {
etags: uploadOutcome.etags,
});

// Ensure idempotency for completion of artifacts
let secondResponse = await helper.queue.completeArtifact(taskId, 0, name, {
etags: uploadOutcome.etags,
});
assume(response).deeply.equals(secondResponse);

Expand All @@ -301,11 +359,11 @@ suite('Artifacts', function() {

await verifyDownload(artifact.headers.location, bigfilehash, bigfilesize);
});

test('S3 multi part idempotency', async () => {
let name = 'public/multipart.dat';
let taskId = slugid.v4();

debug('### Creating task');
await helper.queue.createTask(taskId, taskDef);

@@ -348,7 +406,7 @@
return {sha256: x.sha256, size: x.size};
}),
});

let firstUploadId = qs.parse(urllib.parse(firstResponse.requests[0].url).query).uploadId;
let secondUploadId = qs.parse(urllib.parse(secondResponse.requests[0].url).query).uploadId;
assume(firstUploadId).equals(secondUploadId);
@@ -382,7 +440,7 @@
let uploadOutcome = await client.runUpload(secondResponse.requests, uploadInfo);

let response = await helper.queue.completeArtifact(taskId, 0, name, {
etags: uploadOutcome.etags,
});
});
});