diff --git a/src/api.js b/src/api.js
index 8750dc8f..f5684533 100644
--- a/src/api.js
+++ b/src/api.js
@@ -110,6 +110,9 @@ var api = new API({
     runId: RUN_ID_PATTERN,
     name: /^[\x20-\x7e]+$/, // Artifact names must be printable ASCII
   },
+  errorCodes: {
+    ContentEncodingNotAvailable: 406,
+  },
   context: [
     'Task',      // data.Task instance
     'Artifact',  // data.Artifact instance
diff --git a/src/artifacts.js b/src/artifacts.js
index 9e82f102..e33867f6 100644
--- a/src/artifacts.js
+++ b/src/artifacts.js
@@ -504,6 +504,24 @@ var replyWithArtifact = async function(taskId, runId, name, req, res) {
       key: artifact.details.key,
     };
 
+    // Check that the artifact's Content-Encoding is allowed by the request's
+    // Accept-Encoding header
+    if (artifact.details.contentEncoding && !req.acceptsEncodings(artifact.details.contentEncoding)) {
+      return res.reportError('ContentEncodingNotAvailable', [
+        'Artifact `{{name}}` is **not available** in an accepted `Content-Encoding`.',
+        '',
+        'The incoming request specifies accepted encodings using the `Accept-Encoding` header.',
+        'This header was set to `{{acceptEncoding}}`, but the artifact is not',
+        'available in any of these encodings.',
+        '',
+        'To download this artifact the request must support `Content-Encoding: {{contentEncoding}}`;',
+        'try setting `Accept-Encoding: {{contentEncoding}}` and handle decoding.',
+      ].join('\n'), {
+        name,
+        acceptEncoding: req.headers['accept-encoding'],
+        contentEncoding: artifact.details.contentEncoding,
+      });
+    }
+
     // TODO: We should consider doing a HEAD on all resources and verifying that
     // the ETag they have matches the one that we received when creating the artifact.
     // This is a bit of extra overhead, but it's one more check of consistency
@@ -661,28 +679,28 @@ api.declare({
     name,
   });
 
-    // Ensure that the run is running
-    if (run.state !== 'running') {
-      var allow = false;
-      if (run.state === 'exception') {
-        // If task was resolved exception, we'll allow artifacts to be uploaded
-        // up to 25 min past resolution. This allows us to report exception as
-        // soon as we know and then upload logs if possible.
-        // Useful because exception usually implies something badly wrong.
-        allow = new Date(run.resolved).getTime() > Date.now() - 25 * 60 * 1000;
-      }
-      if (!allow) {
-        return res.reportError('RequestConflict',
-          'Artifacts cannot be completed for a task after it is ' +
-          'resolved, unless it is resolved \'exception\', and even ' +
-          'in this case only up to 25 min past resolution.' +
-          'This to ensure that artifacts have been uploaded before ' +
-          'a task is \'completed\' and output is consumed by a ' +
-          'dependent task\n\nTask status: {{status}}', {
-            status: task.status(),
-          });
-      }
-    }
+  // Ensure that the run is running
+  if (run.state !== 'running') {
+    var allow = false;
+    if (run.state === 'exception') {
+      // If task was resolved exception, we'll allow artifacts to be uploaded
+      // up to 25 min past resolution. This allows us to report exception as
+      // soon as we know and then upload logs if possible.
+      // Useful because exception usually implies something badly wrong.
+      allow = new Date(run.resolved).getTime() > Date.now() - 25 * 60 * 1000;
+    }
+    if (!allow) {
+      return res.reportError('RequestConflict',
+        'Artifacts cannot be completed for a task after it is ' +
+        'resolved, unless it is resolved \'exception\', and even ' +
+        'in this case only up to 25 min past resolution. ' +
+        'This is to ensure that artifacts have been uploaded before ' +
+        'a task is \'completed\' and output is consumed by a ' +
+        'dependent task\n\nTask status: {{status}}', {
+          status: task.status(),
+        });
+    }
+  }
 
   if (artifact.storageType !== 'blob') {
     return res.reportError('InputError',
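
For reviewers: the new guard relies on Express's req.acceptsEncodings(), which returns a falsy value when the request's Accept-Encoding header does not allow the given encoding. Below is a minimal standalone sketch of that behavior, not part of this change; the route, port, and artifact object are hypothetical, and Express 4.x is assumed.

// Standalone sketch of the Accept-Encoding guard above; assumes Express 4.x.
// The route, port, and artifact object are hypothetical.
const express = require('express');
const app = express();

// Stand-in for a stored blob artifact with gzip content-encoding.
const artifact = {details: {contentEncoding: 'gzip'}};

app.get('/artifact', (req, res) => {
  // With 'Accept-Encoding: identity' (or any header that rules out gzip),
  // req.acceptsEncodings('gzip') returns false, so we answer 406 Not Acceptable.
  if (artifact.details.contentEncoding && !req.acceptsEncodings(artifact.details.contentEncoding)) {
    return res.status(406).send('Artifact is not available in an accepted Content-Encoding');
  }
  res.send('here the queue would redirect to S3');
});

app.listen(8080);

Note that a request carrying no Accept-Encoding header at all still succeeds, since any encoding is then acceptable.
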
diff --git a/src/data.js b/src/data.js
index 500b882f..041019d5 100644
--- a/src/data.js
+++ b/src/data.js
@@ -372,21 +372,30 @@ let Artifact = Entity.configure({
    * storageType: error
    *    reason:         Formalized reason for error artifact, see JSON schema.
    *    message:        Human readable error message to return
+   *
    * storageType: blob
-   *    contentType
-   *    contentEncoding
-   *    contentSha256:  SHA256 hash of the un-encoded artifact
-   *    contentLength:  Number of bytes of the un-encoded artifact
-   *    transferSha256: SHA256 hash of the content-encoding encoded artifact
-   *    transferLength: Number of bytes of the content-encoding encoded artifact
-   *    partsHash:      Rather than storing the potentially large list of sha256/size
-   *                    pairings for each part, we just need to store enough information
-   *                    to determine if the operation would result in an idempotency
-   *                    error
+   *    contentEncoding: Content encoding, such as 'gzip'.
+   *                     NOTE: This property is not present if contentEncoding is 'identity'.
+   *    contentSha256:  Hex encoded sha256 hash of the un-encoded artifact
+   *    contentLength:  Size of the un-encoded artifact as a number of bytes
+   *    transferLength: Size of the content-encoding encoded artifact as a number of bytes.
+   *                    NOTE: This property is not present if contentEncoding is 'identity'.
+   *    transferSha256: Hex encoded sha256 hash of the content-encoding encoded artifact.
+   *                    NOTE: This property is not present if contentEncoding is 'identity'.
+   *    provider:       Provider, currently 's3'
+   *    region:         Region, currently 'us-west-2'
+   *    bucket:         Bucket, currently the configured public or private bucket
+   *    key:            Path to the artifact in S3
+   *    uploadId:       S3 multipart uploadId.
+   *                    NOTE: This property is not present when the upload is completed.
+   *    etag:           S3 ETag value.
+   *                    NOTE: This property is not present until the upload is completed.
+   *    partsHash:      Hash of the (sha256, size) tuples for all parts.
+   *                    We store this to enable idempotency when a request is retried.
    */
   details: Entity.types.JSON,
   expires: Entity.types.Date,
-   /**
+  /**
   * Present is a number field which represents an artifact being present and
   * the upload being completed. The handling logic for the artifact's
   * storage type will fully define what that means for a given storage type.
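
To make the documented shape concrete, the details value for a completed, gzip-encoded blob artifact would look roughly as follows. This is illustrative only: every concrete value is invented, and uploadId is absent because the upload has completed.

// Illustrative `details` record for a completed, gzip-encoded blob artifact.
// All concrete values below are invented for the example.
const details = {
  contentType: 'binary/octet-stream',
  contentEncoding: 'gzip',        // omitted entirely when the encoding is 'identity'
  contentSha256: '4d3c...91ab',   // hex sha256 of the un-encoded bytes (truncated here)
  contentLength: 12582933,        // size of the un-encoded artifact in bytes
  transferSha256: '7f21...c0de',  // hex sha256 of the gzip-encoded bytes; omitted for 'identity'
  transferLength: 12586470,       // size of the gzip-encoded artifact in bytes; omitted for 'identity'
  provider: 's3',
  region: 'us-west-2',
  bucket: 'test-bucket-for-any-garbage',
  key: 'Nf2jyg7fQceIEYs0QK8xkw/0/public/singlepart.dat',
  // uploadId is absent here because the upload has completed
  etag: '9b2cf535f27731c974343645a3985328',  // present only once the upload completes
  partsHash: 'a1b2...f3e4',       // hash over the (sha256, size) tuples of all parts
};
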
diff --git a/test/artifact_test.js b/test/artifact_test.js
index cec0ea36..ac8ba8d9 100644
--- a/test/artifact_test.js
+++ b/test/artifact_test.js
@@ -23,6 +23,7 @@ suite('Artifacts', function() {
   var urllib = require('url');
   var http = require('http');
   var https = require('https');
+  var zlib = require('zlib');
 
   // Static URL from which ip-ranges from AWS services can be fetched
   const AWS_IP_RANGES_URL = 'https://ip-ranges.amazonaws.com/ip-ranges.json';
@@ -176,7 +177,7 @@ suite('Artifacts', function() {
   test('S3 single part complete flow', async () => {
     let taskId = slugid.v4();
-    
+
     debug('### Creating task');
     await helper.queue.createTask(taskId, taskDef);
 
     debug('### Claiming task');
@@ -212,11 +213,11 @@ suite('Artifacts', function() {
     let uploadOutcome = await client.runUpload(response.requests, uploadInfo);
 
     response = await helper.queue.completeArtifact(taskId, 0, 'public/singlepart.dat', {
-      etags: uploadOutcome.etags, 
+      etags: uploadOutcome.etags,
     });
 
     let secondResponse = await helper.queue.completeArtifact(taskId, 0, 'public/singlepart.dat', {
-      etags: uploadOutcome.etags, 
+      etags: uploadOutcome.etags,
    });
 
     assume(response).deeply.equals(secondResponse);
@@ -227,7 +228,7 @@ suite('Artifacts', function() {
     debug('Fetching artifact from: %s', artifactUrl);
     let artifact = await getWithoutRedirecting(artifactUrl);
 
-    let expectedUrl = 
+    let expectedUrl =
       `https://test-bucket-for-any-garbage.s3-us-west-2.amazonaws.com/${taskId}/0/public/singlepart.dat`;
     assume(artifact.headers).has.property('location', expectedUrl);
 
@@ -235,10 +236,67 @@ suite('Artifacts', function() {
   });
 
+  test('S3 single part complete flow, content-encoding: gzip', async () => {
+    const taskId = slugid.v4();
+    const data = crypto.randomBytes(12 * 1024 * 1024 + 21);
+    const gzipped = zlib.gzipSync(data);
+
+    debug('### Creating task');
+    await helper.queue.createTask(taskId, taskDef);
+
+    debug('### Claiming task');
+    await helper.queue.claimTask(taskId, 0, {
+      workerGroup: 'my-worker-group',
+      workerId: 'my-worker',
+    });
+
+    debug('### Create artifact');
+    const {
+      requests,
+    } = await helper.queue.createArtifact(taskId, 0, 'public/singlepart.dat', {
+      storageType: 'blob',
+      expires: taskcluster.fromNowJSON('1 day'),
+      contentType: 'binary/octet-stream',
+      contentEncoding: 'gzip',
+      contentLength: data.length,
+      contentSha256: crypto.createHash('sha256').update(data).digest('hex'),
+      transferLength: gzipped.length,
+      transferSha256: crypto.createHash('sha256').update(gzipped).digest('hex'),
+    });
+
+    debug('### Put first and only part of artifact');
+    const {method, url, headers} = requests[0];
+    const res = await request(method, url).set(headers).send(gzipped);
+    const etag = res.headers['etag'];
+
+    debug('### Complete artifact upload');
+    await helper.queue.completeArtifact(taskId, 0, 'public/singlepart.dat', {
+      etags: [etag],
+    });
+
+    const artifactUrl = helper.queue.buildUrl(
+      helper.queue.getArtifact,
+      taskId, 0, 'public/singlepart.dat',
+    );
+    debug('### Fetching artifact from: %s', artifactUrl);
+    const res2 = await request.get(artifactUrl).responseType('blob');
+    debug('Downloaded artifact, statusCode: %s', res2.status);
+    const res2Hash = crypto.createHash('sha256').update(res2.body).digest('hex');
+    assert(res2Hash === crypto.createHash('sha256').update(data).digest('hex'));
+    assert(res2Hash === res2.headers['x-amz-meta-content-sha256']);
+    debug('Response headers: %j', res2.headers);
+
+    debug('### Downloading artifact with incorrect "Accept-Encoding"');
+    let e;
+    await request.get(artifactUrl).set('Accept-Encoding', 'identity').catch(err => e = err);
+    assert(e, 'expected an error');
+    assert(e.status === 406, 'expected 406');
+  });
+
   test('S3 multi part complete flow', async () => {
     let name = 'public/multipart.dat';
     let taskId = slugid.v4();
-    
+
     debug('### Creating task');
     await helper.queue.createTask(taskId, taskDef);
 
     debug('### Claiming task');
@@ -279,12 +337,12 @@ suite('Artifacts', function() {
     let uploadOutcome = await client.runUpload(response.requests, uploadInfo);
 
     response = await helper.queue.completeArtifact(taskId, 0, name, {
-      etags: uploadOutcome.etags, 
+      etags: uploadOutcome.etags,
     });
 
     // Ensure idempotency for completion of artifacts
     let secondResponse = await helper.queue.completeArtifact(taskId, 0, name, {
-      etags: uploadOutcome.etags, 
+      etags: uploadOutcome.etags,
     });
 
     assume(response).deeply.equals(secondResponse);
@@ -301,11 +359,11 @@ suite('Artifacts', function() {
     await verifyDownload(artifact.headers.location, bigfilehash, bigfilesize);
   });
-  
+
   test('S3 multi part idempotency', async () => {
     let name = 'public/multipart.dat';
     let taskId = slugid.v4();
-    
+
     debug('### Creating task');
     await helper.queue.createTask(taskId, taskDef);
 
     debug('### Claiming task');
@@ -348,7 +406,7 @@ suite('Artifacts', function() {
         return {sha256: x.sha256, size: x.size};
       }),
     });
-    
+
     let firstUploadId = qs.parse(urllib.parse(firstResponse.requests[0].url).query).uploadId;
     let secondUploadId = qs.parse(urllib.parse(secondResponse.requests[0].url).query).uploadId;
     assume(firstUploadId).equals(secondUploadId);
@@ -382,7 +440,7 @@ suite('Artifacts', function() {
     let uploadOutcome = await client.runUpload(secondResponse.requests, uploadInfo);
 
     let response = await helper.queue.completeArtifact(taskId, 0, name, {
-      etags: uploadOutcome.etags, 
+      etags: uploadOutcome.etags,
     });
   });
 });
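
Taken together, a client downloading a gzip-encoded blob artifact must advertise Accept-Encoding: gzip (or it gets the 406 above), follow the queue's redirect to S3, and decode the body itself, since S3 serves the stored bytes verbatim. A rough sketch using only Node core modules; the URL and taskId are hypothetical, error handling is minimal, and Node 10.9+ is assumed for the https.get(url, options, cb) form.

const https = require('https');
const zlib = require('zlib');

// Fetch a URL into a Buffer, following redirects and advertising gzip support.
function fetchBuffer(url) {
  return new Promise((resolve, reject) => {
    https.get(url, {headers: {'Accept-Encoding': 'gzip'}}, res => {
      if (res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) {
        res.resume();  // discard the redirect body
        return resolve(fetchBuffer(res.headers.location));
      }
      const chunks = [];
      res.on('data', chunk => chunks.push(chunk));
      res.on('end', () => resolve({headers: res.headers, body: Buffer.concat(chunks)}));
      res.on('error', reject);
    }).on('error', reject);
  });
}

// Hypothetical artifact URL; the taskId is made up.
const url = 'https://queue.taskcluster.net/v1/task/Nf2jyg7fQceIEYs0QK8xkw/runs/0/artifacts/public/singlepart.dat';

fetchBuffer(url).then(({headers, body}) => {
  // The stored bytes come back exactly as uploaded; decode when gzip-encoded.
  const data = headers['content-encoding'] === 'gzip' ? zlib.gunzipSync(body) : body;
  console.log('downloaded %d bytes', data.length);
}).catch(err => console.error(err));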