3 changes: 3 additions & 0 deletions src/api.js
@@ -110,6 +110,9 @@ var api = new API({
runId: RUN_ID_PATTERN,
name: /^[\x20-\x7e]+$/, // Artifact names must be printable ASCII
},
errorCodes: {
ContentEncodingNotAvailable: 406,
},
context: [
'Task', // data.Task instance
'Artifact', // data.Artifact instance
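For context, this declared error code is what lets `res.reportError` in `artifacts.js` below answer with a 406. A minimal sketch of the pattern (illustrative, not the actual taskcluster-lib-api internals):

```js
// Sketch of the declared-error pattern, under the assumption that
// reportError resolves the symbolic name through the errorCodes map
// declared on the API and responds with the mapped HTTP status.
const errorCodes = {
  ContentEncodingNotAvailable: 406,
};

function reportError(res, code, message, details) {
  const status = errorCodes[code] || 500; // unknown codes fall back to 500
  res.status(status).json({code, message, details});
}
```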
62 changes: 40 additions & 22 deletions src/artifacts.js
@@ -504,6 +504,24 @@ var replyWithArtifact = async function(taskId, runId, name, req, res) {
key: artifact.details.key,
};

// Check if Content-Encoding is supported by the given Accept-Encoding header
if (artifact.details.contentEncoding && !req.acceptsEncodings(artifact.details.contentEncoding)) {
return res.reportError('ContentEncodingNotAvailable', [
'Artifact `{{name}}` is **not available** in an accepted `Content-Encoding`.',
'',
'The incoming request specifies accepted encodings using the `Accept-Encoding` header.',
'This header was set to `{{acceptEncoding}}`, but the artifact is not',
'available in any of these encodings.',
'',
'To download this artifact, the request must accept `Content-Encoding: {{contentEncoding}}`;',
'try setting `Accept-Encoding: {{contentEncoding}}` and handling the decoding yourself.',
].join('\n'), {
name,
acceptEncoding: req.headers['accept-encoding'],
contentEncoding: artifact.details.contentEncoding,
});
}

// TODO: We should consider doing a HEAD on all resources and verifying that
// the ETag they have matches the one that we received when creating the artifact.
// This is a bit of extra overhead, but it's one more check of consistency
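The check above relies on Express content negotiation: `req.acceptsEncodings(encoding)` returns the encoding when the client's `Accept-Encoding` header permits it, and `false` otherwise. A minimal sketch of the gate (route and names are illustrative):

```js
// Minimal sketch of the negotiation gate above, assuming an Express-style
// request: req.acceptsEncodings('gzip') returns 'gzip' when the client's
// Accept-Encoding header permits it, and false otherwise.
const express = require('express');
const zlib = require('zlib');

const app = express();
const storedBody = zlib.gzipSync('artifact bytes'); // stand-in for the stored artifact

app.get('/artifact/:name', (req, res) => {
  const storedEncoding = 'gzip'; // the encoding the artifact was uploaded with
  if (!req.acceptsEncodings(storedEncoding)) {
    // Mirrors the ContentEncodingNotAvailable (406) error above.
    return res.status(406).send('artifact only available with Content-Encoding: gzip');
  }
  res.set('Content-Encoding', storedEncoding);
  res.send(storedBody);
});
```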
@@ -661,28 +679,28 @@ api.declare({
name,
});

// Ensure that the run is running
if (run.state !== 'running') {
var allow = false;
if (run.state === 'exception') {
// If task was resolved exception, we'll allow artifacts to be uploaded
// up to 25 min past resolution. This allows us to report exception as
// soon as we know and then upload logs if possible.
// Useful because exception usually implies something badly wrong.
allow = new Date(run.resolved).getTime() > Date.now() - 25 * 60 * 1000;
}
if (!allow) {
return res.reportError('RequestConflict',
'Artifacts cannot be completed for a task after it is ' +
'resolved, unless it is resolved \'exception\', and even ' +
'in this case only up to 25 min past resolution. ' +
'This is to ensure that artifacts have been uploaded before ' +
'a task is \'completed\' and output is consumed by a ' +
'dependent task\n\nTask status: {{status}}', {
status: task.status(),
});
}
}

if (artifact.storageType !== 'blob') {
return res.reportError('InputError',
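The grace-window rule above boils down to a small predicate. A minimal sketch, assuming `run.resolved` is an ISO 8601 timestamp as elsewhere in the queue:

```js
// Sketch of the rule enforced above: uploads are accepted while the run is
// running, or for up to 25 minutes after a run resolved as 'exception'.
const GRACE_MS = 25 * 60 * 1000;

function uploadAllowed(run, now = Date.now()) {
  if (run.state === 'running') {
    return true;
  }
  if (run.state === 'exception') {
    return new Date(run.resolved).getTime() > now - GRACE_MS;
  }
  return false;
}

// A run that resolved 'exception' ten minutes ago still accepts uploads:
// uploadAllowed({state: 'exception', resolved: new Date(Date.now() - 10 * 60 * 1000).toJSON()}) === true
```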
31 changes: 20 additions & 11 deletions src/data.js
@@ -372,21 +372,30 @@ let Artifact = Entity.configure({
* storageType: error
* reason: Formalized reason for error artifact, see JSON schema.
* message: Human readable error message to return
*
* storageType: blob
* contentType
* contentEncoding
* contentSha256: SHA256 hash of the un-encoded artifact
* contentLength: Number of bytes of the un-encoded artifact
* transferSha256: SHA256 hash of the content-encoding encoded artifact
* transferLength: Number of bytes of the content-encoding encoded artifact
* partsHash: Rather than storing the potentially large list of sha256/size
* pairings for each part, we just need to store enough information
* to determine if the operation would result in an idempotency
* error
* contentEncoding: Content encoding, such as 'gzip',
* NOTE: This property is not present if contentEncoding is 'identity'.
Contributor: I thought if not present, we would store 'identity' there, am I wrong?

Contributor (author): If I look at the code, yes... maybe we have a default in the schema and I missed that, but then we have unnecessary special cases in the code.
* contentSha256: Hex encoded sha256 hash of the un-encoded artifact
* contentLength: Size of the un-encoded artifact as number of bytes
* transferLength: Size of the content-encoding encoded artifact as number of bytes,
* NOTE: This property is not present if contentEncoding is 'identity'.
* transferSha256: Hex encoded sha256 hash of the content-encoding encoded artifact,
* NOTE: This property is not present if contentEncoding is 'identity'.
* provider: Provider, currently 's3'
* region: Region, currently 'us-west-2'
* bucket: Bucket, currently configured public or private bucket
* key: Path to the artifact in S3
* uploadId: S3 multipart uploadId.
* NOTE: This property is not present when upload is completed.
* etag: S3 ETag value.
* NOTE: This property is not present until upload is completed.
* partsHash: Hash of (sha256, size) tuples for all parts.
* We store this to enable idempotency, when a request is retried.
*/
details: Entity.types.JSON,
expires: Entity.types.Date,
/**
* Present is a number field which represents an artifact being present and
* the upload being completed. The handling logic for the artifact's
* storage type will fully define what that means for a given storage type.
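The `partsHash` described above only needs to detect whether a retried request supplied the same parts, so a single digest over the (sha256, size) pairs suffices. A hypothetical computation (the exact serialization is illustrative, not necessarily what the queue stores):

```js
// Hypothetical partsHash: one digest over all (sha256, size) pairs. A
// retried createArtifact with identical parts produces the same value;
// any difference can be rejected as an idempotency error.
const crypto = require('crypto');

function computePartsHash(parts) {
  const h = crypto.createHash('sha256');
  for (const {sha256, size} of parts) {
    h.update(`${sha256}:${size};`);
  }
  return h.digest('hex');
}
```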
80 changes: 69 additions & 11 deletions test/artifact_test.js
@@ -23,6 +23,7 @@ suite('Artifacts', function() {
var urllib = require('url');
var http = require('http');
var https = require('https');
var zlib = require('zlib');

// Static URL from which ip-ranges from AWS services can be fetched
const AWS_IP_RANGES_URL = 'https://ip-ranges.amazonaws.com/ip-ranges.json';
@@ -176,7 +177,7 @@ suite('Artifacts', function() {

test('S3 single part complete flow', async () => {
let taskId = slugid.v4();

debug('### Creating task');
await helper.queue.createTask(taskId, taskDef);

@@ -212,11 +213,11 @@
let uploadOutcome = await client.runUpload(response.requests, uploadInfo);

response = await helper.queue.completeArtifact(taskId, 0, 'public/singlepart.dat', {
etags: uploadOutcome.etags,
});

let secondResponse = await helper.queue.completeArtifact(taskId, 0, 'public/singlepart.dat', {
etags: uploadOutcome.etags,
});
assume(response).deeply.equals(secondResponse);

@@ -227,18 +228,75 @@
debug('Fetching artifact from: %s', artifactUrl);
let artifact = await getWithoutRedirecting(artifactUrl);

let expectedUrl =
`https://test-bucket-for-any-garbage.s3-us-west-2.amazonaws.com/${taskId}/0/public/singlepart.dat`;
assume(artifact.headers).has.property('location', expectedUrl);

await verifyDownload(artifact.headers.location, bigfilehash, bigfilesize);

});

test('S3 single part complete flow, content-encoding: gzip', async () => {
const taskId = slugid.v4();
const data = crypto.randomBytes(12 * 1024 * 1024 + 21);
const gzipped = zlib.gzipSync(data);

debug('### Creating task');
await helper.queue.createTask(taskId, taskDef);

debug('### Claiming task');
await helper.queue.claimTask(taskId, 0, {
workerGroup: 'my-worker-group',
workerId: 'my-worker',
});

debug('### Create artifact');
const {
requests,
} = await helper.queue.createArtifact(taskId, 0, 'public/singlepart.dat', {
storageType: 'blob',
expires: taskcluster.fromNowJSON('1 day'),
contentType: 'binary/octet-stream',
contentEncoding: 'gzip',
contentLength: data.length,
contentSha256: crypto.createHash('sha256').update(data).digest('hex'),
transferLength: gzipped.length,
transferSha256: crypto.createHash('sha256').update(gzipped).digest('hex'),
});

debug('### Put first and only part of artifact');
const {method, url, headers} = requests[0];
const res = await request(method, url).set(headers).send(gzipped);
const etag = res.headers['etag'];

debug('### Complete artifact upload');
await helper.queue.completeArtifact(taskId, 0, 'public/singlepart.dat', {
etags: [etag],
});

const artifactUrl = helper.queue.buildUrl(
helper.queue.getArtifact,
taskId, 0, 'public/singlepart.dat',
);
debug('### Fetching artifact from: %s', artifactUrl);
const res2 = await request.get(artifactUrl).responseType('blob');
Contributor: There's already code for doing this:

      await verifyDownload(artifact.headers.location, bigfilehash, bigfilesize);

Please use it; it's what we do for all the other tests, and it handles many more checks than are duplicated here.

Contributor (author): I tried adding it, but that won't work because of the content-encoding.

debug('Downloaded artifact, statusCode: %s', res2.status);
const res2Hash = crypto.createHash('sha256').update(res2.body).digest('hex');
assert(res2Hash === crypto.createHash('sha256').update(data).digest('hex'));
assert(res2Hash === res2.headers['x-amz-meta-content-sha256']);
debug('Response headers: %j', res2.headers);

debug('### Downloading artifact with incorrect "Accept-Encoding"');
let e;
await request.get(artifactUrl).set('Accept-Encoding', 'identity').catch(err => e = err);
Contributor: Let's also test with no Accept-Encoding value set, since that's by far the more likely scenario in reality.

Contributor (author): identity is the same thing as setting no Accept-Encoding.

Contributor: But it should only be one extra line, to account for us doing something incorrectly and handling the two options differently.

assert(e, 'expected an error');
assert(e.status === 406, 'expected 406');
});
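As the threads above note, the shared `verifyDownload` helper assumes identity encoding, which is why this test duplicates its checks inline. A hypothetical gzip-aware variant (names are illustrative, not the repo's helper) would decode before comparing:

```js
// Hypothetical gzip-aware verification, mirroring the assertions in the
// test above: hash and length are checked against the un-encoded bytes.
const assert = require('assert');
const crypto = require('crypto');
const zlib = require('zlib');

function verifyGzipBody(encodedBody, expectedSha256, expectedLength) {
  const decoded = zlib.gunzipSync(encodedBody);
  const sha256 = crypto.createHash('sha256').update(decoded).digest('hex');
  assert.strictEqual(sha256, expectedSha256, 'content hash mismatch');
  assert.strictEqual(decoded.length, expectedLength, 'content length mismatch');
}
```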

test('S3 multi part complete flow', async () => {
let name = 'public/multipart.dat';
let taskId = slugid.v4();

debug('### Creating task');
await helper.queue.createTask(taskId, taskDef);

@@ -279,12 +337,12 @@
let uploadOutcome = await client.runUpload(response.requests, uploadInfo);

response = await helper.queue.completeArtifact(taskId, 0, name, {
etags: uploadOutcome.etags,
});

// Ensure idempotency for completion of artifacts
let secondResponse = await helper.queue.completeArtifact(taskId, 0, name, {
etags: uploadOutcome.etags,
});
assume(response).deeply.equals(secondResponse);

Expand All @@ -301,11 +359,11 @@ suite('Artifacts', function() {

await verifyDownload(artifact.headers.location, bigfilehash, bigfilesize);
});

test('S3 multi part idempotency', async () => {
let name = 'public/multipart.dat';
let taskId = slugid.v4();

debug('### Creating task');
await helper.queue.createTask(taskId, taskDef);

@@ -348,7 +406,7 @@
return {sha256: x.sha256, size: x.size};
}),
});

let firstUploadId = qs.parse(urllib.parse(firstResponse.requests[0].url).query).uploadId;
let secondUploadId = qs.parse(urllib.parse(secondResponse.requests[0].url).query).uploadId;
assume(firstUploadId).equals(secondUploadId);
@@ -382,7 +440,7 @@
let uploadOutcome = await client.runUpload(secondResponse.requests, uploadInfo);

let response = await helper.queue.completeArtifact(taskId, 0, name, {
etags: uploadOutcome.etags,
});
});
});