This repository has been archived by the owner on Jan 19, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
576 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -83,6 +83,7 @@ def _load_bp(n): | |
'slaveloan', | ||
'tokenauth', | ||
'tooltool', | ||
'archiver', | ||
]] | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
# This Source Code Form is subject to the terms of the Mozilla Public | ||
# License, v. 2.0. If a copy of the MPL was not distributed with this | ||
# file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||
|
||
import logging | ||
|
||
from random import randint | ||
|
||
from flask import Blueprint | ||
from flask import current_app | ||
from flask import redirect | ||
from flask import url_for | ||
from relengapi.blueprints.archiver.tasks import create_and_upload_archive | ||
from relengapi.blueprints.archiver.types import MozharnessArchiveTask | ||
from relengapi.lib import api | ||
|
||
# blueprint for all archiver endpoints, mounted under /archiver
bp = Blueprint('archiver', __name__)
log = logging.getLogger(__name__)

# lifetime, in seconds, of the signed s3 GET urls handed back to callers
GET_EXPIRES_IN = 300
|
||
|
||
@bp.route('/status/<task_id>')
@api.apimethod(MozharnessArchiveTask, unicode)
def task_status(task_id):
    """
    Report the current state of the create_and_upload_archive celery task
    identified by <task_id>.

    An unknown task reports PENDING; once it begins running it moves to
    STARTED and ends in either SUCCESS (no exceptions) or FAILURE. See
    update_state() within create_and_upload_archive and
    http://celery.readthedocs.org/en/latest/reference/celery.states.html
    for more details.

    On SUCCESS, response['s3_urls'] holds the archives submitted to s3.
    """
    task = create_and_upload_archive.AsyncResult(task_id)
    task_info = task.info or {}
    response = {'state': task.state}
    if task.state == 'FAILURE':
        # something went wrong; celery stores the raised exception in task.info
        response['status'] = str(task.info)
        response['src_url'] = ''
        response['s3_urls'] = {}
    else:
        response['status'] = task_info.get('status', 'no status available at this point.')
        response['src_url'] = task_info.get('src_url', '')
        response['s3_urls'] = task_info.get('s3_urls', {})

    return MozharnessArchiveTask(**response)
|
||
|
||
@bp.route('/hgmo/<path:repo>/<rev>')
@api.apimethod(None, unicode, unicode, unicode, unicode, unicode, status_code=302)
def get_hgmo_archive(repo, rev, subdir=None, suffix='tar.gz', preferred_region=None):
    """
    An archiver for hg.mozilla.org related requests. Uses relengapi.blueprints.archiver.get_archive

    :param repo: the repo location off of hg.mozilla.org/
    :param rev: the rev associated with the repo
    :param subdir: optional subdir path to only archive a portion of the repo
    :param suffix: the archive extension type. defaulted to tar.gz
    :param preferred_region: the preferred s3 region to use
    """
    src_url = current_app.config['ARCHIVER_HGMO_URL_TEMPLATE'].format(
        repo=repo, rev=rev, suffix=suffix, subdir=subdir or ''
    )
    # Appending the subdir after the archive extension looks odd, but it:
    #   1) allows archives of different subdirs from the same repo and rev
    #   2) matches the hg.mozilla.org archive naming format
    key_parts = ['{repo}-{rev}.{suffix}'.format(repo=repo, rev=rev, suffix=suffix)]
    if subdir:
        key_parts.append(subdir)
    return get_archive(src_url, '/'.join(key_parts), preferred_region)
|
||
|
||
def get_archive(src_url, key, preferred_region):
    """
    A generic getter for retrieving an s3 location of an archive where the archive is based off a
    src_url.

    sub-dir: hg.mozilla.org supports archives of sub directories within a repository. This
    flexibility allows for creating archives of only a portion of what would normally be an entire
    repo archive.

    logic flow:
     If there is already a key within s3, a re-direct link is given for the
     s3 location. If the key does not exist, download the archive from src url, upload it to s3
     for each region supported and return all uploaded s3 url locations.

     When the key does not exist, the remaining work will be assigned to a celery background task
     with a url location returned immediately for obtaining task state updates.

    :param src_url: the url the archive is downloaded from if it is not already in s3
    :param key: the s3 key to look up / store the archive under
    :param preferred_region: preferred s3 region; falls back to a random supported region
    """
    buckets = current_app.config['ARCHIVER_S3_BUCKETS']
    # use preferred region if available otherwise choose a valid one at random.
    # note: materialize the keys with list(buckets) rather than indexing
    # buckets.keys() directly -- dict views are not indexable on Python 3 and
    # the old form also called keys() twice.
    if preferred_region and preferred_region in buckets:
        region = preferred_region
    else:
        regions = list(buckets)
        region = regions[randint(0, len(regions) - 1)]
    bucket = buckets[region]
    s3 = current_app.aws.connect_to('s3', region)

    # first, see if the key exists
    if not s3.get_bucket(bucket).get_key(key):
        task_id = key.replace('/', '_')  # keep things simple and avoid slashes in task url
        if create_and_upload_archive.AsyncResult(task_id).state != 'STARTED':
            # task is currently not in progress so start one.
            create_and_upload_archive.apply_async(args=[src_url, key], task_id=task_id)
        # 202 Accepted: caller polls the Location header for task status
        return {}, 202, {'Location': url_for('archiver.task_status', task_id=task_id)}

    log.info("generating GET URL to {}, expires in {}s".format(key, GET_EXPIRES_IN))
    # return 302 pointing to s3 url with archive
    signed_url = s3.generate_url(
        method='GET', expires_in=GET_EXPIRES_IN,
        bucket=bucket, key=key
    )
    return redirect(signed_url)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
# This Source Code Form is subject to the terms of the Mozilla Public | ||
# License, v. 2.0. If a copy of the MPL was not distributed with this | ||
# file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||
import logging | ||
import requests | ||
import shutil | ||
import tempfile | ||
|
||
from boto.s3.key import Key | ||
from celery.task import current | ||
from flask import current_app | ||
from random import randint | ||
|
||
from relengapi.lib import celery | ||
|
||
log = logging.getLogger(__name__)

# lifetime, in seconds, of the signed s3 GET urls generated after each upload
GET_EXPIRES_IN = 300
|
||
|
||
def upload_url_archive_to_s3(key, url, buckets):
    """
    Download the archive at *url* and upload it to each supported s3 bucket.

    :param key: s3 key name to store the archive under
    :param url: source url the archive is streamed from
    :param buckets: mapping of s3 region name -> bucket name
    :returns: dict mapping each region to a signed GET url for the uploaded key
    """
    s3_urls = {}

    # make the source request
    resp = requests.get(url, stream=True)
    try:
        # create a temporary file for it
        tempf = tempfile.TemporaryFile()
        try:
            # copy the data, block-by-block, into that file
            resp.raw.decode_content = True
            shutil.copyfileobj(resp.raw, tempf)

            # write it out to S3
            for region in buckets:
                s3 = current_app.aws.connect_to('s3', region)
                k = Key(s3.get_bucket(buckets[region]))
                k.key = key
                k.set_metadata('Content-Type', resp.headers['Content-Type'])
                # give it the same attachment filename
                k.set_metadata('Content-Disposition', resp.headers['Content-Disposition'])
                k.set_contents_from_file(tempf, rewind=True)  # rewind points tempf back to start for us
                s3_urls[region] = s3.generate_url(expires_in=GET_EXPIRES_IN, method='GET',
                                                  bucket=buckets[region], key=key)
        finally:
            # close (and thereby delete) the spool file even if an upload fails;
            # previously it leaked until garbage collection
            tempf.close()
    finally:
        resp.close()

    return s3_urls
|
||
|
||
@celery.task(bind=True, track_started=True, max_retries=3)
def create_and_upload_archive(self, src_url, key):
    """
    A celery task that downloads an archive if it exists from a src location and attempts to upload
    the archive to a supported bucket in each supported region.

    Throughout this process, update the state of the task and finally return the location of the
    s3 urls if successful.
    """
    status = "Task completed! Check 's3_urls' for upload locations."
    s3_urls = {}
    buckets = current_app.config['ARCHIVER_S3_BUCKETS']

    # cheap existence check (HEAD) before streaming the whole archive
    resp = requests.head(src_url)
    if resp.status_code == 200:
        try:
            s3_urls = upload_url_archive_to_s3(key, src_url, buckets)
        except Exception as exc:
            # set a jitter enabled delay
            # where an aggressive delay would result in: 7s, 49s, and 343s
            # and a gentle delay would result in: 4s, 16s, and 64s
            delay = randint(4, 7) ** (current.request.retries + 1)  # retries == 0 on first attempt
            # retry() raises, so no result is returned on this attempt;
            # max_retries=3 bounds the total attempts
            current.retry(exc=exc, countdown=delay)
    else:
        status = "Url not found. Does it exist? url: '{}', response: '{}' ".format(src_url,
                                                                                   resp.status_code)
        log.warning(status)
    return {
        'status': status,
        'src_url': src_url,
        's3_urls': s3_urls,
    }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
# This Source Code Form is subject to the terms of the Mozilla Public | ||
# License, v. 2.0. If a copy of the MPL was not distributed with this | ||
# file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||
import json | ||
import mock | ||
import moto | ||
|
||
from nose.tools import eq_ | ||
from relengapi.blueprints.archiver.test_util import EXPECTED_TASK_STATUS_FAILED_RESPONSE | ||
from relengapi.blueprints.archiver.test_util import EXPECTED_TASK_STATUS_SUCCESSFUL_RESPONSE | ||
from relengapi.blueprints.archiver.test_util import create_s3_items | ||
from relengapi.blueprints.archiver.test_util import fake_200_response | ||
from relengapi.blueprints.archiver.test_util import fake_failed_task_status | ||
from relengapi.blueprints.archiver.test_util import fake_successful_task_status | ||
from relengapi.blueprints.archiver.test_util import setup_buckets | ||
|
||
from relengapi.lib.testing.context import TestContext | ||
|
||
|
||
# test app configuration: fake aws credentials, two supported s3 regions for
# the archiver blueprint, and an in-memory celery broker/backend running
# tasks eagerly so no worker process is needed.
cfg = {
    'RELENGAPI_CELERY_LOG_LEVEL': 'DEBUG',

    'AWS': {
        'access_key_id': 'aa',
        'secret_access_key': 'ss',
    },

    'ARCHIVER_S3_BUCKETS': {
        'us-east-1': 'archiver-bucket-1',
        'us-west-2': 'archiver-bucket-2'
    },
    'ARCHIVER_HGMO_URL_TEMPLATE': "https://hg.mozilla.org/{repo}/archive/{rev}.{suffix}/{subdir}",

    'CELERY_BROKER_URL': 'memory://',
    'CELERY_BACKEND': 'cache',
    "CELERY_CACHE_BACKEND": 'memory',
    'CELERY_ALWAYS_EAGER': True,
}

test_context = TestContext(config=cfg)
|
||
|
||
@moto.mock_s3
@test_context
def test_accepted_response_when_missing_s3_key(app, client):
    """A request for a key not yet in s3 should start a task and return 202."""
    setup_buckets(app, cfg)
    patch_get = mock.patch("relengapi.blueprints.archiver.tasks.requests.get")
    patch_head = mock.patch("relengapi.blueprints.archiver.tasks.requests.head")
    with patch_get as get, patch_head as head:
        # don't actually hit hg.m.o, we just care about starting a subprocess and
        # returning a 202 accepted
        get.return_value = fake_200_response()
        head.return_value = fake_200_response()
        resp = client.get('/archiver/hgmo/mozilla-central/9213957d166d?'
                          'subdir=testing/mozharness&preferred_region=us-west-2')
    eq_(resp.status_code, 202, resp.status)
|
||
|
||
@moto.mock_s3
@test_context
def test_redirect_response_when_found_s3_key(app, client):
    """If the archive key already exists in s3, the endpoint should 302 to it."""
    setup_buckets(app, cfg)
    repo = 'mozilla-central'
    rev = '203e1025a826'
    subdir = 'testing/mozharness'
    suffix = 'tar.gz'
    key = '{repo}-{rev}.{suffix}'.format(repo=repo, rev=rev, suffix=suffix)
    if subdir:
        key += '/{}'.format(subdir)
    create_s3_items(app, cfg, key=key)

    url = '/archiver/hgmo/{repo}/{rev}?subdir={subdir}&suffix={suffix}'.format(
        rev=rev, repo=repo, subdir=subdir, suffix=suffix
    )
    resp = client.get(url)
    eq_(resp.status_code, 302, resp.status)
|
||
|
||
@moto.mock_s3
@test_context
def test_task_status_when_failed(app, client):
    """The status endpoint should relay a FAILURE task state unchanged."""
    expected_response = EXPECTED_TASK_STATUS_FAILED_RESPONSE
    with mock.patch("relengapi.blueprints.archiver.create_and_upload_archive") as caua:
        caua.AsyncResult.side_effect = fake_failed_task_status
        response = client.get('/archiver/status/{task_id}'.format(task_id=123))
    # assert equality directly rather than via the Python-2-only cmp() builtin
    eq_(json.loads(response.data)['result'], expected_response,
        "a failed task status check does not equal expected status.")
|
||
|
||
@moto.mock_s3
@test_context
def test_task_status_when_success(app, client):
    """The status endpoint should relay a SUCCESS task state unchanged."""
    expected_response = EXPECTED_TASK_STATUS_SUCCESSFUL_RESPONSE
    with mock.patch("relengapi.blueprints.archiver.create_and_upload_archive") as caua:
        caua.AsyncResult.return_value = fake_successful_task_status(expected_response)
        response = client.get('/archiver/status/{task_id}'.format(task_id=123))
    # assert equality directly rather than via the Python-2-only cmp() builtin
    eq_(json.loads(response.data)['result'], expected_response,
        "A successful task status check does not equal expected status.")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
# This Source Code Form is subject to the terms of the Mozilla Public | ||
# License, v. 2.0. If a copy of the MPL was not distributed with this | ||
# file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||
import mock | ||
import moto | ||
|
||
from relengapi.blueprints.archiver.tasks import create_and_upload_archive | ||
from relengapi.blueprints.archiver.test_util import fake_200_response | ||
from relengapi.blueprints.archiver.test_util import fake_404_response | ||
from relengapi.blueprints.archiver.test_util import setup_buckets | ||
from relengapi.lib.testing.context import TestContext | ||
|
||
|
||
# test app configuration: fake aws credentials, two supported s3 regions for
# the archiver blueprint, and an in-memory celery broker/backend running
# tasks eagerly so no worker process is needed.
cfg = {
    'RELENGAPI_CELERY_LOG_LEVEL': 'DEBUG',

    'AWS': {
        'access_key_id': 'aa',
        'secret_access_key': 'ss',
    },

    'ARCHIVER_S3_BUCKETS': {
        'us-east-1': 'archiver-bucket-1',
        'us-west-2': 'archiver-bucket-2'
    },
    'ARCHIVER_HGMO_URL_TEMPLATE': "https://hg.mozilla.org/{repo}/archive/{rev}.{suffix}/{subdir}",

    'CELERY_BROKER_URL': 'memory://',
    'CELERY_BACKEND': 'cache',
    "CELERY_CACHE_BACKEND": 'memory',
    'CELERY_ALWAYS_EAGER': True,
}

test_context = TestContext(config=cfg)
|
||
|
||
@moto.mock_s3
@test_context
def test_invalid_hg_url(app):
    """A 404 from the source url should surface in the task's status message."""
    setup_buckets(app, cfg)
    repo = 'mozilla-central'
    rev = 'fakeRev'
    suffix = 'tar.gz'
    key = '{repo}-{rev}.{suffix}'.format(repo=repo, rev=rev, suffix=suffix)
    src_url = cfg['ARCHIVER_HGMO_URL_TEMPLATE'].format(repo=repo, rev=rev, suffix=suffix,
                                                       subdir='testing/mozharness')
    with app.app_context():
        with mock.patch("relengapi.blueprints.archiver.tasks.requests.head") as head:
            head.return_value = fake_404_response()
            task = create_and_upload_archive.apply_async(args=[src_url, key],
                                                         task_id=key.replace('/', '_'))
    assert "Url not found." in task.info.get('status', {}), "invalid hg url was not caught!"
|
||
|
||
@moto.mock_s3
@test_context
def test_successful_upload_archive_response(app):
    """A successful run should report an s3 url for every configured region."""
    setup_buckets(app, cfg)
    repo = 'mozilla-central'
    rev = '203e1025a826'
    subdir = 'testing/mozharness'
    suffix = 'tar.gz'
    key = '{repo}-{rev}.{suffix}'.format(repo=repo, rev=rev, suffix=suffix)
    if subdir:
        key += '/{}'.format(subdir)
    src_url = cfg['ARCHIVER_HGMO_URL_TEMPLATE'].format(repo=repo, rev=rev, suffix=suffix,
                                                       subdir='testing/mozharness')
    with app.app_context():
        patch_get = mock.patch("relengapi.blueprints.archiver.tasks.requests.get")
        patch_head = mock.patch("relengapi.blueprints.archiver.tasks.requests.head")
        with patch_get as get, patch_head as head:
            get.return_value = fake_200_response()
            head.return_value = fake_200_response()
            task = create_and_upload_archive.apply_async(args=[src_url, key],
                                                         task_id=key.replace('/', '_'))
    expected_regions = list(cfg['ARCHIVER_S3_BUCKETS'])
    uploaded_urls = [
        task.info.get("s3_urls", {}).get(region) for region in expected_regions
    ]
    assert all(uploaded_urls), "s3 urls not uploaded for each region!"
    assert task.info.get('src_url') == src_url, "src url doesn't match upload response!"
    assert task.state == "SUCCESS", "completed task's state isn't SUCCESS!"
Oops, something went wrong.