Skip to content

Commit

Permalink
Merge pull request #14 from lundjordan/download-artifacts
Browse files Browse the repository at this point in the history
ports download_artifacts and download_file to scriptworker
  • Loading branch information
escapewindow committed Sep 24, 2016
2 parents 1d9cbcf + ac6a3e0 commit 0078803
Show file tree
Hide file tree
Showing 18 changed files with 267 additions and 117 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@ All notable changes to this project will be documented in this file.
This project adheres to [Semantic Versioning](http://semver.org/).

## Unreleased
### Added
- added `DownloadError` exception for `download_file`
- added `scriptworker.task.download_artifacts`
- added `scriptworker.utils.download_file`

### Changed
- `DEFAULT_CONFIG`, `STATUSES`, and `REVERSED_STATUSES` have moved to `scriptworker.constants`.
- `list_to_tuple` has been renamed to `freeze_values`; it now also converts dict values to frozendicts.

## [0.6.0] - 2016-09-15
### Added
Expand Down
3 changes: 1 addition & 2 deletions scriptworker/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@
import re
from urllib.parse import urlparse, unquote

from scriptworker.config import DEFAULT_CONFIG
from scriptworker.constants import DEFAULT_CONFIG, STATUSES
from scriptworker.exceptions import ScriptWorkerTaskException
from scriptworker.task import STATUSES


def get_task(config):
Expand Down
80 changes: 8 additions & 72 deletions scriptworker/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,94 +3,28 @@
Attributes:
log (logging.Logger): the log object for the module.
DEFAULT_CONFIG (dict): the default config for scriptworker. Running configs
are validated against this.
CREDS_FILES (tuple): an ordered list of files to look for taskcluster
credentials, if they aren't in the config file or environment.
"""
from copy import deepcopy
from frozendict import frozendict
import json
import logging
import os
import sys

from frozendict import frozendict
from scriptworker.constants import DEFAULT_CONFIG

log = logging.getLogger(__name__)

DEFAULT_CONFIG = {
# Worker identification
"provisioner_id": "test-dummy-provisioner",
"worker_group": "test-dummy-workers",
"worker_type": "dummy-worker-myname",
"worker_id": os.environ.get("SCRIPTWORKER_WORKER_ID", "dummy-worker-myname1"),

"credentials": {
"clientId": "...",
"accessToken": "...",
"certificate": "...",
},

# for download url validation. The regexes need to define a 'filepath'.
'valid_artifact_schemes': ('https', ),
'valid_artifact_netlocs': ('queue.taskcluster.net', ),
'valid_artifact_path_regexes': (
r'''^/v1/task/(?P<taskId>[^/]+)(/runs/\d+)?/artifacts/(?P<filepath>.*)$''',
),
'valid_artifact_task_ids': (),

# Worker settings; these probably don't need tweaking
"max_connections": 30,
"credential_update_interval": 300,
"reclaim_interval": 300,
"poll_interval": 5,

# chain of trust settings
"verify_chain_of_trust": False, # TODO True
"sign_chain_of_trust": False, # TODO True
"chain_of_trust_hash_algorithm": "sha256",
"cot_schema_path": os.path.join(os.path.dirname(__file__), "data", "cot_v1_schema.json"),
# Specify a default gpg home other than ~/.gnupg
"gpg_home": None,

# A list of additional gpg cmdline options
"gpg_options": None,
# The path to the gpg executable.
"gpg_path": None,
# The path to the public/secret keyrings, if we're not using the default
"gpg_public_keyring": '%(gpg_home)s/pubring.gpg',
"gpg_secret_keyring": '%(gpg_home)s/secring.gpg',
# Boolean to use the gpg agent
"gpg_use_agent": False,
# Encoding to use. Defaults to latin-1
"gpg_encoding": None,

# Worker log settings
"log_datefmt": "%Y-%m-%dT%H:%M:%S",
"log_fmt": "%(asctime)s %(levelname)8s - %(message)s",
"log_max_bytes": 1024 * 1024 * 512,
"log_num_backups": 10,

# Task settings
"work_dir": "...",
"log_dir": "...",
"artifact_dir": "...",
"task_log_dir": "...", # set this to ARTIFACT_DIR/public/logs
"artifact_expiration_hours": 24,
"artifact_upload_timeout": 60 * 20,
"task_script": ("bash", "-c", "echo foo && sleep 19 && exit 1"),
"task_max_timeout": 60 * 20,
"verbose": True,
}

CREDS_FILES = (
os.path.join(os.getcwd(), 'secrets.json'),
os.path.join(os.environ.get('HOME', '/etc/'), '.scriptworker'),
)


def freeze_values(dictionary):
    """Convert a dictionary's list values into tuples, and dicts into frozendicts.

    This won't recurse; it's best for relatively flat data structures.

    Args:
        dictionary (dict): mutated in place; returns None.
    """
    # Iterate over a snapshot of the keys; we only replace values, never
    # add or remove keys, but the copy keeps the intent explicit.
    for key in list(dictionary):
        value = dictionary[key]
        if isinstance(value, list):
            dictionary[key] = tuple(value)
        elif isinstance(value, dict):
            dictionary[key] = frozendict(value)


def read_worker_creds(key="credentials"):
Expand Down Expand Up @@ -184,10 +120,10 @@ def create_config(path="config.json"):
sys.exit(1)
with open(path, "r", encoding="utf-8") as fh:
secrets = json.load(fh)
config = deepcopy(DEFAULT_CONFIG)
config = dict(deepcopy(DEFAULT_CONFIG))
if not secrets.get("credentials"):
secrets['credentials'] = read_worker_creds()
list_to_tuple(secrets)
freeze_values(secrets)
config.update(secrets)
messages = check_config(config, path)
if messages:
Expand Down
88 changes: 88 additions & 0 deletions scriptworker/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#!/usr/bin/env python
"""scriptworker constants
Attributes:
DEFAULT_CONFIG (frozendict): the default config for scriptworker. Running configs
are validated against this.
STATUSES (dict): maps taskcluster status (string) to exit code (int).
REVERSED_STATUSES (dict): the same as STATUSES, except it maps the exit code
(int) to the taskcluster status (string).
"""
from frozendict import frozendict
import os

# Default scriptworker config.  Wrapped in a frozendict so running code can't
# mutate the shared defaults in place; config.create_config() copies it into a
# plain dict before applying overrides.
DEFAULT_CONFIG = frozendict({
    # Worker identification
    "provisioner_id": "test-dummy-provisioner",
    "worker_group": "test-dummy-workers",
    "worker_type": "dummy-worker-myname",
    "worker_id": os.environ.get("SCRIPTWORKER_WORKER_ID", "dummy-worker-myname1"),

    # Taskcluster credentials; the "..." placeholders are expected to be
    # replaced by the running config or CREDS_FILES.
    "credentials": frozendict({
        "clientId": "...",
        "accessToken": "...",
        "certificate": "...",
    }),

    # for download url validation. The regexes need to define a 'filepath'.
    'valid_artifact_schemes': ('https', ),
    'valid_artifact_netlocs': ('queue.taskcluster.net', ),
    'valid_artifact_path_regexes': (
        r'''^/v1/task/(?P<taskId>[^/]+)(/runs/\d+)?/artifacts/(?P<filepath>.*)$''',
    ),
    'valid_artifact_task_ids': (),

    # Worker settings; these probably don't need tweaking
    "max_connections": 30,
    "credential_update_interval": 300,  # seconds
    "reclaim_interval": 300,  # seconds
    "poll_interval": 5,  # seconds

    # chain of trust settings
    "verify_chain_of_trust": False,  # TODO True
    "sign_chain_of_trust": False,  # TODO True
    "chain_of_trust_hash_algorithm": "sha256",
    "cot_schema_path": os.path.join(os.path.dirname(__file__), "data", "cot_v1_schema.json"),
    # Specify a default gpg home other than ~/.gnupg
    "gpg_home": None,

    # A list of additional gpg cmdline options
    "gpg_options": None,
    # The path to the gpg executable.
    "gpg_path": None,
    # The path to the public/secret keyrings, if we're not using the default
    "gpg_public_keyring": '%(gpg_home)s/pubring.gpg',
    "gpg_secret_keyring": '%(gpg_home)s/secring.gpg',
    # Boolean to use the gpg agent
    "gpg_use_agent": False,
    # Encoding to use. Defaults to latin-1
    "gpg_encoding": None,

    # Worker log settings
    "log_datefmt": "%Y-%m-%dT%H:%M:%S",
    "log_fmt": "%(asctime)s %(levelname)8s - %(message)s",
    "log_max_bytes": 1024 * 1024 * 512,
    "log_num_backups": 10,

    # Task settings
    "work_dir": "...",
    "log_dir": "...",
    "artifact_dir": "...",
    "task_log_dir": "...",  # set this to ARTIFACT_DIR/public/logs
    "artifact_expiration_hours": 24,
    "artifact_upload_timeout": 60 * 20,  # seconds
    "task_script": ("bash", "-c", "echo foo && sleep 19 && exit 1"),
    "task_max_timeout": 60 * 20,  # seconds; presumably enforced by task.max_timeout — TODO confirm
    "verbose": True,
})

# Maps taskcluster status strings to scriptworker exit codes.  The statuses
# are listed in exit-code order, so pairing them with range() is equivalent
# to spelling out each code by hand.
STATUSES = dict(
    zip(
        (
            'success',
            'failure',
            'worker-shutdown',
            'malformed-payload',
            'resource-unavailable',
            'internal-error',
            'superseded',
        ),
        range(7),
    )
)
# Inverse lookup: exit code (int) -> taskcluster status (string).
REVERSED_STATUSES = {code: status for status, code in STATUSES.items()}
7 changes: 7 additions & 0 deletions scriptworker/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,10 @@ class ScriptWorkerTaskException(ScriptWorkerException):
def __init__(self, *args, exit_code=1, **kwargs):
self.exit_code = exit_code
super(ScriptWorkerTaskException, self).__init__(*args, **kwargs)


class DownloadError(ScriptWorkerTaskException):
    """Raised when downloading a file fails.

    Carries exit_code 4, which maps to the 'resource-unavailable'
    taskcluster status.
    """

    def __init__(self, msg):
        super().__init__(msg, exit_code=4)
49 changes: 31 additions & 18 deletions scriptworker/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,42 +3,29 @@
Attributes:
log (logging.Logger): the log object for the module
STATUSES (dict): maps taskcluster status (string) to exit code (int).
REVERSED_STATUSES (dict): the same as STATUSES, except it maps the exit code
(int) to the taskcluster status (string).
"""
import aiohttp.hdrs
import arrow
import asyncio
from asyncio.subprocess import PIPE
from copy import deepcopy
import logging
import mimetypes
import os
import signal

from asyncio.subprocess import PIPE

import taskcluster
import taskcluster.exceptions

from scriptworker.client import validate_artifact_url
from scriptworker.constants import REVERSED_STATUSES
from scriptworker.exceptions import ScriptWorkerRetryException
from scriptworker.log import get_log_fhs, log_errors, read_stdout
from scriptworker.utils import filepaths_in_dir, raise_future_exceptions, retry_async
from scriptworker.utils import filepaths_in_dir, raise_future_exceptions, retry_async, download_file

log = logging.getLogger(__name__)


STATUSES = {
'success': 0,
'failure': 1,
'worker-shutdown': 2,
'malformed-payload': 3,
'resource-unavailable': 4,
'internal-error': 5,
'superseded': 6,
}
REVERSED_STATUSES = {v: k for k, v in STATUSES.items()}


def worst_level(level1, level2):
"""Given two int levels, return the larger.
Expand Down Expand Up @@ -329,3 +316,29 @@ def max_timeout(context, proc, timeout):
asyncio.ensure_future(kill(-pid)),
asyncio.ensure_future(kill(pid))
]))


async def download_artifacts(context, file_urls, parent_dir=None, session=None,
                             download_func=download_file):
    """Download taskcluster artifacts concurrently, retrying each download.

    Each url is validated against the config's valid-artifact rules before
    being fetched; all downloads run concurrently and the first stored
    exception, if any, is re-raised.

    Args:
        context: the scriptworker context; supplies ``config``, ``task``,
            and the default aiohttp ``session``.
        file_urls (iterable): the artifact urls to download.
        parent_dir (str, optional): directory to download into.  Defaults to
            ``context.config['work_dir']``.
        session (optional): the client session to use.  Defaults to
            ``context.session``.
        download_func (callable, optional): coroutine that downloads a single
            file, called as ``download_func(context, url, path, session=...)``.
            Defaults to ``download_file``.

    Returns:
        list: the relative paths (under ``parent_dir``) of the downloaded
        files, in ``file_urls`` order.

    Raises:
        Exception: whatever ``validate_artifact_url`` raises for an invalid
            url, or the first exception raised by a failed download after
            retries are exhausted.
    """
    parent_dir = parent_dir or context.config['work_dir']
    session = session or context.session

    tasks = []
    files = []
    # Copy the config so we can default valid_artifact_task_ids to this
    # task's dependencies without mutating the shared config.
    # NOTE(review): setdefault needs a mutable mapping — assumes
    # context.config deepcopies to a plain dict, not a frozendict; confirm.
    download_config = deepcopy(context.config)
    download_config.setdefault('valid_artifact_task_ids', context.task['dependencies'])
    for file_url in file_urls:
        # validate_artifact_url returns the url's 'filepath' on success.
        rel_path = validate_artifact_url(download_config, file_url)
        abs_file_path = os.path.join(parent_dir, rel_path)
        files.append(rel_path)
        tasks.append(
            asyncio.ensure_future(
                retry_async(
                    download_func, args=(context, file_url, abs_file_path),
                    kwargs={'session': session},
                )
            )
        )

    # Wait for every download; re-raises a stored exception if one failed.
    await raise_future_exceptions(tasks)
    return files
6 changes: 6 additions & 0 deletions scriptworker/test/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ def __init__(self, *args, status=200, payload=None, **kwargs):
self.status = status
self.headers = {'content-type': 'application/json'}
self._loop = mock.MagicMock()
self.content = self
self.resp = [b"asdf", b"asdf"]

@asyncio.coroutine
def text(self, *args, **kwargs):
Expand All @@ -129,6 +131,10 @@ def json(self, *args, **kwargs):
def release(self):
return

async def read(self, *args):
    """Return the next canned chunk from self.resp, or None when exhausted.

    Mimics aiohttp's ``content.read``: ``None`` signals end-of-stream.
    """
    return self.resp.pop(0) if self.resp else None


@pytest.fixture(scope='function')
def successful_queue():
Expand Down
2 changes: 1 addition & 1 deletion scriptworker/test/data/check_pubkeys.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import os
import shutil
import tempfile
from scriptworker.config import DEFAULT_CONFIG
from scriptworker.constants import DEFAULT_CONFIG
from scriptworker.context import Context
from scriptworker.exceptions import ScriptWorkerGPGException
import scriptworker.gpg
Expand Down
9 changes: 5 additions & 4 deletions scriptworker/test/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import os
import pytest
import scriptworker.config as config
from scriptworker.constants import DEFAULT_CONFIG


# constants helpers and fixtures {{{1
Expand All @@ -35,7 +36,7 @@

@pytest.fixture(scope='function')
def t_config():
return deepcopy(config.DEFAULT_CONFIG)
return dict(deepcopy(DEFAULT_CONFIG))


@pytest.fixture(scope='function')
Expand All @@ -49,9 +50,9 @@ def t_env():

# tests {{{1
def test_nontext_to_unicode():
d = {'a': [1, 2, 3]}
config.list_to_tuple(d)
assert d == {'a': (1, 2, 3)}
d = {'a': [1, 2, 3], 'b': {'c': 'd'}}
config.freeze_values(d)
assert d == {'a': (1, 2, 3), 'b': frozendict({'c': 'd'})}


def test_check_config_invalid_key(t_config):
Expand Down
Loading

0 comments on commit 0078803

Please sign in to comment.