Skip to content

Commit

Permalink
Support for specifying a project at upload time (#98)
Browse files Browse the repository at this point in the history
* Fixes regex error where names get improperly formatted

* Fixes interleaving bug on upload and datetime bug when interacting with the server

* Adds requirements for pytz to ensure all apps have it

* Adds additional filename test

* Adds additional notices to metadata/tag flag on upload

* Adds support for specifying project at upload time

* Bumps version to 0.2.13
  • Loading branch information
bcamarda authored and clausmith committed Sep 22, 2018
1 parent 4d0ff7a commit efaab4b
Show file tree
Hide file tree
Showing 10 changed files with 91 additions and 26 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ htmlcov/
.DS_Store
.python-version
*.ipynb
.pytest_cache/*
.pytest_cache/*
Pipfile*
41 changes: 34 additions & 7 deletions onecodex/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@
import click

from onecodex.utils import (cli_resource_fetcher, download_file_helper,
valid_api_key, OPTION_HELP, pprint,
valid_api_key, OPTION_HELP, pprint, pretty_errors,
warn_if_insecure_platform, is_simplejson_installed,
warn_simplejson, telemetry, snake_case)
from onecodex.api import Api
from onecodex.exceptions import ValidationWarning, ValidationError, UploadException
from onecodex.exceptions import (OneCodexException, ValidationWarning,
ValidationError, UploadException)
from onecodex.auth import _login, _logout, _remove_creds, _silent_login
from onecodex.scripts import filter_reads
from onecodex.version import __version__
Expand Down Expand Up @@ -187,11 +188,13 @@ def samples(ctx, samples):
@click.option('--validate/--do-not-validate', is_flag=True, help=OPTION_HELP['validate'],
default=True)
@click.option('--tag', '-t', 'tags', multiple=True, help=OPTION_HELP['tag'])
@click.option("--metadata", '-md', multiple=True, help=OPTION_HELP['metadata'])
@click.option('--metadata', '-md', multiple=True, help=OPTION_HELP['metadata'])
@click.option('--project', '-p', 'project_id', help=OPTION_HELP['project'])
@click.pass_context
@pretty_errors
@telemetry
def upload(ctx, files, max_threads, clean, no_interleave, prompt, validate,
forward, reverse, tags, metadata):
forward, reverse, tags, metadata, project_id):
"""Upload a FASTA or FASTQ (optionally gzip'd) to One Codex"""

appendables = {}
Expand Down Expand Up @@ -267,11 +270,35 @@ def upload(ctx, files, max_threads, clean, no_interleave, prompt, validate,
if not clean:
warnings.filterwarnings('error', category=ValidationWarning)

upload_kwargs = {
'threads': max_threads,
'validate': validate,
'metadata': appendables['valid_metadata'],
'tags': appendables['valid_tags'],
}

# get project
if project_id:
project = ctx.obj['API'].Projects.get(project_id)
if not project:
project = ctx.obj['API'].Projects.where(name=project_id)
if not project:
project = ctx.obj['API'].Projects.where(project_name=project_id)
if not project:
raise OneCodexException('{} is not a valid project UUID'
.format(project_id))

if not isinstance(project, list):
project = [project]

upload_kwargs['project'] = project[0]

try:
# do the uploading
ctx.obj['API'].Samples.upload(files, threads=max_threads, validate=validate,
metadata=appendables['valid_metadata'], tags=appendables['valid_tags'])

ctx.obj['API'].Samples.upload(
files,
**upload_kwargs
)
except ValidationWarning as e:
sys.stderr.write('\nERROR: {}. {}'.format(
e, 'Running with the --clean flag will suppress this error.'
Expand Down
28 changes: 21 additions & 7 deletions onecodex/lib/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,31 @@
from requests_toolbelt import MultipartEncoder

from onecodex.lib.inline_validator import FASTXReader, FASTXTranslator
from onecodex.exceptions import UploadException, process_api_error
from onecodex.exceptions import OneCodexException, UploadException, process_api_error


MULTIPART_SIZE = 5 * 1000 * 1000 * 1000
DEFAULT_UPLOAD_THREADS = 4


def interleaved_filename(filename):
    """Return the single filename used to represent a paired-end upload.

    The read designator (``R1``/``R2``, upper or lower case ``r``) is removed
    from the first path of the pair, together with the ``_`` or ``.`` that
    precedes it, but only when the designator is itself followed by ``_`` or
    ``.``. For example ``('test_R1_001.fastq', 'test_R2_001.fastq')`` yields
    ``'test_001.fastq'``.

    Args:
        filename: A tuple of paths; only the first element is examined.

    Returns:
        The first path with its read designator stripped, or the first path
        unchanged when it contains no delimited designator.

    Raises:
        OneCodexException: If ``filename`` is not a tuple.
    """
    if not isinstance(filename, tuple):
        raise OneCodexException('Cannot get the interleaved filename without a tuple.')

    # The lookahead demands a trailing delimiter without consuming it, so the
    # substitution matches exactly the designators that are delimited on both
    # sides. Tokens that merely start with R1/R2 (e.g. "_R2D2") are left
    # alone, and a name with no designator passes through unchanged.
    return re.sub(r'[._][Rr][12](?=[._])', '', filename[0])


def _file_stats(filename, validate=True):
if isinstance(filename, tuple):
assert len(filename) == 2
file_size = sum(os.path.getsize(f) for f in filename)
# strip out the _R1_/etc chunk from the first filename if this is a paired upload
# and make that the filename
filename = re.sub('[._][Rr][12][._]', '_', filename[0])
filename = interleaved_filename(filename)
else:
file_size = os.path.getsize(filename)

Expand Down Expand Up @@ -65,7 +76,7 @@ def _wrap_files(filename, logger=None, validate=True):


def upload(files, session, samples_resource, server_url, threads=DEFAULT_UPLOAD_THREADS,
validate=True, log_to=None, metadata=None, tags=None):
validate=True, log_to=None, metadata=None, tags=None, project=None):
"""
Uploads several files to the One Codex server, auto-detecting sizes and using the appropriate
downstream upload functions. Also, wraps the files with a streaming validator to ensure they
Expand Down Expand Up @@ -155,7 +166,7 @@ def _wrapped(*wrapped_args):
if file_size < MULTIPART_SIZE:
file_obj = _wrap_files(file_path, logger=progress_bar, validate=validate)
file_uuid = threaded_upload(file_obj, filename, session, samples_resource, log_to,
metadata, tags)
metadata, tags, project)
if file_uuid:
uploading_uuids.append(file_uuid)
uploading_files.append(file_obj)
Expand Down Expand Up @@ -228,7 +239,7 @@ def upload_large_file(file_obj, filename, session, samples_resource, server_url,
log_to.flush()


def upload_file(file_obj, filename, session, samples_resource, log_to, metadata, tags):
def upload_file(file_obj, filename, session, samples_resource, log_to, metadata, tags, project=None):
"""
Uploads a file to the One Codex server directly to the users S3 bucket by self-signing
"""
Expand All @@ -243,6 +254,9 @@ def upload_file(file_obj, filename, session, samples_resource, log_to, metadata,
if tags:
upload_args['tags'] = tags

if project:
upload_args['project'] = project.id

try:
upload_info = samples_resource.init_upload(upload_args)

Expand Down
7 changes: 6 additions & 1 deletion onecodex/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import sys

from dateutil.parser import parse
import pytz
from requests.exceptions import HTTPError
from potion_client.converter import PotionJSONEncoder
from potion_client.resource import Resource
Expand Down Expand Up @@ -84,7 +85,11 @@ def __getattr__(self, key):
else:
return str(value)
if schema.get('format') == 'date-time' and value is not None:
return parse(value)
datetime_value = parse(value)
if datetime_value.tzinfo is None:
return pytz.utc.localize(datetime_value)
else:
return datetime_value.astimezone(pytz.utc)
return value
elif key == 'id' or key in self.__class__._resource._schema['properties']:
# make fields appear blank if there's no _resource bound to me
Expand Down
4 changes: 2 additions & 2 deletions onecodex/models/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def save(self):
self.metadata.save()

@classmethod
def upload(cls, filename, threads=None, validate=True, metadata=None, tags=None):
def upload(cls, filename, threads=None, validate=True, metadata=None, tags=None, project=None):
"""
Uploads a series of files to the One Codex server. These files are automatically
validated during upload.
Expand All @@ -112,7 +112,7 @@ def upload(cls, filename, threads=None, validate=True, metadata=None, tags=None)
if isinstance(filename, string_types) or isinstance(filename, tuple):
filename = [filename]
samples = upload(filename, res._client.session, res, res._client._root_url + '/', threads=threads,
validate=validate, log_to=sys.stderr, metadata=metadata, tags=tags)
validate=validate, log_to=sys.stderr, metadata=metadata, tags=tags, project=project)
return samples
# FIXME: pass the auth into this so we can authenticate the callback?

Expand Down
19 changes: 14 additions & 5 deletions onecodex/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,19 @@
'telemetry': 'Send errors to One Codex?',
'forward': 'Specify a forward reads file',
'reverse': 'Specify a reverse reads file',
'tag': ('Add one or more tags to all uploaded samples '
'e.g., `onecodex upload --tag "Cohort A" $FILE`'),
'metadata': ('Add one or more metadata attributes to all uploaded samples, '
'e.g., `onecodex upload --metadata starred=true --metadata '
'platform="Illumina MiSeq" $FILE`'),
'tag': ('NOTE: We recommend invoking the upload command separately for each '
'sample to add sample-specific tags.\n\nAdd one or more tags to '
'all uploaded samples e.g., `onecodex upload --tag "Cohort A" $FILE`.'),
'metadata': ('NOTE: We recommend invoking the upload command separately for each '
'sample to add sample-specific metadata.\n\nAdd one or more '
'metadata attributes to all uploaded samples, '
'e.g. `onecodex upload --metadata starred=true --metadata '
'platform="Illumina MiSeq" $FILE`. '),
'project': 'Provide the name, short name, or alphanumeric UUID of a ' \
'project to automatically add the samples to that project on ' \
'upload. NOTE: does not currently support adding a sample to ' \
'a public project. Projects are searched by UUID, then name, ' \
'then short name in that order.'
}

SUPPORTED_EXTENSIONS = ["fa", "fasta", "fq", "fastq",
Expand Down Expand Up @@ -331,6 +339,7 @@ def telemetry(fn):
Note that this also overwrites verbose Raven logs on exit ("Sentry is waiting to send..."),
see https://github.com/getsentry/raven-python/issues/904 for more details.
"""

@wraps(fn)
def telemetry_wrapper(*args, **kwargs):
# By default, do not instantiate a client,
Expand Down
2 changes: 1 addition & 1 deletion onecodex/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.2.12'
__version__ = '0.2.13'
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ numpy>=1.11.0
pandas>=0.20.0,<0.21.0
matplotlib>=1.5.1
networkx>=1.11
pytz>=2014.1
seaborn==0.8
sklearn
scikit-bio==0.4.2
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
packages=find_packages(exclude=['*test*']),
install_requires=['potion-client==2.5.1', 'requests>=2.9', 'click>=6.6',
'requests_toolbelt==0.7.0', 'python-dateutil>=2.5.3',
'six>=1.10.0', 'boto3>=1.4.2', 'raven>=6.1.0'],
'six>=1.10.0', 'boto3>=1.4.2', 'raven>=6.1.0', 'pytz>=2014.1'],
include_package_data=True,
zip_safe=False,
extras_require={
Expand Down
10 changes: 9 additions & 1 deletion tests/test_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,15 @@
import pytest

from onecodex.lib.inline_validator import FASTXTranslator
from onecodex.lib.upload import upload, upload_file, upload_large_file
from onecodex.lib.upload import upload, upload_file, upload_large_file, interleaved_filename


@pytest.mark.parametrize(
    'files,filename',
    [
        (('test_R1.fastq', 'test_R2.fastq'), 'test.fastq'),
        (('test_R1_001.fastq', 'test_R2_001.fastq'), 'test_001.fastq'),
    ],
)
def test_interleaved_filenames(files, filename):
    """The read designator is stripped from the first file of a pair."""
    merged_name = interleaved_filename(files)
    assert merged_name == filename


@pytest.mark.parametrize('file_list,n_small,n_big', [
Expand Down

0 comments on commit efaab4b

Please sign in to comment.