Skip to content

Commit

Permalink
Upload big files directly to S3. Closes #264.
Browse files Browse the repository at this point in the history
  • Loading branch information
Keats authored and polyatail committed Jul 19, 2019
1 parent 287f7a5 commit 6037fed
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ htmlcov/
*.ipynb
.pytest_cache/*
Pipfile*
.idea/
16 changes: 15 additions & 1 deletion onecodex/lib/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ def __init__(self, file_path, file_size, file_format="fastq", progressbar=None):
self.progressbar = progressbar
self.mime_type = "text/plain"

# Accessor returning the total size stored on this file wrapper (`self._fsize`).
# NOTE(review): presumably a byte count, since the uploader compares `_fsize` against 5 * 1024 ** 3 -- confirm.
def size(self):
return self._fsize

@property
def len(self):
"""Size of data left to be read.
Expand Down Expand Up @@ -192,6 +195,9 @@ def read(self, size=-1):

return bytes_read

# Accessor returning the total size stored on this file wrapper (`self._fsize`),
# as opposed to the `len` property below, which reports the data left to be read.
def size(self):
return self._fsize

@property
def len(self):
"""Size of data left to be read."""
Expand Down Expand Up @@ -705,13 +711,21 @@ def upload_sequence_fileobj(file_obj, file_name, fields, retry_fields, session,
try:
sample_id = fields["sample_id"]

# Are we being directed to skip the proxy? If so, don't try to upload >5GB files
# Are we being directed to skip the proxy? If so, only do it if the file is <5GB, since that's the limit for
# direct uploads to S3
if (
"AWSAccessKeyId" in fields["additional_fields"]
and getattr(file_obj, "_fsize", 0) > 5 * 1024 ** 3
):
raise RetryableUploadException

# Big files are going to skip the proxy even if the backend told us the opposite
# 100GB is considered big enough to defer the validation
# In some cases, file_obj might be a BytesIO object instead of one of our file objects, so we
# filter those out by checking for a `write` attribute
if not hasattr(file_obj, "write") and file_obj.size() > 100 * 1024 ** 3:
raise RetryableUploadException

_direct_upload(file_obj, file_name, fields, session, samples_resource)
except RetryableUploadException:
# upload failed -- retry direct upload to S3 intermediate; first try to cancel pending upload
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ scikit-learn>=0.19.0
coverage~=4.5
coveralls~=1.5
flake8
black
pytest~=4.1
pytest-cov~=2.6
responses
Expand Down

0 comments on commit 6037fed

Please sign in to comment.