Skip to content

Commit

Permalink
Use a single size cutoff for all files
Browse files Browse the repository at this point in the history
  • Loading branch information
polyatail committed Jul 19, 2019
1 parent d715ba5 commit 90245fd
Showing 1 changed file with 4 additions and 20 deletions.
24 changes: 4 additions & 20 deletions onecodex/lib/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,12 @@ def __init__(self, file_path, file_size, file_format="fastq", progressbar=None):
if file_path[0].endswith(".gz") or file_path[1].endswith(".gz"):
self._fp_left = gzip.GzipFile(file_path[0], mode="rb")
self._fp_right = gzip.GzipFile(file_path[1], mode="rb")
self.compressed = True
elif file_path[0].endswith(".bz2") or file_path[1].endswith(".bz2"):
self._fp_left = bz2.BZ2File(file_path[0], mode="rb")
self._fp_right = bz2.BZ2File(file_path[1], mode="rb")
self.compressed = True
else:
self._fp_left = open(file_path[0], mode="rb")
self._fp_right = open(file_path[1], mode="rb")
self.compressed = False

if file_format == "fasta":
raise OneCodexException("Interleaving FASTA files is currently unsupported")
Expand Down Expand Up @@ -185,13 +182,10 @@ def __init__(self, file_path, file_size, progressbar=None):

if ext in {".gz", ".gzip"}:
self.mime_type = "application/x-gzip"
self.compressed = True
elif ext in {".bz", ".bz2", ".bzip", ".bzip2"}:
self.mime_type = "application/x-bzip2"
self.compressed = True
else:
self.mime_type = "text/plain"
self.compressed = False

def read(self, size=-1):
bytes_read = self._fp.read(size)
Expand Down Expand Up @@ -726,20 +720,10 @@ def upload_sequence_fileobj(file_obj, file_name, fields, retry_fields, session,
raise RetryableUploadException

# Big files are going to skip the proxy even if the backend told us the opposite
# 100GB for uncompressed files and 50GB for compressed files are considered big enough to defer the validation
# In some cases, file_obj might be a BytesIO object instead of one of our file objects, so we filter them out
# by checking for a `write` attribute
if (
not hasattr(file_obj, "write")
and not file_obj.compressed
and file_obj.size() > 100 * 1024 ** 3
):
raise RetryableUploadException
if (
not hasattr(file_obj, "write")
and file_obj.compressed
and file_obj.size() > 50 * 1024 ** 3
):
# 100GB is considered big enough to defer the validation
# In some cases, file_obj might be a BytesIO object instead of one of our file objects, so we
# filter them out by checking for a `write` attribute
if not hasattr(file_obj, "write") and file_obj.size() > 100 * 1024 ** 3:
raise RetryableUploadException

_direct_upload(file_obj, file_name, fields, session, samples_resource)
Expand Down

0 comments on commit 90245fd

Please sign in to comment.