-
Notifications
You must be signed in to change notification settings - Fork 267
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Do not leave 0-byte multipart files in Cloud Storage #602
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -396,6 +396,13 @@ def initiate_chunked_upload(self): | |
def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata, content_type=None): | ||
self._initialize_cloud_conn() | ||
|
||
# Do not create zero-byte objects. | ||
# TODO: length=-1 appears to be a special case and used in tests. Why? Document it. | ||
if length == 0: | ||
logger.debug("Attempted to upload a zero-byte chunk to cloud storage. Ignoring.") | ||
new_metadata = copy.deepcopy(storage_metadata) | ||
return 0, new_metadata, None | ||
|
||
# We are going to upload each chunk to a separate key | ||
chunk_path = self._rel_upload_path(str(uuid4())) | ||
bytes_written, write_error = self._stream_write_internal( | ||
|
@@ -408,6 +415,12 @@ def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata, con | |
if bytes_written > 0: | ||
new_metadata[_CHUNKS_KEY].append(_PartUploadMetadata(chunk_path, offset, bytes_written)) | ||
|
||
# Do not leave zero-byte chunks in Object Storage | ||
# TODO: Determine a clean way to eliminate calling this function when it's not needed. | ||
else: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Woudn't the above check for zero-length prevent these zero bytes objects to be written in the first place. i.e Don't need to remove them anymore. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't believe so. In the case where it's explicitly defined as 0 bytes when this file is called, it is short-circuited. In other cases, I believe that the -1 you explained earlier leads to the creation and writing of a 0 byte file. I'm not exactly sure if that's how they get created, but that initial short-circuit did not fix it for all cases during my personal testing. |
||
logger.debug("Wrote zero bytes to Object Storage. Removing orphaned object.") | ||
self.remove(chunk_path) | ||
|
||
return bytes_written, new_metadata, write_error | ||
|
||
def _chunk_generator(self, chunk_list): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
import os | ||
|
||
from io import BytesIO | ||
from mock import patch | ||
|
||
import pytest | ||
|
||
|
@@ -275,3 +276,19 @@ def test_rechunked(max_size, parts): | |
assert len(rechunked) == len(parts) | ||
for index, chunk in enumerate(rechunked): | ||
assert chunk == parts[index] | ||
|
||
|
||
def test_no_orphaned_multipart_objects(storage_engine): | ||
""" | ||
Ensure Quay is not creating orphaned multi-part objects in Cloud Storage. | ||
""" | ||
with patch.object(storage_engine, "remove") as mock_remove: | ||
|
||
upload_id, metadata = storage_engine.initiate_chunked_upload() | ||
data = b"" | ||
bytes_written, new_metadata, errors = storage_engine.stream_upload_chunk( | ||
upload_id, 0, -1, BytesIO(data), metadata | ||
) | ||
assert len(data) == bytes_written | ||
assert mock_remove.call_count == 1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Likewise, shouldn't need to remove anymore, since it's not being written anymore? |
||
assert errors == None |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Length
-1
means to write the wholein_fp
stream, vs justlength
bytes if set.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good to know. Thank you!