Skip to content

Commit

Permalink
storage: Fix big layer uploads for Ceph/RADOS driver (PROJQUAY-6586) (#…
Browse files Browse the repository at this point in the history
…2601)

* storage: Fix big layer uploads for Ceph/RADOS driver (PROJQUAY-6586)
Current uploads of large images usually fail on Ceph/RADOS compatible implementations (including Noobaa) because during the last assembly, copy is done all at once. For large layers, this takes a long while and Boto times out. With this patch, we limit the size of the used chunk to 32 MB so the final copy is done in parts of up to 32 MB each. The size can be overridden by specifying the parameter `maximum_chunk_size_mb` in the driver settings. For backwards compatibility, an additional parameter was added: if `server_side_assembly: true` then we force server side assembly and the final blob push in chunks, if `server_side_assembly: false` we fall back to default client side assembly (we increase the boto timeout in this case to still support large layer upload):

~~~
DISTRIBUTED_STORAGE_CONFIG:
    default:
        - RadosGWStorage
        - ...
           maximum_chunk_size_mb: 100
           server_side_assembly: true
~~~

* Fix formatting

* Added backward compatibility switch and increased boto timeout

* Changed name of variable in config

* Small fixes to if statements
  • Loading branch information
ibazulic committed Jan 16, 2024
1 parent e8ff33e commit e243d23
Showing 1 changed file with 28 additions and 7 deletions.
35 changes: 28 additions & 7 deletions storage/cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -983,10 +983,16 @@ def __init__(
secret_key,
bucket_name,
port=None,
maximum_chunk_size_mb=None,
server_side_assembly=True,
):
upload_params = {}
connect_kwargs = {
"endpoint_url": _build_endpoint_url(hostname, port=port, is_secure=is_secure),
"config": Config(
connect_timeout=600 if not server_side_assembly else 60,
read_timeout=600 if not server_side_assembly else 60,
),
}

super(RadosGWStorage, self).__init__(
Expand All @@ -1000,6 +1006,13 @@ def __init__(
secret_key,
)

chunk_size = (
maximum_chunk_size_mb if maximum_chunk_size_mb is not None else 32
) # 32mb default, as used in Docker registry:2
self.maximum_chunk_size = chunk_size * 1024 * 1024

self.server_side_assembly = server_side_assembly

# TODO remove when radosgw supports cors: http://tracker.ceph.com/issues/8718#change-38624
def get_direct_download_url(
self, path, request_ip=None, expires_in=60, requires_cors=False, head=False, **kwargs
Expand All @@ -1019,13 +1032,21 @@ def get_direct_upload_url(self, path, mime_type, requires_cors=True):
return super(RadosGWStorage, self).get_direct_upload_url(path, mime_type, requires_cors)

def complete_chunked_upload(self, uuid, final_path, storage_metadata):
    """Assemble the chunks of upload `uuid` into the final blob at `final_path`.

    When `self.server_side_assembly` is enabled, delegate to the base-class
    implementation, which copies the chunks server-side (avoiding the download
    and re-upload of the whole layer, which times out for large layers).
    Otherwise fall back to the legacy behavior required by certain RadosGW
    implementations that do not support multipart copying from keys: download
    all chunks, join them locally, and re-upload the result. The fallback
    relies on the longer Boto connect/read timeouts configured at construction
    time when `server_side_assembly` is disabled.

    NOTE: the diff this was reconstructed from also showed the pre-patch
    unconditional client-side join; running it before the switch would defeat
    server-side assembly and perform the join twice, so it is removed here.
    """
    logger.debug("Server side assembly is set to {}.".format(self.server_side_assembly))
    if self.server_side_assembly:
        logger.debug("Initiating multipart upload and server side assembly for final push.")
        return super(RadosGWStorage, self).complete_chunked_upload(
            uuid, final_path, storage_metadata
        )
    else:
        logger.debug("Initiating client side chunk join for final assembly and push.")
        logger.debug("Setting Boto timeout to 600 seconds in case of a large layer push.")
        self._initialize_cloud_conn()
        # Certain implementations of RadosGW do not support multipart copying from keys,
        # so we are forced to join it all locally and then reupload.
        # See https://github.com/ceph/ceph/pull/5139
        chunk_list = self._chunk_list_from_metadata(storage_metadata)
        self._client_side_chunk_join(final_path, chunk_list)


class RHOCSStorage(RadosGWStorage):
Expand Down

0 comments on commit e243d23

Please sign in to comment.