Skip to content

Commit

Permalink
storage: Fix big layer uploads for Ceph/RADOS driver (PROJQUAY-6586) (#…
Browse files Browse the repository at this point in the history
…2601)

* storage: Fix big layer uploads for Ceph/RADOS driver (PROJQUAY-6586)
Current uploads of large images usually fail on Ceph/RADOS compatible implementations (including Noobaa) because during the last assembly, copy is done all at once. For large layers, this takes a long while and Boto times out. With this patch, we limit the size of the used chunk to 32 MB so the final copy is done in parts of up to 32 MB each. The size can be overridden by specifying the parameter `maximum_chunk_size_mb` in the driver settings. For backwards compatibility, an additional parameter was added: if `server_side_assembly: true` then we force server side assembly and the final blob push in chunks, if `server_side_assembly: false` we fall back to default client side assembly (we increase the boto timeout in this case to still support large layer upload):

~~~
DISTRIBUTED_STORAGE_CONFIG:
    default:
        - RadosGWStorage
        - ...
           maximum_chunk_size_mb: 100
           server_side_assembly: true
~~~

* Fix formatting

* Added backward compatibility switch and increased boto timeout

* Changed name of variable in config

* Small fixes to if statements
  • Loading branch information
ibazulic committed Jan 16, 2024
1 parent e8ff33e commit e243d23
Showing 1 changed file with 28 additions and 7 deletions.
35 changes: 28 additions & 7 deletions storage/cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -983,10 +983,16 @@ def __init__(
secret_key,
bucket_name,
port=None,
maximum_chunk_size_mb=None,
server_side_assembly=True,
):
upload_params = {}
connect_kwargs = {
"endpoint_url": _build_endpoint_url(hostname, port=port, is_secure=is_secure),
"config": Config(
connect_timeout=600 if not server_side_assembly else 60,
read_timeout=600 if not server_side_assembly else 60,
),
}

super(RadosGWStorage, self).__init__(
Expand All @@ -1000,6 +1006,13 @@ def __init__(
secret_key,
)

chunk_size = (
maximum_chunk_size_mb if maximum_chunk_size_mb is not None else 32
) # 32mb default, as used in Docker registry:2
self.maximum_chunk_size = chunk_size * 1024 * 1024

self.server_side_assembly = server_side_assembly

# TODO remove when radosgw supports cors: http://tracker.ceph.com/issues/8718#change-38624
def get_direct_download_url(
self, path, request_ip=None, expires_in=60, requires_cors=False, head=False, **kwargs
Expand All @@ -1019,13 +1032,21 @@ def get_direct_upload_url(self, path, mime_type, requires_cors=True):
return super(RadosGWStorage, self).get_direct_upload_url(path, mime_type, requires_cors)

def complete_chunked_upload(self, uuid, final_path, storage_metadata):
    """Assemble the chunks of upload `uuid` into the final blob at `final_path`.

    When `self.server_side_assembly` is enabled, delegate to the base-class
    implementation, which copies the chunks server-side (avoiding the download
    and re-upload of the whole layer, which times out for large layers).
    Otherwise fall back to the legacy behavior required by certain RadosGW
    implementations that do not support multipart copying from keys: download
    all chunks, join them locally, and re-upload the result. The fallback
    relies on the longer Boto connect/read timeouts configured at construction
    time when `server_side_assembly` is disabled.

    NOTE: the diff this was reconstructed from also showed the pre-patch
    unconditional client-side join; running it before the switch would defeat
    server-side assembly and perform the join twice, so it is removed here.
    """
    logger.debug("Server side assembly is set to {}.".format(self.server_side_assembly))
    if self.server_side_assembly:
        logger.debug("Initiating multipart upload and server side assembly for final push.")
        return super(RadosGWStorage, self).complete_chunked_upload(
            uuid, final_path, storage_metadata
        )
    else:
        logger.debug("Initiating client side chunk join for final assembly and push.")
        logger.debug("Setting Boto timeout to 600 seconds in case of a large layer push.")
        self._initialize_cloud_conn()
        # Certain implementations of RadosGW do not support multipart copying from keys,
        # so we are forced to join it all locally and then reupload.
        # See https://github.com/ceph/ceph/pull/5139
        chunk_list = self._chunk_list_from_metadata(storage_metadata)
        self._client_side_chunk_join(final_path, chunk_list)


class RHOCSStorage(RadosGWStorage):
Expand Down

0 comments on commit e243d23

Please sign in to comment.