Trying to get multipart copy byte-range headers to be correct.

1 parent a08c404 commit c711d102087d7a8b4c0c9873d79d7597aa285d5b @porcupie committed Jul 17, 2012
Showing with 17 additions and 32 deletions.
  1. +1 −1 S3/Config.py
  2. +14 −27 S3/MultiPart.py
  3. +2 −4 S3/S3.py
S3/Config.py
@@ -66,7 +66,7 @@ class Config(object):
enable_multipart = True
multipart_chunk_size_mb = 15 # MB
#- minimum size to use multipart remote s3-to-s3 copy with byte range is 5gb
- multipart_copy_size_gb = 5 # GB
+ #multipart_copy_size = (5 * 1024 * 1024 * 1024) - 1
multipart_copy_size = 5 * 1024 * 1024 * 1024
# List of checks to be performed for 'sync'
sync_checks = ['size', 'md5'] # 'weak-timestamp'
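
For context, the byte threshold here matches S3's documented 5 GB ceiling for a single PUT Object - Copy request; anything larger has to go through multipart Upload Part - Copy instead. A minimal sketch of that decision, with a hypothetical helper name not present in this commit:

    # S3 rejects a plain x-amz-copy-source PUT for objects larger than 5 GB,
    # so bigger copies must fall back to multipart Upload Part - Copy.
    MULTIPART_COPY_SIZE = 5 * 1024 * 1024 * 1024  # 5 GB, in bytes

    def needs_multipart_copy(src_size):
        # src_size: source object size in bytes (hypothetical helper)
        return src_size > MULTIPART_COPY_SIZE
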
S3/MultiPart.py
@@ -113,10 +113,6 @@ def abort_upload(self):
class MultiPartCopy(MultiPartUpload):
- # S3 Config or const?
- MIN_CHUNK_SIZE_MB = 5120 # 5GB
- MAX_CHUNK_SIZE_MB = 42949672960 # 5TB
-
def __init__(self, s3, src_uri, dst_uri, src_size, headers_baseline = {}):
self.s3 = s3
self.file = self.src_uri = src_uri
@@ -130,6 +126,13 @@ def __init__(self, s3, src_uri, dst_uri, src_size, headers_baseline = {}):
def initiate_multipart_copy(self):
return self.initiate_multipart_upload()
+ def complete_multipart_copy(self):
+ return self.complete_multipart_upload()
+
+ def abort_copy(self):
+ return self.abort_upload()
+
+
def copy_all_parts(self):
"""
Execute a full multipart upload copy on a remote file
@@ -139,9 +142,7 @@ def copy_all_parts(self):
raise RuntimeError("Attempting to use a multipart copy that has not been initiated.")
size_left = file_size = self.src_size
- # TODO: only include byte range if remote src file is > 5gb, or get error
- # > 5368709121 (5 * 1024 * 1024 * 1024)
- self.chunk_size = self.s3.config.multipart_copy_size
+ self.chunk_size = self.s3.config.multipart_copy_size # - 1
nr_parts = file_size / self.chunk_size + (file_size % self.chunk_size and 1)
debug("MultiPart: Copying %s in %d parts" % (self.src_uri, nr_parts))
@@ -156,7 +157,6 @@ def copy_all_parts(self):
'extra' : "[part %d of %d, %s]" % (seq, nr_parts, "%d%sB" % formatSize(current_chunk_size, human_readable = True))
}
try:
- #self.upload_part(seq, offset, current_chunk_size, labels)
self.copy_part(seq, offset, current_chunk_size, labels)
except:
error(u"Upload copy of '%s' part %d failed. Aborting multipart upload copy." % (self.src_uri, seq))
@@ -175,39 +175,26 @@ def copy_part(self, seq, offset, chunk_size, labels):
debug("Copying part %i of %r (%s bytes)" % (seq, self.upload_id, chunk_size))
# set up headers with copy-params
- headers = {
- # TODO: should be /bucket/uri
- "x-amz-copy-source": "/%s/%s" % (self.src_uri.bucket(), self.src_uri.object())
- }
- if chunk_size >= self.s3.config.multipart_copy_size:
- # TODO: only include byte range if original file is > 5gb?
- # > 5368709121 (5 * 1024 * 1024 * 1024)
- headers["x-amz-copy-source-range"] = "bytes=%d-%d" % (offset, offset + chunk_size)
-
-
# x-amz-copy-source: /source_bucket/sourceObject
# x-amz-copy-source-range:bytes=first-last
# x-amz-copy-source-if-match: etag
# x-amz-copy-source-if-none-match: etag
# x-amz-copy-source-if-unmodified-since: time_stamp
# x-amz-copy-source-if-modified-since: time_stamp
+ headers = { "x-amz-copy-source": "/%s/%s" % (self.src_uri.bucket(), self.src_uri.object()) }
+
+ # include byte range header if already on next sequence or original file is > 5gb
+ if (seq > 1) or (chunk_size >= self.s3.config.multipart_copy_size):
+ # FIXME: TODO: is this correct calculation to do proper byte-range headers!?
+ headers["x-amz-copy-source-range"] = "bytes=%d-%d" % (offset, (offset + chunk_size - 1))
query_string = "?partNumber=%i&uploadId=%s" % (seq, self.upload_id)
request = self.s3.create_request("OBJECT_PUT", uri = self.uri, headers = headers, extra = query_string)
response = self.s3.send_request(request)
# etag in xml response
- #self.parts[seq] = response["headers"]["etag"]
self.parts[seq] = getTextFromXml(response["data"], "ETag")
-
return response
- def complete_multipart_copy(self):
- return self.complete_multipart_upload()
-
- def abort_copy(self):
- return self.abort_upload()
-
-
# vim:et:ts=4:sts=4:ai
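
The substantive fix in copy_part is the trailing "- 1": x-amz-copy-source-range takes an inclusive byte range, so a part of chunk_size bytes starting at offset must end at offset + chunk_size - 1, not offset + chunk_size. A sketch of the ranges this produces (helper name hypothetical, sizes illustrative):

    def copy_source_range(offset, chunk_size):
        # x-amz-copy-source-range is inclusive on both ends, hence the - 1.
        return "bytes=%d-%d" % (offset, offset + chunk_size - 1)

    # A 12 GB object copied in 5 GB parts yields:
    #   part 1: bytes=0-5368709119
    #   part 2: bytes=5368709120-10737418239
    #   part 3: bytes=10737418240-12884901887
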
S3/S3.py
@@ -403,8 +403,6 @@ def object_delete(self, uri):
response = self.send_request(request)
return response
-# TODO: want to be able to do multi-part copy on remote s3 objects > 5gb
-# instead of Object-PUT ... multipart upload with header -d
def object_copy(self, src_uri, dst_uri, extra_headers = None):
if src_uri.type != "s3":
@@ -413,15 +411,15 @@ def object_copy(self, src_uri, dst_uri, extra_headers = None):
raise ValueError("Expected URI type 's3', got '%s'" % dst_uri.type)
headers = SortedDict(ignore_case = True)
- # TODO: where do ACL headers go for copy?
+ # TODO: where do ACL headers go for copy? Should we copy ACL from source?
if self.config.acl_public:
headers["x-amz-acl"] = "public-read"
if self.config.reduced_redundancy:
headers["x-amz-storage-class"] = "REDUCED_REDUNDANCY"
# if extra_headers:
# headers.update(extra_headers)
- ## Multipart decision - can only copy remote s3-to-s3 files over 5gb
+ ## Multipart decision - only do multipart copy for remote s3 files > 5gb
multipart = False
# TODO: does it need new config option for: enable_multipart_copy ?
if self.config.enable_multipart:
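    # (diff truncated here)

The hunk is cut off before the actual size check, but the decision it leads into presumably mirrors the upload path: use multipart copy only when multipart is enabled and the source object exceeds the copy threshold. A plausible, hypothetical shape of that check, not this commit's actual code:

    # Names mirror the config values shown in S3/Config.py above.
    def should_multipart_copy(config, src_size):
        # src_size: source object size in bytes, e.g. from a HEAD request
        return config.enable_multipart and src_size > config.multipart_copy_size
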
