Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Trying to get multipart copy byte-range headers to be correct.

  • Loading branch information...
commit c711d102087d7a8b4c0c9873d79d7597aa285d5b 1 parent a08c404
Damian Martinez authored

Showing 3 changed files with 17 additions and 32 deletions. Show diff stats Hide diff stats

  1. +1 1  S3/Config.py
  2. +14 27 S3/MultiPart.py
  3. +2 4 S3/S3.py
2  S3/Config.py
@@ -66,7 +66,7 @@ class Config(object):
66 66 enable_multipart = True
67 67 multipart_chunk_size_mb = 15 # MB
68 68 #- minimum size to use multipart remote s3-to-s3 copy with byte range is 5gb
69   - multipart_copy_size_gb = 5 # GB
  69 + #multipart_copy_size = (5 * 1024 * 1024 * 1024) - 1
70 70 multipart_copy_size = 5 * 1024 * 1024 * 1024
71 71 # List of checks to be performed for 'sync'
72 72 sync_checks = ['size', 'md5'] # 'weak-timestamp'
41 S3/MultiPart.py
@@ -113,10 +113,6 @@ def abort_upload(self):
113 113
114 114 class MultiPartCopy(MultiPartUpload):
115 115
116   - # S3 Config or const?
117   - MIN_CHUNK_SIZE_MB = 5120 # 5GB
118   - MAX_CHUNK_SIZE_MB = 42949672960 # 5TB
119   -
120 116 def __init__(self, s3, src_uri, dst_uri, src_size, headers_baseline = {}):
121 117 self.s3 = s3
122 118 self.file = self.src_uri = src_uri
@@ -130,6 +126,13 @@ def __init__(self, s3, src_uri, dst_uri, src_size, headers_baseline = {}):
130 126 def initiate_multipart_copy(self):
131 127 return self.initiate_multipart_upload()
132 128
  129 + def complete_multipart_copy(self):
  130 + return self.complete_multipart_upload()
  131 +
  132 + def abort_copy(self):
  133 + return self.abort_upload()
  134 +
  135 +
133 136 def copy_all_parts(self):
134 137 """
135 138 Execute a full multipart upload copy on a remote file
@@ -139,9 +142,7 @@ def copy_all_parts(self):
139 142 raise RuntimeError("Attempting to use a multipart copy that has not been initiated.")
140 143
141 144 size_left = file_size = self.src_size
142   - # TODO: only include byte range if remote src file is > 5gb, or get error
143   - # > 5368709121 (5 * 1024 * 1024 * 1024)
144   - self.chunk_size = self.s3.config.multipart_copy_size
  145 + self.chunk_size = self.s3.config.multipart_copy_size # - 1
145 146 nr_parts = file_size / self.chunk_size + (file_size % self.chunk_size and 1)
146 147 debug("MultiPart: Copying %s in %d parts" % (self.src_uri, nr_parts))
147 148
@@ -156,7 +157,6 @@ def copy_all_parts(self):
156 157 'extra' : "[part %d of %d, %s]" % (seq, nr_parts, "%d%sB" % formatSize(current_chunk_size, human_readable = True))
157 158 }
158 159 try:
159   - #self.upload_part(seq, offset, current_chunk_size, labels)
160 160 self.copy_part(seq, offset, current_chunk_size, labels)
161 161 except:
162 162 error(u"Upload copy of '%s' part %d failed. Aborting multipart upload copy." % (self.src_uri, seq))
@@ -175,22 +175,18 @@ def copy_part(self, seq, offset, chunk_size, labels):
175 175 debug("Copying part %i of %r (%s bytes)" % (seq, self.upload_id, chunk_size))
176 176
177 177 # set up headers with copy-params
178   - headers = {
179   - # TODO: should be /bucket/uri
180   - "x-amz-copy-source": "/%s/%s" % (self.src_uri.bucket(), self.src_uri.object())
181   - }
182   - if chunk_size >= self.s3.config.multipart_copy_size:
183   - # TODO: only include byte range if original file is > 5gb?
184   - # > 5368709121 (5 * 1024 * 1024 * 1024)
185   - headers["x-amz-copy-source-range"] = "bytes=%d-%d" % (offset, offset + chunk_size)
186   -
187   -
188 178 # x-amz-copy-source: /source_bucket/sourceObject
189 179 # x-amz-copy-source-range:bytes=first-last
190 180 # x-amz-copy-source-if-match: etag
191 181 # x-amz-copy-source-if-none-match: etag
192 182 # x-amz-copy-source-if-unmodified-since: time_stamp
193 183 # x-amz-copy-source-if-modified-since: time_stamp
  184 + headers = { "x-amz-copy-source": "/%s/%s" % (self.src_uri.bucket(), self.src_uri.object()) }
  185 +
  186 + # include the byte-range header for every part after the first, or when the chunk is at least multipart_copy_size (5 GB)
  187 + if (seq > 1) or (chunk_size >= self.s3.config.multipart_copy_size):
  188 + # FIXME: TODO: is this the correct calculation for a proper (inclusive) byte-range header?
  189 + headers["x-amz-copy-source-range"] = "bytes=%d-%d" % (offset, (offset + chunk_size - 1))
194 190
195 191 query_string = "?partNumber=%i&uploadId=%s" % (seq, self.upload_id)
196 192
@@ -198,16 +194,7 @@ def copy_part(self, seq, offset, chunk_size, labels):
198 194 response = self.s3.send_request(request)
199 195
200 196 # etag in xml response
201   - #self.parts[seq] = response["headers"]["etag"]
202 197 self.parts[seq] = getTextFromXml(response["data"], "ETag")
203   -
204 198 return response
205 199
206   - def complete_multipart_copy(self):
207   - return self.complete_multipart_upload()
208   -
209   - def abort_copy(self):
210   - return self.abort_upload()
211   -
212   -
213 200 # vim:et:ts=4:sts=4:ai
6 S3/S3.py
@@ -403,8 +403,6 @@ def object_delete(self, uri):
403 403 response = self.send_request(request)
404 404 return response
405 405
406   -# TODO: want to be able to do multi-part copy on remote s3 objects > 5gb
407   -# instead of Object-PUT ... multipart upload with header -d
408 406
409 407 def object_copy(self, src_uri, dst_uri, extra_headers = None):
410 408 if src_uri.type != "s3":
@@ -413,7 +411,7 @@ def object_copy(self, src_uri, dst_uri, extra_headers = None):
413 411 raise ValueError("Expected URI type 's3', got '%s'" % dst_uri.type)
414 412 headers = SortedDict(ignore_case = True)
415 413
416   - # TODO: where do ACL headers go for copy?
  414 + # TODO: where do ACL headers go for copy? Should we copy ACL from source?
417 415 if self.config.acl_public:
418 416 headers["x-amz-acl"] = "public-read"
419 417 if self.config.reduced_redundancy:
@@ -421,7 +419,7 @@ def object_copy(self, src_uri, dst_uri, extra_headers = None):
421 419 # if extra_headers:
422 420 # headers.update(extra_headers)
423 421
424   - ## Multipart decision - can only copy remote s3-to-s3 files over 5gb
  422 + ## Multipart decision - only do multipart copy for remote s3 files > 5gb
425 423 multipart = False
426 424 # TODO: does it need new config option for: enable_multipart_copy ?
427 425 if self.config.enable_multipart:

0 comments on commit c711d10

Please sign in to comment.
Something went wrong with that request. Please try again.