* S3/S3.py, s3cmd, S3/Config.py, s3cmd.1: Added --continue for
  'get' command, improved 'get' failure resiliency.
git-svn-id: https://s3tools.svn.sourceforge.net/svnroot/s3tools/s3cmd/trunk@267 830e0280-6d2a-0410-9c65-932aecc39d9d
commit 9197e62ed1a795f1f905269d1de6e28ead895cf3 1 parent eb9a4b6
@mludvig authored
Showing with 109 additions and 52 deletions.
  1. +2 −0  ChangeLog
  2. +3 −0  NEWS
  3. +1 −0  S3/Config.py
  4. +87 −48 S3/S3.py
  5. +13 −4 s3cmd
  6. +3 −0  s3cmd.1
2  ChangeLog
@@ -1,5 +1,7 @@
2008-11-24 Michal Ludvig <michal@logix.cz>
+ * S3/S3.py, s3cmd, S3/Config.py, s3cmd.1: Added --continue for
+ 'get' command, improved 'get' failure resiliency.
* S3/Progress.py: Support for progress meter not starting in 0.
* S3/S3.py: improved retrying in send_request() and send_file()
3  NEWS
@@ -6,6 +6,9 @@ s3cmd 0.9.9 - ???
prefix with --recursive (-r)
* Copying and moving objects, within or between buckets.
(Andrew Ryan)
+* Continue getting partially downloaded files with --continue
+* Improved resistance to communication errors (Connection
+ reset by peer, etc.)
s3cmd 0.9.8.4 - 2008-11-07
=============
1  S3/Config.py
@@ -24,6 +24,7 @@ class Config(object):
recv_chunk = 4096
human_readable_sizes = False
force = False
+ get_continue = False
recursive = False
acl_public = False
proxy_host = ""
135 S3/S3.py
@@ -189,11 +189,11 @@ def object_put(self, filename, uri, extra_headers = None):
response = self.send_file(request, file)
return response
- def object_get(self, uri, stream):
+ def object_get(self, uri, stream, start_position):
if uri.type != "s3":
raise ValueError("Expected URI type 's3', got '%s'" % uri.type)
request = self.create_request("OBJECT_GET", uri = uri)
- response = self.recv_file(request, stream)
+ response = self.recv_file(request, stream, start_position)
return response
def object_delete(self, uri):
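The new start_position parameter is what makes --continue work: recv_file() turns it into an HTTP Range header, so the server sends only the bytes that are still missing. A minimal sketch of that request pattern in modern Python (http.client here; the host and path arguments are illustrative, not s3cmd's API):

    import http.client

    def open_ranged_get(host, path, start_position=0):
        """Issue a GET that skips the first start_position bytes."""
        conn = http.client.HTTPConnection(host)
        conn.putrequest("GET", path)
        if start_position > 0:
            # Ask the server for the remainder only; S3 answers with
            # 206 Partial Content and a shortened Content-Length.
            conn.putheader("Range", "bytes=%d-" % start_position)
        conn.endheaders()
        return conn.getresponse()

Because a ranged response's Content-Length covers only the remainder, the patch later computes size_total = start_position + size_left.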
@@ -399,6 +399,8 @@ def send_file(self, request, file, throttle = 0, retries = _max_retries):
conn.putheader(header, str(headers[header]))
conn.endheaders()
except Exception, e:
+ if self.config.progress_meter:
+ progress.done("failed")
if retries:
warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
warning("Waiting %d sec..." % self._fail_wait(retries))
@@ -406,7 +408,7 @@ def send_file(self, request, file, throttle = 0, retries = _max_retries):
# Connection error -> same throttle value
return self.send_file(request, file, throttle, retries - 1)
else:
- raise S3UploadError("Request failed for: %s" % resource['uri'])
+ raise S3UploadError("Upload failed for: %s" % resource['uri'])
file.seek(0)
md5_hash = md5.new()
try:
@@ -414,20 +416,12 @@ def send_file(self, request, file, throttle = 0, retries = _max_retries):
debug("SendFile: Reading up to %d bytes from '%s'" % (self.config.send_chunk, file.name))
data = file.read(self.config.send_chunk)
md5_hash.update(data)
+ conn.send(data)
if self.config.progress_meter:
progress.update(delta_position = len(data))
- else:
- debug("SendFile: Sending %d bytes to the server" % len(data))
- conn.send(data)
-
size_left -= len(data)
if throttle:
time.sleep(throttle)
- ## Call progress meter from here
- debug("Sent %d bytes (%d %% of %d)" % (
- (size_total - size_left),
- (size_total - size_left) * 100 / size_total,
- size_total))
md5_computed = md5_hash.hexdigest()
response = {}
http_response = conn.getresponse()
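The reshuffle above moves conn.send(data) so that each chunk is hashed and sent in one pass, keeping the MD5 and the progress meter in step with what actually reached the wire. The loop's shape, as a standalone sketch (hashlib replaces the long-deprecated md5 module; send_chunk mirrors the config default):

    import hashlib

    def stream_file(conn, fileobj, size_total, send_chunk=4096):
        """Send a file in chunks, hashing exactly what goes out."""
        md5_hash = hashlib.md5()
        size_left = size_total
        while size_left > 0:
            data = fileobj.read(min(send_chunk, size_left))
            md5_hash.update(data)  # hash first ...
            conn.send(data)        # ... then send the same bytes
            size_left -= len(data)
        return md5_hash.hexdigest()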
@@ -442,7 +436,7 @@ def send_file(self, request, file, throttle = 0, retries = _max_retries):
progress.done("failed")
if retries:
throttle = throttle and throttle * 5 or 0.01
- warning("Request failed: %s (%s)" % (resource['uri'], e))
+ warning("Upload failed: %s (%s)" % (resource['uri'], e))
warning("Retrying on lower speed (throttle=%0.2f)" % throttle)
warning("Waiting %d sec..." % self._fail_wait(retries))
time.sleep(self._fail_wait(retries))
@@ -450,7 +444,7 @@ def send_file(self, request, file, throttle = 0, retries = _max_retries):
return self.send_file(request, file, throttle, retries - 1)
else:
debug("Giving up on '%s' %s" % (file.name, e))
- raise S3UploadError("Request failed for: %s" % resource['uri'])
+ raise S3UploadError("Upload failed for: %s" % resource['uri'])
timestamp_end = time.time()
response["elapsed"] = timestamp_end - timestamp_start
@@ -490,24 +484,40 @@ def send_file(self, request, file, throttle = 0, retries = _max_retries):
raise S3Error(response)
return response
- def recv_file(self, request, stream):
+ def recv_file(self, request, stream, start_position = 0, retries = _max_retries):
method_string, resource, headers = request
if self.config.progress_meter:
progress = self.config.progress_class(stream.name, 0)
else:
info("Receiving file '%s', please wait..." % stream.name)
timestamp_start = time.time()
- conn = self.get_connection(resource['bucket'])
- conn.connect()
- conn.putrequest(method_string, self.format_uri(resource))
- for header in headers.keys():
- conn.putheader(header, str(headers[header]))
- conn.endheaders()
- response = {}
- http_response = conn.getresponse()
- response["status"] = http_response.status
- response["reason"] = http_response.reason
- response["headers"] = convertTupleListToDict(http_response.getheaders())
+ try:
+ conn = self.get_connection(resource['bucket'])
+ conn.connect()
+ conn.putrequest(method_string, self.format_uri(resource))
+ for header in headers.keys():
+ conn.putheader(header, str(headers[header]))
+ if start_position > 0:
+ debug("Requesting Range: %d .. end" % start_position)
+ conn.putheader("Range", "bytes=%d-" % start_position)
+ conn.endheaders()
+ response = {}
+ http_response = conn.getresponse()
+ response["status"] = http_response.status
+ response["reason"] = http_response.reason
+ response["headers"] = convertTupleListToDict(http_response.getheaders())
+ debug("Response: %s" % response)
+ except Exception, e:
+ if self.config.progress_meter:
+ progress.done("failed")
+ if retries:
+ warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
+ warning("Waiting %d sec..." % self._fail_wait(retries))
+ time.sleep(self._fail_wait(retries))
+ # Connection error -> same throttle value
+ return self.recv_file(request, stream, start_position, retries - 1)
+ else:
+ raise S3DownloadError("Download failed for: %s" % resource['uri'])
if response["status"] == 307:
## RedirectPermanent
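The retry block wrapped around the request setup above reappears around the read loop below, with one crucial difference: a download retry passes current_position back into recv_file(), so each attempt resumes where the previous one stopped instead of starting over. In outline, with a hypothetical fetch(offset) generator standing in for the ranged HTTP read:

    def download_resumable(fetch, stream, start_position=0, retries=5):
        """Retry a download, resuming from wherever the last attempt died."""
        position = start_position
        while True:
            try:
                for chunk in fetch(position):  # fetch() honours a byte offset
                    stream.write(chunk)
                    position += len(chunk)
                return position
            except (IOError, OSError):
                if retries == 0:
                    raise
                retries -= 1
                # The next attempt continues at `position`, not start_position.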
@@ -521,38 +531,67 @@ def recv_file(self, request, stream):
if response["status"] < 200 or response["status"] > 299:
raise S3Error(response)
- md5_hash = md5.new()
- size_left = size_total = int(response["headers"]["content-length"])
+ if start_position == 0:
+ # Only compute MD5 on the fly if we're downloading from the beginning.
+ # Otherwise we'd get nonsense.
+ md5_hash = md5.new()
+ size_left = int(response["headers"]["content-length"])
+ size_total = start_position + size_left
+ current_position = start_position
+
if self.config.progress_meter:
progress.total_size = size_total
- size_recvd = 0
- while (size_recvd < size_total):
- this_chunk = size_left > self.config.recv_chunk and self.config.recv_chunk or size_left
- debug("ReceiveFile: Receiving up to %d bytes from the server" % this_chunk)
- data = http_response.read(this_chunk)
- debug("ReceiveFile: Writing %d bytes to file '%s'" % (len(data), stream.name))
- stream.write(data)
- md5_hash.update(data)
- size_recvd += len(data)
- ## Call progress meter from here...
+ progress.initial_position = current_position
+ progress.current_position = current_position
+
+ try:
+ while (current_position < size_total):
+ this_chunk = size_left > self.config.recv_chunk and self.config.recv_chunk or size_left
+ data = http_response.read(this_chunk)
+ stream.write(data)
+ if start_position == 0:
+ md5_hash.update(data)
+ current_position += len(data)
+ ## Call progress meter from here...
+ if self.config.progress_meter:
+ progress.update(delta_position = len(data))
+ conn.close()
+ except Exception, e:
if self.config.progress_meter:
- progress.update(delta_position = len(data))
+ progress.done("failed")
+ if retries:
+ warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
+ warning("Waiting %d sec..." % self._fail_wait(retries))
+ time.sleep(self._fail_wait(retries))
+ # Connection error -> same throttle value
+ return self.recv_file(request, stream, current_position, retries - 1)
else:
- debug("Received %d bytes (%d %% of %d)" % (
- size_recvd,
- size_recvd * 100 / size_total,
- size_total))
- conn.close()
+ raise S3DownloadError("Download failed for: %s" % resource['uri'])
+
+ stream.flush()
progress.done("done")
timestamp_end = time.time()
- response["md5"] = md5_hash.hexdigest()
+
+ if start_position == 0:
+ # Only compute MD5 on the fly if we were downloading from the beginning
+ response["md5"] = md5_hash.hexdigest()
+ else:
+ # Otherwise try to compute MD5 of the output file
+ try:
+ response["md5"] = hash_file_md5(stream.name)
+ except IOError, e:
+ if e.errno != errno.ENOENT:
+ warning("Unable to open file: %s: %s" % (stream.name, e))
+ warning("Unable to verify MD5. Assume it matches.")
+ response["md5"] = response["headers"]["etag"]
+
response["md5match"] = response["headers"]["etag"].find(response["md5"]) >= 0
response["elapsed"] = timestamp_end - timestamp_start
- response["size"] = size_recvd
+ response["size"] = current_position
response["speed"] = response["elapsed"] and float(response["size"]) / response["elapsed"] or float(-1)
- if response["size"] != long(response["headers"]["content-length"]):
+ if response["size"] != start_position + long(response["headers"]["content-length"]):
warning("Reported size (%s) does not match received size (%s)" % (
- response["headers"]["content-length"], response["size"]))
+ start_position + long(response["headers"]["content-length"]), response["size"]))
debug("ReceiveFile: Computed MD5 = %s" % response["md5"])
if not response["md5match"]:
warning("MD5 signatures do not match: computed=%s, received=%s" % (
17 s3cmd
@@ -253,6 +253,7 @@ def cmd_object_get(args):
uri_arg = args.pop(0)
uri = S3Uri(uri_arg)
+ start_position = 0
if destination_file:
destination = destination_file
elif destination_dir:
@@ -265,14 +266,21 @@ def cmd_object_get(args):
dst_stream = sys.stdout
else:
## File
- if not Config().force and os.path.exists(destination):
- raise ParameterError("File %s already exists. Use --force to overwrite it" % destination)
try:
- dst_stream = open(destination, "wb")
+ file_exists = os.path.exists(destination)
+ dst_stream = open(destination, "ab")
+ if file_exists:
+ if Config().get_continue:
+ start_position = dst_stream.tell()
+ elif Config().force:
+ start_position = 0L
+ dst_stream.seek(0L)
+ dst_stream.truncate()
+ else:
+ raise ParameterError("File %s already exists. Use either --force or --continue or give it a new name." % destination)
except IOError, e:
error("Skipping %s: %s" % (destination, e.strerror))
continue
- response = s3.object_get(uri, dst_stream)
+ response = s3.object_get(uri, dst_stream, start_position = start_position)
if response["headers"].has_key("x-amz-meta-s3tools-gpgenc"):
gpg_decrypt(destination, response["headers"]["x-amz-meta-s3tools-gpgenc"])
response["size"] = os.stat(destination)[6]
@@ -965,6 +973,7 @@ def main():
optparser.add_option("-e", "--encrypt", dest="encrypt", action="store_true", help="Encrypt files before uploading to S3.")
optparser.add_option( "--no-encrypt", dest="encrypt", action="store_false", help="Don't encrypt files.")
optparser.add_option("-f", "--force", dest="force", action="store_true", help="Force overwrite and other dangerous operations.")
+ optparser.add_option( "--continue", dest="get_continue", action="store_true", help="Continue getting a partially downloaded file (only for [get] command).")
optparser.add_option("-r", "--recursive", dest="recursive", action="store_true", help="Recursive upload, download or removal.")
optparser.add_option("-P", "--acl-public", dest="acl_public", action="store_true", help="Store objects with ACL allowing read for anyone.")
optparser.add_option( "--acl-private", dest="acl_public", action="store_false", help="Store objects with default ACL allowing access for you only.")
3  s3cmd.1
@@ -113,6 +113,9 @@ Options common for all commands (where it makes sense indeed):
\fB\-f\fR, \fB\-\-force\fR
Force overwrite and other dangerous operations.
.TP
+\fB\-\-continue\fR
+Continue getting a partially downloaded file (only for the \fIget\fR command). This comes in handy when the download of a large file, say an ISO image, from an S3 bucket fails and a partially downloaded file is left on the disk. Unfortunately the \fIput\fR command doesn't support restarting a failed upload, due to an Amazon S3 limitation.
+.TP
\fB\-P\fR, \fB\-\-acl\-public\fR
Store objects with permissions allowing read for anyone.
.TP
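Typical use of the new option looks like this (bucket and file names are illustrative):

    # First attempt dies partway through a large download:
    s3cmd get s3://my-bucket/big.iso big.iso

    # Re-run with --continue to fetch only the missing tail:
    s3cmd get --continue s3://my-bucket/big.iso big.iso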