Reuse a single connection when doing a bucket list

Listing the contents of a bucket can involve multiple requests if the
bucket is sufficiently large: by default, objects are listed in batches
of 1000. Currently a new connection is created for each batch, which
adds unnecessary connection-establishment overhead whenever a listing
spans multiple batches. This commit moves connection creation into the
bucket_list function and changes send_request to optionally accept an
existing connection as a parameter, allowing the same connection to be
reused to list the entire contents of a bucket.
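
For context, the reuse pattern looks roughly like the following. This is
a minimal sketch, not s3cmd's actual code: open_connection and
fetch_batch are hypothetical stand-ins for get_connection and the
per-batch BUCKET_LIST request.

    # Sketch: one connection serves every batch of a paginated listing.
    # open_connection() and fetch_batch() are hypothetical helpers.
    def list_all_objects(bucket):
        conn = open_connection(bucket)   # establish the connection once
        objects = []
        truncated, marker = True, None
        try:
            while truncated:             # one request per batch (default 1000 keys)
                batch, truncated, marker = fetch_batch(conn, bucket, marker)
                objects += batch
        finally:
            conn.close()                 # close once, after the final batch
        return objects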
commit e86de366fed74bcd9d09817495015ea66fc962c8 (1 parent: a102e10)
@kellymclaughlin authored and committed
Showing with 16 additions and 6 deletions.
  1. +16 −6 S3/S3.py
S3/S3.py
@@ -243,9 +243,10 @@ def _get_common_prefixes(data):
         truncated = True
         list = []
         prefixes = []
+        conn = self.get_connection(bucket)
         while truncated:
-            response = self.bucket_list_noparse(bucket, prefix, recursive, uri_params)
+            response = self.bucket_list_noparse(conn, bucket, prefix, recursive, uri_params)
             current_list = _get_contents(response["data"])
             current_prefixes = _get_common_prefixes(response["data"])
             truncated = _list_truncated(response["data"])
@@ -259,17 +260,19 @@ def _get_common_prefixes(data):
             list += current_list
             prefixes += current_prefixes

+        conn.close()
+
         response['list'] = list
         response['common_prefixes'] = prefixes
         return response

-    def bucket_list_noparse(self, bucket, prefix = None, recursive = None, uri_params = {}):
+    def bucket_list_noparse(self, connection, bucket, prefix = None, recursive = None, uri_params = {}):
         if prefix:
             uri_params['prefix'] = self.urlencode_string(prefix)
         if not self.config.recursive and not recursive:
             uri_params['delimiter'] = "/"
         request = self.create_request("BUCKET_LIST", bucket = bucket, **uri_params)
-        response = self.send_request(request)
+        response = self.send_request(request, conn = connection)
         #debug(response)
         return response
@@ -643,7 +646,7 @@ def _fail_wait(self, retries):
         # Wait a few seconds. The more it fails the more we wait.
         return (self._max_retries - retries + 1) * 3

-    def send_request(self, request, body = None, retries = _max_retries):
+    def send_request(self, request, body = None, retries = _max_retries, conn = None):
         method_string, resource, headers = request.get_triplet()
         debug("Processing request, please wait...")
         if not headers.has_key('content-length'):
@@ -652,7 +655,13 @@ def send_request(self, request, body = None, retries = _max_retries):
         # "Stringify" all headers
         for header in headers.keys():
             headers[header] = str(headers[header])
-        conn = self.get_connection(resource['bucket'])
+        if conn is None:
+            debug("Establishing connection")
+            conn = self.get_connection(resource['bucket'])
+            close_conn = True
+        else:
+            debug("Using existing connection")
+            close_conn = False
         uri = self.format_uri(resource)
         debug("Sending request method_string=%r, uri=%r, headers=%r, body=(%i bytes)" % (method_string, uri, headers, len(body or "")))
         conn.request(method_string, uri, body, headers)
@@ -663,7 +672,8 @@ def send_request(self, request, body = None, retries = _max_retries):
             response["headers"] = convertTupleListToDict(http_response.getheaders())
             response["data"] = http_response.read()
             debug("Response: " + str(response))
-            conn.close()
+            if close_conn is True:
+                conn.close()
         except Exception, e:
             if retries:
                 warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
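
The close_conn flag encodes a simple ownership rule: send_request closes
a connection only when it opened that connection itself, while a
caller-supplied connection is left open for the caller to close after
the last batch. Here is a minimal sketch of the same owned-versus-borrowed
pattern in isolation, using hypothetical helper names (send, perform,
open_connection) rather than s3cmd's API:

    def send(request, conn=None):
        owns_conn = conn is None          # did we open this connection ourselves?
        if owns_conn:
            conn = open_connection(request)
        try:
            return perform(conn, request)
        finally:
            if owns_conn:
                conn.close()              # only close what we opened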