Reuse a single connection when doing a bucket list
Listing the contents of a bucket can involve multiple requests when the
bucket is sufficiently large: by default, objects are listed in batches
of 1000. Currently a new connection is created for each batch, which
adds unnecessary connection-establishment overhead whenever listing a
bucket takes more than one batch. This commit moves connection creation
into the bucket_list function and changes send_request to optionally
accept an existing connection as a parameter, so the same connection
can be reused to list the entire contents of a bucket.
kellymclaughlin authored and mdomsch committed Dec 6, 2012
1 parent a102e10 commit e86de36
Showing 1 changed file with 16 additions and 6 deletions: S3/S3.py
@@ -243,9 +243,10 @@ def _get_common_prefixes(data):
         truncated = True
         list = []
         prefixes = []
+        conn = self.get_connection(bucket)

         while truncated:
-            response = self.bucket_list_noparse(bucket, prefix, recursive, uri_params)
+            response = self.bucket_list_noparse(conn, bucket, prefix, recursive, uri_params)
             current_list = _get_contents(response["data"])
             current_prefixes = _get_common_prefixes(response["data"])
             truncated = _list_truncated(response["data"])
@@ -259,17 +260,19 @@ def _get_common_prefixes(data):
             list += current_list
             prefixes += current_prefixes

+        conn.close()
+
         response['list'] = list
         response['common_prefixes'] = prefixes
         return response

-    def bucket_list_noparse(self, bucket, prefix = None, recursive = None, uri_params = {}):
+    def bucket_list_noparse(self, connection, bucket, prefix = None, recursive = None, uri_params = {}):
         if prefix:
             uri_params['prefix'] = self.urlencode_string(prefix)
         if not self.config.recursive and not recursive:
             uri_params['delimiter'] = "/"
         request = self.create_request("BUCKET_LIST", bucket = bucket, **uri_params)
-        response = self.send_request(request)
+        response = self.send_request(request, conn = connection)
         #debug(response)
         return response

@@ -643,7 +646,7 @@ def _fail_wait(self, retries):
         # Wait a few seconds. The more it fails the more we wait.
         return (self._max_retries - retries + 1) * 3

-    def send_request(self, request, body = None, retries = _max_retries):
+    def send_request(self, request, body = None, retries = _max_retries, conn = None):
         method_string, resource, headers = request.get_triplet()
         debug("Processing request, please wait...")
         if not headers.has_key('content-length'):
@@ -652,7 +655,13 @@ def send_request(self, request, body = None, retries = _max_retries):
         # "Stringify" all headers
         for header in headers.keys():
             headers[header] = str(headers[header])
-        conn = self.get_connection(resource['bucket'])
+        if conn is None:
+            debug("Establishing connection")
+            conn = self.get_connection(resource['bucket'])
+            close_conn = True
+        else:
+            debug("Using existing connection")
+            close_conn = False
         uri = self.format_uri(resource)
         debug("Sending request method_string=%r, uri=%r, headers=%r, body=(%i bytes)" % (method_string, uri, headers, len(body or "")))
         conn.request(method_string, uri, body, headers)
@@ -663,7 +672,8 @@ def send_request(self, request, body = None, retries = _max_retries):
             response["headers"] = convertTupleListToDict(http_response.getheaders())
             response["data"] = http_response.read()
             debug("Response: " + str(response))
-            conn.close()
+            if close_conn is True:
+                conn.close()
         except Exception, e:
             if retries:
                 warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
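For readers who want the pattern in isolation, the sketch below shows the same
idea: an optional connection parameter plus an ownership flag that decides who
closes the connection. This is a minimal illustration, not s3cmd's actual
code; the function names, the host and batch_paths parameters, and the use of
Python 3's http.client (the successor to the httplib module underlying the
code above) are assumptions made for the example.

import http.client

def send_request(host, path, conn=None):
    # Open a connection only if the caller did not supply one, and
    # remember whether we own it and therefore must close it.
    close_conn = conn is None
    if close_conn:
        conn = http.client.HTTPConnection(host)
    conn.request("GET", path)
    # Read the response fully before the connection can be reused
    # for the next request.
    data = conn.getresponse().read()
    if close_conn:
        conn.close()
    return data

def list_all_batches(host, batch_paths):
    # Mirrors the new bucket_list: one connection for all batches,
    # closed once after the loop instead of once per request.
    conn = http.client.HTTPConnection(host)
    try:
        return [send_request(host, path, conn=conn) for path in batch_paths]
    finally:
        conn.close()

The design choice worth noting is that ownership follows creation: whoever
opens the connection closes it, so send_request remains safe for existing
callers that pass no connection at all.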
