From e86de366fed74bcd9d09817495015ea66fc962c8 Mon Sep 17 00:00:00 2001
From: Kelly McLaughlin
Date: Wed, 5 Dec 2012 16:52:50 -0700
Subject: [PATCH] Reuse a single connection when doing a bucket list

Listing the contents of a bucket can involve multiple requests if the
bucket is sufficiently large. The default behavior is to list the
objects in batches of 1000. Currently a new connection is created for
each batch and this results in unnecessary connection establishment
overhead when listing a bucket involves multiple batches.

This commit moves the connection creation for listing a bucket into
the bucket_list function and changes the send_request function to
optionally accept an existing connection as a parameter. This allows
the same connection to be reused for listing the entire contents of a
bucket.
---
 S3/S3.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/S3/S3.py b/S3/S3.py
index c509f5da4..4cc1e442b 100644
--- a/S3/S3.py
+++ b/S3/S3.py
@@ -243,9 +243,10 @@ def _get_common_prefixes(data):
         truncated = True
         list = []
         prefixes = []
+        conn = self.get_connection(bucket)
 
         while truncated:
-            response = self.bucket_list_noparse(bucket, prefix, recursive, uri_params)
+            response = self.bucket_list_noparse(conn, bucket, prefix, recursive, uri_params)
             current_list = _get_contents(response["data"])
             current_prefixes = _get_common_prefixes(response["data"])
             truncated = _list_truncated(response["data"])
@@ -259,17 +260,19 @@ def _get_common_prefixes(data):
             list += current_list
             prefixes += current_prefixes
 
+        conn.close()
+
         response['list'] = list
         response['common_prefixes'] = prefixes
         return response
 
-    def bucket_list_noparse(self, bucket, prefix = None, recursive = None, uri_params = {}):
+    def bucket_list_noparse(self, connection, bucket, prefix = None, recursive = None, uri_params = {}):
         if prefix:
             uri_params['prefix'] = self.urlencode_string(prefix)
         if not self.config.recursive and not recursive:
             uri_params['delimiter'] = "/"
         request = self.create_request("BUCKET_LIST", bucket = bucket, **uri_params)
-        response = self.send_request(request)
+        response = self.send_request(request, conn = connection)
         #debug(response)
         return response
 
@@ -643,7 +646,7 @@ def _fail_wait(self, retries):
         # Wait a few seconds. The more it fails the more we wait.
         return (self._max_retries - retries + 1) * 3
 
-    def send_request(self, request, body = None, retries = _max_retries):
+    def send_request(self, request, body = None, retries = _max_retries, conn = None):
         method_string, resource, headers = request.get_triplet()
         debug("Processing request, please wait...")
         if not headers.has_key('content-length'):
@@ -652,7 +655,13 @@ def send_request(self, request, body = None, retries = _max_retries):
             # "Stringify" all headers
             for header in headers.keys():
                 headers[header] = str(headers[header])
-            conn = self.get_connection(resource['bucket'])
+            if conn is None:
+                debug("Establishing connection")
+                conn = self.get_connection(resource['bucket'])
+                close_conn = True
+            else:
+                debug("Using existing connection")
+                close_conn = False
             uri = self.format_uri(resource)
             debug("Sending request method_string=%r, uri=%r, headers=%r, body=(%i bytes)" % (method_string, uri, headers, len(body or "")))
             conn.request(method_string, uri, body, headers)
@@ -663,7 +672,8 @@ def send_request(self, request, body = None, retries = _max_retries):
             response["headers"] = convertTupleListToDict(http_response.getheaders())
             response["data"] = http_response.read()
             debug("Response: " + str(response))
-            conn.close()
+            if close_conn is True:
+                conn.close()
         except Exception, e:
             if retries:
                 warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
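
For readers outside the s3cmd codebase, below is a minimal, self-contained
sketch of the same optional-connection idiom. It is not part of the patch:
ListingClient, list_all, and the paths argument are hypothetical names used
only for illustration (and the sketch uses Python 3's http.client, whereas
the patch targets Python 2). Only the conn=None parameter handling mirrors
what send_request and bucket_list do above.

    # Standalone sketch of the optional-connection idiom.
    # Assumes the server honors HTTP keep-alive, so one TCP
    # connection can carry several sequential requests.
    import http.client

    class ListingClient:
        def __init__(self, host):
            self.host = host

        def get_connection(self):
            return http.client.HTTPConnection(self.host)

        def send_request(self, path, conn=None):
            # Mirror of send_request's new behavior: open (and later
            # close) a connection only when the caller did not supply one.
            close_conn = conn is None
            if conn is None:
                conn = self.get_connection()
            try:
                conn.request("GET", path)
                # Fully read the response so the connection can be reused.
                return conn.getresponse().read()
            finally:
                if close_conn:
                    conn.close()

        def list_all(self, paths):
            # Mirror of bucket_list: one connection reused for every batch.
            conn = self.get_connection()
            try:
                return [self.send_request(p, conn=conn) for p in paths]
            finally:
                conn.close()

Against a keep-alive server, list_all issues all of its GETs over a single
TCP connection, which is the connection-establishment saving the commit
message describes for multi-batch bucket listings.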