From e86de366fed74bcd9d09817495015ea66fc962c8 Mon Sep 17 00:00:00 2001
From: Kelly McLaughlin
Date: Wed, 5 Dec 2012 16:52:50 -0700
Subject: [PATCH] Reuse a single connection when doing a bucket list

Listing the contents of a bucket can involve multiple requests if the
bucket is sufficiently large. The default behavior is to list the
objects in batches of 1000. Currently a new connection is created for
each batch and this results in unnecessary connection establishment
overhead when listing a bucket involves multiple batches.

This commit moves the connection creation for listing a bucket into
the bucket_list function and changes the send_request function to
optionally accept an existing connection as a parameter. This allows
the same connection to be reused for listing the entire contents of a
bucket.
---
 S3/S3.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/S3/S3.py b/S3/S3.py
index c509f5da4..4cc1e442b 100644
--- a/S3/S3.py
+++ b/S3/S3.py
@@ -243,9 +243,10 @@ def _get_common_prefixes(data):
         truncated = True
         list = []
         prefixes = []
+        conn = self.get_connection(bucket)
 
         while truncated:
-            response = self.bucket_list_noparse(bucket, prefix, recursive, uri_params)
+            response = self.bucket_list_noparse(conn, bucket, prefix, recursive, uri_params)
             current_list = _get_contents(response["data"])
             current_prefixes = _get_common_prefixes(response["data"])
             truncated = _list_truncated(response["data"])
@@ -259,17 +260,19 @@ def _get_common_prefixes(data):
             list += current_list
             prefixes += current_prefixes
 
+        conn.close()
+
         response['list'] = list
         response['common_prefixes'] = prefixes
         return response
 
-    def bucket_list_noparse(self, bucket, prefix = None, recursive = None, uri_params = {}):
+    def bucket_list_noparse(self, connection, bucket, prefix = None, recursive = None, uri_params = {}):
         if prefix:
             uri_params['prefix'] = self.urlencode_string(prefix)
         if not self.config.recursive and not recursive:
             uri_params['delimiter'] = "/"
         request = self.create_request("BUCKET_LIST", bucket = bucket, **uri_params)
-        response = self.send_request(request)
+        response = self.send_request(request, conn = connection)
         #debug(response)
         return response
 
@@ -643,7 +646,7 @@ def _fail_wait(self, retries):
         # Wait a few seconds. The more it fails the more we wait.
         return (self._max_retries - retries + 1) * 3
 
-    def send_request(self, request, body = None, retries = _max_retries):
+    def send_request(self, request, body = None, retries = _max_retries, conn = None):
         method_string, resource, headers = request.get_triplet()
         debug("Processing request, please wait...")
         if not headers.has_key('content-length'):
@@ -652,7 +655,13 @@ def send_request(self, request, body = None, retries = _max_retries):
             # "Stringify" all headers
             for header in headers.keys():
                 headers[header] = str(headers[header])
-            conn = self.get_connection(resource['bucket'])
+            if conn is None:
+                debug("Establishing connection")
+                conn = self.get_connection(resource['bucket'])
+                close_conn = True
+            else:
+                debug("Using existing connection")
+                close_conn = False
             uri = self.format_uri(resource)
             debug("Sending request method_string=%r, uri=%r, headers=%r, body=(%i bytes)" % (method_string, uri, headers, len(body or "")))
             conn.request(method_string, uri, body, headers)
@@ -663,7 +672,8 @@ def send_request(self, request, body = None, retries = _max_retries):
             response["headers"] = convertTupleListToDict(http_response.getheaders())
             response["data"] = http_response.read()
             debug("Response: " + str(response))
-            conn.close()
+            if close_conn is True:
+                conn.close()
         except Exception, e:
             if retries:
                 warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
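
For readers outside the s3cmd codebase, below is a minimal, self-contained
sketch of the same optional-connection idiom. It is not part of the patch:
ListingClient, list_all, and the paths argument are hypothetical names used
only for illustration (and the sketch uses Python 3's http.client, whereas
the patch targets Python 2). Only the conn=None parameter handling mirrors
what send_request and bucket_list do above.

    # Standalone sketch of the optional-connection idiom.
    # Assumes the server honors HTTP keep-alive, so one TCP
    # connection can carry several sequential requests.
    import http.client

    class ListingClient:
        def __init__(self, host):
            self.host = host

        def get_connection(self):
            return http.client.HTTPConnection(self.host)

        def send_request(self, path, conn=None):
            # Mirror of send_request's new behavior: open (and later
            # close) a connection only when the caller did not supply one.
            close_conn = conn is None
            if conn is None:
                conn = self.get_connection()
            try:
                conn.request("GET", path)
                # Fully read the response so the connection can be reused.
                return conn.getresponse().read()
            finally:
                if close_conn:
                    conn.close()

        def list_all(self, paths):
            # Mirror of bucket_list: one connection reused for every batch.
            conn = self.get_connection()
            try:
                return [self.send_request(p, conn=conn) for p in paths]
            finally:
                conn.close()

Against a keep-alive server, list_all issues all of its GETs over a single
TCP connection, which is the connection-establishment saving the commit
message describes for multi-batch bucket listings.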