Skip to content
This repository
Browse code

Reuse a single connection when doing a bucket list

Listing the contents of a bucket can involve multiple requests if the
bucket is sufficiently large. The default behavior is to list the
objects in batches of 1000. Currently, a new connection is created for
each batch, which incurs unnecessary connection-establishment overhead
whenever listing a bucket takes more than one batch. This commit
moves the connection creation for listing a bucket into the
bucket_list function and changes the send_request function to
optionally accept an existing connection as a parameter. This allows
the same connection to be reused for listing the entire contents of a
bucket.
  • Loading branch information...
commit e86de366fed74bcd9d09817495015ea66fc962c8 1 parent a102e10
Kelly McLaughlin authored December 05, 2012 mdomsch committed December 06, 2012

Showing 1 changed file with 16 additions and 6 deletions. Show diff stats Hide diff stats

  1. 22  S3/S3.py
22  S3/S3.py
@@ -243,9 +243,10 @@ def _get_common_prefixes(data):
243 243
         truncated = True
244 244
         list = []
245 245
         prefixes = []
  246
+        conn = self.get_connection(bucket)
246 247
 
247 248
         while truncated:
248  
-            response = self.bucket_list_noparse(bucket, prefix, recursive, uri_params)
  249
+            response = self.bucket_list_noparse(conn, bucket, prefix, recursive, uri_params)
249 250
             current_list = _get_contents(response["data"])
250 251
             current_prefixes = _get_common_prefixes(response["data"])
251 252
             truncated = _list_truncated(response["data"])
@@ -259,17 +260,19 @@ def _get_common_prefixes(data):
259 260
             list += current_list
260 261
             prefixes += current_prefixes
261 262
 
  263
+        conn.close()
  264
+
262 265
         response['list'] = list
263 266
         response['common_prefixes'] = prefixes
264 267
         return response
265 268
 
266  
-    def bucket_list_noparse(self, bucket, prefix = None, recursive = None, uri_params = {}):
  269
+    def bucket_list_noparse(self, connection, bucket, prefix = None, recursive = None, uri_params = {}):
267 270
         if prefix:
268 271
             uri_params['prefix'] = self.urlencode_string(prefix)
269 272
         if not self.config.recursive and not recursive:
270 273
             uri_params['delimiter'] = "/"
271 274
         request = self.create_request("BUCKET_LIST", bucket = bucket, **uri_params)
272  
-        response = self.send_request(request)
  275
+        response = self.send_request(request, conn = connection)
273 276
         #debug(response)
274 277
         return response
275 278
 
@@ -643,7 +646,7 @@ def _fail_wait(self, retries):
643 646
         # Wait a few seconds. The more it fails the more we wait.
644 647
         return (self._max_retries - retries + 1) * 3
645 648
 
646  
-    def send_request(self, request, body = None, retries = _max_retries):
  649
+    def send_request(self, request, body = None, retries = _max_retries, conn = None):
647 650
         method_string, resource, headers = request.get_triplet()
648 651
         debug("Processing request, please wait...")
649 652
         if not headers.has_key('content-length'):
@@ -652,7 +655,13 @@ def send_request(self, request, body = None, retries = _max_retries):
652 655
             # "Stringify" all headers
653 656
             for header in headers.keys():
654 657
                 headers[header] = str(headers[header])
655  
-            conn = self.get_connection(resource['bucket'])
  658
+            if conn is None:
  659
+                debug("Establishing connection")
  660
+                conn = self.get_connection(resource['bucket'])
  661
+                close_conn = True
  662
+            else:
  663
+                debug("Using existing connection")
  664
+                close_conn = False
656 665
             uri = self.format_uri(resource)
657 666
             debug("Sending request method_string=%r, uri=%r, headers=%r, body=(%i bytes)" % (method_string, uri, headers, len(body or "")))
658 667
             conn.request(method_string, uri, body, headers)
@@ -663,7 +672,8 @@ def send_request(self, request, body = None, retries = _max_retries):
663 672
             response["headers"] = convertTupleListToDict(http_response.getheaders())
664 673
             response["data"] =  http_response.read()
665 674
             debug("Response: " + str(response))
666  
-            conn.close()
  675
+            if close_conn is True:
  676
+                conn.close()
667 677
         except Exception, e:
668 678
             if retries:
669 679
                 warning("Retrying failed request: %s (%s)" % (resource['uri'], e))

0 notes on commit e86de36

Please sign in to comment.
Something went wrong with that request. Please try again.