openstack
diff --git a/‎swift/common/bufferedhttp.py
Lines changed: 43 additions & 0 deletions b/‎swift/common/bufferedhttp.py
Lines changed: 43 additions & 0 deletions
diff --git a/‎swift/common/exceptions.py
Lines changed: 4 additions & 0 deletions b/‎swift/common/exceptions.py
Lines changed: 4 additions & 0 deletions
diff --git a/‎swift/common/swob.py
Lines changed: 31 additions & 14 deletions b/‎swift/common/swob.py
Lines changed: 31 additions & 14 deletions
diff --git a/‎swift/common/utils.py
Lines changed: 176 additions & 2 deletions b/‎swift/common/utils.py
Lines changed: 176 additions & 2 deletions
@@ -62,6 +62,7 @@ def __init__(self, sock, debuglevel=0, strict=0,
         self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
         self.length = _UNKNOWN          # number of bytes left in response
         self.will_close = _UNKNOWN      # conn will close at end of response
+        self._readline_buffer = ''
 
     def expect_response(self):
         if self.fp:
@@ -79,6 +80,48 @@ def expect_response(self):
             self.msg = HTTPMessage(self.fp, 0)
             self.msg.fp = None
 
+    def read(self, amt=None):
+        """
+        Read from the response body, handing out any bytes that
+        readline() buffered but did not return before reading further.
+
+        :param amt: maximum number of bytes to return; None means read
+            everything that is left
+        :returns: the bytes read
+        """
+        pending = self._readline_buffer
+        if not pending:
+            # Nothing held back by readline(); defer to the parent class.
+            return HTTPResponse.read(self, amt)
+
+        if amt is not None and amt <= len(pending):
+            # Bounded read that the buffer alone can satisfy.
+            self._readline_buffer = pending[amt:]
+            return pending[:amt]
+
+        # The buffer gets used up entirely; whatever else is wanted comes
+        # from the parent class.
+        self._readline_buffer = ''
+        if amt is None:
+            remainder = HTTPResponse.read(self, amt)
+        else:
+            remainder = HTTPResponse.read(self, amt - len(pending))
+        return pending + remainder
+
+    def readline(self, size=1024):
+        """
+        Read a single line from the response body.
+
+        Bytes read past the end of the line stay in
+        self._readline_buffer, from which read() and later readline()
+        calls are served first.
+
+        :param size: maximum number of bytes to return
+        :returns: the next line, including its trailing newline if one
+            was found within the first ``size`` bytes; an empty string
+            once the response is exhausted
+        """
+        # You'd think Python's httplib would provide this, but it doesn't.
+        # It does, however, provide a comment in the HTTPResponse class:
+        #
+        #  # XXX It would be nice to have readline and __iter__ for this,
+        #  # too.
+        #
+        # Yes, it certainly would.
+        while ('\n' not in self._readline_buffer
+               and len(self._readline_buffer) < size):
+            read_size = size - len(self._readline_buffer)
+            chunk = HTTPResponse.read(self, read_size)
+            if not chunk:
+                break
+            self._readline_buffer += chunk
+
+        line, newline, rest = self._readline_buffer.partition('\n')
+        line += newline
+        if size is not None and 0 <= size < len(line):
+            # An earlier call with a larger size can leave more than `size`
+            # bytes buffered (or a newline that lies beyond `size`); honor
+            # the limit and keep the excess for the next call.
+            line, rest = line[:size], line[size:] + rest
+        self._readline_buffer = rest
+        return line
+
     def nuke_from_orbit(self):
         """
         Terminate the socket with extreme prejudice.
 
@@ -57,6 +57,10 @@ class SuffixSyncError(SwiftException):
     pass
 
 
+class RangeAlreadyComplete(SwiftException):
+    # NOTE(review): nothing visible in this change raises or catches this.
+    # Presumably it signals that a requested byte range has already been
+    # fully served -- confirm against the callers.
+    pass
+
+
 class DiskFileError(SwiftException):
     pass
 
 
@@ -1089,13 +1089,14 @@ def content_range_header(start, stop, size):
 
 def multi_range_iterator(ranges, content_type, boundary, size, sub_iter_gen):
+    """
+    Yield, piece by piece, a multipart body holding one part per range.
+
+    Each part consists of a "--<boundary>" line, a Content-Type header,
+    the value of content_range_header(start, stop, size) followed by a
+    blank line, the chunks produced by sub_iter_gen(start, stop) and a
+    trailing CRLF. A final "--<boundary>--" closes the body.
+
+    :param ranges: iterable of (start, stop) pairs
+    :param content_type: value for each part's Content-Type header
+    :param boundary: MIME boundary, without the leading dashes
+    :param size: passed through to content_range_header()
+    :param sub_iter_gen: callable taking (start, stop) and returning an
+        iterable of body chunks for that range
+    """
     for start, stop in ranges:
-        yield ''.join(['\r\n--', boundary, '\r\n',
+        yield ''.join(['--', boundary, '\r\n',
                        'Content-Type: ', content_type, '\r\n'])
         yield content_range_header(start, stop, size) + '\r\n\r\n'
         sub_iter = sub_iter_gen(start, stop)
         for chunk in sub_iter:
             yield chunk
-    yield '\r\n--' + boundary + '--\r\n'
+        yield '\r\n'
+    yield '--' + boundary + '--'
 
 
 class Response(object):
@@ -1177,21 +1178,37 @@ def _prepare_for_ranges(self, ranges):
         self.content_type = ''.join(['multipart/byteranges;',
                                      'boundary=', self.boundary])
 
-        # This section calculate the total size of the targeted response
-        # The value 12 is the length of total bytes of hyphen, new line
-        # form feed for each section header. The value 8 is the length of
-        # total bytes of hyphen, new line, form feed characters for the
-        # closing boundary which appears only once
-        section_header_fixed_len = 12 + (len(self.boundary) +
-                                         len('Content-Type: ') +
-                                         len(content_type) +
-                                         len('Content-Range: bytes '))
+        # This section calculates the total size of the response.
+        section_header_fixed_len = (
+            # --boundary\r\n
+            len(self.boundary) + 4
+            # Content-Type: <type>\r\n
+            + len('Content-Type: ') + len(content_type) + 2
+            # Content-Range: <value>\r\n; <value> accounted for later
+            + len('Content-Range: ') + 2
+            # \r\n at end of headers
+            + 2)
+
         body_size = 0
         for start, end in ranges:
             body_size += section_header_fixed_len
-            body_size += len(str(start) + '-' + str(end - 1) + '/' +
-                             str(content_size)) + (end - start)
-        body_size += 8 + len(self.boundary)
+
+            # length of the value of Content-Range, not including the \r\n
+            # since that's already accounted for
+            cr = content_range_header_value(start, end, content_size)
+            body_size += len(cr)
+
+            # the actual bytes (note: this range is half-open, i.e. begins
+            # with byte <start> and ends with byte <end - 1>, so there's no
+            # fencepost error here)
+            body_size += (end - start)
+
+            # \r\n prior to --boundary
+            body_size += 2
+
+        # --boundary-- terminates the message
+        body_size += len(self.boundary) + 4
+
         self.content_length = body_size
         self.content_range = None
         return content_size, content_type
 
@@ -25,6 +25,7 @@
 import os
 import pwd
 import re
+import rfc822
 import sys
 import threading as stdlib_threading
 import time
@@ -3181,7 +3182,7 @@ def parse_content_type(content_type):
-            ('text/plain', [('charset, 'UTF-8'), ('level', '1')])
+            ('text/plain', [('charset', 'UTF-8'), ('level', '1')])
 
     :param content_type: content_type to parse
-    :returns: a typle containing (content type, list of k, v parameter tuples)
+    :returns: a tuple containing (content type, list of k, v parameter tuples)
     """
     parm_list = []
     if ';' in content_type:
@@ -3313,7 +3314,9 @@ def readline(self):
 def iter_multipart_mime_documents(wsgi_input, boundary, read_chunk_size=4096):
     """
     Given a multi-part-mime-encoded input file object and boundary,
-    yield file-like objects for each part.
+    yield file-like objects for each part. Note that this does not
+    split each part into headers and body; the caller is responsible
+    for doing that if necessary.
 
     :param wsgi_input: The file-like object to read from.
     :param boundary: The mime boundary to separate new file-like
@@ -3324,6 +3327,9 @@ def iter_multipart_mime_documents(wsgi_input, boundary, read_chunk_size=4096):
     boundary = '--' + boundary
     blen = len(boundary) + 2  # \r\n
     got = wsgi_input.readline(blen)
+    while got == '\r\n':
+        got = wsgi_input.readline(blen)
+
     if got.strip() != boundary:
         raise swift.common.exceptions.MimeInvalid(
             'invalid starting boundary: wanted %r, got %r', (boundary, got))
@@ -3338,6 +3344,174 @@ def iter_multipart_mime_documents(wsgi_input, boundary, read_chunk_size=4096):
         input_buffer = it.input_buffer
 
 
+def mime_to_document_iters(input_file, boundary, read_chunk_size=4096):
+    """
+    Takes a file-like object containing a multipart MIME document and
+    returns an iterator of (headers, body-file) tuples.
+
+    :param input_file: file-like object with the MIME doc in it
+    :param boundary: MIME boundary, sans dashes
+        (e.g. "divider", not "--divider")
+    :param read_chunk_size: size of strings read via input_file.read()
+    :returns: a generator of (headers, body-file) tuples, where headers
+        is an rfc822.Message and body-file is the part's file-like
+        object with its headers already consumed
+    """
+    doc_files = iter_multipart_mime_documents(input_file, boundary,
+                                              read_chunk_size)
+    for doc_file in doc_files:
+        # this consumes the headers and leaves just the body in doc_file
+        headers = rfc822.Message(doc_file, 0)
+        yield (headers, doc_file)
+
+
+def document_iters_to_multipart_byteranges(ranges_iter, boundary):
+    """
+    Turns an iterator of range dictionaries into the chunks of a
+    multipart/byteranges MIME document, usable as the body of a
+    multi-range 206 response.
+
+    See document_iters_to_http_response_body for parameter descriptions.
+    """
+
+    opening = "--" + boundary + "\r\n"
+    closing = "--" + boundary + "--"
+
+    for spec in ranges_iter:
+        first = spec["start_byte"]
+        last = spec["end_byte"]
+        total = spec.get("entity_length", "*")
+        ctype = spec["content_type"]
+        body = spec["part_iter"]
+
+        # Per-part MIME headers, ending with the blank line that
+        # separates them from the part's bytes.
+        content_range = "bytes %d-%d/%s\r\n" % (first, last, total)
+        yield (opening +
+               "Content-Type: " + str(ctype) + "\r\n" +
+               "Content-Range: " + content_range +
+               "\r\n")
+
+        for piece in body:
+            yield piece
+        yield "\r\n"
+    yield closing
+
+
+def document_iters_to_http_response_body(ranges_iter, boundary, multipart,
+                                         logger):
+    """
+    Takes an iterator of range iters and turns it into an appropriate
+    HTTP response body, whether that's multipart/byteranges or not.
+
+    This is almost, but not quite, the inverse of
+    http_response_to_document_iters(). This function only yields chunks of
+    the body, not any headers.
+
+    :param ranges_iter: an iterator of dictionaries, one per range.
+        Each dictionary must contain at least the following key:
+        "part_iter": iterator yielding the bytes in the range
+
+        Additionally, if multipart is True, then the following other keys
+        are required:
+
+        "start_byte": index of the first byte in the range
+        "end_byte": index of the last byte in the range
+        "content_type": value for the range's Content-Type header
+
+        Finally, there is one optional key that is used in the
+            multipart/byteranges case:
+
+        "entity_length": length of the requested entity (not necessarily
+            equal to the response length). If omitted, "*" will be used.
+
+        Each part_iter will be exhausted prior to calling next(ranges_iter).
+
+    :param boundary: MIME boundary to use, sans dashes (e.g. "boundary", not
+        "--boundary").
+    :param multipart: True if the response should be multipart/byteranges,
+        False otherwise. This should be True if and only if you have 2 or
+        more ranges.
+    :param logger: a logger
+    :returns: an iterable of body chunks; the empty string if multipart
+        is False and ranges_iter yields nothing
+    """
+    if multipart:
+        return document_iters_to_multipart_byteranges(ranges_iter, boundary)
+    else:
+        try:
+            response_body_iter = next(ranges_iter)['part_iter']
+        except StopIteration:
+            return ''
+
+        # We need to make sure ranges_iter does not get garbage-collected
+        # before response_body_iter is exhausted. The reason is that
+        # ranges_iter has a finally block that calls close_swift_conn, and
+        # so if that finally block fires before we read response_body_iter,
+        # there's nothing there.
+        def string_along(useful_iter, useless_iter_iter, logger):
+            for x in useful_iter:
+                yield x
+
+            try:
+                next(useless_iter_iter)
+            except StopIteration:
+                pass
+            else:
+                # warning(), not the deprecated warn() alias
+                logger.warning(
+                    "More than one part in a single-part response?")
+
+        return string_along(response_body_iter, ranges_iter, logger)
+
+
+def multipart_byteranges_to_document_iters(input_file, boundary,
+                                           read_chunk_size=4096):
+    """
+    Parses a multipart/byteranges MIME document (see RFC 7233,
+    Appendix A) held in a file-like object, yielding one (first-byte,
+    last-byte, length, document-headers, body-file) 5-tuple per part.
+
+    :param input_file: file-like object with the MIME doc in it
+    :param boundary: MIME boundary, sans dashes
+        (e.g. "divider", not "--divider")
+    :param read_chunk_size: size of strings read via input_file.read()
+    """
+    documents = mime_to_document_iters(input_file, boundary, read_chunk_size)
+    for part_headers, part_body in documents:
+        # The part's own Content-Range header supplies the byte indices.
+        content_range = part_headers.getheader('content-range')
+        start, end, total = parse_content_range(content_range)
+        yield (start, end, total, part_headers.items(), part_body)
+
+
+def http_response_to_document_iters(response, read_chunk_size=4096):
+    """
+    Converts a successful object-GET HTTP response into an iterator of
+    (first-byte, last-byte, length, headers, body-file) 5-tuples.
+
+    The response must either be a 200 or a 206; if you feed in a 204 or
+    something similar, this probably won't work.
+
+    :param response: HTTP response, like from bufferedhttp.http_connect(),
+        not a swob.Response.
+    :param read_chunk_size: passed on to the multipart parser; only used
+        when the response is multipart/byteranges
+    """
+    if response.status == 200:
+        # The whole object counts as a single "range".
+        total = int(response.getheader('Content-Length'))
+        whole_object = (0, total - 1, total, response.getheaders(), response)
+        return iter([whole_object])
+
+    content_type, params_list = parse_content_type(
+        response.getheader('Content-Type'))
+    if content_type == 'multipart/byteranges':
+        # Several ranges: the body is a multipart/byteranges MIME document
+        # that gets split on the boundary named in the Content-Type header.
+        mime_boundary = dict(params_list)['boundary']
+        return multipart_byteranges_to_document_iters(
+            response, mime_boundary, read_chunk_size)
+
+    # One range with no MIME framing, just the bytes; its start and end
+    # byte indices come from the Content-Range header.
+    first, last, total = parse_content_range(
+        response.getheader('Content-Range'))
+    return iter([(first, last, total, response.getheaders(), response)])
+
+
 #: Regular expression to match form attributes.
 ATTRIBUTES_RE = re.compile(r'(\w+)=(".*?"|[^";]+)(; ?|$)')