25
25
import os
26
26
import pwd
27
27
import re
28
+ import rfc822
28
29
import sys
29
30
import threading as stdlib_threading
30
31
import time
@@ -3181,7 +3182,7 @@ def parse_content_type(content_type):
3181
3182
('text/plain', [('charset', 'UTF-8'), ('level', '1')])
3182
3183
3183
3184
:param content_type: content_type to parse
3184
- :returns: a typle containing (content type, list of k, v parameter tuples)
3185
+ :returns: a tuple containing (content type, list of k, v parameter tuples)
3185
3186
"""
3186
3187
parm_list = []
3187
3188
if ';' in content_type :
@@ -3313,7 +3314,9 @@ def readline(self):
3313
3314
def iter_multipart_mime_documents (wsgi_input , boundary , read_chunk_size = 4096 ):
3314
3315
"""
3315
3316
Given a multi-part-mime-encoded input file object and boundary,
3316
- yield file-like objects for each part.
3317
+ yield file-like objects for each part. Note that this does not
3318
+ split each part into headers and body; the caller is responsible
3319
+ for doing that if necessary.
3317
3320
3318
3321
:param wsgi_input: The file-like object to read from.
3319
3322
:param boundary: The mime boundary to separate new file-like
@@ -3324,6 +3327,9 @@ def iter_multipart_mime_documents(wsgi_input, boundary, read_chunk_size=4096):
3324
3327
boundary = '--' + boundary
3325
3328
blen = len (boundary ) + 2 # \r\n
3326
3329
got = wsgi_input .readline (blen )
3330
+ while got == '\r \n ' :
3331
+ got = wsgi_input .readline (blen )
3332
+
3327
3333
if got .strip () != boundary :
3328
3334
raise swift .common .exceptions .MimeInvalid (
3329
3335
'invalid starting boundary: wanted %r, got %r' , (boundary , got ))
@@ -3338,6 +3344,174 @@ def iter_multipart_mime_documents(wsgi_input, boundary, read_chunk_size=4096):
3338
3344
input_buffer = it .input_buffer
3339
3345
3340
3346
3347
def mime_to_document_iters(input_file, boundary, read_chunk_size=4096):
    """
    Takes a file-like object containing a multipart MIME document and
    returns an iterator of (headers, body-file) tuples.

    :param input_file: file-like object with the MIME doc in it
    :param boundary: MIME boundary, sans dashes
        (e.g. "divider", not "--divider")
    :param read_chunk_size: size of strings read via input_file.read()
    :returns: a generator yielding one (headers, body-file) tuple per MIME
        part; headers is an rfc822.Message and body-file is the same
        file-like object the headers were parsed from
    """
    doc_files = iter_multipart_mime_documents(input_file, boundary,
                                              read_chunk_size)
    for doc_file in doc_files:
        # Parsing consumes the headers and leaves just the body in doc_file.
        # The second argument (seekable=0) tells rfc822 not to try
        # tell()/seek() on the part file.
        headers = rfc822.Message(doc_file, 0)
        yield (headers, doc_file)
3363
+
3364
+
3365
def document_iters_to_multipart_byteranges(ranges_iter, boundary):
    """
    Generate the chunks of a multipart/byteranges MIME document, suitable
    for use as the body of a multi-range 206 response.

    For every range dictionary pulled from ranges_iter this yields a part
    header (boundary line, Content-Type, Content-Range, blank line), then
    each chunk of the range's "part_iter", then a CRLF. After the last
    range the closing boundary is yielded.

    See document_iters_to_http_response_body for parameter descriptions.
    """
    delimiter_line = "--" + boundary + "\r\n"
    closing_delimiter = "--" + boundary + "--"

    for spec in ranges_iter:
        first = spec["start_byte"]
        last = spec["end_byte"]
        # The total entity length is optional; "*" stands in when unknown.
        total = spec.get("entity_length", "*")
        mime_type = spec["content_type"]
        body_chunks = spec["part_iter"]

        content_type_line = "Content-Type: " + str(mime_type) + "\r\n"
        content_range_line = "Content-Range: " + (
            "bytes %d-%d/%s\r\n" % (first, last, total))
        yield (delimiter_line + content_type_line + content_range_line +
               "\r\n")

        for piece in body_chunks:
            yield piece
        yield "\r\n"

    yield closing_delimiter
3396
+
3397
+
3398
def document_iters_to_http_response_body(ranges_iter, boundary, multipart,
                                         logger):
    """
    Takes an iterator of range iters and turns it into an appropriate
    HTTP response body, whether that's multipart/byteranges or not.

    This is almost, but not quite, the inverse of
    http_response_to_document_iters(). This function only yields chunks of
    the body, not any headers.

    :param ranges_iter: an iterator of dictionaries, one per range.
        Each dictionary must contain at least the following key:
        "part_iter": iterator yielding the bytes in the range

        Additionally, if multipart is True, then the following other keys
        are required:

        "start_byte": index of the first byte in the range
        "end_byte": index of the last byte in the range
        "content_type": value for the range's Content-Type header

        Finally, there is one optional key that is used in the
        multipart/byteranges case:

        "entity_length": length of the requested entity (not necessarily
        equal to the response length). If omitted, "*" will be used.

        Each part_iter will be exhausted prior to calling next(ranges_iter).

    :param boundary: MIME boundary to use, sans dashes (e.g. "boundary", not
        "--boundary").
    :param multipart: True if the response should be multipart/byteranges,
        False otherwise. This should be True if and only if you have 2 or
        more ranges.
    :param logger: a logger; its warning() method is called if a
        non-multipart response turns out to have more than one part
    :returns: an iterable of body chunks; the empty string if multipart is
        False and ranges_iter yields nothing
    """
    if multipart:
        return document_iters_to_multipart_byteranges(ranges_iter, boundary)
    else:
        try:
            response_body_iter = next(ranges_iter)['part_iter']
        except StopIteration:
            return ''

        # We need to make sure ranges_iter does not get garbage-collected
        # before response_body_iter is exhausted. The reason is that
        # ranges_iter has a finally block that calls close_swift_conn, and
        # so if that finally block fires before we read response_body_iter,
        # there's nothing there.
        def string_along(useful_iter, useless_iter_iter, logger):
            for x in useful_iter:
                yield x

            try:
                next(useless_iter_iter)
            except StopIteration:
                pass
            else:
                # Logger.warn is a deprecated alias (removed from the
                # standard library in Python 3.13); warning() is the
                # supported spelling.
                logger.warning(
                    "More than one part in a single-part response?")

        return string_along(response_body_iter, ranges_iter, logger)
3459
+
3460
+
3461
def multipart_byteranges_to_document_iters(input_file, boundary,
                                           read_chunk_size=4096):
    """
    Parse a multipart/byteranges MIME document (see RFC 7233, Appendix A)
    held in a file-like object and generate one (first-byte, last-byte,
    length, document-headers, body-file) 5-tuple per part.

    The byte positions and length come from each part's Content-Range
    header; document-headers is the part's header list as returned by
    headers.items().

    :param input_file: file-like object with the MIME doc in it
    :param boundary: MIME boundary, sans dashes
        (e.g. "divider", not "--divider")
    :param read_chunk_size: size of strings read via input_file.read()
    """
    documents = mime_to_document_iters(input_file, boundary, read_chunk_size)
    for part_headers, part_body in documents:
        content_range = part_headers.getheader('content-range')
        first_byte, last_byte, length = parse_content_range(content_range)
        header_items = part_headers.items()
        yield (first_byte, last_byte, length, header_items, part_body)
3478
+
3479
+
3480
def http_response_to_document_iters(response, read_chunk_size=4096):
    """
    Convert a successful object-GET HTTP response into an iterator of
    (first-byte, last-byte, length, headers, body-file) 5-tuples.

    Three shapes of response are handled:

    * status 200: one tuple covering the whole object, sized from the
      Content-Length header
    * any other status with a non-multipart Content-Type: one tuple whose
      byte positions come from the Content-Range header
    * Content-Type multipart/byteranges: one tuple per MIME part

    The response must either be a 200 or a 206; if you feed in a 204 or
    something similar, this probably won't work.

    :param response: HTTP response, like from bufferedhttp.http_connect(),
        not a swob.Response.
    :param read_chunk_size: passed through to the MIME parser in the
        multipart/byteranges case; unused otherwise
    """
    if response.status == 200:
        # Single "range" that's the whole object
        whole_length = int(response.getheader('Content-Length'))
        whole_object = (0, whole_length - 1, whole_length,
                        response.getheaders(), response)
        return iter([whole_object])

    raw_content_type = response.getheader('Content-Type')
    content_type, params_list = parse_content_type(raw_content_type)

    if content_type == 'multipart/byteranges':
        # Multiple ranges; the body is a multipart/byteranges MIME document
        # which has to be split on the boundary named in Content-Type.
        mime_boundary = dict(params_list)['boundary']
        return multipart_byteranges_to_document_iters(
            response, mime_boundary, read_chunk_size)

    # Single range; no MIME framing, just the bytes. The start and end
    # byte indices are in the Content-Range header.
    raw_content_range = response.getheader('Content-Range')
    start, end, length = parse_content_range(raw_content_range)
    single_range = (start, end, length, response.getheaders(), response)
    return iter([single_range])
3513
+
3514
+
3341
3515
#: Regular expression to match form attributes.
3342
3516
ATTRIBUTES_RE = re .compile (r'(\w+)=(".*?"|[^";]+)(; ?|$)' )
3343
3517
0 commit comments