Skip to content

Commit 06bede8

Browse files
author
Zack M. Davis
committed
replace use of deprecated rfc822.Message with a helper utility
The rfc822 module has been deprecated since Python 2.3, and in particular is absent from the Python 3 standard library. However, Swift uses instances of rfc822.Message in a number of places, relying on its behavior of immediately parsing the headers of a file-like object without consuming the body, leaving the position of the file at the start of the body. Python 3's http.client has an undocumented parse_headers function with the same behavior, which inspired the new parse_mime_headers utility introduced here. (The HeaderKeyDict returned by parse_mime_headers doesn't have a `.getheader(key)` method like rfc822.Message did; the dictionary-like `[key]` or `.get(key)` interface should be used exclusively.) The implementation in this commit won't actually work with Python 3, because Python 3's email.parser.Parser().parsestr expects a Unicode string rather than bytes, but it is believed that this can be addressed in follow-up work. Change-Id: Ia5ee2ead67e36e8c6416183667f64ae255887736
1 parent 0e3e2db commit 06bede8

File tree

5 files changed

+71
-18
lines changed

5 files changed

+71
-18
lines changed

swift/common/middleware/formpost.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -113,15 +113,15 @@
113113
__all__ = ['FormPost', 'filter_factory', 'READ_CHUNK_SIZE', 'MAX_VALUE_LENGTH']
114114

115115
import hmac
116-
import rfc822
117116
from hashlib import sha1
118117
from time import time
119118
from urllib import quote
120119

121120
from swift.common.exceptions import MimeInvalid
122121
from swift.common.middleware.tempurl import get_tempurl_keys_from_metadata
123122
from swift.common.utils import streq_const_time, register_swift_info, \
124-
parse_content_disposition, iter_multipart_mime_documents
123+
parse_content_disposition, parse_mime_headers, \
124+
iter_multipart_mime_documents
125125
from swift.common.wsgi import make_pre_authed_env
126126
from swift.common.swob import HTTPUnauthorized
127127
from swift.proxy.controllers.base import get_account_info, get_container_info
@@ -254,9 +254,9 @@ def _translate_form(self, env, boundary):
254254
file_count = 0
255255
for fp in iter_multipart_mime_documents(
256256
env['wsgi.input'], boundary, read_chunk_size=READ_CHUNK_SIZE):
257-
hdrs = rfc822.Message(fp, 0)
257+
hdrs = parse_mime_headers(fp)
258258
disp, attrs = parse_content_disposition(
259-
hdrs.getheader('Content-Disposition', ''))
259+
hdrs.get('Content-Disposition', ''))
260260
if disp == 'form-data' and attrs.get('filename'):
261261
file_count += 1
262262
try:

swift/common/utils.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,13 @@
2525
import os
2626
import pwd
2727
import re
28-
import rfc822
2928
import sys
3029
import threading as stdlib_threading
3130
import time
3231
import uuid
3332
import functools
3433
import weakref
34+
import email.parser
3535
from hashlib import md5, sha1
3636
from random import random, shuffle
3737
from urllib import quote as _quote
@@ -3446,6 +3446,29 @@ def iter_multipart_mime_documents(wsgi_input, boundary, read_chunk_size=4096):
34463446
input_buffer = it.input_buffer
34473447

34483448

3449+
def parse_mime_headers(doc_file):
    """
    Takes a file-like object containing a MIME document and returns a
    HeaderKeyDict containing the headers. The body of the message is not
    consumed: the position in doc_file is left at the beginning of the body.

    This function was inspired by the Python standard library's
    http.client.parse_headers.

    Header lines are read as bytes. Where the native ``str`` type is not
    ``bytes`` (Python 3), the header block is decoded as Latin-1 before being
    handed to the email parser, which only accepts text there. Latin-1 maps
    every byte to the code point with the same value, so no header byte is
    lost or rejected; encoded-word and non-ASCII values are otherwise passed
    through untouched.

    :param doc_file: binary file-like object containing a MIME document
    :returns: a swift.common.swob.HeaderKeyDict containing the headers
    """
    from swift.common.swob import HeaderKeyDict  # avoid circular import
    header_lines = []
    while True:
        line = doc_file.readline()
        # The terminating blank line (or EOF marker) is kept in the block so
        # the parser sees a complete header section.
        header_lines.append(line)
        if line in (b'\r\n', b'\n', b''):
            break
    header_string = b''.join(header_lines)
    if not isinstance(header_string, str):
        # Only true where bytes and str are distinct types; on Python 2 the
        # joined bytestring is already a native str and is used as-is.
        header_string = header_string.decode('latin-1')
    return HeaderKeyDict(email.parser.Parser().parsestr(header_string))
3470+
3471+
34493472
def mime_to_document_iters(input_file, boundary, read_chunk_size=4096):
34503473
"""
34513474
Takes a file-like object containing a multipart MIME document and
@@ -3460,7 +3483,7 @@ def mime_to_document_iters(input_file, boundary, read_chunk_size=4096):
34603483
read_chunk_size)
34613484
for i, doc_file in enumerate(doc_files):
34623485
# this consumes the headers and leaves just the body in doc_file
3463-
headers = rfc822.Message(doc_file, 0)
3486+
headers = parse_mime_headers(doc_file)
34643487
yield (headers, doc_file)
34653488

34663489

@@ -3596,7 +3619,7 @@ def multipart_byteranges_to_document_iters(input_file, boundary,
35963619
for headers, body in mime_to_document_iters(input_file, boundary,
35973620
read_chunk_size):
35983621
first_byte, last_byte, length = parse_content_range(
3599-
headers.getheader('content-range'))
3622+
headers.get('content-range'))
36003623
yield (first_byte, last_byte, length, headers.items(), body)
36013624

36023625

swift/obj/server.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import multiprocessing
2222
import time
2323
import traceback
24-
import rfc822
2524
import socket
2625
import math
2726
from swift import gettext_ as _
@@ -33,7 +32,8 @@
3332
from swift.common.utils import public, get_logger, \
3433
config_true_value, timing_stats, replication, \
3534
normalize_delete_at_timestamp, get_log_line, Timestamp, \
36-
get_expirer_container, iter_multipart_mime_documents
35+
get_expirer_container, parse_mime_headers, \
36+
iter_multipart_mime_documents
3737
from swift.common.bufferedhttp import http_connect
3838
from swift.common.constraints import check_object_creation, \
3939
valid_timestamp, check_utf8
@@ -60,7 +60,7 @@ def iter_mime_headers_and_bodies(wsgi_input, mime_boundary, read_chunk_size):
6060
wsgi_input, mime_boundary, read_chunk_size)
6161

6262
for file_like in mime_documents_iter:
63-
hdrs = HeaderKeyDict(rfc822.Message(file_like, 0))
63+
hdrs = parse_mime_headers(file_like)
6464
yield (hdrs, file_like)
6565

6666

test/unit/common/test_utils.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
import json
3535
import math
3636

37-
from six import StringIO
37+
from six import BytesIO, StringIO
3838
from six.moves.queue import Queue, Empty
3939
from six.moves import range
4040
from textwrap import dedent
@@ -4927,6 +4927,36 @@ def test_readline_with_tiny_chunks(self):
49274927
self.assertTrue(exc is not None)
49284928

49294929

4930+
class TestParseMimeHeaders(unittest.TestCase):
    """Checks utils.parse_mime_headers against a small MIME document."""

    def test_parse_mime_headers(self):
        # Header section, blank separator line, then a one-line body.
        raw_document = (
            b'Content-Disposition: form-data; name="file_size"\n'
            b'Foo: Bar\n'
            b'NOT-title-cAsED: quux\n'
            b'Connexion: =?iso8859-1?q?r=E9initialis=E9e_par_l=27homologue?=\n'
            b'Status: =?utf-8?b?5byA5aeL6YCa6L+H5a+56LGh5aSN5Yi2?=\n'
            b'Latin-1: Resincronizaci\xf3n realizada con \xe9xito\n'
            b'Utf-8: \xd0\xba\xd0\xbe\xd0\xbd\xd1\x82\xd0\xb5\xd0\xb9\xd0'
            b'\xbd\xd0\xb5\xd1\x80\n'
            b'\n'
            b'This is the body\n'
        )
        doc_file = BytesIO(raw_document)

        parsed = utils.parse_mime_headers(doc_file)

        wanted = {
            'Content-Disposition': 'form-data; name="file_size"',
            'Foo': "Bar",
            'Not-Title-Cased': "quux",
            # Encoded-word or non-ASCII values are treated just like any other
            # bytestring (at least for now)
            'Connexion': "=?iso8859-1?q?r=E9initialis=E9e_par_l=27homologue?=",
            'Status': "=?utf-8?b?5byA5aeL6YCa6L+H5a+56LGh5aSN5Yi2?=",
            'Latin-1': "Resincronizaci\xf3n realizada con \xe9xito",
            'Utf-8': ("\xd0\xba\xd0\xbe\xd0\xbd\xd1\x82\xd0\xb5\xd0\xb9"
                      "\xd0\xbd\xd0\xb5\xd1\x80"),
        }
        self.assertEqual(wanted, parsed)
        # Parsing must stop right after the blank line: the body is unread.
        remaining = doc_file.read()
        self.assertEqual(b"This is the body\n", remaining)
4958+
4959+
49304960
class FakeResponse(object):
49314961
def __init__(self, status, headers, body):
49324962
self.status = status

test/unit/proxy/test_server.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import math
2121
import os
2222
import pickle
23-
import rfc822
2423
import sys
2524
import unittest
2625
from contextlib import closing, contextmanager, nested
@@ -48,7 +47,8 @@
4847
from six import StringIO
4948
from six.moves import range
5049
from swift.common.utils import hash_path, json, storage_directory, \
51-
parse_content_type, iter_multipart_mime_documents, public
50+
parse_content_type, parse_mime_headers, \
51+
iter_multipart_mime_documents, public
5252

5353
from test.unit import (
5454
connect_tcp, readuntil2crlfs, FakeLogger, fake_http_connect, FakeRing,
@@ -1438,7 +1438,7 @@ def test_GET_ranges(self):
14381438
got_mime_docs = []
14391439
for mime_doc_fh in iter_multipart_mime_documents(StringIO(res.body),
14401440
boundary):
1441-
headers = HeaderKeyDict(rfc822.Message(mime_doc_fh, 0).items())
1441+
headers = parse_mime_headers(mime_doc_fh)
14421442
body = mime_doc_fh.read()
14431443
got_mime_docs.append((headers, body))
14441444
self.assertEqual(len(got_mime_docs), 3)
@@ -1635,7 +1635,7 @@ def single_sabotage_hrtdi(*a, **kw):
16351635
got_byteranges = []
16361636
for mime_doc_fh in iter_multipart_mime_documents(StringIO(body),
16371637
boundary):
1638-
rfc822.Message(mime_doc_fh, 0)
1638+
parse_mime_headers(mime_doc_fh)
16391639
body = mime_doc_fh.read()
16401640
got_byteranges.append(body)
16411641

@@ -1667,7 +1667,7 @@ def single_sabotage_hrtdi(*a, **kw):
16671667
got_byteranges = []
16681668
for mime_doc_fh in iter_multipart_mime_documents(StringIO(body),
16691669
boundary):
1670-
rfc822.Message(mime_doc_fh, 0)
1670+
parse_mime_headers(mime_doc_fh)
16711671
body = mime_doc_fh.read()
16721672
got_byteranges.append(body)
16731673

@@ -1704,7 +1704,7 @@ def single_sabotage_hrtdi(*a, **kw):
17041704
got_byteranges = []
17051705
for mime_doc_fh in iter_multipart_mime_documents(StringIO(body),
17061706
boundary):
1707-
rfc822.Message(mime_doc_fh, 0)
1707+
parse_mime_headers(mime_doc_fh)
17081708
body = mime_doc_fh.read()
17091709
got_byteranges.append(body)
17101710

@@ -1741,7 +1741,7 @@ def single_sabotage_hrtdi(*a, **kw):
17411741
got_byteranges = []
17421742
for mime_doc_fh in iter_multipart_mime_documents(StringIO(body),
17431743
boundary):
1744-
rfc822.Message(mime_doc_fh, 0)
1744+
parse_mime_headers(mime_doc_fh)
17451745
body = mime_doc_fh.read()
17461746
got_byteranges.append(body)
17471747

0 commit comments

Comments
 (0)