8
8
"""
9
9
10
10
import asyncio
11
- import email .parser
12
- import io
11
+ import re
13
12
import sys
14
13
15
14
from .version import version as websockets_version
26
25
))
27
26
28
27
28
# See https://tools.ietf.org/html/rfc7230#appendix-B.

# Regex for validating header names (the RFC 7230 "token" rule).
# Anchored with \Z rather than $ because $ also matches just before a
# trailing newline, which would let b'Name\n' pass validation.
_token_re = re.compile(rb"^[-!#$%&'*+.^_`|~0-9a-zA-Z]+\Z")

# Regex for validating header values.
# We don't attempt to support obsolete line folding.
# Include HTAB (\x09), SP (\x20), VCHAR (\x21-\x7e), obs-text (\x80-\xff).
# The ABNF is complicated because it attempts to express that optional
# whitespace is ignored. We strip whitespace and don't revalidate that.
# See also https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189
# Anchored with \Z for the same reason as the header name regex.
_value_re = re.compile(rb'^[\x09\x20-\x7e\x80-\xff]*\Z')
46
+
47
+
29
48
@asyncio.coroutine
def read_request(stream):
    """
    Read an HTTP/1.1 request line and its headers from ``stream``.

    ``stream`` is an :class:`~asyncio.StreamReader`.

    Return ``(path, headers)`` where ``path`` is a :class:`str` and
    ``headers`` is a list of ``(name, value)`` tuples.

    ``path`` isn't URL-decoded or validated in any way.

    Non-ASCII characters are represented with surrogate escapes.

    Raise :exc:`ValueError` if the request line doesn't have exactly the
    form ``GET <path> HTTP/1.1``. Exceptions raised while reading lines or
    headers propagate unchanged.

    The request is assumed not to contain a body.

    """
    # https://tools.ietf.org/html/rfc7230#section-3.1.1

    # Parsing is simple because fixed values are expected for method and
    # version and because path isn't checked. Since WebSocket software tends
    # to implement HTTP/1.1 strictly, there's little need for lenient parsing.

    # Given the implementation of read_line(), request_line ends with CRLF.
    request_line = yield from read_line(stream)

    # Strip the trailing CRLF, then split into exactly three parts. Report a
    # malformed line explicitly instead of leaking the unpacking error.
    parts = request_line[:-2].split(b' ', 2)
    if len(parts) != 3:
        raise ValueError("Invalid HTTP request line: %r" % request_line)
    method, path, version = parts

    if method != b'GET':
        raise ValueError("Unsupported HTTP method: %r" % method)
    if version != b'HTTP/1.1':
        raise ValueError("Unsupported HTTP version: %r" % version)

    # surrogateescape keeps non-ASCII bytes round-trippable in the str.
    path = path.decode('ascii', 'surrogateescape')

    headers = yield from read_headers(stream)

    return path, headers
52
89
53
90
@@ -59,45 +96,82 @@ def read_response(stream):
59
96
``stream`` is an :class:`~asyncio.StreamReader`.
60
97
61
98
Return ``(status, headers)`` where ``status`` is a :class:`int` and
62
- ``headers`` is a :class:`~email.message.Message`.
99
+ ``headers`` is a list of ``(name, value)`` tuples.
100
+
101
+ Non-ASCII characters are represented with surrogate escapes.
63
102
64
103
Raise an exception if the response isn't well formatted.
65
104
66
105
The response is assumed not to contain a body.
67
106
68
107
"""
69
- status_line , headers = yield from read_message (stream )
70
- version , status , reason = status_line [:- 2 ].decode ().split (" " , 2 )
71
- if version != 'HTTP/1.1' :
72
- raise ValueError ("Unsupported HTTP version" )
73
- return int (status ), headers
108
+ # https://tools.ietf.org/html/rfc7230#section-3.1.2
109
+
110
+ # As in read_request, parsing is simple because a fixed value is expected
111
+ # for version, status is a 3-digit number, and reason can be ignored.
112
+
113
+ # Given the implementation of read_line(), status_line ends with CRLF.
114
+ status_line = yield from read_line (stream )
115
+
116
+ # This may raise "ValueError: not enough values to unpack"
117
+ version , status , reason = status_line [:- 2 ].split (b' ' , 2 )
118
+
119
+ if version != b'HTTP/1.1' :
120
+ raise ValueError ("Unsupported HTTP version: %r" % version )
121
+ # This may raise "ValueError: invalid literal for int() with base 10"
122
+ status = int (status )
123
+ if not 100 <= status < 1000 :
124
+ raise ValueError ("Unsupported HTTP status code: %d" % status )
125
+ if not _value_re .match (reason ):
126
+ raise ValueError ("Invalid HTTP reason phrase: %r" % reason )
127
+
128
+ headers = yield from read_headers (stream )
129
+
130
+ return status , headers
74
131
75
132
76
133
@asyncio.coroutine
def read_headers(stream):
    """
    Read HTTP headers from ``stream``.

    ``stream`` is an :class:`~asyncio.StreamReader`.

    Return a list of ``(name, value)`` tuples, in the order they were read.
    Names and values are :class:`str`.

    Non-ASCII characters in values are represented with surrogate escapes.

    Raise :exc:`ValueError` if a header line has no colon, if a name or a
    value contains invalid characters, or if the empty line that ends the
    headers isn't found within ``MAX_HEADERS`` lines.

    The message is assumed not to contain a body.

    """
    # https://tools.ietf.org/html/rfc7230#section-3.2

    # We don't attempt to support obsolete line folding.

    headers = []
    for _ in range(MAX_HEADERS):
        line = yield from read_line(stream)
        # An empty line (bare CRLF) marks the end of the headers.
        if line == b'\r\n':
            break

        # Strip the trailing CRLF and split on the first colon only, since
        # values may contain colons. Report a missing colon explicitly
        # instead of leaking the unpacking error.
        name, colon, value = line[:-2].partition(b':')
        if not colon:
            raise ValueError("Invalid HTTP header line: %r" % line)
        if not _token_re.match(name):
            raise ValueError("Invalid HTTP header name: %r" % name)
        # Optional whitespace around the value is not significant.
        value = value.strip(b' \t')
        if not _value_re.match(value):
            raise ValueError("Invalid HTTP header value: %r" % value)

        headers.append((
            name.decode('ascii'),   # guaranteed to be ASCII at this point
            value.decode('ascii', 'surrogateescape'),
        ))

    else:
        # The loop ran out without seeing the terminating empty line.
        raise ValueError("Too many HTTP headers")

    return headers
101
175
102
176
103
177
@asyncio .coroutine
@@ -108,9 +182,12 @@ def read_line(stream):
108
182
``stream`` is an :class:`~asyncio.StreamReader`.
109
183
110
184
"""
185
+ # Security: this is bounded by the StreamReader's limit (default = 32kB).
111
186
line = yield from stream .readline ()
187
+ # Security: this guarantees header values are small (hardcoded = 4kB)
112
188
if len (line ) > MAX_LINE :
113
189
raise ValueError ("Line too long" )
190
+ # Not mandatory but safe - https://tools.ietf.org/html/rfc7230#section-3.5
114
191
if not line .endswith (b'\r \n ' ):
115
192
raise ValueError ("Line without CRLF" )
116
193
return line
0 commit comments