Skip to content
Newer
Older
100644 1062 lines (874 sloc) 37.1 KB
f6fc8d4 @mitsuhiko More refactoring. Moved a lot of stuff into a new werkzeug.wsgi module.
authored
1 # -*- coding: utf-8 -*-
2 """
3 werkzeug.wsgi
4 ~~~~~~~~~~~~~
5
6 This module implements WSGI related helpers.
7
4d00f95 @DasIch Happy New Year 2014
DasIch authored
8 :copyright: (c) 2014 by the Werkzeug Team, see AUTHORS for more details.
f6fc8d4 @mitsuhiko More refactoring. Moved a lot of stuff into a new werkzeug.wsgi module.
authored
9 :license: BSD, see LICENSE for more details.
10 """
b0080b8 @mitsuhiko Added support for stream URL encoding and decoding
authored
11 import re
f6fc8d4 @mitsuhiko More refactoring. Moved a lot of stuff into a new werkzeug.wsgi module.
authored
12 import os
13 import posixpath
14 import mimetypes
c9f9fc2 @mitsuhiko Fixed byte operations on stream decoding
authored
15 from itertools import chain
f6fc8d4 @mitsuhiko More refactoring. Moved a lot of stuff into a new werkzeug.wsgi module.
authored
16 from zlib import adler32
17 from time import time, mktime
18 from datetime import datetime
10b4b8b @mitsuhiko A lot internal cleanup
authored
19 from functools import partial, update_wrapper
f6fc8d4 @mitsuhiko More refactoring. Moved a lot of stuff into a new werkzeug.wsgi module.
authored
20
4a7192f @mitsuhiko Added sentinal value to IterIO objects to handle corner cases
authored
21 from werkzeug._compat import iteritems, text_type, string_types, \
2b2d921 @untitaker Flake8 fixes
untitaker authored
22 implements_iterator, make_literal_wrapper, to_unicode, to_bytes, \
23 wsgi_get_bytes, try_coerce_native, PY2
74478e5 @mitsuhiko Unified idna decoding across the codebase and fixed edge cases
authored
24 from werkzeug._internal import _empty_stream, _encode_idna
e232fdc @mitsuhiko Added transition docs and moved two functions into better modules
authored
25 from werkzeug.http import is_resource_modified, http_date
33c50d2 @mitsuhiko Fixed missing usages of the WSGI encoding dance.
authored
26 from werkzeug.urls import uri_to_iri, url_quote, url_parse, url_join
bba0cdc @untitaker Force UTF-8 as filesystem encoding in some cases
untitaker authored
27 from werkzeug.filesystem import get_filesystem_encoding
f6fc8d4 @mitsuhiko More refactoring. Moved a lot of stuff into a new werkzeug.wsgi module.
authored
28
29
def responder(f):
    """Decorator for functions that return a WSGI application.

    The wrapped function is called with all arguments it was given.  The
    object it returns is then invoked as a WSGI application with the last
    two of those arguments (``environ`` and ``start_response``), so any
    leading arguments (such as ``self`` on a method) are not forwarded.

    Example::

        @responder
        def application(environ, start_response):
            return Response('Hello World!')

    :param f: the function to decorate.
    :return: a wrapper that carries over the metadata of `f`.
    """
    def wsgi_application(*args):
        inner_app = f(*args)
        return inner_app(*args[-2:])

    return update_wrapper(wsgi_application, f)
f6fc8d4 @mitsuhiko More refactoring. Moved a lot of stuff into a new werkzeug.wsgi module.
authored
41
42
def get_current_url(environ, root_only=False, strip_querystring=False,
                    host_only=False, trusted_hosts=None):
    """Rebuild the URL of the current request (or a part of it) as an IRI.

    >>> from werkzeug.test import create_environ
    >>> env = create_environ("/?param=foo", "http://localhost/script")
    >>> get_current_url(env)
    'http://localhost/script/?param=foo'
    >>> get_current_url(env, root_only=True)
    'http://localhost/script/'
    >>> get_current_url(env, host_only=True)
    'http://localhost/'
    >>> get_current_url(env, strip_querystring=True)
    'http://localhost/script/'

    The host is looked up with :func:`get_host`.  If `trusted_hosts` is
    given, that lookup verifies the host against the list and raises a
    :exc:`~werkzeug.exceptions.SecurityError` for an untrusted host.

    Because the result is an IRI and not a URI it might contain unicode
    characters.  Use :func:`~werkzeug.urls.iri_to_uri` if an ASCII only
    representation is required:

    >>> from werkzeug.urls import iri_to_uri
    >>> iri_to_uri(get_current_url(env))
    'http://localhost/script/?param=foo'

    :param environ: the WSGI environment to get the current URL from.
    :param root_only: set to `True` to stop after the script root; neither
                      the path info nor the query string is appended.
    :param strip_querystring: set to `True` to leave out the query string.
    :param host_only: set to `True` to return only scheme and host
                      followed by a slash.
    :param trusted_hosts: a list of trusted hosts, see
                          :func:`host_is_trusted` for more information.
    """
    pieces = [
        environ['wsgi.url_scheme'],
        '://',
        get_host(environ, trusted_hosts),
    ]
    if host_only:
        return uri_to_iri(''.join(pieces) + '/')

    # The script root always ends in exactly one slash.
    script_name = wsgi_get_bytes(environ.get('SCRIPT_NAME', ''))
    pieces.append(url_quote(script_name).rstrip('/'))
    pieces.append('/')

    if not root_only:
        # Leading slashes are dropped because the root already supplied one.
        path_info = wsgi_get_bytes(environ.get('PATH_INFO', ''))
        pieces.append(url_quote(path_info.lstrip(b'/')))
        if not strip_querystring:
            query = get_query_string(environ)
            if query:
                pieces.append('?' + query)

    return uri_to_iri(''.join(pieces))
f6fc8d4 @mitsuhiko More refactoring. Moved a lot of stuff into a new werkzeug.wsgi module.
authored
92
93
cdf6802 @mitsuhiko Added support for host validation and added some improved security he…
authored
def host_is_trusted(hostname, trusted_list):
    """Tell whether `hostname` matches an entry of `trusted_list`.

    Before comparing, everything after the last colon (the port) is
    removed from the hostname and from each entry, and the remainder is
    passed through IDNA encoding.  An empty hostname is never trusted.
    If that encoding raises :exc:`UnicodeError` for the hostname, or for
    an entry that is reached during the scan, the check ends with `False`.

    .. versionadded:: 0.9

    :param hostname: the hostname to check
    :param trusted_list: a single hostname or a list of hostnames to check
                         against.  An entry starting with a dot matches
                         the name after the dot and all of its subdomains.
    """
    if not hostname:
        return False

    if isinstance(trusted_list, string_types):
        trusted_list = [trusted_list]

    def _strip_port_and_encode(host):
        if ':' in host:
            host = host.rsplit(':', 1)[0]
        return _encode_idna(host)

    try:
        hostname = _strip_port_and_encode(hostname)
    except UnicodeError:
        return False

    for candidate in trusted_list:
        allow_subdomains = candidate.startswith('.')
        if allow_subdomains:
            candidate = candidate[1:]
        try:
            candidate = _strip_port_and_encode(candidate)
        except UnicodeError:
            return False
        if candidate == hostname:
            return True
        if allow_subdomains and hostname.endswith('.' + candidate):
            return True
    return False
135
136
def get_host(environ, trusted_hosts=None):
    """Determine the host the request was made to.

    The first source that is present in the environment wins:

    1. the first entry of the comma separated `X-Forwarded-Host` header,
    2. the `Host` header,
    3. `SERVER_NAME`, with `SERVER_PORT` appended unless it is the
       standard port of the URL scheme (443 for https, 80 for http).

    If `trusted_hosts` is given the result is verified with
    :func:`host_is_trusted` and a
    :exc:`~werkzeug.exceptions.SecurityError` is raised for a host that
    is not trusted.

    :param environ: the WSGI environment to get the host of.
    :param trusted_hosts: a list of trusted hosts, see :func:`host_is_trusted`
                          for more information.
    """
    if 'HTTP_X_FORWARDED_HOST' in environ:
        forwarded = environ['HTTP_X_FORWARDED_HOST']
        host = forwarded.split(',', 1)[0].strip()
    elif 'HTTP_HOST' in environ:
        host = environ['HTTP_HOST']
    else:
        host = environ['SERVER_NAME']
        scheme = environ['wsgi.url_scheme']
        port = environ['SERVER_PORT']
        is_default_port = (scheme, port) in (('https', '443'), ('http', '80'))
        if not is_default_port:
            host += ':' + port

    if trusted_hosts is not None and not host_is_trusted(host, trusted_hosts):
        # Imported here, as in the original, instead of at module level.
        from werkzeug.exceptions import SecurityError
        raise SecurityError('Host "%s" is not trusted' % host)
    return host
f6fc8d4 @mitsuhiko More refactoring. Moved a lot of stuff into a new werkzeug.wsgi module.
authored
164
165
534c7f6 @mitsuhiko Overhauled the whole input stream handling system to be more HTTP com…
authored
def get_content_length(environ):
    """Read `CONTENT_LENGTH` from the WSGI environment as an integer.

    `None` is returned if the key is missing or its value cannot be
    converted to an integer.  Negative values are clamped to ``0``.

    .. versionadded:: 0.9

    :param environ: the WSGI environ to fetch the content length from.
    """
    raw_length = environ.get('CONTENT_LENGTH')
    if raw_length is None:
        return None
    try:
        parsed = int(raw_length)
    except (ValueError, TypeError):
        return None
    return max(0, parsed)
180
181
def get_input_stream(environ, safe_fallback=True):
    """Returns the input stream from the WSGI environment and wraps it
    in the most sensible way possible.  The stream returned is not the
    raw WSGI stream in most cases but one that is safe to read from
    without taking into account the content length.

    .. versionadded:: 0.9

    :param environ: the WSGI environ to fetch the stream from.
    :param safe_fallback: indicates whether the function should use an
                          empty stream as safe fallback or just return
                          the original WSGI input stream if it can't
                          wrap it safely.  The default is to return an
                          empty stream in those cases.
    """
    stream = environ['wsgi.input']
    content_length = get_content_length(environ)

    # A wsgi extension that tells us if the input is terminated.  In
    # that case we return the stream unchanged as we know we can safely
    # read it until the end.
    if environ.get('wsgi.input_terminated'):
        return stream

    # If we don't have a content length we fall back to an empty stream
    # in case of a safe fallback, otherwise we return the stream unchanged.
    # The non-safe fallback is not recommended but might be useful in
    # some situations.  A real conditional expression is used so that the
    # choice depends only on `safe_fallback` and never on the truthiness
    # of the fallback object.
    if content_length is None:
        return _empty_stream if safe_fallback else stream

    # Otherwise limit the stream to the content length
    return LimitedStream(stream, content_length)
214
215
33c50d2 @mitsuhiko Fixed missing usages of the WSGI encoding dance.
authored
def get_query_string(environ):
    """Fetch `QUERY_STRING` from the WSGI environment as a native string.

    The value goes through the WSGI decoding dance and is then URL
    quoted, so the string returned is restricted to ASCII characters.

    .. versionadded:: 0.9

    :param environ: the WSGI environment object to get the query string from.
    """
    raw_query = wsgi_get_bytes(environ.get('QUERY_STRING', ''))
    # QUERY_STRING really should be ASCII safe, but some browsers send
    # unicode anyway.  Quoting everything outside the safe set deals
    # with that.
    quoted_query = url_quote(raw_query, safe=':&%=+$!*\'(),')
    return try_coerce_native(quoted_query)
231
232
def get_path_info(environ, charset='utf-8', errors='replace'):
    """Fetch `PATH_INFO` from the WSGI environment and decode it.

    The value goes through the WSGI decoding dance first.  A missing
    key is treated like an empty path.  If `charset` is `None` a
    bytestring is returned.

    .. versionadded:: 0.9

    :param environ: the WSGI environment object to get the path from.
    :param charset: the charset for the path info, or `None` if no
                    decoding should be performed.
    :param errors: the decoding error handling.
    """
    raw_path = environ.get('PATH_INFO', '')
    return to_unicode(wsgi_get_bytes(raw_path), charset, errors,
                      allow_none_charset=True)
248
249
def get_script_name(environ, charset='utf-8', errors='replace'):
    """Fetch `SCRIPT_NAME` from the WSGI environment and decode it.

    The value goes through the WSGI decoding dance first.  A missing
    key is treated like an empty script name.  If `charset` is `None` a
    bytestring is returned.

    .. versionadded:: 0.9

    :param environ: the WSGI environment object to get the path from.
    :param charset: the charset for the path, or `None` if no
                    decoding should be performed.
    :param errors: the decoding error handling.
    """
    raw_script_name = environ.get('SCRIPT_NAME', '')
    return to_unicode(wsgi_get_bytes(raw_script_name), charset, errors,
                      allow_none_charset=True)
265
266
def pop_path_info(environ, charset='utf-8', errors='replace'):
    """Move the next segment of `PATH_INFO` over to `SCRIPT_NAME` and
    return it.  `None` is returned, and the environment is left alone,
    if `PATH_INFO` is missing or empty.

    If the `charset` is set to `None` a bytestring is returned.

    Empty segments (``'/foo//bar``) are skipped, but their slashes are
    still pushed onto `SCRIPT_NAME`:

    >>> env = {'SCRIPT_NAME': '/foo', 'PATH_INFO': '/a/b'}
    >>> pop_path_info(env)
    'a'
    >>> env['SCRIPT_NAME']
    '/foo/a'
    >>> pop_path_info(env)
    'b'
    >>> env['SCRIPT_NAME']
    '/foo/a/b'

    .. versionadded:: 0.5

    .. versionchanged:: 0.9
       The path is now decoded and a charset and errors
       parameter can be provided.

    :param environ: the WSGI environment that is modified.
    :param charset: the charset for the segment, or `None` if no
                    decoding should be performed.
    :param errors: the decoding error handling.
    """
    remaining = environ.get('PATH_INFO')
    if not remaining:
        return None

    script_name = environ.get('SCRIPT_NAME', '')

    # Carry every leading slash over to the script name.
    stripped = remaining.lstrip('/')
    leading_slashes = len(remaining) - len(stripped)
    if leading_slashes:
        script_name += '/' * leading_slashes

    # `separator` is empty when this was the last segment of the path.
    segment, separator, rest = stripped.partition('/')
    if separator:
        environ['PATH_INFO'] = '/' + rest
    else:
        environ['PATH_INFO'] = ''
    environ['SCRIPT_NAME'] = script_name + segment

    return to_unicode(wsgi_get_bytes(segment), charset, errors,
                      allow_none_charset=True)
f6fc8d4 @mitsuhiko More refactoring. Moved a lot of stuff into a new werkzeug.wsgi module.
authored
317
318
33c50d2 @mitsuhiko Fixed missing usages of the WSGI encoding dance.
authored
def peek_path_info(environ, charset='utf-8', errors='replace'):
    """Return the next segment of `PATH_INFO` without touching the
    environment.  This is the read-only counterpart of
    :func:`pop_path_info`:

    >>> env = {'SCRIPT_NAME': '/foo', 'PATH_INFO': '/a/b'}
    >>> peek_path_info(env)
    'a'
    >>> peek_path_info(env)
    'a'

    If the `charset` is set to `None` a bytestring is returned.

    Note that splitting a string always yields at least one item, so for
    a missing or empty `PATH_INFO` the empty segment is handed on for
    decoding; the `None` branch below is only a safeguard.

    .. versionadded:: 0.5

    .. versionchanged:: 0.9
       The path is now decoded and a charset and errors
       parameter can be provided.

    :param environ: the WSGI environment that is checked.
    :param charset: the charset for the segment, or `None` if no
                    decoding should be performed.
    :param errors: the decoding error handling.
    """
    path_info = environ.get('PATH_INFO', '')
    segments = path_info.lstrip('/').split('/', 1)
    if not segments:
        return None
    first_segment = wsgi_get_bytes(segments[0])
    return to_unicode(first_segment, charset, errors,
                      allow_none_charset=True)
f6fc8d4 @mitsuhiko More refactoring. Moved a lot of stuff into a new werkzeug.wsgi module.
authored
344
345
8f12982 @mitsuhiko added :func:`werkzeug.extract_path_info`
authored
def extract_path_info(environ_or_baseurl, path_or_url, charset='utf-8',
                      errors='replace', collapse_http_schemes=True):
    """Extracts the path info from the given URL (or WSGI environment) and
    path.  The path info returned is a unicode string, not a bytestring
    suitable for a WSGI environment.  The URLs might also be IRIs.

    If the path info could not be determined, `None` is returned.  That
    is the case if either URL is not on an HTTP scheme, if the network
    locations differ, or if the path is not located below the base path.
    The base path only matches on a segment boundary: with a base of
    ``/app`` the path ``/app/hello`` is accepted while
    ``/application/hello`` is not.

    Some examples:

    >>> extract_path_info('http://example.com/app', '/app/hello')
    u'/hello'
    >>> extract_path_info('http://example.com/app',
    ...                   'https://example.com/app/hello')
    u'/hello'
    >>> extract_path_info('http://example.com/app',
    ...                   'https://example.com/app/hello',
    ...                   collapse_http_schemes=False) is None
    True

    Instead of providing a base URL you can also pass a WSGI environment.

    .. versionadded:: 0.6

    :param environ_or_baseurl: a WSGI environment dict, a base URL or
                               base IRI.  This is the root of the
                               application.
    :param path_or_url: an absolute path from the server root, a
                        relative path (in which case it's the path info)
                        or a full URL.  Also accepts IRIs and unicode
                        parameters.
    :param charset: the charset for byte data in URLs
    :param errors: the error handling on decode
    :param collapse_http_schemes: if set to `False` the algorithm does
                                  not assume that http and https on the
                                  same server point to the same
                                  resource.
    :return: the path info as unicode string with a leading slash, or
             `None` if it could not be determined.
    """
    def _normalize_netloc(scheme, netloc):
        # Drop the userinfo part and a port that is the scheme's default
        # so that equivalent network locations compare equal.
        parts = netloc.split(u'@', 1)[-1].split(u':', 1)
        if len(parts) == 2:
            netloc, port = parts
            if (scheme == u'http' and port == u'80') or \
               (scheme == u'https' and port == u'443'):
                port = None
        else:
            netloc = parts[0]
            port = None
        if port is not None:
            netloc += u':' + port
        return netloc

    # make sure whatever we are working on is a IRI and parse it
    path = uri_to_iri(path_or_url, charset, errors)
    if isinstance(environ_or_baseurl, dict):
        environ_or_baseurl = get_current_url(environ_or_baseurl,
                                             root_only=True)
    base_iri = uri_to_iri(environ_or_baseurl, charset, errors)
    base_scheme, base_netloc, base_path = url_parse(base_iri)[:3]
    cur_scheme, cur_netloc, cur_path = \
        url_parse(url_join(base_iri, path))[:3]

    # normalize the network location
    base_netloc = _normalize_netloc(base_scheme, base_netloc)
    cur_netloc = _normalize_netloc(cur_scheme, cur_netloc)

    # is that IRI even on a known HTTP scheme?
    if collapse_http_schemes:
        for scheme in base_scheme, cur_scheme:
            if scheme not in (u'http', u'https'):
                return None
    else:
        if not (base_scheme in (u'http', u'https') and
                base_scheme == cur_scheme):
            return None

    # are the netlocs compatible?
    if base_netloc != cur_netloc:
        return None

    # are we below the application path?
    base_path = base_path.rstrip(u'/')
    if not cur_path.startswith(base_path):
        return None

    # A plain prefix match is not enough: '/app' is a string prefix of
    # '/application' without being one of its parent paths.  Whatever
    # follows a non-empty base path has to start a new segment.
    remainder = cur_path[len(base_path):]
    if base_path and remainder and not remainder.startswith(u'/'):
        return None

    return u'/' + remainder.lstrip(u'/')
432
433
f6fc8d4 @mitsuhiko More refactoring. Moved a lot of stuff into a new werkzeug.wsgi module.
authored
434 class SharedDataMiddleware(object):
2b2d921 @untitaker Flake8 fixes
untitaker authored
435
f6fc8d4 @mitsuhiko More refactoring. Moved a lot of stuff into a new werkzeug.wsgi module.
authored
436 """A WSGI middleware that provides static content for development
437 environments or simple server setups. Usage is quite simple::
438
439 import os
1fb8eb1 @mitsuhiko Switched everything to long imports
authored
440 from werkzeug.wsgi import SharedDataMiddleware
f6fc8d4 @mitsuhiko More refactoring. Moved a lot of stuff into a new werkzeug.wsgi module.
authored
441
442 app = SharedDataMiddleware(app, {
443 '/shared': os.path.join(os.path.dirname(__file__), 'shared')
444 })
445
446 The contents of the folder ``./shared`` will now be available on
447 ``http://example.com/shared/``. This is pretty useful during development
448 because a standalone media server is not required. One can also mount
449 files on the root folder and still continue to use the application because
450 the shared data middleware forwards all unhandled requests to the
451 application, even if the requests are below one of the shared folders.
452
453 If `pkg_resources` is available you can also tell the middleware to serve
454 files from package data::
455
456 app = SharedDataMiddleware(app, {
457 '/shared': ('myapplication', 'shared_files')
458 })
459
460 This will then serve the ``shared_files`` folder in the `myapplication`
461 Python package.
462
463 The optional `disallow` parameter can be a list of :func:`~fnmatch.fnmatch`
464 rules for files that are not accessible from the web. If `cache` is set to
465 `False` no caching headers are sent.
466
467 Currently the middleware does not support non ASCII filenames. If the
468 encoding on the file system happens to be the encoding of the URI it may
469 work but this could also be by accident. We strongly suggest using ASCII
470 only file names for static files.
471
6352183 @mitsuhiko added `fallback_mimetype` to :class:`werkzeug.SharedDataMiddleware`.
authored
472 The middleware will guess the mimetype using the Python `mimetypes`
473 module. If it's unable to figure out the mimetype it will fall back
474 to `fallback_mimetype`.
475
f6fc8d4 @mitsuhiko More refactoring. Moved a lot of stuff into a new werkzeug.wsgi module.
authored
476 .. versionchanged:: 0.5
477 The cache timeout is configurable now.
478
6352183 @mitsuhiko added `fallback_mimetype` to :class:`werkzeug.SharedDataMiddleware`.
authored
479 .. versionadded:: 0.6
480 The `fallback_mimetype` parameter was added.
481
f6fc8d4 @mitsuhiko More refactoring. Moved a lot of stuff into a new werkzeug.wsgi module.
authored
482 :param app: the application to wrap. If you don't want to wrap an
483 application you can pass it :exc:`NotFound`.
484 :param exports: a dict of exported files and folders.
d0158db @masklinn typo in wsgi.SharedDataMiddleware's docstring
masklinn authored
485 :param disallow: a list of :func:`~fnmatch.fnmatch` rules.
6352183 @mitsuhiko added `fallback_mimetype` to :class:`werkzeug.SharedDataMiddleware`.
authored
486 :param fallback_mimetype: the fallback mimetype for unknown files.
f6fc8d4 @mitsuhiko More refactoring. Moved a lot of stuff into a new werkzeug.wsgi module.
authored
487 :param cache: enable or disable caching headers.
fb1fd86 @tony Typo capitalization docstring param
tony authored
488 :param cache_timeout: the cache timeout in seconds for the headers.
f6fc8d4 @mitsuhiko More refactoring. Moved a lot of stuff into a new werkzeug.wsgi module.
authored
489 """
490
def __init__(self, app, exports, disallow=None, cache=True,
             cache_timeout=60 * 60 * 12, fallback_mimetype='text/plain'):
    """Set up the middleware and build one loader per export.

    :param app: the application to wrap.
    :param exports: a dict that maps URL paths to what is served there.
                    A tuple value is passed on to
                    :meth:`get_package_loader`, a string value is
                    served as a single file if it names an existing
                    file and as a directory otherwise.
    :param disallow: a single :func:`~fnmatch.fnmatch` rule or an
                     iterable of such rules.  Files matching any rule
                     are not served.
    :param cache: enable or disable caching headers.
    :param cache_timeout: the cache timeout in seconds for the headers.
    :param fallback_mimetype: the fallback mimetype for unknown files.
    :raises TypeError: if an export is neither a tuple nor a string.
    """
    self.app = app
    self.exports = {}
    self.cache = cache
    self.cache_timeout = cache_timeout
    for key, value in iteritems(exports):
        if isinstance(value, tuple):
            loader = self.get_package_loader(*value)
        elif isinstance(value, string_types):
            if os.path.isfile(value):
                loader = self.get_file_loader(value)
            else:
                loader = self.get_directory_loader(value)
        else:
            raise TypeError('unknown def %r' % value)
        self.exports[key] = loader
    if disallow is not None:
        from fnmatch import fnmatch
        # fnmatch() takes exactly one pattern.  A single rule is wrapped
        # so that a list of rules, as documented, works as well.
        if isinstance(disallow, string_types) or isinstance(disallow, bytes):
            patterns = (disallow,)
        else:
            patterns = tuple(disallow)
        # This instance attribute shadows the `is_allowed` method.
        self.is_allowed = lambda x: not any(
            fnmatch(x, pattern) for pattern in patterns)
    self.fallback_mimetype = fallback_mimetype
f6fc8d4 @mitsuhiko More refactoring. Moved a lot of stuff into a new werkzeug.wsgi module.
authored
512
def is_allowed(self, filename):
    """Decide whether `filename` may be served.

    This default implementation permits every file.  Subclasses can
    override it to deny access to certain files.  Note that passing
    `disallow` to the constructor replaces this method on the instance.

    :param filename: the name of the file that is about to be served.
    """
    return True
519
def _opener(self, filename):
    """Return a callable that opens `filename` for serving.

    Nothing touches the file system until the returned callable is
    invoked.  It then returns a tuple in the form ``(file, mtime, size)``:
    the file object opened in binary mode, the modification time as
    :class:`~datetime.datetime` built with ``utcfromtimestamp``, and the
    size in bytes.

    :param filename: the path of the file to open.
    """
    def open_file():
        stream = open(filename, 'rb')
        try:
            mtime = datetime.utcfromtimestamp(os.path.getmtime(filename))
            file_size = int(os.path.getsize(filename))
        except Exception:
            # Do not leak the handle if the file vanished or became
            # unreadable right after it was opened.
            stream.close()
            raise
        return stream, mtime, file_size

    return open_file
526
def get_file_loader(self, filename):
    """Create a loader that serves the single file `filename`.

    The loader ignores the path it is called with and always returns a
    tuple of the file's base name and an opener created by
    :meth:`_opener`.

    :param filename: the path of the file to serve.
    """
    def loader(path):
        return os.path.basename(filename), self._opener(filename)

    return loader
529
def get_package_loader(self, package, package_path):
    """Create a loader that serves resources from a Python package.

    The loader is called with a path relative to `package_path` and
    returns ``(basename, opener)``, or ``(None, None)`` if the path is
    `None` or the provider has no such resource.  If the package's
    provider is a ``DefaultProvider`` the resource's file name is handed
    to :meth:`_opener`.  Otherwise the opener returns the resource
    stream together with the time this loader was created as
    modification time and ``0`` as size.

    :param package: the package to look up with ``get_provider``.
    :param package_path: the path inside the package that is served.
    """
    from pkg_resources import DefaultProvider, ResourceManager, \
        get_provider

    created_at = datetime.utcnow()
    provider = get_provider(package)
    manager = ResourceManager()
    on_filesystem = isinstance(provider, DefaultProvider)

    def loader(path):
        if path is None:
            return None, None
        resource = posixpath.join(package_path, path)
        if not provider.has_resource(resource):
            return None, None
        basename = posixpath.basename(resource)
        if on_filesystem:
            real_path = provider.get_resource_filename(manager, resource)
            return basename, self._opener(real_path)

        def open_resource():
            stream = provider.get_resource_stream(manager, resource)
            return stream, created_at, 0

        return basename, open_resource

    return loader
554
def get_directory_loader(self, directory):
    """Create a loader that serves files located below `directory`.

    The loader joins the path it is called with onto `directory`; for
    `None` it checks `directory` itself.  If the result is an existing
    file it returns the file's base name and an opener created by
    :meth:`_opener`, otherwise ``(None, None)``.

    :param directory: the directory the requested paths are joined to.
    """
    def loader(path):
        target = directory if path is None else os.path.join(directory, path)
        if not os.path.isfile(target):
            return None, None
        return os.path.basename(target), self._opener(target)

    return loader
565
def generate_etag(self, mtime, file_size, real_filename):
    """Build the etag for a file that is about to be served.

    The etag has the form ``wzsdm-<timestamp>-<size>-<checksum>``: the
    timestamp is ``mktime`` applied to `mtime`, and the checksum is the
    Adler-32 checksum of the file name masked to 32 bits.  A file name
    that is not a bytestring is encoded with the filesystem encoding
    first.

    :param mtime: the modification time as :class:`~datetime.datetime`.
    :param file_size: the size of the file.
    :param real_filename: the name of the file.
    """
    if isinstance(real_filename, bytes):
        name_bytes = real_filename
    else:
        name_bytes = real_filename.encode(get_filesystem_encoding())
    timestamp = mktime(mtime.timetuple())
    checksum = adler32(name_bytes) & 0xffffffff
    return 'wzsdm-%d-%s-%s' % (timestamp, file_size, checksum)
574
def __call__(self, environ, start_response):
    """Serve an exported file or fall through to the wrapped application.

    The request path is normalized (platform separators become ``/`` and
    empty as well as ``..`` segments are dropped) and matched against the
    configured exports.  If no loader yields a file, or the file is not
    allowed, the request is passed to ``self.app``.

    :param environ: the WSGI environment.
    :param start_response: the WSGI ``start_response`` callable.
    """
    raw_path = get_path_info(environ)
    if PY2:
        raw_path = raw_path.encode(get_filesystem_encoding())
    # sanitize the path for non unix systems
    raw_path = raw_path.strip('/')
    for separator in (os.sep, os.altsep):
        if separator and separator != '/':
            raw_path = raw_path.replace(separator, '/')
    segments = [part for part in raw_path.split('/')
                if part and part != '..']
    path = '/' + '/'.join(segments)

    file_loader = None
    for export_path, loader in iteritems(self.exports):
        # An export can match the request path exactly ...
        if export_path == path:
            real_filename, file_loader = loader(None)
            if file_loader is not None:
                break
        # ... or act as a prefix for everything below it.
        if export_path.endswith('/'):
            prefix = export_path
        else:
            prefix = export_path + '/'
        if path.startswith(prefix):
            real_filename, file_loader = loader(path[len(prefix):])
            if file_loader is not None:
                break
    if file_loader is None or not self.is_allowed(real_filename):
        return self.app(environ, start_response)

    mime_type = (mimetypes.guess_type(real_filename)[0] or
                 self.fallback_mimetype)
    f, mtime, file_size = file_loader()

    headers = [('Date', http_date())]
    if not self.cache:
        headers.append(('Cache-Control', 'public'))
    else:
        timeout = self.cache_timeout
        etag = self.generate_etag(mtime, file_size, real_filename)
        headers.append(('Etag', '"%s"' % etag))
        headers.append(('Cache-Control', 'max-age=%d, public' % timeout))
        if not is_resource_modified(environ, etag, last_modified=mtime):
            # Nothing will be sent, so release the opened file right away.
            f.close()
            start_response('304 Not Modified', headers)
            return []
        headers.append(('Expires', http_date(time() + timeout)))

    headers.append(('Content-Type', mime_type))
    headers.append(('Content-Length', str(file_size)))
    headers.append(('Last-Modified', http_date(mtime)))
    start_response('200 OK', headers)
    return wrap_file(environ, f)
628
629
class DispatcherMiddleware(object):

    """Combines several WSGI applications into one by mounting them
    below path prefixes::

        app = DispatcherMiddleware(app, {
            '/app2':        app2,
            '/app3':        app3
        })

    The longest mounted prefix of ``PATH_INFO`` selects the application.
    The prefix is appended to ``SCRIPT_NAME`` and the remainder becomes the
    new ``PATH_INFO``.  Requests that match no mount go to `app`.
    """

    def __init__(self, app, mounts=None):
        self.app = app
        self.mounts = mounts or {}

    def __call__(self, environ, start_response):
        mount_point = environ.get('PATH_INFO', '')
        remainder = ''
        # Strip trailing segments until a mount matches or nothing that
        # could be stripped is left.
        while '/' in mount_point and mount_point not in self.mounts:
            mount_point, _, tail = mount_point.rpartition('/')
            remainder = '/%s%s' % (tail, remainder)
        target = self.mounts.get(mount_point, self.app)
        script_name = environ.get('SCRIPT_NAME', '')
        environ['SCRIPT_NAME'] = script_name + mount_point
        environ['PATH_INFO'] = remainder
        return target(environ, start_response)
660
661
adca0a4 @DasIch Remove six dependency
DasIch authored
662 @implements_iterator
class ClosingIterator(object):

    """The WSGI specification requires that all middlewares and gateways
    respect the `close` callback of the iterable returned by the
    application.  Because it is useful to add another close action to a
    returned iterable and adding a custom iterator is a boring task this
    class can be used for that::

        return ClosingIterator(app(environ, start_response), [cleanup_session,
                                                              cleanup_locals])

    If there is just one close function it can be passed instead of the list.

    A closing iterator is not needed if the application uses response objects
    and finishes the processing if the response is started::

        try:
            return response(environ, start_response)
        finally:
            cleanup_session()
            cleanup_locals()

    :param iterable: the iterable to wrap.  Its own `close` method, if
                     present, is called before the additional callbacks.
    :param callbacks: `None`, a single callable or an iterable of
                      callables invoked by :meth:`close`.
    """

    def __init__(self, iterable, callbacks=None):
        iterator = iter(iterable)
        self._next = partial(next, iterator)
        if callbacks is None:
            callbacks = []
        elif callable(callbacks):
            callbacks = [callbacks]
        else:
            callbacks = list(callbacks)
        # The `close` that has to be honored lives on the object the
        # application returned, which is not necessarily the iterator
        # created from it (``iter(obj)`` may return a plain list iterator
        # or a generator).  Fall back to the iterator's `close` for
        # iterables that only expose it there.
        iterable_close = getattr(iterable, 'close', None)
        if iterable_close is None:
            iterable_close = getattr(iterator, 'close', None)
        if iterable_close:
            callbacks.insert(0, iterable_close)
        self._callbacks = callbacks

    def __iter__(self):
        return self

    def __next__(self):
        return self._next()

    def close(self):
        """Invoke the close callbacks in order, the wrapped iterable's
        own `close` first.
        """
        for callback in self._callbacks:
            callback()
708
709
def wrap_file(environ, file, buffer_size=8192):
    """Wraps a file.  The file wrapper announced by the WSGI server under
    ``wsgi.file_wrapper`` is used if there is one, otherwise the generic
    :class:`FileWrapper`.

    .. versionadded:: 0.5

    If the file wrapper from the WSGI server is used it's important to not
    iterate over it from inside the application but to pass it through
    unchanged.  If you want to pass out a file wrapper inside a response
    object you have to set :attr:`~BaseResponse.direct_passthrough` to `True`.

    More information about file wrappers is available in :pep:`333`.

    :param environ: the WSGI environment.
    :param file: a :class:`file`-like object with a :meth:`~file.read` method.
    :param buffer_size: number of bytes for one iteration.
    """
    wrapper_factory = environ.get('wsgi.file_wrapper', FileWrapper)
    return wrapper_factory(file, buffer_size)
727
728
adca0a4 @DasIch Remove six dependency
DasIch authored
729 @implements_iterator
class FileWrapper(object):

    """Turns a :class:`file`-like object into an iterator that yields
    blocks of up to `buffer_size` until a read returns no more data.

    You should not use this class directly but rather use the
    :func:`wrap_file` function that uses the WSGI server's file wrapper
    support if it's available.

    .. versionadded:: 0.5

    If you're using this object together with a :class:`BaseResponse` you have
    to use the `direct_passthrough` mode.

    :param file: a :class:`file`-like object with a :meth:`~file.read` method.
    :param buffer_size: number of bytes for one iteration.
    """

    def __init__(self, file, buffer_size=8192):
        self.buffer_size = buffer_size
        self.file = file

    def close(self):
        """Close the wrapped file if it provides a `close` method."""
        if not hasattr(self.file, 'close'):
            return
        self.file.close()

    def __iter__(self):
        return self

    def __next__(self):
        block = self.file.read(self.buffer_size)
        # An empty read marks the end of the file.
        if not block:
            raise StopIteration()
        return block
765
766
c9f9fc2 @mitsuhiko Fixed byte operations on stream decoding
authored
def _make_chunk_iter(stream, limit, buffer_size):
    """Helper for the line and chunk iter functions.

    Yields the non-empty items of `stream`.  Objects with a `read` method
    are read in blocks of `buffer_size`, and wrapped in a
    :class:`LimitedStream` first if a `limit` is given and they are not
    one already.  Anything else is iterated directly.

    :raises TypeError: if a string or bytes object is passed as `stream`.
    """
    if isinstance(stream, (bytes, bytearray, text_type)):
        raise TypeError('Passed a string or byte object instead of '
                        'true iterator or stream.')
    if hasattr(stream, 'read'):
        if limit is not None and not isinstance(stream, LimitedStream):
            stream = LimitedStream(stream, limit)
        read_block = stream.read
        block = read_block(buffer_size)
        # An empty read signals the end of the stream.
        while block:
            yield block
            block = read_block(buffer_size)
    else:
        for entry in stream:
            if entry:
                yield entry
3cbe355 @mitsuhiko Added tests for make_chunk_iter and added support for iterators inste…
authored
785
786
f6fc8d4 @mitsuhiko More refactoring. Moved a lot of stuff into a new werkzeug.wsgi module.
authored
def make_line_iter(stream, limit=None, buffer_size=10 * 1024):
    """Safely iterates line-based over an input stream.  If the input
    stream is not already limited a `limit` should be passed so that it
    is wrapped in a :class:`LimitedStream`.

    This uses the stream's :meth:`~file.read` method internally as opposed
    to the :meth:`~file.readline` method that is unsafe and can only be used
    in violation of the WSGI specification.  The same problem applies to the
    `__iter__` function of the input stream which calls :meth:`~file.readline`
    without arguments.

    If you need line-by-line processing it's strongly recommended to iterate
    over the input stream using this helper function.

    .. versionchanged:: 0.8
       This function now ensures that the limit was reached.

    .. versionadded:: 0.9
       added support for iterators as input stream.

    :param stream: the stream or iterable to iterate over.
    :param limit: the limit in bytes for the stream.  (Usually
                  content length.  Not necessary if the `stream`
                  is a :class:`LimitedStream`.)
    :param buffer_size: The optional buffer size.
    """
    chunks = _make_chunk_iter(stream, limit, buffer_size)

    head = next(chunks, '')
    if not head:
        return

    # Build the literals in the same string type as the data.
    literal = make_literal_wrapper(head)
    empty = literal('')
    cr = literal('\r')
    lf = literal('\n')
    crlf = literal('\r\n')

    # Put the peeked chunk back in front of the remaining ones.
    chunks = chain((head,), chunks)

    def _iter_basic_lines():
        join = empty.join
        pending = []
        for data in chunks:
            if not data:
                break
            fragments = []
            for piece in chain(pending, data.splitlines(True)):
                fragments.append(piece)
                # A piece ending in CR or LF completes the current line.
                if piece and piece[-1:] in crlf:
                    yield join(fragments)
                    fragments = []
            # Whatever is left is an unterminated line that continues in
            # the next chunk.
            pending = fragments
        if pending:
            yield join(pending)

    # This hackery is necessary to merge 'foo\r' and '\n' into one item
    # of 'foo\r\n' if we were unlucky and we hit a chunk boundary.
    held_back = empty
    for line in _iter_basic_lines():
        if line == lf and held_back[-1:] == cr:
            held_back += line
            line = empty
        if held_back:
            yield held_back
        held_back = line
    if held_back:
        yield held_back
f6fc8d4 @mitsuhiko More refactoring. Moved a lot of stuff into a new werkzeug.wsgi module.
authored
855
856
b0080b8 @mitsuhiko Added support for stream URL encoding and decoding
authored
def make_chunk_iter(stream, separator, limit=None, buffer_size=10 * 1024):
    """Works like :func:`make_line_iter` but accepts a separator
    which divides chunks.  If you want newline based processing
    you should use :func:`make_line_iter` instead as it
    supports arbitrary newline markers.

    .. versionadded:: 0.8

    .. versionadded:: 0.9
       added support for iterators as input stream.

    :param stream: the stream or iterable to iterate over.
    :param separator: the separator that divides chunks.  It is converted
                      to the string type of the data read from `stream`.
    :param limit: the limit in bytes for the stream.  (Usually
                  content length.  Not necessary if the `stream`
                  is otherwise already limited).
    :param buffer_size: The optional buffer size.
    """
    source = _make_chunk_iter(stream, limit, buffer_size)

    head = next(source, '')
    if not head:
        return

    # Put the peeked item back in front of the remaining ones.
    source = chain((head,), source)
    if isinstance(head, text_type):
        separator = to_unicode(separator)
        pattern = re.compile(r'(%s)' % re.escape(separator))
        glue = u''
    else:
        separator = to_bytes(separator)
        pattern = re.compile(b'(' + re.escape(separator) + b')')
        glue = b''
    split = pattern.split
    join = glue.join

    carry = []
    for data in source:
        if not data:
            break
        # The capturing group keeps the separators in the split result so
        # that chunk boundaries can be recognized below.
        pieces = split(data)
        parts = []
        for piece in chain(carry, pieces):
            if piece != separator:
                parts.append(piece)
                continue
            yield join(parts)
            parts = []
        # Unterminated data is carried over into the next round.
        carry = parts
    if carry:
        yield join(carry)
b0080b8 @mitsuhiko Added support for stream URL encoding and decoding
authored
907
908
bd6976b @mitsuhiko Fixed iter protocol on limited streams
authored
909 @implements_iterator
f6fc8d4 @mitsuhiko More refactoring. Moved a lot of stuff into a new werkzeug.wsgi module.
authored
class LimitedStream(object):

    """Wraps a stream so that it doesn't read more than n bytes.  If the
    stream is exhausted and the caller tries to get more bytes from it
    :func:`on_exhausted` is called which by default returns an empty
    string.  The return value of that function is forwarded
    to the reader function.  So if it returns an empty string
    :meth:`read` will return an empty string as well.

    The limit however must never be higher than what the stream can
    output.  Otherwise :meth:`readlines` will try to read past the
    limit.

    .. admonition:: Note on WSGI compliance

       calls to :meth:`readline` and :meth:`readlines` are not
       WSGI compliant because it passes a size argument to the
       readline methods.  Unfortunately the WSGI PEP is not safely
       implementable without a size argument to :meth:`readline`
       because there is no EOF marker in the stream.  As a result
       of that the use of :meth:`readline` is discouraged.

       For the same reason iterating over the :class:`LimitedStream`
       is not portable.  It internally calls :meth:`readline`.

       We strongly suggest using :meth:`read` only or using the
       :func:`make_line_iter` which safely iterates line-based
       over a WSGI input stream.

    :param stream: the stream to wrap.
    :param limit: the limit for the stream, must not be longer than
                  what the stream can provide if the stream does not
                  end with `EOF` (like `wsgi.input`)
    """

    def __init__(self, stream, limit):
        self._read = stream.read
        self._readline = stream.readline
        self._pos = 0
        self.limit = limit

    def __iter__(self):
        return self

    @property
    def is_exhausted(self):
        """If the stream is exhausted this attribute is `True`."""
        return self._pos >= self.limit

    def on_exhausted(self):
        """This is called when the stream tries to read past the limit.
        The return value of this function is returned from the reading
        function.
        """
        # Read null bytes from the stream so that we get the
        # correct end of stream marker.
        return self._read(0)

    def on_disconnect(self):
        """What should happen if a disconnect is detected?  The return
        value of this function is returned from read functions in case
        the client went away.  By default a
        :exc:`~werkzeug.exceptions.ClientDisconnected` exception is raised.
        """
        from werkzeug.exceptions import ClientDisconnected
        raise ClientDisconnected()

    def exhaust(self, chunk_size=1024 * 64):
        """Exhaust the stream.  This consumes all the data left until the
        limit is reached.

        :param chunk_size: the size for a chunk.  It will read the chunk
                           until the stream is exhausted and throw away
                           the results.
        """
        to_read = self.limit - self._pos
        chunk = chunk_size
        while to_read > 0:
            chunk = min(to_read, chunk)
            self.read(chunk)
            to_read -= chunk

    def read(self, size=None):
        """Read `size` bytes or if size is not provided everything is read.

        :param size: the number of bytes read.  `None` or any negative
                     value reads everything up to the limit, like the
                     `read` method of regular files.
        """
        if self._pos >= self.limit:
            return self.on_exhausted()
        remaining = self.limit - self._pos
        # A negative size must never reach the wrapped stream: there it
        # means "read until EOF", which would ignore the limit.
        if size is None or size < 0:
            to_read = remaining
        else:
            to_read = min(remaining, size)
        try:
            read = self._read(to_read)
        except (IOError, ValueError):
            return self.on_disconnect()
        # Fewer bytes than requested means the client went away.
        if to_read and len(read) != to_read:
            return self.on_disconnect()
        self._pos += len(read)
        return read

    def readline(self, size=None):
        """Reads one line from the stream.

        :param size: the maximum number of bytes to return.  `None` or a
                     negative value allows everything up to the limit.
        """
        if self._pos >= self.limit:
            return self.on_exhausted()
        remaining = self.limit - self._pos
        # Negative sizes mean "no size restriction" for files; keep them
        # away from the wrapped stream so the limit stays in effect.
        if size is None or size < 0:
            size = remaining
        else:
            size = min(size, remaining)
        try:
            line = self._readline(size)
        except (ValueError, IOError):
            return self.on_disconnect()
        if size and not line:
            return self.on_disconnect()
        self._pos += len(line)
        return line

    def readlines(self, size=None):
        """Reads a file into a list of strings.  It calls :meth:`readline`
        until the limit, or `size` bytes counted from the current
        position, have been consumed.  It never reads more than `size`
        bytes in total, so the last item can be an incomplete line.

        :param size: the maximum number of bytes to consume.  `None` or a
                     negative value reads up to the limit.
        """
        if size is None or size < 0:
            end = self.limit
        else:
            end = min(self.limit, self._pos + size)
        result = []
        while self._pos < end:
            # Only hand out what is left of the budget so that the
            # position cannot move past `end`.
            line = self.readline(end - self._pos)
            if not line:
                # Only possible if `on_disconnect` was overridden to
                # return an empty value; stop instead of spinning.
                break
            result.append(line)
        return result

    def tell(self):
        """Returns the position of the stream.

        .. versionadded:: 0.9
        """
        return self._pos

    def __next__(self):
        line = self.readline()
        if not line:
            raise StopIteration()
        return line
Something went wrong with that request. Please try again.