Skip to content

Commit

Permalink
Clarify re_encode_path_info
Browse files Browse the repository at this point in the history
  • Loading branch information
mar10 committed Nov 27, 2021
1 parent ba908cb commit f4de837
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 18 deletions.
44 changes: 37 additions & 7 deletions wsgidav/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,11 +190,34 @@ def unicode_to_wsgi(u):


def wsgi_to_bytes(s):
    """Encode a native string as a WSGI / HTTP compatible byte string.

    WSGI always assumes iso-8859-1 (PEP 3333), so that charset is used
    unconditionally here.
    https://bugs.python.org/issue16679#msg177450
    """
    wsgi_charset = "iso-8859-1"
    return s.encode(wsgi_charset)


def re_encode_wsgi(s: str, *, encoding="utf-8", fallback=False) -> str:
    """Convert a WSGI string to `str`, assuming the client used UTF-8.

    WSGI always assumes iso-8859-1. Modern clients send UTF-8, so we have to
    re-encode.

    https://www.python.org/dev/peps/pep-3333/#unicode-issues
    https://bugs.python.org/issue16679#msg177450

    Args:
        s: Native WSGI string (iso-8859-1 decoded). `bytes` are accepted too
            and are decoded directly using `encoding`.
        encoding: Charset the client is assumed to have used.
        fallback: If true, return the input unchanged (as `str`) when it
            cannot be re-encoded, instead of raising.

    Returns:
        The re-decoded string, or the original value as `str` if re-encoding
        failed and `fallback` is true.

    Raises:
        UnicodeDecodeError: The raw bytes are not valid `encoding` and
            `fallback` is false.
        UnicodeEncodeError: `s` contains characters outside iso-8859-1 (i.e.
            it is not a WSGI native string) and `fallback` is false.
    """
    try:
        if isinstance(s, bytes):
            # haven't seen this case, but may be possible according to PEP 3333?
            return s.decode(encoding)
        return s.encode("iso-8859-1").decode(encoding)
    except UnicodeError:
        # Covers UnicodeDecodeError (raw bytes are not valid `encoding`) as
        # well as UnicodeEncodeError (`s` is not latin-1 representable, e.g.
        # because it was already decoded by the server).
        if not fallback:
            raise
    if isinstance(s, bytes):
        # Honor the `str` return type: iso-8859-1 can decode any byte value
        # and yields the equivalent WSGI native string.
        return s.decode("iso-8859-1")
    return s


# ========================================================================
# Time tools
# ========================================================================
Expand Down Expand Up @@ -441,13 +464,13 @@ def dynamic_import_class(name):
try:
module = importlib.import_module(module_name)
except Exception as e:
_logger.exception("Dynamic import of {!r} failed: {}".format(name, e))
_logger.error("Dynamic import of {!r} failed: {}".format(name, e))
raise
the_class = getattr(module, class_name)
return the_class


def dynamic_instantiate_class(class_name, options, *, expand=None):
def dynamic_instantiate_class(class_name, options, *, expand=None, raise_error=True):
"""Import a class and instantiate with custom args.
Equivalent of
Expand Down Expand Up @@ -481,11 +504,11 @@ def _expand(v):
{"args", "kwargs"},
msg=f"Invalid class instantiation options for {class_name}",
)
pos_args = options.get("args", [])
pos_args = options.get("args") or []
if pos_args is not None and not isinstance(pos_args, (tuple, list)):
raise ValueError(f"Expected list format for `args` option: {options}")

kwargs = options.get("kwargs", {})
kwargs = options.get("kwargs") or {}
if kwargs is not None and not isinstance(kwargs, dict):
raise ValueError(f"Expected dict format for `kwargs` option: {options}")

Expand All @@ -512,7 +535,11 @@ def _expand(v):
"Instantiate {}({}) => {}".format(class_name, ", ".join(disp_args), inst)
)
except Exception:
_logger.exception(f"ERROR: Instantiate {class_name}({options}) failed")
msg = f"Instantiate {class_name}({options}) failed"
if raise_error:
_logger.error(msg)
raise
_logger.exception(msg)

return inst

Expand Down Expand Up @@ -1565,6 +1592,9 @@ def test_if_header_dict(dav_res, dictIf, fullurl, locktokenlist, entitytag):
".ogg": "audio/ogg",
".ogv": "video/ogg",
".webm": "video/webm",
# https://mailarchive.ietf.org/arch/msg/media-types/DA8UuKX2dyaVxWh-oevy-t3Vg9Q/
".yml": "application/yaml",
".yaml": "application/yaml",
}


Expand Down
22 changes: 11 additions & 11 deletions wsgidav/wsgidav_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ def __init__(self, config):
app = app_class(self, self.application, config)
elif type(mw) is dict:
# If a dict with one entry is passed, expect {class: ..., kwargs: ...}
expand = {"${application}": self.application}
app = dynamic_instantiate_class_from_opts(mw, expand=expand)
elif inspect.isclass(mw):
# If a class is passed, assume BaseMiddleware (or compatible)
Expand Down Expand Up @@ -399,16 +400,15 @@ def __call__(self, environ, start_response):

path = environ["PATH_INFO"]

# (#73) Failed on processing non-iso-8859-1 characters on Python 3
#
# Note: we encode using UTF-8 here (falling back to ISO-8859-1)!
# This seems to be wrong, since per PEP 3333 PATH_INFO is always ISO-8859-1 encoded
# (see https://www.python.org/dev/peps/pep-3333/#unicode-issues).
# But also seems to resolve errors when accessing resources with Chinese characters, for
# example.
# This is done by default for Python 3, but can be turned off in settings.
# WSGI always assumes iso-8859-1. Modern clients send UTF-8, so we may
# have to re-encode.
# See also:
# - Issue #73
# - https://www.python.org/dev/peps/pep-3333/#unicode-issues
# - https://bugs.python.org/issue16679#msg177450
# (The hotfixes.re_encode_path_info option is true by default.)
if self.re_encode_path_info:
path = environ["PATH_INFO"] = util.wsgi_to_bytes(path).decode()
path = environ["PATH_INFO"] = util.re_encode_wsgi(path)

# We optionally unquote PATH_INFO here, although this should already be
# done by the server (#8, #228).
Expand Down Expand Up @@ -565,8 +565,8 @@ def _start_response_wrapper(status, response_headers, exc_info=None):
extra.append("elap={:.3f}sec".format(time.time() - start_time))
extra = ", ".join(extra)

# This is the CherryPy format:
# 127.0.0.1 - - [08/Jul/2009:17:25:23] "GET /loginPrompt?redirect=/renderActionList%3Frelation%3Dpersonal%26key%3D%26filter%3DprivateSchedule&reason=0 HTTP/1.1" 200 1944 "http://127.0.0.1:8002/command?id=CMD_Schedule" "Mozilla/5.0 (Windows; U; Windows NT 6.0; de; rv:1.9.1) Gecko/20090624 Firefox/3.5" # noqa
# This is the CherryPy format:
# 127.0.0.1 - - [08/Jul/2009:17:25:23] "GET /loginPrompt?redirect=/renderActionList%3Frelation%3Dpersonal%26key%3D%26filter%3DprivateSchedule&reason=0 HTTP/1.1" 200 1944 "http://127.0.0.1:8002/command?id=CMD_Schedule" "Mozilla/5.0 (Windows; U; Windows NT 6.0; de; rv:1.9.1) Gecko/20090624 Firefox/3.5" # noqa
_logger.info(
'{addr} - {user} - [{time}] "{method} {path}" {extra} -> {status}'.format(
addr=environ.get("REMOTE_ADDR", ""),
Expand Down

0 comments on commit f4de837

Please sign in to comment.