Skip to content

Commit

Permalink
Safeguard fsnative against nulls. Fixes #3
Browse files Browse the repository at this point in the history
Make sure we error out when trying to create an fsnative containing nulls,
and check for nulls when converting an fsnative to something different.

This makes an empty fsnative the only value that
can't be passed to stdlib APIs.
  • Loading branch information
lazka committed Dec 6, 2016
1 parent 6c05270 commit c435b51
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 20 deletions.
71 changes: 52 additions & 19 deletions senf/_fsnative.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,10 +171,16 @@ def _fsnative(text):
path = text.encode(encoding, _surrogatepass)
except UnicodeEncodeError:
path = text.encode("utf-8", _surrogatepass)

if b"\x00" in path:
path = path.replace(b"\x00", fsn2bytes(_fsnative(u"\uFFFD"), None))

if PY3:
return path.decode(_encoding, "surrogateescape")
return path
else:
if u"\x00" in text:
text = text.replace(u"\x00", u"\uFFFD")
return text


Expand Down Expand Up @@ -258,16 +264,24 @@ def _validate_fsnative(path):
raise TypeError("path needs to be %s, not %s" % (
fsnative_type.__name__, type(path).__name__))

if PY3 and is_unix:
try:
return path.encode(_encoding, "surrogateescape")
except UnicodeEncodeError:
# This looks more like a ValueError, but raising only one error
# makes things simpler... also one could say str + surrogates
# is its own type
raise TypeError("path contained Unicode code points not valid in"
"the current path encoding. To create a valid "
"path from Unicode use text2fsn()")
if is_unix:
if PY3:
try:
path = path.encode(_encoding, "surrogateescape")
except UnicodeEncodeError:
# This looks more like a ValueError, but raising only one error
# makes things simpler... also one could say str + surrogates
# is its own type
raise TypeError(
"path contained Unicode code points not valid in"
"the current path encoding. To create a valid "
"path from Unicode use text2fsn()")

if b"\x00" in path:
raise TypeError("fsnative can't contain nulls")
else:
if u"\x00" in path:
raise TypeError("fsnative can't contain nulls")

return path

Expand Down Expand Up @@ -310,14 +324,23 @@ def path2fsn(path):
else:
if isinstance(path, text_type):
path = path.encode(_encoding)
if "\x00" in path:
raise ValueError("embedded null")
else:
path = getattr(os, "fspath", lambda x: x)(path)
if isinstance(path, bytes):
if b"\x00" in path:
raise ValueError("embedded null")
path = path.decode(_encoding, "surrogateescape")
elif is_unix and isinstance(path, str):
# make sure we can encode it and this is not just some random
# unicode string
path.encode(_encoding, "surrogateescape")
data = path.encode(_encoding, "surrogateescape")
if b"\x00" in data:
raise ValueError("embedded null")
else:
if u"\x00" in path:
raise ValueError("embedded null")

if not isinstance(path, fsnative_type):
raise TypeError("path needs to be %s", fsnative_type.__name__)
Expand Down Expand Up @@ -437,13 +460,19 @@ def bytes2fsn(data, encoding):
if encoding is None:
raise ValueError("invalid encoding %r" % encoding)
try:
return _bytes2winpath(data, encoding)
path = _bytes2winpath(data, encoding)
except LookupError:
raise ValueError("invalid encoding %r" % encoding)
elif PY2:
return data
if u"\x00" in path:
raise ValueError("contains nulls")
return path
else:
return data.decode(_encoding, "surrogateescape")
if b"\x00" in data:
raise ValueError("contains nulls")
if PY2:
return data
else:
return data.decode(_encoding, "surrogateescape")


def uri2fsn(uri):
Expand Down Expand Up @@ -482,12 +511,16 @@ def uri2fsn(uri):
path = "\\\\" + path
if PY2:
path = path.decode("utf-8")
if u"\x00" in path:
raise ValueError("embedded null")
return path
else:
if PY2:
return url2pathname(path)
else:
return fsnative(url2pathname(path))
path = url2pathname(path)
if "\x00" in path:
raise ValueError("embedded null")
if PY3:
path = fsnative(path)
return path


def fsn2uri(path):
Expand Down
37 changes: 36 additions & 1 deletion tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,10 +463,17 @@ def test_fsnative():
with pytest.raises(TypeError):
fsnative(b"")

assert fsnative(u"\x00") == fsnative(u"\uFFFD")


def test_path2fsn():
assert isinstance(path2fsn(senf.__path__[0]), fsnative)

with pytest.raises(ValueError):
path2fsn(b"\x00")
with pytest.raises(ValueError):
path2fsn(u"\x00")

if os.name == "nt":
assert path2fsn(u"\u1234") == u"\u1234"
assert path2fsn("abc") == u"abc"
Expand Down Expand Up @@ -548,6 +555,14 @@ def test_fsn2bytes():
assert fsn2bytes(fsnative(u"foo"), "utf-8") == b"foo"
with pytest.raises(TypeError):
fsn2bytes(object(), "utf-8")

if PY3:
with pytest.raises(TypeError):
fsn2bytes(u"\x00", "utf-8")
else:
with pytest.raises(TypeError):
fsn2bytes(b"\x00", "utf-8")

if os.name != "nt":
assert fsn2bytes(fsnative(u"foo"), None) == b"foo"
else:
Expand Down Expand Up @@ -627,7 +642,9 @@ def test_surrogates():
# for utf-16-le we have a workaround
assert bytes2fsn(b"=\xd8", "utf-16-le") == u"\ud83d"
assert bytes2fsn(b"=\xd8=\xd8", "utf-16-le") == u"\ud83d\ud83d"
assert bytes2fsn(b"=\xd8\x00\x00", "utf-16-le") == u"\ud83d\x00"

with pytest.raises(ValueError):
bytes2fsn(b"=\xd8\x00\x00", "utf-16-le")

# 4 byte code point
assert fsn2bytes(u"\U0001f600", "utf-16-le") == b"=\xd8\x00\xde"
Expand Down Expand Up @@ -667,6 +684,12 @@ def test_bytes2fsn():
assert (bytes2fsn(fsn2bytes(fsnative(u"\u1234"), "utf-8"), "utf-8") ==
fsnative(u"\u1234"))

with pytest.raises(ValueError):
bytes2fsn(b"\x00", "utf-8")

with pytest.raises(ValueError):
bytes2fsn(b"\x00\x00", "utf-16-le")

with pytest.raises(TypeError):
bytes2fsn(object(), "utf-8")

Expand Down Expand Up @@ -826,13 +849,21 @@ def test_putenv():

def test_uri2fsn():
if os.name != "nt":
with pytest.raises(ValueError):
assert uri2fsn(u"file:///%00")
with pytest.raises(ValueError):
assert uri2fsn("file:///%00")
assert uri2fsn("file:///foo") == fsnative(u"/foo")
assert uri2fsn(u"file:///foo") == fsnative(u"/foo")
assert isinstance(uri2fsn("file:///foo"), fsnative)
assert isinstance(uri2fsn(u"file:///foo"), fsnative)
assert \
uri2fsn("file:///foo-%E1%88%B4") == path2fsn(b"/foo-\xe1\x88\xb4")
else:
with pytest.raises(ValueError):
assert uri2fsn(u"file:///C:/%00")
with pytest.raises(ValueError):
assert uri2fsn("file:///C:/%00")
assert uri2fsn("file:///C:/foo") == fsnative(u"C:\\foo")
assert uri2fsn(u"file:///C:/foo") == fsnative(u"C:\\foo")
assert isinstance(uri2fsn("file:///C:/foo"), fsnative)
Expand Down Expand Up @@ -863,6 +894,8 @@ def test_fsn2uri():
fsn2uri(object())

if os.name == "nt":
with pytest.raises(TypeError):
fsn2uri(u"\x00")
assert fsn2uri(fsnative(u"C:\\foo")) == "file:///C:/foo"
assert fsn2uri(u"C:\\ö ä%") == "file:///C:/%C3%B6%20%C3%A4%25"
assert (fsn2uri(u"C:\\foo-\u1234") ==
Expand All @@ -879,6 +912,8 @@ def test_fsn2uri():

assert fsn2uri(u"C:\\\uD800\uDC01") == u"file:///C:/%F0%90%80%81"
else:
with pytest.raises(TypeError):
fsn2uri(b"\x00")
if PY2:
path = "/foo-\xe1\x88\xb4"
else:
Expand Down
8 changes: 8 additions & 0 deletions tests/test_hypo.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.

import pytest
from hypothesis import given, strategies

from senf import fsnative, text2fsn, fsn2text, bytes2fsn, fsn2bytes, print_
Expand Down Expand Up @@ -46,13 +47,20 @@ def test_text2fsn(text):

@given(strategies.text())
def test_text_fsn_roudntrip(text):
    """Check that text -> fsnative -> text yields a text_type instance.

    Inputs containing a null character are skipped without asserting
    anything; only null-free text is round-tripped.
    """
    if u"\x00" not in text:
        roundtripped = fsn2text(text2fsn(text))
        assert isinstance(roundtripped, text_type)


@given(strategies.binary(),
strategies.sampled_from(("utf-8", "utf-16-le",
"utf-32-le", "latin-1")))
def test_bytes(data, encoding):
if u"\x00".encode(encoding) in data:
with pytest.raises(ValueError):
bytes2fsn(data, encoding)
return

try:
path = bytes2fsn(data, encoding)
except ValueError:
Expand Down

0 comments on commit c435b51

Please sign in to comment.