Skip to content

Commit

Permalink
More docs cleanup; Raise TypeError on py3+unix for invalid paths like…
Browse files Browse the repository at this point in the history
… on other platforms
  • Loading branch information
lazka committed Aug 26, 2016
1 parent 82c9003 commit 3c83e25
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 49 deletions.
2 changes: 2 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ types depending on the Python version and platform used.
.. class:: text()

Represents :obj:`unicode` under Python 2 and :obj:`str` under Python 3.
Does not include `surrogates
<https://www.python.org/dev/peps/pep-0383/>`__.


.. class:: bytes()
Expand Down
124 changes: 77 additions & 47 deletions senf/_fsnative.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ def _create_fsnative(type_):
class meta(type):

def __instancecheck__(self, instance):
# XXX: invalid str on Unix + Py3 still returns True here, but
# might fail when passed to fsnative API. We could be more strict
# here and call _validate_fsnative(), but then we could
# have a value not being an instance of fsnative, while its type
# is still a subclass of fsnative.. and this is enough magic
# already.
return isinstance(instance, type_)

def __subclasscheck__(self, subclass):
Expand All @@ -66,9 +72,7 @@ class impl(object):
Args:
text (text): The text to convert to a path
Returns:
fsnative:
The new path. Depending on the Python version and platform
this is either `text` or `bytes`.
fsnative: The new path.
Raises:
TypeError: In case something other than `text` has been passed
Expand All @@ -91,7 +95,7 @@ class impl(object):
which can be encoded with the locale encoding) under Python 3 +
Unix
Constructing a `fsnative` can't fail as long as `text` is passed.
Constructing a `fsnative` can't fail.
"""

def __new__(cls, text=u""):
Expand All @@ -106,6 +110,39 @@ def __new__(cls, text=u""):
fsnative = _create_fsnative(fsnative_type)


def _validate_fsnative(path):
    """Validate the type and content of a `fsnative` path.

    Args:
        path (fsnative)
    Returns:
        `text` on Windows, `bytes` on Unix
    Raises:
        TypeError: in case the type is wrong or the `str` on Py3 + Unix
            can't be converted to `bytes`

    This helper allows validating the type and content of a path.
    To reduce overhead the encoded value for Py3 + Unix is returned so
    it can be reused.
    """

    if not isinstance(path, fsnative_type):
        raise TypeError("path needs to be %s, not %s" % (
            fsnative_type.__name__, type(path).__name__))

    if PY3 and is_unix:
        try:
            # Encoding doubles as the content check; the result is
            # returned so callers don't have to encode a second time.
            return path.encode(_encoding, "surrogateescape")
        except UnicodeEncodeError:
            # This looks more like a ValueError, but raising only one
            # error type makes things simpler... also one could say
            # str + surrogates is its own type.
            # Note the trailing space after "in": adjacent literals are
            # joined verbatim, so it is needed to keep the words apart.
            raise TypeError(
                "path contained Unicode code points not valid in "
                "the current path encoding. To create a valid "
                "path from Unicode use text2fsn()")

    return path


def _get_encoding():
"""The encoding used for paths, argv, environ, stdout and stdin"""

Expand All @@ -132,11 +169,7 @@ def path2fsn(path):
TypeError: In case the type can't be converted to a `fsnative`
ValueError: In case conversion fails
Returns a fsnative path for a path-like.
If the passed in path is a `fsnative` path it simply returns it.
This will not fail for a valid path (Either retrieved from stdlib functions
or `argv` or `environ` or through the `fsnative` helper)
Returns a `fsnative` path for a `pathlike`.
"""

# allow mbcs str on py2+win and bytes on py3
Expand Down Expand Up @@ -171,25 +204,18 @@ def fsn2text(path):
Raises:
TypeError: In case no `fsnative` has been passed
Converts a path to text. This process is not reversible and should
only be used for display purposes.
On Python 3 the resulting `str` will not contain surrogates.
Converts a `fsnative` path to `text`.
This can't fail if a valid `fsnative` gets passed.
This process is not reversible and should only be used for display
purposes.
"""

if not isinstance(path, fsnative_type):
raise TypeError("path needs to be %s", fsnative_type.__name__)
path = _validate_fsnative(path)

if fsnative_type is bytes:
return path.decode(_encoding, "replace")
if is_win:
return path
else:
if PY2 or is_win:
return path
else:
return path.encode(
_encoding, "surrogateescape").decode(_encoding, "replace")
return path.decode(_encoding, "replace")


def text2fsn(text):
Expand All @@ -198,12 +224,12 @@ def text2fsn(text):
text (text): The text to convert
Returns:
`fsnative`
Raises:
TypeError: In case no `text` has been passed
Takes `text` and converts it to a `fsnative`. This operation is not
reversible and can't fail.
Takes `text` and converts it to a `fsnative`.
This is the same as calling ``fsnative(text)`` and available for
consistency.
This operation is not reversible and can't fail.
"""

return fsnative(text)
Expand All @@ -220,24 +246,25 @@ def fsn2bytes(path, encoding):
TypeError: If no `fsnative` path is passed
ValueError: On Windows if no valid encoding is passed or encoding fails
Turns a path to bytes. If the path is not associated with an encoding
the passed encoding is used (under Windows for example).
Turns a `fsnative` path to `bytes`.
The passed *encoding* is only used on platforms where paths are not
associated with an encoding (Windows for example). If you don't care about
Windows you can pass `None`.
"""

if not isinstance(path, fsnative_type):
raise TypeError("path needs to be %s", fsnative_type.__name__)
path = _validate_fsnative(path)

if is_win:
if encoding is None:
raise ValueError("invalid encoding %r" % encoding)

try:
return path.encode(encoding)
except LookupError:
raise ValueError("invalid encoding %r" % encoding)
elif PY2:
return path
else:
return path.encode(_encoding, "surrogateescape")
return path


def bytes2fsn(data, encoding):
Expand All @@ -249,10 +276,14 @@ def bytes2fsn(data, encoding):
`fsnative`
Raises:
TypeError: If no `bytes` path is passed
ValueError: On Windows if no valid encoding is passed or decoding fails
LookupError: In case the passed encoding is unknown
ValueError: If decoding fails or no encoding is given
Turns bytes to a path. If the path is not associated with an encoding
the passed encoding is used (under Windows for example)
Turns `bytes` to a `fsnative` path.
The passed *encoding* is only used on platforms where paths are not
associated with an encoding (Windows for example). If you don't care about
Windows you can pass `None`.
"""

if not isinstance(data, bytes):
Expand Down Expand Up @@ -281,7 +312,7 @@ def uri2fsn(uri):
TypeError: In case an invalid type is passed
ValueError: In case the URI isn't a valid file URI
Takes a file URI and returns a fsnative path
Takes a file URI and returns a `fsnative` path
"""

if PY2:
Expand Down Expand Up @@ -325,14 +356,16 @@ def fsn2uri(path):
TypeError: If no `fsnative` was passed
ValueError: If the path can't be converted
Takes a fsnative path and returns a file URI.
Takes a `fsnative` path and returns a file URI.
On Windows this returns a unicode URI. If you want an ASCII URI
On Windows this returns a Unicode URI. If you want an ASCII URI
use :func:`fsn2uri_ascii` instead.
The returned type is a subset of `fsnative`: it has the same type but
does not contain surrogates.
"""

if not isinstance(path, fsnative_type):
raise TypeError("path needs to be %s", fsnative_type.__name__)
path = _validate_fsnative(path)

if is_win:
buf = ctypes.create_unicode_buffer(winapi.INTERNET_MAX_URL_LENGTH)
Expand All @@ -344,10 +377,7 @@ def fsn2uri(path):
raise ValueError(e)
return buf[:length.value]
else:
if PY2:
return "file://" + quote(path)
else:
return "file://" + quote(path.encode(_encoding, "surrogateescape"))
return "file://" + quote(path)


def fsn2uri_ascii(path):
Expand All @@ -360,7 +390,7 @@ def fsn2uri_ascii(path):
TypeError: If no `fsnative` was passed
ValueError: If the path can't be converted
Takes a fsnative path and returns a file URI.
Takes a `fsnative` path and returns a file URI.
Like fsn2uri() but returns ASCII only. On Windows non-ASCII characters
will be encoded using utf-8 and then percent encoded.
Expand Down
8 changes: 7 additions & 1 deletion senf/_print.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,16 @@
def print_(*objects, **kwargs):
"""print_(*objects, sep=None, end=None, file=None, flush=False)
Arguments:
Args:
objects (object): zero or more objects to print
sep (str): Object separator to use, defaults to ``" "``
end (str): Trailing string to use, defaults to ``"\\n"``.
If end is ``"\\n"`` then `os.linesep` is used.
file (object): A file-like object, defaults to `sys.stdout`
flush (bool): If the file stream should be flushed
Raises:
OSError
IOError
Like print(), but:
Expand Down Expand Up @@ -315,6 +318,9 @@ def input_(prompt=None):
adding a trailing newline
Returns:
`fsnative`
Raises:
OSError
IOError
Like :func:`python3:input` but returns a `fsnative` and allows printing
filenames as prompt to stdout.
Expand Down
22 changes: 21 additions & 1 deletion tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
del_windows_env_var
from senf._winansi import ansi_parse, ansi_split
from senf._stdlib import _get_userdir
from senf._fsnative import _encoding
from senf._fsnative import _encoding, is_unix
from senf._print import _encode_codepage, _decode_codepage


Expand All @@ -50,6 +50,17 @@ def notfsnative(text=u""):
assert not isinstance(notfsnative(), fsnative)


def iternotfsn():
    """Yield values that are not valid `fsnative` paths.

    Always yields a `notfsnative` instance. On Python 3 + Unix it also
    yields a `str` that the path encoding can't encode, if there is one.
    """

    yield notfsnative(u"foo")

    if not (PY3 and is_unix):
        return

    sample = u"\u1234"
    try:
        sample.encode(_encoding)
    except UnicodeEncodeError:
        # with e.g. an ASCII path encoding this str is an invalid path
        yield sample


@contextlib.contextmanager
def preserve_environ():
old = environ.copy()
Expand Down Expand Up @@ -391,6 +402,15 @@ def test_fsn2text():
with pytest.raises(TypeError):
fsn2text(notfsnative(u"foo"))

if PY3 and is_unix:
try:
u"\u1234".encode(_encoding)
except UnicodeEncodeError:
# in case we have an ASCII encoding, this should fail with a
# TypeError
with pytest.raises(TypeError):
fsn2text(u"\u1234")


def test_text2fsn():
with pytest.raises(TypeError):
Expand Down

0 comments on commit 3c83e25

Please sign in to comment.