Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

w3lib.utils: deprecate functions not needed for Python 3 #170

Merged
merged 5 commits into from
Jul 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions tests/test_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from unittest import TestCase

from pytest import deprecated_call, raises

from w3lib.util import (
str_to_unicode,
to_bytes,
to_native_str,
to_unicode,
unicode_to_str,
)


class StrToUnicodeTestCase(TestCase):
    """Checks for the deprecated ``str_to_unicode`` helper."""

    def test_deprecation(self):
        """Calling ``str_to_unicode`` must trigger a deprecation warning."""
        empty = ''
        with deprecated_call():
            str_to_unicode(empty)


class ToBytesTestCase(TestCase):
    """Checks for ``to_bytes`` input validation."""

    def test_type_error(self):
        """A value that is neither text nor bytes is rejected with TypeError."""
        not_text = True
        with raises(TypeError):
            to_bytes(not_text)


class ToNativeStrTestCase(TestCase):
    """Checks for the deprecated ``to_native_str`` helper."""

    def test_deprecation(self):
        """Calling ``to_native_str`` must trigger a deprecation warning."""
        empty = ''
        with deprecated_call():
            to_native_str(empty)


class ToUnicodeTestCase(TestCase):
    """Checks for ``to_unicode`` input validation."""

    def test_type_error(self):
        """A value that is neither bytes nor text is rejected with TypeError."""
        not_text = True
        with raises(TypeError):
            to_unicode(not_text)


class UnicodeToStrTestCase(TestCase):
    """Checks for the deprecated ``unicode_to_str`` helper."""

    def test_deprecation(self):
        """Calling ``unicode_to_str`` must trigger a deprecation warning."""
        empty = ''
        with deprecated_call():
            unicode_to_str(empty)
43 changes: 17 additions & 26 deletions w3lib/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@
urlunsplit,
)
from urllib.request import pathname2url, url2pathname
from w3lib.util import to_bytes, to_native_str, to_unicode
from w3lib.util import to_unicode


# error handling function for bytes-to-Unicode decoding errors with URLs
def _quote_byte(error):
return (to_unicode(quote(error.object[error.start:error.end])), error.end)
return (quote(error.object[error.start:error.end]), error.end)

codecs.register_error('percentencode', _quote_byte)

Expand Down Expand Up @@ -77,26 +77,22 @@ def safe_url_string(url, encoding='utf8', path_encoding='utf8', quote_path=True)
# IDNA encoding can fail for too long labels (>63 characters)
# or missing labels (e.g. http://.example.com)
try:
netloc = parts.netloc.encode('idna')
netloc = parts.netloc.encode('idna').decode()
except UnicodeError:
netloc = parts.netloc

# default encoding for path component SHOULD be UTF-8
if quote_path:
path = quote(to_bytes(parts.path, path_encoding), _path_safe_chars)
path = quote(parts.path.encode(path_encoding), _path_safe_chars)
else:
path = to_native_str(parts.path)
path = parts.path

# quote() in Python2 return type follows input type;
# quote() in Python3 always returns Unicode (native str)
return urlunsplit((
to_native_str(parts.scheme),
to_native_str(netloc).rstrip(':'),
parts.scheme,
netloc.rstrip(':'),
path,
# encoding of query and fragment follows page encoding
# or form-charset (if known and passed)
quote(to_bytes(parts.query, encoding), _safe_chars),
quote(to_bytes(parts.fragment, encoding), _safe_chars),
quote(parts.query.encode(encoding), _safe_chars),
quote(parts.fragment.encode(encoding), _safe_chars),
))


Expand Down Expand Up @@ -410,22 +406,17 @@ def _safe_ParseResult(parts, encoding='utf8', path_encoding='utf8'):
# IDNA encoding can fail for too long labels (>63 characters)
# or missing labels (e.g. http://.example.com)
try:
netloc = parts.netloc.encode('idna')
netloc = parts.netloc.encode('idna').decode()
except UnicodeError:
netloc = parts.netloc

return (
to_native_str(parts.scheme),
to_native_str(netloc),

# default encoding for path component SHOULD be UTF-8
quote(to_bytes(parts.path, path_encoding), _path_safe_chars),
quote(to_bytes(parts.params, path_encoding), _safe_chars),

# encoding of query and fragment follows page encoding
# or form-charset (if known and passed)
quote(to_bytes(parts.query, encoding), _safe_chars),
quote(to_bytes(parts.fragment, encoding), _safe_chars)
parts.scheme,
netloc,
quote(parts.path.encode(path_encoding), _path_safe_chars),
quote(parts.params.encode(path_encoding), _safe_chars),
quote(parts.query.encode(encoding), _safe_chars),
quote(parts.fragment.encode(encoding), _safe_chars)
)


Expand Down Expand Up @@ -466,7 +457,7 @@ def canonicalize_url(url, keep_blank_values=True, keep_fragments=False,
# if not for proper URL expected by remote website.
try:
scheme, netloc, path, params, query, fragment = _safe_ParseResult(
parse_url(url), encoding=encoding)
parse_url(url), encoding=encoding or 'utf8')
except UnicodeEncodeError as e:
scheme, netloc, path, params, query, fragment = _safe_ParseResult(
parse_url(url), encoding='utf8')
Expand Down
32 changes: 28 additions & 4 deletions w3lib/util.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,26 @@
from warnings import warn


def str_to_unicode(text, encoding=None, errors='strict'):
    """Decode ``text`` to ``str`` when it is ``bytes``; otherwise return it as is.

    Deprecated: every call emits a ``DeprecationWarning`` attributed to the
    caller (``stacklevel=2``).

    :param text: value to convert; only ``bytes`` input is decoded.
    :param encoding: codec name used for decoding; ``None`` means ``'utf-8'``.
    :param errors: error-handling scheme passed to ``bytes.decode``.
    :return: the decoded ``str`` for ``bytes`` input, else ``text`` unchanged.
    """
    warn(
        # The module is ``w3lib.util`` (singular), so name it correctly.
        "The w3lib.util.str_to_unicode function is deprecated and "
        "will be removed in a future release.",
        DeprecationWarning,
        stacklevel=2,
    )
    if encoding is None:
        encoding = 'utf-8'
    if isinstance(text, bytes):
        return text.decode(encoding, errors)
    return text

def unicode_to_str(text, encoding=None, errors='strict'):
warn(
"The w3lib.utils.unicode_to_str function is deprecated and "
"will be removed in a future release.",
DeprecationWarning,
stacklevel=2,
)
if encoding is None:
encoding = 'utf-8'
if isinstance(text, str):
Expand All @@ -18,8 +33,9 @@ def to_unicode(text, encoding=None, errors='strict'):
if isinstance(text, str):
return text
if not isinstance(text, (bytes, str)):
raise TypeError('to_unicode must receive a bytes, str or unicode '
'object, got %s' % type(text).__name__)
raise TypeError(
f'to_unicode must receive bytes or str, got {type(text).__name__}'
)
if encoding is None:
encoding = 'utf-8'
return text.decode(encoding, errors)
Expand All @@ -30,12 +46,20 @@ def to_bytes(text, encoding=None, errors='strict'):
if isinstance(text, bytes):
return text
if not isinstance(text, str):
raise TypeError('to_bytes must receive a unicode, str or bytes '
'object, got %s' % type(text).__name__)
raise TypeError(
f'to_bytes must receive str or bytes, got {type(text).__name__}'
)
if encoding is None:
encoding = 'utf-8'
return text.encode(encoding, errors)

def to_native_str(text, encoding=None, errors='strict'):
    """Return the ``str`` representation of ``text``.

    Deprecated: every call emits a ``DeprecationWarning`` attributed to the
    caller (``stacklevel=2``) and then delegates to ``to_unicode`` with the
    same arguments.

    :param text: value to convert, forwarded to ``to_unicode``.
    :param encoding: codec name forwarded to ``to_unicode``.
    :param errors: error-handling scheme forwarded to ``to_unicode``.
    :return: whatever ``to_unicode(text, encoding, errors)`` returns.
    """
    warn(
        # The module is ``w3lib.util`` (singular), so name it correctly,
        # including in the suggested replacement.
        "The w3lib.util.to_native_str function is deprecated and "
        "will be removed in a future release. Please use "
        "w3lib.util.to_unicode instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    return to_unicode(text, encoding, errors)