Skip to content

Commit

Permalink
Rename isbinarytext function to binary_is_text for clarity
Browse files Browse the repository at this point in the history
  • Loading branch information
nyov committed Mar 17, 2016
1 parent e122c56 commit ebf0efc
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 15 deletions.
4 changes: 2 additions & 2 deletions scrapy/responsetypes.py
Expand Up @@ -10,7 +10,7 @@

from scrapy.http import Response
from scrapy.utils.misc import load_object
from scrapy.utils.python import isbinarytext, to_bytes, to_native_str
from scrapy.utils.python import binary_is_text, to_bytes, to_native_str


class ResponseTypes(object):
Expand Down Expand Up @@ -94,7 +94,7 @@ def from_body(self, body):
cannot be guessed using more straightforward methods."""
chunk = body[:5000]
chunk = to_bytes(chunk)
if isbinarytext(chunk):
if not binary_is_text(chunk):
return self.from_mimetype('application/octet-stream')
elif b"<html>" in chunk.lower():
return self.from_mimetype('text/html')
Expand Down
20 changes: 14 additions & 6 deletions scrapy/utils/python.py
Expand Up @@ -174,17 +174,25 @@ def new_method(self, *args, **kwargs):
return cache[self]
return new_method


_BINARYCHARS = {six.b(chr(i)) for i in range(32)} - {b"\0", b"\t", b"\n", b"\r"}
_BINARYCHARS |= {ord(ch) for ch in _BINARYCHARS}


@deprecated("scrapy.utils.python.binary_is_text")
def isbinarytext(text):
"""Return True if the given text is considered binary, or False
otherwise, by looking for binary bytes at their chars
""" This function is deprecated.
Please use scrapy.utils.python.binary_is_text, which was created to be
clearer about the function's behavior: it returns the inverse of this one. """
return not binary_is_text(text)


def binary_is_text(data):
""" Returns `True` if the given ``data`` argument (a ``bytes`` object)
does not contain unprintable control characters.
"""
if not isinstance(text, bytes):
raise TypeError("text must be bytes, got '%s'" % type(text).__name__)
return any(c in _BINARYCHARS for c in text)
if not isinstance(data, bytes):
raise TypeError("data must be bytes, got '%s'" % type(data).__name__)
return all(c not in _BINARYCHARS for c in data)


def get_func_args(func, stripself=False):
Expand Down
14 changes: 7 additions & 7 deletions tests/test_utils_python.py
Expand Up @@ -5,7 +5,7 @@
import six

from scrapy.utils.python import (
memoizemethod_noargs, isbinarytext, equal_attributes,
memoizemethod_noargs, binary_is_text, equal_attributes,
WeakKeyCache, stringify_dict, get_func_args, to_bytes, to_unicode,
without_none_values)

Expand Down Expand Up @@ -71,18 +71,18 @@ def noncached(self):
assert one is not three


class IsBinaryTextTest(unittest.TestCase):
def test_isbinarytext(self):
assert not isbinarytext(b"hello")
class BinaryIsTextTest(unittest.TestCase):
def test_binaryistext(self):
assert binary_is_text(b"hello")

def test_utf_16_strings_contain_null_bytes(self):
assert not isbinarytext(u"hello".encode('utf-16'))
assert binary_is_text(u"hello".encode('utf-16'))

def test_one_with_encoding(self):
assert not isbinarytext(b"<div>Price \xa3</div>")
assert binary_is_text(b"<div>Price \xa3</div>")

def test_real_binary_bytes(self):
assert isbinarytext(b"\x02\xa3")
assert not binary_is_text(b"\x02\xa3")



Expand Down

0 comments on commit ebf0efc

Please sign in to comment.