scrapy · kmike · Aug 14, 2015 · Aug 14, 2015
diff --git a/parsel/selector.py b/parsel/selector.py
@@ -43,8 +43,14 @@ def create_root_node(text, parser_cls, base_url=None):
 
 class SelectorList(list):
 
+    # __getslice__ is deprecated, but the `list` builtin still defines it on Py2
     def __getslice__(self, i, j):
-        return self.__class__(list.__getslice__(self, i, j))
+        o = super(SelectorList, self).__getslice__(i, j)
+        return self.__class__(o)
+
+    def __getitem__(self, pos):
+        o = super(SelectorList, self).__getitem__(pos)
+        return self.__class__(o) if isinstance(pos, slice) else o
 
     def xpath(self, xpath):
         return self.__class__(flatten([x.xpath(xpath) for x in self]))
@@ -158,8 +164,6 @@ def extract(self):
                 return six.text_type(self.root)
 
     def register_namespace(self, prefix, uri):
-        if self.namespaces is None:
-            self.namespaces = {}
         self.namespaces[prefix] = uri
 
     def remove_namespaces(self):
@@ -171,8 +175,9 @@ def remove_namespaces(self):
                 if an.startswith('{'):
                     el.attrib[an.split('}', 1)[1]] = el.attrib.pop(an)
 
-    def __nonzero__(self):
+    def __bool__(self):
         return bool(self.extract())
+    __nonzero__ = __bool__
 
     def __str__(self):
         data = repr(self.extract()[:40])

diff --git a/parsel/utils.py b/parsel/utils.py
@@ -25,63 +25,48 @@ def iflatten(x):
     """iflatten(sequence) -> iterator
     Similar to ``.flatten()``, but returns iterator instead"""
     for el in x:
-        if is_listlike(el):
+        if _is_listlike(el):
             for el_ in flatten(el):
                 yield el_
         else:
             yield el
 
 
-def is_listlike(x):
+def _is_listlike(x):
     """
-    >>> is_listlike("foo")
+    >>> _is_listlike("foo")
     False
-    >>> is_listlike(5)
+    >>> _is_listlike(5)
     False
-    >>> is_listlike(b"foo")
+    >>> _is_listlike(b"foo")
     False
-    >>> is_listlike([b"foo"])
+    >>> _is_listlike([b"foo"])
     True
-    >>> is_listlike((b"foo",))
+    >>> _is_listlike((b"foo",))
     True
-    >>> is_listlike({})
+    >>> _is_listlike({})
     True
-    >>> is_listlike(set())
+    >>> _is_listlike(set())
     True
-    >>> is_listlike((x for x in range(3)))
+    >>> _is_listlike((x for x in range(3)))
     True
-    >>> is_listlike(six.moves.xrange(5))
+    >>> _is_listlike(six.moves.xrange(5))
     True
     """
     return hasattr(x, "__iter__") and not isinstance(x, (six.text_type, bytes))
 
 
-def to_unicode(text, encoding='utf-8', errors='strict'):
-    """Return the unicode representation of a bytes object `text`. If `text`
-    is already an unicode object, return it as-is."""
-    if isinstance(text, six.text_type):
-        return text
-    return text.decode(encoding, errors)
-
-
-def extract_regex(regex, text, encoding='utf-8'):
+def extract_regex(regex, text):
     """Extract a list of unicode strings from the given text/encoding using the following policies:
     * if the regex contains a named group called "extract" that will be returned
     * if the regex contains multiple numbered groups, all those will be returned (flattened)
     * if the regex doesn't contain any group the entire regex matching is returned
     """
-
     if isinstance(regex, six.string_types):
         regex = re.compile(regex, re.UNICODE)
 
     try:
         strings = [regex.search(text).group('extract')]   # named group
     except:
         strings = regex.findall(text)    # full regex or numbered groups
-    strings = flatten(strings)
-
-    if isinstance(text, six.text_type):
-        return [replace_entities(s, keep=['lt', 'amp']) for s in strings]
-    else:
-        return [replace_entities(to_unicode(s, encoding), keep=['lt', 'amp'])
-                for s in strings]
+    return [replace_entities(s, keep=['lt', 'amp']) for s in flatten(strings)]
diff --git a/tests/test_selector.py b/tests/test_selector.py
@@ -132,6 +132,33 @@ def test_differences_parsing_xml_vs_html(self):
         self.assertEqual(xs.xpath("//div").extract(),
                          [u'<div><img src="a.jpg"><p>Hello</p></img></div>'])
 
+    def test_error_for_unknown_selector_type(self):
+        self.assertRaises(ValueError, self.sscls, text=u'', type='_na_')
+
+    def test_text_or_root_is_required(self):
+        self.assertRaisesRegexp(ValueError,
+                                'Selector needs either text or root argument',
+                                self.sscls)
+
+    def test_bool(self):
+        text = u'<a href="" >false</a><a href="nonempty">true</a>'
+        hs = self.sscls(text=text, type='html')
+        falsish = hs.xpath('//a/@href')[0]
+        self.assertEqual(falsish.extract(), u'')
+        self.assertFalse(falsish)
+        trueish = hs.xpath('//a/@href')[1]
+        self.assertEqual(trueish.extract(), u'nonempty')
+        self.assertTrue(trueish)
+
+    def test_slicing(self):
+        text = u'<div><p>1</p><p>2</p><p>3</p></div>'
+        hs = self.sscls(text=text, type='html')
+        self.assertIsInstance(hs.css('p')[2], self.sscls)
+        self.assertIsInstance(hs.css('p')[2:3], self.sscls.selectorlist_cls)
+        self.assertIsInstance(hs.css('p')[:2], self.sscls.selectorlist_cls)
+        self.assertEqual(hs.css('p')[2:3].extract(), [u'<p>3</p>'])
+        self.assertEqual(hs.css('p')[1:3].extract(), [u'<p>2</p>', u'<p>3</p>'])
+
     def test_nested_selectors(self):
         """Nested selector tests"""
         body = u"""<body>
@@ -378,6 +405,19 @@ def test_configure_base_url(self):
         self.assertEquals(u'http://example.com', sel.root.base)
 
 
+    def test_extending_selector(self):
+        class MySelectorList(Selector.selectorlist_cls):
+            pass
+
+        class MySelector(Selector):
+            selectorlist_cls = MySelectorList
+
+        sel = MySelector(text=u'<html><div>foo</div></html>')
+        self.assertIsInstance(sel.xpath('//div'), MySelectorList)
+        self.assertIsInstance(sel.xpath('//div')[0], MySelector)
+        self.assertIsInstance(sel.css('div'), MySelectorList)
+        self.assertIsInstance(sel.css('div')[0], MySelector)
+
 class ExsltTestCase(unittest.TestCase):
 
     sscls = Selector
@@ -493,16 +533,3 @@ def test_set(self):
                                //div[@itemtype="http://schema.org/Event"]
                                     //*[@itemscope]/*/@itemprop)''').extract(),
                          [u'url', u'name', u'startDate', u'location', u'offers'])
-
-    def test_extending_selector(self):
-        class MySelectorList(Selector.selectorlist_cls):
-            pass
-
-        class MySelector(Selector):
-            selectorlist_cls = MySelectorList
-
-        sel = MySelector(text=u'<html><div>foo</div></html>')
-        self.assertIsInstance(sel.xpath('//div'), MySelectorList)
-        self.assertIsInstance(sel.xpath('//div')[0], MySelector)
-        self.assertIsInstance(sel.css('div'), MySelectorList)
-        self.assertIsInstance(sel.css('div')[0], MySelector)