[MRG+1] Migrating selectors to use parsel #1409

Merged on Aug 11, 2015

Changes from all commits

17 commits:
ce21884  migrating scrapy Selector to use Parsel (eliasdorneles, Aug 3, 2015)
c7b29d1  fix support to legacy _root argument (eliasdorneles, Aug 3, 2015)
3a572e2  cleanup csstranslator module, keeping only imports (eliasdorneles, Aug 3, 2015)
01d948f  remove selector support for LxmlDocument DOM cache and add deprecatio… (eliasdorneles, Aug 4, 2015)
17d7347  update minimal parsel version, add deprecated classes for csstranslat… (eliasdorneles, Aug 5, 2015)
35c1dcd  use response.selector in link extractors instead of instantiating new… (eliasdorneles, Aug 5, 2015)
6287fc3  remove lxmldocument dependency from http.request.form (eliasdorneles, Aug 7, 2015)
94c3a34  remove deprecated module lxmldocument (eliasdorneles, Aug 7, 2015)
67c98b1  avoid harcoded check for selector type (eliasdorneles, Aug 7, 2015)
2fe6d12  upgrade parsel and using promoted root attribute (eliasdorneles, Aug 7, 2015)
26ebccd  upgrade parsel and use its function to instantiate root for finding form (eliasdorneles, Aug 7, 2015)
12579b9  warning when ambiguous root arguments and minor cleanups (eliasdorneles, Aug 9, 2015)
3a03ef7  cleanup tests for selectors and translators (eliasdorneles, Aug 9, 2015)
8ef5aa2  using bytes for response body in tests (eliasdorneles, Aug 11, 2015)
e50610b  set base_url in kwargs to be fully backward compatible (eliasdorneles, Aug 11, 2015)
766c255  upgrade parsel and add shim for deprecated selectorlist methods (eliasdorneles, Aug 11, 2015)
a5abd19  make Parsel's Selector more private, remove direct dependency of Pars… (eliasdorneles, Aug 11, 2015)
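
Taken together, the series moves scrapy.Selector onto parsel's Selector while keeping the public API intact. A rough illustration of the behavior the PR preserves (the sample markup is ours, not from the diff):

from scrapy.selector import Selector

# xpath/css/extract work exactly as before; only the backing
# implementation now lives in the parsel library
sel = Selector(text=u'<html><body><span>good</span></body></html>')
print(sel.css('span::text').extract())       # [u'good']
print(sel.xpath('//span/text()').extract())  # [u'good']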
1 change: 1 addition & 0 deletions requirements.txt
@@ -7,3 +7,4 @@ queuelib
 six>=1.5.2
 PyDispatcher>=2.0.5
 service_identity
+parsel>=0.9.5
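
A quick environment check against the new floor; this assumes parsel exposes __version__ at the package top level:

import parsel

# requirements.txt now pins parsel>=0.9.5
print(parsel.__version__)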
5 changes: 3 additions & 2 deletions scrapy/http/request/form.py
@@ -7,6 +7,7 @@
 
 from six.moves.urllib.parse import urljoin, urlencode
 import lxml.html
+from parsel.selector import create_root_node
 import six
 from scrapy.http.request import Request
 from scrapy.utils.python import to_bytes, is_listlike
@@ -56,8 +57,8 @@ def _urlencode(seq, enc):
 
 def _get_form(response, formname, formid, formnumber, formxpath):
     """Find the form element """
-    from scrapy.selector.lxmldocument import LxmlDocument
-    root = LxmlDocument(response, lxml.html.HTMLParser)
+    text = response.body_as_unicode()
+    root = create_root_node(text, lxml.html.HTMLParser, base_url=response.url)
     forms = root.xpath('//form')
     if not forms:
         raise ValueError("No <form> element found in %s" % response)
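
For reference, a standalone sketch of the new code path in _get_form(), with a made-up page and URL standing in for the response; create_root_node comes from parsel exactly as in the diff:

import lxml.html
from parsel.selector import create_root_node

text = u'<html><body><form action="/login"></form></body></html>'
# base_url keeps relative form actions resolvable, as the
# response-based LxmlDocument parsing did before
root = create_root_node(text, lxml.html.HTMLParser, base_url='http://example.com')
forms = root.xpath('//form')
if not forms:
    raise ValueError("No <form> element found")
print(forms[0].action)  # http://example.com/login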
10 changes: 4 additions & 6 deletions scrapy/linkextractors/lxmlhtml.py
@@ -47,7 +47,7 @@ def _iter_links(self, document):
     def _extract_links(self, selector, response_url, response_encoding, base_url):
         links = []
         # hacky way to get the underlying lxml parsed document
-        for el, attr, attr_val in self._iter_links(selector._root):
+        for el, attr, attr_val in self._iter_links(selector.root):
             # pseudo lxml.html.HtmlElement.make_links_absolute(base_url)
             attr_val = urljoin(base_url, attr_val)
             url = self.process_attr(attr_val)
@@ -65,9 +65,8 @@ def _extract_links(self, selector, response_url, response_encoding, base_url):
             if self.unique else links
 
     def extract_links(self, response):
-        html = Selector(response)
         base_url = get_base_url(response)
-        return self._extract_links(html, response.url, response.encoding, base_url)
+        return self._extract_links(response.selector, response.url, response.encoding, base_url)
 
     def _process_links(self, links):
         """ Normalize and filter extracted links
@@ -95,14 +94,13 @@ def __init__(self, allow=(), deny=(), allow_domains=(), deny_domains=(), restric
             canonicalize=canonicalize, deny_extensions=deny_extensions)
 
     def extract_links(self, response):
-        html = Selector(response)
         base_url = get_base_url(response)
         if self.restrict_xpaths:
             docs = [subdoc
                     for x in self.restrict_xpaths
-                    for subdoc in html.xpath(x)]
+                    for subdoc in response.xpath(x)]
         else:
-            docs = [html]
+            docs = [response.selector]
         all_links = []
         for doc in docs:
             links = self._extract_links(doc, response.url, response.encoding, base_url)
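
Both rewrites above rely on response.selector, which is built lazily and cached on the response, so the body is parsed at most once no matter how many times extract_links() runs. A small self-contained illustration (the sample response is ours):

from scrapy.http import HtmlResponse

response = HtmlResponse(
    url='http://example.com',
    body=b'<html><body><a href="/next">next</a></body></html>')
# repeated access returns the same cached Selector instance
assert response.selector is response.selector
# response.xpath(...) is shorthand for response.selector.xpath(...)
print(response.xpath('//a/@href').extract())  # [u'/next']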
3 changes: 1 addition & 2 deletions scrapy/linkextractors/sgml.py
@@ -127,11 +127,10 @@ def __init__(self, allow=(), deny=(), allow_domains=(), deny_domains=(), restric
     def extract_links(self, response):
         base_url = None
         if self.restrict_xpaths:
-            sel = Selector(response)
             base_url = get_base_url(response)
             body = u''.join(f
                             for x in self.restrict_xpaths
-                            for f in sel.xpath(x).extract()
+                            for f in response.xpath(x).extract()
                             ).encode(response.encoding, errors='xmlcharrefreplace')
         else:
             body = response.body
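
At the user level the restrict_xpaths behavior is unchanged; it now just queries response.xpath() directly. A usage sketch with the lxml-based extractor (markup and element ids are ours):

from scrapy.http import HtmlResponse
from scrapy.linkextractors import LinkExtractor

response = HtmlResponse(
    url='http://example.com',
    body=b'<div id="nav"><a href="/a">a</a></div><a href="/b">b</a>')
le = LinkExtractor(restrict_xpaths='//div[@id="nav"]')
# only links inside the restricted region are returned
print([link.url for link in le.extract_links(response)])
# ['http://example.com/a']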
95 changes: 11 additions & 84 deletions scrapy/selector/csstranslator.py
@@ -1,88 +1,15 @@
-from cssselect import GenericTranslator, HTMLTranslator
-from cssselect.xpath import _unicode_safe_getattr, XPathExpr, ExpressionError
-from cssselect.parser import FunctionalPseudoElement
+from parsel.csstranslator import XPathExpr, GenericTranslator, HTMLTranslator
+from scrapy.utils.deprecate import create_deprecated_class
 
 
-class ScrapyXPathExpr(XPathExpr):
+ScrapyXPathExpr = create_deprecated_class(
+    'ScrapyXPathExpr', XPathExpr,
+    new_class_path='parsel.csstranslator.XPathExpr')
 
-    textnode = False
-    attribute = None
-
-    @classmethod
-    def from_xpath(cls, xpath, textnode=False, attribute=None):
-        x = cls(path=xpath.path, element=xpath.element, condition=xpath.condition)
-        x.textnode = textnode
-        x.attribute = attribute
-        return x
-
-    def __str__(self):
-        path = super(ScrapyXPathExpr, self).__str__()
-        if self.textnode:
-            if path == '*':
-                path = 'text()'
-            elif path.endswith('::*/*'):
-                path = path[:-3] + 'text()'
-            else:
-                path += '/text()'
-
-        if self.attribute is not None:
-            if path.endswith('::*/*'):
-                path = path[:-2]
-            path += '/@%s' % self.attribute
-
-        return path
-
-    def join(self, combiner, other):
-        super(ScrapyXPathExpr, self).join(combiner, other)
-        self.textnode = other.textnode
-        self.attribute = other.attribute
-        return self
-
-
-class TranslatorMixin(object):
-
-    def xpath_element(self, selector):
-        xpath = super(TranslatorMixin, self).xpath_element(selector)
-        return ScrapyXPathExpr.from_xpath(xpath)
-
-    def xpath_pseudo_element(self, xpath, pseudo_element):
-        if isinstance(pseudo_element, FunctionalPseudoElement):
-            method = 'xpath_%s_functional_pseudo_element' % (
-                pseudo_element.name.replace('-', '_'))
-            method = _unicode_safe_getattr(self, method, None)
-            if not method:
-                raise ExpressionError(
-                    "The functional pseudo-element ::%s() is unknown"
-                    % pseudo_element.name)
-            xpath = method(xpath, pseudo_element)
-        else:
-            method = 'xpath_%s_simple_pseudo_element' % (
-                pseudo_element.replace('-', '_'))
-            method = _unicode_safe_getattr(self, method, None)
-            if not method:
-                raise ExpressionError(
-                    "The pseudo-element ::%s is unknown"
-                    % pseudo_element)
-            xpath = method(xpath)
-        return xpath
-
-    def xpath_attr_functional_pseudo_element(self, xpath, function):
-        if function.argument_types() not in (['STRING'], ['IDENT']):
-            raise ExpressionError(
-                "Expected a single string or ident for ::attr(), got %r"
-                % function.arguments)
-        return ScrapyXPathExpr.from_xpath(xpath,
-                                          attribute=function.arguments[0].value)
-
-    def xpath_text_simple_pseudo_element(self, xpath):
-        """Support selecting text nodes using ::text pseudo-element"""
-        return ScrapyXPathExpr.from_xpath(xpath, textnode=True)
-
-
-class ScrapyGenericTranslator(TranslatorMixin, GenericTranslator):
-    pass
-
-
-class ScrapyHTMLTranslator(TranslatorMixin, HTMLTranslator):
-    pass
+ScrapyGenericTranslator = create_deprecated_class(
+    'ScrapyGenericTranslator', GenericTranslator,
+    new_class_path='parsel.csstranslator.GenericTranslator')
 
+ScrapyHTMLTranslator = create_deprecated_class(
+    'ScrapyHTMLTranslator', HTMLTranslator,
+    new_class_path='parsel.csstranslator.HTMLTranslator')
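
The old translator names keep importing, but create_deprecated_class makes them warn and point at their parsel replacements. A minimal check of the shim:

from scrapy.selector.csstranslator import ScrapyHTMLTranslator

# instantiating the shim emits ScrapyDeprecationWarning, directing users
# to parsel.csstranslator.HTMLTranslator; translation behavior is unchanged
translator = ScrapyHTMLTranslator()
print(translator.css_to_xpath('span::text'))
# descendant-or-self::span/text()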
31 changes: 0 additions & 31 deletions scrapy/selector/lxmldocument.py

This file was deleted.
