Skip to content
Permalink
Browse files

encode invalid xpath with unicode_escape under PY2

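The exception message quotes the XPath string, which may be unicode.
On Python 2 a non-ASCII unicode message cannot be rendered by str(exception), so it is escaped with unicode_escape first.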
  • Loading branch information
Digenis authored and dangra committed Apr 20, 2015
1 parent 2c8e573 commit 07cb3e526def018c96c840ed24360f7f906f6946
Showing with 14 additions and 9 deletions.
  1. +3 −1 scrapy/selector/unified.py
  2. +11 −8 scrapy/tests/test_selector.py
@@ -3,6 +3,7 @@
"""

from lxml import etree
import six

from scrapy.utils.misc import extract_regex
from scrapy.utils.trackref import object_ref
@@ -95,7 +96,8 @@ def xpath(self, query):
result = xpathev(query, namespaces=self.namespaces,
smart_strings=self._lxml_smart_strings)
except etree.XPathError:
raise ValueError("Invalid XPath: %s" % query)
msg = u"Invalid XPath: %s" % query
raise ValueError(msg if six.PY3 else msg.encode("unicode_escape"))

if type(result) is not list:
result = [result]
@@ -1,6 +1,7 @@
import re
import warnings
import weakref
import six
from twisted.trial import unittest
from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.http import TextResponse, HtmlResponse, XmlResponse
@@ -188,17 +189,19 @@ def test_selector_over_text(self):
self.assertEqual(xs.xpath('.').extract(), [u'<root>lala</root>'])

def test_invalid_xpath(self):
    """Test invalid xpath raises ValueError with the invalid xpath."""
    # The body is a bytes literal so the response is built the same way on
    # Python 2 and Python 3 (a text body would need an explicit encoding).
    response = XmlResponse(url="http://example.com", body=b"<html></html>")
    x = self.sscls(response)
    # The attribute value is deliberately left unterminated.
    xpath = "//test[@foo='bar]"
    # A single assertion covers every case the previous try/except/else
    # handled by hand: no exception -> failure, a different exception ->
    # propagates with its own traceback, ValueError -> the message must
    # contain the offending xpath. six picks the right method name for the
    # running Python version (assertRaisesRegexp vs assertRaisesRegex).
    six.assertRaisesRegex(self, ValueError, re.escape(xpath), x.xpath, xpath)

def test_invalid_xpath_unicode(self):
    """Test *Unicode* invalid xpath raises ValueError with the invalid xpath."""
    # The body is a bytes literal so the response is built the same way on
    # Python 2 and Python 3 (a text body would need an explicit encoding).
    response = XmlResponse(url="http://example.com", body=b"<html></html>")
    x = self.sscls(response)
    # Invalid query (unterminated attribute value) containing a non-ASCII
    # character (U+0431).
    xpath = u"//test[@foo='\u0431ar]"
    # On Python 2 the message is expected to carry the query in its
    # unicode_escape form; on Python 3 it is expected verbatim.
    encoded = xpath if six.PY3 else xpath.encode('unicode_escape')
    # six picks the right method name for the running Python version
    # (assertRaisesRegexp vs assertRaisesRegex).
    six.assertRaisesRegex(self, ValueError, re.escape(encoded), x.xpath, xpath)

def test_http_header_encoding_precedence(self):
# u'\xa3' = pound symbol in unicode

0 comments on commit 07cb3e5

Please sign in to comment.
You can’t perform that action at this time.