Permalink
Browse files

Merge pull request #185 from notsobad/master

Added xpath support in FormRequest.from_response
  • Loading branch information...
2 parents 75563b3 + a438be3 commit 7527ef97ba1d0250dbec6cfe396e4413722a0256 @dangra dangra committed Jan 8, 2013
Showing with 36 additions and 3 deletions.
  1. +16 −3 scrapy/http/request/form.py
  2. +20 −0 scrapy/tests/test_http_request.py
@@ -31,9 +31,9 @@ def __init__(self, *args, **kwargs):
@classmethod
def from_response(cls, response, formname=None, formnumber=0, formdata=None,
- clickdata=None, dont_click=False, **kwargs):
+ clickdata=None, dont_click=False, formxpath=None, **kwargs):
kwargs.setdefault('encoding', response.encoding)
- form = _get_form(response, formname, formnumber)
+ form = _get_form(response, formname, formnumber, formxpath)
formdata = _get_inputs(form, formdata, dont_click, clickdata, response)
url = form.action or form.base_url
return cls(url, method=form.method, formdata=formdata, **kwargs)
@@ -45,7 +45,7 @@ def _urlencode(seq, enc):
for v in (vs if hasattr(vs, '__iter__') else [vs])]
return urllib.urlencode(values, doseq=1)
-def _get_form(response, formname, formnumber):
+def _get_form(response, formname, formnumber, formxpath):
"""Find the form element """
from scrapy.selector.lxmldocument import LxmlDocument
root = LxmlDocument(response, lxml.html.HTMLParser)
@@ -56,6 +56,19 @@ def _get_form(response, formname, formnumber):
f = root.xpath('//form[@name="%s"]' % formname)
if f:
return f[0]
+
+ # Get the form element from the xpath; if the first matched node is not
+ # a <form> itself, walk up its ancestors until one is found
+ if formxpath is not None:
+ nodes = root.xpath(formxpath)
+ if nodes:
+ el = nodes[0]
+ while True:
+ if el.tag == 'form':
+ return el
+ el = el.getparent()
+ if el is None:
+ break
+ raise ValueError('No <form> element found with %s' % formxpath)
# If we get here, it means that formname was None or invalid
# and no formxpath was given
@@ -591,6 +591,26 @@ def test_from_response_descendants(self):
fs = _qs(req)
self.assertEqual(set(fs), set(['h2', 'i2', 'i1', 'i3', 'h1', 'i5', 'i4']))
+ def test_from_response_xpath(self):
+ response = _buildresponse(
+ """<form action="post.php" method="POST">
+ <input type="hidden" name="one" value="1">
+ <input type="hidden" name="two" value="2">
+ </form>
+ <form action="post2.php" method="POST">
+ <input type="hidden" name="three" value="3">
+ <input type="hidden" name="four" value="4">
+ </form>""")
+ r1 = self.request_class.from_response(response, formxpath="//form[@action='post.php']")
+ fs = _qs(r1)
+ self.assertEqual(fs['one'], ['1'])
+
+ r1 = self.request_class.from_response(response, formxpath="//form/input[@name='four']")
+ fs = _qs(r1)
+ self.assertEqual(fs['three'], ['3'])
+
+ self.assertRaises(ValueError, self.request_class.from_response,
+ response, formxpath="//form/input[@name='abc']")
def _buildresponse(body, **kwargs):
kwargs.setdefault('body', body)

0 comments on commit 7527ef9

Please sign in to comment.