Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MRG] Fix tests for Travis-CI build #570

Merged
merged 1 commit into from Jan 31, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 3 additions & 5 deletions scrapy/contrib/linkextractors/regex.py
Expand Up @@ -25,8 +25,6 @@ def _extract_links(self, response_text, response_url, response_encoding, base_ur
clean_text = lambda t: replace_escape_chars(remove_tags(t.decode(response_encoding))).strip()

links_text = linkre.findall(response_text)
urlstext = set([(clean_url(url).encode(response_encoding), clean_text(text))
for url, _, text in links_text])


return [Link(url, text) for url, text in urlstext]
return [Link(clean_url(url).encode(response_encoding),
clean_text(text))
for url, _, text in links_text]
41 changes: 31 additions & 10 deletions scrapy/tests/test_contrib_exporter.py
@@ -1,5 +1,7 @@
import unittest, json, cPickle as pickle
from cStringIO import StringIO
import lxml.etree
import re

from scrapy.item import Item, Field
from scrapy.utils.python import str_to_unicode
Expand Down Expand Up @@ -139,38 +141,43 @@ class CsvItemExporterTest(BaseItemExporterTest):
def _get_exporter(self, **kwargs):
return CsvItemExporter(self.output, **kwargs)

def assertCsvEqual(self, first, second, msg=None):
    """Assert that two CSV strings are equal, ignoring column order.

    Each line is tokenized on commas and runs of whitespace (the
    capturing group keeps the delimiters in the token list) and the
    tokens are sorted, so lines are compared by content rather than
    by column ordering.
    """
    def tokenize(csv_text):
        return [sorted(re.split(r'(,|\s+)', row))
                for row in csv_text.splitlines(True)]
    return self.assertEqual(tokenize(first), tokenize(second), msg)

def _check_output(self):
self.assertEqual(self.output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n')
self.assertCsvEqual(self.output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n')

def test_header(self):
output = StringIO()
ie = CsvItemExporter(output, fields_to_export=self.i.fields.keys())
ie.start_exporting()
ie.export_item(self.i)
ie.finish_exporting()
self.assertEqual(output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n')
self.assertCsvEqual(output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n')

output = StringIO()
ie = CsvItemExporter(output, fields_to_export=['age'])
ie.start_exporting()
ie.export_item(self.i)
ie.finish_exporting()
self.assertEqual(output.getvalue(), 'age\r\n22\r\n')
self.assertCsvEqual(output.getvalue(), 'age\r\n22\r\n')

output = StringIO()
ie = CsvItemExporter(output)
ie.start_exporting()
ie.export_item(self.i)
ie.export_item(self.i)
ie.finish_exporting()
self.assertEqual(output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n22,John\xc2\xa3\r\n')
self.assertCsvEqual(output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n22,John\xc2\xa3\r\n')

output = StringIO()
ie = CsvItemExporter(output, include_headers_line=False)
ie.start_exporting()
ie.export_item(self.i)
ie.finish_exporting()
self.assertEqual(output.getvalue(), '22,John\xc2\xa3\r\n')
self.assertCsvEqual(output.getvalue(), '22,John\xc2\xa3\r\n')

def test_join_multivalue(self):
class TestItem2(Item):
Expand All @@ -183,16 +190,29 @@ class TestItem2(Item):
ie.start_exporting()
ie.export_item(i)
ie.finish_exporting()
self.assertEqual(output.getvalue(), '"Mary,Paul",John\r\n')
self.assertCsvEqual(output.getvalue(), '"Mary,Paul",John\r\n')

class XmlItemExporterTest(BaseItemExporterTest):

def _get_exporter(self, **kwargs):
return XmlItemExporter(self.output, **kwargs)

def assertXmlEquivalent(self, first, second, msg=None):
    """Assert that two XML documents are equivalent, ignoring the
    order of sibling elements.

    Each document is parsed and reduced to a canonical nested
    structure: an element with children becomes a list of
    (tag, sorted canonical children) pairs; a leaf collapses to its
    tag and text.  NOTE(review): attributes are not compared by this
    canonical form.
    """
    def as_tuples(node):
        kids = list(node.iterchildren())
        if kids:
            return [(kid.tag, sorted(as_tuples(kid))) for kid in kids]
        return [(node.tag, [(node.text, ())])]

    def canonical(xmlcontent):
        return as_tuples(lxml.etree.fromstring(xmlcontent))

    return self.assertEqual(canonical(first), canonical(second), msg)

def _check_output(self):
expected_value = '<?xml version="1.0" encoding="utf-8"?>\n<items><item><age>22</age><name>John\xc2\xa3</name></item></items>'
self.assertEqual(self.output.getvalue(), expected_value)
self.assertXmlEquivalent(self.output.getvalue(), expected_value)

def test_multivalued_fields(self):
output = StringIO()
Expand All @@ -202,7 +222,7 @@ def test_multivalued_fields(self):
ie.export_item(item)
ie.finish_exporting()
expected_value = '<?xml version="1.0" encoding="utf-8"?>\n<items><item><name><value>John\xc2\xa3</value><value>Doe</value></name></item></items>'
self.assertEqual(output.getvalue(), expected_value)
self.assertXmlEquivalent(output.getvalue(), expected_value)

def test_nested_item(self):
output = StringIO()
Expand All @@ -224,7 +244,7 @@ def test_nested_item(self):
'</age>'\
'<name>buz</name>'\
'</item></items>'
self.assertEqual(output.getvalue(), expected_value)
self.assertXmlEquivalent(output.getvalue(), expected_value)

def test_nested_list_item(self):
output = StringIO()
Expand All @@ -243,7 +263,8 @@ def test_nested_list_item(self):
'</age>'\
'<name>buz</name>'\
'</item></items>'
self.assertEqual(output.getvalue(), expected_value)
self.assertXmlEquivalent(output.getvalue(), expected_value)


class JsonLinesItemExporterTest(BaseItemExporterTest):

Expand Down
22 changes: 9 additions & 13 deletions scrapy/tests/test_contrib_linkextractors.py
Expand Up @@ -312,12 +312,11 @@ def setUp(self):
def test_extraction(self):
# Default arguments
lx = HtmlParserLinkExtractor()
self.assertEqual([link for link in lx.extract_links(self.response)], [
Link(url='http://example.com/sample2.html', text=u'sample 2'),
Link(url='http://example.com/sample3.html', text=u'sample 3 text'),
Link(url='http://example.com/sample3.html', text='sample 3 repetition'),
Link(url='http://www.google.com/something', text=''),
])
self.assertEqual(lx.extract_links(self.response),
[Link(url='http://example.com/sample2.html', text=u'sample 2'),
Link(url='http://example.com/sample3.html', text=u'sample 3 text'),
Link(url='http://example.com/sample3.html', text=u'sample 3 repetition'),
Link(url='http://www.google.com/something', text=u''),])


class RegexLinkExtractorTestCase(unittest.TestCase):
Expand All @@ -329,13 +328,10 @@ def setUp(self):
def test_extraction(self):
# Default arguments
lx = RegexLinkExtractor()
# Note that RegexLinkExtractor returns links in arbitrary order,
# so we need to sort them for comparison
self.assertEqual(sorted(lx.extract_links(self.response), key=lambda x: x.url), [
Link(url='http://example.com/sample2.html', text=u'sample 2'),
Link(url='http://example.com/sample3.html', text=u'sample 3 repetition'),
Link(url='http://www.google.com/something', text=u''),
])
self.assertEqual(lx.extract_links(self.response),
[Link(url='http://example.com/sample2.html', text=u'sample 2'),
Link(url='http://example.com/sample3.html', text=u'sample 3 text'),
Link(url='http://www.google.com/something', text=u''),])


if __name__ == "__main__":
Expand Down
13 changes: 8 additions & 5 deletions scrapy/tests/test_djangoitem/__init__.py
Expand Up @@ -24,33 +24,36 @@ class IdentifiedPersonItem(DjangoItem):

class DjangoItemTest(unittest.TestCase):

def assertSortedEqual(self, first, second, msg=None):
    """Assert two iterables hold the same elements, ignoring order."""
    first_sorted = sorted(first)
    second_sorted = sorted(second)
    return self.assertEqual(first_sorted, second_sorted, msg)

def setUp(self):
if 'django' not in optional_features:
raise unittest.SkipTest("Django is not available")

def test_base(self):
i = BasePersonItem()
self.assertEqual(i.fields.keys(), ['age', 'name'])
self.assertSortedEqual(i.fields.keys(), ['age', 'name'])

def test_new_fields(self):
i = NewFieldPersonItem()
self.assertEqual(i.fields.keys(), ['age', 'other', 'name'])
self.assertSortedEqual(i.fields.keys(), ['age', 'other', 'name'])

def test_override_field(self):
i = OverrideFieldPersonItem()
self.assertEqual(i.fields.keys(), ['age', 'name'])
self.assertSortedEqual(i.fields.keys(), ['age', 'name'])

def test_custom_primary_key_field(self):
"""
Test that if a custom primary key exists, it is
in the field list.
"""
i = IdentifiedPersonItem()
self.assertEqual(i.fields.keys(), ['age', 'identifier', 'name'])
self.assertSortedEqual(i.fields.keys(), ['age', 'identifier', 'name'])

def test_save(self):
i = BasePersonItem()
self.assertEqual(i.fields.keys(), ['age', 'name'])
self.assertSortedEqual(i.fields.keys(), ['age', 'name'])

i['name'] = 'John'
i['age'] = '22'
Expand Down
4 changes: 2 additions & 2 deletions scrapy/tests/test_downloadermiddleware_ajaxcrawlable.py
@@ -1,15 +1,15 @@
import unittest

from scrapy.contrib.downloadermiddleware.ajaxcrawl import AjaxCrawlMiddleware
from scrapy.spider import BaseSpider
from scrapy.spider import Spider
from scrapy.http import Request, HtmlResponse, Response
from scrapy.utils.test import get_crawler

__doctests__ = ['scrapy.contrib.downloadermiddleware.ajaxcrawl']

class AjaxCrawlMiddlewareTest(unittest.TestCase):
def setUp(self):
self.spider = BaseSpider('foo')
self.spider = Spider('foo')
crawler = get_crawler({'AJAXCRAWL_ENABLED': True})
self.mw = AjaxCrawlMiddleware.from_crawler(crawler)

Expand Down
15 changes: 11 additions & 4 deletions scrapy/tests/test_downloadermiddleware_cookies.py
@@ -1,4 +1,5 @@
from unittest import TestCase
import re

from scrapy.http import Response, Request
from scrapy.spider import Spider
Expand All @@ -7,6 +8,12 @@

class CookiesMiddlewareTest(TestCase):

def assertCookieValEqual(self, first, second, msg=None):
    """Assert that two Cookie header values carry the same cookies.

    Cookie pairs may be serialized in any order, so each header value
    is split on ';' followed by optional whitespace, and the resulting
    pairs are compared as sorted lists.
    """
    def split_cookies(header_value):
        # Raw string so the '\s' escape is passed to the regex engine
        # explicitly instead of relying on the string literal leaving
        # the unknown escape intact.
        return sorted(re.split(r';\s*', header_value))
    return self.assertEqual(
        split_cookies(first), split_cookies(second), msg)

def setUp(self):
self.spider = Spider('foo')
self.mw = CookiesMiddleware()
Expand Down Expand Up @@ -86,7 +93,8 @@ def test_merge_request_cookies(self):

req2 = Request('http://scrapytest.org/sub1/')
assert self.mw.process_request(req2, self.spider) is None
self.assertEquals(req2.headers.get('Cookie'), "C1=value1; galleta=salada")

self.assertCookieValEqual(req2.headers.get('Cookie'), "C1=value1; galleta=salada")

def test_cookiejar_key(self):
req = Request('http://scrapytest.org/', cookies={'galleta': 'salada'}, meta={'cookiejar': "store1"})
Expand All @@ -99,8 +107,7 @@ def test_cookiejar_key(self):

req2 = Request('http://scrapytest.org/', meta=res.meta)
assert self.mw.process_request(req2, self.spider) is None
self.assertEquals(req2.headers.get('Cookie'), 'C1=value1; galleta=salada')

self.assertCookieValEqual(req2.headers.get('Cookie'),'C1=value1; galleta=salada')

req3 = Request('http://scrapytest.org/', cookies={'galleta': 'dulce'}, meta={'cookiejar': "store2"})
assert self.mw.process_request(req3, self.spider) is None
Expand All @@ -112,7 +119,7 @@ def test_cookiejar_key(self):

req4 = Request('http://scrapytest.org/', meta=res2.meta)
assert self.mw.process_request(req4, self.spider) is None
self.assertEquals(req4.headers.get('Cookie'), 'C2=value2; galleta=dulce')
self.assertCookieValEqual(req4.headers.get('Cookie'), 'C2=value2; galleta=dulce')

#cookies from hosts with port
req5_1 = Request('http://scrapytest.org:1104/')
Expand Down
16 changes: 10 additions & 6 deletions scrapy/tests/test_http_headers.py
Expand Up @@ -4,6 +4,10 @@
from scrapy.http import Headers

class HeadersTest(unittest.TestCase):

def assertSortedEqual(self, first, second, msg=None):
    """Assert equality of two iterables irrespective of element order."""
    return self.assertEqual(sorted(first), sorted(second), msg=msg)

def test_basics(self):
h = Headers({'Content-Type': 'text/html', 'Content-Length': 1234})
assert h['Content-Type']
Expand Down Expand Up @@ -75,13 +79,13 @@ def test_iterables(self):
idict = {'Content-Type': 'text/html', 'X-Forwarded-For': ['ip1', 'ip2']}

h = Headers(idict)
self.assertEqual(dict(h), {'Content-Type': ['text/html'], 'X-Forwarded-For': ['ip1', 'ip2']})
self.assertEqual(h.keys(), ['X-Forwarded-For', 'Content-Type'])
self.assertEqual(h.items(), [('X-Forwarded-For', ['ip1', 'ip2']), ('Content-Type', ['text/html'])])
self.assertEqual(list(h.iteritems()),
[('X-Forwarded-For', ['ip1', 'ip2']), ('Content-Type', ['text/html'])])
self.assertDictEqual(dict(h),
{'Content-Type': ['text/html'], 'X-Forwarded-For': ['ip1', 'ip2']})
self.assertSortedEqual(h.keys(), ['X-Forwarded-For', 'Content-Type'])
self.assertSortedEqual(h.items(), [('X-Forwarded-For', ['ip1', 'ip2']), ('Content-Type', ['text/html'])])
self.assertSortedEqual(h.iteritems(), [('X-Forwarded-For', ['ip1', 'ip2']), ('Content-Type', ['text/html'])])

self.assertEqual(h.values(), ['ip2', 'text/html'])
self.assertSortedEqual(h.values(), ['ip2', 'text/html'])

def test_update(self):
h = Headers()
Expand Down
9 changes: 7 additions & 2 deletions scrapy/tests/test_http_request.py
Expand Up @@ -184,6 +184,9 @@ class FormRequestTest(RequestTest):

request_class = FormRequest

def assertSortedEqual(self, first, second, msg=None):
    """Compare two iterables for equality after sorting both, so the
    original element order does not matter."""
    canonical = lambda items: sorted(items)
    return self.assertEqual(canonical(first), canonical(second), msg)

def test_empty_formdata(self):
r1 = self.request_class("http://www.example.com", formdata={})
self.assertEqual(r1.body, '')
Expand All @@ -194,7 +197,8 @@ def test_default_encoding(self):
r2 = self.request_class("http://www.example.com", formdata=data)
self.assertEqual(r2.method, 'POST')
self.assertEqual(r2.encoding, 'utf-8')
self.assertEqual(r2.body, 'price=%C2%A3+100&one=two')
self.assertSortedEqual(r2.body.split('&'),
'price=%C2%A3+100&one=two'.split('&'))
self.assertEqual(r2.headers['Content-Type'], 'application/x-www-form-urlencoded')

def test_custom_encoding(self):
Expand All @@ -207,7 +211,8 @@ def test_multi_key_values(self):
# using multiples values for a single key
data = {'price': u'\xa3 100', 'colours': ['red', 'blue', 'green']}
r3 = self.request_class("http://www.example.com", formdata=data)
self.assertEqual(r3.body, 'colours=red&colours=blue&colours=green&price=%C2%A3+100')
self.assertSortedEqual(r3.body.split('&'),
'colours=red&colours=blue&colours=green&price=%C2%A3+100'.split('&'))

def test_from_response_post(self):
response = _buildresponse(
Expand Down
7 changes: 5 additions & 2 deletions scrapy/tests/test_item.py
Expand Up @@ -5,6 +5,9 @@

class ItemTest(unittest.TestCase):

def assertSortedEqual(self, first, second, msg=None):
    """Order-insensitive equality assertion for two iterables."""
    lhs, rhs = sorted(first), sorted(second)
    return self.assertEqual(lhs, rhs, msg)

def test_simple(self):
class TestItem(Item):
name = Field()
Expand Down Expand Up @@ -108,8 +111,8 @@ class TestItem(Item):

i['keys'] = u'Keys'
i['values'] = u'Values'
self.assertEqual(i.keys(), ['keys', 'values', 'name'])
self.assertEqual(i.values(), [u'Keys', u'Values', u'John'])
self.assertSortedEqual(i.keys(), ['keys', 'values', 'name'])
self.assertSortedEqual(i.values(), [u'Keys', u'Values', u'John'])

def test_metaclass_inheritance(self):
class BaseItem(Item):
Expand Down
4 changes: 3 additions & 1 deletion scrapy/tests/test_webclient.py
Expand Up @@ -169,7 +169,9 @@ def _test(self, factory, testvalue):
protocol = client.ScrapyHTTPPageGetter()
protocol.factory = factory
protocol.makeConnection(transport)
self.assertEqual(transport.value(), testvalue)
self.assertEqual(
set(transport.value().splitlines()),
set(testvalue.splitlines()))
return testvalue

def test_non_standard_line_endings(self):
Expand Down