Skip to content
Permalink
Browse files

Fix tests for Travis-CI build

  • Loading branch information
redapple authored and dangra committed Feb 5, 2014
1 parent 5f87b17 commit 76c7e200aa3ea5c033e12b38205ab20282e0635f
@@ -25,8 +25,6 @@ def _extract_links(self, response_text, response_url, response_encoding, base_ur
clean_text = lambda t: replace_escape_chars(remove_tags(t.decode(response_encoding))).strip()

links_text = linkre.findall(response_text)
urlstext = set([(clean_url(url).encode(response_encoding), clean_text(text))
for url, _, text in links_text])


return [Link(url, text) for url, text in urlstext]
return [Link(clean_url(url).encode(response_encoding),
clean_text(text))
for url, _, text in links_text]
@@ -1,5 +1,7 @@
import unittest, json, cPickle as pickle
from cStringIO import StringIO
import lxml.etree
import re

from scrapy.item import Item, Field
from scrapy.utils.python import str_to_unicode
@@ -139,38 +141,43 @@ class CsvItemExporterTest(BaseItemExporterTest):
def _get_exporter(self, **kwargs):
    """Return a CsvItemExporter bound to ``self.output``.

    Any keyword arguments are handed to the CsvItemExporter constructor
    unchanged.
    """
    exporter = CsvItemExporter(self.output, **kwargs)
    return exporter

def assertCsvEqual(self, first, second, msg=None):
    """Assert that two CSV documents hold the same data, ignoring column order.

    Both documents are parsed with the ``csv`` module, so quoted fields
    such as ``"Mary,Paul"`` stay in one piece.  They are then compared
    column by column: every value has to stay under the same header, but
    the columns themselves may appear in any order.  Row order, the number
    of fields in each row and the line terminators must match exactly.

    :param first: CSV text produced by the code under test.
    :param second: the expected CSV text.
    :param msg: optional failure message forwarded to ``assertEqual``.
    :raises AssertionError: if the two documents differ.
    """
    import csv  # local import: the helper must not depend on module-level imports

    def normalise(text):
        lines = text.splitlines(True)
        # Keep every line terminator so that '\r\n' vs '\n' is still detected.
        endings = [line[len(line.rstrip('\r\n')):] for line in lines]
        rows = list(csv.reader(lines))
        widths = [len(row) for row in rows]
        widest = max(widths) if widths else 0
        # Pad short rows so zip() cannot silently drop trailing fields;
        # ``widths`` still records the real length of every row.
        padded = [row + [''] * (widest - len(row)) for row in rows]
        # Transposing to columns and sorting them makes the column order
        # irrelevant while keeping each value tied to its header.
        columns = sorted(zip(*padded))
        return endings, widths, columns

    return self.assertEqual(normalise(first), normalise(second), msg)

def _check_output(self):
# Verify the CSV text accumulated in self.output: a header line followed by one data row.
# NOTE(review): the two assertions below take identical arguments and look like the
# before/after sides of a diff whose +/- markers were lost -- confirm which one should remain.
# Exact string comparison: only passes when the columns are written as "age,name".
self.assertEqual(self.output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n')
# Same expectation checked through assertCsvEqual, the order-tolerant CSV helper of this class.
self.assertCsvEqual(self.output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n')

def test_header(self):
# Exercise CsvItemExporter header handling in four configurations, each on a fresh StringIO.
# NOTE(review): every assertEqual below is immediately followed by an assertCsvEqual with the
# same arguments; they look like the before/after sides of a diff whose +/- markers were
# lost -- confirm which of each pair should remain.
# Case 1: fields_to_export is given every key of the item's fields.
output = StringIO()
ie = CsvItemExporter(output, fields_to_export=self.i.fields.keys())
ie.start_exporting()
ie.export_item(self.i)
ie.finish_exporting()
self.assertEqual(output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n')
self.assertCsvEqual(output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n')

# Case 2: only 'age' is exported, so header and row are expected to hold that single column.
output = StringIO()
ie = CsvItemExporter(output, fields_to_export=['age'])
ie.start_exporting()
ie.export_item(self.i)
ie.finish_exporting()
self.assertEqual(output.getvalue(), 'age\r\n22\r\n')
self.assertCsvEqual(output.getvalue(), 'age\r\n22\r\n')

# Case 3: the same item exported twice; the header is expected once, followed by two rows.
output = StringIO()
ie = CsvItemExporter(output)
ie.start_exporting()
ie.export_item(self.i)
ie.export_item(self.i)
ie.finish_exporting()
self.assertEqual(output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n22,John\xc2\xa3\r\n')
self.assertCsvEqual(output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n22,John\xc2\xa3\r\n')

# Case 4: include_headers_line=False; only the data row is expected in the output.
output = StringIO()
ie = CsvItemExporter(output, include_headers_line=False)
ie.start_exporting()
ie.export_item(self.i)
ie.finish_exporting()
self.assertEqual(output.getvalue(), '22,John\xc2\xa3\r\n')
self.assertCsvEqual(output.getvalue(), '22,John\xc2\xa3\r\n')

def test_join_multivalue(self):
class TestItem2(Item):
@@ -183,16 +190,29 @@ class TestItem2(Item):
ie.start_exporting()
ie.export_item(i)
ie.finish_exporting()
self.assertEqual(output.getvalue(), '"Mary,Paul",John\r\n')
self.assertCsvEqual(output.getvalue(), '"Mary,Paul",John\r\n')

class XmlItemExporterTest(BaseItemExporterTest):

def _get_exporter(self, **kwargs):
    """Return an XmlItemExporter bound to ``self.output``.

    Any keyword arguments are handed to the XmlItemExporter constructor
    unchanged.
    """
    exporter = XmlItemExporter(self.output, **kwargs)
    return exporter

def assertXmlEquivalent(self, first, second, msg=None):
    """Assert that two XML documents have the same element structure.

    Each document is parsed with ``lxml.etree.fromstring`` and reduced to
    nested lists of ``(tag, content)`` tuples.  Only tags and the text of
    childless elements take part in the comparison.  The direct children
    of the root are compared in document order; below that level the
    summaries of sibling elements are sorted, so their order is ignored.

    :param first: XML produced by the code under test.
    :param second: the expected XML.
    :param msg: optional failure message forwarded to ``assertEqual``.
    """
    def describe(node):
        kids = list(node.iterchildren())
        if not kids:
            # Childless element: record its own tag and text.
            return [(node.tag, [(node.text, ())])]
        summary = []
        for kid in kids:
            # Sorting the child's summary makes sibling order irrelevant
            # one level further down.
            summary.append((kid.tag, sorted(describe(kid))))
        return summary

    actual_shape = describe(lxml.etree.fromstring(first))
    expected_shape = describe(lxml.etree.fromstring(second))
    return self.assertEqual(actual_shape, expected_shape, msg)

def _check_output(self):
# Verify the XML accumulated in self.output: one <item> with <age> and <name> inside <items>.
expected_value = '<?xml version="1.0" encoding="utf-8"?>\n<items><item><age>22</age><name>John\xc2\xa3</name></item></items>'
# NOTE(review): the two assertions below take identical arguments and look like the
# before/after sides of a diff whose +/- markers were lost -- confirm which one should remain.
# Exact string comparison: only passes when <age> is serialized before <name>.
self.assertEqual(self.output.getvalue(), expected_value)
# Same expectation checked through assertXmlEquivalent, the structural XML helper of this class.
self.assertXmlEquivalent(self.output.getvalue(), expected_value)

def test_multivalued_fields(self):
output = StringIO()
@@ -202,7 +222,7 @@ def test_multivalued_fields(self):
ie.export_item(item)
ie.finish_exporting()
expected_value = '<?xml version="1.0" encoding="utf-8"?>\n<items><item><name><value>John\xc2\xa3</value><value>Doe</value></name></item></items>'
self.assertEqual(output.getvalue(), expected_value)
self.assertXmlEquivalent(output.getvalue(), expected_value)

def test_nested_item(self):
output = StringIO()
@@ -224,7 +244,7 @@ def test_nested_item(self):
'</age>'\
'<name>buz</name>'\
'</item></items>'
self.assertEqual(output.getvalue(), expected_value)
self.assertXmlEquivalent(output.getvalue(), expected_value)

def test_nested_list_item(self):
output = StringIO()
@@ -243,7 +263,8 @@ def test_nested_list_item(self):
'</age>'\
'<name>buz</name>'\
'</item></items>'
self.assertEqual(output.getvalue(), expected_value)
self.assertXmlEquivalent(output.getvalue(), expected_value)


class JsonLinesItemExporterTest(BaseItemExporterTest):

@@ -312,12 +312,11 @@ def setUp(self):
def test_extraction(self):
# Check the links HtmlParserLinkExtractor extracts from self.response with its defaults.
# NOTE(review): the two assertEqual calls below look like the before/after sides of a diff
# whose +/- markers were lost -- confirm which one should remain.
# Default arguments
lx = HtmlParserLinkExtractor()
# First form: copies the result into a list and expects four links, two of them with
# non-unicode text literals ('sample 3 repetition' and '').
self.assertEqual([link for link in lx.extract_links(self.response)], [
Link(url='http://example.com/sample2.html', text=u'sample 2'),
Link(url='http://example.com/sample3.html', text=u'sample 3 text'),
Link(url='http://example.com/sample3.html', text='sample 3 repetition'),
Link(url='http://www.google.com/something', text=''),
])
# Second form: compares the return value directly; same four links, every text a unicode literal.
self.assertEqual(lx.extract_links(self.response),
[Link(url='http://example.com/sample2.html', text=u'sample 2'),
Link(url='http://example.com/sample3.html', text=u'sample 3 text'),
Link(url='http://example.com/sample3.html', text=u'sample 3 repetition'),
Link(url='http://www.google.com/something', text=u''),])


class RegexLinkExtractorTestCase(unittest.TestCase):
@@ -329,13 +328,10 @@ def setUp(self):
def test_extraction(self):
# Check the links RegexLinkExtractor extracts from self.response with its defaults.
# NOTE(review): the two assertEqual calls below look like the before/after sides of a diff
# whose +/- markers were lost; they disagree on the text expected for sample3.html
# ('sample 3 repetition' vs 'sample 3 text') -- confirm which one should remain.
# Default arguments
lx = RegexLinkExtractor()
# Note that RegexLinkExtractor returns links in arbitrary order,
# so we need to sort them for comparison
self.assertEqual(sorted(lx.extract_links(self.response), key=lambda x: x.url), [
Link(url='http://example.com/sample2.html', text=u'sample 2'),
Link(url='http://example.com/sample3.html', text=u'sample 3 repetition'),
Link(url='http://www.google.com/something', text=u''),
])
# This form compares the returned list as-is, without sorting by url.
self.assertEqual(lx.extract_links(self.response),
[Link(url='http://example.com/sample2.html', text=u'sample 2'),
Link(url='http://example.com/sample3.html', text=u'sample 3 text'),
Link(url='http://www.google.com/something', text=u''),])


if __name__ == "__main__":
@@ -24,33 +24,36 @@ class IdentifiedPersonItem(DjangoItem):

class DjangoItemTest(unittest.TestCase):

def assertSortedEqual(self, first, second, msg=None):
    """Assert that two iterables are equal once both have been sorted.

    :param first: iterable produced by the code under test.
    :param second: iterable holding the expected elements.
    :param msg: optional failure message forwarded to ``assertEqual``.
    """
    ordered_first = sorted(first)
    ordered_second = sorted(second)
    return self.assertEqual(ordered_first, ordered_second, msg)

def setUp(self):
    """Skip the test unless 'django' is listed in ``optional_features``."""
    django_available = 'django' in optional_features
    if django_available:
        return
    raise unittest.SkipTest("Django is not available")

def test_base(self):
# BasePersonItem is expected to expose exactly the fields 'age' and 'name'.
# NOTE(review): the two assertions look like the before/after sides of a diff whose +/-
# markers were lost -- confirm which one should remain.
i = BasePersonItem()
# Exact comparison: depends on the order in which keys() returns the field names.
self.assertEqual(i.fields.keys(), ['age', 'name'])
# Order-insensitive comparison through assertSortedEqual.
self.assertSortedEqual(i.fields.keys(), ['age', 'name'])

def test_new_fields(self):
# NewFieldPersonItem is expected to expose the fields 'age', 'other' and 'name'.
# NOTE(review): the two assertions look like the before/after sides of a diff whose +/-
# markers were lost -- confirm which one should remain.
i = NewFieldPersonItem()
# Exact comparison: depends on the order in which keys() returns the field names.
self.assertEqual(i.fields.keys(), ['age', 'other', 'name'])
# Order-insensitive comparison through assertSortedEqual.
self.assertSortedEqual(i.fields.keys(), ['age', 'other', 'name'])

def test_override_field(self):
# OverrideFieldPersonItem is expected to expose exactly the fields 'age' and 'name'.
# NOTE(review): the two assertions look like the before/after sides of a diff whose +/-
# markers were lost -- confirm which one should remain.
i = OverrideFieldPersonItem()
# Exact comparison: depends on the order in which keys() returns the field names.
self.assertEqual(i.fields.keys(), ['age', 'name'])
# Order-insensitive comparison through assertSortedEqual.
self.assertSortedEqual(i.fields.keys(), ['age', 'name'])

def test_custom_primary_key_field(self):
"""
Test that if a custom primary key exists, it is
in the field list.
"""
# NOTE(review): the two assertions look like the before/after sides of a diff whose +/-
# markers were lost -- confirm which one should remain.
i = IdentifiedPersonItem()
# Exact comparison: depends on the order in which keys() returns the field names.
self.assertEqual(i.fields.keys(), ['age', 'identifier', 'name'])
# Order-insensitive comparison through assertSortedEqual; 'identifier' must be present.
self.assertSortedEqual(i.fields.keys(), ['age', 'identifier', 'name'])

def test_save(self):
i = BasePersonItem()
self.assertEqual(i.fields.keys(), ['age', 'name'])
self.assertSortedEqual(i.fields.keys(), ['age', 'name'])

i['name'] = 'John'
i['age'] = '22'
@@ -1,15 +1,15 @@
import unittest

from scrapy.contrib.downloadermiddleware.ajaxcrawl import AjaxCrawlMiddleware
from scrapy.spider import BaseSpider
from scrapy.spider import Spider
from scrapy.http import Request, HtmlResponse, Response
from scrapy.utils.test import get_crawler

__doctests__ = ['scrapy.contrib.downloadermiddleware.ajaxcrawl']

class AjaxCrawlMiddlewareTest(unittest.TestCase):
def setUp(self):
# Create the spider and the AjaxCrawlMiddleware instance stored on the test case.
# NOTE(review): self.spider is assigned twice in a row (BaseSpider, then Spider); this looks
# like the before/after sides of a diff whose +/- markers were lost. As written, the second
# assignment replaces the first -- confirm which one should remain.
self.spider = BaseSpider('foo')
self.spider = Spider('foo')
# The middleware is built from a crawler created with AJAXCRAWL_ENABLED set to True.
crawler = get_crawler({'AJAXCRAWL_ENABLED': True})
self.mw = AjaxCrawlMiddleware.from_crawler(crawler)

@@ -1,4 +1,5 @@
from unittest import TestCase
import re

from scrapy.http import Response, Request
from scrapy.spider import Spider
@@ -7,6 +8,12 @@

class CookiesMiddlewareTest(TestCase):

def assertCookieValEqual(self, first, second, msg=None):
    """Assert that two Cookie header values carry the same cookies.

    Each value is split on ``;`` (together with any whitespace that
    follows it) and the resulting pieces are compared after sorting, so
    the order of the cookies inside the header does not matter.

    :param first: header value produced by the code under test; may be
        ``None`` when the header is missing.
    :param second: the expected header value.
    :param msg: optional failure message forwarded to ``assertEqual``.
    :raises AssertionError: if the two values do not hold the same pieces.
    """
    if first is None or second is None:
        # A missing header cannot be split; compare directly so the test
        # fails with a normal assertion message instead of a TypeError.
        return self.assertEqual(first, second, msg)

    def cookie_pairs(header_value):
        # Raw string: '\s' inside a plain literal is an invalid escape sequence.
        return sorted(re.split(r';\s*', header_value))

    return self.assertEqual(cookie_pairs(first), cookie_pairs(second), msg)

def setUp(self):
self.spider = Spider('foo')
self.mw = CookiesMiddleware()
@@ -86,7 +93,8 @@ def test_merge_request_cookies(self):

req2 = Request('http://scrapytest.org/sub1/')
assert self.mw.process_request(req2, self.spider) is None
self.assertEquals(req2.headers.get('Cookie'), "C1=value1; galleta=salada")

self.assertCookieValEqual(req2.headers.get('Cookie'), "C1=value1; galleta=salada")

def test_cookiejar_key(self):
req = Request('http://scrapytest.org/', cookies={'galleta': 'salada'}, meta={'cookiejar': "store1"})
@@ -99,8 +107,7 @@ def test_cookiejar_key(self):

req2 = Request('http://scrapytest.org/', meta=res.meta)
assert self.mw.process_request(req2, self.spider) is None
self.assertEquals(req2.headers.get('Cookie'), 'C1=value1; galleta=salada')

self.assertCookieValEqual(req2.headers.get('Cookie'),'C1=value1; galleta=salada')

req3 = Request('http://scrapytest.org/', cookies={'galleta': 'dulce'}, meta={'cookiejar': "store2"})
assert self.mw.process_request(req3, self.spider) is None
@@ -112,7 +119,7 @@ def test_cookiejar_key(self):

req4 = Request('http://scrapytest.org/', meta=res2.meta)
assert self.mw.process_request(req4, self.spider) is None
self.assertEquals(req4.headers.get('Cookie'), 'C2=value2; galleta=dulce')
self.assertCookieValEqual(req4.headers.get('Cookie'), 'C2=value2; galleta=dulce')

#cookies from hosts with port
req5_1 = Request('http://scrapytest.org:1104/')
@@ -4,6 +4,10 @@
from scrapy.http import Headers

class HeadersTest(unittest.TestCase):

def assertSortedEqual(self, first, second, msg=None):
    """Assert that two iterables hold equal elements, ignoring their order.

    Both arguments are sorted before being passed to ``assertEqual``.

    :param first: iterable produced by the code under test.
    :param second: iterable holding the expected elements.
    :param msg: optional failure message forwarded to ``assertEqual``.
    """
    actual_sorted = sorted(first)
    expected_sorted = sorted(second)
    return self.assertEqual(actual_sorted, expected_sorted, msg)

def test_basics(self):
h = Headers({'Content-Type': 'text/html', 'Content-Length': 1234})
assert h['Content-Type']
@@ -75,13 +79,13 @@ def test_iterables(self):
idict = {'Content-Type': 'text/html', 'X-Forwarded-For': ['ip1', 'ip2']}

h = Headers(idict)
self.assertEqual(dict(h), {'Content-Type': ['text/html'], 'X-Forwarded-For': ['ip1', 'ip2']})
self.assertEqual(h.keys(), ['X-Forwarded-For', 'Content-Type'])
self.assertEqual(h.items(), [('X-Forwarded-For', ['ip1', 'ip2']), ('Content-Type', ['text/html'])])
self.assertEqual(list(h.iteritems()),
[('X-Forwarded-For', ['ip1', 'ip2']), ('Content-Type', ['text/html'])])
self.assertDictEqual(dict(h),
{'Content-Type': ['text/html'], 'X-Forwarded-For': ['ip1', 'ip2']})
self.assertSortedEqual(h.keys(), ['X-Forwarded-For', 'Content-Type'])
self.assertSortedEqual(h.items(), [('X-Forwarded-For', ['ip1', 'ip2']), ('Content-Type', ['text/html'])])
self.assertSortedEqual(h.iteritems(), [('X-Forwarded-For', ['ip1', 'ip2']), ('Content-Type', ['text/html'])])

self.assertEqual(h.values(), ['ip2', 'text/html'])
self.assertSortedEqual(h.values(), ['ip2', 'text/html'])

def test_update(self):
h = Headers()
@@ -184,6 +184,9 @@ class FormRequestTest(RequestTest):

request_class = FormRequest

def assertSortedEqual(self, first, second, msg=None):
    """Compare two iterables for equality after sorting each of them.

    :param first: iterable produced by the code under test.
    :param second: iterable holding the expected elements.
    :param msg: optional failure message forwarded to ``assertEqual``.
    """
    left = sorted(first)
    right = sorted(second)
    return self.assertEqual(left, right, msg)

def test_empty_formdata(self):
r1 = self.request_class("http://www.example.com", formdata={})
self.assertEqual(r1.body, '')
@@ -194,7 +197,8 @@ def test_default_encoding(self):
r2 = self.request_class("http://www.example.com", formdata=data)
self.assertEqual(r2.method, 'POST')
self.assertEqual(r2.encoding, 'utf-8')
self.assertEqual(r2.body, 'price=%C2%A3+100&one=two')
self.assertSortedEqual(r2.body.split('&'),
'price=%C2%A3+100&one=two'.split('&'))
self.assertEqual(r2.headers['Content-Type'], 'application/x-www-form-urlencoded')

def test_custom_encoding(self):
@@ -207,7 +211,8 @@ def test_multi_key_values(self):
# using multiples values for a single key
data = {'price': u'\xa3 100', 'colours': ['red', 'blue', 'green']}
r3 = self.request_class("http://www.example.com", formdata=data)
self.assertEqual(r3.body, 'colours=red&colours=blue&colours=green&price=%C2%A3+100')
self.assertSortedEqual(r3.body.split('&'),
'colours=red&colours=blue&colours=green&price=%C2%A3+100'.split('&'))

def test_from_response_post(self):
response = _buildresponse(
@@ -5,6 +5,9 @@

class ItemTest(unittest.TestCase):

def assertSortedEqual(self, first, second, msg=None):
    """Assert equality of two iterables without regard to element order.

    Each argument is sorted, and the sorted lists are compared with
    ``assertEqual``.

    :param first: iterable produced by the code under test.
    :param second: iterable holding the expected elements.
    :param msg: optional failure message forwarded to ``assertEqual``.
    """
    sorted_actual = sorted(first)
    sorted_expected = sorted(second)
    return self.assertEqual(sorted_actual, sorted_expected, msg)

def test_simple(self):
class TestItem(Item):
name = Field()
@@ -108,8 +111,8 @@ class TestItem(Item):

i['keys'] = u'Keys'
i['values'] = u'Values'
self.assertEqual(i.keys(), ['keys', 'values', 'name'])
self.assertEqual(i.values(), [u'Keys', u'Values', u'John'])
self.assertSortedEqual(i.keys(), ['keys', 'values', 'name'])
self.assertSortedEqual(i.values(), [u'Keys', u'Values', u'John'])

def test_metaclass_inheritance(self):
class BaseItem(Item):
@@ -169,7 +169,9 @@ def _test(self, factory, testvalue):
protocol = client.ScrapyHTTPPageGetter()
protocol.factory = factory
protocol.makeConnection(transport)
self.assertEqual(transport.value(), testvalue)
self.assertEqual(
set(transport.value().splitlines()),
set(testvalue.splitlines()))
return testvalue

def test_non_standard_line_endings(self):

0 comments on commit 76c7e20

Please sign in to comment.
You can’t perform that action at this time.