From 2999fc75b123a1c9ce6766c9580034bf58f2726f Mon Sep 17 00:00:00 2001
From: Felix Yan <felixonmars@gmail.com>
Date: Tue, 15 Jul 2014 15:52:10 +0800
Subject: [PATCH 1/4] PY3: use six.BytesIO and six.moves.cStringIO

---
 scrapy/contrib/pipeline/files.py              |  3 +--
 scrapy/contrib/pipeline/images.py             |  5 ++---
 .../downloadermiddleware/decompression.py     |  7 +++---
 scrapy/core/downloader/handlers/ftp.py        |  4 ++--
 scrapy/core/downloader/handlers/http11.py     |  4 ++--
 scrapy/mail.py                                |  2 +-
 scrapy/responsetypes.py                       |  3 +--
 scrapy/tests/test_contrib_exporter.py         | 22 +++++++++----------
 scrapy/tests/test_contrib_feedexport.py       | 16 +++++++-------
 ...st_downloadermiddleware_httpcompression.py | 10 ++++-----
 scrapy/tests/test_log.py                      |  4 ++--
 scrapy/tests/test_mail.py                     |  6 ++---
 scrapy/tests/test_pipeline_images.py          |  4 ++--
 scrapy/tests/test_spider.py                   | 10 ++++-----
 scrapy/tests/test_utils_jsonrpc.py            |  5 ++---
 scrapy/tests/test_utils_misc/__init__.py      |  1 -
 scrapy/utils/gz.py                            |  8 +++----
 scrapy/utils/iterators.py                     |  9 ++++----
 18 files changed, 58 insertions(+), 65 deletions(-)

diff --git a/scrapy/contrib/pipeline/files.py b/scrapy/contrib/pipeline/files.py
index 8c43982a1fa..93cf9621b99 100644
--- a/scrapy/contrib/pipeline/files.py
+++ b/scrapy/contrib/pipeline/files.py
@@ -9,7 +9,6 @@ import time
 from six.moves.urllib.parse import urlparse
 from collections import defaultdict
-from cStringIO import StringIO
 import six
 
 from twisted.internet import defer, threads
@@ -257,7 +256,7 @@ def get_media_requests(self, item, info):
 
     def file_downloaded(self, response, request, info):
         path = self.file_path(request, response=response, info=info)
-        buf = StringIO(response.body)
+        buf = six.BytesIO(response.body)
         self.store.persist_file(path, buf, info)
         checksum = md5sum(buf)
         return checksum
diff --git a/scrapy/contrib/pipeline/images.py b/scrapy/contrib/pipeline/images.py
index e955e72bf75..ed29b6f5488 100644
--- a/scrapy/contrib/pipeline/images.py
+++ b/scrapy/contrib/pipeline/images.py
@@ -5,7 +5,6 @@
 """
 
 import hashlib
-from cStringIO import StringIO
 import six
 
 from PIL import Image
@@ -70,7 +69,7 @@ def image_downloaded(self, response, request, info):
 
     def get_images(self, response, request, info):
         path = self.file_path(request, response=response, info=info)
-        orig_image = Image.open(StringIO(response.body))
+        orig_image = Image.open(six.BytesIO(response.body))
 
         width, height = orig_image.size
         if width < self.MIN_WIDTH or height < self.MIN_HEIGHT:
@@ -97,7 +96,7 @@ def convert_image(self, image, size=None):
             image = image.copy()
             image.thumbnail(size, Image.ANTIALIAS)
 
-        buf = StringIO()
+        buf = six.BytesIO()
         image.save(buf, 'JPEG')
         return image, buf
diff --git a/scrapy/contrib_exp/downloadermiddleware/decompression.py b/scrapy/contrib_exp/downloadermiddleware/decompression.py
index 6fad5b9b14a..f6b47eaa1b3 100644
--- a/scrapy/contrib_exp/downloadermiddleware/decompression.py
+++ b/scrapy/contrib_exp/downloadermiddleware/decompression.py
@@ -6,7 +6,6 @@
 import gzip
 import zipfile
 import tarfile
-from cStringIO import StringIO
 from tempfile import mktemp
 import six
 
@@ -27,7 +26,7 @@ def __init__(self):
         }
 
     def _is_tar(self, response):
-        archive = StringIO(response.body)
+        archive = six.BytesIO(response.body)
         try:
             tar_file = tarfile.open(name=mktemp(), fileobj=archive)
         except tarfile.ReadError:
@@ -38,7 +37,7 @@ def _is_tar(self, response):
         return response.replace(body=body, cls=respcls)
 
     def _is_zip(self, response):
-        archive = StringIO(response.body)
+        archive = six.BytesIO(response.body)
         try:
             zip_file = zipfile.ZipFile(archive)
         except zipfile.BadZipfile:
@@ -50,7 +49,7 @@ def _is_zip(self, response):
         return response.replace(body=body, cls=respcls)
 
     def _is_gzip(self, response):
-        archive = StringIO(response.body)
+        archive = six.BytesIO(response.body)
         try:
             body = gzip.GzipFile(fileobj=archive).read()
         except IOError:
diff --git a/scrapy/core/downloader/handlers/ftp.py b/scrapy/core/downloader/handlers/ftp.py
index 6b5f03bda70..8550e1b1827 100644
--- a/scrapy/core/downloader/handlers/ftp.py
+++ b/scrapy/core/downloader/handlers/ftp.py
@@ -29,8 +29,8 @@
 """
 
 import re
+import six
 from six.moves.urllib.parse import urlparse
-from cStringIO import StringIO
 
 from twisted.internet import reactor
 from twisted.protocols.ftp import FTPClient, CommandFailed
@@ -42,7 +42,7 @@ class ReceivedDataProtocol(Protocol):
 
     def __init__(self, filename=None):
         self.__filename = filename
-        self.body = open(filename, "w") if filename else StringIO()
+        self.body = open(filename, "w") if filename else six.BytesIO()
         self.size = 0
 
     def dataReceived(self, data):
diff --git a/scrapy/core/downloader/handlers/http11.py b/scrapy/core/downloader/handlers/http11.py
index 455794b14b6..47a4fed31c9 100644
--- a/scrapy/core/downloader/handlers/http11.py
+++ b/scrapy/core/downloader/handlers/http11.py
@@ -1,9 +1,9 @@
 """Download handlers for http and https schemes"""
 
 import re
+import six
 from time import time
-from cStringIO import StringIO
 from six.moves.urllib.parse import urldefrag
 
 from zope.interface import implements
@@ -234,7 +234,7 @@ def __init__(self, finished, txresponse, request):
         self._finished = finished
         self._txresponse = txresponse
         self._request = request
-        self._bodybuf = StringIO()
+        self._bodybuf = six.BytesIO()
 
     def dataReceived(self, bodyBytes):
         self._bodybuf.write(bodyBytes)
diff --git a/scrapy/mail.py b/scrapy/mail.py
index a5d936ab31c..e1d7c44f672 100644
--- a/scrapy/mail.py
+++ b/scrapy/mail.py
@@ -3,7 +3,7 @@
 See documentation in docs/topics/email.rst
 """
-from cStringIO import StringIO
+from six.moves import cStringIO as StringIO
 import six
 
 from email.utils import COMMASPACE, formatdate
diff --git a/scrapy/responsetypes.py b/scrapy/responsetypes.py
index ac0559a5034..f8cd5b4ea9d 100644
--- a/scrapy/responsetypes.py
+++ b/scrapy/responsetypes.py
@@ -6,7 +6,6 @@
 from mimetypes import MimeTypes
 from pkgutil import get_data
-from cStringIO import StringIO
 import six
 
 from scrapy.http import Response
@@ -34,7 +33,7 @@ def __init__(self):
         self.classes = {}
         self.mimetypes = MimeTypes()
         mimedata = get_data('scrapy', 'mime.types')
-        self.mimetypes.readfp(StringIO(mimedata))
+        self.mimetypes.readfp(six.BytesIO(mimedata))
         for mimetype, cls in six.iteritems(self.CLASSES):
             self.classes[mimetype] = load_object(cls)
diff --git a/scrapy/tests/test_contrib_exporter.py b/scrapy/tests/test_contrib_exporter.py
index d50544c51b8..ce1a56d05a7 100644
--- a/scrapy/tests/test_contrib_exporter.py
+++ b/scrapy/tests/test_contrib_exporter.py
@@ -1,6 +1,6 @@
 import unittest, json
+import six
 from six.moves import cPickle as pickle
-from cStringIO import StringIO
 import lxml.etree
 import re
 
@@ -19,7 +19,7 @@ class BaseItemExporterTest(unittest.TestCase):
 
     def setUp(self):
         self.i = TestItem(name=u'John\xa3', age='22')
-        self.output = StringIO()
+        self.output = six.BytesIO()
         self.ie = self._get_exporter()
 
     def _get_exporter(self, **kwargs):
@@ -126,7 +126,7 @@ def _check_output(self):
     def test_export_multiple_items(self):
         i1 = TestItem(name='hello', age='world')
         i2 = TestItem(name='bye', age='world')
-        f = StringIO()
+        f = six.BytesIO()
         ie = PickleItemExporter(f)
         ie.start_exporting()
         ie.export_item(i1)
@@ -151,21 +151,21 @@ def _check_output(self):
         self.assertCsvEqual(self.output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n')
 
     def test_header(self):
-        output = StringIO()
+        output = six.BytesIO()
         ie = CsvItemExporter(output, fields_to_export=self.i.fields.keys())
         ie.start_exporting()
         ie.export_item(self.i)
         ie.finish_exporting()
         self.assertCsvEqual(output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n')
 
-        output = StringIO()
+        output = six.BytesIO()
         ie = CsvItemExporter(output, fields_to_export=['age'])
         ie.start_exporting()
         ie.export_item(self.i)
         ie.finish_exporting()
         self.assertCsvEqual(output.getvalue(), 'age\r\n22\r\n')
 
-        output = StringIO()
+        output = six.BytesIO()
         ie = CsvItemExporter(output)
         ie.start_exporting()
         ie.export_item(self.i)
@@ -173,7 +173,7 @@ def test_header(self):
         ie.finish_exporting()
         self.assertCsvEqual(output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n22,John\xc2\xa3\r\n')
 
-        output = StringIO()
+        output = six.BytesIO()
         ie = CsvItemExporter(output, include_headers_line=False)
         ie.start_exporting()
         ie.export_item(self.i)
@@ -186,7 +186,7 @@ class TestItem2(Item):
             friends = Field()
 
         i = TestItem2(name='John', friends=['Mary', 'Paul'])
-        output = StringIO()
+        output = six.BytesIO()
         ie = CsvItemExporter(output, include_headers_line=False)
         ie.start_exporting()
         ie.export_item(i)
@@ -216,7 +216,7 @@ def _check_output(self):
         self.assertXmlEquivalent(self.output.getvalue(), expected_value)
 
     def test_multivalued_fields(self):
-        output = StringIO()
+        output = six.BytesIO()
         item = TestItem(name=[u'John\xa3', u'Doe'])
         ie = XmlItemExporter(output)
         ie.start_exporting()
@@ -226,7 +226,7 @@ def test_multivalued_fields(self):
         self.assertXmlEquivalent(output.getvalue(), expected_value)
 
     def test_nested_item(self):
-        output = StringIO()
+        output = six.BytesIO()
         i1 = TestItem(name=u'foo\xa3hoo', age='22')
         i2 = TestItem(name=u'bar', age=i1)
         i3 = TestItem(name=u'buz', age=i2)
@@ -248,7 +248,7 @@ def test_nested_item(self):
         self.assertXmlEquivalent(output.getvalue(), expected_value)
 
     def test_nested_list_item(self):
-        output = StringIO()
+        output = six.BytesIO()
         i1 = TestItem(name=u'foo')
         i2 = TestItem(name=u'bar')
         i3 = TestItem(name=u'buz', age=[i1, i2])
diff --git a/scrapy/tests/test_contrib_feedexport.py b/scrapy/tests/test_contrib_feedexport.py
index 7a1b3dc8f0d..925b277b3d6 100644
--- a/scrapy/tests/test_contrib_feedexport.py
+++ b/scrapy/tests/test_contrib_feedexport.py
@@ -1,6 +1,6 @@
 import os
+import six
 from six.moves.urllib.parse import urlparse
-from cStringIO import StringIO
 
 from zope.interface.verify import verifyObject
 from twisted.trial import unittest
@@ -62,13 +62,13 @@ def test_store(self):
     def _assert_stores(self, storage, path):
         spider = Spider("default")
         file = storage.open(spider)
-        file.write("content")
+        file.write(b"content")
         yield storage.store(file)
         self.failUnless(os.path.exists(path))
-        self.failUnlessEqual(open(path).read(), "content")
+        self.failUnlessEqual(open(path).read(), b"content")
         # again, to check s3 objects are overwritten
-        yield storage.store(StringIO("new content"))
-        self.failUnlessEqual(open(path).read(), "new content")
+        yield storage.store(six.BytesIO(b"new content"))
+        self.failUnlessEqual(open(path).read(), b"new content")
 
 
 class S3FeedStorageTest(unittest.TestCase):
@@ -93,9 +93,9 @@ class StdoutFeedStorageTest(unittest.TestCase):
 
     @defer.inlineCallbacks
     def test_store(self):
-        out = StringIO()
+        out = six.BytesIO()
         storage = StdoutFeedStorage('stdout:', _stdout=out)
         file = storage.open(Spider("default"))
-        file.write("content")
+        file.write(b"content")
         yield storage.store(file)
-        self.assertEqual(out.getvalue(), "content")
+        self.assertEqual(out.getvalue(), b"content")
diff --git a/scrapy/tests/test_downloadermiddleware_httpcompression.py b/scrapy/tests/test_downloadermiddleware_httpcompression.py
index 5fbc2c6194e..e37d43d7d65 100644
--- a/scrapy/tests/test_downloadermiddleware_httpcompression.py
+++ b/scrapy/tests/test_downloadermiddleware_httpcompression.py
@@ -1,6 +1,6 @@
+import six
 from unittest import TestCase
 from os.path import join, abspath, dirname
-from cStringIO import StringIO
 from gzip import GzipFile
 
 from scrapy.spider import Spider
@@ -104,8 +104,8 @@ def test_process_response_encoding_inside_body(self):
             'Content-Type': 'text/html',
             'Content-Encoding': 'gzip',
         }
-        f = StringIO()
-        plainbody = """<html><head><title>Some page"""
+        f = six.BytesIO()
+        plainbody = b"""<html><head><title>Some page"""
         zf = GzipFile(fileobj=f, mode='wb')
         zf.write(plainbody)
         zf.close()
@@ -122,8 +122,8 @@ def test_process_response_force_recalculate_encoding(self):
             'Content-Type': 'text/html',
             'Content-Encoding': 'gzip',
         }
-        f = StringIO()
-        plainbody = """<html><head><title>Some page"""
+        f = six.BytesIO()
+        plainbody = b"""<html><head><title>Some page"""
         zf = GzipFile(fileobj=f, mode='wb')
         zf.write(plainbody)
         zf.close()
diff --git a/scrapy/tests/test_log.py b/scrapy/tests/test_log.py
index 9367fb953ff..afe6fe1dd5d 100644
--- a/scrapy/tests/test_log.py
+++ b/scrapy/tests/test_log.py
@@ -1,4 +1,4 @@
-from cStringIO import StringIO
+import six
 
 from twisted.python import log as txlog, failure
 from twisted.trial import unittest
@@ -21,7 +21,7 @@ class ScrapyFileLogObserverTest(unittest.TestCase):
     encoding = 'utf-8'
 
     def setUp(self):
-        self.f = StringIO()
+        self.f = six.BytesIO()
         self.log_observer = log.ScrapyFileLogObserver(self.f, self.level, self.encoding)
         self.log_observer.start()
diff --git a/scrapy/tests/test_mail.py b/scrapy/tests/test_mail.py
index 250ae04aee4..392568f3826 100644
--- a/scrapy/tests/test_mail.py
+++ b/scrapy/tests/test_mail.py
@@ -1,6 +1,6 @@
 import unittest
 
-from cStringIO import StringIO
+import six
 from scrapy.mail import MailSender
 
 class MailSenderTest(unittest.TestCase):
@@ -30,8 +30,8 @@ def test_send_html(self):
         self.assertEqual(msg.get('Content-Type'), 'text/html')
 
     def test_send_attach(self):
-        attach = StringIO()
-        attach.write('content')
+        attach = six.BytesIO()
+        attach.write(b'content')
         attach.seek(0)
         attachs = [('attachment', 'text/plain', attach)]
diff --git a/scrapy/tests/test_pipeline_images.py b/scrapy/tests/test_pipeline_images.py
index 511d0f50e4c..fa5c29aa79e 100644
--- a/scrapy/tests/test_pipeline_images.py
+++ b/scrapy/tests/test_pipeline_images.py
@@ -1,9 +1,9 @@
 import os
 import hashlib
 import warnings
-from cStringIO import StringIO
 from tempfile import mkdtemp
 from shutil import rmtree
+import six
 
 from twisted.trial import unittest
 
@@ -201,7 +201,7 @@ class TestItem(Item):
 
 def _create_image(format, *a, **kw):
-    buf = StringIO()
+    buf = six.BytesIO()
     Image.new(*a, **kw).save(buf, format)
     buf.seek(0)
     return Image.open(buf)
diff --git a/scrapy/tests/test_spider.py b/scrapy/tests/test_spider.py
index 8abc3962843..22716cdb1ff 100644
--- a/scrapy/tests/test_spider.py
+++ b/scrapy/tests/test_spider.py
@@ -1,8 +1,8 @@
 import gzip
 import inspect
 import warnings
-from cStringIO import StringIO
 from scrapy.utils.trackref import object_ref
+import six
 
 from twisted.trial import unittest
 
@@ -57,7 +57,7 @@ class XMLFeedSpiderTest(SpiderTest):
     spider_class = XMLFeedSpider
 
     def test_register_namespace(self):
-        body = """
+        body = b"""
         http://www.example.com/Special-Offers.html2009-08-16
@@ -103,7 +103,7 @@ class CSVFeedSpiderTest(SpiderTest):
 
 class CrawlSpiderTest(SpiderTest):
 
-    test_body = """<html><head><title>Page title<title>
+    test_body = b"""<html><head><title>Page title<title>
     <body>
     <p><a href="item/12.html">Item 12</a></p>
     <div class='links'>
@@ -195,8 +195,8 @@ class SitemapSpiderTest(SpiderTest):
 
     spider_class = SitemapSpider
 
-    BODY = "SITEMAP"
-    f = StringIO()
+    BODY = b"SITEMAP"
+    f = six.BytesIO()
     g = gzip.GzipFile(fileobj=f, mode='w+b')
     g.write(BODY)
     g.close()
diff --git a/scrapy/tests/test_utils_jsonrpc.py b/scrapy/tests/test_utils_jsonrpc.py
index 6eb60d1f44c..26e77dd5878 100644
--- a/scrapy/tests/test_utils_jsonrpc.py
+++ b/scrapy/tests/test_utils_jsonrpc.py
@@ -1,5 +1,4 @@
-import unittest, json
-from cStringIO import StringIO
+import unittest, json, six
 
 from scrapy.utils.jsonrpc import jsonrpc_client_call, jsonrpc_server_call, \
     JsonRpcError, jsonrpc_errors
@@ -19,7 +18,7 @@ def __init__(self, result=None, error=None):
     def urlopen(self, url, request):
         self.url = url
         self.request = request
-        return StringIO(self.response)
+        return six.BytesIO(self.response)
 
 class TestTarget(object):
diff --git a/scrapy/tests/test_utils_misc/__init__.py b/scrapy/tests/test_utils_misc/__init__.py
index 143c9b6445b..18fc6d5329e 100644
--- a/scrapy/tests/test_utils_misc/__init__.py
+++ b/scrapy/tests/test_utils_misc/__init__.py
@@ -1,7 +1,6 @@
 import sys
 import os
 import unittest
-from cStringIO import StringIO
 
 from scrapy.item import Item, Field
 from scrapy.utils.misc import load_object, arg_to_iter, walk_modules
diff --git a/scrapy/utils/gz.py b/scrapy/utils/gz.py
index aa8ffc6fca9..e978f897047 100644
--- a/scrapy/utils/gz.py
+++ b/scrapy/utils/gz.py
@@ -1,5 +1,5 @@
 import struct
-from cStringIO import StringIO
+import six
 from gzip import GzipFile
 
 def gunzip(data):
@@ -7,9 +7,9 @@ def gunzip(data):
 
     This is resilient to CRC checksum errors.
     """
-    f = GzipFile(fileobj=StringIO(data))
-    output = ''
-    chunk = '.'
+    f = GzipFile(fileobj=six.BytesIO(data))
+    output = b''
+    chunk = b'.'
     while chunk:
         try:
             chunk = f.read(8196)
diff --git a/scrapy/utils/iterators.py b/scrapy/utils/iterators.py
index 92717d9bcc7..983a3c8d791 100644
--- a/scrapy/utils/iterators.py
+++ b/scrapy/utils/iterators.py
@@ -1,5 +1,4 @@
-import re, csv
-from cStringIO import StringIO
+import re, csv, six
 
 from scrapy.http import TextResponse, Response
 from scrapy.selector import Selector
@@ -48,7 +47,7 @@ def csviter(obj, delimiter=None, headers=None, encoding=None):
     def _getrow(csv_r):
         return [str_to_unicode(field, encoding) for field in next(csv_r)]
 
-    lines = StringIO(_body_or_str(obj, unicode=False))
+    lines = six.BytesIO(_body_or_str(obj, unicode=False))
     if delimiter:
         csv_r = csv.reader(lines, delimiter=delimiter)
     else:
@@ -68,7 +67,7 @@ def _getrow(csv_r):
 
 def _body_or_str(obj, unicode=True):
-    assert isinstance(obj, (Response, basestring)), \
+    assert isinstance(obj, (Response, six.string_types)), \
         "obj must be Response or basestring, not %s" % type(obj).__name__
     if isinstance(obj, Response):
         if not unicode:
@@ -77,7 +76,7 @@ def _body_or_str(obj, unicode=True):
             return obj.body_as_unicode()
         else:
             return obj.body.decode('utf-8')
-    elif type(obj) is type(u''):
+    elif type(obj) is six.text_type:
         return obj if unicode else obj.encode('utf-8')
     else:
         return obj.decode('utf-8') if unicode else obj

From ef8872a518af65714deb2f8767a8dfa55d1558cb Mon Sep 17 00:00:00 2001
From: Felix Yan <felixonmars@gmail.com>
Date: Tue, 15 Jul 2014 16:24:57 +0800
Subject: [PATCH 2/4] use .seek(0) instead of reset() for compatibility

Since I couldn't find docs for the .reset() method, I looked up the
source code, and it works just the same as .seek(0).

For reference: https://github.com/python/cpython/blob/8eeb7e9122109f5cc71c22047d5cdd312ca770a0/Modules/cStringIO.c#L281
---
 scrapy/core/downloader/handlers/ftp.py | 2 +-
 scrapy/tests/test_contrib_exporter.py  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/scrapy/core/downloader/handlers/ftp.py b/scrapy/core/downloader/handlers/ftp.py
index 8550e1b1827..1fd763b4035 100644
--- a/scrapy/core/downloader/handlers/ftp.py
+++ b/scrapy/core/downloader/handlers/ftp.py
@@ -54,7 +54,7 @@ def filename(self):
         return self.__filename
 
     def close(self):
-        self.body.close() if self.filename else self.body.reset()
+        self.body.close() if self.filename else self.body.seek(0)
 
 _CODE_RE = re.compile("\d+")
 class FTPDownloadHandler(object):
diff --git a/scrapy/tests/test_contrib_exporter.py b/scrapy/tests/test_contrib_exporter.py
index ce1a56d05a7..fc5dd82b569 100644
--- a/scrapy/tests/test_contrib_exporter.py
+++ b/scrapy/tests/test_contrib_exporter.py
@@ -132,7 +132,7 @@ def test_export_multiple_items(self):
         ie.export_item(i1)
         ie.export_item(i2)
         ie.finish_exporting()
-        f.reset()
+        f.seek(0)
         self.assertEqual(pickle.load(f), i1)
         self.assertEqual(pickle.load(f), i2)

From 0786e84a33155ebc8d8d3502e3a7f3060b86a4ec Mon Sep 17 00:00:00 2001
From: Felix Yan <felixonmars@gmail.com>
Date: Tue, 15 Jul 2014 20:51:12 +0800
Subject: [PATCH 3/4] use io.BytesIO and cStringIO instead of six.BytesIO as
 suggested

---
 scrapy/contrib/pipeline/files.py              |  7 +++++-
 scrapy/contrib/pipeline/images.py             |  9 ++++++--
 .../downloadermiddleware/decompression.py     | 12 +++++++---
 scrapy/core/downloader/handlers/ftp.py        |  4 ++--
 scrapy/core/downloader/handlers/http11.py     |  4 ++--
 scrapy/responsetypes.py                       |  3 ++-
 scrapy/tests/test_contrib_exporter.py         | 22 +++++++++----------
 scrapy/tests/test_contrib_feedexport.py       |  6 ++---
 ...st_downloadermiddleware_httpcompression.py |  6 ++---
 scrapy/tests/test_log.py                      |  4 ++--
 scrapy/tests/test_mail.py                     |  4 ++--
 scrapy/tests/test_pipeline_images.py          |  4 ++--
 scrapy/tests/test_spider.py                   |  4 ++--
 scrapy/tests/test_utils_jsonrpc.py            |  5 +++--
 scrapy/utils/gz.py                            |  9 ++++++--
 scrapy/utils/iterators.py                     |  7 +++++-
 16 files changed, 69 insertions(+), 41 deletions(-)

diff --git a/scrapy/contrib/pipeline/files.py b/scrapy/contrib/pipeline/files.py
index 93cf9621b99..db8cf8b76dc 100644
--- a/scrapy/contrib/pipeline/files.py
+++ b/scrapy/contrib/pipeline/files.py
@@ -11,6 +11,11 @@ from collections import defaultdict
 import six
 
+try:
+    from cStringIO import StringIO as BytesIO
+except ImportError:
+    from io import BytesIO
+
 from twisted.internet import defer, threads
 
 from scrapy import log
@@ -256,7 +261,7 @@ def get_media_requests(self, item, info):
 
     def file_downloaded(self, response, request, info):
         path = self.file_path(request, response=response, info=info)
-        buf = six.BytesIO(response.body)
+        buf = BytesIO(response.body)
         self.store.persist_file(path, buf, info)
         checksum = md5sum(buf)
         return checksum
diff --git a/scrapy/contrib/pipeline/images.py b/scrapy/contrib/pipeline/images.py
index ed29b6f5488..9c1a5445500 100644
--- a/scrapy/contrib/pipeline/images.py
+++ b/scrapy/contrib/pipeline/images.py
@@ -7,6 +7,11 @@
 import hashlib
 import six
 
+try:
+    from cStringIO import StringIO as BytesIO
+except ImportError:
+    from io import BytesIO
+
 from PIL import Image
 
 from scrapy.utils.misc import md5sum
@@ -69,7 +74,7 @@ def image_downloaded(self, response, request, info):
 
     def get_images(self, response, request, info):
         path = self.file_path(request, response=response, info=info)
-        orig_image = Image.open(six.BytesIO(response.body))
+        orig_image = Image.open(BytesIO(response.body))
 
         width, height = orig_image.size
         if width < self.MIN_WIDTH or height < self.MIN_HEIGHT:
@@ -96,7 +101,7 @@ def convert_image(self, image, size=None):
             image = image.copy()
             image.thumbnail(size, Image.ANTIALIAS)
 
-        buf = six.BytesIO()
+        buf = BytesIO()
         image.save(buf, 'JPEG')
         return image, buf
diff --git a/scrapy/contrib_exp/downloadermiddleware/decompression.py b/scrapy/contrib_exp/downloadermiddleware/decompression.py
index f6b47eaa1b3..c08f50b5ff3 100644
--- a/scrapy/contrib_exp/downloadermiddleware/decompression.py
+++ b/scrapy/contrib_exp/downloadermiddleware/decompression.py
@@ -7,8 +7,14 @@
 import zipfile
 import tarfile
 from tempfile import mktemp
+
 import six
 
+try:
+    from cStringIO import StringIO as BytesIO
+except ImportError:
+    from io import BytesIO
+
 from scrapy import log
 from scrapy.responsetypes import responsetypes
@@ -26,7 +32,7 @@ def __init__(self):
         }
 
     def _is_tar(self, response):
-        archive = six.BytesIO(response.body)
+        archive = BytesIO(response.body)
         try:
             tar_file = tarfile.open(name=mktemp(), fileobj=archive)
         except tarfile.ReadError:
@@ -37,7 +43,7 @@ def _is_tar(self, response):
         return response.replace(body=body, cls=respcls)
 
     def _is_zip(self, response):
-        archive = six.BytesIO(response.body)
+        archive = BytesIO(response.body)
         try:
             zip_file = zipfile.ZipFile(archive)
         except zipfile.BadZipfile:
@@ -49,7 +55,7 @@ def _is_zip(self, response):
         return response.replace(body=body, cls=respcls)
 
     def _is_gzip(self, response):
-        archive = six.BytesIO(response.body)
+        archive = BytesIO(response.body)
         try:
             body = gzip.GzipFile(fileobj=archive).read()
         except IOError:
diff --git a/scrapy/core/downloader/handlers/ftp.py b/scrapy/core/downloader/handlers/ftp.py
index 1fd763b4035..6ac02cc2b3b 100644
--- a/scrapy/core/downloader/handlers/ftp.py
+++ b/scrapy/core/downloader/handlers/ftp.py
@@ -29,7 +29,7 @@
 """
 
 import re
-import six
+from io import BytesIO
 from six.moves.urllib.parse import urlparse
 
 from twisted.internet import reactor
@@ -42,7 +42,7 @@ class ReceivedDataProtocol(Protocol):
     def __init__(self, filename=None):
         self.__filename = filename
-        self.body = open(filename, "w") if filename else six.BytesIO()
+        self.body = open(filename, "w") if filename else BytesIO()
         self.size = 0
 
     def dataReceived(self, data):
diff --git a/scrapy/core/downloader/handlers/http11.py b/scrapy/core/downloader/handlers/http11.py
index 47a4fed31c9..b803af1dce0 100644
--- a/scrapy/core/downloader/handlers/http11.py
+++ b/scrapy/core/downloader/handlers/http11.py
@@ -1,8 +1,8 @@
 """Download handlers for http and https schemes"""
 
 import re
-import six
+from io import BytesIO
 from time import time
 from six.moves.urllib.parse import urldefrag
 
@@ -234,7 +234,7 @@ def __init__(self, finished, txresponse, request):
         self._finished = finished
         self._txresponse = txresponse
         self._request = request
-        self._bodybuf = six.BytesIO()
+        self._bodybuf = BytesIO()
 
     def dataReceived(self, bodyBytes):
         self._bodybuf.write(bodyBytes)
diff --git a/scrapy/responsetypes.py b/scrapy/responsetypes.py
index f8cd5b4ea9d..16479896fcb 100644
--- a/scrapy/responsetypes.py
+++ b/scrapy/responsetypes.py
@@ -6,6 +6,7 @@
 from mimetypes import MimeTypes
 from pkgutil import get_data
+from io import BytesIO
 import six
 
 from scrapy.http import Response
@@ -33,7 +34,7 @@ def __init__(self):
         self.classes = {}
         self.mimetypes = MimeTypes()
         mimedata = get_data('scrapy', 'mime.types')
-        self.mimetypes.readfp(six.BytesIO(mimedata))
+        self.mimetypes.readfp(BytesIO(mimedata))
         for mimetype, cls in six.iteritems(self.CLASSES):
             self.classes[mimetype] = load_object(cls)
diff --git a/scrapy/tests/test_contrib_exporter.py b/scrapy/tests/test_contrib_exporter.py
index fc5dd82b569..9092007e50e 100644
--- a/scrapy/tests/test_contrib_exporter.py
+++ b/scrapy/tests/test_contrib_exporter.py
@@ -1,5 +1,5 @@
 import unittest, json
-import six
+from io import BytesIO
 from six.moves import cPickle as pickle
 import lxml.etree
 import re
@@ -19,7 +19,7 @@ class BaseItemExporterTest(unittest.TestCase):
 
     def setUp(self):
         self.i = TestItem(name=u'John\xa3', age='22')
-        self.output = six.BytesIO()
+        self.output = BytesIO()
         self.ie = self._get_exporter()
 
     def _get_exporter(self, **kwargs):
@@ -126,7 +126,7 @@ def _check_output(self):
     def test_export_multiple_items(self):
         i1 = TestItem(name='hello', age='world')
         i2 = TestItem(name='bye', age='world')
-        f = six.BytesIO()
+        f = BytesIO()
         ie = PickleItemExporter(f)
         ie.start_exporting()
         ie.export_item(i1)
@@ -151,21 +151,21 @@ def _check_output(self):
         self.assertCsvEqual(self.output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n')
 
     def test_header(self):
-        output = six.BytesIO()
+        output = BytesIO()
         ie = CsvItemExporter(output, fields_to_export=self.i.fields.keys())
         ie.start_exporting()
         ie.export_item(self.i)
         ie.finish_exporting()
         self.assertCsvEqual(output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n')
 
-        output = six.BytesIO()
+        output = BytesIO()
         ie = CsvItemExporter(output, fields_to_export=['age'])
         ie.start_exporting()
         ie.export_item(self.i)
         ie.finish_exporting()
         self.assertCsvEqual(output.getvalue(), 'age\r\n22\r\n')
 
-        output = six.BytesIO()
+        output = BytesIO()
         ie = CsvItemExporter(output)
         ie.start_exporting()
         ie.export_item(self.i)
@@ -173,7 +173,7 @@ def test_header(self):
         ie.finish_exporting()
         self.assertCsvEqual(output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n22,John\xc2\xa3\r\n')
 
-        output = six.BytesIO()
+        output = BytesIO()
         ie = CsvItemExporter(output, include_headers_line=False)
         ie.start_exporting()
         ie.export_item(self.i)
@@ -186,7 +186,7 @@ class TestItem2(Item):
             friends = Field()
 
         i = TestItem2(name='John', friends=['Mary', 'Paul'])
-        output = six.BytesIO()
+        output = BytesIO()
         ie = CsvItemExporter(output, include_headers_line=False)
         ie.start_exporting()
         ie.export_item(i)
@@ -216,7 +216,7 @@ def _check_output(self):
         self.assertXmlEquivalent(self.output.getvalue(), expected_value)
 
     def test_multivalued_fields(self):
-        output = six.BytesIO()
+        output = BytesIO()
         item = TestItem(name=[u'John\xa3', u'Doe'])
         ie = XmlItemExporter(output)
         ie.start_exporting()
@@ -226,7 +226,7 @@ def test_multivalued_fields(self):
         self.assertXmlEquivalent(output.getvalue(), expected_value)
 
     def test_nested_item(self):
-        output = six.BytesIO()
+        output = BytesIO()
         i1 = TestItem(name=u'foo\xa3hoo', age='22')
         i2 = TestItem(name=u'bar', age=i1)
         i3 = TestItem(name=u'buz', age=i2)
@@ -248,7 +248,7 @@ def test_nested_item(self):
         self.assertXmlEquivalent(output.getvalue(), expected_value)
 
     def test_nested_list_item(self):
-        output = six.BytesIO()
+        output = BytesIO()
         i1 = TestItem(name=u'foo')
         i2 = TestItem(name=u'bar')
         i3 = TestItem(name=u'buz', age=[i1, i2])
diff --git a/scrapy/tests/test_contrib_feedexport.py b/scrapy/tests/test_contrib_feedexport.py
index 925b277b3d6..bf4943bfab3 100644
--- a/scrapy/tests/test_contrib_feedexport.py
+++ b/scrapy/tests/test_contrib_feedexport.py
@@ -1,5 +1,5 @@
 import os
-import six
+from io import BytesIO
 from six.moves.urllib.parse import urlparse
 
 from zope.interface.verify import verifyObject
@@ -67,7 +67,7 @@ def _assert_stores(self, storage, path):
         self.failUnless(os.path.exists(path))
         self.failUnlessEqual(open(path).read(), b"content")
         # again, to check s3 objects are overwritten
-        yield storage.store(six.BytesIO(b"new content"))
+        yield storage.store(BytesIO(b"new content"))
         self.failUnlessEqual(open(path).read(), b"new content")
 
 
@@ -93,7 +93,7 @@ class StdoutFeedStorageTest(unittest.TestCase):
 
     @defer.inlineCallbacks
     def test_store(self):
-        out = six.BytesIO()
+        out = BytesIO()
         storage = StdoutFeedStorage('stdout:', _stdout=out)
         file = storage.open(Spider("default"))
         file.write(b"content")
diff --git a/scrapy/tests/test_downloadermiddleware_httpcompression.py b/scrapy/tests/test_downloadermiddleware_httpcompression.py
index e37d43d7d65..8a0e75d9032 100644
--- a/scrapy/tests/test_downloadermiddleware_httpcompression.py
+++ b/scrapy/tests/test_downloadermiddleware_httpcompression.py
@@ -1,4 +1,4 @@
-import six
+from io import BytesIO
 from unittest import TestCase
 from os.path import join, abspath, dirname
 from gzip import GzipFile
@@ -104,7 +104,7 @@ def test_process_response_encoding_inside_body(self):
             'Content-Type': 'text/html',
             'Content-Encoding': 'gzip',
         }
-        f = six.BytesIO()
+        f = BytesIO()
         plainbody = b"""<html><head><title>Some page"""
         zf = GzipFile(fileobj=f, mode='wb')
         zf.write(plainbody)
         zf.close()
@@ -122,7 +122,7 @@ def test_process_response_force_recalculate_encoding(self):
             'Content-Type': 'text/html',
             'Content-Encoding': 'gzip',
         }
-        f = six.BytesIO()
+        f = BytesIO()
         plainbody = b"""<html><head><title>Some page"""
         zf = GzipFile(fileobj=f, mode='wb')
         zf.write(plainbody)
diff --git a/scrapy/tests/test_log.py b/scrapy/tests/test_log.py
index afe6fe1dd5d..f0b57b806a9 100644
--- a/scrapy/tests/test_log.py
+++ b/scrapy/tests/test_log.py
@@ -1,4 +1,4 @@
-import six
+from io import BytesIO
 
 from twisted.python import log as txlog, failure
 from twisted.trial import unittest
@@ -21,7 +21,7 @@ class ScrapyFileLogObserverTest(unittest.TestCase):
     encoding = 'utf-8'
 
     def setUp(self):
-        self.f = six.BytesIO()
+        self.f = BytesIO()
         self.log_observer = log.ScrapyFileLogObserver(self.f, self.level, self.encoding)
         self.log_observer.start()
diff --git a/scrapy/tests/test_mail.py b/scrapy/tests/test_mail.py
index 392568f3826..58d44bdb35e 100644
--- a/scrapy/tests/test_mail.py
+++ b/scrapy/tests/test_mail.py
@@ -1,6 +1,6 @@
 import unittest
+from io import BytesIO
 
-import six
 from scrapy.mail import MailSender
 
 class MailSenderTest(unittest.TestCase):
@@ -30,7 +30,7 @@ def test_send_html(self):
         self.assertEqual(msg.get('Content-Type'), 'text/html')
 
     def test_send_attach(self):
-        attach = six.BytesIO()
+        attach = BytesIO()
         attach.write(b'content')
         attach.seek(0)
         attachs = [('attachment', 'text/plain', attach)]
diff --git a/scrapy/tests/test_pipeline_images.py b/scrapy/tests/test_pipeline_images.py
index fa5c29aa79e..a6f1611281b 100644
--- a/scrapy/tests/test_pipeline_images.py
+++ b/scrapy/tests/test_pipeline_images.py
@@ -3,7 +3,7 @@
 import warnings
 from tempfile import mkdtemp
 from shutil import rmtree
-import six
+from io import BytesIO
 
 from twisted.trial import unittest
 
@@ -201,7 +201,7 @@ class TestItem(Item):
 
 def _create_image(format, *a, **kw):
-    buf = six.BytesIO()
+    buf = BytesIO()
     Image.new(*a, **kw).save(buf, format)
     buf.seek(0)
     return Image.open(buf)
diff --git a/scrapy/tests/test_spider.py b/scrapy/tests/test_spider.py
index 22716cdb1ff..903eff7b19f 100644
--- a/scrapy/tests/test_spider.py
+++ b/scrapy/tests/test_spider.py
@@ -2,7 +2,7 @@
 import inspect
 import warnings
 from scrapy.utils.trackref import object_ref
-import six
+from io import BytesIO
 
 from twisted.trial import unittest
 
@@ -196,7 +196,7 @@ class SitemapSpiderTest(SpiderTest):
     spider_class = SitemapSpider
 
     BODY = b"SITEMAP"
-    f = six.BytesIO()
+    f = BytesIO()
     g = gzip.GzipFile(fileobj=f, mode='w+b')
     g.write(BODY)
     g.close()
diff --git a/scrapy/tests/test_utils_jsonrpc.py b/scrapy/tests/test_utils_jsonrpc.py
index 26e77dd5878..8902e9efa38 100644
--- a/scrapy/tests/test_utils_jsonrpc.py
+++ b/scrapy/tests/test_utils_jsonrpc.py
@@ -1,4 +1,5 @@
-import unittest, json, six
+import unittest, json
+from io import BytesIO
 
 from scrapy.utils.jsonrpc import jsonrpc_client_call, jsonrpc_server_call, \
     JsonRpcError, jsonrpc_errors
@@ -18,7 +19,7 @@ def __init__(self, result=None, error=None):
     def urlopen(self, url, request):
         self.url = url
         self.request = request
-        return six.BytesIO(self.response)
+        return BytesIO(self.response)
 
 class TestTarget(object):
diff --git a/scrapy/utils/gz.py b/scrapy/utils/gz.py
index e978f897047..741948359c5 100644
--- a/scrapy/utils/gz.py
+++ b/scrapy/utils/gz.py
@@ -1,5 +1,10 @@
 import struct
-import six
+
+try:
+    from cStringIO import StringIO as BytesIO
+except ImportError:
+    from io import BytesIO
+
 from gzip import GzipFile
 
 def gunzip(data):
@@ -7,7 +12,7 @@ def gunzip(data):
 
     This is resilient to CRC checksum errors.
     """
-    f = GzipFile(fileobj=six.BytesIO(data))
+    f = GzipFile(fileobj=BytesIO(data))
     output = b''
     chunk = b'.'
     while chunk:
diff --git a/scrapy/utils/iterators.py b/scrapy/utils/iterators.py
index 983a3c8d791..3f11d4ed256 100644
--- a/scrapy/utils/iterators.py
+++ b/scrapy/utils/iterators.py
@@ -1,5 +1,10 @@
 import re, csv, six
 
+try:
+    from cStringIO import StringIO as BytesIO
+except ImportError:
+    from io import BytesIO
+
 from scrapy.http import TextResponse, Response
 from scrapy.selector import Selector
 from scrapy import log
@@ -47,7 +52,7 @@ def csviter(obj, delimiter=None, headers=None, encoding=None):
     def _getrow(csv_r):
         return [str_to_unicode(field, encoding) for field in next(csv_r)]
 
-    lines = six.BytesIO(_body_or_str(obj, unicode=False))
+    lines = BytesIO(_body_or_str(obj, unicode=False))
     if delimiter:
         csv_r = csv.reader(lines, delimiter=delimiter)
     else:

From 1cd9c4d6531f0d0d2b322f193c5c554c615fce68 Mon Sep 17 00:00:00 2001
From: Felix Yan <felixonmars@gmail.com>
Date: Tue, 15 Jul 2014 21:26:01 +0800
Subject: [PATCH 4/4] fix tests by using a tempfile instead of BytesIO

---
 scrapy/tests/test_pipeline_images.py | 5 ++---
 scrapy/utils/iterators.py            | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/scrapy/tests/test_pipeline_images.py b/scrapy/tests/test_pipeline_images.py
index a6f1611281b..a3b1059ef34 100644
--- a/scrapy/tests/test_pipeline_images.py
+++ b/scrapy/tests/test_pipeline_images.py
@@ -1,9 +1,8 @@
 import os
 import hashlib
 import warnings
-from tempfile import mkdtemp
+from tempfile import mkdtemp, TemporaryFile
 from shutil import rmtree
-from io import BytesIO
 
 from twisted.trial import unittest
 
@@ -201,7 +200,7 @@ class TestItem(Item):
 
 def _create_image(format, *a, **kw):
-    buf = BytesIO()
+    buf = TemporaryFile()
     Image.new(*a, **kw).save(buf, format)
     buf.seek(0)
     return Image.open(buf)
diff --git a/scrapy/utils/iterators.py b/scrapy/utils/iterators.py
index 3f11d4ed256..150b077aef3 100644
--- a/scrapy/utils/iterators.py
+++ b/scrapy/utils/iterators.py
@@ -81,7 +81,7 @@ def _body_or_str(obj, unicode=True):
             return obj.body_as_unicode()
         else:
             return obj.body.decode('utf-8')
-    elif type(obj) is six.text_type:
+    elif isinstance(obj, six.text_type):
        return obj if unicode else obj.encode('utf-8')
     else:
         return obj.decode('utf-8') if unicode else obj
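
A note on patch 2: the equivalence it relies on, cStringIO's undocumented
.reset() versus the portable .seek(0), is easy to check by hand. The sketch
below is illustrative only (buffer contents and variable names are made up);
it runs under both Python 2 and 3, and the reset() branch executes only where
cStringIO exists:

    # Sketch: confirm that seek(0) rewinds a buffer the same way
    # cStringIO's undocumented reset() does.
    import io

    buf = io.BytesIO()
    buf.write(b"content")
    buf.seek(0)                      # portable rewind
    assert buf.read() == b"content"  # payload readable again from the start

    try:
        from cStringIO import StringIO  # Python 2 only
        legacy = StringIO()
        legacy.write(b"content")
        legacy.reset()                  # alias for seek(0), per the linked source
        assert legacy.read() == b"content"
    except ImportError:
        pass  # Python 3: cStringIO (and reset()) are gone, hence the patch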
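
Patch 3's import dance is the standard two-level fallback: cStringIO.StringIO
is the C-accelerated buffer on Python 2 and happily holds byte strings, while
on Python 3 it no longer exists and io.BytesIO takes over. A minimal,
self-contained illustration of the idiom follows; the roundtrip() helper is
hypothetical, not part of Scrapy:

    try:
        # Python 2: C-accelerated buffer that accepts byte strings
        from cStringIO import StringIO as BytesIO
    except ImportError:
        # Python 3: cStringIO is gone; io.BytesIO is the binary buffer
        from io import BytesIO

    def roundtrip(payload):
        # Write bytes into an in-memory file and read them back.
        buf = BytesIO()
        buf.write(payload)
        buf.seek(0)  # rewind before reading, as in patch 2
        return buf.read()

    assert roundtrip(b"new content") == b"new content"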
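
The reason nearly every buffer in the series becomes a bytes buffer: response
bodies, gzip streams, pickles, and PIL images are binary data, and Python 3
refuses to mix them with text buffers. A small demo of the failure mode the
patches avoid; this is a sketch mirroring what test_spider.py does with its
gzipped sitemap, not Scrapy code:

    import gzip
    import io

    payload = b"SITEMAP"

    # Compressing into a binary buffer works on both Python versions.
    buf = io.BytesIO()
    with gzip.GzipFile(fileobj=buf, mode="wb") as gz:
        gz.write(payload)
    buf.seek(0)
    assert gzip.GzipFile(fileobj=buf).read() == payload

    # The same code against a text buffer fails: GzipFile writes its
    # binary header into the fileobj, and a str-only buffer rejects it.
    try:
        gzip.GzipFile(fileobj=io.StringIO(), mode="wb")
    except TypeError:
        pass  # this TypeError is exactly why the patches use BytesIO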