diff --git a/scrapy/contrib/pipeline/files.py b/scrapy/contrib/pipeline/files.py index 8c43982a1fa..db8cf8b76dc 100644 --- a/scrapy/contrib/pipeline/files.py +++ b/scrapy/contrib/pipeline/files.py @@ -9,9 +9,13 @@ import time from six.moves.urllib.parse import urlparse from collections import defaultdict -from cStringIO import StringIO import six +try: + from cStringIO import StringIO as BytesIO +except ImportError: + from io import BytesIO + from twisted.internet import defer, threads from scrapy import log @@ -257,7 +261,7 @@ def get_media_requests(self, item, info): def file_downloaded(self, response, request, info): path = self.file_path(request, response=response, info=info) - buf = StringIO(response.body) + buf = BytesIO(response.body) self.store.persist_file(path, buf, info) checksum = md5sum(buf) return checksum diff --git a/scrapy/contrib/pipeline/images.py b/scrapy/contrib/pipeline/images.py index e955e72bf75..9c1a5445500 100644 --- a/scrapy/contrib/pipeline/images.py +++ b/scrapy/contrib/pipeline/images.py @@ -5,9 +5,13 @@ """ import hashlib -from cStringIO import StringIO import six +try: + from cStringIO import StringIO as BytesIO +except ImportError: + from io import BytesIO + from PIL import Image from scrapy.utils.misc import md5sum @@ -70,7 +74,7 @@ def image_downloaded(self, response, request, info): def get_images(self, response, request, info): path = self.file_path(request, response=response, info=info) - orig_image = Image.open(StringIO(response.body)) + orig_image = Image.open(BytesIO(response.body)) width, height = orig_image.size if width < self.MIN_WIDTH or height < self.MIN_HEIGHT: @@ -97,7 +101,7 @@ def convert_image(self, image, size=None): image = image.copy() image.thumbnail(size, Image.ANTIALIAS) - buf = StringIO() + buf = BytesIO() image.save(buf, 'JPEG') return image, buf diff --git a/scrapy/contrib_exp/downloadermiddleware/decompression.py b/scrapy/contrib_exp/downloadermiddleware/decompression.py index 6fad5b9b14a..c08f50b5ff3 100644 --- a/scrapy/contrib_exp/downloadermiddleware/decompression.py +++ b/scrapy/contrib_exp/downloadermiddleware/decompression.py @@ -6,10 +6,15 @@ import gzip import zipfile import tarfile -from cStringIO import StringIO from tempfile import mktemp + import six +try: + from cStringIO import StringIO as BytesIO +except ImportError: + from io import BytesIO + from scrapy import log from scrapy.responsetypes import responsetypes @@ -27,7 +32,7 @@ def __init__(self): } def _is_tar(self, response): - archive = StringIO(response.body) + archive = BytesIO(response.body) try: tar_file = tarfile.open(name=mktemp(), fileobj=archive) except tarfile.ReadError: @@ -38,7 +43,7 @@ def _is_tar(self, response): return response.replace(body=body, cls=respcls) def _is_zip(self, response): - archive = StringIO(response.body) + archive = BytesIO(response.body) try: zip_file = zipfile.ZipFile(archive) except zipfile.BadZipfile: @@ -50,7 +55,7 @@ def _is_zip(self, response): return response.replace(body=body, cls=respcls) def _is_gzip(self, response): - archive = StringIO(response.body) + archive = BytesIO(response.body) try: body = gzip.GzipFile(fileobj=archive).read() except IOError: diff --git a/scrapy/core/downloader/handlers/ftp.py b/scrapy/core/downloader/handlers/ftp.py index 6b5f03bda70..6ac02cc2b3b 100644 --- a/scrapy/core/downloader/handlers/ftp.py +++ b/scrapy/core/downloader/handlers/ftp.py @@ -29,8 +29,8 @@ """ import re +from io import BytesIO from six.moves.urllib.parse import urlparse -from cStringIO import StringIO from twisted.internet import reactor from twisted.protocols.ftp import FTPClient, CommandFailed @@ -42,7 +42,7 @@ class ReceivedDataProtocol(Protocol): def __init__(self, filename=None): self.__filename = filename - self.body = open(filename, "w") if filename else StringIO() + self.body = open(filename, "w") if filename else BytesIO() self.size = 0 def dataReceived(self, data): @@ -54,7 +54,7 @@ def filename(self): return self.__filename def close(self): - self.body.close() if self.filename else self.body.reset() + self.body.close() if self.filename else self.body.seek(0) _CODE_RE = re.compile("\d+") class FTPDownloadHandler(object): diff --git a/scrapy/core/downloader/handlers/http11.py b/scrapy/core/downloader/handlers/http11.py index 455794b14b6..b803af1dce0 100644 --- a/scrapy/core/downloader/handlers/http11.py +++ b/scrapy/core/downloader/handlers/http11.py @@ -2,8 +2,8 @@ import re +from io import BytesIO from time import time -from cStringIO import StringIO from six.moves.urllib.parse import urldefrag from zope.interface import implements @@ -234,7 +234,7 @@ def __init__(self, finished, txresponse, request): self._finished = finished self._txresponse = txresponse self._request = request - self._bodybuf = StringIO() + self._bodybuf = BytesIO() def dataReceived(self, bodyBytes): self._bodybuf.write(bodyBytes) diff --git a/scrapy/mail.py b/scrapy/mail.py index a5d936ab31c..e1d7c44f672 100644 --- a/scrapy/mail.py +++ b/scrapy/mail.py @@ -3,7 +3,7 @@ See documentation in docs/topics/email.rst """ -from cStringIO import StringIO +from six.moves import cStringIO as StringIO import six from email.utils import COMMASPACE, formatdate diff --git a/scrapy/responsetypes.py b/scrapy/responsetypes.py index ac0559a5034..16479896fcb 100644 --- a/scrapy/responsetypes.py +++ b/scrapy/responsetypes.py @@ -6,7 +6,7 @@ from mimetypes import MimeTypes from pkgutil import get_data -from cStringIO import StringIO +from io import BytesIO import six from scrapy.http import Response @@ -34,7 +34,7 @@ def __init__(self): self.classes = {} self.mimetypes = MimeTypes() mimedata = get_data('scrapy', 'mime.types') - self.mimetypes.readfp(StringIO(mimedata)) + self.mimetypes.readfp(BytesIO(mimedata)) for mimetype, cls in six.iteritems(self.CLASSES): self.classes[mimetype] = load_object(cls) diff --git a/scrapy/tests/test_contrib_exporter.py b/scrapy/tests/test_contrib_exporter.py index d50544c51b8..9092007e50e 100644 --- a/scrapy/tests/test_contrib_exporter.py +++ b/scrapy/tests/test_contrib_exporter.py @@ -1,6 +1,6 @@ import unittest, json +from io import BytesIO from six.moves import cPickle as pickle -from cStringIO import StringIO import lxml.etree import re @@ -19,7 +19,7 @@ class BaseItemExporterTest(unittest.TestCase): def setUp(self): self.i = TestItem(name=u'John\xa3', age='22') - self.output = StringIO() + self.output = BytesIO() self.ie = self._get_exporter() def _get_exporter(self, **kwargs): @@ -126,13 +126,13 @@ def _check_output(self): def test_export_multiple_items(self): i1 = TestItem(name='hello', age='world') i2 = TestItem(name='bye', age='world') - f = StringIO() + f = BytesIO() ie = PickleItemExporter(f) ie.start_exporting() ie.export_item(i1) ie.export_item(i2) ie.finish_exporting() - f.reset() + f.seek(0) self.assertEqual(pickle.load(f), i1) self.assertEqual(pickle.load(f), i2) @@ -151,21 +151,21 @@ def _check_output(self): self.assertCsvEqual(self.output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n') def test_header(self): - output = StringIO() + output = BytesIO() ie = CsvItemExporter(output, fields_to_export=self.i.fields.keys()) ie.start_exporting() ie.export_item(self.i) ie.finish_exporting() self.assertCsvEqual(output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n') - output = StringIO() + output = BytesIO() ie = CsvItemExporter(output, fields_to_export=['age']) ie.start_exporting() ie.export_item(self.i) ie.finish_exporting() self.assertCsvEqual(output.getvalue(), 'age\r\n22\r\n') - output = StringIO() + output = BytesIO() ie = CsvItemExporter(output) ie.start_exporting() ie.export_item(self.i) @@ -173,7 +173,7 @@ def test_header(self): ie.finish_exporting() self.assertCsvEqual(output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n22,John\xc2\xa3\r\n') - output = StringIO() + output = BytesIO() ie = CsvItemExporter(output, include_headers_line=False) ie.start_exporting() ie.export_item(self.i) @@ -186,7 +186,7 @@ class TestItem2(Item): friends = Field() i = TestItem2(name='John', friends=['Mary', 'Paul']) - output = StringIO() + output = BytesIO() ie = CsvItemExporter(output, include_headers_line=False) ie.start_exporting() ie.export_item(i) @@ -216,7 +216,7 @@ def _check_output(self): self.assertXmlEquivalent(self.output.getvalue(), expected_value) def test_multivalued_fields(self): - output = StringIO() + output = BytesIO() item = TestItem(name=[u'John\xa3', u'Doe']) ie = XmlItemExporter(output) ie.start_exporting() @@ -226,7 +226,7 @@ def test_multivalued_fields(self): self.assertXmlEquivalent(output.getvalue(), expected_value) def test_nested_item(self): - output = StringIO() + output = BytesIO() i1 = TestItem(name=u'foo\xa3hoo', age='22') i2 = TestItem(name=u'bar', age=i1) i3 = TestItem(name=u'buz', age=i2) @@ -248,7 +248,7 @@ def test_nested_item(self): self.assertXmlEquivalent(output.getvalue(), expected_value) def test_nested_list_item(self): - output = StringIO() + output = BytesIO() i1 = TestItem(name=u'foo') i2 = TestItem(name=u'bar') i3 = TestItem(name=u'buz', age=[i1, i2]) diff --git a/scrapy/tests/test_contrib_feedexport.py b/scrapy/tests/test_contrib_feedexport.py index 7a1b3dc8f0d..bf4943bfab3 100644 --- a/scrapy/tests/test_contrib_feedexport.py +++ b/scrapy/tests/test_contrib_feedexport.py @@ -1,6 +1,6 @@ import os +from io import BytesIO from six.moves.urllib.parse import urlparse -from cStringIO import StringIO from zope.interface.verify import verifyObject from twisted.trial import unittest @@ -62,13 +62,13 @@ def test_store(self): def _assert_stores(self, storage, path): spider = Spider("default") file = storage.open(spider) - file.write("content") + file.write(b"content") yield storage.store(file) self.failUnless(os.path.exists(path)) - self.failUnlessEqual(open(path).read(), "content") + self.failUnlessEqual(open(path).read(), b"content") # again, to check s3 objects are overwritten - yield storage.store(StringIO("new content")) - self.failUnlessEqual(open(path).read(), "new content") + yield storage.store(BytesIO(b"new content")) + self.failUnlessEqual(open(path).read(), b"new content") class S3FeedStorageTest(unittest.TestCase): @@ -93,9 +93,9 @@ class StdoutFeedStorageTest(unittest.TestCase): @defer.inlineCallbacks def test_store(self): - out = StringIO() + out = BytesIO() storage = StdoutFeedStorage('stdout:', _stdout=out) file = storage.open(Spider("default")) - file.write("content") + file.write(b"content") yield storage.store(file) - self.assertEqual(out.getvalue(), "content") + self.assertEqual(out.getvalue(), b"content") diff --git a/scrapy/tests/test_downloadermiddleware_httpcompression.py b/scrapy/tests/test_downloadermiddleware_httpcompression.py index 5fbc2c6194e..8a0e75d9032 100644 --- a/scrapy/tests/test_downloadermiddleware_httpcompression.py +++ b/scrapy/tests/test_downloadermiddleware_httpcompression.py @@ -1,6 +1,6 @@ +from io import BytesIO from unittest import TestCase from os.path import join, abspath, dirname -from cStringIO import StringIO from gzip import GzipFile from scrapy.spider import Spider @@ -104,8 +104,8 @@ def test_process_response_encoding_inside_body(self): 'Content-Type': 'text/html', 'Content-Encoding': 'gzip', } - f = StringIO() - plainbody = """