diff --git a/scrapy/contrib/pipeline/files.py b/scrapy/contrib/pipeline/files.py
index 8c43982a1fa..db8cf8b76dc 100644
--- a/scrapy/contrib/pipeline/files.py
+++ b/scrapy/contrib/pipeline/files.py
@@ -9,9 +9,13 @@ import time
 from six.moves.urllib.parse import urlparse
 from collections import defaultdict
 
-from cStringIO import StringIO
 import six
 
+try:
+    from cStringIO import StringIO as BytesIO
+except ImportError:
+    from io import BytesIO
+
 from twisted.internet import defer, threads
 
 from scrapy import log
@@ -257,7 +261,7 @@ def get_media_requests(self, item, info):
 
     def file_downloaded(self, response, request, info):
         path = self.file_path(request, response=response, info=info)
-        buf = StringIO(response.body)
+        buf = BytesIO(response.body)
         self.store.persist_file(path, buf, info)
         checksum = md5sum(buf)
         return checksum
diff --git a/scrapy/contrib/pipeline/images.py b/scrapy/contrib/pipeline/images.py
index e955e72bf75..9c1a5445500 100644
--- a/scrapy/contrib/pipeline/images.py
+++ b/scrapy/contrib/pipeline/images.py
@@ -5,9 +5,13 @@
 """
 import hashlib
 
-from cStringIO import StringIO
 import six
 
+try:
+    from cStringIO import StringIO as BytesIO
+except ImportError:
+    from io import BytesIO
+
 from PIL import Image
 
 from scrapy.utils.misc import md5sum
@@ -70,7 +74,7 @@ def image_downloaded(self, response, request, info):
 
     def get_images(self, response, request, info):
         path = self.file_path(request, response=response, info=info)
-        orig_image = Image.open(StringIO(response.body))
+        orig_image = Image.open(BytesIO(response.body))
 
         width, height = orig_image.size
         if width < self.MIN_WIDTH or height < self.MIN_HEIGHT:
@@ -97,7 +101,7 @@ def convert_image(self, image, size=None):
             image = image.copy()
             image.thumbnail(size, Image.ANTIALIAS)
 
-        buf = StringIO()
+        buf = BytesIO()
         image.save(buf, 'JPEG')
         return image, buf
 
diff --git a/scrapy/contrib_exp/downloadermiddleware/decompression.py b/scrapy/contrib_exp/downloadermiddleware/decompression.py
index 6fad5b9b14a..c08f50b5ff3 100644
--- a/scrapy/contrib_exp/downloadermiddleware/decompression.py
+++ b/scrapy/contrib_exp/downloadermiddleware/decompression.py
@@ -6,10 +6,15 @@
 import gzip
 import zipfile
 import tarfile
-from cStringIO import StringIO
 from tempfile import mktemp
+
 import six
 
+try:
+    from cStringIO import StringIO as BytesIO
+except ImportError:
+    from io import BytesIO
+
 from scrapy import log
 from scrapy.responsetypes import responsetypes
 
@@ -27,7 +32,7 @@ def __init__(self):
         }
 
     def _is_tar(self, response):
-        archive = StringIO(response.body)
+        archive = BytesIO(response.body)
         try:
             tar_file = tarfile.open(name=mktemp(), fileobj=archive)
         except tarfile.ReadError:
@@ -38,7 +43,7 @@ def _is_zip(self, response):
         return response.replace(body=body, cls=respcls)
 
     def _is_zip(self, response):
-        archive = StringIO(response.body)
+        archive = BytesIO(response.body)
         try:
             zip_file = zipfile.ZipFile(archive)
         except zipfile.BadZipfile:
@@ -50,7 +55,7 @@ def _is_gzip(self, response):
         return response.replace(body=body, cls=respcls)
 
     def _is_gzip(self, response):
-        archive = StringIO(response.body)
+        archive = BytesIO(response.body)
         try:
             body = gzip.GzipFile(fileobj=archive).read()
         except IOError:
diff --git a/scrapy/core/downloader/handlers/ftp.py b/scrapy/core/downloader/handlers/ftp.py
index 6b5f03bda70..6ac02cc2b3b 100644
--- a/scrapy/core/downloader/handlers/ftp.py
+++ b/scrapy/core/downloader/handlers/ftp.py
@@ -29,8 +29,8 @@
 """
 
 import re
+from io import BytesIO
 from six.moves.urllib.parse import urlparse
-from cStringIO import StringIO
 
 from twisted.internet import reactor
 from twisted.protocols.ftp import FTPClient, CommandFailed
@@ -42,7 +42,7 @@ class ReceivedDataProtocol(Protocol):
 
     def __init__(self, filename=None):
         self.__filename = filename
-        self.body = open(filename, "w") if filename else StringIO()
+        self.body = open(filename, "w") if filename else BytesIO()
         self.size = 0
 
     def dataReceived(self, data):
@@ -54,7 +54,7 @@ def filename(self):
         return self.__filename
 
     def close(self):
-        self.body.close() if self.filename else self.body.reset()
+        self.body.close() if self.filename else self.body.seek(0)
 
 _CODE_RE = re.compile("\d+")
 class FTPDownloadHandler(object):
diff --git a/scrapy/core/downloader/handlers/http11.py b/scrapy/core/downloader/handlers/http11.py
index 455794b14b6..b803af1dce0 100644
--- a/scrapy/core/downloader/handlers/http11.py
+++ b/scrapy/core/downloader/handlers/http11.py
@@ -2,8 +2,8 @@
 
 import re
 
+from io import BytesIO
 from time import time
-from cStringIO import StringIO
 from six.moves.urllib.parse import urldefrag
 
 from zope.interface import implements
@@ -234,7 +234,7 @@ def __init__(self, finished, txresponse, request):
         self._finished = finished
         self._txresponse = txresponse
         self._request = request
-        self._bodybuf = StringIO()
+        self._bodybuf = BytesIO()
 
     def dataReceived(self, bodyBytes):
         self._bodybuf.write(bodyBytes)
diff --git a/scrapy/mail.py b/scrapy/mail.py
index a5d936ab31c..e1d7c44f672 100644
--- a/scrapy/mail.py
+++ b/scrapy/mail.py
@@ -3,7 +3,7 @@
 
 See documentation in docs/topics/email.rst
 """
-from cStringIO import StringIO
+from six.moves import cStringIO as StringIO
 import six
 
 from email.utils import COMMASPACE, formatdate
diff --git a/scrapy/responsetypes.py b/scrapy/responsetypes.py
index ac0559a5034..16479896fcb 100644
--- a/scrapy/responsetypes.py
+++ b/scrapy/responsetypes.py
@@ -6,7 +6,7 @@
 from mimetypes import MimeTypes
 from pkgutil import get_data
 
-from cStringIO import StringIO
+from io import BytesIO
 import six
 
 from scrapy.http import Response
@@ -34,7 +34,7 @@ def __init__(self):
         self.classes = {}
         self.mimetypes = MimeTypes()
         mimedata = get_data('scrapy', 'mime.types')
-        self.mimetypes.readfp(StringIO(mimedata))
+        self.mimetypes.readfp(BytesIO(mimedata))
         for mimetype, cls in six.iteritems(self.CLASSES):
             self.classes[mimetype] = load_object(cls)
 
diff --git a/scrapy/tests/test_contrib_exporter.py b/scrapy/tests/test_contrib_exporter.py
index d50544c51b8..9092007e50e 100644
--- a/scrapy/tests/test_contrib_exporter.py
+++ b/scrapy/tests/test_contrib_exporter.py
@@ -1,6 +1,6 @@
 import unittest, json
+from io import BytesIO
 from six.moves import cPickle as pickle
-from cStringIO import StringIO
 import lxml.etree
 import re
 
@@ -19,7 +19,7 @@ class BaseItemExporterTest(unittest.TestCase):
 
     def setUp(self):
         self.i = TestItem(name=u'John\xa3', age='22')
-        self.output = StringIO()
+        self.output = BytesIO()
         self.ie = self._get_exporter()
 
     def _get_exporter(self, **kwargs):
@@ -126,13 +126,13 @@ def _check_output(self):
 
     def test_export_multiple_items(self):
         i1 = TestItem(name='hello', age='world')
         i2 = TestItem(name='bye', age='world')
-        f = StringIO()
+        f = BytesIO()
         ie = PickleItemExporter(f)
         ie.start_exporting()
         ie.export_item(i1)
         ie.export_item(i2)
         ie.finish_exporting()
-        f.reset()
+        f.seek(0)
         self.assertEqual(pickle.load(f), i1)
         self.assertEqual(pickle.load(f), i2)
@@ -151,21 +151,21 @@ def _check_output(self):
         self.assertCsvEqual(self.output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n')
 
     def test_header(self):
-        output = StringIO()
+        output = BytesIO()
         ie = CsvItemExporter(output, fields_to_export=self.i.fields.keys())
         ie.start_exporting()
         ie.export_item(self.i)
         ie.finish_exporting()
         self.assertCsvEqual(output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n')
 
-        output = StringIO()
+        output = BytesIO()
         ie = CsvItemExporter(output, fields_to_export=['age'])
         ie.start_exporting()
         ie.export_item(self.i)
         ie.finish_exporting()
         self.assertCsvEqual(output.getvalue(), 'age\r\n22\r\n')
 
-        output = StringIO()
+        output = BytesIO()
         ie = CsvItemExporter(output)
         ie.start_exporting()
         ie.export_item(self.i)
@@ -173,7 +173,7 @@ def test_header(self):
         ie.finish_exporting()
         self.assertCsvEqual(output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n22,John\xc2\xa3\r\n')
 
-        output = StringIO()
+        output = BytesIO()
         ie = CsvItemExporter(output, include_headers_line=False)
         ie.start_exporting()
         ie.export_item(self.i)
@@ -186,7 +186,7 @@ class TestItem2(Item):
             friends = Field()
 
         i = TestItem2(name='John', friends=['Mary', 'Paul'])
-        output = StringIO()
+        output = BytesIO()
         ie = CsvItemExporter(output, include_headers_line=False)
         ie.start_exporting()
         ie.export_item(i)
@@ -216,7 +216,7 @@ def _check_output(self):
         self.assertXmlEquivalent(self.output.getvalue(), expected_value)
 
     def test_multivalued_fields(self):
-        output = StringIO()
+        output = BytesIO()
         item = TestItem(name=[u'John\xa3', u'Doe'])
         ie = XmlItemExporter(output)
         ie.start_exporting()
@@ -226,7 +226,7 @@ def test_multivalued_fields(self):
         self.assertXmlEquivalent(output.getvalue(), expected_value)
 
     def test_nested_item(self):
-        output = StringIO()
+        output = BytesIO()
         i1 = TestItem(name=u'foo\xa3hoo', age='22')
         i2 = TestItem(name=u'bar', age=i1)
         i3 = TestItem(name=u'buz', age=i2)
@@ -248,7 +248,7 @@ def test_nested_item(self):
         self.assertXmlEquivalent(output.getvalue(), expected_value)
 
     def test_nested_list_item(self):
-        output = StringIO()
+        output = BytesIO()
         i1 = TestItem(name=u'foo')
         i2 = TestItem(name=u'bar')
         i3 = TestItem(name=u'buz', age=[i1, i2])
diff --git a/scrapy/tests/test_contrib_feedexport.py b/scrapy/tests/test_contrib_feedexport.py
index 7a1b3dc8f0d..bf4943bfab3 100644
--- a/scrapy/tests/test_contrib_feedexport.py
+++ b/scrapy/tests/test_contrib_feedexport.py
@@ -1,6 +1,6 @@
 import os
+from io import BytesIO
 from six.moves.urllib.parse import urlparse
-from cStringIO import StringIO
 
 from zope.interface.verify import verifyObject
 from twisted.trial import unittest
@@ -62,13 +62,13 @@ def test_store(self):
     def _assert_stores(self, storage, path):
         spider = Spider("default")
         file = storage.open(spider)
-        file.write("content")
+        file.write(b"content")
         yield storage.store(file)
         self.failUnless(os.path.exists(path))
-        self.failUnlessEqual(open(path).read(), "content")
+        self.failUnlessEqual(open(path).read(), b"content")
         # again, to check s3 objects are overwritten
-        yield storage.store(StringIO("new content"))
-        self.failUnlessEqual(open(path).read(), "new content")
+        yield storage.store(BytesIO(b"new content"))
+        self.failUnlessEqual(open(path).read(), b"new content")
 
 
 class S3FeedStorageTest(unittest.TestCase):
@@ -93,9 +93,9 @@ class StdoutFeedStorageTest(unittest.TestCase):
 
     @defer.inlineCallbacks
     def test_store(self):
-        out = StringIO()
+        out = BytesIO()
         storage = StdoutFeedStorage('stdout:', _stdout=out)
         file = storage.open(Spider("default"))
-        file.write("content")
+        file.write(b"content")
         yield storage.store(file)
-        self.assertEqual(out.getvalue(), "content")
+        self.assertEqual(out.getvalue(), b"content")
diff --git a/scrapy/tests/test_downloadermiddleware_httpcompression.py b/scrapy/tests/test_downloadermiddleware_httpcompression.py
index 5fbc2c6194e..8a0e75d9032 100644
--- a/scrapy/tests/test_downloadermiddleware_httpcompression.py
+++ b/scrapy/tests/test_downloadermiddleware_httpcompression.py
@@ -1,6 +1,6 @@
+from io import BytesIO
 from unittest import TestCase
 from os.path import join, abspath, dirname
-from cStringIO import StringIO
 from gzip import GzipFile
 
 from scrapy.spider import Spider
@@ -104,8 +104,8 @@ def test_process_response_encoding_inside_body(self):
             'Content-Type': 'text/html',
             'Content-Encoding': 'gzip',
         }
-        f = StringIO()
-        plainbody = """<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html;charset=gb2312">"""
+        f = BytesIO()
+        plainbody = b"""<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html;charset=gb2312">"""
         zf = GzipFile(fileobj=f, mode='wb')
         zf.write(plainbody)
         zf.close()
@@ -122,8 +122,8 @@ def test_process_response_force_recalculate_encoding(self):
             'Content-Type': 'text/html',
             'Content-Encoding': 'gzip',
         }
-        f = StringIO()
-        plainbody = """<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html;charset=gb2312">"""
+        f = BytesIO()
+        plainbody = b"""<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html;charset=gb2312">"""
         zf = GzipFile(fileobj=f, mode='wb')
         zf.write(plainbody)
         zf.close()
diff --git a/scrapy/tests/test_log.py b/scrapy/tests/test_log.py
index 9367fb953ff..f0b57b806a9 100644
--- a/scrapy/tests/test_log.py
+++ b/scrapy/tests/test_log.py
@@ -1,4 +1,4 @@
-from cStringIO import StringIO
+from io import BytesIO
 
 from twisted.python import log as txlog, failure
 from twisted.trial import unittest
@@ -21,7 +21,7 @@ class ScrapyFileLogObserverTest(unittest.TestCase):
     encoding = 'utf-8'
 
     def setUp(self):
-        self.f = StringIO()
+        self.f = BytesIO()
         self.log_observer = log.ScrapyFileLogObserver(self.f, self.level, self.encoding)
         self.log_observer.start()
 
diff --git a/scrapy/tests/test_mail.py b/scrapy/tests/test_mail.py
index 250ae04aee4..58d44bdb35e 100644
--- a/scrapy/tests/test_mail.py
+++ b/scrapy/tests/test_mail.py
@@ -1,6 +1,6 @@
 import unittest
+from io import BytesIO
 
-from cStringIO import StringIO
 from scrapy.mail import MailSender
 
 class MailSenderTest(unittest.TestCase):
@@ -30,8 +30,8 @@ def test_send_html(self):
         self.assertEqual(msg.get('Content-Type'), 'text/html')
 
     def test_send_attach(self):
-        attach = StringIO()
-        attach.write('content')
+        attach = BytesIO()
+        attach.write(b'content')
         attach.seek(0)
         attachs = [('attachment', 'text/plain', attach)]
 
diff --git a/scrapy/tests/test_pipeline_images.py b/scrapy/tests/test_pipeline_images.py
index 511d0f50e4c..a3b1059ef34 100644
--- a/scrapy/tests/test_pipeline_images.py
+++ b/scrapy/tests/test_pipeline_images.py
@@ -1,8 +1,7 @@
 import os
 import hashlib
 import warnings
-from cStringIO import StringIO
-from tempfile import mkdtemp
+from tempfile import mkdtemp, TemporaryFile
 from shutil import rmtree
 
 from twisted.trial import unittest
@@ -201,7 +200,7 @@ class TestItem(Item):
 
 
 def _create_image(format, *a, **kw):
-    buf = StringIO()
+    buf = TemporaryFile()
     Image.new(*a, **kw).save(buf, format)
     buf.seek(0)
     return Image.open(buf)
diff --git a/scrapy/tests/test_spider.py b/scrapy/tests/test_spider.py
index 8abc3962843..903eff7b19f 100644
--- a/scrapy/tests/test_spider.py
+++ b/scrapy/tests/test_spider.py
@@ -1,8 +1,8 @@
 import gzip
 import inspect
 import warnings
-from cStringIO import StringIO
 from scrapy.utils.trackref import object_ref
+from io import BytesIO
 
 from twisted.trial import unittest
 
@@ -57,7 +57,7 @@ class XMLFeedSpiderTest(SpiderTest):
     spider_class = XMLFeedSpider
 
     def test_register_namespace(self):
-        body = """<?xml version="1.0" encoding="UTF-8"?>
+        body = b"""<?xml version="1.0" encoding="UTF-8"?>
         <urlset xmlns:x="http://www.google.com/schemas/sitemap/0.84"
                 xmlns:y="http://www.example.com/schemas/extras/1.0">
         <url><x:loc>http://www.example.com/Special-Offers.html</x:loc><y:updated>2009-08-16</y:updated><other value="bar" y:custom="fuu"/></url>
@@ -103,7 +103,7 @@ class CSVFeedSpiderTest(SpiderTest):
 
 class CrawlSpiderTest(SpiderTest):
 
-    test_body = """<html><head><title>Page title<title>
+    test_body = b"""<html><head><title>Page title<title>
     <body>
     <p><a href="item/12.html">Item 12</a></p>
     <div class='links'>
@@ -195,8 +195,8 @@ class SitemapSpiderTest(SpiderTest):
 
     spider_class = SitemapSpider
 
-    BODY = "SITEMAP"
-    f = StringIO()
+    BODY = b"SITEMAP"
+    f = BytesIO()
     g = gzip.GzipFile(fileobj=f, mode='w+b')
     g.write(BODY)
     g.close()
diff --git a/scrapy/tests/test_utils_jsonrpc.py b/scrapy/tests/test_utils_jsonrpc.py
index 6eb60d1f44c..8902e9efa38 100644
--- a/scrapy/tests/test_utils_jsonrpc.py
+++ b/scrapy/tests/test_utils_jsonrpc.py
@@ -1,5 +1,5 @@
 import unittest, json
-from cStringIO import StringIO
+from io import BytesIO
 
 from scrapy.utils.jsonrpc import jsonrpc_client_call, jsonrpc_server_call, \
     JsonRpcError, jsonrpc_errors
@@ -19,7 +19,7 @@ def __init__(self, result=None, error=None):
 
     def urlopen(self, url, request):
         self.url = url
         self.request = request
-        return StringIO(self.response)
+        return BytesIO(self.response)
 
 class TestTarget(object):
diff --git a/scrapy/tests/test_utils_misc/__init__.py b/scrapy/tests/test_utils_misc/__init__.py
index 143c9b6445b..18fc6d5329e 100644
--- a/scrapy/tests/test_utils_misc/__init__.py
+++ b/scrapy/tests/test_utils_misc/__init__.py
@@ -1,7 +1,6 @@
 import sys
 import os
 import unittest
-from cStringIO import StringIO
 
 from scrapy.item import Item, Field
 from scrapy.utils.misc import load_object, arg_to_iter, walk_modules
diff --git a/scrapy/utils/gz.py b/scrapy/utils/gz.py
index aa8ffc6fca9..741948359c5 100644
--- a/scrapy/utils/gz.py
+++ b/scrapy/utils/gz.py
@@ -1,5 +1,10 @@
 import struct
-from cStringIO import StringIO
+
+try:
+    from cStringIO import StringIO as BytesIO
+except ImportError:
+    from io import BytesIO
+
 from gzip import GzipFile
 
 def gunzip(data):
@@ -7,9 +12,9 @@
 
     This is resilient to CRC checksum errors.
     """
-    f = GzipFile(fileobj=StringIO(data))
-    output = ''
-    chunk = '.'
+    f = GzipFile(fileobj=BytesIO(data))
+    output = b''
+    chunk = b'.'
     while chunk:
         try:
             chunk = f.read(8196)
diff --git a/scrapy/utils/iterators.py b/scrapy/utils/iterators.py
index 92717d9bcc7..150b077aef3 100644
--- a/scrapy/utils/iterators.py
+++ b/scrapy/utils/iterators.py
@@ -1,5 +1,9 @@
-import re, csv
-from cStringIO import StringIO
+import re, csv, six
+
+try:
+    from cStringIO import StringIO as BytesIO
+except ImportError:
+    from io import BytesIO
 
 from scrapy.http import TextResponse, Response
 from scrapy.selector import Selector
@@ -48,7 +52,7 @@ def csviter(obj, delimiter=None, headers=None, encoding=None):
     def _getrow(csv_r):
         return [str_to_unicode(field, encoding) for field in next(csv_r)]
 
-    lines = StringIO(_body_or_str(obj, unicode=False))
+    lines = BytesIO(_body_or_str(obj, unicode=False))
     if delimiter:
         csv_r = csv.reader(lines, delimiter=delimiter)
     else:
@@ -68,7 +72,7 @@ def _getrow(csv_r):
 
 
 def _body_or_str(obj, unicode=True):
-    assert isinstance(obj, (Response, basestring)), \
+    assert isinstance(obj, (Response, six.string_types)), \
         "obj must be Response or basestring, not %s" % type(obj).__name__
     if isinstance(obj, Response):
         if not unicode:
@@ -77,7 +81,7 @@ def _body_or_str(obj, unicode=True):
             return obj.body_as_unicode()
         else:
             return obj.body.decode('utf-8')
-    elif type(obj) is type(u''):
+    elif isinstance(obj, six.text_type):
        return obj if unicode else obj.encode('utf-8')
     else:
         return obj.decode('utf-8') if unicode else obj