Merge pull request #803 from felixonmars/py3-port

[MRG] PY3: use six.BytesIO and six.moves.cStringIO
dangra committed Jul 21, 2014
2 parents dd3b77e + 1cd9c4d commit f6b1e9b
Showing 18 changed files with 87 additions and 67 deletions.
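
Nearly every file below applies the same compatibility move: byte-oriented buffers switch from `cStringIO.StringIO` (Python 2 only) to a `BytesIO` name that resolves on both Python 2 and 3. A minimal sketch of the recurring pattern (the buffer contents here are illustrative only):

```python
try:
    # Python 2: cStringIO.StringIO accepts bytes and is the fast C implementation
    from cStringIO import StringIO as BytesIO
except ImportError:
    # Python 3: cStringIO is gone; io.BytesIO is the standard bytes buffer
    from io import BytesIO

buf = BytesIO(b"response body")   # payload must be bytes, not unicode text
assert buf.read() == b"response body"
```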
8 changes: 6 additions & 2 deletions scrapy/contrib/pipeline/files.py
@@ -9,9 +9,13 @@
 import time
 from six.moves.urllib.parse import urlparse
 from collections import defaultdict
-from cStringIO import StringIO
 import six
 
+try:
+    from cStringIO import StringIO as BytesIO
+except ImportError:
+    from io import BytesIO
+
 from twisted.internet import defer, threads
 
 from scrapy import log
@@ -257,7 +261,7 @@ def get_media_requests(self, item, info):
 
     def file_downloaded(self, response, request, info):
         path = self.file_path(request, response=response, info=info)
-        buf = StringIO(response.body)
+        buf = BytesIO(response.body)
         self.store.persist_file(path, buf, info)
         checksum = md5sum(buf)
         return checksum
10 changes: 7 additions & 3 deletions scrapy/contrib/pipeline/images.py
@@ -5,9 +5,13 @@
 """
 
 import hashlib
-from cStringIO import StringIO
 import six
 
+try:
+    from cStringIO import StringIO as BytesIO
+except ImportError:
+    from io import BytesIO
+
 from PIL import Image
 
 from scrapy.utils.misc import md5sum
@@ -70,7 +74,7 @@ def image_downloaded(self, response, request, info):
 
     def get_images(self, response, request, info):
         path = self.file_path(request, response=response, info=info)
-        orig_image = Image.open(StringIO(response.body))
+        orig_image = Image.open(BytesIO(response.body))
 
         width, height = orig_image.size
         if width < self.MIN_WIDTH or height < self.MIN_HEIGHT:
@@ -97,7 +101,7 @@ def convert_image(self, image, size=None):
             image = image.copy()
             image.thumbnail(size, Image.ANTIALIAS)
 
-        buf = StringIO()
+        buf = BytesIO()
         image.save(buf, 'JPEG')
         return image, buf
 
13 changes: 9 additions & 4 deletions scrapy/contrib_exp/downloadermiddleware/decompression.py
@@ -6,10 +6,15 @@
 import gzip
 import zipfile
 import tarfile
-from cStringIO import StringIO
 from tempfile import mktemp
 
+import six
+
+try:
+    from cStringIO import StringIO as BytesIO
+except ImportError:
+    from io import BytesIO
+
 from scrapy import log
 from scrapy.responsetypes import responsetypes

@@ -27,7 +32,7 @@ def __init__(self):
         }
 
     def _is_tar(self, response):
-        archive = StringIO(response.body)
+        archive = BytesIO(response.body)
         try:
             tar_file = tarfile.open(name=mktemp(), fileobj=archive)
         except tarfile.ReadError:
@@ -38,7 +43,7 @@ def _is_tar(self, response):
         return response.replace(body=body, cls=respcls)
 
     def _is_zip(self, response):
-        archive = StringIO(response.body)
+        archive = BytesIO(response.body)
         try:
             zip_file = zipfile.ZipFile(archive)
         except zipfile.BadZipfile:
@@ -50,7 +55,7 @@ def _is_zip(self, response):
         return response.replace(body=body, cls=respcls)
 
     def _is_gzip(self, response):
-        archive = StringIO(response.body)
+        archive = BytesIO(response.body)
         try:
             body = gzip.GzipFile(fileobj=archive).read()
         except IOError:
6 changes: 3 additions & 3 deletions scrapy/core/downloader/handlers/ftp.py
@@ -29,8 +29,8 @@
 """
 
 import re
+from io import BytesIO
 from six.moves.urllib.parse import urlparse
-from cStringIO import StringIO
 
 from twisted.internet import reactor
 from twisted.protocols.ftp import FTPClient, CommandFailed
@@ -42,7 +42,7 @@
 class ReceivedDataProtocol(Protocol):
     def __init__(self, filename=None):
         self.__filename = filename
-        self.body = open(filename, "w") if filename else StringIO()
+        self.body = open(filename, "w") if filename else BytesIO()
         self.size = 0
 
     def dataReceived(self, data):
@@ -54,7 +54,7 @@ def filename(self):
         return self.__filename
 
     def close(self):
-        self.body.close() if self.filename else self.body.reset()
+        self.body.close() if self.filename else self.body.seek(0)
 
 _CODE_RE = re.compile("\d+")
 class FTPDownloadHandler(object):
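
The `close()` change above is forced by the buffer swap: `cStringIO` objects expose a `reset()` method, but `io.BytesIO` does not, and `seek(0)` rewinds both. A quick sketch (payload value is illustrative):

```python
from io import BytesIO

body = BytesIO()
body.write(b"ftp payload")
body.seek(0)   # portable rewind; io.BytesIO has no reset() method
assert body.read() == b"ftp payload"
```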
4 changes: 2 additions & 2 deletions scrapy/core/downloader/handlers/http11.py
@@ -2,8 +2,8 @@
 
 import re
 
+from io import BytesIO
 from time import time
-from cStringIO import StringIO
 from six.moves.urllib.parse import urldefrag
 
 from zope.interface import implements
@@ -234,7 +234,7 @@ def __init__(self, finished, txresponse, request):
         self._finished = finished
         self._txresponse = txresponse
         self._request = request
-        self._bodybuf = StringIO()
+        self._bodybuf = BytesIO()
 
     def dataReceived(self, bodyBytes):
         self._bodybuf.write(bodyBytes)
2 changes: 1 addition & 1 deletion scrapy/mail.py
@@ -3,7 +3,7 @@
 See documentation in docs/topics/email.rst
 """
-from cStringIO import StringIO
+from six.moves import cStringIO as StringIO
 import six
 
 from email.utils import COMMASPACE, formatdate
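
mail.py is the one file in this change that keeps a *text* buffer rather than switching to bytes: `six.moves.cStringIO` resolves to `cStringIO.StringIO` on Python 2 and `io.StringIO` on Python 3, so `StringIO()` accepts `str` on both. A small sketch (the sample text is illustrative):

```python
from six.moves import cStringIO as StringIO

buf = StringIO()
buf.write(u"Subject: scrapy mail\n")   # text, not bytes
assert buf.getvalue().startswith(u"Subject")
```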
4 changes: 2 additions & 2 deletions scrapy/responsetypes.py
@@ -6,7 +6,7 @@
 
 from mimetypes import MimeTypes
 from pkgutil import get_data
-from cStringIO import StringIO
+from io import BytesIO
 import six
 
 from scrapy.http import Response
@@ -34,7 +34,7 @@ def __init__(self):
         self.classes = {}
         self.mimetypes = MimeTypes()
         mimedata = get_data('scrapy', 'mime.types')
-        self.mimetypes.readfp(StringIO(mimedata))
+        self.mimetypes.readfp(BytesIO(mimedata))
         for mimetype, cls in six.iteritems(self.CLASSES):
             self.classes[mimetype] = load_object(cls)
 
24 changes: 12 additions & 12 deletions scrapy/tests/test_contrib_exporter.py
@@ -1,6 +1,6 @@
 import unittest, json
+from io import BytesIO
 from six.moves import cPickle as pickle
-from cStringIO import StringIO
 import lxml.etree
 import re

@@ -19,7 +19,7 @@ class BaseItemExporterTest(unittest.TestCase):
 
     def setUp(self):
         self.i = TestItem(name=u'John\xa3', age='22')
-        self.output = StringIO()
+        self.output = BytesIO()
         self.ie = self._get_exporter()
 
     def _get_exporter(self, **kwargs):
@@ -126,13 +126,13 @@ def _check_output(self):
     def test_export_multiple_items(self):
         i1 = TestItem(name='hello', age='world')
         i2 = TestItem(name='bye', age='world')
-        f = StringIO()
+        f = BytesIO()
         ie = PickleItemExporter(f)
         ie.start_exporting()
         ie.export_item(i1)
         ie.export_item(i2)
         ie.finish_exporting()
-        f.reset()
+        f.seek(0)
         self.assertEqual(pickle.load(f), i1)
         self.assertEqual(pickle.load(f), i2)

@@ -151,29 +151,29 @@ def _check_output(self):
         self.assertCsvEqual(self.output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n')
 
     def test_header(self):
-        output = StringIO()
+        output = BytesIO()
         ie = CsvItemExporter(output, fields_to_export=self.i.fields.keys())
         ie.start_exporting()
         ie.export_item(self.i)
         ie.finish_exporting()
         self.assertCsvEqual(output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n')
 
-        output = StringIO()
+        output = BytesIO()
         ie = CsvItemExporter(output, fields_to_export=['age'])
         ie.start_exporting()
         ie.export_item(self.i)
         ie.finish_exporting()
         self.assertCsvEqual(output.getvalue(), 'age\r\n22\r\n')
 
-        output = StringIO()
+        output = BytesIO()
         ie = CsvItemExporter(output)
         ie.start_exporting()
         ie.export_item(self.i)
         ie.export_item(self.i)
         ie.finish_exporting()
         self.assertCsvEqual(output.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n22,John\xc2\xa3\r\n')
 
-        output = StringIO()
+        output = BytesIO()
         ie = CsvItemExporter(output, include_headers_line=False)
         ie.start_exporting()
         ie.export_item(self.i)
@@ -186,7 +186,7 @@ class TestItem2(Item):
             friends = Field()
 
         i = TestItem2(name='John', friends=['Mary', 'Paul'])
-        output = StringIO()
+        output = BytesIO()
         ie = CsvItemExporter(output, include_headers_line=False)
         ie.start_exporting()
         ie.export_item(i)
@@ -216,7 +216,7 @@ def _check_output(self):
         self.assertXmlEquivalent(self.output.getvalue(), expected_value)
 
     def test_multivalued_fields(self):
-        output = StringIO()
+        output = BytesIO()
         item = TestItem(name=[u'John\xa3', u'Doe'])
         ie = XmlItemExporter(output)
         ie.start_exporting()
@@ -226,7 +226,7 @@ def test_multivalued_fields(self):
         self.assertXmlEquivalent(output.getvalue(), expected_value)
 
     def test_nested_item(self):
-        output = StringIO()
+        output = BytesIO()
         i1 = TestItem(name=u'foo\xa3hoo', age='22')
         i2 = TestItem(name=u'bar', age=i1)
         i3 = TestItem(name=u'buz', age=i2)
@@ -248,7 +248,7 @@ def test_nested_item(self):
         self.assertXmlEquivalent(output.getvalue(), expected_value)
 
     def test_nested_list_item(self):
-        output = StringIO()
+        output = BytesIO()
         i1 = TestItem(name=u'foo')
         i2 = TestItem(name=u'bar')
         i3 = TestItem(name=u'buz', age=[i1, i2])
16 changes: 8 additions & 8 deletions scrapy/tests/test_contrib_feedexport.py
@@ -1,6 +1,6 @@
 import os
+from io import BytesIO
 from six.moves.urllib.parse import urlparse
-from cStringIO import StringIO
 
 from zope.interface.verify import verifyObject
 from twisted.trial import unittest
@@ -62,13 +62,13 @@ def test_store(self):
     def _assert_stores(self, storage, path):
         spider = Spider("default")
         file = storage.open(spider)
-        file.write("content")
+        file.write(b"content")
         yield storage.store(file)
         self.failUnless(os.path.exists(path))
-        self.failUnlessEqual(open(path).read(), "content")
+        self.failUnlessEqual(open(path).read(), b"content")
         # again, to check s3 objects are overwritten
-        yield storage.store(StringIO("new content"))
-        self.failUnlessEqual(open(path).read(), "new content")
+        yield storage.store(BytesIO(b"new content"))
+        self.failUnlessEqual(open(path).read(), b"new content")
 
 
 class S3FeedStorageTest(unittest.TestCase):
@@ -93,9 +93,9 @@ class StdoutFeedStorageTest(unittest.TestCase):
 
     @defer.inlineCallbacks
     def test_store(self):
-        out = StringIO()
+        out = BytesIO()
         storage = StdoutFeedStorage('stdout:', _stdout=out)
         file = storage.open(Spider("default"))
-        file.write("content")
+        file.write(b"content")
         yield storage.store(file)
-        self.assertEqual(out.getvalue(), "content")
+        self.assertEqual(out.getvalue(), b"content")
10 changes: 5 additions & 5 deletions scrapy/tests/test_downloadermiddleware_httpcompression.py
@@ -1,6 +1,6 @@
+from io import BytesIO
 from unittest import TestCase
 from os.path import join, abspath, dirname
-from cStringIO import StringIO
 from gzip import GzipFile
 
 from scrapy.spider import Spider
@@ -104,8 +104,8 @@ def test_process_response_encoding_inside_body(self):
             'Content-Type': 'text/html',
             'Content-Encoding': 'gzip',
         }
-        f = StringIO()
-        plainbody = """<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=gb2312">"""
+        f = BytesIO()
+        plainbody = b"""<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=gb2312">"""
         zf = GzipFile(fileobj=f, mode='wb')
         zf.write(plainbody)
         zf.close()
@@ -122,8 +122,8 @@ def test_process_response_force_recalculate_encoding(self):
             'Content-Type': 'text/html',
             'Content-Encoding': 'gzip',
         }
-        f = StringIO()
-        plainbody = """<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=gb2312">"""
+        f = BytesIO()
+        plainbody = b"""<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=gb2312">"""
         zf = GzipFile(fileobj=f, mode='wb')
         zf.write(plainbody)
         zf.close()
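
In the gzip tests above, `GzipFile` writes through to the underlying `BytesIO`, and on Python 3 its payload must be bytes, hence the b-prefixed `plainbody`. A sketch of the round trip (the sample body is illustrative):

```python
from gzip import GzipFile
from io import BytesIO

f = BytesIO()
zf = GzipFile(fileobj=f, mode='wb')
zf.write(b"<html><head><title>Some page</title></head></html>")  # bytes in
zf.close()

# reading the compressed buffer back yields the original bytes
assert GzipFile(fileobj=BytesIO(f.getvalue())).read().endswith(b"</html>")
```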
4 changes: 2 additions & 2 deletions scrapy/tests/test_log.py
@@ -1,4 +1,4 @@
-from cStringIO import StringIO
+from io import BytesIO
 
 from twisted.python import log as txlog, failure
 from twisted.trial import unittest
@@ -21,7 +21,7 @@ class ScrapyFileLogObserverTest(unittest.TestCase):
     encoding = 'utf-8'
 
     def setUp(self):
-        self.f = StringIO()
+        self.f = BytesIO()
         self.log_observer = log.ScrapyFileLogObserver(self.f, self.level, self.encoding)
         self.log_observer.start()
 
6 changes: 3 additions & 3 deletions scrapy/tests/test_mail.py
@@ -1,6 +1,6 @@
 import unittest
+from io import BytesIO
 
-from cStringIO import StringIO
 from scrapy.mail import MailSender
 
 class MailSenderTest(unittest.TestCase):
@@ -30,8 +30,8 @@ def test_send_html(self):
         self.assertEqual(msg.get('Content-Type'), 'text/html')
 
     def test_send_attach(self):
-        attach = StringIO()
-        attach.write('content')
+        attach = BytesIO()
+        attach.write(b'content')
         attach.seek(0)
         attachs = [('attachment', 'text/plain', attach)]
 
5 changes: 2 additions & 3 deletions scrapy/tests/test_pipeline_images.py
@@ -1,8 +1,7 @@
 import os
 import hashlib
 import warnings
-from cStringIO import StringIO
-from tempfile import mkdtemp
+from tempfile import mkdtemp, TemporaryFile
 from shutil import rmtree
 
 from twisted.trial import unittest
@@ -201,7 +200,7 @@ class TestItem(Item):
 
 
 def _create_image(format, *a, **kw):
-    buf = StringIO()
+    buf = TemporaryFile()
     Image.new(*a, **kw).save(buf, format)
     buf.seek(0)
     return Image.open(buf)
