dissolve scrapy.contrib_exp

scrapy · Apr 3, 2015 · 08bd13c · 08bd13c
1 parent fcf9700
commit 08bd13c
Show file tree

Hide file tree

Showing 8 changed files with 139 additions and 144 deletions.
diff --git a/scrapy/contrib/downloadermiddleware/decompression.py b/scrapy/contrib/downloadermiddleware/decompression.py
@@ -0,0 +1,86 @@
+""" This module implements the DecompressionMiddleware which tries to recognise
+and extract potentially compressed (tar, zip, gzip or bzip2) responses.
+"""
+
+import bz2
+import gzip
+import zipfile
+import tarfile
+from tempfile import mktemp
+
+import six
+
+try:
+    from cStringIO import StringIO as BytesIO
+except ImportError:
+    from io import BytesIO
+
+from scrapy import log
+from scrapy.responsetypes import responsetypes
+
+
+class DecompressionMiddleware(object):
+    """ This middleware tries to recognise and extract the possibly compressed
+    responses (tar, zip, gzip or bzip2) that it receives. """
+
+    def __init__(self):
+        self._formats = {
+            'tar': self._is_tar,
+            'zip': self._is_zip,
+            'gz': self._is_gzip,
+            'bz2': self._is_bzip2
+        }
+
+    def _is_tar(self, response):
+        archive = BytesIO(response.body)
+        try:
+            tar_file = tarfile.open(name=mktemp(), fileobj=archive)
+        except tarfile.ReadError:
+            return
+
+        body = tar_file.extractfile(tar_file.members[0]).read()
+        respcls = responsetypes.from_args(filename=tar_file.members[0].name, body=body)
+        return response.replace(body=body, cls=respcls)
+
+    def _is_zip(self, response):
+        archive = BytesIO(response.body)
+        try:
+            zip_file = zipfile.ZipFile(archive)
+        except zipfile.BadZipfile:
+            return
+
+        namelist = zip_file.namelist()
+        body = zip_file.read(namelist[0])
+        respcls = responsetypes.from_args(filename=namelist[0], body=body)
+        return response.replace(body=body, cls=respcls)
+
+    def _is_gzip(self, response):
+        archive = BytesIO(response.body)
+        try:
+            body = gzip.GzipFile(fileobj=archive).read()
+        except IOError:
+            return
+
+        respcls = responsetypes.from_args(body=body)
+        return response.replace(body=body, cls=respcls)
+
+    def _is_bzip2(self, response):
+        try:
+            body = bz2.decompress(response.body)
+        except IOError:
+            return
+
+        respcls = responsetypes.from_args(body=body)
+        return response.replace(body=body, cls=respcls)
+
+    def process_response(self, request, response, spider):
+        if not response.body:
+            return response
+
+        for fmt, func in six.iteritems(self._formats):
+            new_response = func(response)
+            if new_response:
+                log.msg(format='Decompressed response with format: %(responsefmt)s',
+                        level=log.DEBUG, spider=spider, responsefmt=fmt)
+                return new_response
+        return response
diff --git a/scrapy/contrib_exp/__init__.py b/scrapy/contrib_exp/__init__.py
@@ -1,9 +0,0 @@
-"""
-This module contains experimental code that may go into scrapy.contrib in the
-future, but it's not yet stable enough to go there (either API stable or
-functionality stable).
-
-Subscribe to Scrapy developers mailing list or join the IRC channel if you want
-to discuss about this code.
-
-"""

diff --git a/scrapy/contrib_exp/djangoitem.py b/scrapy/contrib_exp/djangoitem.py
diff --git a/scrapy/contrib_exp/downloadermiddleware/decompression.py b/scrapy/contrib_exp/downloadermiddleware/decompression.py
@@ -1,86 +1,7 @@
-""" This module implements the DecompressionMiddleware which tries to recognise
-and extract the potentially compressed responses that may arrive. 
-"""
+import warnings
+from scrapy.exceptions import ScrapyDeprecationWarning
+warnings.warn("Module `scrapy.contrib_exp.downloadermiddleware.decompression` is deprecated, "
+              "use `scrapy.contrib.downloadermiddleware.decompression` instead",
+    ScrapyDeprecationWarning, stacklevel=2)
 
-import bz2
-import gzip
-import zipfile
-import tarfile
-from tempfile import mktemp
-
-import six
-
-try:
-    from cStringIO import StringIO as BytesIO
-except ImportError:
-    from io import BytesIO
-
-from scrapy import log
-from scrapy.responsetypes import responsetypes
-
-
-class DecompressionMiddleware(object):
-    """ This middleware tries to recognise and extract the possibly compressed
-    responses that may arrive. """
-
-    def __init__(self):
-        self._formats = {
-            'tar': self._is_tar,
-            'zip': self._is_zip,
-            'gz': self._is_gzip,
-            'bz2': self._is_bzip2
-        }
-
-    def _is_tar(self, response):
-        archive = BytesIO(response.body)
-        try:
-            tar_file = tarfile.open(name=mktemp(), fileobj=archive)
-        except tarfile.ReadError:
-            return
-
-        body = tar_file.extractfile(tar_file.members[0]).read()
-        respcls = responsetypes.from_args(filename=tar_file.members[0].name, body=body)
-        return response.replace(body=body, cls=respcls)
-
-    def _is_zip(self, response):
-        archive = BytesIO(response.body)
-        try:
-            zip_file = zipfile.ZipFile(archive)
-        except zipfile.BadZipfile:
-            return
-
-        namelist = zip_file.namelist()
-        body = zip_file.read(namelist[0])
-        respcls = responsetypes.from_args(filename=namelist[0], body=body)
-        return response.replace(body=body, cls=respcls)
-
-    def _is_gzip(self, response):
-        archive = BytesIO(response.body)
-        try:
-            body = gzip.GzipFile(fileobj=archive).read()
-        except IOError:
-            return
-
-        respcls = responsetypes.from_args(body=body)
-        return response.replace(body=body, cls=respcls)
-
-    def _is_bzip2(self, response):
-        try:
-            body = bz2.decompress(response.body)
-        except IOError:
-            return
-
-        respcls = responsetypes.from_args(body=body)
-        return response.replace(body=body, cls=respcls)
-
-    def process_response(self, request, response, spider):
-        if not response.body:
-            return response
-
-        for fmt, func in six.iteritems(self._formats):
-            new_response = func(response)
-            if new_response:
-                log.msg(format='Decompressed response with format: %(responsefmt)s',
-                        level=log.DEBUG, spider=spider, responsefmt=fmt)
-                return new_response
-        return response
+from scrapy.contrib.downloadermiddleware.decompression import DecompressionMiddleware
diff --git a/scrapy/contrib_exp/iterators.py b/scrapy/contrib_exp/iterators.py
@@ -1,42 +1,6 @@
-from scrapy.http import Response
-from scrapy.selector import Selector
+import warnings
+from scrapy.exceptions import ScrapyDeprecationWarning
+warnings.warn("Module `scrapy.contrib_exp.iterators` is deprecated, use `scrapy.utils.iterators` instead",
+    ScrapyDeprecationWarning, stacklevel=2)
 
-
-def xmliter_lxml(obj, nodename, namespace=None):
-    from lxml import etree
-    reader = _StreamReader(obj)
-    tag = '{%s}%s' % (namespace, nodename) if namespace else nodename
-    iterable = etree.iterparse(reader, tag=tag, encoding=reader.encoding)
-    selxpath = '//' + ('x:%s' % nodename if namespace else nodename)
-    for _, node in iterable:
-        nodetext = etree.tostring(node)
-        node.clear()
-        xs = Selector(text=nodetext, type='xml')
-        if namespace:
-            xs.register_namespace('x', namespace)
-        yield xs.xpath(selxpath)[0]
-
-
-class _StreamReader(object):
-
-    def __init__(self, obj):
-        self._ptr = 0
-        if isinstance(obj, Response):
-            self._text, self.encoding = obj.body, obj.encoding
-        else:
-            self._text, self.encoding = obj, 'utf-8'
-        self._is_unicode = isinstance(self._text, unicode)
-
-    def read(self, n=65535):
-        self.read = self._read_unicode if self._is_unicode else self._read_string
-        return self.read(n).lstrip()
-
-    def _read_string(self, n=65535):
-        s, e = self._ptr, self._ptr + n
-        self._ptr = e
-        return self._text[s:e]
-
-    def _read_unicode(self, n=65535):
-        s, e = self._ptr, self._ptr + n
-        self._ptr = e
-        return self._text[s:e].encode('utf-8')
+from scrapy.utils.iterators import xmliter_lxml
diff --git a/scrapy/utils/iterators.py b/scrapy/utils/iterators.py
@@ -35,6 +35,46 @@ def xmliter(obj, nodename):
         yield Selector(text=nodetext, type='xml').xpath('//' + nodename)[0]
 
 
+def xmliter_lxml(obj, nodename, namespace=None):
+    from lxml import etree
+    reader = _StreamReader(obj)
+    tag = '{%s}%s' % (namespace, nodename) if namespace else nodename
+    iterable = etree.iterparse(reader, tag=tag, encoding=reader.encoding)
+    selxpath = '//' + ('x:%s' % nodename if namespace else nodename)
+    for _, node in iterable:
+        nodetext = etree.tostring(node)
+        node.clear()
+        xs = Selector(text=nodetext, type='xml')
+        if namespace:
+            xs.register_namespace('x', namespace)
+        yield xs.xpath(selxpath)[0]
+
+
+class _StreamReader(object):
+
+    def __init__(self, obj):
+        self._ptr = 0
+        if isinstance(obj, Response):
+            self._text, self.encoding = obj.body, obj.encoding
+        else:
+            self._text, self.encoding = obj, 'utf-8'
+        self._is_unicode = isinstance(self._text, unicode)
+
+    def read(self, n=65535):
+        self.read = self._read_unicode if self._is_unicode else self._read_string
+        return self.read(n).lstrip()
+
+    def _read_string(self, n=65535):
+        s, e = self._ptr, self._ptr + n
+        self._ptr = e
+        return self._text[s:e]
+
+    def _read_unicode(self, n=65535):
+        s, e = self._ptr, self._ptr + n
+        self._ptr = e
+        return self._text[s:e].encode('utf-8')
+
+
 def csviter(obj, delimiter=None, headers=None, encoding=None, quotechar=None):
     """ Returns an iterator of dictionaries from the given csv object
 

diff --git a/tests/test_downloadermiddleware_decompression.py b/tests/test_downloadermiddleware_decompression.py
@@ -1,6 +1,6 @@
 from unittest import TestCase, main
 from scrapy.http import Response, XmlResponse
-from scrapy.contrib_exp.downloadermiddleware.decompression import DecompressionMiddleware
+from scrapy.contrib.downloadermiddleware.decompression import DecompressionMiddleware
 from scrapy.spider import Spider
 from tests import get_testdata
 from scrapy.utils.test import assert_samelines

diff --git a/tests/test_utils_iterators.py b/tests/test_utils_iterators.py
@@ -1,8 +1,7 @@
 import os
 from twisted.trial import unittest
 
-from scrapy.utils.iterators import csviter, xmliter, _body_or_str
-from scrapy.contrib_exp.iterators import xmliter_lxml
+from scrapy.utils.iterators import csviter, xmliter, _body_or_str, xmliter_lxml
 from scrapy.http import XmlResponse, TextResponse, Response
 from tests import get_testdata