diff --git a/scrapyrt/resources.py b/scrapyrt/resources.py
index 5ad219d..4bf0309 100644
--- a/scrapyrt/resources.py
+++ b/scrapyrt/resources.py
@@ -12,10 +12,18 @@ from .utils import extract_scrapy_request_args, to_bytes
 
 
+class AdaptedScrapyJSONEncoder(ScrapyJSONEncoder):
+    def default(self, o):
+        if isinstance(o, bytes):
+            return o.decode('utf8')
+        else:
+            return super().default(o)
+
+
 # XXX super() calls won't work wihout object mixin in Python 2
 # maybe this can be removed at some point?
 class ServiceResource(resource.Resource, object):
-    json_encoder = ScrapyJSONEncoder()
+    json_encoder = AdaptedScrapyJSONEncoder()
 
     def __init__(self, root=None):
         resource.Resource.__init__(self)
@@ -79,6 +87,7 @@ def format_error_response(self, exception, request):
         # Twisted HTTP Error objects still have 'message' attribute even in 3+
         # and they fail on str(exception) call.
         msg = exception.message if hasattr(exception, 'message') else str(exception)
+
         return {
             "status": "error",
             "message": msg,
@@ -87,6 +96,7 @@ def render_object(self, obj, request):
         r = self.json_encoder.encode(obj) + "\n"
+
         request.setHeader('Content-Type', 'application/json')
         request.setHeader('Access-Control-Allow-Origin', '*')
         request.setHeader('Access-Control-Allow-Methods',
diff --git a/tests/sample_data/testproject/testproject/spiders/testspider.py b/tests/sample_data/testproject/testproject/spiders/testspider.py
index 2d0d3f1..e036f5c 100644
--- a/tests/sample_data/testproject/testproject/spiders/testspider.py
+++ b/tests/sample_data/testproject/testproject/spiders/testspider.py
@@ -12,6 +12,9 @@ def parse(self, response):
         name = response.xpath('//h1/text()').extract()
         return TestprojectItem(name=name)
 
+    def return_bytes(self, response):
+        return TestprojectItem(name=b'Some bytes here')
+
     def some_errback(self, err):
         self.logger.error("Logging some error {}".format(err))
         return
diff --git a/tests/test_resource_crawl.py b/tests/test_resource_crawl.py
index bf20a8f..12f5cf1 100644
--- a/tests/test_resource_crawl.py
+++ b/tests/test_resource_crawl.py
@@ -400,3 +400,15 @@ def test_passing_errback(self, server, method):
         msg = 'ERROR: Logging some error'
         assert re.search(msg, log_file)
+
+    @pytest.mark.parametrize("method", [
+        perform_get, perform_post
+    ])
+    def test_bytes_in_item(self, server, method):
+        url = server.url("crawl.json")
+        res = method(url,
+                     {"spider_name": "test"},
+                     {"url": server.target_site.url("page1.html"),
+                      'callback': 'return_bytes'})
+        assert res.status_code == 200
+        assert res.json()["items"] == [{'name': 'Some bytes here'}]
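
For context on the fix itself, here is a minimal standalone sketch of the behaviour the new encoder addresses. The `item` dict below is illustrative (it simply mirrors `TestprojectItem(name=b'Some bytes here')` from the test spider); the encoder subclass is the same one added to `scrapyrt/resources.py` above:

```python
from scrapy.utils.serialize import ScrapyJSONEncoder


class AdaptedScrapyJSONEncoder(ScrapyJSONEncoder):
    """Decode bytes values to str so items containing bytes serialize to JSON."""

    def default(self, o):
        if isinstance(o, bytes):
            return o.decode('utf8')
        return super().default(o)


# Illustrative item carrying a bytes value, as return_bytes() produces.
item = {"name": b"Some bytes here"}

# The stock ScrapyJSONEncoder falls through to json.JSONEncoder.default() for
# bytes and raises TypeError; the adapted encoder decodes the value as UTF-8.
print(AdaptedScrapyJSONEncoder().encode(item))  # -> {"name": "Some bytes here"}
```

This sketch assumes UTF-8 payloads, as the change itself does: non-UTF-8 bytes would raise `UnicodeDecodeError` inside `default()` rather than serialize.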