Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MRG+2] Request subclass for json requests #3504 #3505

Merged
merged 13 commits into from
Mar 22, 2019
38 changes: 38 additions & 0 deletions docs/topics/request-response.rst
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,44 @@ method for this job. Here's an example spider which uses it::

# continue scraping with authenticated session...

JSONRequest
-----------

The JSONRequest class extends the base :class:`Request` class with functionality for
dealing with JSON requests.

.. class:: JSONRequest(url, [... data, dumps_kwargs])

The :class:`JSONRequest` class adds two new arguments to the constructor. The
remaining arguments are the same as for the :class:`Request` class and are
not documented here.

Using the :class:`JSONRequest` will set the ``Content-Type`` header to ``application/json``
and the ``Accept`` header to ``application/json, text/javascript, */*; q=0.01``,
unless those headers are already set on the request.

:param data: any JSON serializable object that needs to be JSON encoded and assigned to body.
If the :attr:`Request.body` argument is provided, this parameter will be ignored and a warning is issued.
If the :attr:`Request.body` argument is not provided and the data argument is provided, :attr:`Request.method` will be
set to ``'POST'`` automatically, unless a method is passed explicitly.
:type data: JSON serializable object

:param dumps_kwargs: Parameters that will be passed to the underlying `json.dumps`_ method, which is used to serialize
data into JSON format. ``sort_keys=True`` is used unless it is overridden here.
:type dumps_kwargs: dict

.. _json.dumps: https://docs.python.org/3/library/json.html#json.dumps

JSONRequest usage example
-------------------------

Sending a JSON POST request with a JSON payload::

data = {
'name1': 'value1',
'name2': 'value2',
}
yield JSONRequest(url='http://www.example.com/post/action', data=data)
lopuhin marked this conversation as resolved.
Show resolved Hide resolved


Response objects
================
Expand Down
1 change: 1 addition & 0 deletions scrapy/http/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from scrapy.http.request import Request
from scrapy.http.request.form import FormRequest
from scrapy.http.request.rpc import XmlRpcRequest
from scrapy.http.request.json_request import JSONRequest

from scrapy.http.response import Response
from scrapy.http.response.html import HtmlResponse
Expand Down
53 changes: 53 additions & 0 deletions scrapy/http/request/json_request.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""
This module implements the JSONRequest class which is a more convenient class
(than Request) to generate JSON Requests.

See documentation in docs/topics/request-response.rst
"""

import copy
import json
import warnings

from scrapy.http.request import Request


class JSONRequest(Request):
    """Request subclass that builds its body by JSON-encoding a ``data`` object.

    On top of the arguments forwarded to ``Request``, two keyword arguments
    are understood:

    * ``data`` -- a JSON-serializable object. When no ``body`` is given it is
      serialized with ``json.dumps`` and used as the body, and the method
      defaults to ``'POST'`` unless one is passed explicitly. When a ``body``
      is given as well, ``data`` is ignored and a warning is issued.
    * ``dumps_kwargs`` -- dict of keyword arguments passed to ``json.dumps``;
      ``sort_keys=True`` is applied unless overridden. ``None`` is treated
      the same as omitting the argument.

    ``Content-Type`` and ``Accept`` headers are given JSON defaults when they
    are not already present.
    """

    def __init__(self, *args, **kwargs):
        # Treat an explicit ``None`` like "not given" so callers can forward
        # an optional value without tripping over ``None.setdefault``.
        dumps_kwargs = kwargs.pop('dumps_kwargs', None)
        # Deep copy so later mutation of the caller's dict cannot change how
        # this request serializes data.
        dumps_kwargs = {} if dumps_kwargs is None else copy.deepcopy(dumps_kwargs)
        dumps_kwargs.setdefault('sort_keys', True)
        self._dumps_kwargs = dumps_kwargs

        # Only a body generated from ``data`` switches the default method.
        if self._fill_body_from_data(kwargs):
            kwargs.setdefault('method', 'POST')

        super(JSONRequest, self).__init__(*args, **kwargs)
        self.headers.setdefault('Content-Type', 'application/json')
        self.headers.setdefault('Accept', 'application/json, text/javascript, */*; q=0.01')

    def replace(self, *args, **kwargs):
        """Return a copy of this request with the given attributes replaced.

        ``data`` is accepted in addition to the base-class keywords and is
        serialized into ``body`` with this request's ``dumps_kwargs``.
        """
        self._fill_body_from_data(kwargs)

        # Carry the serialization options over to the copy; otherwise it
        # would fall back to the defaults on its next ``replace(data=...)``.
        # NOTE(review): assumes Request.replace forwards its keyword
        # arguments to the constructor of ``cls`` (default: this class) --
        # confirm against the base class. Skipped when the caller asks for a
        # target class that is not a JSONRequest.
        target_cls = kwargs.get('cls', self.__class__)
        if isinstance(target_cls, type) and issubclass(target_cls, JSONRequest):
            kwargs.setdefault('dumps_kwargs', self._dumps_kwargs)

        return super(JSONRequest, self).replace(*args, **kwargs)

    def _fill_body_from_data(self, kwargs):
        """Pop ``data`` from *kwargs* and turn it into ``body`` when appropriate.

        *kwargs* is modified in place. Returns True when ``body`` was
        generated from ``data``, False otherwise (no data, or an explicit
        body took precedence).
        """
        body_passed = kwargs.get('body', None) is not None
        data = kwargs.pop('data', None)

        if data is None:
            return False

        if body_passed:
            warnings.warn('Both body and data passed. data will be ignored')
            return False

        kwargs['body'] = self._dumps(data)
        return True

    def _dumps(self, data):
        """Serialize *data* to a JSON string using the stored ``dumps_kwargs``."""
        return json.dumps(data, **self._dumps_kwargs)
170 changes: 169 additions & 1 deletion tests/test_http_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,20 @@
import cgi
import unittest
import re
import json
import warnings

import six
from six.moves import xmlrpc_client as xmlrpclib
from six.moves.urllib.parse import urlparse, parse_qs, unquote
if six.PY3:
from urllib.parse import unquote_to_bytes

from scrapy.http import Request, FormRequest, XmlRpcRequest, Headers, HtmlResponse
from scrapy.http import Request, FormRequest, XmlRpcRequest, JSONRequest, Headers, HtmlResponse
from scrapy.utils.python import to_bytes, to_native_str

from tests import mock


class RequestTest(unittest.TestCase):

Expand Down Expand Up @@ -1147,5 +1151,169 @@ def test_latin1(self):
self._test_request(params=(u'pas£',), encoding='latin1')


class JSONRequestTest(RequestTest):
    """Tests for JSONRequest; the generic request tests come from RequestTest."""

    request_class = JSONRequest
    default_method = 'GET'
    default_headers = {b'Content-Type': [b'application/json'], b'Accept': [b'application/json, text/javascript, */*; q=0.01']}

    def setUp(self):
        # Scope the "always" filter to the current test: the previous filter
        # state is saved here and restored by the cleanup, instead of wiping
        # every filter in the process with warnings.resetwarnings().
        saved_filters = warnings.catch_warnings()
        saved_filters.__enter__()
        self.addCleanup(saved_filters.__exit__, None, None, None)
        warnings.simplefilter("always")
        super(JSONRequestTest, self).setUp()

    def test_data(self):
        """ body is taken from ``body`` or generated from ``data`` """
        r1 = self.request_class(url="http://www.example.com/")
        self.assertEqual(r1.body, b'')

        body = b'body'
        r2 = self.request_class(url="http://www.example.com/", body=body)
        self.assertEqual(r2.body, body)

        data = {
            'name': 'value',
        }
        r3 = self.request_class(url="http://www.example.com/", data=data)
        self.assertEqual(r3.body, to_bytes(json.dumps(data)))

        # empty data
        r4 = self.request_class(url="http://www.example.com/", data=[])
        self.assertEqual(r4.body, to_bytes(json.dumps([])))

    def test_data_method(self):
        """ method defaults to POST only when the body comes from data """
        # data is not passed
        r1 = self.request_class(url="http://www.example.com/")
        self.assertEqual(r1.method, 'GET')

        body = b'body'
        r2 = self.request_class(url="http://www.example.com/", body=body)
        self.assertEqual(r2.method, 'GET')

        data = {
            'name': 'value',
        }
        r3 = self.request_class(url="http://www.example.com/", data=data)
        self.assertEqual(r3.method, 'POST')

        # method passed explicitly
        r4 = self.request_class(url="http://www.example.com/", data=data, method='GET')
        self.assertEqual(r4.method, 'GET')

        r5 = self.request_class(url="http://www.example.com/", data=[])
        self.assertEqual(r5.method, 'POST')

    def test_body_data(self):
        """ passing both body and data should result in a warning """
        body = b'body'
        data = {
            'name': 'value',
        }
        with warnings.catch_warnings(record=True) as _warnings:
            r5 = self.request_class(url="http://www.example.com/", body=body, data=data)
            self.assertEqual(r5.body, body)
            self.assertEqual(r5.method, 'GET')
            self.assertEqual(len(_warnings), 1)
            self.assertIn('data will be ignored', str(_warnings[0].message))

    def test_empty_body_data(self):
        """ passing any body value (even empty) and data should result in a warning """
        data = {
            'name': 'value',
        }
        with warnings.catch_warnings(record=True) as _warnings:
            r6 = self.request_class(url="http://www.example.com/", body=b'', data=data)
            self.assertEqual(r6.body, b'')
            self.assertEqual(r6.method, 'GET')
            self.assertEqual(len(_warnings), 1)
            self.assertIn('data will be ignored', str(_warnings[0].message))

    def test_body_none_data(self):
        """ body=None counts as "no body": data is used and nothing is warned """
        data = {
            'name': 'value',
        }
        with warnings.catch_warnings(record=True) as _warnings:
            r7 = self.request_class(url="http://www.example.com/", body=None, data=data)
            self.assertEqual(r7.body, to_bytes(json.dumps(data)))
            self.assertEqual(r7.method, 'POST')
            self.assertEqual(len(_warnings), 0)

    def test_body_data_none(self):
        """ body=None and data=None behave like a plain request """
        with warnings.catch_warnings(record=True) as _warnings:
            r8 = self.request_class(url="http://www.example.com/", body=None, data=None)
            self.assertEqual(r8.method, 'GET')
            self.assertEqual(len(_warnings), 0)

    def test_dumps_sort_keys(self):
        """ Test that sort_keys=True is passed to json.dumps by default """
        data = {
            'name': 'value',
        }
        with mock.patch('json.dumps', return_value=b'') as mock_dumps:
            self.request_class(url="http://www.example.com/", data=data)
            kwargs = mock_dumps.call_args[1]
            self.assertEqual(kwargs['sort_keys'], True)

    def test_dumps_kwargs(self):
        """ Test that dumps_kwargs are passed to json.dumps """
        data = {
            'name': 'value',
        }
        dumps_kwargs = {
            'ensure_ascii': True,
            'allow_nan': True,
        }
        with mock.patch('json.dumps', return_value=b'') as mock_dumps:
            self.request_class(url="http://www.example.com/", data=data, dumps_kwargs=dumps_kwargs)
            kwargs = mock_dumps.call_args[1]
            self.assertEqual(kwargs['ensure_ascii'], True)
            self.assertEqual(kwargs['allow_nan'], True)

    def test_replace_data(self):
        """ Test that replace(data=...) regenerates the body from the new data """
        data1 = {
            'name1': 'value1',
        }
        data2 = {
            'name2': 'value2',
        }
        r1 = self.request_class(url="http://www.example.com/", data=data1)
        r2 = r1.replace(data=data2)
        self.assertEqual(r2.body, to_bytes(json.dumps(data2)))

    def test_replace_sort_keys(self):
        """ Test that replace provides sort_keys=True to json.dumps """
        data1 = {
            'name1': 'value1',
        }
        data2 = {
            'name2': 'value2',
        }
        r1 = self.request_class(url="http://www.example.com/", data=data1)
        with mock.patch('json.dumps', return_value=b'') as mock_dumps:
            r1.replace(data=data2)
            kwargs = mock_dumps.call_args[1]
            self.assertEqual(kwargs['sort_keys'], True)

    def test_replace_dumps_kwargs(self):
        """ Test that dumps_kwargs are provided to json.dumps when replace is called """
        data1 = {
            'name1': 'value1',
        }
        data2 = {
            'name2': 'value2',
        }
        dumps_kwargs = {
            'ensure_ascii': True,
            'allow_nan': True,
        }
        r1 = self.request_class(url="http://www.example.com/", data=data1, dumps_kwargs=dumps_kwargs)
        with mock.patch('json.dumps', return_value=b'') as mock_dumps:
            r1.replace(data=data2)
            kwargs = mock_dumps.call_args[1]
            self.assertEqual(kwargs['ensure_ascii'], True)
            self.assertEqual(kwargs['allow_nan'], True)

    def tearDown(self):
        # The warnings filters are restored by the cleanup registered in
        # setUp, so nothing needs to be reset globally here.
        super(JSONRequestTest, self).tearDown()


# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()