Skip to content

Commit

Permalink
fix(datasets): do not double encode the data as json when saving an A… (
Browse files Browse the repository at this point in the history
#301)

* fix(datasets): do not double encode the data as json when saving an APIDataSet

Signed-off-by: Florian Gaudin-Delrieu <fgaudindelrieu@idmog.com>
Signed-off-by: Florian Gaudin-Delrieu <florian.gaudindelrieu@gmail.com>

* chore(lint): make pyling happy

Signed-off-by: Florian Gaudin-Delrieu <florian.gaudindelrieu@gmail.com>

---------

Signed-off-by: Florian Gaudin-Delrieu <fgaudindelrieu@idmog.com>
Signed-off-by: Florian Gaudin-Delrieu <florian.gaudindelrieu@gmail.com>
Co-authored-by: Nok Lam Chan <nok.lam.chan@quantumblack.com>
  • Loading branch information
FlorianGD and noklam committed Aug 15, 2023
1 parent 4942719 commit fdd6073
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 11 deletions.
2 changes: 2 additions & 0 deletions kedro-datasets/RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
## Major features and improvements

## Bug fixes and other changes
* Fixed an issue on `api.APIDataSet` where the sent data was doubly converted to json
string (once by us and once by the `requests` library).

## Community contributions

Expand Down
4 changes: 2 additions & 2 deletions kedro-datasets/kedro_datasets/api/api_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,9 +207,9 @@ def _execute_save_with_chunks(

def _execute_save_request(self, json_data: Any) -> requests.Response:
try:
json_.loads(json_data)
self._request_args["json"] = json_.loads(json_data)
except TypeError:
self._request_args["json"] = json_.dumps(json_data)
self._request_args["json"] = json_data
try:
response = requests.request(**self._request_args)
response.raise_for_status()
Expand Down
39 changes: 32 additions & 7 deletions kedro-datasets/tests/api/test_api_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import base64
import json
import socket
from typing import Any

import pytest
import requests
Expand All @@ -23,7 +24,7 @@
TEST_METHOD = "GET"
TEST_HEADERS = {"key": "value"}

TEST_SAVE_DATA = [json.dumps({"key1": "info1", "key2": "info2"})]
TEST_SAVE_DATA = [{"key1": "info1", "key2": "info2"}]


class TestAPIDataSet:
Expand Down Expand Up @@ -272,12 +273,24 @@ def test_socket_error(self, requests_mock):
api_data_set.load()

@pytest.mark.parametrize("method", POSSIBLE_METHODS)
def test_successful_save(self, requests_mock, method):
@pytest.mark.parametrize(
"data",
[TEST_SAVE_DATA, json.dumps(TEST_SAVE_DATA)],
ids=["data_as_dict", "data_as_json_string"],
)
def test_successful_save(self, requests_mock, method, data):
"""
When we want to save some data on a server
Given an APIDataSet class
Then check we get a response
Then check that the response is OK and the sent data is in the correct form.
"""

def json_callback(
request: requests.Request, context: Any # pylint: disable=unused-argument
) -> dict:
"""Callback that sends back the json."""
return request.json()

if method in ["PUT", "POST"]:
api_data_set = APIDataSet(
url=TEST_URL,
Expand All @@ -289,10 +302,12 @@ def test_successful_save(self, requests_mock, method):
TEST_URL_WITH_PARAMS,
headers=TEST_HEADERS,
status_code=requests.codes.ok,
json=json_callback,
)
response = api_data_set._save(TEST_SAVE_DATA)

response = api_data_set._save(data)
assert isinstance(response, requests.Response)
assert response.json() == TEST_SAVE_DATA

elif method == "GET":
api_data_set = APIDataSet(
url=TEST_URL,
Expand All @@ -315,6 +330,13 @@ def test_successful_save_with_json(self, requests_mock, save_methods):
Given an APIDataSet class
Then check we get a response
"""

def json_callback(
request: requests.Request, context: Any # pylint: disable=unused-argument
) -> dict:
"""Callback that sends back the json."""
return request.json()

api_data_set = APIDataSet(
url=TEST_URL,
method=save_methods,
Expand All @@ -324,17 +346,20 @@ def test_successful_save_with_json(self, requests_mock, save_methods):
save_methods,
TEST_URL,
headers=TEST_HEADERS,
text=json.dumps(TEST_JSON_RESPONSE_DATA),
json=json_callback,
)
response_list = api_data_set._save(TEST_SAVE_DATA)

assert isinstance(response_list, requests.Response)
# check that the data was sent in the correct format
assert response_list.json() == TEST_SAVE_DATA

response_dict = api_data_set._save({"item1": "key1"})
assert isinstance(response_dict, requests.Response)
assert response_dict.json() == {"item1": "key1"}

response_json = api_data_set._save(TEST_SAVE_DATA[0])
assert isinstance(response_json, requests.Response)
assert response_json.json() == TEST_SAVE_DATA[0]

@pytest.mark.parametrize("save_methods", SAVE_METHODS)
def test_save_http_error(self, requests_mock, save_methods):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def test_full_table(self):
assert unity_ds._table.full_table_location() == "`default`.`test`"

with pytest.raises(TypeError):
ManagedTableDataSet() # pylint: disable=no-value-for-parameter
ManagedTableDataSet()

def test_describe(self):
unity_ds = ManagedTableDataSet(table="test")
Expand Down
1 change: 0 additions & 1 deletion kedro-datasets/tests/pandas/test_hdf_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@ def test_save_and_load_df_with_categorical_variables(self, hdf_data_set):

def test_thread_lock_usage(self, hdf_data_set, dummy_dataframe, mocker):
"""Test thread lock usage."""
# pylint: disable=no-member
mocked_lock = HDFDataSet._lock
mocked_lock.assert_not_called()

Expand Down

0 comments on commit fdd6073

Please sign in to comment.