Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Serve json numpy encoding #2316

Merged
merged 5 commits into from
Jul 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions ludwig/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from ludwig.contrib import add_contrib_callback_args
from ludwig.globals import LUDWIG_VERSION
from ludwig.utils.print_utils import logging_level_registry, print_ludwig
from ludwig.utils.server_utils import NumpyJSONResponse

logger = logging.getLogger(__name__)

Expand All @@ -40,7 +41,6 @@
from starlette.middleware import Middleware
from starlette.middleware.cors import CORSMiddleware
from starlette.requests import Request
from starlette.responses import JSONResponse
except ImportError as e:
logger.error(e)
logger.error(
Expand All @@ -64,7 +64,7 @@ def server(model, allowed_origins=None):

@app.get("/")
def check_health():
return JSONResponse({"message": "Ludwig server is up"})
return NumpyJSONResponse({"message": "Ludwig server is up"})

@app.post("/predict")
async def predict(request: Request):
Expand All @@ -73,18 +73,18 @@ async def predict(request: Request):
entry, files = convert_input(form, model.model.input_features)
except Exception:
logger.exception("Failed to parse predict form")
return JSONResponse(COULD_NOT_RUN_INFERENCE_ERROR, status_code=500)
return NumpyJSONResponse(COULD_NOT_RUN_INFERENCE_ERROR, status_code=500)

try:
if (entry.keys() & input_features) != input_features:
return JSONResponse(ALL_FEATURES_PRESENT_ERROR, status_code=400)
return NumpyJSONResponse(ALL_FEATURES_PRESENT_ERROR, status_code=400)
try:
resp, _ = model.predict(dataset=[entry], data_format=dict)
resp = resp.to_dict("records")[0]
return JSONResponse(resp)
return NumpyJSONResponse(resp)
except Exception as exc:
logger.exception(f"Failed to run predict: {exc}")
return JSONResponse(COULD_NOT_RUN_INFERENCE_ERROR, status_code=500)
return NumpyJSONResponse(COULD_NOT_RUN_INFERENCE_ERROR, status_code=500)
finally:
for f in files:
os.remove(f.name)
Expand All @@ -97,17 +97,17 @@ async def batch_predict(request: Request):
data_df = pd.DataFrame.from_records(data["data"], index=data.get("index"), columns=data["columns"])
except Exception:
logger.exception("Failed to parse batch_predict form")
return JSONResponse(COULD_NOT_RUN_INFERENCE_ERROR, status_code=500)
return NumpyJSONResponse(COULD_NOT_RUN_INFERENCE_ERROR, status_code=500)

if (set(data_df.columns) & input_features) != input_features:
return JSONResponse(ALL_FEATURES_PRESENT_ERROR, status_code=400)
return NumpyJSONResponse(ALL_FEATURES_PRESENT_ERROR, status_code=400)
try:
resp, _ = model.predict(dataset=data_df)
resp = resp.to_dict("split")
return JSONResponse(resp)
return NumpyJSONResponse(resp)
except Exception:
logger.exception("Failed to run batch_predict: {}")
return JSONResponse(COULD_NOT_RUN_INFERENCE_ERROR, status_code=500)
return NumpyJSONResponse(COULD_NOT_RUN_INFERENCE_ERROR, status_code=500)

return app

Expand Down
21 changes: 19 additions & 2 deletions ludwig/utils/server_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import json
import os
import tempfile
from typing import Union
from typing import Any, Dict, Union

import numpy as np
import pandas as pd
from starlette.datastructures import UploadFile
from starlette.responses import JSONResponse

from ludwig.utils.data_utils import NumpyEncoder


def serialize_payload(data_source: Union[pd.DataFrame, pd.Series]) -> tuple:
Expand Down Expand Up @@ -146,8 +149,22 @@ def deserialize_request(form) -> tuple:
# to_replace: list of file path strings that the user provided
# value: list of temporary files created for each input file
#
# IMPORTANT: There is a one-to-one coorespondence of the to_replace list
# IMPORTANT: There is a one-to-one correspondence of the to_replace list
# and the value list. Each list must be the same size.
df.replace(to_replace=list(file_index.keys()), value=list(file_index.values()), inplace=True)

return df, files


class NumpyJSONResponse(JSONResponse):
    """JSONResponse variant whose serializer understands numpy scalars and arrays."""

    def render(self, content: Any) -> bytes:
        """Serialize `content` to compact UTF-8 JSON, encoding numpy types via NumpyEncoder.

        Overrides Starlette's default JSONResponse.render, which cannot serialize
        numpy arrays/scalars. Note: `content` may be any JSON-serializable value
        (dict, list, scalar, or None), not only a dict.

        Args:
            content: JSON-serializable object (may contain numpy values).

        Returns:
            UTF-8 encoded JSON bytes (Starlette's Response.render contract).
        """
        # allow_nan=False: reject NaN/Infinity so the output is strict JSON.
        # separators=(",", ":"): compact output with no whitespace.
        return json.dumps(
            content, ensure_ascii=False, allow_nan=False, indent=None, separators=(",", ":"), cls=NumpyEncoder
        ).encode("utf-8")
29 changes: 29 additions & 0 deletions tests/ludwig/utils/test_data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import json

import numpy as np
import pandas as pd
import pytest
Expand All @@ -23,6 +25,7 @@
figure_data_format_dataset,
get_abs_path,
hash_dict,
NumpyEncoder,
use_credentials,
)

Expand Down Expand Up @@ -126,3 +129,29 @@ def test_use_credentials():
assert conf == s3_creds

assert len(conf) == 0


def test_numpy_encoder():
    """NumpyEncoder must serialize Python builtins and numpy scalars/arrays as plain JSON."""
    # Python builtin data type encoding.
    assert json.dumps(None, cls=NumpyEncoder) == "null"
    assert json.dumps({}, cls=NumpyEncoder) == "{}"
    assert json.dumps(1, cls=NumpyEncoder) == "1"
    assert json.dumps(1.0, cls=NumpyEncoder) == "1.0"
    assert json.dumps("a", cls=NumpyEncoder) == '"a"'
    assert json.dumps([0, 1, 2, 3, 4], cls=NumpyEncoder) == "[0, 1, 2, 3, 4]"
    assert json.dumps((0, 1, 2, 3, 4), cls=NumpyEncoder) == "[0, 1, 2, 3, 4]"
    assert json.dumps({0, 1, 2, 3, 4}, cls=NumpyEncoder) == "[0, 1, 2, 3, 4]"
    assert json.dumps({"a": "b"}, cls=NumpyEncoder) == '{"a": "b"}'

    # Numpy integer dtypes: arrays encode as JSON lists, scalars as bare numbers.
    # np.int_ replaces np.int, which was removed in NumPy 1.24.
    for dtype in [np.byte, np.ubyte, np.short, np.ushort, np.int_, np.uint, np.longlong, np.ulonglong]:
        x = np.arange(5, dtype=dtype)
        assert json.dumps(x, cls=NumpyEncoder) == "[0, 1, 2, 3, 4]"
        for i in x:
            assert json.dumps(i, cls=NumpyEncoder) == f"{i}"

    # Numpy floating dtypes: values render with a decimal point.
    for dtype in [np.half, np.single, np.double, np.longdouble]:
        x = np.arange(5, dtype=dtype)
        assert json.dumps(x, cls=NumpyEncoder) == "[0.0, 1.0, 2.0, 3.0, 4.0]"
        for i in x:
            assert json.dumps(i, cls=NumpyEncoder) == f"{i}"
31 changes: 31 additions & 0 deletions tests/ludwig/utils/test_server_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import numpy as np

from ludwig.utils.server_utils import NumpyJSONResponse


def test_numpy_json_response():
    """NumpyJSONResponse.render must emit compact JSON bytes for builtins and numpy types."""
    response = NumpyJSONResponse()

    # Python builtin data type encoding (compact separators: no spaces).
    assert response.render(None) == b"null"
    assert response.render({}) == b"{}"
    assert response.render(1) == b"1"
    assert response.render(1.0) == b"1.0"
    assert response.render("a") == b'"a"'
    assert response.render([0, 1, 2, 3, 4]) == b"[0,1,2,3,4]"
    assert response.render((0, 1, 2, 3, 4)) == b"[0,1,2,3,4]"
    assert response.render({0, 1, 2, 3, 4}) == b"[0,1,2,3,4]"
    assert response.render({"a": "b"}) == b'{"a":"b"}'

    # Numpy integer dtypes: arrays encode as JSON lists, scalars as bare numbers.
    # np.int_ replaces np.int, which was removed in NumPy 1.24.
    for dtype in [np.byte, np.ubyte, np.short, np.ushort, np.int_, np.uint, np.longlong, np.ulonglong]:
        x = np.arange(5, dtype=dtype)
        assert response.render(x) == b"[0,1,2,3,4]"
        for i in x:
            assert response.render(i) == f"{i}".encode()

    # Numpy floating dtypes: values render with a decimal point.
    for dtype in [np.half, np.single, np.double, np.longdouble]:
        x = np.arange(5, dtype=dtype)
        assert response.render(x) == b"[0.0,1.0,2.0,3.0,4.0]"
        for i in x:
            assert response.render(i) == f"{i}".encode()