Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Serve json numpy encoding #2316

Merged
merged 5 commits into from
Jul 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions ludwig/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from ludwig.contrib import add_contrib_callback_args
from ludwig.globals import LUDWIG_VERSION
from ludwig.utils.print_utils import logging_level_registry, print_ludwig
from ludwig.utils.server_utils import NumpyJSONResponse

logger = logging.getLogger(__name__)

Expand All @@ -40,7 +41,6 @@
from starlette.middleware import Middleware
from starlette.middleware.cors import CORSMiddleware
from starlette.requests import Request
from starlette.responses import JSONResponse
except ImportError as e:
logger.error(e)
logger.error(
Expand All @@ -64,7 +64,7 @@ def server(model, allowed_origins=None):

@app.get("/")
def check_health():
return JSONResponse({"message": "Ludwig server is up"})
return NumpyJSONResponse({"message": "Ludwig server is up"})

@app.post("/predict")
async def predict(request: Request):
Expand All @@ -73,18 +73,18 @@ async def predict(request: Request):
entry, files = convert_input(form, model.model.input_features)
except Exception:
logger.exception("Failed to parse predict form")
return JSONResponse(COULD_NOT_RUN_INFERENCE_ERROR, status_code=500)
return NumpyJSONResponse(COULD_NOT_RUN_INFERENCE_ERROR, status_code=500)

try:
if (entry.keys() & input_features) != input_features:
return JSONResponse(ALL_FEATURES_PRESENT_ERROR, status_code=400)
return NumpyJSONResponse(ALL_FEATURES_PRESENT_ERROR, status_code=400)
try:
resp, _ = model.predict(dataset=[entry], data_format=dict)
resp = resp.to_dict("records")[0]
return JSONResponse(resp)
return NumpyJSONResponse(resp)
except Exception as exc:
logger.exception(f"Failed to run predict: {exc}")
return JSONResponse(COULD_NOT_RUN_INFERENCE_ERROR, status_code=500)
return NumpyJSONResponse(COULD_NOT_RUN_INFERENCE_ERROR, status_code=500)
finally:
for f in files:
os.remove(f.name)
Expand All @@ -97,17 +97,17 @@ async def batch_predict(request: Request):
data_df = pd.DataFrame.from_records(data["data"], index=data.get("index"), columns=data["columns"])
except Exception:
logger.exception("Failed to parse batch_predict form")
return JSONResponse(COULD_NOT_RUN_INFERENCE_ERROR, status_code=500)
return NumpyJSONResponse(COULD_NOT_RUN_INFERENCE_ERROR, status_code=500)

if (set(data_df.columns) & input_features) != input_features:
return JSONResponse(ALL_FEATURES_PRESENT_ERROR, status_code=400)
return NumpyJSONResponse(ALL_FEATURES_PRESENT_ERROR, status_code=400)
try:
resp, _ = model.predict(dataset=data_df)
resp = resp.to_dict("split")
return JSONResponse(resp)
return NumpyJSONResponse(resp)
except Exception:
logger.exception("Failed to run batch_predict: {}")
return JSONResponse(COULD_NOT_RUN_INFERENCE_ERROR, status_code=500)
return NumpyJSONResponse(COULD_NOT_RUN_INFERENCE_ERROR, status_code=500)

return app

Expand Down
21 changes: 19 additions & 2 deletions ludwig/utils/server_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import json
import os
import tempfile
from typing import Union
from typing import Any, Dict, Union

import numpy as np
import pandas as pd
from starlette.datastructures import UploadFile
from starlette.responses import JSONResponse

from ludwig.utils.data_utils import NumpyEncoder


def serialize_payload(data_source: Union[pd.DataFrame, pd.Series]) -> tuple:
Expand Down Expand Up @@ -146,8 +149,22 @@ def deserialize_request(form) -> tuple:
# to_replace: list of file path strings that the user provided
# value: list of temporary files created for each input file
#
# IMPORTANT: There is a one-to-one coorespondence of the to_replace list
# IMPORTANT: There is a one-to-one correspondence of the to_replace list
# and the value list. Each list must be the same size.
df.replace(to_replace=list(file_index.keys()), value=list(file_index.values()), inplace=True)

return df, files


class NumpyJSONResponse(JSONResponse):
    """JSONResponse variant whose serializer understands numpy scalars and arrays."""

    def render(self, content: Any) -> bytes:
        """Serialize `content` to compact UTF-8 JSON, encoding numpy types via NumpyEncoder.

        Overrides Starlette's default JSONResponse.render, which cannot serialize
        numpy arrays/scalars. Note: `content` may be any JSON-serializable value
        (dict, list, scalar, or None), not only a dict.

        Args:
            content: JSON-serializable object (may contain numpy values).

        Returns:
            UTF-8 encoded JSON bytes (Starlette's Response.render contract).
        """
        # allow_nan=False: reject NaN/Infinity so the output is strict JSON.
        # separators=(",", ":"): compact output with no whitespace.
        return json.dumps(
            content, ensure_ascii=False, allow_nan=False, indent=None, separators=(",", ":"), cls=NumpyEncoder
        ).encode("utf-8")
29 changes: 29 additions & 0 deletions tests/ludwig/utils/test_data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import json

import numpy as np
import pandas as pd
import pytest
Expand All @@ -23,6 +25,7 @@
figure_data_format_dataset,
get_abs_path,
hash_dict,
NumpyEncoder,
use_credentials,
)

Expand Down Expand Up @@ -126,3 +129,29 @@ def test_use_credentials():
assert conf == s3_creds

assert len(conf) == 0


def test_numpy_encoder():
    """NumpyEncoder must serialize Python builtins and numpy scalars/arrays as plain JSON."""
    # Python builtin data type encoding.
    assert json.dumps(None, cls=NumpyEncoder) == "null"
    assert json.dumps({}, cls=NumpyEncoder) == "{}"
    assert json.dumps(1, cls=NumpyEncoder) == "1"
    assert json.dumps(1.0, cls=NumpyEncoder) == "1.0"
    assert json.dumps("a", cls=NumpyEncoder) == '"a"'
    assert json.dumps([0, 1, 2, 3, 4], cls=NumpyEncoder) == "[0, 1, 2, 3, 4]"
    assert json.dumps((0, 1, 2, 3, 4), cls=NumpyEncoder) == "[0, 1, 2, 3, 4]"
    assert json.dumps({0, 1, 2, 3, 4}, cls=NumpyEncoder) == "[0, 1, 2, 3, 4]"
    assert json.dumps({"a": "b"}, cls=NumpyEncoder) == '{"a": "b"}'

    # Numpy integer dtypes: arrays encode as JSON lists, scalars as bare numbers.
    # np.int_ replaces np.int, which was removed in NumPy 1.24.
    for dtype in [np.byte, np.ubyte, np.short, np.ushort, np.int_, np.uint, np.longlong, np.ulonglong]:
        x = np.arange(5, dtype=dtype)
        assert json.dumps(x, cls=NumpyEncoder) == "[0, 1, 2, 3, 4]"
        for i in x:
            assert json.dumps(i, cls=NumpyEncoder) == f"{i}"

    # Numpy floating dtypes: values render with a decimal point.
    for dtype in [np.half, np.single, np.double, np.longdouble]:
        x = np.arange(5, dtype=dtype)
        assert json.dumps(x, cls=NumpyEncoder) == "[0.0, 1.0, 2.0, 3.0, 4.0]"
        for i in x:
            assert json.dumps(i, cls=NumpyEncoder) == f"{i}"
31 changes: 31 additions & 0 deletions tests/ludwig/utils/test_server_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import numpy as np

from ludwig.utils.server_utils import NumpyJSONResponse


def test_numpy_json_response():
    """NumpyJSONResponse.render must emit compact JSON bytes for builtins and numpy types."""
    response = NumpyJSONResponse()

    # Python builtin data type encoding (compact separators: no spaces).
    assert response.render(None) == b"null"
    assert response.render({}) == b"{}"
    assert response.render(1) == b"1"
    assert response.render(1.0) == b"1.0"
    assert response.render("a") == b'"a"'
    assert response.render([0, 1, 2, 3, 4]) == b"[0,1,2,3,4]"
    assert response.render((0, 1, 2, 3, 4)) == b"[0,1,2,3,4]"
    assert response.render({0, 1, 2, 3, 4}) == b"[0,1,2,3,4]"
    assert response.render({"a": "b"}) == b'{"a":"b"}'

    # Numpy integer dtypes: arrays encode as JSON lists, scalars as bare numbers.
    # np.int_ replaces np.int, which was removed in NumPy 1.24.
    for dtype in [np.byte, np.ubyte, np.short, np.ushort, np.int_, np.uint, np.longlong, np.ulonglong]:
        x = np.arange(5, dtype=dtype)
        assert response.render(x) == b"[0,1,2,3,4]"
        for i in x:
            assert response.render(i) == f"{i}".encode()

    # Numpy floating dtypes: values render with a decimal point.
    for dtype in [np.half, np.single, np.double, np.longdouble]:
        x = np.arange(5, dtype=dtype)
        assert response.render(x) == b"[0.0,1.0,2.0,3.0,4.0]"
        for i in x:
            assert response.render(i) == f"{i}".encode()