Addressed review feedback:
    1. Updated the environment variable name to `TS_OPEN_INFERENCE_PROTOCOL`.
    2. Added logic to read `ts_open_inference_protocol=true` from the property file to determine whether OIP is enabled.
    3. Implemented an extra structural check for OIP `ModelInferResponse` bodies in gRPC responses.
    4. Used the local path to the proto file in test_mnist.sh.

Signed-off-by: Andrews Arokiam <andrews.arokiam@ideas2it.com>
andyi2it committed Dec 19, 2023
1 parent acf8910 commit 93b4e10
Showing 5 changed files with 26 additions and 16 deletions.
20 changes: 14 additions & 6 deletions frontend/server/src/main/java/org/pytorch/serve/job/GRPCJob.java
@@ -117,26 +117,24 @@ public void response(
             Map<String, String> responseHeaders) {
         ByteString output = ByteString.copyFrom(body);
         WorkerCommands cmd = this.getCmd();
+        Gson gson = new Gson();
+        String jsonResponse = output.toStringUtf8();
+        JsonObject jsonObject = gson.fromJson(jsonResponse, JsonObject.class);
 
         switch (cmd) {
             case PREDICT:
             case STREAMPREDICT:
             case STREAMPREDICT2:
                 // condition for OIP grpc ModelInfer Call
-                if (ConfigManager.getInstance().isOpenInferenceProtocol()) {
+                if (ConfigManager.getInstance().isOpenInferenceProtocol() && isResponseStructureOIP(jsonObject)) {
                     if (((ServerCallStreamObserver<ModelInferResponse>) modelInferResponseObserver)
                             .isCancelled()) {
                         logger.warn(
                                 "grpc client call already cancelled, not able to send this response for requestId: {}",
                                 getPayload().getRequestId());
                         return;
                     }
 
-                    Gson gson = new Gson();
                     ModelInferResponse.Builder responseBuilder = ModelInferResponse.newBuilder();
-                    String jsonResponse = output.toStringUtf8();
-                    JsonObject jsonObject = gson.fromJson(jsonResponse, JsonObject.class);
 
                     responseBuilder.setId(jsonObject.get("id").getAsString());
                     responseBuilder.setModelName(jsonObject.get("model_name").getAsString());
                     responseBuilder.setModelVersion(jsonObject.get("model_version").getAsString());
@@ -319,4 +317,14 @@ private void setOutputContents(JsonElement element, InferOutputTensor.Builder outputBuilder) {
         }
         outputBuilder.setContents(inferTensorContents); // set output contents
     }
+
+    private boolean isResponseStructureOIP(JsonObject jsonObject) {
+        if (jsonObject.has("id")
+                && jsonObject.has("model_name")
+                && jsonObject.has("model_version")
+                && jsonObject.has("outputs")) {
+            return true;
+        }
+        return false;
+    }
 }
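
For illustration, a minimal self-contained sketch of what the new structural check accepts and rejects. The helper is copied from the diff above; the demo class name and the sample payloads are hypothetical, and the snippet assumes only Gson on the classpath:

    import com.google.gson.Gson;
    import com.google.gson.JsonObject;

    public class OipShapeCheckDemo {

        // Copied from the diff above: an OIP body must carry all four top-level fields.
        private static boolean isResponseStructureOIP(JsonObject jsonObject) {
            if (jsonObject.has("id")
                    && jsonObject.has("model_name")
                    && jsonObject.has("model_version")
                    && jsonObject.has("outputs")) {
                return true;
            }
            return false;
        }

        public static void main(String[] args) {
            Gson gson = new Gson();
            // Shaped like an OIP ModelInferResponse: passes the check.
            String oip = "{\"id\":\"0\",\"model_name\":\"mnist\",\"model_version\":\"1.0\",\"outputs\":[]}";
            // A plain prediction body lacks those fields, so the response
            // falls through to the regular (non-OIP) gRPC path.
            String plain = "{\"predictions\": [2]}";
            System.out.println(isResponseStructureOIP(gson.fromJson(oip, JsonObject.class)));   // true
            System.out.println(isResponseStructureOIP(gson.fromJson(plain, JsonObject.class))); // false
        }
    }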
8 changes: 6 additions & 2 deletions frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java
@@ -104,6 +104,7 @@ public final class ConfigManager
     private static final String TS_INITIAL_WORKER_PORT = "initial_worker_port";
     private static final String TS_INITIAL_DISTRIBUTION_PORT = "initial_distribution_port";
     private static final String TS_WORKFLOW_STORE = "workflow_store";
+    private static final String TS_OPEN_INFERENCE_PROTOCOL = "ts_open_inference_protocol";
 
     // Configuration which are not documented or enabled through environment variables
     private static final String USE_NATIVE_IO = "use_native_io";
@@ -356,8 +357,11 @@ public int getGRPCPort(ConnectorType connectorType) {
     }
 
     public boolean isOpenInferenceProtocol() {
-        String inferenceProtocol = System.getenv("INFERENCE_PROTOCOL");
-        return "oip".equals(inferenceProtocol);
+        String inferenceProtocol = System.getenv("TS_OPEN_INFERENCE_PROTOCOL");
+        if (inferenceProtocol != null && !inferenceProtocol.isEmpty()) {
+            return "oip".equals(inferenceProtocol);
+        }
+        return Boolean.parseBoolean(prop.getProperty(TS_OPEN_INFERENCE_PROTOCOL, "false"));
     }
 
     public boolean isGRPCSSLEnabled() {
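The resolution order above matters: when TS_OPEN_INFERENCE_PROTOCOL is set and non-empty it wins and must equal "oip"; otherwise the ts_open_inference_protocol property decides, defaulting to false. A standalone sketch of that precedence (the OipFlagDemo class is hypothetical; the key names are taken from the diff):

    import java.util.Properties;

    public class OipFlagDemo {

        // Mirrors the patched ConfigManager logic: env var first, property file second.
        static boolean isOpenInferenceProtocol(Properties prop) {
            String env = System.getenv("TS_OPEN_INFERENCE_PROTOCOL");
            if (env != null && !env.isEmpty()) {
                return "oip".equals(env); // env var must be exactly "oip"
            }
            // Property file entry is parsed as a boolean, defaulting to false.
            return Boolean.parseBoolean(prop.getProperty("ts_open_inference_protocol", "false"));
        }

        public static void main(String[] args) {
            Properties prop = new Properties();
            prop.setProperty("ts_open_inference_protocol", "true");
            // Prints true unless TS_OPEN_INFERENCE_PROTOCOL is set to a value other than "oip".
            System.out.println(isOpenInferenceProtocol(prop));
        }
    }

Note that the two sources use different value conventions: the Kubernetes manifests below set the environment variable to "oip", while a property-file entry reads ts_open_inference_protocol=true, as the commit message describes.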
4 changes: 2 additions & 2 deletions kubernetes/kserve/tests/configs/mnist_oip_grpc.yaml
@@ -20,5 +20,5 @@ spec:
             name: h2c
             protocol: TCP
         env:
-          - name: "INFERENCE_PROTOCOL"
-            value: "oip"
+          - name: "TS_OPEN_INFERENCE_PROTOCOL"
+            value: "oip"
4 changes: 2 additions & 2 deletions kubernetes/kserve/tests/configs/mnist_oip_http.yaml
@@ -18,5 +18,5 @@ spec:
         ports:
           - containerPort: 8085 # torchserve http port
         env:
-          - name: "INFERENCE_PROTOCOL"
-            value: "oip"
+          - name: "TS_OPEN_INFERENCE_PROTOCOL"
+            value: "oip"
6 changes: 2 additions & 4 deletions kubernetes/kserve/tests/scripts/test_mnist.sh
@@ -49,17 +49,15 @@ function make_cluster_accessible() {
 }
 
 function make_cluster_accessible_for_grpc() {
-    PROTO_FILE_PATH="https://raw.githubusercontent.com/andyi2it/torch-serve/oip-impl/frontend/server/src/main/resources/proto/open_inference_grpc.proto"
-    curl -s -L ${PROTO_FILE_PATH} > open_inference_grpc.proto
-    PROTO_FILE="open_inference_grpc.proto"
+    PROTO_FILE_PATH="./frontend/server/src/main/resources/proto/open_inference_grpc.proto"
     SERVICE_NAME="$1"
     GRPC_METHOD="$2"
     wait_for_inference_service 300 5 "$1"
     SERVICE_HOSTNAME=$(kubectl get inferenceservice ${SERVICE_NAME} -o jsonpath='{.status.url}' | cut -d "/" -f 3)
     wait_for_port_forwarding 5
     echo "Make inference request"
 
-    PREDICTION=$(grpcurl -plaintext -d @ -proto ${PROTO_FILE} -authority ${SERVICE_HOSTNAME} ${INGRESS_HOST}:${INGRESS_PORT} ${GRPC_METHOD} < "$3")
+    PREDICTION=$(grpcurl -plaintext -d @ -proto ${PROTO_FILE_PATH} -authority ${SERVICE_HOSTNAME} ${INGRESS_HOST}:${INGRESS_PORT} ${GRPC_METHOD} < "$3")
     PREDICTION=$(echo -n "$PREDICTION" | tr -d '\n[:space:]')
     EXPECTED="$4"
     if [ "${PREDICTION}" = "${EXPECTED}" ]; then
