Adding test for triton. Fix to cast int to string for os.path.join. R…

…emove annotation from Inference module template
ludwig-ai · May 31, 2022 · a23baad · a23baad
1 parent 77461e5
commit a23baad
Show file tree

Hide file tree

Showing 2 changed files with 127 additions and 4 deletions.
diff --git a/ludwig/utils/triton_utils.py b/ludwig/utils/triton_utils.py
@@ -22,7 +22,7 @@ def __init__(self, inference_module):
         self.inference_module = inference_module
 
     def forward(self, {input_signature}):
-        inputs: Dict[str, Union[List[str], List[torch.Tensor], torch.Tensor]] = {input_dict}
+        inputs = {input_dict}
         results = self.inference_module(inputs)
         return {output_tuple}
 """
@@ -170,11 +170,12 @@ def _get_model_config(model: LudwigModel) -> str:
     return config
 
 
-def export_triton(model: LudwigModel, output_path: str, model_name="ludwig_model", model_version=1):
+def export_triton(model: LudwigModel, output_path: str, model_name: str = "ludwig_model", model_version: int = 1):
     model_ts = generate_triton_torchscript(model)
-    model_path = os.path.join(output_path, model_name, model_version, "model.pt")
+    model_dir = os.path.join(output_path, model_name, str(model_version))
+    os.makedirs(model_dir, exist_ok=True)
     # Save the file to <model_repository>/<model_name>/<model_version>/model.pt
-    model_ts.save(model_path)
+    model_ts.save(os.path.join(model_dir, "model.pt"))
     # Save the default config to <model_repository>/<model_name>/config.pbtxt
     config_path = os.path.join(output_path, model_name, "config.pbtxt")
     with open(config_path, "w") as f:

diff --git a/tests/integration_tests/test_triton.py b/tests/integration_tests/test_triton.py
@@ -0,0 +1,122 @@
+# Copyright (c) 2019 Uber Technologies, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import os
+from typing import List, Union
+
+import numpy as np
+import pandas as pd
+import torch
+
+from ludwig.api import LudwigModel
+from ludwig.constants import PREDICTIONS, TRAINER
+from ludwig.utils.triton_utils import export_triton
+from tests.integration_tests.utils import (
+    binary_feature,
+    category_feature,
+    generate_data,
+    LocalTestBackend,
+    number_feature,
+)
+
+
+def test_triton_torchscript(csv_filename, tmpdir):
+    data_csv_path = os.path.join(tmpdir, csv_filename)
+
+    # Configure features to be tested:
+    input_features = [
+        binary_feature(),
+        number_feature(),
+        category_feature(vocab_size=3),
+        # TODO: future support
+        # sequence_feature(vocab_size=3),
+        # text_feature(vocab_size=3),
+        # vector_feature(),
+        # image_feature(image_dest_folder),
+        # audio_feature(audio_dest_folder),
+        # timeseries_feature(),
+        # date_feature(),
+        # h3_feature(),
+        # set_feature(vocab_size=3),
+        # bag_feature(vocab_size=3),
+    ]
+    output_features = [
+        binary_feature(),
+        number_feature(),
+        category_feature(vocab_size=3),
+        # TODO: future support
+        # sequence_feature(vocab_size=3),
+        # text_feature(vocab_size=3),
+        # set_feature(vocab_size=3),
+        # vector_feature()
+    ]
+    backend = LocalTestBackend()
+    config = {"input_features": input_features, "output_features": output_features, TRAINER: {"epochs": 2}}
+
+    # Generate training data
+    training_data_csv_path = generate_data(input_features, output_features, data_csv_path)
+
+    # Round-trip the generated CSV through pandas (note: no bool-to-string conversion is performed here)
+    df = pd.read_csv(training_data_csv_path)
+    df.to_csv(training_data_csv_path)
+
+    # Train Ludwig (Pythonic) model:
+    ludwig_model = LudwigModel(config, backend=backend)
+    ludwig_model.train(
+        dataset=training_data_csv_path,
+        skip_save_training_description=True,
+        skip_save_training_statistics=True,
+        skip_save_model=True,
+        skip_save_progress=True,
+        skip_save_log=True,
+        skip_save_processed_input=True,
+    )
+
+    # Obtain predictions from Python model
+    preds_dict, _ = ludwig_model.predict(dataset=training_data_csv_path, return_type=dict)
+
+    # Create graph inference model (Torchscript) from trained Ludwig model.
+    triton_path = os.path.join(tmpdir, "triton")
+    model_name = "test_triton"
+    model_version = 1
+    export_triton(ludwig_model, triton_path, model_name, model_version)
+
+    # Restore the torchscript model
+    torchscript_path = os.path.join(triton_path, model_name, str(model_version), "model.pt")
+    restored_model = torch.jit.load(torchscript_path)
+
+    def to_input(s: pd.Series) -> Union[List[str], torch.Tensor]:
+        if s.dtype == "object":
+            return s.to_list()
+        return torch.from_numpy(s.to_numpy().astype(np.float32))
+
+    df = pd.read_csv(training_data_csv_path)
+    inputs = {name: to_input(df[feature.column]) for name, feature in ludwig_model.model.input_features.items()}
+    outputs = restored_model(**inputs)
+
+    def from_output(o: Union[List[str], torch.Tensor]) -> np.array:
+        if isinstance(o, list):
+            return np.array(o)
+        return o.numpy()
+
+    # Enumerate over the output features and look up the predictions to check that they match the outputs
+    assert len(preds_dict) == len(outputs)
+    for i, feature_name in enumerate(ludwig_model.model.output_features):
+        output_values_expected = preds_dict[feature_name][PREDICTIONS]
+        output_values = from_output(outputs[i])
+        if output_values.dtype.type in {np.string_, np.str_}:
+            # Strings should match exactly
+            assert np.all(output_values == output_values_expected), f"feature: {feature_name}, output: predictions"
+        else:
+            assert np.allclose(output_values, output_values_expected), f"feature: {feature_name}, output: predictions"