Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Torchscript] Add Vector preprocessing and postprocessing #2160

Merged
merged 13 commits into from
Jun 21, 2022
32 changes: 31 additions & 1 deletion ludwig/features/vector_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# limitations under the License.
# ==============================================================================
import logging
from typing import Any, Dict, Optional
from typing import Any, Dict, List, Optional

import numpy as np
import torch
Expand All @@ -41,10 +41,32 @@
from ludwig.utils import output_feature_utils
from ludwig.utils.misc_utils import set_default_value
from ludwig.utils.torch_utils import LudwigModule
from ludwig.utils.types import TorchscriptPreprocessingInput

logger = logging.getLogger(__name__)


class _VectorPreprocessing(torch.nn.Module):
    """Torchscript-compatible preprocessing module for vector features.

    Converts a batch of whitespace-delimited numeric strings (one string per
    sample, e.g. ``"1.0 2.5 3.0"``) into a single float32 tensor of shape
    ``(batch_size, vector_dim)``.
    """

    def forward(self, v: TorchscriptPreprocessingInput) -> torch.Tensor:
        # torch.jit.isinstance is the TorchScript-compatible way to refine a
        # union type; only List[str] inputs are supported for vector features.
        if not torch.jit.isinstance(v, List[str]):
            raise ValueError(f"Unsupported input: {v}")

        vectors = []
        for sample in v:
            # Each sample is parsed element-wise into a 1-D float32 tensor.
            # NOTE(review): assumes every sample has the same number of
            # elements — torch.stack raises otherwise.
            vector = torch.tensor([float(x) for x in sample.split()], dtype=torch.float32)
            vectors.append(vector)
        # Stack the per-sample vectors into a (batch_size, vector_dim) tensor.
        return torch.stack(vectors)


class _VectorPostprocessing(torch.nn.Module):
def forward(self, preds: Dict[str, torch.Tensor]) -> Dict[str, Any]:
# Workaround to convert type annotation from Dict[str, torch.Tensor] to Dict[str, Any]
preds_any: Dict[str, Any] = {}
for k, v in preds.items():
preds_any[k] = v
return preds_any


class _VectorPredict(PredictModule):
def forward(self, inputs: Dict[str, torch.Tensor], feature_name: str) -> Dict[str, torch.Tensor]:
logits = output_feature_utils.get_output_feature_tensor(inputs, feature_name, self.logits_key)
Expand Down Expand Up @@ -159,6 +181,10 @@ def populate_defaults(input_feature):
set_default_value(input_feature, TIED, None)
set_default_value(input_feature, "preprocessing", {})

@staticmethod
def create_preproc_module(metadata: Dict[str, Any]) -> torch.nn.Module:
    """Return a Torchscript-compatible preprocessing module for this feature.

    The ``metadata`` argument is accepted for interface uniformity with other
    feature types but is not needed by vector preprocessing.
    """
    module = _VectorPreprocessing()
    return module


class VectorOutputFeature(VectorFeatureMixin, OutputFeature):
decoder = "projector"
Expand Down Expand Up @@ -234,3 +260,7 @@ def populate_defaults(output_feature):
set_default_value(output_feature, "reduce_dependencies", None)
set_default_value(output_feature, "decoder", "projector")
set_default_value(output_feature, "dependencies", [])

@staticmethod
def create_postproc_module(metadata: Dict[str, Any]) -> torch.nn.Module:
    """Return a Torchscript-compatible postprocessing module for this feature.

    The ``metadata`` argument is accepted for interface uniformity with other
    feature types but is not needed by vector postprocessing.
    """
    module = _VectorPostprocessing()
    return module
8 changes: 5 additions & 3 deletions ludwig/models/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import torch
from torch import nn

from ludwig.constants import COLUMN, NAME, TYPE
from ludwig.constants import BAG, BINARY, CATEGORY, COLUMN, NAME, SEQUENCE, SET, TEXT, TIMESERIES, TYPE, VECTOR
from ludwig.data.postprocessing import convert_dict_to_df
from ludwig.data.preprocessing import load_metadata
from ludwig.features.feature_registries import input_type_registry, output_type_registry
Expand All @@ -22,6 +22,8 @@
from ludwig.utils.data_utils import load_json
from ludwig.utils.misc_utils import get_from_registry

FEATURES_TO_CAST_AS_STRINGS = {BINARY, CATEGORY, BAG, SET, TEXT, SEQUENCE, TIMESERIES, VECTOR}


class InferenceModule(nn.Module):
"""Wraps preprocessing, model forward pass, and postprocessing into a single module.
Expand Down Expand Up @@ -62,7 +64,7 @@ def __init__(self, model: "ECD", config: Dict[str, Any], training_set_metadata:
module_dict_key = get_module_dict_key_from_name(feature_name)
self.postproc_modules[module_dict_key] = feature.create_postproc_module(training_set_metadata[feature_name])

def forward(self, inputs: Dict[str, TorchscriptPreprocessingInput]):
def forward(self, inputs: Dict[str, TorchscriptPreprocessingInput]) -> Dict[str, Dict[str, Any]]:
with torch.no_grad():
preproc_inputs = {}
for module_dict_key, preproc in self.preproc_modules.items():
Expand Down Expand Up @@ -121,6 +123,6 @@ def to_inference_module_input(s: pd.Series, feature_type: str, load_paths=False)
elif feature_type == "audio":
if load_paths:
return [read_audio_from_path(v) if isinstance(v, str) else v for v in s]
if feature_type in {"binary", "category", "bag", "set", "text", "sequence", "timeseries"}:
if feature_type in FEATURES_TO_CAST_AS_STRINGS:
return s.astype(str).to_list()
return torch.from_numpy(s.to_numpy())
4 changes: 2 additions & 2 deletions tests/integration_tests/test_torchscript.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,8 +212,8 @@ def test_torchscript_e2e_tabular(csv_filename, tmpdir):
category_feature(vocab_size=3),
bag_feature(vocab_size=3),
set_feature(vocab_size=3),
vector_feature(),
# TODO: future support
# vector_feature(),
# date_feature(),
# h3_feature(),
]
Expand All @@ -222,11 +222,11 @@ def test_torchscript_e2e_tabular(csv_filename, tmpdir):
binary_feature(),
number_feature(),
category_feature(vocab_size=3),
vector_feature()
# TODO: future support
# sequence_feature(vocab_size=3),
# text_feature(vocab_size=3),
# set_feature(vocab_size=3),
# vector_feature()
]
backend = LocalTestBackend()
config = {"input_features": input_features, "output_features": output_features, TRAINER: {"epochs": 2}}
Expand Down