ludwig-ai · geoffreyangus · Jun 21, 2022 · Jun 17, 2022 · Jun 17, 2022 · Jun 17, 2022
@@ -14,6 +14,7 @@
 # limitations under the License.
 # ==============================================================================
 import logging
+from typing import Any, Dict, List
 
 import numpy as np
 import torch
@@ -22,6 +23,7 @@
 from ludwig.features.base_feature import BaseFeatureMixin, InputFeature
 from ludwig.utils.h3_util import h3_to_components
 from ludwig.utils.misc_utils import set_default_value
+from ludwig.utils.types import TorchscriptPreprocessingInput
 
 logger = logging.getLogger(__name__)
 
@@ -30,6 +32,31 @@
 H3_PADDING_VALUE = 7
 
 
+class _H3Preprocessing(torch.nn.Module):  # torch module that expands each H3 value of a batch into a padded row of its components
+    def __init__(self, metadata: Dict[str, Any]):  # NOTE(review): metadata is accepted but never read in this class
+        super().__init__()
+        self.max_h3_resolution = MAX_H3_RESOLUTION  # module constants copied onto the instance; presumably so scripted code can read them — confirm
+        self.h3_padding_value = H3_PADDING_VALUE
+
+    def forward(self, v: TorchscriptPreprocessingInput) -> torch.Tensor:  # returns one stacked uint8 tensor, one row per element of v
+        assert torch.jit.isinstance(v, torch.Tensor), "Scripted H3 preprocessing only works with torch.Tensor."  # any non-Tensor input is rejected here
+
+        outputs: List[torch.Tensor] = []
+        for v_i in v:  # assumes v is 1-D with one H3 index per element — TODO confirm
+            components = h3_to_components(v_i)
+            header: List[int] = [
+                components.mode,
+                components.edge,
+                components.resolution,
+                components.base_cell,
+            ]
+            cells_padding: List[int] = [self.h3_padding_value] * (self.max_h3_resolution - len(components.cells))  # fills the cell list up to max_h3_resolution entries
+            output = torch.tensor(header + components.cells + cells_padding, dtype=torch.uint8)  # row layout: 4 header fields, then cells, then padding
+            outputs.append(output)
+
+        return torch.stack(outputs)  # per-value rows joined along a new leading dimension
+
+
 class H3FeatureMixin(BaseFeatureMixin):
     @staticmethod
     def type():
@@ -63,9 +90,9 @@ def get_feature_meta(column, preprocessing_parameters, backend):
     @staticmethod
     def h3_to_list(h3_int):
         components = h3_to_components(h3_int)
-        header = [components["mode"], components["edge"], components["resolution"], components["base_cell"]]
-        cells_padding = [H3_PADDING_VALUE] * (MAX_H3_RESOLUTION - len(components["cells"]))
-        return header + components["cells"] + cells_padding
+        header = [components.mode, components.edge, components.resolution, components.base_cell]  # attribute access replaces the dict lookups of the removed lines
+        cells_padding = [H3_PADDING_VALUE] * (MAX_H3_RESOLUTION - len(components.cells))  # fills the cell list up to MAX_H3_RESOLUTION entries
+        return header + components.cells + cells_padding  # flat list: header fields, then cells, then padding
 
     @staticmethod
     def add_feature_data(
@@ -121,3 +148,7 @@ def update_config_with_metadata(input_feature, feature_metadata, *args, **kwargs
     @staticmethod
     def populate_defaults(input_feature):
         set_default_value(input_feature, TIED, None)  # default for TIED is None; presumably only applied when the key is absent — confirm in misc_utils
+
+    @staticmethod
+    def create_preproc_module(metadata: Dict[str, Any]) -> torch.nn.Module:
+        return _H3Preprocessing(metadata)  # builds the H3 preprocessing torch module, passing metadata through unchanged
@@ -13,6 +13,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+from typing import List, NamedTuple
+
+
+class H3Data(NamedTuple):  # named record returned by h3_to_components
+    mode: int  # filled from h3_index_mode: 4 bits starting at bit 59
+    edge: int  # filled from h3_edge: 3 bits starting at bit 56
+    resolution: int  # filled from h3_resolution: 4 bits starting at bit 52
+    base_cell: int  # filled from h3_base_cell: 7 bits starting at bit 45
+    cells: List[int]  # filled from h3_components: one 3-bit value per level, levels 1..resolution
+
+
 def set_bit(v, index, x):
     """Set the index:th bit of v to 1 if x is truthy, else to 0, and return the new value."""
     mask = 1 << index  # Compute mask, an integer with just bit 'index' set.
@@ -42,24 +53,24 @@ def components_to_h3(components):
     return h3
 
 
-def bitslice(x, start_bit, slice_length):
-    ones_mask = 2**slice_length - 1
+def bitslice(x: int, start_bit: int, slice_length: int) -> int:  # returns the slice_length bits of x that begin at bit start_bit, shifted down to bit 0
+    ones_mask: int = int(2**slice_length - 1)  # slice_length low bits set; NOTE(review): the int() cast presumably keeps this an int under TorchScript — confirm
     return (x & (ones_mask << start_bit)) >> start_bit
 
 
-def h3_index_mode(h3_long):
+def h3_index_mode(h3_long: int) -> int:  # 4-bit field starting at bit 59 (64 - 5)
     return bitslice(h3_long, 64 - 5, 4)
 
 
-def h3_edge(h3_long):
+def h3_edge(h3_long: int) -> int:  # 3-bit field starting at bit 56 (64 - 8)
     return bitslice(h3_long, 64 - 8, 3)
 
 
-def h3_resolution(h3_long):
+def h3_resolution(h3_long: int) -> int:  # 4-bit field starting at bit 52 (64 - 12)
     return bitslice(h3_long, 64 - 12, 4)
 
 
-def h3_base_cell(h3_long):
+def h3_base_cell(h3_long: int) -> int:  # 7-bit field starting at bit 45 (64 - 19)
     return bitslice(h3_long, 64 - 19, 7)
 
 
@@ -68,27 +79,27 @@ def h3_octal_components(h3_long):
     return "{0:0{w}o}".format(bitslice(h3_long + 2**63, 64 - 19 - 3 * res, 3 * res), w=res)
 
 
-def h3_component(h3_long, i):
+def h3_component(h3_long: int, i: int) -> int:  # 3-bit value for level i, read from bit 45 - 3 * i upward
     return bitslice(h3_long, 64 - 19 - 3 * i, 3)
 
 
-def h3_components(h3_long):
+def h3_components(h3_long: int) -> List[int]:  # 3-bit values for levels 1..h3_resolution(h3_long), in that order
     return [h3_component(h3_long, i) for i in range(1, h3_resolution(h3_long) + 1)]
 
 
-def h3_to_components(h3_value):
+def h3_to_components(h3_value: int) -> H3Data:  # decodes an integer H3 index into its mode, edge, resolution, base cell and per-level cells
     """Extract the values from an H3 hexadecimal value Refer to this for the bit layout:
 
     https://uber.github.io/h3/#/documentation/core-library/h3-index-representations
     """
     # lat_long = (0, 0)  # h3ToGeo(h3_value)
-    return {
-        "mode": h3_index_mode(h3_value),
-        "edge": h3_edge(h3_value),
-        "resolution": h3_resolution(h3_value),
-        "base_cell": h3_base_cell(h3_value),
-        "cells": h3_components(h3_value),
-    }
+    return H3Data(  # named tuple with the same five fields as the dict in the removed lines
+        mode=h3_index_mode(h3_value),
+        edge=h3_edge(h3_value),
+        resolution=h3_resolution(h3_value),
+        base_cell=h3_base_cell(h3_value),
+        cells=h3_components(h3_value),
+    )
 
 
 if __name__ == "__main__":

@@ -376,6 +376,21 @@ def test_torchscript_e2e_timeseries(tmpdir, csv_filename):
     validate_torchscript_outputs(tmpdir, config, backend, training_data_csv_path)
 
 
+def test_torchscript_e2e_h3(tmpdir, csv_filename):  # TorchScript end-to-end test with one H3 input feature and one binary output feature
+    data_csv_path = os.path.join(tmpdir, csv_filename)
+    input_features = [
+        h3_feature(),
+    ]
+    output_features = [
+        binary_feature(),
+    ]
+    backend = LocalTestBackend()
+    config = {"input_features": input_features, "output_features": output_features, TRAINER: {"epochs": 2}}  # training limited to 2 epochs
+    training_data_csv_path = generate_data(input_features, output_features, data_csv_path)  # presumably writes synthetic rows and returns the CSV path — confirm against generate_data
+
+    validate_torchscript_outputs(tmpdir, config, backend, training_data_csv_path)  # shared helper that does the training and the checks
+
+
 def validate_torchscript_outputs(tmpdir, config, backend, training_data_csv_path, tolerance=1e-8):
     # Train Ludwig (Pythonic) model:
     ludwig_model = LudwigModel(config, backend=backend)