TF2 porting: category feature #667

Merged

29 commits
fc4cabe
feat: start of category feature
jimthompson5802 Mar 28, 2020
0e3013c
feat: add parameter for initializing decoder
jimthompson5802 Mar 28, 2020
bde494e
feat: add loss function and loss metric
jimthompson5802 Mar 29, 2020
21db8e3
feat: partial embedder implementation
jimthompson5802 Mar 29, 2020
9153e8e
feat: complete dense embedding implementation
jimthompson5802 Mar 29, 2020
d88a1b8
refactor: comment out obsolete code. To be deleted
jimthompson5802 Mar 29, 2020
cd78001
feat: add probabilties and predictions to predictions() method
jimthompson5802 Mar 29, 2020
7d414c8
Merge remote-tracking branch 'upstream/tf2_porting' into tf2_categori…
jimthompson5802 Mar 29, 2020
e1626f7
feat: add accuracy metric
jimthompson5802 Mar 29, 2020
553da68
refactor: code clean-up
jimthompson5802 Mar 29, 2020
66464c5
refactor: code clean-up
jimthompson5802 Mar 30, 2020
1c2f270
refactor: change default validation metric
jimthompson5802 Mar 30, 2020
7312589
refactor: add todo
jimthompson5802 Mar 30, 2020
8f581d5
refactor: softmax loss parameter changes
jimthompson5802 Mar 31, 2020
3c70152
Merge remote-tracking branch 'upstream/tf2_porting' into tf2_categori…
jimthompson5802 Apr 1, 2020
4ca282e
feat: add partial implementation of categorical sparse encoder
jimthompson5802 Apr 1, 2020
5fc8bf8
feat: initial setup for sampled softmax cross entropy loss
jimthompson5802 Apr 1, 2020
270a04b
feat: continued sampled softmax cross entropy loss
jimthompson5802 Apr 2, 2020
b7cb2bb
feat: test for handling decoder_last_hidden
jimthompson5802 Apr 3, 2020
0ec7651
feat: initial working sampled softmax cross entropy loss function
jimthompson5802 Apr 3, 2020
cbd6955
feat: category feature working softmax ce loss and sampled softmax ce…
jimthompson5802 Apr 3, 2020
2e467ef
refactor: rename custom loss wrapper class
jimthompson5802 Apr 4, 2020
6d8bb23
refactor: add custom wrapper class for MSE metric to support revised …
jimthompson5802 Apr 4, 2020
4661cdf
feat: add new constants
jimthompson5802 Apr 4, 2020
626e512
feat: retrofit MAE to support new prediction format
jimthompson5802 Apr 4, 2020
0044692
feat: add TYPE constant
jimthompson5802 Apr 4, 2020
0ffe3c5
feat: adapt to new predictions format
jimthompson5802 Apr 4, 2020
c5cb9dc
feat: add test for feature encoder, decoder and loss function
jimthompson5802 Apr 5, 2020
73ef884
fix: change format of <feature_name>_probabilities.csv from tensors t…
jimthompson5802 Apr 5, 2020
99 changes: 83 additions & 16 deletions ludwig/features/category_feature.py
@@ -27,6 +27,10 @@
from ludwig.features.base_feature import OutputFeature
from ludwig.models.modules.embedding_modules import Embed
from ludwig.models.modules.initializer_modules import get_initializer
from ludwig.models.modules.category_decoders import Regressor
from ludwig.models.modules.category_encoders import CategoricalEmbedEncoder
from ludwig.models.modules.category_encoders import CategoricalSparseEncoder
from ludwig.models.modules.category_encoders import CategoricalPassthroughEncoder
from ludwig.models.modules.loss_modules import mean_confidence_penalty
from ludwig.models.modules.loss_modules import sampled_softmax_cross_entropy
from ludwig.models.modules.loss_modules import weighted_softmax_cross_entropy
@@ -98,8 +102,9 @@ def add_feature_data(


class CategoryInputFeature(CategoryBaseFeature, InputFeature):
    def __init__(self, feature):
        super().__init__(feature)
    def __init__(self, feature, encoder_obj=None):
        CategoryBaseFeature.__init__(self, feature)
        InputFeature.__init__(self)

        self.vocab = []

@@ -112,20 +117,43 @@ def __init__(self, feature):
        self.initializer = None
        self.regularize = True

        _ = self.overwrite_defaults(feature)

        self.embed = Embed(
            vocab=self.vocab,
            embedding_size=self.embedding_size,
            representation=self.representation,
            embeddings_trainable=self.embeddings_trainable,
            pretrained_embeddings=self.pretrained_embeddings,
            embeddings_on_cpu=self.embeddings_on_cpu,
            dropout=self.dropout,
            initializer=self.initializer,
            regularize=self.regularize
        )

        # _ = self.overwrite_defaults(feature)
        #
        # self.embed = Embed(
        #     vocab=self.vocab,
        #     embedding_size=self.embedding_size,
        #     representation=self.representation,
        #     embeddings_trainable=self.embeddings_trainable,
        #     pretrained_embeddings=self.pretrained_embeddings,
        #     embeddings_on_cpu=self.embeddings_on_cpu,
        #     dropout=self.dropout,
        #     initializer=self.initializer,
        #     regularize=self.regularize
        # )

        self.encoder = self.representation
        encoder_parameters = self.overwrite_defaults(feature)

        if encoder_obj:
            self.encoder_obj = encoder_obj
        else:
            self.encoder_obj = self.initialize_encoder(encoder_parameters)

    def call(self, inputs, training=None, mask=None):
        assert isinstance(inputs, tf.Tensor)
        assert inputs.dtype == tf.int8 or inputs.dtype == tf.int16 or \
            inputs.dtype == tf.int32 or inputs.dtype == tf.float64
        assert len(inputs.shape) == 1

        inputs_exp = inputs[:, tf.newaxis]
        inputs_encoded = self.encoder_obj(
            inputs_exp, training=training, mask=mask
        )

        return inputs_encoded
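
As a quick illustration of the shape handling in call (the values below are assumed for the example, not taken from the PR): a rank-1 batch of category ids is expanded to [batch x 1] before being handed to the encoder object.

import tensorflow as tf

ids = tf.constant([3, 0, 7], dtype=tf.int32)  # rank-1 batch of category ids, shape [3]
ids_exp = ids[:, tf.newaxis]                  # expanded to shape [3, 1] for the encoder
print(ids_exp.shape)                          # (3, 1)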


    @staticmethod
    def update_model_definition_with_metadata(
            input_feature,
@@ -175,18 +203,49 @@ def build_input(
    def populate_defaults(input_feature):
        set_default_value(input_feature, TIED, None)

encoder_registry = {
    'dense': CategoricalEmbedEncoder,
    'sparse': CategoricalSparseEncoder,
    'passthrough': CategoricalPassthroughEncoder,
    'null': CategoricalPassthroughEncoder,
    'none': CategoricalPassthroughEncoder,
    'None': CategoricalPassthroughEncoder,
    None: CategoricalPassthroughEncoder
}


class CategoryOutputFeature(CategoryBaseFeature, OutputFeature):
    def __init__(self, feature):
        super().__init__(feature)
        CategoryBaseFeature.__init__(self, feature)
        OutputFeature.__init__(self, feature)

        self.loss = {'type': SOFTMAX_CROSS_ENTROPY}
        self.num_classes = 0
        self.top_k = 3
        self.initializer = None
        self.regularize = True

        _ = self.overwrite_defaults(feature)
        self.decoder = 'regressor'
        decoder_parameters = self.overwrite_defaults(feature)

        self.decoder_obj = self.initialize_decoder(decoder_parameters)

        self._setup_loss()
        self._setup_metrics()

    def logits(
            self,
            inputs,  # hidden
    ):
        return self.decoder_obj(inputs)

    def _setup_loss(self):
        pass

    def _setup_metrics(self):
        pass

    default_validation_metric = ACCURACY

@@ -714,3 +773,11 @@ def populate_defaults(output_feature):
            'reduce_dependencies': SUM
        }
    )

decoder_registry = {
    'regressor': Regressor,
    'null': Regressor,
    'none': Regressor,
    'None': Regressor,
    None: Regressor
}

Collaborator review comment (on 'regressor': Regressor): classifier
33 changes: 33 additions & 0 deletions ludwig/models/modules/category_decoders.py
@@ -0,0 +1,33 @@
#! /usr/bin/env python
# coding=utf-8
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import logging

import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Layer

logger = logging.getLogger(__name__)


class Regressor(Layer):

    def __init__(self, **kwargs):
        super().__init__()
        self.dense = Dense(4)  # todo add initialization etc. Hard coded for debugging

    def call(self, inputs, **kwargs):
        return tf.squeeze(self.dense(inputs))
Collaborator review comment: is squeeze needed?
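
To the reviewer's question, a quick check (illustrative, not part of the PR): tf.squeeze only removes axes of size 1, so on a [batch x 4] output it is a no-op, but with an output size of 1 (or a batch of 1) it silently drops an axis.

import tensorflow as tf
from tensorflow.keras.layers import Dense

x = tf.zeros([8, 16])                  # [batch=8, hidden=16]
print(tf.squeeze(Dense(4)(x)).shape)   # (8, 4) -- unchanged, no size-1 axis
print(tf.squeeze(Dense(1)(x)).shape)   # (8,)   -- size-1 output axis dropped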

72 changes: 72 additions & 0 deletions ludwig/models/modules/category_encoders.py
@@ -0,0 +1,72 @@
#! /usr/bin/env python
# coding=utf-8
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import logging

import tensorflow as tf
from tensorflow.keras.layers import Layer

logger = logging.getLogger(__name__)


class CategoricalPassthroughEncoder(Layer):

    def __init__(
            self,
            **kwargs
    ):
        super(CategoricalPassthroughEncoder, self).__init__()

    def call(self, inputs, training=None, mask=None):
        """
        :param inputs: The inputs fed into the encoder.
            Shape: [batch x 1], type tf.int32
        """
        return inputs


class CategoricalEmbedEncoder(Layer):

    def __init__(
            self,
            **kwargs
    ):
        super(CategoricalEmbedEncoder, self).__init__()

    def call(self, inputs, training=None, mask=None):
        """
        :param inputs: The inputs fed into the encoder.
            Shape: [batch x 1], type tf.int32
        :param return: embeddings of shape [batch x embed size], type tf.float32
        """
        return tf.cast(inputs, dtype=tf.float32)

Collaborator review comment (on the tf.int32 input type): should probably work for any integer precision
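
Supporting the comment above, tf.cast accepts any integer precision, so the encoder need not pin int32 (a small check, not part of the PR):

import tensorflow as tf

for dtype in (tf.int8, tf.int16, tf.int32, tf.int64):
    ids = tf.constant([[1], [2]], dtype=dtype)       # [batch x 1] category ids
    assert tf.cast(ids, tf.float32).dtype == tf.float32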

class CategoricalSparseEncoder(Layer):

    def __init__(
            self,
            **kwargs
    ):
        super(CategoricalSparseEncoder, self).__init__()

    def call(self, inputs, training=None, mask=None):
        """
        :param inputs: The inputs fed into the encoder.
            Shape: [batch x 1], type tf.int32
        :param return: one-hot encoding, shape [batch x number classes], type tf.int32
        """
        # partial implementation (commit 4ca282e): the one-hot encoding the
        # docstring describes is not applied yet; currently a passthrough
        return inputs
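
The docstring promises a one-hot encoding while the body is still a passthrough; a minimal sketch of the promised behavior (one_hot_encode and its num_classes parameter are hypothetical, not part of this commit):

import tensorflow as tf

def one_hot_encode(inputs, num_classes):
    # [batch x 1] integer ids -> [batch x num_classes] one-hot, tf.int32
    return tf.one_hot(
        tf.squeeze(inputs, axis=-1), depth=num_classes, dtype=tf.int32
    )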