UPDATE: schema of input wrapper
jasper430 committed Oct 21, 2019
1 parent 902cee6 commit 2a31cc5
Showing 5 changed files with 61 additions and 61 deletions.
1 change: 1 addition & 0 deletions torecsys/inputs/base/audio_inp.py
@@ -1,6 +1,7 @@
from . import _Inputs
import torch


class AudioInputs(_Inputs):
r"""Base Inputs class for Audio.
"""
25 changes: 11 additions & 14 deletions torecsys/inputs/base/concat_inputs.py
@@ -5,8 +5,9 @@


class ConcatInputs(_Inputs):
r"""Base Inputs class for concatenation of list of Base Inputs class in rowwise. The shape of output
is :math:`(B, 1, E_{1} + ... + E_{k})`, where :math:`E_{i}` is embedding size of :math:`i-th` field.
r"""Base Inputs class for concatenation of list of Base Inputs class in rowwise.
The shape of output is :math:`(B, 1, E_{1} + ... + E_{k})`, where :math:`E_{i}`
is embedding size of :math:`i-th` field.
"""
@no_jit_experimental_by_namedtensor
def __init__(self, inputs: List[_Inputs]):
@@ -66,30 +67,26 @@ def forward(self, inputs: Dict[str, torch.Tensor]) -> torch.Tensor:
r"""Foward calculation of ConcatInputs.
Args:
inputs (Dict[str, T]): Dictionary of inputs, where key is name of input fields, and value is
tensor pass to Input class. Remark: key should exist in schema.
inputs (Dict[str, T]): Dictionary of inputs, where each key is the name of an input
field and each value is the tensor passed to the Input class.
Returns:
T, shape = (B, 1, E_{sum}), dtype = torch.float: Output of ConcatInputs, where the values are
concatenated in the third dimension.
T, shape = (B, 1, E_{sum}), dtype = torch.float: Output of ConcatInputs, where
the values are concatenated along the third dimension.
"""
# initialize list to store tensors temporarily
outputs = list()

# loop through inputs
for inp in self.inputs:
# get schema, i.e. input's field names, from input in list
inp_names = inp.schema.inputs

# convert list of inputs to tensor, with shape = (B, N, *)
inp_val = [inputs[i] for i in inp_names]
inp_val = torch.cat(inp_val, dim=1)
inp_val = [inputs[i] for i in inp.schema.inputs]
inp_val = torch.cat(inp_val, dim="N")
inp_args = [inp_val]

# set args for specific input
if inp.__class__.__name__ == "SequenceIndexEmbedding":
inp_names = inp.schema.lengths
inp_args.append(inputs[inp_names])
inp_args.append(inputs[inp.schema.lengths])

# calculate embedding values
output = inp(*inp_args)
@@ -102,7 +99,7 @@ def forward(self, inputs: Dict[str, torch.Tensor]) -> torch.Tensor:
outputs.append(output)

# concat in the third dimension, and the shape of output = (B, 1, sum(E))
outputs = torch.cat(outputs, dim=2)
outputs = torch.cat(outputs, dim="E")

return outputs
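The change from `dim=2` to `dim="E"` above leans on PyTorch's named tensors (experimental since 1.3, consistent with the `no_jit_experimental_by_namedtensor` decorator). Below is a minimal standalone sketch of the concatenation ConcatInputs performs; the field roles and embedding sizes (8 and 16) are illustrative, not taken from the library:

```python
import torch

B = 4  # batch size
# two field embeddings with shape (B, 1, E_i), carrying named dimensions
emb_user  = torch.rand(B, 1, 8,  names=("B", "N", "E"))
emb_movie = torch.rand(B, 1, 16, names=("B", "N", "E"))

# concatenate along the named embedding dimension "E",
# giving shape (B, 1, E_user + E_movie) = (B, 1, 24)
concat = torch.cat([emb_user, emb_movie], dim="E")
print(concat.shape)  # torch.Size([4, 1, 24])
print(concat.names)  # ('B', 'N', 'E')
```

Referring to the dimension by name also makes `torch.cat` verify that all inputs carry matching dimension names, a check a bare positional index would not perform.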

6 changes: 3 additions & 3 deletions torecsys/inputs/base/list_indices_emb.py
@@ -1,6 +1,6 @@
from . import _Inputs
from torecsys.functional import show_attention, dummy_attention
from torecsys.utils.decorator import jit_experimental, no_jit_experimental_by_namedtensor
from torecsys.utils.decorator import jit_experimental, no_jit_experimental, no_jit_experimental_by_namedtensor
from functools import partial
import numpy as np
import torch
@@ -106,11 +106,11 @@ def __init__(self,
elif output_method == "max_pooling":
self.aggregation = nn.AdaptiveMaxPool1d(1)
elif output_method == "mean":
self.aggregation = partial(torch.mean, dim=1, keepdim=True)
self.aggregation = partial(torch.mean, dim="N", keepdim=True)
elif output_method == "none":
self.aggregation = torch.Tensor
elif output_method == "sum":
self.aggregation = partial(torch.sum, dim=1, keepdim=True)
self.aggregation = partial(torch.sum, dim="N", keepdim=True)
else:
raise ValueError('output_method only allows ["avg_pooling", "max_pooling", "mean", "none", "sum"].')
self.output_method = output_method
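As a sketch of what the `partial`-bound aggregations above now do: the reduction is bound to the named `"N"` dimension instead of positional `dim=1`, and `keepdim=True` preserves that dimension in the result. Shapes here are made up for illustration:

```python
from functools import partial
import torch

# mirror the "mean" branch above: reduce over the named "N" dimension
aggregation = partial(torch.mean, dim="N", keepdim=True)

emb = torch.rand(4, 5, 8, names=("B", "N", "E"))  # (B, N, E)
pooled = aggregation(emb)                         # (B, 1, E)
print(pooled.shape)  # torch.Size([4, 1, 8])
```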
16 changes: 5 additions & 11 deletions torecsys/inputs/base/stacked_inp.py
@@ -85,28 +85,22 @@ def forward(self, inputs: Dict[str, torch.Tensor]) -> torch.Tensor:

# loop through inputs
for inp in self.inputs:
# get schema, i.e. input's field names, from input in list
inp_names = inp.schema.inputs

# create inputs in a different format if the inputs class is ConcatInputs
if inp.__class__.__name__ == "ConcatInputs":
# create dictionary of concat inputs
inp_dict = { i : inputs[i] for i in inp_names }
inp_dict = { i : inputs[i] for i in inp.schema.inputs }

# create list variable to be passed
inp_args = [inp_dict]

# else, use the same approach as for other inputs classes
else:
# convert list of inputs to tensor, with shape = (B, N, *)
inp_val = [inputs[i] for i in inp_names]
inp_val = torch.cat(inp_val, dim=1)
inp_val = [inputs[i] for i in inp.schema.inputs]
inp_val = torch.cat(inp_val, dim="N")
inp_args = [inp_val]

# set args for specific input
if inp.__class__.__name__ == "SequenceIndexEmbedding":
inp_names = inp.schema.lengths
inp_args.append(inputs[inp_names])
inp_args.append(inputs[inp.schema.lengths])

# calculate embedding values
output = inp(*inp_args)
@@ -119,6 +113,6 @@ def forward(self, inputs: Dict[str, torch.Tensor]) -> torch.Tensor:
outputs.append(output)

# stack in the second dimension, and the shape of output = (B, sum(N), E)
outputs = torch.cat(outputs, dim=1)
outputs = torch.cat(outputs, dim="N")

return outputs
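In contrast to ConcatInputs, StackedInputs concatenates along the named `"N"` dimension, so every wrapped input must share the same embedding size E. A minimal sketch with illustrative shapes:

```python
import torch

B, E = 4, 8
emb_single = torch.rand(B, 1, E, names=("B", "N", "E"))  # single-index field, N = 1
emb_multi  = torch.rand(B, 3, E, names=("B", "N", "E"))  # e.g. a list field, N = 3

# stack row-wise along the named "N" dimension: shape = (B, 1 + 3, E)
stacked = torch.cat([emb_single, emb_multi], dim="N")
print(stacked.shape)  # torch.Size([4, 4, 8])
```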
74 changes: 41 additions & 33 deletions torecsys/inputs/inputs_wrapper.py
@@ -4,28 +4,38 @@


class InputsWrapper(_Inputs):
r"""Inputs class for wrapping a number of Base Inputs class into a dictionary. The output is a
dictionary, which its keys are names of model's inputs and values are tensor of model's inputs.
r"""Inputs class for wrapping a number of Base Inputs class into a dictionary. The output
is a dictionary, which its keys are names of model's inputs and values are tensor of model's
inputs.
"""
def __init__(self,
schema: Dict[str, tuple]):
schema: Dict[str, _Inputs]):
r"""Initialize InputsWrapper.
Args:
schema (Dict[str, tuple]): Schema of InputsWrapper. Dictionary, which keys are names of
inputs' fields and values are tensor of those fields. e.g.
schema (Dict[str, _Inputs]): Schema of InputsWrapper. Dictionary, where keys are
names of the model's inputs, and values are the Inputs class instances (e.g.
embedding layers) that produce them. e.g.
.. code-block:: python
import torecsys as trs
# initialize embedding layers used in InputsWrapper
single_index_emb_0 = trs.inputs.base.SingleIndexEmbedding(2, 8)
single_index_emb_1 = trs.inputs.base.SingleIndexEmbedding(2, 8)
# set schema, including field names etc
single_index_emb_0.set_schema(["userId"])
single_index_emb_1.set_schema(["movieId"])
# create InputsWrapper
schema = {
"user" : (trs.inputs.base.SingleIndexEmbedding(4, 10), ["userId"]),
"movie" : (trs.inputs.base.SingleIndexEmbedding(4, 10), ["movieId"]),
"pair" : (trs.inputs.base.FieldAwareMultipleIndexEmbedding(4, [10, 10]), ["userId", "movieId"]),
"seq" : (trs.inputs.base.SequenceIndexEmbedding(4, 10), ["seqId"], ["seqLength"])
"user" : single_index_emb_0,
"movie" : single_index_emb_1
}
inputs_wrapper = trs.inputs.InputsWrapper(schema=schema)
Attributes:
schema (Dict[str, tuple]): Schema of InputsWrapper.
schema (Dict[str, _Inputs]): Schema of InputsWrapper.
length (int): None.
"""
# refer to parent class
@@ -35,8 +45,8 @@ def __init__(self,
self.schema = schema

# add modules in schema to the Module
for k, tup in schema.items():
self.add_module(k, tup[0])
for k, inp in schema.items():
self.add_module(k, inp)

# set length to None
self.length = None
@@ -45,42 +55,40 @@ def forward(self, inputs: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
r"""Forward calculation of InputsWrapper.
Args:
inputs (Dict[str, T]): Dictionary of inputs, where key is name of input fields, and value is \
tensor pass to Input class. Remark: key should exist in schema.
inputs (Dict[str, T]): Dictionary of inputs, where each key is the name of an input
field and each value is the tensor passed to the Input class.
Returns:
Dict[str, T], dtype = torch.float: Output of InputsWrapper, which is a dictionary where keys \
are names of model's inputs and values are tensor of model's inputs.
Dict[str, T], dtype = torch.float: Output of InputsWrapper, which is a dictionary
whose keys are the names of the model's inputs and whose values are the corresponding tensors.
"""
# initialize dictionary to store tensors
outputs = dict()

# loop through schema
for out_name, args_tuple in self.schema.items():
# get basic args from tuple in schema
embedding = args_tuple[0]
inp_names = args_tuple[1]

# create inputs in different format if the inputs class is ConcatInputs or StackedInputs
if embedding.__class__.__name__ in ["ConcatInputs", "StackedInputs"]:
for out_name, out_inp in self.schema.items():
# create inputs in a different format if it is ConcatInputs or StackedInputs
if out_inp.__class__.__name__ in ["ConcatInputs", "StackedInputs"]:
# create dictionary of concat inputs
args_dict = { i : inputs[i] for i in inp_names }
inp_dict = { i : inputs[i] for i in out_inp.schema.inputs }

# create list variable to be passed
args = [args_dict]
inp_args = [inp_dict]
else:
# convert list of inputs to tensor, with shape = (B, N, *)
inp_val = [inputs[i] for i in inp_names]
inp_val = torch.cat(inp_val, dim=1)
args = [inp_val]
inp_val = [inputs[i] for i in out_inp.schema.inputs]
inp_val = torch.cat(inp_val, dim="N")
inp_args = [inp_val]

# set args for specific input
if embedding.__class__.__name__ == "SequenceIndexEmbedding":
arg_name = args_tuple[2][0]
args.append(inputs[arg_name])
if out_inp.__class__.__name__ == "SequenceIndexEmbedding":
inp_args.append(inputs[out_inp.schema.lengths])

# calculate embedding values
output = out_inp(*inp_args)

# set out_name in outputs to transformed tensors or embedded tensors
outputs[out_name] = embedding(*args)
outputs[out_name] = output

return outputs
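Putting the new schema format together, here is a hedged end-to-end sketch mirroring the docstring example above. The `(B, 1)` layout of each index tensor, the index range, and the named-tensor annotation on the batch (via `refine_names`, to match the `dim="N"` concatenation introduced in this commit) are assumptions for illustration, not guarantees of this commit:

```python
import torch
import torecsys as trs

# embedding layers and field names, as in the docstring example
single_index_emb_0 = trs.inputs.base.SingleIndexEmbedding(2, 8)
single_index_emb_1 = trs.inputs.base.SingleIndexEmbedding(2, 8)
single_index_emb_0.set_schema(["userId"])
single_index_emb_1.set_schema(["movieId"])

# new-style schema: values are Inputs instances, no more tuples
schema = {
    "user" : single_index_emb_0,
    "movie": single_index_emb_1
}
inputs_wrapper = trs.inputs.InputsWrapper(schema=schema)

# batch keys must match the field names set via set_schema; index values
# are assumed to fall inside each embedding's vocabulary
batch = {
    "userId" : torch.randint(0, 2, (4, 1)).refine_names("B", "N"),
    "movieId": torch.randint(0, 2, (4, 1)).refine_names("B", "N")
}
outputs = inputs_wrapper(batch)  # Dict[str, T], e.g. {"user": ..., "movie": ...}
```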
