UPDATE: schema of input wrapper
jasper430 committed Oct 21, 2019
1 parent 902cee6 commit 2a31cc5
Showing 5 changed files with 61 additions and 61 deletions.
1 change: 1 addition & 0 deletions torecsys/inputs/base/audio_inp.py
@@ -1,6 +1,7 @@
from . import _Inputs
import torch


class AudioInputs(_Inputs):
r"""Base Inputs class for Audio.
"""
25 changes: 11 additions & 14 deletions torecsys/inputs/base/concat_inputs.py
@@ -5,8 +5,9 @@


class ConcatInputs(_Inputs):
r"""Base Inputs class for concatenation of list of Base Inputs class in rowwise. The shape of output
is :math:`(B, 1, E_{1} + ... + E_{k})`, where :math:`E_{i}` is embedding size of :math:`i-th` field.
r"""Base Inputs class for concatenation of list of Base Inputs class in rowwise.
The shape of output is :math:`(B, 1, E_{1} + ... + E_{k})`, where :math:`E_{i}`
is embedding size of :math:`i-th` field.
"""
@no_jit_experimental_by_namedtensor
def __init__(self, inputs: List[_Inputs]):
@@ -66,30 +67,26 @@ def forward(self, inputs: Dict[str, torch.Tensor]) -> torch.Tensor:
r"""Foward calculation of ConcatInputs.
Args:
inputs (Dict[str, T]): Dictionary of inputs, where key is name of input fields, and value is
tensor pass to Input class. Remark: key should exist in schema.
inputs (Dict[str, T]): Dictionary of inputs, where each key is the name of an input
field and each value is the tensor passed to the Input class.
Returns:
T, shape = (B, 1, E_{sum}), dtype = torch.float: Output of ConcatInputs, where the values are
concatenated in the third dimension.
T, shape = (B, 1, E_{sum}), dtype = torch.float: Output of ConcatInputs, where
the values are concatenated along the third dimension.
"""
# initialize list to store tensors temporarily
outputs = list()

# loop through inputs
for inp in self.inputs:
# get schema, i.e. input's field names, from input in list
inp_names = inp.schema.inputs

# convert list of inputs to tensor, with shape = (B, N, *)
inp_val = [inputs[i] for i in inp_names]
inp_val = torch.cat(inp_val, dim=1)
inp_val = [inputs[i] for i in inp.schema.inputs]
inp_val = torch.cat(inp_val, dim="N")
inp_args = [inp_val]

# set args for specific input
if inp.__class__.__name__ == "SequenceIndexEmbedding":
inp_names = inp.schema.lengths
inp_args.append(inputs[inp_names])
inp_args.append(inputs[inp.schema.lengths])

# calculate embedding values
output = inp(*inp_args)
@@ -102,7 +99,7 @@ def forward(self, inputs: Dict[str, torch.Tensor]) -> torch.Tensor:
outputs.append(output)

# concat in the third dimension, and the shape of output = (B, 1, sum(E))
outputs = torch.cat(outputs, dim=2)
outputs = torch.cat(outputs, dim="E")

return outputs
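The change from `dim=2` to `dim="E"` above leans on PyTorch's named tensors (experimental since 1.3, consistent with the `no_jit_experimental_by_namedtensor` decorator). Below is a minimal standalone sketch of the concatenation ConcatInputs performs; the field roles and embedding sizes (8 and 16) are illustrative, not taken from the library:

```python
import torch

B = 4  # batch size
# two field embeddings with shape (B, 1, E_i), carrying named dimensions
emb_user  = torch.rand(B, 1, 8,  names=("B", "N", "E"))
emb_movie = torch.rand(B, 1, 16, names=("B", "N", "E"))

# concatenate along the named embedding dimension "E",
# giving shape (B, 1, E_user + E_movie) = (B, 1, 24)
concat = torch.cat([emb_user, emb_movie], dim="E")
print(concat.shape)  # torch.Size([4, 1, 24])
print(concat.names)  # ('B', 'N', 'E')
```

Referring to the dimension by name also makes `torch.cat` verify that all inputs carry matching dimension names, a check a bare positional index would not perform.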

6 changes: 3 additions & 3 deletions torecsys/inputs/base/list_indices_emb.py
@@ -1,6 +1,6 @@
from . import _Inputs
from torecsys.functional import show_attention, dummy_attention
from torecsys.utils.decorator import jit_experimental, no_jit_experimental_by_namedtensor
from torecsys.utils.decorator import jit_experimental, no_jit_experimental, no_jit_experimental_by_namedtensor
from functools import partial
import numpy as np
import torch
@@ -106,11 +106,11 @@ def __init__(self,
elif output_method == "max_pooling":
self.aggregation = nn.AdaptiveMaxPool1d(1)
elif output_method == "mean":
self.aggregation = partial(torch.mean, dim=1, keepdim=True)
self.aggregation = partial(torch.mean, dim="N", keepdim=True)
elif output_method == "none":
self.aggregation = torch.Tensor
elif output_method == "sum":
self.aggregation = partial(torch.sum, dim=1, keepdim=True)
self.aggregation = partial(torch.sum, dim="N", keepdim=True)
else:
raise ValueError('output_method only allows ["avg_pooling", "max_pooling", "mean", "none", "sum"].')
self.output_method = output_method
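As a sketch of what the `partial`-bound aggregations above now do: the reduction is bound to the named `"N"` dimension instead of positional `dim=1`, and `keepdim=True` preserves that dimension in the result. Shapes here are made up for illustration:

```python
from functools import partial
import torch

# mirror the "mean" branch above: reduce over the named "N" dimension
aggregation = partial(torch.mean, dim="N", keepdim=True)

emb = torch.rand(4, 5, 8, names=("B", "N", "E"))  # (B, N, E)
pooled = aggregation(emb)                         # (B, 1, E)
print(pooled.shape)  # torch.Size([4, 1, 8])
```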
16 changes: 5 additions & 11 deletions torecsys/inputs/base/stacked_inp.py
@@ -85,28 +85,22 @@ def forward(self, inputs: Dict[str, torch.Tensor]) -> torch.Tensor:

# loop through inputs
for inp in self.inputs:
# get schema, i.e. input's field names, from input in list
inp_names = inp.schema.inputs

# create inputs in a different format if the inputs class is ConcatInputs
if inp.__class__.__name__ == "ConcatInputs":
# create dictionary of concat inputs
inp_dict = { i : inputs[i] for i in inp_names }
inp_dict = { i : inputs[i] for i in inp.schema.inputs }

# create list variable to be passed
inp_args = [inp_dict]

# else, use the same approach as for other inputs classes
else:
# convert list of inputs to tensor, with shape = (B, N, *)
inp_val = [inputs[i] for i in inp_names]
inp_val = torch.cat(inp_val, dim=1)
inp_val = [inputs[i] for i in inp.schema.inputs]
inp_val = torch.cat(inp_val, dim="N")
inp_args = [inp_val]

# set args for specific input
if inp.__class__.__name__ == "SequenceIndexEmbedding":
inp_names = inp.schema.lengths
inp_args.append(inputs[inp_names])
inp_args.append(inputs[inp.schema.lengths])

# calculate embedding values
output = inp(*inp_args)
@@ -119,6 +113,6 @@ def forward(self, inputs: Dict[str, torch.Tensor]) -> torch.Tensor:
outputs.append(output)

# stack in the second dimension, and the shape of output = (B, sum(N), E)
outputs = torch.cat(outputs, dim=1)
outputs = torch.cat(outputs, dim="N")

return outputs
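In contrast to ConcatInputs, StackedInputs concatenates along the named `"N"` dimension, so every wrapped input must share the same embedding size E. A minimal sketch with illustrative shapes:

```python
import torch

B, E = 4, 8
emb_single = torch.rand(B, 1, E, names=("B", "N", "E"))  # single-index field, N = 1
emb_multi  = torch.rand(B, 3, E, names=("B", "N", "E"))  # e.g. a list field, N = 3

# stack row-wise along the named "N" dimension: shape = (B, 1 + 3, E)
stacked = torch.cat([emb_single, emb_multi], dim="N")
print(stacked.shape)  # torch.Size([4, 4, 8])
```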
74 changes: 41 additions & 33 deletions torecsys/inputs/inputs_wrapper.py
@@ -4,28 +4,38 @@


class InputsWrapper(_Inputs):
r"""Inputs class for wrapping a number of Base Inputs class into a dictionary. The output is a
dictionary, which its keys are names of model's inputs and values are tensor of model's inputs.
r"""Inputs class for wrapping a number of Base Inputs class into a dictionary. The output
is a dictionary, which its keys are names of model's inputs and values are tensor of model's
inputs.
"""
def __init__(self,
schema: Dict[str, tuple]):
schema: Dict[str, _Inputs]):
r"""Initialize InputsWrapper.
Args:
schema (Dict[str, tuple]): Schema of InputsWrapper. Dictionary, which keys are names of
inputs' fields and values are tensor of those fields. e.g.
schema (Dict[str, _Inputs]): Schema of InputsWrapper. Dictionary, where keys are
names of the model's inputs, and values are the Inputs class instances (e.g.
embedding layers) that produce them. e.g.
.. code-block:: python
import torecsys as trs
# initialize embedding layers used in InputsWrapper
single_index_emb_0 = trs.inputs.base.SingleIndexEmbedding(2, 8)
single_index_emb_1 = trs.inputs.base.SingleIndexEmbedding(2, 8)
# set schema, including field names etc
single_index_emb_0.set_schema(["userId"])
single_index_emb_1.set_schema(["movieId"])
# create InputsWrapper
schema = {
"user" : (trs.inputs.base.SingleIndexEmbedding(4, 10), ["userId"]),
"movie" : (trs.inputs.base.SingleIndexEmbedding(4, 10), ["movieId"]),
"pair" : (trs.inputs.base.FieldAwareMultipleIndexEmbedding(4, [10, 10]), ["userId", "movieId"]),
"seq" : (trs.inputs.base.SequenceIndexEmbedding(4, 10), ["seqId"], ["seqLength"])
"user" : single_index_emb_0,
"movie" : single_index_emb_1
}
inputs_wrapper = trs.inputs.InputsWrapper(schema=schema)
Attributes:
schema (Dict[str, tuple]): Schema of InputsWrapper.
schema (Dict[str, _Inputs]): Schema of InputsWrapper.
length (int): None.
"""
# refer to parent class
@@ -35,8 +45,8 @@ def __init__(self,
self.schema = schema

# add modules in schema to the Module
for k, tup in schema.items():
self.add_module(k, tup[0])
for k, inp in schema.items():
self.add_module(k, inp)

# set length to None
self.length = None
@@ -45,42 +55,40 @@ def forward(self, inputs: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
r"""Forward calculation of InputsWrapper.
Args:
inputs (Dict[str, T]): Dictionary of inputs, where key is name of input fields, and value is \
tensor pass to Input class. Remark: key should exist in schema.
inputs (Dict[str, T]): Dictionary of inputs, where each key is the name of an input
field and each value is the tensor passed to the Input class.
Returns:
Dict[str, T], dtype = torch.float: Output of InputsWrapper, which is a dictionary where keys \
are names of model's inputs and values are tensor of model's inputs.
Dict[str, T], dtype = torch.float: Output of InputsWrapper, which is a dictionary
whose keys are the names of the model's inputs and whose values are the corresponding tensors.
"""
# initialize dictionary to store tensors
outputs = dict()

# loop through schema
for out_name, args_tuple in self.schema.items():
# get basic args from tuple in schema
embedding = args_tuple[0]
inp_names = args_tuple[1]

# create inputs in different format if the inputs class is ConcatInputs or StackedInputs
if embedding.__class__.__name__ in ["ConcatInputs", "StackedInputs"]:
for out_name, out_inp in self.schema.items():
# create inputs in a different format if it is ConcatInputs or StackedInputs
if out_inp.__class__.__name__ in ["ConcatInputs", "StackedInputs"]:
# create dictionary of concat inputs
args_dict = { i : inputs[i] for i in inp_names }
inp_dict = { i : inputs[i] for i in out_inp.schema.inputs }

# create list variable to be passed
args = [args_dict]
inp_args = [inp_dict]
else:
# convert list of inputs to tensor, with shape = (B, N, *)
inp_val = [inputs[i] for i in inp_names]
inp_val = torch.cat(inp_val, dim=1)
args = [inp_val]
inp_val = [inputs[i] for i in out_inp.schema.inputs]
inp_val = torch.cat(inp_val, dim="N")
inp_args = [inp_val]

# set args for specific input
if embedding.__class__.__name__ == "SequenceIndexEmbedding":
arg_name = args_tuple[2][0]
args.append(inputs[arg_name])
if out_inp.__class__.__name__ == "SequenceIndexEmbedding":
inp_args.append(inputs[out_inp.schema.lengths])

# calculate embedding values
output = out_inp(*inp_args)

# set out_name in outputs to transformed tensors or embedded tensors
outputs[out_name] = embedding(*args)
outputs[out_name] = output

return outputs
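Putting the new schema format together, here is a hedged end-to-end sketch mirroring the docstring example above. The `(B, 1)` layout of each index tensor, the index range, and the named-tensor annotation on the batch (via `refine_names`, to match the `dim="N"` concatenation introduced in this commit) are assumptions for illustration, not guarantees of this commit:

```python
import torch
import torecsys as trs

# embedding layers and field names, as in the docstring example
single_index_emb_0 = trs.inputs.base.SingleIndexEmbedding(2, 8)
single_index_emb_1 = trs.inputs.base.SingleIndexEmbedding(2, 8)
single_index_emb_0.set_schema(["userId"])
single_index_emb_1.set_schema(["movieId"])

# new-style schema: values are Inputs instances, no more tuples
schema = {
    "user" : single_index_emb_0,
    "movie": single_index_emb_1
}
inputs_wrapper = trs.inputs.InputsWrapper(schema=schema)

# batch keys must match the field names set via set_schema; index values
# are assumed to fall inside each embedding's vocabulary
batch = {
    "userId" : torch.randint(0, 2, (4, 1)).refine_names("B", "N"),
    "movieId": torch.randint(0, 2, (4, 1)).refine_names("B", "N")
}
outputs = inputs_wrapper(batch)  # Dict[str, T], e.g. {"user": ..., "movie": ...}
```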
