Merged

Changes from all commits
2 changes: 1 addition & 1 deletion example/tutorial_imdb_fasttext.py
@@ -71,7 +71,7 @@ def __init__(self, vocab_size, embedding_size, n_labels):
tf.int32, shape=[None], name='labels')

# Network structure
network = AverageEmbeddingInputlayer(
network = AverageEmbeddingInputLayer(
self.inputs, self.vocab_size, self.embedding_size)
self.network = DenseLayer(network, self.n_labels)
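For context, a minimal standalone sketch of the renamed layer in use, mirroring the tutorial snippet above (the placeholder shape and the vocabulary/embedding/label sizes are illustrative assumptions, not values from the tutorial):

```python
import tensorflow as tf
from tensorlayer.layers import AverageEmbeddingInputLayer, DenseLayer

# Illustrative sizes; the tutorial derives these from the IMDB data.
vocab_size, embedding_size, n_labels = 10000, 50, 2

# Batch of word-id sequences, padded to a common length with the pad value 0.
sentences = tf.placeholder(tf.int32, shape=[None, None], name='inputs')

# Note the renamed class: AverageEmbeddingInputLayer (capital "L").
network = AverageEmbeddingInputLayer(sentences, vocab_size, embedding_size)
network = DenseLayer(network, n_labels)
```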

47 changes: 23 additions & 24 deletions tensorlayer/layers.py
@@ -651,30 +651,34 @@ def __init__(
self.all_drop = {}


class AverageEmbeddingInputlayer(Layer):
"""The :class:`AverageEmbeddingInputlayer` class is for FastText Embedding for sentence classification, see `[1] <http://arxiv.org/abs/1607.01759>`_.
class AverageEmbeddingInputLayer(Layer):
""":class:`AverageEmbeddingInputlayer` averages over embeddings of inputs.

:class:`AverageEmbeddingInputlayer` can be used as the input layer
for models like DAN[1] and FastText[2].

Parameters
------------
inputs : input placeholder or tensor; zeros are paddings
inputs : input placeholder or tensor
vocabulary_size : an integer, the size of the vocabulary
embedding_size : an integer, the dimension of embedding vectors
pad_value : an integer, the scalar pad value used in inputs
name : a string, the name of the layer
embeddings_initializer : the initializer of the embedding matrix
embeddings_kwargs : a dictionary, extra keyword arguments passed to tf.get_variable when creating the embedding matrix

References
------------
- [1] Joulin, A., Grave, E., Bojanowski, P., & Mikolov, T. (2016). `Bag of Tricks for Efficient Text Classification. <http://arxiv.org/abs/1607.01759>`_
- [2] Recht, B., Re, C., Wright, S., & Niu, F. (2011). `Hogwild: A Lock-Free Approach to Parallelizing Stochastic Gradient Descent. <https://arxiv.org/abs/1106.5730>`_ In NPIS 2011 (pp. 693–701).
- [3] `TensorFlow Candidate Sampling <https://www.tensorflow.org/api_guides/python/nn#Candidate_Sampling>`_
- [1] Iyyer, M., Manjunatha, V., Boyd-Graber, J., & Daumé III, H. (2015). Deep Unordered Composition Rivals Syntactic Methods for Text Classification. In Association for Computational Linguistics.
- [2] Joulin, A., Grave, E., Bojanowski, P., & Mikolov, T. (2016). `Bag of Tricks for Efficient Text Classification. <http://arxiv.org/abs/1607.01759>`_
"""
def __init__(
self, inputs, vocabulary_size, embedding_size,
name='fasttext_layer',
pad_value=0,
name='average_embedding_layer',
embeddings_initializer=tf.random_uniform_initializer(-0.1, 0.1),
embeddings_kwargs={}
Contributor Author:
mutable default argument values are generally not a good idea. e.g. see here
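
To illustrate the point with a standalone sketch (the functions below are hypothetical, not part of this PR): a mutable default such as `{}` is created once at function definition time and shared across all calls, whereas the `None` default plus `kwargs or {}` idiom adopted in this change gives every call a fresh dict.

```python
def add_option_bad(key, value, options={}):
    # The same dict object is reused on every call.
    options[key] = value
    return options

def add_option_good(key, value, options=None):
    # A fresh dict is created whenever none is passed in.
    options = options or {}
    options[key] = value
    return options

print(add_option_bad('a', 1))   # {'a': 1}
print(add_option_bad('b', 2))   # {'a': 1, 'b': 2}  <- state leaks between calls
print(add_option_good('a', 1))  # {'a': 1}
print(add_option_good('b', 2))  # {'b': 2}
```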

Member:
Cool, please also make sure the tutorial_imdb_fasttext.py can run well ~~

):#None):
embeddings_kwargs=None,
):
super().__init__(name=name)

if inputs.get_shape().ndims != 2:
@@ -690,29 +694,24 @@ def __init__(
name='embeddings',
shape=(vocabulary_size, embedding_size),
initializer=embeddings_initializer,
# **(embeddings_kwargs or {}),
**embeddings_kwargs)
**(embeddings_kwargs or {}),
)

word_embeddings = tf.nn.embedding_lookup(
self.embeddings, self.inputs,
name='word_embeddings',
)

# Masks used to ignore padding words
masks = tf.expand_dims(
tf.sign(self.inputs),
axis=-1,
name='masks',
)
sum_word_embeddings = tf.reduce_sum(
word_embeddings * tf.cast(masks, tf.float32),
axis=1,
# Zero out embeddings of pad value
masks = tf.not_equal(self.inputs, pad_value, name='masks')
word_embeddings *= tf.cast(
tf.expand_dims(masks, axis=-1),
tf.float32,
)
sum_word_embeddings = tf.reduce_sum(word_embeddings, axis=1)

# Count number of non-padding words in each sentence
# Used to compute average word embeddings in sentences
sentence_lengths = tf.count_nonzero(
self.inputs,
masks,
axis=1,
keep_dims=True,
dtype=tf.float32,
@@ -721,7 +720,7 @@ def __init__(

sentence_embeddings = tf.divide(
sum_word_embeddings,
sentence_lengths,
sentence_lengths + 1e-8, # Add epsilon to avoid dividing by 0
name='sentence_embeddings'
)
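
As a sanity check on the masking logic above, here is a small NumPy re-implementation (an illustrative sketch, not code from this PR): embeddings at pad positions are zeroed before summing, and the sum is divided by the number of non-pad tokens (plus the epsilon), so each sentence embedding is the mean of its real word embeddings.

```python
import numpy as np

pad_value = 0
# Two sentences of word ids, padded to length 4 with pad_value.
inputs = np.array([[4, 7, 0, 0],
                   [5, 5, 9, 0]])
embeddings = np.random.uniform(-0.1, 0.1, size=(10, 3))  # vocab 10, dim 3

word_embeddings = embeddings[inputs]                  # (2, 4, 3)
masks = inputs != pad_value                           # (2, 4) boolean
word_embeddings *= masks[..., None]                   # zero out pad positions
sum_word_embeddings = word_embeddings.sum(axis=1)     # (2, 3)
sentence_lengths = masks.sum(axis=1, keepdims=True)   # (2, 1) -> [[2], [3]]
sentence_embeddings = sum_word_embeddings / (sentence_lengths + 1e-8)
```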
