diff --git a/example/tutorial_imdb_fasttext.py b/example/tutorial_imdb_fasttext.py
index 250c1cb5c..e4a009113 100644
--- a/example/tutorial_imdb_fasttext.py
+++ b/example/tutorial_imdb_fasttext.py
@@ -71,7 +71,7 @@ def __init__(self, vocab_size, embedding_size, n_labels):
             tf.int32, shape=[None], name='labels')
 
         # Network structure
-        network = AverageEmbeddingInputlayer(
+        network = AverageEmbeddingInputLayer(
            self.inputs, self.vocab_size, self.embedding_size)
         self.network = DenseLayer(network, self.n_labels)
 
diff --git a/tensorlayer/layers.py b/tensorlayer/layers.py
index 3cf4096e8..a9c4b21dc 100755
--- a/tensorlayer/layers.py
+++ b/tensorlayer/layers.py
@@ -651,30 +651,34 @@ def __init__(
         self.all_drop = {}
 
 
-class AverageEmbeddingInputlayer(Layer):
-    """The :class:`AverageEmbeddingInputlayer` class is for FastText Embedding for sentence classification, see `[1] <https://arxiv.org/abs/1607.01759>`_.
+class AverageEmbeddingInputLayer(Layer):
+    """:class:`AverageEmbeddingInputLayer` averages over the embeddings of its inputs.
+
+    :class:`AverageEmbeddingInputLayer` can be used as the input layer
+    for models such as DAN [1] and FastText [2].
 
     Parameters
     ------------
-    inputs : input placeholder or tensor; zeros are paddings
+    inputs : input placeholder or tensor
     vocabulary_size : an integer, the size of vocabulary
     embedding_size : an integer, the dimension of embedding vectors
+    pad_value : an integer, the scalar pad value used in inputs
     name : a string, the name of the layer
     embeddings_initializer : the initializer of the embedding matrix
     embeddings_kwargs : kwargs to get embedding matrix variable
 
     References
     ------------
-    - [1] Joulin, A., Grave, E., Bojanowski, P., & Mikolov, T. (2016). `Bag of Tricks for Efficient Text Classification. <https://arxiv.org/abs/1607.01759>`_
-    - [2] Recht, B., Re, C., Wright, S., & Niu, F. (2011). `Hogwild: A Lock-Free Approach to Parallelizing Stochastic Gradient Descent. <https://arxiv.org/abs/1106.5730>`_ In NPIS 2011 (pp. 693–701).
-    - [3] `TensorFlow Candidate Sampling <https://www.tensorflow.org/extras/candidate_sampling.pdf>`_
+    - [1] Iyyer, M., Manjunatha, V., Boyd-Graber, J., & Daumé III, H. (2015). Deep Unordered Composition Rivals Syntactic Methods for Text Classification. In Association for Computational Linguistics.
+    - [2] Joulin, A., Grave, E., Bojanowski, P., & Mikolov, T. (2016). `Bag of Tricks for Efficient Text Classification. <https://arxiv.org/abs/1607.01759>`_
 
     """
     def __init__(
             self,
             inputs,
             vocabulary_size,
             embedding_size,
-            name='fasttext_layer',
+            pad_value=0,
+            name='average_embedding_layer',
             embeddings_initializer=tf.random_uniform_initializer(-0.1, 0.1),
-            embeddings_kwargs={}
-    ):#None):
+            embeddings_kwargs=None,
+    ):
         super().__init__(name=name)
 
         if inputs.get_shape().ndims != 2:
@@ -690,29 +694,24 @@ def __init__(
             name='embeddings',
             shape=(vocabulary_size, embedding_size),
             initializer=embeddings_initializer,
-            # **(embeddings_kwargs or {}),
-            **embeddings_kwargs)
+            **(embeddings_kwargs or {}),
+        )
 
         word_embeddings = tf.nn.embedding_lookup(
             self.embeddings,
             self.inputs,
             name='word_embeddings',
         )
-
-        # Masks used to ignore padding words
-        masks = tf.expand_dims(
-            tf.sign(self.inputs),
-            axis=-1,
-            name='masks',
-        )
-        sum_word_embeddings = tf.reduce_sum(
-            word_embeddings * tf.cast(masks, tf.float32),
-            axis=1,
+        # Zero out embeddings of pad value
+        masks = tf.not_equal(self.inputs, pad_value, name='masks')
+        word_embeddings *= tf.cast(
+            tf.expand_dims(masks, axis=-1),
+            tf.float32,
         )
+        sum_word_embeddings = tf.reduce_sum(word_embeddings, axis=1)
 
         # Count number of non-padding words in each sentence
-        # Used to commute average word embeddings in sentences
         sentence_lengths = tf.count_nonzero(
-            self.inputs,
+            masks,
             axis=1,
             keep_dims=True,
             dtype=tf.float32,
@@ -721,7 +720,7 @@ def __init__(
 
         sentence_embeddings = tf.divide(
             sum_word_embeddings,
-            sentence_lengths,
+            sentence_lengths + 1e-8,  # Add epsilon to avoid dividing by 0
             name='sentence_embeddings'
         )
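
For reference, a minimal usage sketch of the renamed layer with the new pad_value argument, in the spirit of the tutorial change above. This is not part of the patch: the vocabulary size, sequence length, and unit count are made-up placeholder values, and TensorLayer/TensorFlow 1.x APIs are assumed.

import tensorflow as tf
from tensorlayer.layers import AverageEmbeddingInputLayer, DenseLayer

# Batch of token-id sequences, padded to a fixed length with id 0.
inputs = tf.placeholder(tf.int32, shape=[None, 100], name='inputs')

# Embeddings at pad positions are masked out before averaging, so the
# pad token no longer has to be id 0 -- any scalar pad_value works.
network = AverageEmbeddingInputLayer(
    inputs,
    vocabulary_size=100000,  # placeholder value
    embedding_size=50,       # placeholder value
    pad_value=0,
)
network = DenseLayer(network, n_units=2)  # e.g. binary sentiment labels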