diff --git a/docs/api_docs/python/_redirects.yaml b/docs/api_docs/python/_redirects.yaml
new file mode 100644
index 0000000000..62246f6804
--- /dev/null
+++ b/docs/api_docs/python/_redirects.yaml
@@ -0,0 +1,127 @@
+redirects:
+- from: /api_docs/python/tfa/image/distance_transform/euclidean_dist_transform
+ to: /api_docs/python/tfa/image/euclidean_dist_transform
+- from: /api_docs/python/tfa/image/distort_image_ops/adjust_hsv_in_yiq
+ to: /api_docs/python/tfa/image/adjust_hsv_in_yiq
+- from: /api_docs/python/tfa/image/distort_image_ops/random_hsv_in_yiq
+ to: /api_docs/python/tfa/image/random_hsv_in_yiq
+- from: /api_docs/python/tfa/image/filters/mean_filter2d
+ to: /api_docs/python/tfa/image/mean_filter2d
+- from: /api_docs/python/tfa/image/filters/median_filter2d
+ to: /api_docs/python/tfa/image/median_filter2d
+- from: /api_docs/python/tfa/image/transform_ops/rotate
+ to: /api_docs/python/tfa/image/rotate
+- from: /api_docs/python/tfa/image/transform_ops/transform
+ to: /api_docs/python/tfa/image/transform
+- from: /api_docs/python/tfa/layers/maxout/Maxout
+ to: /api_docs/python/tfa/layers/Maxout
+- from: /api_docs/python/tfa/layers/normalizations/GroupNormalization
+ to: /api_docs/python/tfa/layers/GroupNormalization
+- from: /api_docs/python/tfa/layers/normalizations/InstanceNormalization
+ to: /api_docs/python/tfa/layers/InstanceNormalization
+- from: /api_docs/python/tfa/layers/poincare/PoincareNormalize
+ to: /api_docs/python/tfa/layers/PoincareNormalize
+- from: /api_docs/python/tfa/layers/sparsemax/Sparsemax
+ to: /api_docs/python/tfa/layers/Sparsemax
+- from: /api_docs/python/tfa/layers/sparsemax/sparsemax
+ to: /api_docs/python/tfa/activations/sparsemax
+- from: /api_docs/python/tfa/layers/wrappers/WeightNormalization
+ to: /api_docs/python/tfa/layers/WeightNormalization
+- from: /api_docs/python/tfa/losses/contrastive/ContrastiveLoss
+ to: /api_docs/python/tfa/losses/ContrastiveLoss
+- from: /api_docs/python/tfa/losses/contrastive/contrastive_loss
+ to: /api_docs/python/tfa/losses/contrastive_loss
+- from: /api_docs/python/tfa/losses/focal_loss/SigmoidFocalCrossEntropy
+ to: /api_docs/python/tfa/losses/SigmoidFocalCrossEntropy
+- from: /api_docs/python/tfa/losses/focal_loss/sigmoid_focal_crossentropy
+ to: /api_docs/python/tfa/losses/sigmoid_focal_crossentropy
+- from: /api_docs/python/tfa/losses/lifted/LiftedStructLoss
+ to: /api_docs/python/tfa/losses/LiftedStructLoss
+- from: /api_docs/python/tfa/losses/lifted/lifted_struct_loss
+ to: /api_docs/python/tfa/losses/lifted_struct_loss
+- from: /api_docs/python/tfa/losses/triplet/TripletSemiHardLoss
+ to: /api_docs/python/tfa/losses/TripletSemiHardLoss
+- from: /api_docs/python/tfa/losses/triplet/triplet_semihard_loss
+ to: /api_docs/python/tfa/losses/triplet_semihard_loss
+- from: /api_docs/python/tfa/metrics/cohens_kappa/CohenKappa
+ to: /api_docs/python/tfa/metrics/CohenKappa
+- from: /api_docs/python/tfa/optimizers/lazy_adam/LazyAdam
+ to: /api_docs/python/tfa/optimizers/LazyAdam
+- from: /api_docs/python/tfa/optimizers/moving_average/MovingAverage
+ to: /api_docs/python/tfa/optimizers/MovingAverage
+- from: /api_docs/python/tfa/optimizers/weight_decay_optimizers/AdamW
+ to: /api_docs/python/tfa/optimizers/AdamW
+- from: /api_docs/python/tfa/optimizers/weight_decay_optimizers/SGDW
+ to: /api_docs/python/tfa/optimizers/SGDW
+- from: /api_docs/python/tfa/optimizers/weight_decay_optimizers/extend_with_decoupled_weight_decay
+ to: /api_docs/python/tfa/optimizers/extend_with_decoupled_weight_decay
+- from: /api_docs/python/tfa/rnn/cell/LayerNormLSTMCell
+ to: /api_docs/python/tfa/rnn/LayerNormLSTMCell
+- from: /api_docs/python/tfa/rnn/cell/NASCell
+ to: /api_docs/python/tfa/rnn/NASCell
+- from: /api_docs/python/tfa/seq2seq/attention_wrapper/AttentionMechanism
+ to: /api_docs/python/tfa/seq2seq/AttentionMechanism
+- from: /api_docs/python/tfa/seq2seq/attention_wrapper/AttentionWrapper
+ to: /api_docs/python/tfa/seq2seq/AttentionWrapper
+- from: /api_docs/python/tfa/seq2seq/attention_wrapper/AttentionWrapperState
+ to: /api_docs/python/tfa/seq2seq/AttentionWrapperState
+- from: /api_docs/python/tfa/seq2seq/attention_wrapper/BahdanauAttention
+ to: /api_docs/python/tfa/seq2seq/BahdanauAttention
+- from: /api_docs/python/tfa/seq2seq/attention_wrapper/BahdanauMonotonicAttention
+ to: /api_docs/python/tfa/seq2seq/BahdanauMonotonicAttention
+- from: /api_docs/python/tfa/seq2seq/attention_wrapper/LuongAttention
+ to: /api_docs/python/tfa/seq2seq/LuongAttention
+- from: /api_docs/python/tfa/seq2seq/attention_wrapper/LuongMonotonicAttention
+ to: /api_docs/python/tfa/seq2seq/LuongMonotonicAttention
+- from: /api_docs/python/tfa/seq2seq/attention_wrapper/hardmax
+ to: /api_docs/python/tfa/seq2seq/hardmax
+- from: /api_docs/python/tfa/seq2seq/attention_wrapper/monotonic_attention
+ to: /api_docs/python/tfa/seq2seq/monotonic_attention
+- from: /api_docs/python/tfa/seq2seq/attention_wrapper/safe_cumprod
+ to: /api_docs/python/tfa/seq2seq/safe_cumprod
+- from: /api_docs/python/tfa/seq2seq/basic_decoder/BasicDecoder
+ to: /api_docs/python/tfa/seq2seq/BasicDecoder
+- from: /api_docs/python/tfa/seq2seq/basic_decoder/BasicDecoderOutput
+ to: /api_docs/python/tfa/seq2seq/BasicDecoderOutput
+- from: /api_docs/python/tfa/seq2seq/beam_search_decoder/BeamSearchDecoder
+ to: /api_docs/python/tfa/seq2seq/BeamSearchDecoder
+- from: /api_docs/python/tfa/seq2seq/beam_search_decoder/BeamSearchDecoderOutput
+ to: /api_docs/python/tfa/seq2seq/BeamSearchDecoderOutput
+- from: /api_docs/python/tfa/seq2seq/beam_search_decoder/BeamSearchDecoderState
+ to: /api_docs/python/tfa/seq2seq/BeamSearchDecoderState
+- from: /api_docs/python/tfa/seq2seq/beam_search_decoder/FinalBeamSearchDecoderOutput
+ to: /api_docs/python/tfa/seq2seq/FinalBeamSearchDecoderOutput
+- from: /api_docs/python/tfa/seq2seq/beam_search_decoder/gather_tree_from_array
+ to: /api_docs/python/tfa/seq2seq/gather_tree_from_array
+- from: /api_docs/python/tfa/seq2seq/beam_search_decoder/tile_batch
+ to: /api_docs/python/tfa/seq2seq/tile_batch
+- from: /api_docs/python/tfa/seq2seq/decoder/BaseDecoder
+ to: /api_docs/python/tfa/seq2seq/BaseDecoder
+- from: /api_docs/python/tfa/seq2seq/decoder/Decoder
+ to: /api_docs/python/tfa/seq2seq/Decoder
+- from: /api_docs/python/tfa/seq2seq/decoder/dynamic_decode
+ to: /api_docs/python/tfa/seq2seq/dynamic_decode
+- from: /api_docs/python/tfa/seq2seq/loss/SequenceLoss
+ to: /api_docs/python/tfa/seq2seq/SequenceLoss
+- from: /api_docs/python/tfa/seq2seq/loss/sequence_loss
+ to: /api_docs/python/tfa/seq2seq/sequence_loss
+- from: /api_docs/python/tfa/seq2seq/sampler/CustomSampler
+ to: /api_docs/python/tfa/seq2seq/CustomSampler
+- from: /api_docs/python/tfa/seq2seq/sampler/GreedyEmbeddingSampler
+ to: /api_docs/python/tfa/seq2seq/GreedyEmbeddingSampler
+- from: /api_docs/python/tfa/seq2seq/sampler/InferenceSampler
+ to: /api_docs/python/tfa/seq2seq/InferenceSampler
+- from: /api_docs/python/tfa/seq2seq/sampler/SampleEmbeddingSampler
+ to: /api_docs/python/tfa/seq2seq/SampleEmbeddingSampler
+- from: /api_docs/python/tfa/seq2seq/sampler/Sampler
+ to: /api_docs/python/tfa/seq2seq/Sampler
+- from: /api_docs/python/tfa/seq2seq/sampler/ScheduledEmbeddingTrainingSampler
+ to: /api_docs/python/tfa/seq2seq/ScheduledEmbeddingTrainingSampler
+- from: /api_docs/python/tfa/seq2seq/sampler/ScheduledOutputTrainingSampler
+ to: /api_docs/python/tfa/seq2seq/ScheduledOutputTrainingSampler
+- from: /api_docs/python/tfa/seq2seq/sampler/TrainingSampler
+ to: /api_docs/python/tfa/seq2seq/TrainingSampler
+- from: /api_docs/python/tfa/text/skip_gram_ops/skip_gram_sample
+ to: /api_docs/python/tfa/text/skip_gram_sample
+- from: /api_docs/python/tfa/text/skip_gram_ops/skip_gram_sample_with_text_vocab
+ to: /api_docs/python/tfa/text/skip_gram_sample_with_text_vocab
diff --git a/docs/api_docs/python/_toc.yaml b/docs/api_docs/python/_toc.yaml
new file mode 100644
index 0000000000..eb2e325ef8
--- /dev/null
+++ b/docs/api_docs/python/_toc.yaml
@@ -0,0 +1,302 @@
+# Automatically generated file; please do not edit
+toc:
+ - title: tfa
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa
+ - title: tfa.activations
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/activations
+ - title: sparsemax
+ path: /api_docs/python/tfa/activations/sparsemax
+ - title: tfa.image
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/image
+ - title: adjust_hsv_in_yiq
+ path: /api_docs/python/tfa/image/adjust_hsv_in_yiq
+ - title: dense_image_warp
+ path: /api_docs/python/tfa/image/dense_image_warp
+ - title: euclidean_dist_transform
+ path: /api_docs/python/tfa/image/euclidean_dist_transform
+ - title: interpolate_bilinear
+ path: /api_docs/python/tfa/image/interpolate_bilinear
+ - title: mean_filter2d
+ path: /api_docs/python/tfa/image/mean_filter2d
+ - title: median_filter2d
+ path: /api_docs/python/tfa/image/median_filter2d
+ - title: random_hsv_in_yiq
+ path: /api_docs/python/tfa/image/random_hsv_in_yiq
+ - title: rotate
+ path: /api_docs/python/tfa/image/rotate
+ - title: transform
+ path: /api_docs/python/tfa/image/transform
+ - title: distance_transform
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/image/distance_transform
+ - title: distort_image_ops
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/image/distort_image_ops
+ - title: filters
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/image/filters
+ - title: transform_ops
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/image/transform_ops
+ - title: angles_to_projective_transforms
+ path: /api_docs/python/tfa/image/transform_ops/angles_to_projective_transforms
+ - title: compose_transforms
+ path: /api_docs/python/tfa/image/transform_ops/compose_transforms
+ - title: flat_transforms_to_matrices
+ path: /api_docs/python/tfa/image/transform_ops/flat_transforms_to_matrices
+ - title: matrices_to_flat_transforms
+ path: /api_docs/python/tfa/image/transform_ops/matrices_to_flat_transforms
+ - title: tfa.layers
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/layers
+ - title: GroupNormalization
+ path: /api_docs/python/tfa/layers/GroupNormalization
+ - title: InstanceNormalization
+ path: /api_docs/python/tfa/layers/InstanceNormalization
+ - title: Maxout
+ path: /api_docs/python/tfa/layers/Maxout
+ - title: PoincareNormalize
+ path: /api_docs/python/tfa/layers/PoincareNormalize
+ - title: Sparsemax
+ path: /api_docs/python/tfa/layers/Sparsemax
+ - title: WeightNormalization
+ path: /api_docs/python/tfa/layers/WeightNormalization
+ - title: maxout
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/layers/maxout
+ - title: normalizations
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/layers/normalizations
+ - title: poincare
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/layers/poincare
+ - title: sparsemax
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/layers/sparsemax
+ - title: wrappers
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/layers/wrappers
+ - title: tfa.losses
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/losses
+ - title: ContrastiveLoss
+ path: /api_docs/python/tfa/losses/ContrastiveLoss
+ - title: contrastive_loss
+ path: /api_docs/python/tfa/losses/contrastive_loss
+ - title: LiftedStructLoss
+ path: /api_docs/python/tfa/losses/LiftedStructLoss
+ - title: lifted_struct_loss
+ path: /api_docs/python/tfa/losses/lifted_struct_loss
+ - title: SigmoidFocalCrossEntropy
+ path: /api_docs/python/tfa/losses/SigmoidFocalCrossEntropy
+ - title: sigmoid_focal_crossentropy
+ path: /api_docs/python/tfa/losses/sigmoid_focal_crossentropy
+ - title: SparsemaxLoss
+ path: /api_docs/python/tfa/losses/SparsemaxLoss
+ - title: sparsemax_loss
+ path: /api_docs/python/tfa/losses/sparsemax_loss
+ - title: TripletSemiHardLoss
+ path: /api_docs/python/tfa/losses/TripletSemiHardLoss
+ - title: triplet_semihard_loss
+ path: /api_docs/python/tfa/losses/triplet_semihard_loss
+ - title: contrastive
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/losses/contrastive
+ - title: focal_loss
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/losses/focal_loss
+ - title: lifted
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/losses/lifted
+ - title: metric_learning
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/losses/metric_learning
+ - title: pairwise_distance
+ path: /api_docs/python/tfa/losses/metric_learning/pairwise_distance
+ - title: triplet
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/losses/triplet
+ - title: tfa.metrics
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/metrics
+ - title: CohenKappa
+ path: /api_docs/python/tfa/metrics/CohenKappa
+ - title: cohens_kappa
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/metrics/cohens_kappa
+ - title: tfa.optimizers
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/optimizers
+ - title: AdamW
+ path: /api_docs/python/tfa/optimizers/AdamW
+ - title: extend_with_decoupled_weight_decay
+ path: /api_docs/python/tfa/optimizers/extend_with_decoupled_weight_decay
+ - title: LazyAdam
+ path: /api_docs/python/tfa/optimizers/LazyAdam
+ - title: MovingAverage
+ path: /api_docs/python/tfa/optimizers/MovingAverage
+ - title: SGDW
+ path: /api_docs/python/tfa/optimizers/SGDW
+ - title: lazy_adam
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/optimizers/lazy_adam
+ - title: moving_average
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/optimizers/moving_average
+ - title: weight_decay_optimizers
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/optimizers/weight_decay_optimizers
+ - title: DecoupledWeightDecayExtension
+ path: /api_docs/python/tfa/optimizers/weight_decay_optimizers/DecoupledWeightDecayExtension
+ - title: tfa.rnn
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/rnn
+ - title: LayerNormLSTMCell
+ path: /api_docs/python/tfa/rnn/LayerNormLSTMCell
+ - title: NASCell
+ path: /api_docs/python/tfa/rnn/NASCell
+ - title: cell
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/rnn/cell
+ - title: tfa.seq2seq
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/seq2seq
+ - title: AttentionMechanism
+ path: /api_docs/python/tfa/seq2seq/AttentionMechanism
+ - title: AttentionWrapper
+ path: /api_docs/python/tfa/seq2seq/AttentionWrapper
+ - title: AttentionWrapperState
+ path: /api_docs/python/tfa/seq2seq/AttentionWrapperState
+ - title: BahdanauAttention
+ path: /api_docs/python/tfa/seq2seq/BahdanauAttention
+ - title: BahdanauMonotonicAttention
+ path: /api_docs/python/tfa/seq2seq/BahdanauMonotonicAttention
+ - title: BaseDecoder
+ path: /api_docs/python/tfa/seq2seq/BaseDecoder
+ - title: BasicDecoder
+ path: /api_docs/python/tfa/seq2seq/BasicDecoder
+ - title: BasicDecoderOutput
+ path: /api_docs/python/tfa/seq2seq/BasicDecoderOutput
+ - title: BeamSearchDecoder
+ path: /api_docs/python/tfa/seq2seq/BeamSearchDecoder
+ - title: BeamSearchDecoderOutput
+ path: /api_docs/python/tfa/seq2seq/BeamSearchDecoderOutput
+ - title: BeamSearchDecoderState
+ path: /api_docs/python/tfa/seq2seq/BeamSearchDecoderState
+ - title: CustomSampler
+ path: /api_docs/python/tfa/seq2seq/CustomSampler
+ - title: Decoder
+ path: /api_docs/python/tfa/seq2seq/Decoder
+ - title: dynamic_decode
+ path: /api_docs/python/tfa/seq2seq/dynamic_decode
+ - title: FinalBeamSearchDecoderOutput
+ path: /api_docs/python/tfa/seq2seq/FinalBeamSearchDecoderOutput
+ - title: gather_tree_from_array
+ path: /api_docs/python/tfa/seq2seq/gather_tree_from_array
+ - title: GreedyEmbeddingSampler
+ path: /api_docs/python/tfa/seq2seq/GreedyEmbeddingSampler
+ - title: hardmax
+ path: /api_docs/python/tfa/seq2seq/hardmax
+ - title: InferenceSampler
+ path: /api_docs/python/tfa/seq2seq/InferenceSampler
+ - title: LuongAttention
+ path: /api_docs/python/tfa/seq2seq/LuongAttention
+ - title: LuongMonotonicAttention
+ path: /api_docs/python/tfa/seq2seq/LuongMonotonicAttention
+ - title: monotonic_attention
+ path: /api_docs/python/tfa/seq2seq/monotonic_attention
+ - title: safe_cumprod
+ path: /api_docs/python/tfa/seq2seq/safe_cumprod
+ - title: SampleEmbeddingSampler
+ path: /api_docs/python/tfa/seq2seq/SampleEmbeddingSampler
+ - title: Sampler
+ path: /api_docs/python/tfa/seq2seq/Sampler
+ - title: ScheduledEmbeddingTrainingSampler
+ path: /api_docs/python/tfa/seq2seq/ScheduledEmbeddingTrainingSampler
+ - title: ScheduledOutputTrainingSampler
+ path: /api_docs/python/tfa/seq2seq/ScheduledOutputTrainingSampler
+ - title: SequenceLoss
+ path: /api_docs/python/tfa/seq2seq/SequenceLoss
+ - title: sequence_loss
+ path: /api_docs/python/tfa/seq2seq/sequence_loss
+ - title: tile_batch
+ path: /api_docs/python/tfa/seq2seq/tile_batch
+ - title: TrainingSampler
+ path: /api_docs/python/tfa/seq2seq/TrainingSampler
+ - title: attention_wrapper
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/seq2seq/attention_wrapper
+ - title: basic_decoder
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/seq2seq/basic_decoder
+ - title: beam_search_decoder
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/seq2seq/beam_search_decoder
+ - title: attention_probs_from_attn_state
+ path: /api_docs/python/tfa/seq2seq/beam_search_decoder/attention_probs_from_attn_state
+ - title: BeamSearchDecoderMixin
+ path: /api_docs/python/tfa/seq2seq/beam_search_decoder/BeamSearchDecoderMixin
+ - title: get_attention_probs
+ path: /api_docs/python/tfa/seq2seq/beam_search_decoder/get_attention_probs
+ - title: decoder
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/seq2seq/decoder
+ - title: loss
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/seq2seq/loss
+ - title: sampler
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/seq2seq/sampler
+ - title: bernoulli_sample
+ path: /api_docs/python/tfa/seq2seq/sampler/bernoulli_sample
+ - title: categorical_sample
+ path: /api_docs/python/tfa/seq2seq/sampler/categorical_sample
+ - title: tfa.text
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/text
+ - title: skip_gram_sample
+ path: /api_docs/python/tfa/text/skip_gram_sample
+ - title: skip_gram_sample_with_text_vocab
+ path: /api_docs/python/tfa/text/skip_gram_sample_with_text_vocab
+ - title: skip_gram_ops
+ section:
+ - title: Overview
+ path: /api_docs/python/tfa/text/skip_gram_ops
diff --git a/docs/api_docs/python/index.md b/docs/api_docs/python/index.md
new file mode 100644
index 0000000000..c95e0535a4
--- /dev/null
+++ b/docs/api_docs/python/index.md
@@ -0,0 +1,179 @@
+# All symbols in TensorFlow Addons
+
+* tfa
+* tfa.activations
+* tfa.activations.sparsemax
+* tfa.image
+* tfa.image.adjust_hsv_in_yiq
+* tfa.image.dense_image_warp
+* tfa.image.distance_transform
+* tfa.image.distance_transform.euclidean_dist_transform
+* tfa.image.distort_image_ops
+* tfa.image.distort_image_ops.adjust_hsv_in_yiq
+* tfa.image.distort_image_ops.random_hsv_in_yiq
+* tfa.image.euclidean_dist_transform
+* tfa.image.filters
+* tfa.image.filters.mean_filter2d
+* tfa.image.filters.median_filter2d
+* tfa.image.interpolate_bilinear
+* tfa.image.mean_filter2d
+* tfa.image.median_filter2d
+* tfa.image.random_hsv_in_yiq
+* tfa.image.rotate
+* tfa.image.transform
+* tfa.image.transform_ops
+* tfa.image.transform_ops.angles_to_projective_transforms
+* tfa.image.transform_ops.compose_transforms
+* tfa.image.transform_ops.flat_transforms_to_matrices
+* tfa.image.transform_ops.matrices_to_flat_transforms
+* tfa.image.transform_ops.rotate
+* tfa.image.transform_ops.transform
+* tfa.layers
+* tfa.layers.GroupNormalization
+* tfa.layers.InstanceNormalization
+* tfa.layers.Maxout
+* tfa.layers.PoincareNormalize
+* tfa.layers.Sparsemax
+* tfa.layers.WeightNormalization
+* tfa.layers.maxout
+* tfa.layers.maxout.Maxout
+* tfa.layers.normalizations
+* tfa.layers.normalizations.GroupNormalization
+* tfa.layers.normalizations.InstanceNormalization
+* tfa.layers.poincare
+* tfa.layers.poincare.PoincareNormalize
+* tfa.layers.sparsemax
+* tfa.layers.sparsemax.Sparsemax
+* tfa.layers.sparsemax.sparsemax
+* tfa.layers.wrappers
+* tfa.layers.wrappers.WeightNormalization
+* tfa.losses
+* tfa.losses.ContrastiveLoss
+* tfa.losses.LiftedStructLoss
+* tfa.losses.SigmoidFocalCrossEntropy
+* tfa.losses.SparsemaxLoss
+* tfa.losses.TripletSemiHardLoss
+* tfa.losses.contrastive
+* tfa.losses.contrastive.ContrastiveLoss
+* tfa.losses.contrastive.contrastive_loss
+* tfa.losses.contrastive_loss
+* tfa.losses.focal_loss
+* tfa.losses.focal_loss.SigmoidFocalCrossEntropy
+* tfa.losses.focal_loss.sigmoid_focal_crossentropy
+* tfa.losses.lifted
+* tfa.losses.lifted.LiftedStructLoss
+* tfa.losses.lifted.lifted_struct_loss
+* tfa.losses.lifted_struct_loss
+* tfa.losses.metric_learning
+* tfa.losses.metric_learning.pairwise_distance
+* tfa.losses.sigmoid_focal_crossentropy
+* tfa.losses.sparsemax_loss
+* tfa.losses.triplet
+* tfa.losses.triplet.TripletSemiHardLoss
+* tfa.losses.triplet.triplet_semihard_loss
+* tfa.losses.triplet_semihard_loss
+* tfa.metrics
+* tfa.metrics.CohenKappa
+* tfa.metrics.cohens_kappa
+* tfa.metrics.cohens_kappa.CohenKappa
+* tfa.optimizers
+* tfa.optimizers.AdamW
+* tfa.optimizers.LazyAdam
+* tfa.optimizers.MovingAverage
+* tfa.optimizers.SGDW
+* tfa.optimizers.extend_with_decoupled_weight_decay
+* tfa.optimizers.lazy_adam
+* tfa.optimizers.lazy_adam.LazyAdam
+* tfa.optimizers.moving_average
+* tfa.optimizers.moving_average.MovingAverage
+* tfa.optimizers.weight_decay_optimizers
+* tfa.optimizers.weight_decay_optimizers.AdamW
+* tfa.optimizers.weight_decay_optimizers.DecoupledWeightDecayExtension
+* tfa.optimizers.weight_decay_optimizers.SGDW
+* tfa.optimizers.weight_decay_optimizers.extend_with_decoupled_weight_decay
+* tfa.rnn
+* tfa.rnn.LayerNormLSTMCell
+* tfa.rnn.NASCell
+* tfa.rnn.cell
+* tfa.rnn.cell.LayerNormLSTMCell
+* tfa.rnn.cell.NASCell
+* tfa.seq2seq
+* tfa.seq2seq.AttentionMechanism
+* tfa.seq2seq.AttentionWrapper
+* tfa.seq2seq.AttentionWrapperState
+* tfa.seq2seq.BahdanauAttention
+* tfa.seq2seq.BahdanauMonotonicAttention
+* tfa.seq2seq.BaseDecoder
+* tfa.seq2seq.BasicDecoder
+* tfa.seq2seq.BasicDecoderOutput
+* tfa.seq2seq.BeamSearchDecoder
+* tfa.seq2seq.BeamSearchDecoderOutput
+* tfa.seq2seq.BeamSearchDecoderState
+* tfa.seq2seq.CustomSampler
+* tfa.seq2seq.Decoder
+* tfa.seq2seq.FinalBeamSearchDecoderOutput
+* tfa.seq2seq.GreedyEmbeddingSampler
+* tfa.seq2seq.InferenceSampler
+* tfa.seq2seq.LuongAttention
+* tfa.seq2seq.LuongMonotonicAttention
+* tfa.seq2seq.SampleEmbeddingSampler
+* tfa.seq2seq.Sampler
+* tfa.seq2seq.ScheduledEmbeddingTrainingSampler
+* tfa.seq2seq.ScheduledOutputTrainingSampler
+* tfa.seq2seq.SequenceLoss
+* tfa.seq2seq.TrainingSampler
+* tfa.seq2seq.attention_wrapper
+* tfa.seq2seq.attention_wrapper.AttentionMechanism
+* tfa.seq2seq.attention_wrapper.AttentionWrapper
+* tfa.seq2seq.attention_wrapper.AttentionWrapperState
+* tfa.seq2seq.attention_wrapper.BahdanauAttention
+* tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention
+* tfa.seq2seq.attention_wrapper.LuongAttention
+* tfa.seq2seq.attention_wrapper.LuongMonotonicAttention
+* tfa.seq2seq.attention_wrapper.hardmax
+* tfa.seq2seq.attention_wrapper.monotonic_attention
+* tfa.seq2seq.attention_wrapper.safe_cumprod
+* tfa.seq2seq.basic_decoder
+* tfa.seq2seq.basic_decoder.BasicDecoder
+* tfa.seq2seq.basic_decoder.BasicDecoderOutput
+* tfa.seq2seq.beam_search_decoder
+* tfa.seq2seq.beam_search_decoder.BeamSearchDecoder
+* tfa.seq2seq.beam_search_decoder.BeamSearchDecoderMixin
+* tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput
+* tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState
+* tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput
+* tfa.seq2seq.beam_search_decoder.attention_probs_from_attn_state
+* tfa.seq2seq.beam_search_decoder.gather_tree_from_array
+* tfa.seq2seq.beam_search_decoder.get_attention_probs
+* tfa.seq2seq.beam_search_decoder.tile_batch
+* tfa.seq2seq.decoder
+* tfa.seq2seq.decoder.BaseDecoder
+* tfa.seq2seq.decoder.Decoder
+* tfa.seq2seq.decoder.dynamic_decode
+* tfa.seq2seq.dynamic_decode
+* tfa.seq2seq.gather_tree_from_array
+* tfa.seq2seq.hardmax
+* tfa.seq2seq.loss
+* tfa.seq2seq.loss.SequenceLoss
+* tfa.seq2seq.loss.sequence_loss
+* tfa.seq2seq.monotonic_attention
+* tfa.seq2seq.safe_cumprod
+* tfa.seq2seq.sampler
+* tfa.seq2seq.sampler.CustomSampler
+* tfa.seq2seq.sampler.GreedyEmbeddingSampler
+* tfa.seq2seq.sampler.InferenceSampler
+* tfa.seq2seq.sampler.SampleEmbeddingSampler
+* tfa.seq2seq.sampler.Sampler
+* tfa.seq2seq.sampler.ScheduledEmbeddingTrainingSampler
+* tfa.seq2seq.sampler.ScheduledOutputTrainingSampler
+* tfa.seq2seq.sampler.TrainingSampler
+* tfa.seq2seq.sampler.bernoulli_sample
+* tfa.seq2seq.sampler.categorical_sample
+* tfa.seq2seq.sequence_loss
+* tfa.seq2seq.tile_batch
+* tfa.text
+* tfa.text.skip_gram_ops
+* tfa.text.skip_gram_ops.skip_gram_sample
+* tfa.text.skip_gram_ops.skip_gram_sample_with_text_vocab
+* tfa.text.skip_gram_sample
+* tfa.text.skip_gram_sample_with_text_vocab
\ No newline at end of file
diff --git a/docs/api_docs/python/tfa.md b/docs/api_docs/python/tfa.md
new file mode 100644
index 0000000000..a4086ba235
--- /dev/null
+++ b/docs/api_docs/python/tfa.md
@@ -0,0 +1,36 @@
+
+
+
+
+
+# Module: tfa
+
+Useful extra functionality for TensorFlow, maintained by SIG-addons.
+
+
+
+Defined in [`__init__.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/__init__.py).
+
+
+
+
+## Modules
+
+[`activations`](./tfa/activations.md) module: A module containing activation routines.
+
+[`image`](./tfa/image.md) module: Image manipulation ops.
+
+[`layers`](./tfa/layers.md) module: Additional layers that conform to the Keras API.
+
+[`losses`](./tfa/losses.md) module: Additional losses that conform to the Keras API.
+
+[`metrics`](./tfa/metrics.md) module: A module containing metrics that conform to the Keras API.
+
+[`optimizers`](./tfa/optimizers.md) module: Additional optimizers that conform to the Keras API.
+
+[`rnn`](./tfa/rnn.md) module: Customized RNN cells.
+
+[`seq2seq`](./tfa/seq2seq.md) module: Ops for building neural network sequence-to-sequence decoders and losses.
+
+[`text`](./tfa/text.md) module: Text-processing ops.
+
diff --git a/docs/api_docs/python/tfa/_api_cache.json b/docs/api_docs/python/tfa/_api_cache.json
new file mode 100644
index 0000000000..1b18603244
--- /dev/null
+++ b/docs/api_docs/python/tfa/_api_cache.json
@@ -0,0 +1,3805 @@
+{
+ "duplicate_of": {
+ "tfa.image.absolute_import": "tfa.activations.absolute_import",
+ "tfa.image.distance_transform.absolute_import": "tfa.activations.absolute_import",
+ "tfa.image.distance_transform.division": "tfa.activations.division",
+ "tfa.image.distance_transform.euclidean_dist_transform": "tfa.image.euclidean_dist_transform",
+ "tfa.image.distance_transform.print_function": "tfa.activations.print_function",
+ "tfa.image.distort_image_ops.absolute_import": "tfa.activations.absolute_import",
+ "tfa.image.distort_image_ops.adjust_hsv_in_yiq": "tfa.image.adjust_hsv_in_yiq",
+ "tfa.image.distort_image_ops.division": "tfa.activations.division",
+ "tfa.image.distort_image_ops.print_function": "tfa.activations.print_function",
+ "tfa.image.distort_image_ops.random_hsv_in_yiq": "tfa.image.random_hsv_in_yiq",
+ "tfa.image.division": "tfa.activations.division",
+ "tfa.image.filters.absolute_import": "tfa.activations.absolute_import",
+ "tfa.image.filters.division": "tfa.activations.division",
+ "tfa.image.filters.mean_filter2d": "tfa.image.mean_filter2d",
+ "tfa.image.filters.median_filter2d": "tfa.image.median_filter2d",
+ "tfa.image.filters.print_function": "tfa.activations.print_function",
+ "tfa.image.print_function": "tfa.activations.print_function",
+ "tfa.image.transform_ops.absolute_import": "tfa.activations.absolute_import",
+ "tfa.image.transform_ops.division": "tfa.activations.division",
+ "tfa.image.transform_ops.print_function": "tfa.activations.print_function",
+ "tfa.image.transform_ops.rotate": "tfa.image.rotate",
+ "tfa.image.transform_ops.transform": "tfa.image.transform",
+ "tfa.layers.InstanceNormalization.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.layers.InstanceNormalization.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.layers.InstanceNormalization.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.layers.InstanceNormalization.input": "tfa.layers.GroupNormalization.input",
+ "tfa.layers.InstanceNormalization.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.layers.InstanceNormalization.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.layers.InstanceNormalization.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.layers.InstanceNormalization.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.layers.InstanceNormalization.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.layers.InstanceNormalization.name": "tfa.layers.GroupNormalization.name",
+ "tfa.layers.InstanceNormalization.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.layers.InstanceNormalization.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.layers.InstanceNormalization.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.layers.InstanceNormalization.output": "tfa.layers.GroupNormalization.output",
+ "tfa.layers.InstanceNormalization.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.layers.InstanceNormalization.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.layers.InstanceNormalization.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.layers.InstanceNormalization.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.layers.InstanceNormalization.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.layers.InstanceNormalization.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.layers.InstanceNormalization.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.layers.InstanceNormalization.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.layers.InstanceNormalization.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.layers.Maxout.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.layers.Maxout.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.layers.Maxout.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.layers.Maxout.input": "tfa.layers.GroupNormalization.input",
+ "tfa.layers.Maxout.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.layers.Maxout.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.layers.Maxout.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.layers.Maxout.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.layers.Maxout.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.layers.Maxout.name": "tfa.layers.GroupNormalization.name",
+ "tfa.layers.Maxout.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.layers.Maxout.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.layers.Maxout.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.layers.Maxout.output": "tfa.layers.GroupNormalization.output",
+ "tfa.layers.Maxout.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.layers.Maxout.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.layers.Maxout.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.layers.Maxout.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.layers.Maxout.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.layers.Maxout.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.layers.Maxout.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.layers.Maxout.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.layers.Maxout.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.layers.PoincareNormalize.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.layers.PoincareNormalize.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.layers.PoincareNormalize.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.layers.PoincareNormalize.input": "tfa.layers.GroupNormalization.input",
+ "tfa.layers.PoincareNormalize.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.layers.PoincareNormalize.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.layers.PoincareNormalize.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.layers.PoincareNormalize.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.layers.PoincareNormalize.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.layers.PoincareNormalize.name": "tfa.layers.GroupNormalization.name",
+ "tfa.layers.PoincareNormalize.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.layers.PoincareNormalize.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.layers.PoincareNormalize.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.layers.PoincareNormalize.output": "tfa.layers.GroupNormalization.output",
+ "tfa.layers.PoincareNormalize.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.layers.PoincareNormalize.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.layers.PoincareNormalize.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.layers.PoincareNormalize.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.layers.PoincareNormalize.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.layers.PoincareNormalize.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.layers.PoincareNormalize.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.layers.PoincareNormalize.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.layers.PoincareNormalize.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.layers.Sparsemax.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.layers.Sparsemax.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.layers.Sparsemax.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.layers.Sparsemax.input": "tfa.layers.GroupNormalization.input",
+ "tfa.layers.Sparsemax.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.layers.Sparsemax.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.layers.Sparsemax.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.layers.Sparsemax.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.layers.Sparsemax.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.layers.Sparsemax.name": "tfa.layers.GroupNormalization.name",
+ "tfa.layers.Sparsemax.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.layers.Sparsemax.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.layers.Sparsemax.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.layers.Sparsemax.output": "tfa.layers.GroupNormalization.output",
+ "tfa.layers.Sparsemax.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.layers.Sparsemax.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.layers.Sparsemax.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.layers.Sparsemax.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.layers.Sparsemax.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.layers.Sparsemax.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.layers.Sparsemax.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.layers.Sparsemax.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.layers.Sparsemax.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.layers.WeightNormalization.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.layers.WeightNormalization.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.layers.WeightNormalization.input": "tfa.layers.GroupNormalization.input",
+ "tfa.layers.WeightNormalization.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.layers.WeightNormalization.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.layers.WeightNormalization.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.layers.WeightNormalization.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.layers.WeightNormalization.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.layers.WeightNormalization.name": "tfa.layers.GroupNormalization.name",
+ "tfa.layers.WeightNormalization.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.layers.WeightNormalization.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.layers.WeightNormalization.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.layers.WeightNormalization.output": "tfa.layers.GroupNormalization.output",
+ "tfa.layers.WeightNormalization.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.layers.WeightNormalization.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.layers.WeightNormalization.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.layers.WeightNormalization.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.layers.WeightNormalization.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.layers.WeightNormalization.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.layers.WeightNormalization.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.layers.WeightNormalization.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.layers.WeightNormalization.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.layers.absolute_import": "tfa.activations.absolute_import",
+ "tfa.layers.division": "tfa.activations.division",
+ "tfa.layers.maxout.Maxout": "tfa.layers.Maxout",
+ "tfa.layers.maxout.Maxout.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.layers.maxout.Maxout.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.layers.maxout.Maxout.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.layers.maxout.Maxout.input": "tfa.layers.GroupNormalization.input",
+ "tfa.layers.maxout.Maxout.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.layers.maxout.Maxout.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.layers.maxout.Maxout.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.layers.maxout.Maxout.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.layers.maxout.Maxout.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.layers.maxout.Maxout.name": "tfa.layers.GroupNormalization.name",
+ "tfa.layers.maxout.Maxout.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.layers.maxout.Maxout.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.layers.maxout.Maxout.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.layers.maxout.Maxout.output": "tfa.layers.GroupNormalization.output",
+ "tfa.layers.maxout.Maxout.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.layers.maxout.Maxout.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.layers.maxout.Maxout.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.layers.maxout.Maxout.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.layers.maxout.Maxout.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.layers.maxout.Maxout.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.layers.maxout.Maxout.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.layers.maxout.Maxout.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.layers.maxout.Maxout.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.layers.maxout.absolute_import": "tfa.activations.absolute_import",
+ "tfa.layers.maxout.division": "tfa.activations.division",
+ "tfa.layers.maxout.print_function": "tfa.activations.print_function",
+ "tfa.layers.normalizations.GroupNormalization": "tfa.layers.GroupNormalization",
+ "tfa.layers.normalizations.GroupNormalization.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.layers.normalizations.GroupNormalization.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.layers.normalizations.GroupNormalization.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.layers.normalizations.GroupNormalization.input": "tfa.layers.GroupNormalization.input",
+ "tfa.layers.normalizations.GroupNormalization.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.layers.normalizations.GroupNormalization.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.layers.normalizations.GroupNormalization.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.layers.normalizations.GroupNormalization.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.layers.normalizations.GroupNormalization.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.layers.normalizations.GroupNormalization.name": "tfa.layers.GroupNormalization.name",
+ "tfa.layers.normalizations.GroupNormalization.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.layers.normalizations.GroupNormalization.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.layers.normalizations.GroupNormalization.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.layers.normalizations.GroupNormalization.output": "tfa.layers.GroupNormalization.output",
+ "tfa.layers.normalizations.GroupNormalization.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.layers.normalizations.GroupNormalization.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.layers.normalizations.GroupNormalization.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.layers.normalizations.GroupNormalization.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.layers.normalizations.GroupNormalization.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.layers.normalizations.GroupNormalization.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.layers.normalizations.GroupNormalization.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.layers.normalizations.GroupNormalization.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.layers.normalizations.GroupNormalization.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.layers.normalizations.InstanceNormalization": "tfa.layers.InstanceNormalization",
+ "tfa.layers.normalizations.InstanceNormalization.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.layers.normalizations.InstanceNormalization.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.layers.normalizations.InstanceNormalization.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.layers.normalizations.InstanceNormalization.input": "tfa.layers.GroupNormalization.input",
+ "tfa.layers.normalizations.InstanceNormalization.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.layers.normalizations.InstanceNormalization.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.layers.normalizations.InstanceNormalization.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.layers.normalizations.InstanceNormalization.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.layers.normalizations.InstanceNormalization.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.layers.normalizations.InstanceNormalization.name": "tfa.layers.GroupNormalization.name",
+ "tfa.layers.normalizations.InstanceNormalization.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.layers.normalizations.InstanceNormalization.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.layers.normalizations.InstanceNormalization.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.layers.normalizations.InstanceNormalization.output": "tfa.layers.GroupNormalization.output",
+ "tfa.layers.normalizations.InstanceNormalization.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.layers.normalizations.InstanceNormalization.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.layers.normalizations.InstanceNormalization.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.layers.normalizations.InstanceNormalization.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.layers.normalizations.InstanceNormalization.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.layers.normalizations.InstanceNormalization.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.layers.normalizations.InstanceNormalization.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.layers.normalizations.InstanceNormalization.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.layers.normalizations.InstanceNormalization.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.layers.normalizations.absolute_import": "tfa.activations.absolute_import",
+ "tfa.layers.normalizations.division": "tfa.activations.division",
+ "tfa.layers.normalizations.print_function": "tfa.activations.print_function",
+ "tfa.layers.poincare.PoincareNormalize": "tfa.layers.PoincareNormalize",
+ "tfa.layers.poincare.PoincareNormalize.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.layers.poincare.PoincareNormalize.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.layers.poincare.PoincareNormalize.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.layers.poincare.PoincareNormalize.input": "tfa.layers.GroupNormalization.input",
+ "tfa.layers.poincare.PoincareNormalize.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.layers.poincare.PoincareNormalize.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.layers.poincare.PoincareNormalize.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.layers.poincare.PoincareNormalize.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.layers.poincare.PoincareNormalize.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.layers.poincare.PoincareNormalize.name": "tfa.layers.GroupNormalization.name",
+ "tfa.layers.poincare.PoincareNormalize.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.layers.poincare.PoincareNormalize.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.layers.poincare.PoincareNormalize.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.layers.poincare.PoincareNormalize.output": "tfa.layers.GroupNormalization.output",
+ "tfa.layers.poincare.PoincareNormalize.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.layers.poincare.PoincareNormalize.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.layers.poincare.PoincareNormalize.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.layers.poincare.PoincareNormalize.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.layers.poincare.PoincareNormalize.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.layers.poincare.PoincareNormalize.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.layers.poincare.PoincareNormalize.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.layers.poincare.PoincareNormalize.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.layers.poincare.PoincareNormalize.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.layers.poincare.absolute_import": "tfa.activations.absolute_import",
+ "tfa.layers.poincare.division": "tfa.activations.division",
+ "tfa.layers.poincare.print_function": "tfa.activations.print_function",
+ "tfa.layers.print_function": "tfa.activations.print_function",
+ "tfa.layers.sparsemax.Sparsemax": "tfa.layers.Sparsemax",
+ "tfa.layers.sparsemax.Sparsemax.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.layers.sparsemax.Sparsemax.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.layers.sparsemax.Sparsemax.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.layers.sparsemax.Sparsemax.input": "tfa.layers.GroupNormalization.input",
+ "tfa.layers.sparsemax.Sparsemax.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.layers.sparsemax.Sparsemax.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.layers.sparsemax.Sparsemax.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.layers.sparsemax.Sparsemax.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.layers.sparsemax.Sparsemax.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.layers.sparsemax.Sparsemax.name": "tfa.layers.GroupNormalization.name",
+ "tfa.layers.sparsemax.Sparsemax.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.layers.sparsemax.Sparsemax.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.layers.sparsemax.Sparsemax.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.layers.sparsemax.Sparsemax.output": "tfa.layers.GroupNormalization.output",
+ "tfa.layers.sparsemax.Sparsemax.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.layers.sparsemax.Sparsemax.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.layers.sparsemax.Sparsemax.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.layers.sparsemax.Sparsemax.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.layers.sparsemax.Sparsemax.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.layers.sparsemax.Sparsemax.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.layers.sparsemax.Sparsemax.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.layers.sparsemax.Sparsemax.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.layers.sparsemax.Sparsemax.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.layers.sparsemax.absolute_import": "tfa.activations.absolute_import",
+ "tfa.layers.sparsemax.division": "tfa.activations.division",
+ "tfa.layers.sparsemax.print_function": "tfa.activations.print_function",
+ "tfa.layers.sparsemax.sparsemax": "tfa.activations.sparsemax",
+ "tfa.layers.wrappers.WeightNormalization": "tfa.layers.WeightNormalization",
+ "tfa.layers.wrappers.WeightNormalization.activity_regularizer": "tfa.layers.WeightNormalization.activity_regularizer",
+ "tfa.layers.wrappers.WeightNormalization.call": "tfa.layers.WeightNormalization.call",
+ "tfa.layers.wrappers.WeightNormalization.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.layers.wrappers.WeightNormalization.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.layers.wrappers.WeightNormalization.input": "tfa.layers.GroupNormalization.input",
+ "tfa.layers.wrappers.WeightNormalization.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.layers.wrappers.WeightNormalization.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.layers.wrappers.WeightNormalization.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.layers.wrappers.WeightNormalization.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.layers.wrappers.WeightNormalization.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.layers.wrappers.WeightNormalization.name": "tfa.layers.GroupNormalization.name",
+ "tfa.layers.wrappers.WeightNormalization.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.layers.wrappers.WeightNormalization.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.layers.wrappers.WeightNormalization.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.layers.wrappers.WeightNormalization.output": "tfa.layers.GroupNormalization.output",
+ "tfa.layers.wrappers.WeightNormalization.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.layers.wrappers.WeightNormalization.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.layers.wrappers.WeightNormalization.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.layers.wrappers.WeightNormalization.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.layers.wrappers.WeightNormalization.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.layers.wrappers.WeightNormalization.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.layers.wrappers.WeightNormalization.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.layers.wrappers.WeightNormalization.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.layers.wrappers.WeightNormalization.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.layers.wrappers.absolute_import": "tfa.activations.absolute_import",
+ "tfa.layers.wrappers.division": "tfa.activations.division",
+ "tfa.layers.wrappers.print_function": "tfa.activations.print_function",
+ "tfa.losses.absolute_import": "tfa.activations.absolute_import",
+ "tfa.losses.contrastive.ContrastiveLoss": "tfa.losses.ContrastiveLoss",
+ "tfa.losses.contrastive.absolute_import": "tfa.activations.absolute_import",
+ "tfa.losses.contrastive.contrastive_loss": "tfa.losses.contrastive_loss",
+ "tfa.losses.contrastive.division": "tfa.activations.division",
+ "tfa.losses.contrastive.print_function": "tfa.activations.print_function",
+ "tfa.losses.division": "tfa.activations.division",
+ "tfa.losses.focal_loss.SigmoidFocalCrossEntropy": "tfa.losses.SigmoidFocalCrossEntropy",
+ "tfa.losses.focal_loss.absolute_import": "tfa.activations.absolute_import",
+ "tfa.losses.focal_loss.division": "tfa.activations.division",
+ "tfa.losses.focal_loss.print_function": "tfa.activations.print_function",
+ "tfa.losses.focal_loss.sigmoid_focal_crossentropy": "tfa.losses.sigmoid_focal_crossentropy",
+ "tfa.losses.lifted.LiftedStructLoss": "tfa.losses.LiftedStructLoss",
+ "tfa.losses.lifted.absolute_import": "tfa.activations.absolute_import",
+ "tfa.losses.lifted.division": "tfa.activations.division",
+ "tfa.losses.lifted.lifted_struct_loss": "tfa.losses.lifted_struct_loss",
+ "tfa.losses.lifted.print_function": "tfa.activations.print_function",
+ "tfa.losses.metric_learning.absolute_import": "tfa.activations.absolute_import",
+ "tfa.losses.metric_learning.division": "tfa.activations.division",
+ "tfa.losses.metric_learning.print_function": "tfa.activations.print_function",
+ "tfa.losses.print_function": "tfa.activations.print_function",
+ "tfa.losses.triplet.TripletSemiHardLoss": "tfa.losses.TripletSemiHardLoss",
+ "tfa.losses.triplet.absolute_import": "tfa.activations.absolute_import",
+ "tfa.losses.triplet.division": "tfa.activations.division",
+ "tfa.losses.triplet.print_function": "tfa.activations.print_function",
+ "tfa.losses.triplet.triplet_semihard_loss": "tfa.losses.triplet_semihard_loss",
+ "tfa.metrics.CohenKappa.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.metrics.CohenKappa.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.metrics.CohenKappa.input": "tfa.layers.GroupNormalization.input",
+ "tfa.metrics.CohenKappa.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.metrics.CohenKappa.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.metrics.CohenKappa.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.metrics.CohenKappa.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.metrics.CohenKappa.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.metrics.CohenKappa.name": "tfa.layers.GroupNormalization.name",
+ "tfa.metrics.CohenKappa.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.metrics.CohenKappa.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.metrics.CohenKappa.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.metrics.CohenKappa.output": "tfa.layers.GroupNormalization.output",
+ "tfa.metrics.CohenKappa.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.metrics.CohenKappa.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.metrics.CohenKappa.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.metrics.CohenKappa.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.metrics.CohenKappa.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.metrics.CohenKappa.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.metrics.CohenKappa.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.metrics.CohenKappa.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.metrics.CohenKappa.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.metrics.absolute_import": "tfa.activations.absolute_import",
+ "tfa.metrics.cohens_kappa.CohenKappa": "tfa.metrics.CohenKappa",
+ "tfa.metrics.cohens_kappa.CohenKappa.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.metrics.cohens_kappa.CohenKappa.dtype": "tfa.metrics.CohenKappa.dtype",
+ "tfa.metrics.cohens_kappa.CohenKappa.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.metrics.cohens_kappa.CohenKappa.input": "tfa.layers.GroupNormalization.input",
+ "tfa.metrics.cohens_kappa.CohenKappa.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.metrics.cohens_kappa.CohenKappa.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.metrics.cohens_kappa.CohenKappa.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.metrics.cohens_kappa.CohenKappa.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.metrics.cohens_kappa.CohenKappa.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.metrics.cohens_kappa.CohenKappa.name": "tfa.layers.GroupNormalization.name",
+ "tfa.metrics.cohens_kappa.CohenKappa.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.metrics.cohens_kappa.CohenKappa.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.metrics.cohens_kappa.CohenKappa.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.metrics.cohens_kappa.CohenKappa.output": "tfa.layers.GroupNormalization.output",
+ "tfa.metrics.cohens_kappa.CohenKappa.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.metrics.cohens_kappa.CohenKappa.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.metrics.cohens_kappa.CohenKappa.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.metrics.cohens_kappa.CohenKappa.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.metrics.cohens_kappa.CohenKappa.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.metrics.cohens_kappa.CohenKappa.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.metrics.cohens_kappa.CohenKappa.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.metrics.cohens_kappa.CohenKappa.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.metrics.cohens_kappa.CohenKappa.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.metrics.cohens_kappa.absolute_import": "tfa.activations.absolute_import",
+ "tfa.metrics.cohens_kappa.division": "tfa.activations.division",
+ "tfa.metrics.cohens_kappa.print_function": "tfa.activations.print_function",
+ "tfa.metrics.division": "tfa.activations.division",
+ "tfa.metrics.print_function": "tfa.activations.print_function",
+ "tfa.optimizers.LazyAdam.iterations": "tfa.optimizers.AdamW.iterations",
+ "tfa.optimizers.LazyAdam.weights": "tfa.optimizers.AdamW.weights",
+ "tfa.optimizers.MovingAverage.iterations": "tfa.optimizers.AdamW.iterations",
+ "tfa.optimizers.SGDW.iterations": "tfa.optimizers.AdamW.iterations",
+ "tfa.optimizers.SGDW.weights": "tfa.optimizers.AdamW.weights",
+ "tfa.optimizers.absolute_import": "tfa.activations.absolute_import",
+ "tfa.optimizers.division": "tfa.activations.division",
+ "tfa.optimizers.lazy_adam.LazyAdam": "tfa.optimizers.LazyAdam",
+ "tfa.optimizers.lazy_adam.LazyAdam.iterations": "tfa.optimizers.AdamW.iterations",
+ "tfa.optimizers.lazy_adam.LazyAdam.weights": "tfa.optimizers.AdamW.weights",
+ "tfa.optimizers.lazy_adam.absolute_import": "tfa.activations.absolute_import",
+ "tfa.optimizers.lazy_adam.division": "tfa.activations.division",
+ "tfa.optimizers.lazy_adam.print_function": "tfa.activations.print_function",
+ "tfa.optimizers.moving_average.MovingAverage": "tfa.optimizers.MovingAverage",
+ "tfa.optimizers.moving_average.MovingAverage.iterations": "tfa.optimizers.AdamW.iterations",
+ "tfa.optimizers.moving_average.MovingAverage.weights": "tfa.optimizers.MovingAverage.weights",
+ "tfa.optimizers.moving_average.absolute_import": "tfa.activations.absolute_import",
+ "tfa.optimizers.moving_average.division": "tfa.activations.division",
+ "tfa.optimizers.moving_average.print_function": "tfa.activations.print_function",
+ "tfa.optimizers.print_function": "tfa.activations.print_function",
+ "tfa.optimizers.weight_decay_optimizers.AdamW": "tfa.optimizers.AdamW",
+ "tfa.optimizers.weight_decay_optimizers.AdamW.iterations": "tfa.optimizers.AdamW.iterations",
+ "tfa.optimizers.weight_decay_optimizers.AdamW.weights": "tfa.optimizers.AdamW.weights",
+ "tfa.optimizers.weight_decay_optimizers.SGDW": "tfa.optimizers.SGDW",
+ "tfa.optimizers.weight_decay_optimizers.SGDW.iterations": "tfa.optimizers.AdamW.iterations",
+ "tfa.optimizers.weight_decay_optimizers.SGDW.weights": "tfa.optimizers.AdamW.weights",
+ "tfa.optimizers.weight_decay_optimizers.absolute_import": "tfa.activations.absolute_import",
+ "tfa.optimizers.weight_decay_optimizers.division": "tfa.activations.division",
+ "tfa.optimizers.weight_decay_optimizers.extend_with_decoupled_weight_decay": "tfa.optimizers.extend_with_decoupled_weight_decay",
+ "tfa.optimizers.weight_decay_optimizers.print_function": "tfa.activations.print_function",
+ "tfa.rnn.LayerNormLSTMCell.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.rnn.LayerNormLSTMCell.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.rnn.LayerNormLSTMCell.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.rnn.LayerNormLSTMCell.input": "tfa.layers.GroupNormalization.input",
+ "tfa.rnn.LayerNormLSTMCell.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.rnn.LayerNormLSTMCell.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.rnn.LayerNormLSTMCell.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.rnn.LayerNormLSTMCell.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.rnn.LayerNormLSTMCell.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.rnn.LayerNormLSTMCell.name": "tfa.layers.GroupNormalization.name",
+ "tfa.rnn.LayerNormLSTMCell.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.rnn.LayerNormLSTMCell.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.rnn.LayerNormLSTMCell.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.rnn.LayerNormLSTMCell.output": "tfa.layers.GroupNormalization.output",
+ "tfa.rnn.LayerNormLSTMCell.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.rnn.LayerNormLSTMCell.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.rnn.LayerNormLSTMCell.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.rnn.LayerNormLSTMCell.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.rnn.LayerNormLSTMCell.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.rnn.LayerNormLSTMCell.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.rnn.LayerNormLSTMCell.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.rnn.LayerNormLSTMCell.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.rnn.LayerNormLSTMCell.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.rnn.NASCell.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.rnn.NASCell.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.rnn.NASCell.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.rnn.NASCell.input": "tfa.layers.GroupNormalization.input",
+ "tfa.rnn.NASCell.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.rnn.NASCell.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.rnn.NASCell.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.rnn.NASCell.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.rnn.NASCell.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.rnn.NASCell.name": "tfa.layers.GroupNormalization.name",
+ "tfa.rnn.NASCell.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.rnn.NASCell.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.rnn.NASCell.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.rnn.NASCell.output": "tfa.layers.GroupNormalization.output",
+ "tfa.rnn.NASCell.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.rnn.NASCell.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.rnn.NASCell.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.rnn.NASCell.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.rnn.NASCell.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.rnn.NASCell.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.rnn.NASCell.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.rnn.NASCell.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.rnn.NASCell.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.rnn.absolute_import": "tfa.activations.absolute_import",
+ "tfa.rnn.cell.LayerNormLSTMCell": "tfa.rnn.LayerNormLSTMCell",
+ "tfa.rnn.cell.LayerNormLSTMCell.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.rnn.cell.LayerNormLSTMCell.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.rnn.cell.LayerNormLSTMCell.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.rnn.cell.LayerNormLSTMCell.input": "tfa.layers.GroupNormalization.input",
+ "tfa.rnn.cell.LayerNormLSTMCell.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.rnn.cell.LayerNormLSTMCell.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.rnn.cell.LayerNormLSTMCell.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.rnn.cell.LayerNormLSTMCell.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.rnn.cell.LayerNormLSTMCell.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.rnn.cell.LayerNormLSTMCell.name": "tfa.layers.GroupNormalization.name",
+ "tfa.rnn.cell.LayerNormLSTMCell.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.rnn.cell.LayerNormLSTMCell.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.rnn.cell.LayerNormLSTMCell.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.rnn.cell.LayerNormLSTMCell.output": "tfa.layers.GroupNormalization.output",
+ "tfa.rnn.cell.LayerNormLSTMCell.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.rnn.cell.LayerNormLSTMCell.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.rnn.cell.LayerNormLSTMCell.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.rnn.cell.LayerNormLSTMCell.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.rnn.cell.LayerNormLSTMCell.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.rnn.cell.LayerNormLSTMCell.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.rnn.cell.LayerNormLSTMCell.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.rnn.cell.LayerNormLSTMCell.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.rnn.cell.LayerNormLSTMCell.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.rnn.cell.NASCell": "tfa.rnn.NASCell",
+ "tfa.rnn.cell.NASCell.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.rnn.cell.NASCell.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.rnn.cell.NASCell.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.rnn.cell.NASCell.input": "tfa.layers.GroupNormalization.input",
+ "tfa.rnn.cell.NASCell.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.rnn.cell.NASCell.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.rnn.cell.NASCell.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.rnn.cell.NASCell.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.rnn.cell.NASCell.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.rnn.cell.NASCell.name": "tfa.layers.GroupNormalization.name",
+ "tfa.rnn.cell.NASCell.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.rnn.cell.NASCell.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.rnn.cell.NASCell.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.rnn.cell.NASCell.output": "tfa.layers.GroupNormalization.output",
+ "tfa.rnn.cell.NASCell.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.rnn.cell.NASCell.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.rnn.cell.NASCell.output_size": "tfa.rnn.NASCell.output_size",
+ "tfa.rnn.cell.NASCell.state_size": "tfa.rnn.NASCell.state_size",
+ "tfa.rnn.cell.NASCell.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.rnn.cell.NASCell.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.rnn.cell.NASCell.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.rnn.cell.NASCell.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.rnn.cell.NASCell.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.rnn.cell.NASCell.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.rnn.cell.NASCell.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.rnn.cell.absolute_import": "tfa.activations.absolute_import",
+ "tfa.rnn.cell.division": "tfa.activations.division",
+ "tfa.rnn.cell.print_function": "tfa.activations.print_function",
+ "tfa.rnn.division": "tfa.activations.division",
+ "tfa.rnn.print_function": "tfa.activations.print_function",
+ "tfa.seq2seq.AttentionWrapper.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.seq2seq.AttentionWrapper.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.seq2seq.AttentionWrapper.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.seq2seq.AttentionWrapper.input": "tfa.layers.GroupNormalization.input",
+ "tfa.seq2seq.AttentionWrapper.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.seq2seq.AttentionWrapper.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.seq2seq.AttentionWrapper.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.seq2seq.AttentionWrapper.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.seq2seq.AttentionWrapper.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.seq2seq.AttentionWrapper.name": "tfa.layers.GroupNormalization.name",
+ "tfa.seq2seq.AttentionWrapper.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.seq2seq.AttentionWrapper.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.seq2seq.AttentionWrapper.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.seq2seq.AttentionWrapper.output": "tfa.layers.GroupNormalization.output",
+ "tfa.seq2seq.AttentionWrapper.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.seq2seq.AttentionWrapper.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.seq2seq.AttentionWrapper.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.seq2seq.AttentionWrapper.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.seq2seq.AttentionWrapper.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.seq2seq.AttentionWrapper.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.seq2seq.AttentionWrapper.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.seq2seq.AttentionWrapper.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.seq2seq.AttentionWrapper.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.seq2seq.AttentionWrapperState.__init__": "tfa.seq2seq.AttentionMechanism.__init__",
+ "tfa.seq2seq.BahdanauAttention.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.seq2seq.BahdanauAttention.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.seq2seq.BahdanauAttention.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.seq2seq.BahdanauAttention.input": "tfa.layers.GroupNormalization.input",
+ "tfa.seq2seq.BahdanauAttention.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.seq2seq.BahdanauAttention.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.seq2seq.BahdanauAttention.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.seq2seq.BahdanauAttention.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.seq2seq.BahdanauAttention.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.seq2seq.BahdanauAttention.name": "tfa.layers.GroupNormalization.name",
+ "tfa.seq2seq.BahdanauAttention.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.seq2seq.BahdanauAttention.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.seq2seq.BahdanauAttention.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.seq2seq.BahdanauAttention.output": "tfa.layers.GroupNormalization.output",
+ "tfa.seq2seq.BahdanauAttention.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.seq2seq.BahdanauAttention.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.seq2seq.BahdanauAttention.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.seq2seq.BahdanauAttention.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.seq2seq.BahdanauAttention.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.seq2seq.BahdanauAttention.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.seq2seq.BahdanauAttention.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.seq2seq.BahdanauAttention.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.seq2seq.BahdanauAttention.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.seq2seq.BahdanauMonotonicAttention.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.seq2seq.BahdanauMonotonicAttention.alignments_size": "tfa.seq2seq.BahdanauAttention.alignments_size",
+ "tfa.seq2seq.BahdanauMonotonicAttention.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.seq2seq.BahdanauMonotonicAttention.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.seq2seq.BahdanauMonotonicAttention.input": "tfa.layers.GroupNormalization.input",
+ "tfa.seq2seq.BahdanauMonotonicAttention.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.seq2seq.BahdanauMonotonicAttention.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.seq2seq.BahdanauMonotonicAttention.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.seq2seq.BahdanauMonotonicAttention.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.seq2seq.BahdanauMonotonicAttention.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.seq2seq.BahdanauMonotonicAttention.name": "tfa.layers.GroupNormalization.name",
+ "tfa.seq2seq.BahdanauMonotonicAttention.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.seq2seq.BahdanauMonotonicAttention.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.seq2seq.BahdanauMonotonicAttention.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.seq2seq.BahdanauMonotonicAttention.output": "tfa.layers.GroupNormalization.output",
+ "tfa.seq2seq.BahdanauMonotonicAttention.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.seq2seq.BahdanauMonotonicAttention.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.seq2seq.BahdanauMonotonicAttention.state_size": "tfa.seq2seq.BahdanauAttention.state_size",
+ "tfa.seq2seq.BahdanauMonotonicAttention.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.seq2seq.BahdanauMonotonicAttention.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.seq2seq.BahdanauMonotonicAttention.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.seq2seq.BahdanauMonotonicAttention.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.seq2seq.BahdanauMonotonicAttention.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.seq2seq.BahdanauMonotonicAttention.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.seq2seq.BahdanauMonotonicAttention.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.seq2seq.BaseDecoder.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.seq2seq.BaseDecoder.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.seq2seq.BaseDecoder.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.seq2seq.BaseDecoder.input": "tfa.layers.GroupNormalization.input",
+ "tfa.seq2seq.BaseDecoder.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.seq2seq.BaseDecoder.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.seq2seq.BaseDecoder.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.seq2seq.BaseDecoder.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.seq2seq.BaseDecoder.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.seq2seq.BaseDecoder.name": "tfa.layers.GroupNormalization.name",
+ "tfa.seq2seq.BaseDecoder.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.seq2seq.BaseDecoder.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.seq2seq.BaseDecoder.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.seq2seq.BaseDecoder.output": "tfa.layers.GroupNormalization.output",
+ "tfa.seq2seq.BaseDecoder.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.seq2seq.BaseDecoder.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.seq2seq.BaseDecoder.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.seq2seq.BaseDecoder.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.seq2seq.BaseDecoder.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.seq2seq.BaseDecoder.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.seq2seq.BaseDecoder.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.seq2seq.BaseDecoder.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.seq2seq.BaseDecoder.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.seq2seq.BasicDecoder.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.seq2seq.BasicDecoder.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.seq2seq.BasicDecoder.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.seq2seq.BasicDecoder.input": "tfa.layers.GroupNormalization.input",
+ "tfa.seq2seq.BasicDecoder.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.seq2seq.BasicDecoder.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.seq2seq.BasicDecoder.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.seq2seq.BasicDecoder.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.seq2seq.BasicDecoder.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.seq2seq.BasicDecoder.name": "tfa.layers.GroupNormalization.name",
+ "tfa.seq2seq.BasicDecoder.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.seq2seq.BasicDecoder.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.seq2seq.BasicDecoder.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.seq2seq.BasicDecoder.output": "tfa.layers.GroupNormalization.output",
+ "tfa.seq2seq.BasicDecoder.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.seq2seq.BasicDecoder.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.seq2seq.BasicDecoder.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.seq2seq.BasicDecoder.tracks_own_finished": "tfa.seq2seq.BaseDecoder.tracks_own_finished",
+ "tfa.seq2seq.BasicDecoder.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.seq2seq.BasicDecoder.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.seq2seq.BasicDecoder.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.seq2seq.BasicDecoder.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.seq2seq.BasicDecoder.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.seq2seq.BasicDecoder.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.seq2seq.BasicDecoderOutput.__add__": "tfa.seq2seq.AttentionWrapperState.__add__",
+ "tfa.seq2seq.BasicDecoderOutput.__contains__": "tfa.seq2seq.AttentionWrapperState.__contains__",
+ "tfa.seq2seq.BasicDecoderOutput.__eq__": "tfa.seq2seq.AttentionWrapperState.__eq__",
+ "tfa.seq2seq.BasicDecoderOutput.__ge__": "tfa.seq2seq.AttentionWrapperState.__ge__",
+ "tfa.seq2seq.BasicDecoderOutput.__getitem__": "tfa.seq2seq.AttentionWrapperState.__getitem__",
+ "tfa.seq2seq.BasicDecoderOutput.__gt__": "tfa.seq2seq.AttentionWrapperState.__gt__",
+ "tfa.seq2seq.BasicDecoderOutput.__init__": "tfa.seq2seq.AttentionMechanism.__init__",
+ "tfa.seq2seq.BasicDecoderOutput.__iter__": "tfa.seq2seq.AttentionWrapperState.__iter__",
+ "tfa.seq2seq.BasicDecoderOutput.__le__": "tfa.seq2seq.AttentionWrapperState.__le__",
+ "tfa.seq2seq.BasicDecoderOutput.__len__": "tfa.seq2seq.AttentionWrapperState.__len__",
+ "tfa.seq2seq.BasicDecoderOutput.__lt__": "tfa.seq2seq.AttentionWrapperState.__lt__",
+ "tfa.seq2seq.BasicDecoderOutput.__mul__": "tfa.seq2seq.AttentionWrapperState.__mul__",
+ "tfa.seq2seq.BasicDecoderOutput.__ne__": "tfa.seq2seq.AttentionWrapperState.__ne__",
+ "tfa.seq2seq.BasicDecoderOutput.__rmul__": "tfa.seq2seq.AttentionWrapperState.__rmul__",
+ "tfa.seq2seq.BasicDecoderOutput.count": "tfa.seq2seq.AttentionWrapperState.count",
+ "tfa.seq2seq.BasicDecoderOutput.index": "tfa.seq2seq.AttentionWrapperState.index",
+ "tfa.seq2seq.BeamSearchDecoder.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.seq2seq.BeamSearchDecoder.batch_size": "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderMixin.batch_size",
+ "tfa.seq2seq.BeamSearchDecoder.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.seq2seq.BeamSearchDecoder.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.seq2seq.BeamSearchDecoder.input": "tfa.layers.GroupNormalization.input",
+ "tfa.seq2seq.BeamSearchDecoder.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.seq2seq.BeamSearchDecoder.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.seq2seq.BeamSearchDecoder.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.seq2seq.BeamSearchDecoder.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.seq2seq.BeamSearchDecoder.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.seq2seq.BeamSearchDecoder.name": "tfa.layers.GroupNormalization.name",
+ "tfa.seq2seq.BeamSearchDecoder.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.seq2seq.BeamSearchDecoder.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.seq2seq.BeamSearchDecoder.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.seq2seq.BeamSearchDecoder.output": "tfa.layers.GroupNormalization.output",
+ "tfa.seq2seq.BeamSearchDecoder.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.seq2seq.BeamSearchDecoder.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.seq2seq.BeamSearchDecoder.output_size": "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderMixin.output_size",
+ "tfa.seq2seq.BeamSearchDecoder.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.seq2seq.BeamSearchDecoder.tracks_own_finished": "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderMixin.tracks_own_finished",
+ "tfa.seq2seq.BeamSearchDecoder.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.seq2seq.BeamSearchDecoder.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.seq2seq.BeamSearchDecoder.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.seq2seq.BeamSearchDecoder.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.seq2seq.BeamSearchDecoder.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.seq2seq.BeamSearchDecoder.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.seq2seq.BeamSearchDecoderOutput.__add__": "tfa.seq2seq.AttentionWrapperState.__add__",
+ "tfa.seq2seq.BeamSearchDecoderOutput.__contains__": "tfa.seq2seq.AttentionWrapperState.__contains__",
+ "tfa.seq2seq.BeamSearchDecoderOutput.__eq__": "tfa.seq2seq.AttentionWrapperState.__eq__",
+ "tfa.seq2seq.BeamSearchDecoderOutput.__ge__": "tfa.seq2seq.AttentionWrapperState.__ge__",
+ "tfa.seq2seq.BeamSearchDecoderOutput.__getitem__": "tfa.seq2seq.AttentionWrapperState.__getitem__",
+ "tfa.seq2seq.BeamSearchDecoderOutput.__gt__": "tfa.seq2seq.AttentionWrapperState.__gt__",
+ "tfa.seq2seq.BeamSearchDecoderOutput.__init__": "tfa.seq2seq.AttentionMechanism.__init__",
+ "tfa.seq2seq.BeamSearchDecoderOutput.__iter__": "tfa.seq2seq.AttentionWrapperState.__iter__",
+ "tfa.seq2seq.BeamSearchDecoderOutput.__le__": "tfa.seq2seq.AttentionWrapperState.__le__",
+ "tfa.seq2seq.BeamSearchDecoderOutput.__len__": "tfa.seq2seq.AttentionWrapperState.__len__",
+ "tfa.seq2seq.BeamSearchDecoderOutput.__lt__": "tfa.seq2seq.AttentionWrapperState.__lt__",
+ "tfa.seq2seq.BeamSearchDecoderOutput.__mul__": "tfa.seq2seq.AttentionWrapperState.__mul__",
+ "tfa.seq2seq.BeamSearchDecoderOutput.__ne__": "tfa.seq2seq.AttentionWrapperState.__ne__",
+ "tfa.seq2seq.BeamSearchDecoderOutput.__rmul__": "tfa.seq2seq.AttentionWrapperState.__rmul__",
+ "tfa.seq2seq.BeamSearchDecoderOutput.count": "tfa.seq2seq.AttentionWrapperState.count",
+ "tfa.seq2seq.BeamSearchDecoderOutput.index": "tfa.seq2seq.AttentionWrapperState.index",
+ "tfa.seq2seq.BeamSearchDecoderState.__add__": "tfa.seq2seq.AttentionWrapperState.__add__",
+ "tfa.seq2seq.BeamSearchDecoderState.__contains__": "tfa.seq2seq.AttentionWrapperState.__contains__",
+ "tfa.seq2seq.BeamSearchDecoderState.__eq__": "tfa.seq2seq.AttentionWrapperState.__eq__",
+ "tfa.seq2seq.BeamSearchDecoderState.__ge__": "tfa.seq2seq.AttentionWrapperState.__ge__",
+ "tfa.seq2seq.BeamSearchDecoderState.__getitem__": "tfa.seq2seq.AttentionWrapperState.__getitem__",
+ "tfa.seq2seq.BeamSearchDecoderState.__gt__": "tfa.seq2seq.AttentionWrapperState.__gt__",
+ "tfa.seq2seq.BeamSearchDecoderState.__init__": "tfa.seq2seq.AttentionMechanism.__init__",
+ "tfa.seq2seq.BeamSearchDecoderState.__iter__": "tfa.seq2seq.AttentionWrapperState.__iter__",
+ "tfa.seq2seq.BeamSearchDecoderState.__le__": "tfa.seq2seq.AttentionWrapperState.__le__",
+ "tfa.seq2seq.BeamSearchDecoderState.__len__": "tfa.seq2seq.AttentionWrapperState.__len__",
+ "tfa.seq2seq.BeamSearchDecoderState.__lt__": "tfa.seq2seq.AttentionWrapperState.__lt__",
+ "tfa.seq2seq.BeamSearchDecoderState.__mul__": "tfa.seq2seq.AttentionWrapperState.__mul__",
+ "tfa.seq2seq.BeamSearchDecoderState.__ne__": "tfa.seq2seq.AttentionWrapperState.__ne__",
+ "tfa.seq2seq.BeamSearchDecoderState.__rmul__": "tfa.seq2seq.AttentionWrapperState.__rmul__",
+ "tfa.seq2seq.BeamSearchDecoderState.count": "tfa.seq2seq.AttentionWrapperState.count",
+ "tfa.seq2seq.BeamSearchDecoderState.index": "tfa.seq2seq.AttentionWrapperState.index",
+ "tfa.seq2seq.Decoder.__init__": "tfa.seq2seq.AttentionMechanism.__init__",
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__add__": "tfa.seq2seq.AttentionWrapperState.__add__",
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__contains__": "tfa.seq2seq.AttentionWrapperState.__contains__",
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__eq__": "tfa.seq2seq.AttentionWrapperState.__eq__",
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__ge__": "tfa.seq2seq.AttentionWrapperState.__ge__",
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__getitem__": "tfa.seq2seq.AttentionWrapperState.__getitem__",
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__gt__": "tfa.seq2seq.AttentionWrapperState.__gt__",
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__init__": "tfa.seq2seq.AttentionMechanism.__init__",
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__iter__": "tfa.seq2seq.AttentionWrapperState.__iter__",
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__le__": "tfa.seq2seq.AttentionWrapperState.__le__",
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__len__": "tfa.seq2seq.AttentionWrapperState.__len__",
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__lt__": "tfa.seq2seq.AttentionWrapperState.__lt__",
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__mul__": "tfa.seq2seq.AttentionWrapperState.__mul__",
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__ne__": "tfa.seq2seq.AttentionWrapperState.__ne__",
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__rmul__": "tfa.seq2seq.AttentionWrapperState.__rmul__",
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.count": "tfa.seq2seq.AttentionWrapperState.count",
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.index": "tfa.seq2seq.AttentionWrapperState.index",
+ "tfa.seq2seq.LuongAttention.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.seq2seq.LuongAttention.alignments_size": "tfa.seq2seq.BahdanauAttention.alignments_size",
+ "tfa.seq2seq.LuongAttention.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.seq2seq.LuongAttention.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.seq2seq.LuongAttention.input": "tfa.layers.GroupNormalization.input",
+ "tfa.seq2seq.LuongAttention.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.seq2seq.LuongAttention.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.seq2seq.LuongAttention.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.seq2seq.LuongAttention.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.seq2seq.LuongAttention.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.seq2seq.LuongAttention.name": "tfa.layers.GroupNormalization.name",
+ "tfa.seq2seq.LuongAttention.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.seq2seq.LuongAttention.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.seq2seq.LuongAttention.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.seq2seq.LuongAttention.output": "tfa.layers.GroupNormalization.output",
+ "tfa.seq2seq.LuongAttention.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.seq2seq.LuongAttention.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.seq2seq.LuongAttention.state_size": "tfa.seq2seq.BahdanauAttention.state_size",
+ "tfa.seq2seq.LuongAttention.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.seq2seq.LuongAttention.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.seq2seq.LuongAttention.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.seq2seq.LuongAttention.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.seq2seq.LuongAttention.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.seq2seq.LuongAttention.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.seq2seq.LuongAttention.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.seq2seq.LuongMonotonicAttention.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.seq2seq.LuongMonotonicAttention.alignments_size": "tfa.seq2seq.BahdanauAttention.alignments_size",
+ "tfa.seq2seq.LuongMonotonicAttention.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.seq2seq.LuongMonotonicAttention.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.seq2seq.LuongMonotonicAttention.input": "tfa.layers.GroupNormalization.input",
+ "tfa.seq2seq.LuongMonotonicAttention.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.seq2seq.LuongMonotonicAttention.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.seq2seq.LuongMonotonicAttention.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.seq2seq.LuongMonotonicAttention.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.seq2seq.LuongMonotonicAttention.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.seq2seq.LuongMonotonicAttention.name": "tfa.layers.GroupNormalization.name",
+ "tfa.seq2seq.LuongMonotonicAttention.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.seq2seq.LuongMonotonicAttention.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.seq2seq.LuongMonotonicAttention.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.seq2seq.LuongMonotonicAttention.output": "tfa.layers.GroupNormalization.output",
+ "tfa.seq2seq.LuongMonotonicAttention.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.seq2seq.LuongMonotonicAttention.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.seq2seq.LuongMonotonicAttention.state_size": "tfa.seq2seq.BahdanauAttention.state_size",
+ "tfa.seq2seq.LuongMonotonicAttention.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.seq2seq.LuongMonotonicAttention.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.seq2seq.LuongMonotonicAttention.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.seq2seq.LuongMonotonicAttention.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.seq2seq.LuongMonotonicAttention.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.seq2seq.LuongMonotonicAttention.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.seq2seq.LuongMonotonicAttention.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.seq2seq.SampleEmbeddingSampler.batch_size": "tfa.seq2seq.GreedyEmbeddingSampler.batch_size",
+ "tfa.seq2seq.SampleEmbeddingSampler.sample_ids_dtype": "tfa.seq2seq.GreedyEmbeddingSampler.sample_ids_dtype",
+ "tfa.seq2seq.SampleEmbeddingSampler.sample_ids_shape": "tfa.seq2seq.GreedyEmbeddingSampler.sample_ids_shape",
+ "tfa.seq2seq.Sampler.__init__": "tfa.seq2seq.AttentionMechanism.__init__",
+ "tfa.seq2seq.ScheduledEmbeddingTrainingSampler.batch_size": "tfa.seq2seq.TrainingSampler.batch_size",
+ "tfa.seq2seq.ScheduledEmbeddingTrainingSampler.sample_ids_dtype": "tfa.seq2seq.TrainingSampler.sample_ids_dtype",
+ "tfa.seq2seq.ScheduledEmbeddingTrainingSampler.sample_ids_shape": "tfa.seq2seq.TrainingSampler.sample_ids_shape",
+ "tfa.seq2seq.ScheduledOutputTrainingSampler.batch_size": "tfa.seq2seq.TrainingSampler.batch_size",
+ "tfa.seq2seq.ScheduledOutputTrainingSampler.sample_ids_dtype": "tfa.seq2seq.TrainingSampler.sample_ids_dtype",
+ "tfa.seq2seq.ScheduledOutputTrainingSampler.sample_ids_shape": "tfa.seq2seq.TrainingSampler.sample_ids_shape",
+ "tfa.seq2seq.absolute_import": "tfa.activations.absolute_import",
+ "tfa.seq2seq.attention_wrapper.AttentionMechanism": "tfa.seq2seq.AttentionMechanism",
+ "tfa.seq2seq.attention_wrapper.AttentionMechanism.__init__": "tfa.seq2seq.AttentionMechanism.__init__",
+ "tfa.seq2seq.attention_wrapper.AttentionMechanism.alignments_size": "tfa.seq2seq.AttentionMechanism.alignments_size",
+ "tfa.seq2seq.attention_wrapper.AttentionMechanism.state_size": "tfa.seq2seq.AttentionMechanism.state_size",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper": "tfa.seq2seq.AttentionWrapper",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.input": "tfa.layers.GroupNormalization.input",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.name": "tfa.layers.GroupNormalization.name",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.output": "tfa.layers.GroupNormalization.output",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.output_size": "tfa.seq2seq.AttentionWrapper.output_size",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.state_size": "tfa.seq2seq.AttentionWrapper.state_size",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState": "tfa.seq2seq.AttentionWrapperState",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__add__": "tfa.seq2seq.AttentionWrapperState.__add__",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__contains__": "tfa.seq2seq.AttentionWrapperState.__contains__",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__eq__": "tfa.seq2seq.AttentionWrapperState.__eq__",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__ge__": "tfa.seq2seq.AttentionWrapperState.__ge__",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__getitem__": "tfa.seq2seq.AttentionWrapperState.__getitem__",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__gt__": "tfa.seq2seq.AttentionWrapperState.__gt__",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__init__": "tfa.seq2seq.AttentionMechanism.__init__",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__iter__": "tfa.seq2seq.AttentionWrapperState.__iter__",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__le__": "tfa.seq2seq.AttentionWrapperState.__le__",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__len__": "tfa.seq2seq.AttentionWrapperState.__len__",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__lt__": "tfa.seq2seq.AttentionWrapperState.__lt__",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__mul__": "tfa.seq2seq.AttentionWrapperState.__mul__",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__ne__": "tfa.seq2seq.AttentionWrapperState.__ne__",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__rmul__": "tfa.seq2seq.AttentionWrapperState.__rmul__",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.alignment_history": "tfa.seq2seq.AttentionWrapperState.alignment_history",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.alignments": "tfa.seq2seq.AttentionWrapperState.alignments",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.attention": "tfa.seq2seq.AttentionWrapperState.attention",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.attention_state": "tfa.seq2seq.AttentionWrapperState.attention_state",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.cell_state": "tfa.seq2seq.AttentionWrapperState.cell_state",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.count": "tfa.seq2seq.AttentionWrapperState.count",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.index": "tfa.seq2seq.AttentionWrapperState.index",
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.time": "tfa.seq2seq.AttentionWrapperState.time",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention": "tfa.seq2seq.BahdanauAttention",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.alignments_size": "tfa.seq2seq.BahdanauAttention.alignments_size",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.input": "tfa.layers.GroupNormalization.input",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.name": "tfa.layers.GroupNormalization.name",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.output": "tfa.layers.GroupNormalization.output",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.state_size": "tfa.seq2seq.BahdanauAttention.state_size",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention": "tfa.seq2seq.BahdanauMonotonicAttention",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.alignments_size": "tfa.seq2seq.BahdanauAttention.alignments_size",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.input": "tfa.layers.GroupNormalization.input",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.name": "tfa.layers.GroupNormalization.name",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.output": "tfa.layers.GroupNormalization.output",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.state_size": "tfa.seq2seq.BahdanauAttention.state_size",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.seq2seq.attention_wrapper.LuongAttention": "tfa.seq2seq.LuongAttention",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.alignments_size": "tfa.seq2seq.BahdanauAttention.alignments_size",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.input": "tfa.layers.GroupNormalization.input",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.name": "tfa.layers.GroupNormalization.name",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.output": "tfa.layers.GroupNormalization.output",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.state_size": "tfa.seq2seq.BahdanauAttention.state_size",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.seq2seq.attention_wrapper.LuongAttention.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention": "tfa.seq2seq.LuongMonotonicAttention",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.alignments_size": "tfa.seq2seq.BahdanauAttention.alignments_size",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.input": "tfa.layers.GroupNormalization.input",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.name": "tfa.layers.GroupNormalization.name",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.output": "tfa.layers.GroupNormalization.output",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.state_size": "tfa.seq2seq.BahdanauAttention.state_size",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.seq2seq.attention_wrapper.absolute_import": "tfa.activations.absolute_import",
+ "tfa.seq2seq.attention_wrapper.division": "tfa.activations.division",
+ "tfa.seq2seq.attention_wrapper.hardmax": "tfa.seq2seq.hardmax",
+ "tfa.seq2seq.attention_wrapper.monotonic_attention": "tfa.seq2seq.monotonic_attention",
+ "tfa.seq2seq.attention_wrapper.print_function": "tfa.activations.print_function",
+ "tfa.seq2seq.attention_wrapper.safe_cumprod": "tfa.seq2seq.safe_cumprod",
+ "tfa.seq2seq.basic_decoder.BasicDecoder": "tfa.seq2seq.BasicDecoder",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.batch_size": "tfa.seq2seq.BasicDecoder.batch_size",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.input": "tfa.layers.GroupNormalization.input",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.name": "tfa.layers.GroupNormalization.name",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.output": "tfa.layers.GroupNormalization.output",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.output_dtype": "tfa.seq2seq.BasicDecoder.output_dtype",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.output_size": "tfa.seq2seq.BasicDecoder.output_size",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.tracks_own_finished": "tfa.seq2seq.BaseDecoder.tracks_own_finished",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.seq2seq.basic_decoder.BasicDecoder.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput": "tfa.seq2seq.BasicDecoderOutput",
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__add__": "tfa.seq2seq.AttentionWrapperState.__add__",
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__contains__": "tfa.seq2seq.AttentionWrapperState.__contains__",
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__eq__": "tfa.seq2seq.AttentionWrapperState.__eq__",
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__ge__": "tfa.seq2seq.AttentionWrapperState.__ge__",
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__getitem__": "tfa.seq2seq.AttentionWrapperState.__getitem__",
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__gt__": "tfa.seq2seq.AttentionWrapperState.__gt__",
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__init__": "tfa.seq2seq.AttentionMechanism.__init__",
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__iter__": "tfa.seq2seq.AttentionWrapperState.__iter__",
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__le__": "tfa.seq2seq.AttentionWrapperState.__le__",
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__len__": "tfa.seq2seq.AttentionWrapperState.__len__",
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__lt__": "tfa.seq2seq.AttentionWrapperState.__lt__",
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__mul__": "tfa.seq2seq.AttentionWrapperState.__mul__",
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__ne__": "tfa.seq2seq.AttentionWrapperState.__ne__",
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__rmul__": "tfa.seq2seq.AttentionWrapperState.__rmul__",
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.count": "tfa.seq2seq.AttentionWrapperState.count",
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.index": "tfa.seq2seq.AttentionWrapperState.index",
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.rnn_output": "tfa.seq2seq.BasicDecoderOutput.rnn_output",
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.sample_id": "tfa.seq2seq.BasicDecoderOutput.sample_id",
+ "tfa.seq2seq.basic_decoder.absolute_import": "tfa.activations.absolute_import",
+ "tfa.seq2seq.basic_decoder.division": "tfa.activations.division",
+ "tfa.seq2seq.basic_decoder.print_function": "tfa.activations.print_function",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder": "tfa.seq2seq.BeamSearchDecoder",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.batch_size": "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderMixin.batch_size",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.input": "tfa.layers.GroupNormalization.input",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.name": "tfa.layers.GroupNormalization.name",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.output": "tfa.layers.GroupNormalization.output",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.output_dtype": "tfa.seq2seq.BeamSearchDecoder.output_dtype",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.output_size": "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderMixin.output_size",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.tracks_own_finished": "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderMixin.tracks_own_finished",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput": "tfa.seq2seq.BeamSearchDecoderOutput",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__add__": "tfa.seq2seq.AttentionWrapperState.__add__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__contains__": "tfa.seq2seq.AttentionWrapperState.__contains__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__eq__": "tfa.seq2seq.AttentionWrapperState.__eq__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__ge__": "tfa.seq2seq.AttentionWrapperState.__ge__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__getitem__": "tfa.seq2seq.AttentionWrapperState.__getitem__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__gt__": "tfa.seq2seq.AttentionWrapperState.__gt__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__init__": "tfa.seq2seq.AttentionMechanism.__init__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__iter__": "tfa.seq2seq.AttentionWrapperState.__iter__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__le__": "tfa.seq2seq.AttentionWrapperState.__le__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__len__": "tfa.seq2seq.AttentionWrapperState.__len__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__lt__": "tfa.seq2seq.AttentionWrapperState.__lt__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__mul__": "tfa.seq2seq.AttentionWrapperState.__mul__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__ne__": "tfa.seq2seq.AttentionWrapperState.__ne__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__rmul__": "tfa.seq2seq.AttentionWrapperState.__rmul__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.count": "tfa.seq2seq.AttentionWrapperState.count",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.index": "tfa.seq2seq.AttentionWrapperState.index",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.parent_ids": "tfa.seq2seq.BeamSearchDecoderOutput.parent_ids",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.predicted_ids": "tfa.seq2seq.BeamSearchDecoderOutput.predicted_ids",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.scores": "tfa.seq2seq.BeamSearchDecoderOutput.scores",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState": "tfa.seq2seq.BeamSearchDecoderState",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__add__": "tfa.seq2seq.AttentionWrapperState.__add__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__contains__": "tfa.seq2seq.AttentionWrapperState.__contains__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__eq__": "tfa.seq2seq.AttentionWrapperState.__eq__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__ge__": "tfa.seq2seq.AttentionWrapperState.__ge__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__getitem__": "tfa.seq2seq.AttentionWrapperState.__getitem__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__gt__": "tfa.seq2seq.AttentionWrapperState.__gt__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__init__": "tfa.seq2seq.AttentionMechanism.__init__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__iter__": "tfa.seq2seq.AttentionWrapperState.__iter__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__le__": "tfa.seq2seq.AttentionWrapperState.__le__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__len__": "tfa.seq2seq.AttentionWrapperState.__len__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__lt__": "tfa.seq2seq.AttentionWrapperState.__lt__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__mul__": "tfa.seq2seq.AttentionWrapperState.__mul__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__ne__": "tfa.seq2seq.AttentionWrapperState.__ne__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__rmul__": "tfa.seq2seq.AttentionWrapperState.__rmul__",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.accumulated_attention_probs": "tfa.seq2seq.BeamSearchDecoderState.accumulated_attention_probs",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.cell_state": "tfa.seq2seq.BeamSearchDecoderState.cell_state",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.count": "tfa.seq2seq.AttentionWrapperState.count",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.finished": "tfa.seq2seq.BeamSearchDecoderState.finished",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.index": "tfa.seq2seq.AttentionWrapperState.index",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.lengths": "tfa.seq2seq.BeamSearchDecoderState.lengths",
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.log_probs": "tfa.seq2seq.BeamSearchDecoderState.log_probs",
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput": "tfa.seq2seq.FinalBeamSearchDecoderOutput",
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__add__": "tfa.seq2seq.AttentionWrapperState.__add__",
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__contains__": "tfa.seq2seq.AttentionWrapperState.__contains__",
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__eq__": "tfa.seq2seq.AttentionWrapperState.__eq__",
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__ge__": "tfa.seq2seq.AttentionWrapperState.__ge__",
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__getitem__": "tfa.seq2seq.AttentionWrapperState.__getitem__",
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__gt__": "tfa.seq2seq.AttentionWrapperState.__gt__",
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__init__": "tfa.seq2seq.AttentionMechanism.__init__",
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__iter__": "tfa.seq2seq.AttentionWrapperState.__iter__",
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__le__": "tfa.seq2seq.AttentionWrapperState.__le__",
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__len__": "tfa.seq2seq.AttentionWrapperState.__len__",
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__lt__": "tfa.seq2seq.AttentionWrapperState.__lt__",
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__mul__": "tfa.seq2seq.AttentionWrapperState.__mul__",
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__ne__": "tfa.seq2seq.AttentionWrapperState.__ne__",
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__rmul__": "tfa.seq2seq.AttentionWrapperState.__rmul__",
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.beam_search_decoder_output": "tfa.seq2seq.FinalBeamSearchDecoderOutput.beam_search_decoder_output",
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.count": "tfa.seq2seq.AttentionWrapperState.count",
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.index": "tfa.seq2seq.AttentionWrapperState.index",
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.predicted_ids": "tfa.seq2seq.FinalBeamSearchDecoderOutput.predicted_ids",
+ "tfa.seq2seq.beam_search_decoder.absolute_import": "tfa.activations.absolute_import",
+ "tfa.seq2seq.beam_search_decoder.division": "tfa.activations.division",
+ "tfa.seq2seq.beam_search_decoder.gather_tree_from_array": "tfa.seq2seq.gather_tree_from_array",
+ "tfa.seq2seq.beam_search_decoder.print_function": "tfa.activations.print_function",
+ "tfa.seq2seq.beam_search_decoder.tile_batch": "tfa.seq2seq.tile_batch",
+ "tfa.seq2seq.decoder.BaseDecoder": "tfa.seq2seq.BaseDecoder",
+ "tfa.seq2seq.decoder.BaseDecoder.activity_regularizer": "tfa.layers.GroupNormalization.activity_regularizer",
+ "tfa.seq2seq.decoder.BaseDecoder.batch_size": "tfa.seq2seq.BaseDecoder.batch_size",
+ "tfa.seq2seq.decoder.BaseDecoder.dtype": "tfa.layers.GroupNormalization.dtype",
+ "tfa.seq2seq.decoder.BaseDecoder.dynamic": "tfa.layers.GroupNormalization.dynamic",
+ "tfa.seq2seq.decoder.BaseDecoder.input": "tfa.layers.GroupNormalization.input",
+ "tfa.seq2seq.decoder.BaseDecoder.input_mask": "tfa.layers.GroupNormalization.input_mask",
+ "tfa.seq2seq.decoder.BaseDecoder.input_shape": "tfa.layers.GroupNormalization.input_shape",
+ "tfa.seq2seq.decoder.BaseDecoder.input_spec": "tfa.layers.GroupNormalization.input_spec",
+ "tfa.seq2seq.decoder.BaseDecoder.losses": "tfa.layers.GroupNormalization.losses",
+ "tfa.seq2seq.decoder.BaseDecoder.metrics": "tfa.layers.GroupNormalization.metrics",
+ "tfa.seq2seq.decoder.BaseDecoder.name": "tfa.layers.GroupNormalization.name",
+ "tfa.seq2seq.decoder.BaseDecoder.name_scope": "tfa.layers.GroupNormalization.name_scope",
+ "tfa.seq2seq.decoder.BaseDecoder.non_trainable_variables": "tfa.layers.GroupNormalization.non_trainable_variables",
+ "tfa.seq2seq.decoder.BaseDecoder.non_trainable_weights": "tfa.layers.GroupNormalization.non_trainable_weights",
+ "tfa.seq2seq.decoder.BaseDecoder.output": "tfa.layers.GroupNormalization.output",
+ "tfa.seq2seq.decoder.BaseDecoder.output_dtype": "tfa.seq2seq.BaseDecoder.output_dtype",
+ "tfa.seq2seq.decoder.BaseDecoder.output_mask": "tfa.layers.GroupNormalization.output_mask",
+ "tfa.seq2seq.decoder.BaseDecoder.output_shape": "tfa.layers.GroupNormalization.output_shape",
+ "tfa.seq2seq.decoder.BaseDecoder.output_size": "tfa.seq2seq.BaseDecoder.output_size",
+ "tfa.seq2seq.decoder.BaseDecoder.submodules": "tfa.layers.GroupNormalization.submodules",
+ "tfa.seq2seq.decoder.BaseDecoder.tracks_own_finished": "tfa.seq2seq.BaseDecoder.tracks_own_finished",
+ "tfa.seq2seq.decoder.BaseDecoder.trainable": "tfa.layers.GroupNormalization.trainable",
+ "tfa.seq2seq.decoder.BaseDecoder.trainable_variables": "tfa.layers.GroupNormalization.trainable_variables",
+ "tfa.seq2seq.decoder.BaseDecoder.trainable_weights": "tfa.layers.GroupNormalization.trainable_weights",
+ "tfa.seq2seq.decoder.BaseDecoder.updates": "tfa.layers.GroupNormalization.updates",
+ "tfa.seq2seq.decoder.BaseDecoder.variables": "tfa.layers.GroupNormalization.variables",
+ "tfa.seq2seq.decoder.BaseDecoder.weights": "tfa.layers.GroupNormalization.weights",
+ "tfa.seq2seq.decoder.Decoder": "tfa.seq2seq.Decoder",
+ "tfa.seq2seq.decoder.Decoder.__init__": "tfa.seq2seq.AttentionMechanism.__init__",
+ "tfa.seq2seq.decoder.Decoder.batch_size": "tfa.seq2seq.Decoder.batch_size",
+ "tfa.seq2seq.decoder.Decoder.output_dtype": "tfa.seq2seq.Decoder.output_dtype",
+ "tfa.seq2seq.decoder.Decoder.output_size": "tfa.seq2seq.Decoder.output_size",
+ "tfa.seq2seq.decoder.Decoder.tracks_own_finished": "tfa.seq2seq.Decoder.tracks_own_finished",
+ "tfa.seq2seq.decoder.absolute_import": "tfa.activations.absolute_import",
+ "tfa.seq2seq.decoder.division": "tfa.activations.division",
+ "tfa.seq2seq.decoder.dynamic_decode": "tfa.seq2seq.dynamic_decode",
+ "tfa.seq2seq.decoder.print_function": "tfa.activations.print_function",
+ "tfa.seq2seq.division": "tfa.activations.division",
+ "tfa.seq2seq.loss.SequenceLoss": "tfa.seq2seq.SequenceLoss",
+ "tfa.seq2seq.loss.absolute_import": "tfa.activations.absolute_import",
+ "tfa.seq2seq.loss.division": "tfa.activations.division",
+ "tfa.seq2seq.loss.print_function": "tfa.activations.print_function",
+ "tfa.seq2seq.loss.sequence_loss": "tfa.seq2seq.sequence_loss",
+ "tfa.seq2seq.print_function": "tfa.activations.print_function",
+ "tfa.seq2seq.sampler.CustomSampler": "tfa.seq2seq.CustomSampler",
+ "tfa.seq2seq.sampler.CustomSampler.batch_size": "tfa.seq2seq.CustomSampler.batch_size",
+ "tfa.seq2seq.sampler.CustomSampler.sample_ids_dtype": "tfa.seq2seq.CustomSampler.sample_ids_dtype",
+ "tfa.seq2seq.sampler.CustomSampler.sample_ids_shape": "tfa.seq2seq.CustomSampler.sample_ids_shape",
+ "tfa.seq2seq.sampler.GreedyEmbeddingSampler": "tfa.seq2seq.GreedyEmbeddingSampler",
+ "tfa.seq2seq.sampler.GreedyEmbeddingSampler.batch_size": "tfa.seq2seq.GreedyEmbeddingSampler.batch_size",
+ "tfa.seq2seq.sampler.GreedyEmbeddingSampler.sample_ids_dtype": "tfa.seq2seq.GreedyEmbeddingSampler.sample_ids_dtype",
+ "tfa.seq2seq.sampler.GreedyEmbeddingSampler.sample_ids_shape": "tfa.seq2seq.GreedyEmbeddingSampler.sample_ids_shape",
+ "tfa.seq2seq.sampler.InferenceSampler": "tfa.seq2seq.InferenceSampler",
+ "tfa.seq2seq.sampler.InferenceSampler.batch_size": "tfa.seq2seq.InferenceSampler.batch_size",
+ "tfa.seq2seq.sampler.InferenceSampler.sample_ids_dtype": "tfa.seq2seq.InferenceSampler.sample_ids_dtype",
+ "tfa.seq2seq.sampler.InferenceSampler.sample_ids_shape": "tfa.seq2seq.InferenceSampler.sample_ids_shape",
+ "tfa.seq2seq.sampler.SampleEmbeddingSampler": "tfa.seq2seq.SampleEmbeddingSampler",
+ "tfa.seq2seq.sampler.SampleEmbeddingSampler.batch_size": "tfa.seq2seq.GreedyEmbeddingSampler.batch_size",
+ "tfa.seq2seq.sampler.SampleEmbeddingSampler.sample_ids_dtype": "tfa.seq2seq.GreedyEmbeddingSampler.sample_ids_dtype",
+ "tfa.seq2seq.sampler.SampleEmbeddingSampler.sample_ids_shape": "tfa.seq2seq.GreedyEmbeddingSampler.sample_ids_shape",
+ "tfa.seq2seq.sampler.Sampler": "tfa.seq2seq.Sampler",
+ "tfa.seq2seq.sampler.Sampler.__init__": "tfa.seq2seq.AttentionMechanism.__init__",
+ "tfa.seq2seq.sampler.Sampler.batch_size": "tfa.seq2seq.Sampler.batch_size",
+ "tfa.seq2seq.sampler.Sampler.sample_ids_dtype": "tfa.seq2seq.Sampler.sample_ids_dtype",
+ "tfa.seq2seq.sampler.Sampler.sample_ids_shape": "tfa.seq2seq.Sampler.sample_ids_shape",
+ "tfa.seq2seq.sampler.ScheduledEmbeddingTrainingSampler": "tfa.seq2seq.ScheduledEmbeddingTrainingSampler",
+ "tfa.seq2seq.sampler.ScheduledEmbeddingTrainingSampler.batch_size": "tfa.seq2seq.TrainingSampler.batch_size",
+ "tfa.seq2seq.sampler.ScheduledEmbeddingTrainingSampler.sample_ids_dtype": "tfa.seq2seq.TrainingSampler.sample_ids_dtype",
+ "tfa.seq2seq.sampler.ScheduledEmbeddingTrainingSampler.sample_ids_shape": "tfa.seq2seq.TrainingSampler.sample_ids_shape",
+ "tfa.seq2seq.sampler.ScheduledOutputTrainingSampler": "tfa.seq2seq.ScheduledOutputTrainingSampler",
+ "tfa.seq2seq.sampler.ScheduledOutputTrainingSampler.batch_size": "tfa.seq2seq.TrainingSampler.batch_size",
+ "tfa.seq2seq.sampler.ScheduledOutputTrainingSampler.sample_ids_dtype": "tfa.seq2seq.TrainingSampler.sample_ids_dtype",
+ "tfa.seq2seq.sampler.ScheduledOutputTrainingSampler.sample_ids_shape": "tfa.seq2seq.TrainingSampler.sample_ids_shape",
+ "tfa.seq2seq.sampler.TrainingSampler": "tfa.seq2seq.TrainingSampler",
+ "tfa.seq2seq.sampler.TrainingSampler.batch_size": "tfa.seq2seq.TrainingSampler.batch_size",
+ "tfa.seq2seq.sampler.TrainingSampler.sample_ids_dtype": "tfa.seq2seq.TrainingSampler.sample_ids_dtype",
+ "tfa.seq2seq.sampler.TrainingSampler.sample_ids_shape": "tfa.seq2seq.TrainingSampler.sample_ids_shape",
+ "tfa.seq2seq.sampler.absolute_import": "tfa.activations.absolute_import",
+ "tfa.seq2seq.sampler.division": "tfa.activations.division",
+ "tfa.seq2seq.sampler.print_function": "tfa.activations.print_function",
+ "tfa.text.absolute_import": "tfa.activations.absolute_import",
+ "tfa.text.division": "tfa.activations.division",
+ "tfa.text.print_function": "tfa.activations.print_function",
+ "tfa.text.skip_gram_ops.absolute_import": "tfa.activations.absolute_import",
+ "tfa.text.skip_gram_ops.division": "tfa.activations.division",
+ "tfa.text.skip_gram_ops.print_function": "tfa.activations.print_function",
+ "tfa.text.skip_gram_ops.skip_gram_sample": "tfa.text.skip_gram_sample",
+ "tfa.text.skip_gram_ops.skip_gram_sample_with_text_vocab": "tfa.text.skip_gram_sample_with_text_vocab"
+ },
+ "is_fragment": {
+ "tfa": false,
+ "tfa.activations": false,
+ "tfa.activations.absolute_import": true,
+ "tfa.activations.division": true,
+ "tfa.activations.print_function": true,
+ "tfa.activations.sparsemax": false,
+ "tfa.image": false,
+ "tfa.image.absolute_import": true,
+ "tfa.image.adjust_hsv_in_yiq": false,
+ "tfa.image.dense_image_warp": false,
+ "tfa.image.distance_transform": false,
+ "tfa.image.distance_transform.absolute_import": true,
+ "tfa.image.distance_transform.division": true,
+ "tfa.image.distance_transform.euclidean_dist_transform": false,
+ "tfa.image.distance_transform.print_function": true,
+ "tfa.image.distort_image_ops": false,
+ "tfa.image.distort_image_ops.absolute_import": true,
+ "tfa.image.distort_image_ops.adjust_hsv_in_yiq": false,
+ "tfa.image.distort_image_ops.division": true,
+ "tfa.image.distort_image_ops.print_function": true,
+ "tfa.image.distort_image_ops.random_hsv_in_yiq": false,
+ "tfa.image.division": true,
+ "tfa.image.euclidean_dist_transform": false,
+ "tfa.image.filters": false,
+ "tfa.image.filters.absolute_import": true,
+ "tfa.image.filters.division": true,
+ "tfa.image.filters.mean_filter2d": false,
+ "tfa.image.filters.median_filter2d": false,
+ "tfa.image.filters.print_function": true,
+ "tfa.image.interpolate_bilinear": false,
+ "tfa.image.mean_filter2d": false,
+ "tfa.image.median_filter2d": false,
+ "tfa.image.print_function": true,
+ "tfa.image.random_hsv_in_yiq": false,
+ "tfa.image.rotate": false,
+ "tfa.image.transform": false,
+ "tfa.image.transform_ops": false,
+ "tfa.image.transform_ops.absolute_import": true,
+ "tfa.image.transform_ops.angles_to_projective_transforms": false,
+ "tfa.image.transform_ops.compose_transforms": false,
+ "tfa.image.transform_ops.division": true,
+ "tfa.image.transform_ops.flat_transforms_to_matrices": false,
+ "tfa.image.transform_ops.matrices_to_flat_transforms": false,
+ "tfa.image.transform_ops.print_function": true,
+ "tfa.image.transform_ops.rotate": false,
+ "tfa.image.transform_ops.transform": false,
+ "tfa.layers": false,
+ "tfa.layers.GroupNormalization": false,
+ "tfa.layers.GroupNormalization.__call__": true,
+ "tfa.layers.GroupNormalization.__init__": true,
+ "tfa.layers.GroupNormalization.activity_regularizer": true,
+ "tfa.layers.GroupNormalization.add_loss": true,
+ "tfa.layers.GroupNormalization.add_metric": true,
+ "tfa.layers.GroupNormalization.add_update": true,
+ "tfa.layers.GroupNormalization.add_variable": true,
+ "tfa.layers.GroupNormalization.add_weight": true,
+ "tfa.layers.GroupNormalization.apply": true,
+ "tfa.layers.GroupNormalization.build": true,
+ "tfa.layers.GroupNormalization.call": true,
+ "tfa.layers.GroupNormalization.compute_mask": true,
+ "tfa.layers.GroupNormalization.compute_output_shape": true,
+ "tfa.layers.GroupNormalization.compute_output_signature": true,
+ "tfa.layers.GroupNormalization.count_params": true,
+ "tfa.layers.GroupNormalization.dtype": true,
+ "tfa.layers.GroupNormalization.dynamic": true,
+ "tfa.layers.GroupNormalization.from_config": true,
+ "tfa.layers.GroupNormalization.get_config": true,
+ "tfa.layers.GroupNormalization.get_input_at": true,
+ "tfa.layers.GroupNormalization.get_input_mask_at": true,
+ "tfa.layers.GroupNormalization.get_input_shape_at": true,
+ "tfa.layers.GroupNormalization.get_losses_for": true,
+ "tfa.layers.GroupNormalization.get_output_at": true,
+ "tfa.layers.GroupNormalization.get_output_mask_at": true,
+ "tfa.layers.GroupNormalization.get_output_shape_at": true,
+ "tfa.layers.GroupNormalization.get_updates_for": true,
+ "tfa.layers.GroupNormalization.get_weights": true,
+ "tfa.layers.GroupNormalization.input": true,
+ "tfa.layers.GroupNormalization.input_mask": true,
+ "tfa.layers.GroupNormalization.input_shape": true,
+ "tfa.layers.GroupNormalization.input_spec": true,
+ "tfa.layers.GroupNormalization.losses": true,
+ "tfa.layers.GroupNormalization.metrics": true,
+ "tfa.layers.GroupNormalization.name": true,
+ "tfa.layers.GroupNormalization.name_scope": true,
+ "tfa.layers.GroupNormalization.non_trainable_variables": true,
+ "tfa.layers.GroupNormalization.non_trainable_weights": true,
+ "tfa.layers.GroupNormalization.output": true,
+ "tfa.layers.GroupNormalization.output_mask": true,
+ "tfa.layers.GroupNormalization.output_shape": true,
+ "tfa.layers.GroupNormalization.set_weights": true,
+ "tfa.layers.GroupNormalization.submodules": true,
+ "tfa.layers.GroupNormalization.trainable": true,
+ "tfa.layers.GroupNormalization.trainable_variables": true,
+ "tfa.layers.GroupNormalization.trainable_weights": true,
+ "tfa.layers.GroupNormalization.updates": true,
+ "tfa.layers.GroupNormalization.variables": true,
+ "tfa.layers.GroupNormalization.weights": true,
+ "tfa.layers.GroupNormalization.with_name_scope": true,
+ "tfa.layers.InstanceNormalization": false,
+ "tfa.layers.InstanceNormalization.__call__": true,
+ "tfa.layers.InstanceNormalization.__init__": true,
+ "tfa.layers.InstanceNormalization.activity_regularizer": true,
+ "tfa.layers.InstanceNormalization.add_loss": true,
+ "tfa.layers.InstanceNormalization.add_metric": true,
+ "tfa.layers.InstanceNormalization.add_update": true,
+ "tfa.layers.InstanceNormalization.add_variable": true,
+ "tfa.layers.InstanceNormalization.add_weight": true,
+ "tfa.layers.InstanceNormalization.apply": true,
+ "tfa.layers.InstanceNormalization.build": true,
+ "tfa.layers.InstanceNormalization.call": true,
+ "tfa.layers.InstanceNormalization.compute_mask": true,
+ "tfa.layers.InstanceNormalization.compute_output_shape": true,
+ "tfa.layers.InstanceNormalization.compute_output_signature": true,
+ "tfa.layers.InstanceNormalization.count_params": true,
+ "tfa.layers.InstanceNormalization.dtype": true,
+ "tfa.layers.InstanceNormalization.dynamic": true,
+ "tfa.layers.InstanceNormalization.from_config": true,
+ "tfa.layers.InstanceNormalization.get_config": true,
+ "tfa.layers.InstanceNormalization.get_input_at": true,
+ "tfa.layers.InstanceNormalization.get_input_mask_at": true,
+ "tfa.layers.InstanceNormalization.get_input_shape_at": true,
+ "tfa.layers.InstanceNormalization.get_losses_for": true,
+ "tfa.layers.InstanceNormalization.get_output_at": true,
+ "tfa.layers.InstanceNormalization.get_output_mask_at": true,
+ "tfa.layers.InstanceNormalization.get_output_shape_at": true,
+ "tfa.layers.InstanceNormalization.get_updates_for": true,
+ "tfa.layers.InstanceNormalization.get_weights": true,
+ "tfa.layers.InstanceNormalization.input": true,
+ "tfa.layers.InstanceNormalization.input_mask": true,
+ "tfa.layers.InstanceNormalization.input_shape": true,
+ "tfa.layers.InstanceNormalization.input_spec": true,
+ "tfa.layers.InstanceNormalization.losses": true,
+ "tfa.layers.InstanceNormalization.metrics": true,
+ "tfa.layers.InstanceNormalization.name": true,
+ "tfa.layers.InstanceNormalization.name_scope": true,
+ "tfa.layers.InstanceNormalization.non_trainable_variables": true,
+ "tfa.layers.InstanceNormalization.non_trainable_weights": true,
+ "tfa.layers.InstanceNormalization.output": true,
+ "tfa.layers.InstanceNormalization.output_mask": true,
+ "tfa.layers.InstanceNormalization.output_shape": true,
+ "tfa.layers.InstanceNormalization.set_weights": true,
+ "tfa.layers.InstanceNormalization.submodules": true,
+ "tfa.layers.InstanceNormalization.trainable": true,
+ "tfa.layers.InstanceNormalization.trainable_variables": true,
+ "tfa.layers.InstanceNormalization.trainable_weights": true,
+ "tfa.layers.InstanceNormalization.updates": true,
+ "tfa.layers.InstanceNormalization.variables": true,
+ "tfa.layers.InstanceNormalization.weights": true,
+ "tfa.layers.InstanceNormalization.with_name_scope": true,
+ "tfa.layers.Maxout": false,
+ "tfa.layers.Maxout.__call__": true,
+ "tfa.layers.Maxout.__init__": true,
+ "tfa.layers.Maxout.activity_regularizer": true,
+ "tfa.layers.Maxout.add_loss": true,
+ "tfa.layers.Maxout.add_metric": true,
+ "tfa.layers.Maxout.add_update": true,
+ "tfa.layers.Maxout.add_variable": true,
+ "tfa.layers.Maxout.add_weight": true,
+ "tfa.layers.Maxout.apply": true,
+ "tfa.layers.Maxout.build": true,
+ "tfa.layers.Maxout.call": true,
+ "tfa.layers.Maxout.compute_mask": true,
+ "tfa.layers.Maxout.compute_output_shape": true,
+ "tfa.layers.Maxout.compute_output_signature": true,
+ "tfa.layers.Maxout.count_params": true,
+ "tfa.layers.Maxout.dtype": true,
+ "tfa.layers.Maxout.dynamic": true,
+ "tfa.layers.Maxout.from_config": true,
+ "tfa.layers.Maxout.get_config": true,
+ "tfa.layers.Maxout.get_input_at": true,
+ "tfa.layers.Maxout.get_input_mask_at": true,
+ "tfa.layers.Maxout.get_input_shape_at": true,
+ "tfa.layers.Maxout.get_losses_for": true,
+ "tfa.layers.Maxout.get_output_at": true,
+ "tfa.layers.Maxout.get_output_mask_at": true,
+ "tfa.layers.Maxout.get_output_shape_at": true,
+ "tfa.layers.Maxout.get_updates_for": true,
+ "tfa.layers.Maxout.get_weights": true,
+ "tfa.layers.Maxout.input": true,
+ "tfa.layers.Maxout.input_mask": true,
+ "tfa.layers.Maxout.input_shape": true,
+ "tfa.layers.Maxout.input_spec": true,
+ "tfa.layers.Maxout.losses": true,
+ "tfa.layers.Maxout.metrics": true,
+ "tfa.layers.Maxout.name": true,
+ "tfa.layers.Maxout.name_scope": true,
+ "tfa.layers.Maxout.non_trainable_variables": true,
+ "tfa.layers.Maxout.non_trainable_weights": true,
+ "tfa.layers.Maxout.output": true,
+ "tfa.layers.Maxout.output_mask": true,
+ "tfa.layers.Maxout.output_shape": true,
+ "tfa.layers.Maxout.set_weights": true,
+ "tfa.layers.Maxout.submodules": true,
+ "tfa.layers.Maxout.trainable": true,
+ "tfa.layers.Maxout.trainable_variables": true,
+ "tfa.layers.Maxout.trainable_weights": true,
+ "tfa.layers.Maxout.updates": true,
+ "tfa.layers.Maxout.variables": true,
+ "tfa.layers.Maxout.weights": true,
+ "tfa.layers.Maxout.with_name_scope": true,
+ "tfa.layers.PoincareNormalize": false,
+ "tfa.layers.PoincareNormalize.__call__": true,
+ "tfa.layers.PoincareNormalize.__init__": true,
+ "tfa.layers.PoincareNormalize.activity_regularizer": true,
+ "tfa.layers.PoincareNormalize.add_loss": true,
+ "tfa.layers.PoincareNormalize.add_metric": true,
+ "tfa.layers.PoincareNormalize.add_update": true,
+ "tfa.layers.PoincareNormalize.add_variable": true,
+ "tfa.layers.PoincareNormalize.add_weight": true,
+ "tfa.layers.PoincareNormalize.apply": true,
+ "tfa.layers.PoincareNormalize.build": true,
+ "tfa.layers.PoincareNormalize.call": true,
+ "tfa.layers.PoincareNormalize.compute_mask": true,
+ "tfa.layers.PoincareNormalize.compute_output_shape": true,
+ "tfa.layers.PoincareNormalize.compute_output_signature": true,
+ "tfa.layers.PoincareNormalize.count_params": true,
+ "tfa.layers.PoincareNormalize.dtype": true,
+ "tfa.layers.PoincareNormalize.dynamic": true,
+ "tfa.layers.PoincareNormalize.from_config": true,
+ "tfa.layers.PoincareNormalize.get_config": true,
+ "tfa.layers.PoincareNormalize.get_input_at": true,
+ "tfa.layers.PoincareNormalize.get_input_mask_at": true,
+ "tfa.layers.PoincareNormalize.get_input_shape_at": true,
+ "tfa.layers.PoincareNormalize.get_losses_for": true,
+ "tfa.layers.PoincareNormalize.get_output_at": true,
+ "tfa.layers.PoincareNormalize.get_output_mask_at": true,
+ "tfa.layers.PoincareNormalize.get_output_shape_at": true,
+ "tfa.layers.PoincareNormalize.get_updates_for": true,
+ "tfa.layers.PoincareNormalize.get_weights": true,
+ "tfa.layers.PoincareNormalize.input": true,
+ "tfa.layers.PoincareNormalize.input_mask": true,
+ "tfa.layers.PoincareNormalize.input_shape": true,
+ "tfa.layers.PoincareNormalize.input_spec": true,
+ "tfa.layers.PoincareNormalize.losses": true,
+ "tfa.layers.PoincareNormalize.metrics": true,
+ "tfa.layers.PoincareNormalize.name": true,
+ "tfa.layers.PoincareNormalize.name_scope": true,
+ "tfa.layers.PoincareNormalize.non_trainable_variables": true,
+ "tfa.layers.PoincareNormalize.non_trainable_weights": true,
+ "tfa.layers.PoincareNormalize.output": true,
+ "tfa.layers.PoincareNormalize.output_mask": true,
+ "tfa.layers.PoincareNormalize.output_shape": true,
+ "tfa.layers.PoincareNormalize.set_weights": true,
+ "tfa.layers.PoincareNormalize.submodules": true,
+ "tfa.layers.PoincareNormalize.trainable": true,
+ "tfa.layers.PoincareNormalize.trainable_variables": true,
+ "tfa.layers.PoincareNormalize.trainable_weights": true,
+ "tfa.layers.PoincareNormalize.updates": true,
+ "tfa.layers.PoincareNormalize.variables": true,
+ "tfa.layers.PoincareNormalize.weights": true,
+ "tfa.layers.PoincareNormalize.with_name_scope": true,
+ "tfa.layers.Sparsemax": false,
+ "tfa.layers.Sparsemax.__call__": true,
+ "tfa.layers.Sparsemax.__init__": true,
+ "tfa.layers.Sparsemax.activity_regularizer": true,
+ "tfa.layers.Sparsemax.add_loss": true,
+ "tfa.layers.Sparsemax.add_metric": true,
+ "tfa.layers.Sparsemax.add_update": true,
+ "tfa.layers.Sparsemax.add_variable": true,
+ "tfa.layers.Sparsemax.add_weight": true,
+ "tfa.layers.Sparsemax.apply": true,
+ "tfa.layers.Sparsemax.build": true,
+ "tfa.layers.Sparsemax.call": true,
+ "tfa.layers.Sparsemax.compute_mask": true,
+ "tfa.layers.Sparsemax.compute_output_shape": true,
+ "tfa.layers.Sparsemax.compute_output_signature": true,
+ "tfa.layers.Sparsemax.count_params": true,
+ "tfa.layers.Sparsemax.dtype": true,
+ "tfa.layers.Sparsemax.dynamic": true,
+ "tfa.layers.Sparsemax.from_config": true,
+ "tfa.layers.Sparsemax.get_config": true,
+ "tfa.layers.Sparsemax.get_input_at": true,
+ "tfa.layers.Sparsemax.get_input_mask_at": true,
+ "tfa.layers.Sparsemax.get_input_shape_at": true,
+ "tfa.layers.Sparsemax.get_losses_for": true,
+ "tfa.layers.Sparsemax.get_output_at": true,
+ "tfa.layers.Sparsemax.get_output_mask_at": true,
+ "tfa.layers.Sparsemax.get_output_shape_at": true,
+ "tfa.layers.Sparsemax.get_updates_for": true,
+ "tfa.layers.Sparsemax.get_weights": true,
+ "tfa.layers.Sparsemax.input": true,
+ "tfa.layers.Sparsemax.input_mask": true,
+ "tfa.layers.Sparsemax.input_shape": true,
+ "tfa.layers.Sparsemax.input_spec": true,
+ "tfa.layers.Sparsemax.losses": true,
+ "tfa.layers.Sparsemax.metrics": true,
+ "tfa.layers.Sparsemax.name": true,
+ "tfa.layers.Sparsemax.name_scope": true,
+ "tfa.layers.Sparsemax.non_trainable_variables": true,
+ "tfa.layers.Sparsemax.non_trainable_weights": true,
+ "tfa.layers.Sparsemax.output": true,
+ "tfa.layers.Sparsemax.output_mask": true,
+ "tfa.layers.Sparsemax.output_shape": true,
+ "tfa.layers.Sparsemax.set_weights": true,
+ "tfa.layers.Sparsemax.submodules": true,
+ "tfa.layers.Sparsemax.trainable": true,
+ "tfa.layers.Sparsemax.trainable_variables": true,
+ "tfa.layers.Sparsemax.trainable_weights": true,
+ "tfa.layers.Sparsemax.updates": true,
+ "tfa.layers.Sparsemax.variables": true,
+ "tfa.layers.Sparsemax.weights": true,
+ "tfa.layers.Sparsemax.with_name_scope": true,
+ "tfa.layers.WeightNormalization": false,
+ "tfa.layers.WeightNormalization.__call__": true,
+ "tfa.layers.WeightNormalization.__init__": true,
+ "tfa.layers.WeightNormalization.activity_regularizer": true,
+ "tfa.layers.WeightNormalization.add_loss": true,
+ "tfa.layers.WeightNormalization.add_metric": true,
+ "tfa.layers.WeightNormalization.add_update": true,
+ "tfa.layers.WeightNormalization.add_variable": true,
+ "tfa.layers.WeightNormalization.add_weight": true,
+ "tfa.layers.WeightNormalization.apply": true,
+ "tfa.layers.WeightNormalization.build": true,
+ "tfa.layers.WeightNormalization.call": true,
+ "tfa.layers.WeightNormalization.compute_mask": true,
+ "tfa.layers.WeightNormalization.compute_output_shape": true,
+ "tfa.layers.WeightNormalization.compute_output_signature": true,
+ "tfa.layers.WeightNormalization.count_params": true,
+ "tfa.layers.WeightNormalization.dtype": true,
+ "tfa.layers.WeightNormalization.dynamic": true,
+ "tfa.layers.WeightNormalization.from_config": true,
+ "tfa.layers.WeightNormalization.get_config": true,
+ "tfa.layers.WeightNormalization.get_input_at": true,
+ "tfa.layers.WeightNormalization.get_input_mask_at": true,
+ "tfa.layers.WeightNormalization.get_input_shape_at": true,
+ "tfa.layers.WeightNormalization.get_losses_for": true,
+ "tfa.layers.WeightNormalization.get_output_at": true,
+ "tfa.layers.WeightNormalization.get_output_mask_at": true,
+ "tfa.layers.WeightNormalization.get_output_shape_at": true,
+ "tfa.layers.WeightNormalization.get_updates_for": true,
+ "tfa.layers.WeightNormalization.get_weights": true,
+ "tfa.layers.WeightNormalization.input": true,
+ "tfa.layers.WeightNormalization.input_mask": true,
+ "tfa.layers.WeightNormalization.input_shape": true,
+ "tfa.layers.WeightNormalization.input_spec": true,
+ "tfa.layers.WeightNormalization.losses": true,
+ "tfa.layers.WeightNormalization.metrics": true,
+ "tfa.layers.WeightNormalization.name": true,
+ "tfa.layers.WeightNormalization.name_scope": true,
+ "tfa.layers.WeightNormalization.non_trainable_variables": true,
+ "tfa.layers.WeightNormalization.non_trainable_weights": true,
+ "tfa.layers.WeightNormalization.output": true,
+ "tfa.layers.WeightNormalization.output_mask": true,
+ "tfa.layers.WeightNormalization.output_shape": true,
+ "tfa.layers.WeightNormalization.set_weights": true,
+ "tfa.layers.WeightNormalization.submodules": true,
+ "tfa.layers.WeightNormalization.trainable": true,
+ "tfa.layers.WeightNormalization.trainable_variables": true,
+ "tfa.layers.WeightNormalization.trainable_weights": true,
+ "tfa.layers.WeightNormalization.updates": true,
+ "tfa.layers.WeightNormalization.variables": true,
+ "tfa.layers.WeightNormalization.weights": true,
+ "tfa.layers.WeightNormalization.with_name_scope": true,
+ "tfa.layers.absolute_import": true,
+ "tfa.layers.division": true,
+ "tfa.layers.maxout": false,
+ "tfa.layers.maxout.Maxout": false,
+ "tfa.layers.maxout.Maxout.__call__": true,
+ "tfa.layers.maxout.Maxout.__init__": true,
+ "tfa.layers.maxout.Maxout.activity_regularizer": true,
+ "tfa.layers.maxout.Maxout.add_loss": true,
+ "tfa.layers.maxout.Maxout.add_metric": true,
+ "tfa.layers.maxout.Maxout.add_update": true,
+ "tfa.layers.maxout.Maxout.add_variable": true,
+ "tfa.layers.maxout.Maxout.add_weight": true,
+ "tfa.layers.maxout.Maxout.apply": true,
+ "tfa.layers.maxout.Maxout.build": true,
+ "tfa.layers.maxout.Maxout.call": true,
+ "tfa.layers.maxout.Maxout.compute_mask": true,
+ "tfa.layers.maxout.Maxout.compute_output_shape": true,
+ "tfa.layers.maxout.Maxout.compute_output_signature": true,
+ "tfa.layers.maxout.Maxout.count_params": true,
+ "tfa.layers.maxout.Maxout.dtype": true,
+ "tfa.layers.maxout.Maxout.dynamic": true,
+ "tfa.layers.maxout.Maxout.from_config": true,
+ "tfa.layers.maxout.Maxout.get_config": true,
+ "tfa.layers.maxout.Maxout.get_input_at": true,
+ "tfa.layers.maxout.Maxout.get_input_mask_at": true,
+ "tfa.layers.maxout.Maxout.get_input_shape_at": true,
+ "tfa.layers.maxout.Maxout.get_losses_for": true,
+ "tfa.layers.maxout.Maxout.get_output_at": true,
+ "tfa.layers.maxout.Maxout.get_output_mask_at": true,
+ "tfa.layers.maxout.Maxout.get_output_shape_at": true,
+ "tfa.layers.maxout.Maxout.get_updates_for": true,
+ "tfa.layers.maxout.Maxout.get_weights": true,
+ "tfa.layers.maxout.Maxout.input": true,
+ "tfa.layers.maxout.Maxout.input_mask": true,
+ "tfa.layers.maxout.Maxout.input_shape": true,
+ "tfa.layers.maxout.Maxout.input_spec": true,
+ "tfa.layers.maxout.Maxout.losses": true,
+ "tfa.layers.maxout.Maxout.metrics": true,
+ "tfa.layers.maxout.Maxout.name": true,
+ "tfa.layers.maxout.Maxout.name_scope": true,
+ "tfa.layers.maxout.Maxout.non_trainable_variables": true,
+ "tfa.layers.maxout.Maxout.non_trainable_weights": true,
+ "tfa.layers.maxout.Maxout.output": true,
+ "tfa.layers.maxout.Maxout.output_mask": true,
+ "tfa.layers.maxout.Maxout.output_shape": true,
+ "tfa.layers.maxout.Maxout.set_weights": true,
+ "tfa.layers.maxout.Maxout.submodules": true,
+ "tfa.layers.maxout.Maxout.trainable": true,
+ "tfa.layers.maxout.Maxout.trainable_variables": true,
+ "tfa.layers.maxout.Maxout.trainable_weights": true,
+ "tfa.layers.maxout.Maxout.updates": true,
+ "tfa.layers.maxout.Maxout.variables": true,
+ "tfa.layers.maxout.Maxout.weights": true,
+ "tfa.layers.maxout.Maxout.with_name_scope": true,
+ "tfa.layers.maxout.absolute_import": true,
+ "tfa.layers.maxout.division": true,
+ "tfa.layers.maxout.print_function": true,
+ "tfa.layers.normalizations": false,
+ "tfa.layers.normalizations.GroupNormalization": false,
+ "tfa.layers.normalizations.GroupNormalization.__call__": true,
+ "tfa.layers.normalizations.GroupNormalization.__init__": true,
+ "tfa.layers.normalizations.GroupNormalization.activity_regularizer": true,
+ "tfa.layers.normalizations.GroupNormalization.add_loss": true,
+ "tfa.layers.normalizations.GroupNormalization.add_metric": true,
+ "tfa.layers.normalizations.GroupNormalization.add_update": true,
+ "tfa.layers.normalizations.GroupNormalization.add_variable": true,
+ "tfa.layers.normalizations.GroupNormalization.add_weight": true,
+ "tfa.layers.normalizations.GroupNormalization.apply": true,
+ "tfa.layers.normalizations.GroupNormalization.build": true,
+ "tfa.layers.normalizations.GroupNormalization.call": true,
+ "tfa.layers.normalizations.GroupNormalization.compute_mask": true,
+ "tfa.layers.normalizations.GroupNormalization.compute_output_shape": true,
+ "tfa.layers.normalizations.GroupNormalization.compute_output_signature": true,
+ "tfa.layers.normalizations.GroupNormalization.count_params": true,
+ "tfa.layers.normalizations.GroupNormalization.dtype": true,
+ "tfa.layers.normalizations.GroupNormalization.dynamic": true,
+ "tfa.layers.normalizations.GroupNormalization.from_config": true,
+ "tfa.layers.normalizations.GroupNormalization.get_config": true,
+ "tfa.layers.normalizations.GroupNormalization.get_input_at": true,
+ "tfa.layers.normalizations.GroupNormalization.get_input_mask_at": true,
+ "tfa.layers.normalizations.GroupNormalization.get_input_shape_at": true,
+ "tfa.layers.normalizations.GroupNormalization.get_losses_for": true,
+ "tfa.layers.normalizations.GroupNormalization.get_output_at": true,
+ "tfa.layers.normalizations.GroupNormalization.get_output_mask_at": true,
+ "tfa.layers.normalizations.GroupNormalization.get_output_shape_at": true,
+ "tfa.layers.normalizations.GroupNormalization.get_updates_for": true,
+ "tfa.layers.normalizations.GroupNormalization.get_weights": true,
+ "tfa.layers.normalizations.GroupNormalization.input": true,
+ "tfa.layers.normalizations.GroupNormalization.input_mask": true,
+ "tfa.layers.normalizations.GroupNormalization.input_shape": true,
+ "tfa.layers.normalizations.GroupNormalization.input_spec": true,
+ "tfa.layers.normalizations.GroupNormalization.losses": true,
+ "tfa.layers.normalizations.GroupNormalization.metrics": true,
+ "tfa.layers.normalizations.GroupNormalization.name": true,
+ "tfa.layers.normalizations.GroupNormalization.name_scope": true,
+ "tfa.layers.normalizations.GroupNormalization.non_trainable_variables": true,
+ "tfa.layers.normalizations.GroupNormalization.non_trainable_weights": true,
+ "tfa.layers.normalizations.GroupNormalization.output": true,
+ "tfa.layers.normalizations.GroupNormalization.output_mask": true,
+ "tfa.layers.normalizations.GroupNormalization.output_shape": true,
+ "tfa.layers.normalizations.GroupNormalization.set_weights": true,
+ "tfa.layers.normalizations.GroupNormalization.submodules": true,
+ "tfa.layers.normalizations.GroupNormalization.trainable": true,
+ "tfa.layers.normalizations.GroupNormalization.trainable_variables": true,
+ "tfa.layers.normalizations.GroupNormalization.trainable_weights": true,
+ "tfa.layers.normalizations.GroupNormalization.updates": true,
+ "tfa.layers.normalizations.GroupNormalization.variables": true,
+ "tfa.layers.normalizations.GroupNormalization.weights": true,
+ "tfa.layers.normalizations.GroupNormalization.with_name_scope": true,
+ "tfa.layers.normalizations.InstanceNormalization": false,
+ "tfa.layers.normalizations.InstanceNormalization.__call__": true,
+ "tfa.layers.normalizations.InstanceNormalization.__init__": true,
+ "tfa.layers.normalizations.InstanceNormalization.activity_regularizer": true,
+ "tfa.layers.normalizations.InstanceNormalization.add_loss": true,
+ "tfa.layers.normalizations.InstanceNormalization.add_metric": true,
+ "tfa.layers.normalizations.InstanceNormalization.add_update": true,
+ "tfa.layers.normalizations.InstanceNormalization.add_variable": true,
+ "tfa.layers.normalizations.InstanceNormalization.add_weight": true,
+ "tfa.layers.normalizations.InstanceNormalization.apply": true,
+ "tfa.layers.normalizations.InstanceNormalization.build": true,
+ "tfa.layers.normalizations.InstanceNormalization.call": true,
+ "tfa.layers.normalizations.InstanceNormalization.compute_mask": true,
+ "tfa.layers.normalizations.InstanceNormalization.compute_output_shape": true,
+ "tfa.layers.normalizations.InstanceNormalization.compute_output_signature": true,
+ "tfa.layers.normalizations.InstanceNormalization.count_params": true,
+ "tfa.layers.normalizations.InstanceNormalization.dtype": true,
+ "tfa.layers.normalizations.InstanceNormalization.dynamic": true,
+ "tfa.layers.normalizations.InstanceNormalization.from_config": true,
+ "tfa.layers.normalizations.InstanceNormalization.get_config": true,
+ "tfa.layers.normalizations.InstanceNormalization.get_input_at": true,
+ "tfa.layers.normalizations.InstanceNormalization.get_input_mask_at": true,
+ "tfa.layers.normalizations.InstanceNormalization.get_input_shape_at": true,
+ "tfa.layers.normalizations.InstanceNormalization.get_losses_for": true,
+ "tfa.layers.normalizations.InstanceNormalization.get_output_at": true,
+ "tfa.layers.normalizations.InstanceNormalization.get_output_mask_at": true,
+ "tfa.layers.normalizations.InstanceNormalization.get_output_shape_at": true,
+ "tfa.layers.normalizations.InstanceNormalization.get_updates_for": true,
+ "tfa.layers.normalizations.InstanceNormalization.get_weights": true,
+ "tfa.layers.normalizations.InstanceNormalization.input": true,
+ "tfa.layers.normalizations.InstanceNormalization.input_mask": true,
+ "tfa.layers.normalizations.InstanceNormalization.input_shape": true,
+ "tfa.layers.normalizations.InstanceNormalization.input_spec": true,
+ "tfa.layers.normalizations.InstanceNormalization.losses": true,
+ "tfa.layers.normalizations.InstanceNormalization.metrics": true,
+ "tfa.layers.normalizations.InstanceNormalization.name": true,
+ "tfa.layers.normalizations.InstanceNormalization.name_scope": true,
+ "tfa.layers.normalizations.InstanceNormalization.non_trainable_variables": true,
+ "tfa.layers.normalizations.InstanceNormalization.non_trainable_weights": true,
+ "tfa.layers.normalizations.InstanceNormalization.output": true,
+ "tfa.layers.normalizations.InstanceNormalization.output_mask": true,
+ "tfa.layers.normalizations.InstanceNormalization.output_shape": true,
+ "tfa.layers.normalizations.InstanceNormalization.set_weights": true,
+ "tfa.layers.normalizations.InstanceNormalization.submodules": true,
+ "tfa.layers.normalizations.InstanceNormalization.trainable": true,
+ "tfa.layers.normalizations.InstanceNormalization.trainable_variables": true,
+ "tfa.layers.normalizations.InstanceNormalization.trainable_weights": true,
+ "tfa.layers.normalizations.InstanceNormalization.updates": true,
+ "tfa.layers.normalizations.InstanceNormalization.variables": true,
+ "tfa.layers.normalizations.InstanceNormalization.weights": true,
+ "tfa.layers.normalizations.InstanceNormalization.with_name_scope": true,
+ "tfa.layers.normalizations.absolute_import": true,
+ "tfa.layers.normalizations.division": true,
+ "tfa.layers.normalizations.print_function": true,
+ "tfa.layers.poincare": false,
+ "tfa.layers.poincare.PoincareNormalize": false,
+ "tfa.layers.poincare.PoincareNormalize.__call__": true,
+ "tfa.layers.poincare.PoincareNormalize.__init__": true,
+ "tfa.layers.poincare.PoincareNormalize.activity_regularizer": true,
+ "tfa.layers.poincare.PoincareNormalize.add_loss": true,
+ "tfa.layers.poincare.PoincareNormalize.add_metric": true,
+ "tfa.layers.poincare.PoincareNormalize.add_update": true,
+ "tfa.layers.poincare.PoincareNormalize.add_variable": true,
+ "tfa.layers.poincare.PoincareNormalize.add_weight": true,
+ "tfa.layers.poincare.PoincareNormalize.apply": true,
+ "tfa.layers.poincare.PoincareNormalize.build": true,
+ "tfa.layers.poincare.PoincareNormalize.call": true,
+ "tfa.layers.poincare.PoincareNormalize.compute_mask": true,
+ "tfa.layers.poincare.PoincareNormalize.compute_output_shape": true,
+ "tfa.layers.poincare.PoincareNormalize.compute_output_signature": true,
+ "tfa.layers.poincare.PoincareNormalize.count_params": true,
+ "tfa.layers.poincare.PoincareNormalize.dtype": true,
+ "tfa.layers.poincare.PoincareNormalize.dynamic": true,
+ "tfa.layers.poincare.PoincareNormalize.from_config": true,
+ "tfa.layers.poincare.PoincareNormalize.get_config": true,
+ "tfa.layers.poincare.PoincareNormalize.get_input_at": true,
+ "tfa.layers.poincare.PoincareNormalize.get_input_mask_at": true,
+ "tfa.layers.poincare.PoincareNormalize.get_input_shape_at": true,
+ "tfa.layers.poincare.PoincareNormalize.get_losses_for": true,
+ "tfa.layers.poincare.PoincareNormalize.get_output_at": true,
+ "tfa.layers.poincare.PoincareNormalize.get_output_mask_at": true,
+ "tfa.layers.poincare.PoincareNormalize.get_output_shape_at": true,
+ "tfa.layers.poincare.PoincareNormalize.get_updates_for": true,
+ "tfa.layers.poincare.PoincareNormalize.get_weights": true,
+ "tfa.layers.poincare.PoincareNormalize.input": true,
+ "tfa.layers.poincare.PoincareNormalize.input_mask": true,
+ "tfa.layers.poincare.PoincareNormalize.input_shape": true,
+ "tfa.layers.poincare.PoincareNormalize.input_spec": true,
+ "tfa.layers.poincare.PoincareNormalize.losses": true,
+ "tfa.layers.poincare.PoincareNormalize.metrics": true,
+ "tfa.layers.poincare.PoincareNormalize.name": true,
+ "tfa.layers.poincare.PoincareNormalize.name_scope": true,
+ "tfa.layers.poincare.PoincareNormalize.non_trainable_variables": true,
+ "tfa.layers.poincare.PoincareNormalize.non_trainable_weights": true,
+ "tfa.layers.poincare.PoincareNormalize.output": true,
+ "tfa.layers.poincare.PoincareNormalize.output_mask": true,
+ "tfa.layers.poincare.PoincareNormalize.output_shape": true,
+ "tfa.layers.poincare.PoincareNormalize.set_weights": true,
+ "tfa.layers.poincare.PoincareNormalize.submodules": true,
+ "tfa.layers.poincare.PoincareNormalize.trainable": true,
+ "tfa.layers.poincare.PoincareNormalize.trainable_variables": true,
+ "tfa.layers.poincare.PoincareNormalize.trainable_weights": true,
+ "tfa.layers.poincare.PoincareNormalize.updates": true,
+ "tfa.layers.poincare.PoincareNormalize.variables": true,
+ "tfa.layers.poincare.PoincareNormalize.weights": true,
+ "tfa.layers.poincare.PoincareNormalize.with_name_scope": true,
+ "tfa.layers.poincare.absolute_import": true,
+ "tfa.layers.poincare.division": true,
+ "tfa.layers.poincare.print_function": true,
+ "tfa.layers.print_function": true,
+ "tfa.layers.sparsemax": false,
+ "tfa.layers.sparsemax.Sparsemax": false,
+ "tfa.layers.sparsemax.Sparsemax.__call__": true,
+ "tfa.layers.sparsemax.Sparsemax.__init__": true,
+ "tfa.layers.sparsemax.Sparsemax.activity_regularizer": true,
+ "tfa.layers.sparsemax.Sparsemax.add_loss": true,
+ "tfa.layers.sparsemax.Sparsemax.add_metric": true,
+ "tfa.layers.sparsemax.Sparsemax.add_update": true,
+ "tfa.layers.sparsemax.Sparsemax.add_variable": true,
+ "tfa.layers.sparsemax.Sparsemax.add_weight": true,
+ "tfa.layers.sparsemax.Sparsemax.apply": true,
+ "tfa.layers.sparsemax.Sparsemax.build": true,
+ "tfa.layers.sparsemax.Sparsemax.call": true,
+ "tfa.layers.sparsemax.Sparsemax.compute_mask": true,
+ "tfa.layers.sparsemax.Sparsemax.compute_output_shape": true,
+ "tfa.layers.sparsemax.Sparsemax.compute_output_signature": true,
+ "tfa.layers.sparsemax.Sparsemax.count_params": true,
+ "tfa.layers.sparsemax.Sparsemax.dtype": true,
+ "tfa.layers.sparsemax.Sparsemax.dynamic": true,
+ "tfa.layers.sparsemax.Sparsemax.from_config": true,
+ "tfa.layers.sparsemax.Sparsemax.get_config": true,
+ "tfa.layers.sparsemax.Sparsemax.get_input_at": true,
+ "tfa.layers.sparsemax.Sparsemax.get_input_mask_at": true,
+ "tfa.layers.sparsemax.Sparsemax.get_input_shape_at": true,
+ "tfa.layers.sparsemax.Sparsemax.get_losses_for": true,
+ "tfa.layers.sparsemax.Sparsemax.get_output_at": true,
+ "tfa.layers.sparsemax.Sparsemax.get_output_mask_at": true,
+ "tfa.layers.sparsemax.Sparsemax.get_output_shape_at": true,
+ "tfa.layers.sparsemax.Sparsemax.get_updates_for": true,
+ "tfa.layers.sparsemax.Sparsemax.get_weights": true,
+ "tfa.layers.sparsemax.Sparsemax.input": true,
+ "tfa.layers.sparsemax.Sparsemax.input_mask": true,
+ "tfa.layers.sparsemax.Sparsemax.input_shape": true,
+ "tfa.layers.sparsemax.Sparsemax.input_spec": true,
+ "tfa.layers.sparsemax.Sparsemax.losses": true,
+ "tfa.layers.sparsemax.Sparsemax.metrics": true,
+ "tfa.layers.sparsemax.Sparsemax.name": true,
+ "tfa.layers.sparsemax.Sparsemax.name_scope": true,
+ "tfa.layers.sparsemax.Sparsemax.non_trainable_variables": true,
+ "tfa.layers.sparsemax.Sparsemax.non_trainable_weights": true,
+ "tfa.layers.sparsemax.Sparsemax.output": true,
+ "tfa.layers.sparsemax.Sparsemax.output_mask": true,
+ "tfa.layers.sparsemax.Sparsemax.output_shape": true,
+ "tfa.layers.sparsemax.Sparsemax.set_weights": true,
+ "tfa.layers.sparsemax.Sparsemax.submodules": true,
+ "tfa.layers.sparsemax.Sparsemax.trainable": true,
+ "tfa.layers.sparsemax.Sparsemax.trainable_variables": true,
+ "tfa.layers.sparsemax.Sparsemax.trainable_weights": true,
+ "tfa.layers.sparsemax.Sparsemax.updates": true,
+ "tfa.layers.sparsemax.Sparsemax.variables": true,
+ "tfa.layers.sparsemax.Sparsemax.weights": true,
+ "tfa.layers.sparsemax.Sparsemax.with_name_scope": true,
+ "tfa.layers.sparsemax.absolute_import": true,
+ "tfa.layers.sparsemax.division": true,
+ "tfa.layers.sparsemax.print_function": true,
+ "tfa.layers.sparsemax.sparsemax": false,
+ "tfa.layers.wrappers": false,
+ "tfa.layers.wrappers.WeightNormalization": false,
+ "tfa.layers.wrappers.WeightNormalization.__call__": true,
+ "tfa.layers.wrappers.WeightNormalization.__init__": true,
+ "tfa.layers.wrappers.WeightNormalization.activity_regularizer": true,
+ "tfa.layers.wrappers.WeightNormalization.add_loss": true,
+ "tfa.layers.wrappers.WeightNormalization.add_metric": true,
+ "tfa.layers.wrappers.WeightNormalization.add_update": true,
+ "tfa.layers.wrappers.WeightNormalization.add_variable": true,
+ "tfa.layers.wrappers.WeightNormalization.add_weight": true,
+ "tfa.layers.wrappers.WeightNormalization.apply": true,
+ "tfa.layers.wrappers.WeightNormalization.build": true,
+ "tfa.layers.wrappers.WeightNormalization.call": true,
+ "tfa.layers.wrappers.WeightNormalization.compute_mask": true,
+ "tfa.layers.wrappers.WeightNormalization.compute_output_shape": true,
+ "tfa.layers.wrappers.WeightNormalization.compute_output_signature": true,
+ "tfa.layers.wrappers.WeightNormalization.count_params": true,
+ "tfa.layers.wrappers.WeightNormalization.dtype": true,
+ "tfa.layers.wrappers.WeightNormalization.dynamic": true,
+ "tfa.layers.wrappers.WeightNormalization.from_config": true,
+ "tfa.layers.wrappers.WeightNormalization.get_config": true,
+ "tfa.layers.wrappers.WeightNormalization.get_input_at": true,
+ "tfa.layers.wrappers.WeightNormalization.get_input_mask_at": true,
+ "tfa.layers.wrappers.WeightNormalization.get_input_shape_at": true,
+ "tfa.layers.wrappers.WeightNormalization.get_losses_for": true,
+ "tfa.layers.wrappers.WeightNormalization.get_output_at": true,
+ "tfa.layers.wrappers.WeightNormalization.get_output_mask_at": true,
+ "tfa.layers.wrappers.WeightNormalization.get_output_shape_at": true,
+ "tfa.layers.wrappers.WeightNormalization.get_updates_for": true,
+ "tfa.layers.wrappers.WeightNormalization.get_weights": true,
+ "tfa.layers.wrappers.WeightNormalization.input": true,
+ "tfa.layers.wrappers.WeightNormalization.input_mask": true,
+ "tfa.layers.wrappers.WeightNormalization.input_shape": true,
+ "tfa.layers.wrappers.WeightNormalization.input_spec": true,
+ "tfa.layers.wrappers.WeightNormalization.losses": true,
+ "tfa.layers.wrappers.WeightNormalization.metrics": true,
+ "tfa.layers.wrappers.WeightNormalization.name": true,
+ "tfa.layers.wrappers.WeightNormalization.name_scope": true,
+ "tfa.layers.wrappers.WeightNormalization.non_trainable_variables": true,
+ "tfa.layers.wrappers.WeightNormalization.non_trainable_weights": true,
+ "tfa.layers.wrappers.WeightNormalization.output": true,
+ "tfa.layers.wrappers.WeightNormalization.output_mask": true,
+ "tfa.layers.wrappers.WeightNormalization.output_shape": true,
+ "tfa.layers.wrappers.WeightNormalization.set_weights": true,
+ "tfa.layers.wrappers.WeightNormalization.submodules": true,
+ "tfa.layers.wrappers.WeightNormalization.trainable": true,
+ "tfa.layers.wrappers.WeightNormalization.trainable_variables": true,
+ "tfa.layers.wrappers.WeightNormalization.trainable_weights": true,
+ "tfa.layers.wrappers.WeightNormalization.updates": true,
+ "tfa.layers.wrappers.WeightNormalization.variables": true,
+ "tfa.layers.wrappers.WeightNormalization.weights": true,
+ "tfa.layers.wrappers.WeightNormalization.with_name_scope": true,
+ "tfa.layers.wrappers.absolute_import": true,
+ "tfa.layers.wrappers.division": true,
+ "tfa.layers.wrappers.print_function": true,
+ "tfa.losses": false,
+ "tfa.losses.ContrastiveLoss": false,
+ "tfa.losses.ContrastiveLoss.__call__": true,
+ "tfa.losses.ContrastiveLoss.__init__": true,
+ "tfa.losses.ContrastiveLoss.call": true,
+ "tfa.losses.ContrastiveLoss.from_config": true,
+ "tfa.losses.ContrastiveLoss.get_config": true,
+ "tfa.losses.LiftedStructLoss": false,
+ "tfa.losses.LiftedStructLoss.__call__": true,
+ "tfa.losses.LiftedStructLoss.__init__": true,
+ "tfa.losses.LiftedStructLoss.call": true,
+ "tfa.losses.LiftedStructLoss.from_config": true,
+ "tfa.losses.LiftedStructLoss.get_config": true,
+ "tfa.losses.SigmoidFocalCrossEntropy": false,
+ "tfa.losses.SigmoidFocalCrossEntropy.__call__": true,
+ "tfa.losses.SigmoidFocalCrossEntropy.__init__": true,
+ "tfa.losses.SigmoidFocalCrossEntropy.call": true,
+ "tfa.losses.SigmoidFocalCrossEntropy.from_config": true,
+ "tfa.losses.SigmoidFocalCrossEntropy.get_config": true,
+ "tfa.losses.SparsemaxLoss": false,
+ "tfa.losses.SparsemaxLoss.__call__": true,
+ "tfa.losses.SparsemaxLoss.__init__": true,
+ "tfa.losses.SparsemaxLoss.call": true,
+ "tfa.losses.SparsemaxLoss.from_config": true,
+ "tfa.losses.SparsemaxLoss.get_config": true,
+ "tfa.losses.TripletSemiHardLoss": false,
+ "tfa.losses.TripletSemiHardLoss.__call__": true,
+ "tfa.losses.TripletSemiHardLoss.__init__": true,
+ "tfa.losses.TripletSemiHardLoss.call": true,
+ "tfa.losses.TripletSemiHardLoss.from_config": true,
+ "tfa.losses.TripletSemiHardLoss.get_config": true,
+ "tfa.losses.absolute_import": true,
+ "tfa.losses.contrastive": false,
+ "tfa.losses.contrastive.ContrastiveLoss": false,
+ "tfa.losses.contrastive.ContrastiveLoss.__call__": true,
+ "tfa.losses.contrastive.ContrastiveLoss.__init__": true,
+ "tfa.losses.contrastive.ContrastiveLoss.call": true,
+ "tfa.losses.contrastive.ContrastiveLoss.from_config": true,
+ "tfa.losses.contrastive.ContrastiveLoss.get_config": true,
+ "tfa.losses.contrastive.absolute_import": true,
+ "tfa.losses.contrastive.contrastive_loss": false,
+ "tfa.losses.contrastive.division": true,
+ "tfa.losses.contrastive.print_function": true,
+ "tfa.losses.contrastive_loss": false,
+ "tfa.losses.division": true,
+ "tfa.losses.focal_loss": false,
+ "tfa.losses.focal_loss.SigmoidFocalCrossEntropy": false,
+ "tfa.losses.focal_loss.SigmoidFocalCrossEntropy.__call__": true,
+ "tfa.losses.focal_loss.SigmoidFocalCrossEntropy.__init__": true,
+ "tfa.losses.focal_loss.SigmoidFocalCrossEntropy.call": true,
+ "tfa.losses.focal_loss.SigmoidFocalCrossEntropy.from_config": true,
+ "tfa.losses.focal_loss.SigmoidFocalCrossEntropy.get_config": true,
+ "tfa.losses.focal_loss.absolute_import": true,
+ "tfa.losses.focal_loss.division": true,
+ "tfa.losses.focal_loss.print_function": true,
+ "tfa.losses.focal_loss.sigmoid_focal_crossentropy": false,
+ "tfa.losses.lifted": false,
+ "tfa.losses.lifted.LiftedStructLoss": false,
+ "tfa.losses.lifted.LiftedStructLoss.__call__": true,
+ "tfa.losses.lifted.LiftedStructLoss.__init__": true,
+ "tfa.losses.lifted.LiftedStructLoss.call": true,
+ "tfa.losses.lifted.LiftedStructLoss.from_config": true,
+ "tfa.losses.lifted.LiftedStructLoss.get_config": true,
+ "tfa.losses.lifted.absolute_import": true,
+ "tfa.losses.lifted.division": true,
+ "tfa.losses.lifted.lifted_struct_loss": false,
+ "tfa.losses.lifted.print_function": true,
+ "tfa.losses.lifted_struct_loss": false,
+ "tfa.losses.metric_learning": false,
+ "tfa.losses.metric_learning.absolute_import": true,
+ "tfa.losses.metric_learning.division": true,
+ "tfa.losses.metric_learning.pairwise_distance": false,
+ "tfa.losses.metric_learning.print_function": true,
+ "tfa.losses.print_function": true,
+ "tfa.losses.sigmoid_focal_crossentropy": false,
+ "tfa.losses.sparsemax_loss": false,
+ "tfa.losses.triplet": false,
+ "tfa.losses.triplet.TripletSemiHardLoss": false,
+ "tfa.losses.triplet.TripletSemiHardLoss.__call__": true,
+ "tfa.losses.triplet.TripletSemiHardLoss.__init__": true,
+ "tfa.losses.triplet.TripletSemiHardLoss.call": true,
+ "tfa.losses.triplet.TripletSemiHardLoss.from_config": true,
+ "tfa.losses.triplet.TripletSemiHardLoss.get_config": true,
+ "tfa.losses.triplet.absolute_import": true,
+ "tfa.losses.triplet.division": true,
+ "tfa.losses.triplet.print_function": true,
+ "tfa.losses.triplet.triplet_semihard_loss": false,
+ "tfa.losses.triplet_semihard_loss": false,
+ "tfa.metrics": false,
+ "tfa.metrics.CohenKappa": false,
+ "tfa.metrics.CohenKappa.__call__": true,
+ "tfa.metrics.CohenKappa.__init__": true,
+ "tfa.metrics.CohenKappa.activity_regularizer": true,
+ "tfa.metrics.CohenKappa.add_loss": true,
+ "tfa.metrics.CohenKappa.add_metric": true,
+ "tfa.metrics.CohenKappa.add_update": true,
+ "tfa.metrics.CohenKappa.add_variable": true,
+ "tfa.metrics.CohenKappa.add_weight": true,
+ "tfa.metrics.CohenKappa.apply": true,
+ "tfa.metrics.CohenKappa.build": true,
+ "tfa.metrics.CohenKappa.call": true,
+ "tfa.metrics.CohenKappa.compute_mask": true,
+ "tfa.metrics.CohenKappa.compute_output_shape": true,
+ "tfa.metrics.CohenKappa.compute_output_signature": true,
+ "tfa.metrics.CohenKappa.count_params": true,
+ "tfa.metrics.CohenKappa.dtype": true,
+ "tfa.metrics.CohenKappa.dynamic": true,
+ "tfa.metrics.CohenKappa.from_config": true,
+ "tfa.metrics.CohenKappa.get_config": true,
+ "tfa.metrics.CohenKappa.get_input_at": true,
+ "tfa.metrics.CohenKappa.get_input_mask_at": true,
+ "tfa.metrics.CohenKappa.get_input_shape_at": true,
+ "tfa.metrics.CohenKappa.get_losses_for": true,
+ "tfa.metrics.CohenKappa.get_output_at": true,
+ "tfa.metrics.CohenKappa.get_output_mask_at": true,
+ "tfa.metrics.CohenKappa.get_output_shape_at": true,
+ "tfa.metrics.CohenKappa.get_updates_for": true,
+ "tfa.metrics.CohenKappa.get_weights": true,
+ "tfa.metrics.CohenKappa.input": true,
+ "tfa.metrics.CohenKappa.input_mask": true,
+ "tfa.metrics.CohenKappa.input_shape": true,
+ "tfa.metrics.CohenKappa.input_spec": true,
+ "tfa.metrics.CohenKappa.losses": true,
+ "tfa.metrics.CohenKappa.metrics": true,
+ "tfa.metrics.CohenKappa.name": true,
+ "tfa.metrics.CohenKappa.name_scope": true,
+ "tfa.metrics.CohenKappa.non_trainable_variables": true,
+ "tfa.metrics.CohenKappa.non_trainable_weights": true,
+ "tfa.metrics.CohenKappa.output": true,
+ "tfa.metrics.CohenKappa.output_mask": true,
+ "tfa.metrics.CohenKappa.output_shape": true,
+ "tfa.metrics.CohenKappa.reset_states": true,
+ "tfa.metrics.CohenKappa.result": true,
+ "tfa.metrics.CohenKappa.set_weights": true,
+ "tfa.metrics.CohenKappa.submodules": true,
+ "tfa.metrics.CohenKappa.trainable": true,
+ "tfa.metrics.CohenKappa.trainable_variables": true,
+ "tfa.metrics.CohenKappa.trainable_weights": true,
+ "tfa.metrics.CohenKappa.update_state": true,
+ "tfa.metrics.CohenKappa.updates": true,
+ "tfa.metrics.CohenKappa.variables": true,
+ "tfa.metrics.CohenKappa.weights": true,
+ "tfa.metrics.CohenKappa.with_name_scope": true,
+ "tfa.metrics.absolute_import": true,
+ "tfa.metrics.cohens_kappa": false,
+ "tfa.metrics.cohens_kappa.CohenKappa": false,
+ "tfa.metrics.cohens_kappa.CohenKappa.__call__": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.__init__": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.activity_regularizer": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.add_loss": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.add_metric": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.add_update": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.add_variable": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.add_weight": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.apply": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.build": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.call": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.compute_mask": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.compute_output_shape": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.compute_output_signature": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.count_params": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.dtype": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.dynamic": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.from_config": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.get_config": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.get_input_at": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.get_input_mask_at": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.get_input_shape_at": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.get_losses_for": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.get_output_at": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.get_output_mask_at": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.get_output_shape_at": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.get_updates_for": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.get_weights": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.input": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.input_mask": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.input_shape": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.input_spec": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.losses": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.metrics": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.name": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.name_scope": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.non_trainable_variables": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.non_trainable_weights": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.output": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.output_mask": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.output_shape": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.reset_states": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.result": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.set_weights": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.submodules": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.trainable": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.trainable_variables": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.trainable_weights": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.update_state": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.updates": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.variables": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.weights": true,
+ "tfa.metrics.cohens_kappa.CohenKappa.with_name_scope": true,
+ "tfa.metrics.cohens_kappa.absolute_import": true,
+ "tfa.metrics.cohens_kappa.division": true,
+ "tfa.metrics.cohens_kappa.print_function": true,
+ "tfa.metrics.division": true,
+ "tfa.metrics.print_function": true,
+ "tfa.optimizers": false,
+ "tfa.optimizers.AdamW": false,
+ "tfa.optimizers.AdamW.__init__": true,
+ "tfa.optimizers.AdamW.add_slot": true,
+ "tfa.optimizers.AdamW.add_weight": true,
+ "tfa.optimizers.AdamW.apply_gradients": true,
+ "tfa.optimizers.AdamW.from_config": true,
+ "tfa.optimizers.AdamW.get_config": true,
+ "tfa.optimizers.AdamW.get_gradients": true,
+ "tfa.optimizers.AdamW.get_slot": true,
+ "tfa.optimizers.AdamW.get_slot_names": true,
+ "tfa.optimizers.AdamW.get_updates": true,
+ "tfa.optimizers.AdamW.get_weights": true,
+ "tfa.optimizers.AdamW.iterations": true,
+ "tfa.optimizers.AdamW.minimize": true,
+ "tfa.optimizers.AdamW.set_weights": true,
+ "tfa.optimizers.AdamW.variables": true,
+ "tfa.optimizers.AdamW.weights": true,
+ "tfa.optimizers.LazyAdam": false,
+ "tfa.optimizers.LazyAdam.__init__": true,
+ "tfa.optimizers.LazyAdam.add_slot": true,
+ "tfa.optimizers.LazyAdam.add_weight": true,
+ "tfa.optimizers.LazyAdam.apply_gradients": true,
+ "tfa.optimizers.LazyAdam.from_config": true,
+ "tfa.optimizers.LazyAdam.get_config": true,
+ "tfa.optimizers.LazyAdam.get_gradients": true,
+ "tfa.optimizers.LazyAdam.get_slot": true,
+ "tfa.optimizers.LazyAdam.get_slot_names": true,
+ "tfa.optimizers.LazyAdam.get_updates": true,
+ "tfa.optimizers.LazyAdam.get_weights": true,
+ "tfa.optimizers.LazyAdam.iterations": true,
+ "tfa.optimizers.LazyAdam.minimize": true,
+ "tfa.optimizers.LazyAdam.set_weights": true,
+ "tfa.optimizers.LazyAdam.variables": true,
+ "tfa.optimizers.LazyAdam.weights": true,
+ "tfa.optimizers.MovingAverage": false,
+ "tfa.optimizers.MovingAverage.__init__": true,
+ "tfa.optimizers.MovingAverage.add_slot": true,
+ "tfa.optimizers.MovingAverage.add_weight": true,
+ "tfa.optimizers.MovingAverage.apply_gradients": true,
+ "tfa.optimizers.MovingAverage.assign_average_vars": true,
+ "tfa.optimizers.MovingAverage.from_config": true,
+ "tfa.optimizers.MovingAverage.get_config": true,
+ "tfa.optimizers.MovingAverage.get_gradients": true,
+ "tfa.optimizers.MovingAverage.get_slot": true,
+ "tfa.optimizers.MovingAverage.get_slot_names": true,
+ "tfa.optimizers.MovingAverage.get_updates": true,
+ "tfa.optimizers.MovingAverage.get_weights": true,
+ "tfa.optimizers.MovingAverage.iterations": true,
+ "tfa.optimizers.MovingAverage.minimize": true,
+ "tfa.optimizers.MovingAverage.set_weights": true,
+ "tfa.optimizers.MovingAverage.variables": true,
+ "tfa.optimizers.MovingAverage.weights": true,
+ "tfa.optimizers.SGDW": false,
+ "tfa.optimizers.SGDW.__init__": true,
+ "tfa.optimizers.SGDW.add_slot": true,
+ "tfa.optimizers.SGDW.add_weight": true,
+ "tfa.optimizers.SGDW.apply_gradients": true,
+ "tfa.optimizers.SGDW.from_config": true,
+ "tfa.optimizers.SGDW.get_config": true,
+ "tfa.optimizers.SGDW.get_gradients": true,
+ "tfa.optimizers.SGDW.get_slot": true,
+ "tfa.optimizers.SGDW.get_slot_names": true,
+ "tfa.optimizers.SGDW.get_updates": true,
+ "tfa.optimizers.SGDW.get_weights": true,
+ "tfa.optimizers.SGDW.iterations": true,
+ "tfa.optimizers.SGDW.minimize": true,
+ "tfa.optimizers.SGDW.set_weights": true,
+ "tfa.optimizers.SGDW.variables": true,
+ "tfa.optimizers.SGDW.weights": true,
+ "tfa.optimizers.absolute_import": true,
+ "tfa.optimizers.division": true,
+ "tfa.optimizers.extend_with_decoupled_weight_decay": false,
+ "tfa.optimizers.lazy_adam": false,
+ "tfa.optimizers.lazy_adam.LazyAdam": false,
+ "tfa.optimizers.lazy_adam.LazyAdam.__init__": true,
+ "tfa.optimizers.lazy_adam.LazyAdam.add_slot": true,
+ "tfa.optimizers.lazy_adam.LazyAdam.add_weight": true,
+ "tfa.optimizers.lazy_adam.LazyAdam.apply_gradients": true,
+ "tfa.optimizers.lazy_adam.LazyAdam.from_config": true,
+ "tfa.optimizers.lazy_adam.LazyAdam.get_config": true,
+ "tfa.optimizers.lazy_adam.LazyAdam.get_gradients": true,
+ "tfa.optimizers.lazy_adam.LazyAdam.get_slot": true,
+ "tfa.optimizers.lazy_adam.LazyAdam.get_slot_names": true,
+ "tfa.optimizers.lazy_adam.LazyAdam.get_updates": true,
+ "tfa.optimizers.lazy_adam.LazyAdam.get_weights": true,
+ "tfa.optimizers.lazy_adam.LazyAdam.iterations": true,
+ "tfa.optimizers.lazy_adam.LazyAdam.minimize": true,
+ "tfa.optimizers.lazy_adam.LazyAdam.set_weights": true,
+ "tfa.optimizers.lazy_adam.LazyAdam.variables": true,
+ "tfa.optimizers.lazy_adam.LazyAdam.weights": true,
+ "tfa.optimizers.lazy_adam.absolute_import": true,
+ "tfa.optimizers.lazy_adam.division": true,
+ "tfa.optimizers.lazy_adam.print_function": true,
+ "tfa.optimizers.moving_average": false,
+ "tfa.optimizers.moving_average.MovingAverage": false,
+ "tfa.optimizers.moving_average.MovingAverage.__init__": true,
+ "tfa.optimizers.moving_average.MovingAverage.add_slot": true,
+ "tfa.optimizers.moving_average.MovingAverage.add_weight": true,
+ "tfa.optimizers.moving_average.MovingAverage.apply_gradients": true,
+ "tfa.optimizers.moving_average.MovingAverage.assign_average_vars": true,
+ "tfa.optimizers.moving_average.MovingAverage.from_config": true,
+ "tfa.optimizers.moving_average.MovingAverage.get_config": true,
+ "tfa.optimizers.moving_average.MovingAverage.get_gradients": true,
+ "tfa.optimizers.moving_average.MovingAverage.get_slot": true,
+ "tfa.optimizers.moving_average.MovingAverage.get_slot_names": true,
+ "tfa.optimizers.moving_average.MovingAverage.get_updates": true,
+ "tfa.optimizers.moving_average.MovingAverage.get_weights": true,
+ "tfa.optimizers.moving_average.MovingAverage.iterations": true,
+ "tfa.optimizers.moving_average.MovingAverage.minimize": true,
+ "tfa.optimizers.moving_average.MovingAverage.set_weights": true,
+ "tfa.optimizers.moving_average.MovingAverage.variables": true,
+ "tfa.optimizers.moving_average.MovingAverage.weights": true,
+ "tfa.optimizers.moving_average.absolute_import": true,
+ "tfa.optimizers.moving_average.division": true,
+ "tfa.optimizers.moving_average.print_function": true,
+ "tfa.optimizers.print_function": true,
+ "tfa.optimizers.weight_decay_optimizers": false,
+ "tfa.optimizers.weight_decay_optimizers.AdamW": false,
+ "tfa.optimizers.weight_decay_optimizers.AdamW.__init__": true,
+ "tfa.optimizers.weight_decay_optimizers.AdamW.add_slot": true,
+ "tfa.optimizers.weight_decay_optimizers.AdamW.add_weight": true,
+ "tfa.optimizers.weight_decay_optimizers.AdamW.apply_gradients": true,
+ "tfa.optimizers.weight_decay_optimizers.AdamW.from_config": true,
+ "tfa.optimizers.weight_decay_optimizers.AdamW.get_config": true,
+ "tfa.optimizers.weight_decay_optimizers.AdamW.get_gradients": true,
+ "tfa.optimizers.weight_decay_optimizers.AdamW.get_slot": true,
+ "tfa.optimizers.weight_decay_optimizers.AdamW.get_slot_names": true,
+ "tfa.optimizers.weight_decay_optimizers.AdamW.get_updates": true,
+ "tfa.optimizers.weight_decay_optimizers.AdamW.get_weights": true,
+ "tfa.optimizers.weight_decay_optimizers.AdamW.iterations": true,
+ "tfa.optimizers.weight_decay_optimizers.AdamW.minimize": true,
+ "tfa.optimizers.weight_decay_optimizers.AdamW.set_weights": true,
+ "tfa.optimizers.weight_decay_optimizers.AdamW.variables": true,
+ "tfa.optimizers.weight_decay_optimizers.AdamW.weights": true,
+ "tfa.optimizers.weight_decay_optimizers.DecoupledWeightDecayExtension": false,
+ "tfa.optimizers.weight_decay_optimizers.DecoupledWeightDecayExtension.__init__": true,
+ "tfa.optimizers.weight_decay_optimizers.DecoupledWeightDecayExtension.apply_gradients": true,
+ "tfa.optimizers.weight_decay_optimizers.DecoupledWeightDecayExtension.get_config": true,
+ "tfa.optimizers.weight_decay_optimizers.DecoupledWeightDecayExtension.minimize": true,
+ "tfa.optimizers.weight_decay_optimizers.SGDW": false,
+ "tfa.optimizers.weight_decay_optimizers.SGDW.__init__": true,
+ "tfa.optimizers.weight_decay_optimizers.SGDW.add_slot": true,
+ "tfa.optimizers.weight_decay_optimizers.SGDW.add_weight": true,
+ "tfa.optimizers.weight_decay_optimizers.SGDW.apply_gradients": true,
+ "tfa.optimizers.weight_decay_optimizers.SGDW.from_config": true,
+ "tfa.optimizers.weight_decay_optimizers.SGDW.get_config": true,
+ "tfa.optimizers.weight_decay_optimizers.SGDW.get_gradients": true,
+ "tfa.optimizers.weight_decay_optimizers.SGDW.get_slot": true,
+ "tfa.optimizers.weight_decay_optimizers.SGDW.get_slot_names": true,
+ "tfa.optimizers.weight_decay_optimizers.SGDW.get_updates": true,
+ "tfa.optimizers.weight_decay_optimizers.SGDW.get_weights": true,
+ "tfa.optimizers.weight_decay_optimizers.SGDW.iterations": true,
+ "tfa.optimizers.weight_decay_optimizers.SGDW.minimize": true,
+ "tfa.optimizers.weight_decay_optimizers.SGDW.set_weights": true,
+ "tfa.optimizers.weight_decay_optimizers.SGDW.variables": true,
+ "tfa.optimizers.weight_decay_optimizers.SGDW.weights": true,
+ "tfa.optimizers.weight_decay_optimizers.absolute_import": true,
+ "tfa.optimizers.weight_decay_optimizers.division": true,
+ "tfa.optimizers.weight_decay_optimizers.extend_with_decoupled_weight_decay": false,
+ "tfa.optimizers.weight_decay_optimizers.print_function": true,
+ "tfa.rnn": false,
+ "tfa.rnn.LayerNormLSTMCell": false,
+ "tfa.rnn.LayerNormLSTMCell.__call__": true,
+ "tfa.rnn.LayerNormLSTMCell.__init__": true,
+ "tfa.rnn.LayerNormLSTMCell.activity_regularizer": true,
+ "tfa.rnn.LayerNormLSTMCell.add_loss": true,
+ "tfa.rnn.LayerNormLSTMCell.add_metric": true,
+ "tfa.rnn.LayerNormLSTMCell.add_update": true,
+ "tfa.rnn.LayerNormLSTMCell.add_variable": true,
+ "tfa.rnn.LayerNormLSTMCell.add_weight": true,
+ "tfa.rnn.LayerNormLSTMCell.apply": true,
+ "tfa.rnn.LayerNormLSTMCell.build": true,
+ "tfa.rnn.LayerNormLSTMCell.call": true,
+ "tfa.rnn.LayerNormLSTMCell.compute_mask": true,
+ "tfa.rnn.LayerNormLSTMCell.compute_output_shape": true,
+ "tfa.rnn.LayerNormLSTMCell.compute_output_signature": true,
+ "tfa.rnn.LayerNormLSTMCell.count_params": true,
+ "tfa.rnn.LayerNormLSTMCell.dtype": true,
+ "tfa.rnn.LayerNormLSTMCell.dynamic": true,
+ "tfa.rnn.LayerNormLSTMCell.from_config": true,
+ "tfa.rnn.LayerNormLSTMCell.get_config": true,
+ "tfa.rnn.LayerNormLSTMCell.get_dropout_mask_for_cell": true,
+ "tfa.rnn.LayerNormLSTMCell.get_initial_state": true,
+ "tfa.rnn.LayerNormLSTMCell.get_input_at": true,
+ "tfa.rnn.LayerNormLSTMCell.get_input_mask_at": true,
+ "tfa.rnn.LayerNormLSTMCell.get_input_shape_at": true,
+ "tfa.rnn.LayerNormLSTMCell.get_losses_for": true,
+ "tfa.rnn.LayerNormLSTMCell.get_output_at": true,
+ "tfa.rnn.LayerNormLSTMCell.get_output_mask_at": true,
+ "tfa.rnn.LayerNormLSTMCell.get_output_shape_at": true,
+ "tfa.rnn.LayerNormLSTMCell.get_recurrent_dropout_mask_for_cell": true,
+ "tfa.rnn.LayerNormLSTMCell.get_updates_for": true,
+ "tfa.rnn.LayerNormLSTMCell.get_weights": true,
+ "tfa.rnn.LayerNormLSTMCell.input": true,
+ "tfa.rnn.LayerNormLSTMCell.input_mask": true,
+ "tfa.rnn.LayerNormLSTMCell.input_shape": true,
+ "tfa.rnn.LayerNormLSTMCell.input_spec": true,
+ "tfa.rnn.LayerNormLSTMCell.losses": true,
+ "tfa.rnn.LayerNormLSTMCell.metrics": true,
+ "tfa.rnn.LayerNormLSTMCell.name": true,
+ "tfa.rnn.LayerNormLSTMCell.name_scope": true,
+ "tfa.rnn.LayerNormLSTMCell.non_trainable_variables": true,
+ "tfa.rnn.LayerNormLSTMCell.non_trainable_weights": true,
+ "tfa.rnn.LayerNormLSTMCell.output": true,
+ "tfa.rnn.LayerNormLSTMCell.output_mask": true,
+ "tfa.rnn.LayerNormLSTMCell.output_shape": true,
+ "tfa.rnn.LayerNormLSTMCell.reset_dropout_mask": true,
+ "tfa.rnn.LayerNormLSTMCell.reset_recurrent_dropout_mask": true,
+ "tfa.rnn.LayerNormLSTMCell.set_weights": true,
+ "tfa.rnn.LayerNormLSTMCell.submodules": true,
+ "tfa.rnn.LayerNormLSTMCell.trainable": true,
+ "tfa.rnn.LayerNormLSTMCell.trainable_variables": true,
+ "tfa.rnn.LayerNormLSTMCell.trainable_weights": true,
+ "tfa.rnn.LayerNormLSTMCell.updates": true,
+ "tfa.rnn.LayerNormLSTMCell.variables": true,
+ "tfa.rnn.LayerNormLSTMCell.weights": true,
+ "tfa.rnn.LayerNormLSTMCell.with_name_scope": true,
+ "tfa.rnn.NASCell": false,
+ "tfa.rnn.NASCell.__call__": true,
+ "tfa.rnn.NASCell.__init__": true,
+ "tfa.rnn.NASCell.activity_regularizer": true,
+ "tfa.rnn.NASCell.add_loss": true,
+ "tfa.rnn.NASCell.add_metric": true,
+ "tfa.rnn.NASCell.add_update": true,
+ "tfa.rnn.NASCell.add_variable": true,
+ "tfa.rnn.NASCell.add_weight": true,
+ "tfa.rnn.NASCell.apply": true,
+ "tfa.rnn.NASCell.build": true,
+ "tfa.rnn.NASCell.call": true,
+ "tfa.rnn.NASCell.compute_mask": true,
+ "tfa.rnn.NASCell.compute_output_shape": true,
+ "tfa.rnn.NASCell.compute_output_signature": true,
+ "tfa.rnn.NASCell.count_params": true,
+ "tfa.rnn.NASCell.dtype": true,
+ "tfa.rnn.NASCell.dynamic": true,
+ "tfa.rnn.NASCell.from_config": true,
+ "tfa.rnn.NASCell.get_config": true,
+ "tfa.rnn.NASCell.get_initial_state": true,
+ "tfa.rnn.NASCell.get_input_at": true,
+ "tfa.rnn.NASCell.get_input_mask_at": true,
+ "tfa.rnn.NASCell.get_input_shape_at": true,
+ "tfa.rnn.NASCell.get_losses_for": true,
+ "tfa.rnn.NASCell.get_output_at": true,
+ "tfa.rnn.NASCell.get_output_mask_at": true,
+ "tfa.rnn.NASCell.get_output_shape_at": true,
+ "tfa.rnn.NASCell.get_updates_for": true,
+ "tfa.rnn.NASCell.get_weights": true,
+ "tfa.rnn.NASCell.input": true,
+ "tfa.rnn.NASCell.input_mask": true,
+ "tfa.rnn.NASCell.input_shape": true,
+ "tfa.rnn.NASCell.input_spec": true,
+ "tfa.rnn.NASCell.losses": true,
+ "tfa.rnn.NASCell.metrics": true,
+ "tfa.rnn.NASCell.name": true,
+ "tfa.rnn.NASCell.name_scope": true,
+ "tfa.rnn.NASCell.non_trainable_variables": true,
+ "tfa.rnn.NASCell.non_trainable_weights": true,
+ "tfa.rnn.NASCell.output": true,
+ "tfa.rnn.NASCell.output_mask": true,
+ "tfa.rnn.NASCell.output_shape": true,
+ "tfa.rnn.NASCell.output_size": true,
+ "tfa.rnn.NASCell.set_weights": true,
+ "tfa.rnn.NASCell.state_size": true,
+ "tfa.rnn.NASCell.submodules": true,
+ "tfa.rnn.NASCell.trainable": true,
+ "tfa.rnn.NASCell.trainable_variables": true,
+ "tfa.rnn.NASCell.trainable_weights": true,
+ "tfa.rnn.NASCell.updates": true,
+ "tfa.rnn.NASCell.variables": true,
+ "tfa.rnn.NASCell.weights": true,
+ "tfa.rnn.NASCell.with_name_scope": true,
+ "tfa.rnn.absolute_import": true,
+ "tfa.rnn.cell": false,
+ "tfa.rnn.cell.LayerNormLSTMCell": false,
+ "tfa.rnn.cell.LayerNormLSTMCell.__call__": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.__init__": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.activity_regularizer": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.add_loss": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.add_metric": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.add_update": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.add_variable": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.add_weight": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.apply": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.build": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.call": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.compute_mask": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.compute_output_shape": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.compute_output_signature": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.count_params": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.dtype": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.dynamic": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.from_config": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.get_config": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.get_dropout_mask_for_cell": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.get_initial_state": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.get_input_at": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.get_input_mask_at": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.get_input_shape_at": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.get_losses_for": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.get_output_at": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.get_output_mask_at": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.get_output_shape_at": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.get_recurrent_dropout_mask_for_cell": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.get_updates_for": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.get_weights": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.input": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.input_mask": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.input_shape": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.input_spec": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.losses": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.metrics": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.name": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.name_scope": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.non_trainable_variables": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.non_trainable_weights": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.output": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.output_mask": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.output_shape": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.reset_dropout_mask": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.reset_recurrent_dropout_mask": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.set_weights": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.submodules": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.trainable": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.trainable_variables": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.trainable_weights": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.updates": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.variables": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.weights": true,
+ "tfa.rnn.cell.LayerNormLSTMCell.with_name_scope": true,
+ "tfa.rnn.cell.NASCell": false,
+ "tfa.rnn.cell.NASCell.__call__": true,
+ "tfa.rnn.cell.NASCell.__init__": true,
+ "tfa.rnn.cell.NASCell.activity_regularizer": true,
+ "tfa.rnn.cell.NASCell.add_loss": true,
+ "tfa.rnn.cell.NASCell.add_metric": true,
+ "tfa.rnn.cell.NASCell.add_update": true,
+ "tfa.rnn.cell.NASCell.add_variable": true,
+ "tfa.rnn.cell.NASCell.add_weight": true,
+ "tfa.rnn.cell.NASCell.apply": true,
+ "tfa.rnn.cell.NASCell.build": true,
+ "tfa.rnn.cell.NASCell.call": true,
+ "tfa.rnn.cell.NASCell.compute_mask": true,
+ "tfa.rnn.cell.NASCell.compute_output_shape": true,
+ "tfa.rnn.cell.NASCell.compute_output_signature": true,
+ "tfa.rnn.cell.NASCell.count_params": true,
+ "tfa.rnn.cell.NASCell.dtype": true,
+ "tfa.rnn.cell.NASCell.dynamic": true,
+ "tfa.rnn.cell.NASCell.from_config": true,
+ "tfa.rnn.cell.NASCell.get_config": true,
+ "tfa.rnn.cell.NASCell.get_initial_state": true,
+ "tfa.rnn.cell.NASCell.get_input_at": true,
+ "tfa.rnn.cell.NASCell.get_input_mask_at": true,
+ "tfa.rnn.cell.NASCell.get_input_shape_at": true,
+ "tfa.rnn.cell.NASCell.get_losses_for": true,
+ "tfa.rnn.cell.NASCell.get_output_at": true,
+ "tfa.rnn.cell.NASCell.get_output_mask_at": true,
+ "tfa.rnn.cell.NASCell.get_output_shape_at": true,
+ "tfa.rnn.cell.NASCell.get_updates_for": true,
+ "tfa.rnn.cell.NASCell.get_weights": true,
+ "tfa.rnn.cell.NASCell.input": true,
+ "tfa.rnn.cell.NASCell.input_mask": true,
+ "tfa.rnn.cell.NASCell.input_shape": true,
+ "tfa.rnn.cell.NASCell.input_spec": true,
+ "tfa.rnn.cell.NASCell.losses": true,
+ "tfa.rnn.cell.NASCell.metrics": true,
+ "tfa.rnn.cell.NASCell.name": true,
+ "tfa.rnn.cell.NASCell.name_scope": true,
+ "tfa.rnn.cell.NASCell.non_trainable_variables": true,
+ "tfa.rnn.cell.NASCell.non_trainable_weights": true,
+ "tfa.rnn.cell.NASCell.output": true,
+ "tfa.rnn.cell.NASCell.output_mask": true,
+ "tfa.rnn.cell.NASCell.output_shape": true,
+ "tfa.rnn.cell.NASCell.output_size": true,
+ "tfa.rnn.cell.NASCell.set_weights": true,
+ "tfa.rnn.cell.NASCell.state_size": true,
+ "tfa.rnn.cell.NASCell.submodules": true,
+ "tfa.rnn.cell.NASCell.trainable": true,
+ "tfa.rnn.cell.NASCell.trainable_variables": true,
+ "tfa.rnn.cell.NASCell.trainable_weights": true,
+ "tfa.rnn.cell.NASCell.updates": true,
+ "tfa.rnn.cell.NASCell.variables": true,
+ "tfa.rnn.cell.NASCell.weights": true,
+ "tfa.rnn.cell.NASCell.with_name_scope": true,
+ "tfa.rnn.cell.absolute_import": true,
+ "tfa.rnn.cell.division": true,
+ "tfa.rnn.cell.print_function": true,
+ "tfa.rnn.division": true,
+ "tfa.rnn.print_function": true,
+ "tfa.seq2seq": false,
+ "tfa.seq2seq.AttentionMechanism": false,
+ "tfa.seq2seq.AttentionMechanism.__init__": true,
+ "tfa.seq2seq.AttentionMechanism.alignments_size": true,
+ "tfa.seq2seq.AttentionMechanism.state_size": true,
+ "tfa.seq2seq.AttentionWrapper": false,
+ "tfa.seq2seq.AttentionWrapper.__call__": true,
+ "tfa.seq2seq.AttentionWrapper.__init__": true,
+ "tfa.seq2seq.AttentionWrapper.activity_regularizer": true,
+ "tfa.seq2seq.AttentionWrapper.add_loss": true,
+ "tfa.seq2seq.AttentionWrapper.add_metric": true,
+ "tfa.seq2seq.AttentionWrapper.add_update": true,
+ "tfa.seq2seq.AttentionWrapper.add_variable": true,
+ "tfa.seq2seq.AttentionWrapper.add_weight": true,
+ "tfa.seq2seq.AttentionWrapper.apply": true,
+ "tfa.seq2seq.AttentionWrapper.build": true,
+ "tfa.seq2seq.AttentionWrapper.call": true,
+ "tfa.seq2seq.AttentionWrapper.compute_mask": true,
+ "tfa.seq2seq.AttentionWrapper.compute_output_shape": true,
+ "tfa.seq2seq.AttentionWrapper.compute_output_signature": true,
+ "tfa.seq2seq.AttentionWrapper.count_params": true,
+ "tfa.seq2seq.AttentionWrapper.dtype": true,
+ "tfa.seq2seq.AttentionWrapper.dynamic": true,
+ "tfa.seq2seq.AttentionWrapper.from_config": true,
+ "tfa.seq2seq.AttentionWrapper.get_config": true,
+ "tfa.seq2seq.AttentionWrapper.get_initial_state": true,
+ "tfa.seq2seq.AttentionWrapper.get_input_at": true,
+ "tfa.seq2seq.AttentionWrapper.get_input_mask_at": true,
+ "tfa.seq2seq.AttentionWrapper.get_input_shape_at": true,
+ "tfa.seq2seq.AttentionWrapper.get_losses_for": true,
+ "tfa.seq2seq.AttentionWrapper.get_output_at": true,
+ "tfa.seq2seq.AttentionWrapper.get_output_mask_at": true,
+ "tfa.seq2seq.AttentionWrapper.get_output_shape_at": true,
+ "tfa.seq2seq.AttentionWrapper.get_updates_for": true,
+ "tfa.seq2seq.AttentionWrapper.get_weights": true,
+ "tfa.seq2seq.AttentionWrapper.input": true,
+ "tfa.seq2seq.AttentionWrapper.input_mask": true,
+ "tfa.seq2seq.AttentionWrapper.input_shape": true,
+ "tfa.seq2seq.AttentionWrapper.input_spec": true,
+ "tfa.seq2seq.AttentionWrapper.losses": true,
+ "tfa.seq2seq.AttentionWrapper.metrics": true,
+ "tfa.seq2seq.AttentionWrapper.name": true,
+ "tfa.seq2seq.AttentionWrapper.name_scope": true,
+ "tfa.seq2seq.AttentionWrapper.non_trainable_variables": true,
+ "tfa.seq2seq.AttentionWrapper.non_trainable_weights": true,
+ "tfa.seq2seq.AttentionWrapper.output": true,
+ "tfa.seq2seq.AttentionWrapper.output_mask": true,
+ "tfa.seq2seq.AttentionWrapper.output_shape": true,
+ "tfa.seq2seq.AttentionWrapper.output_size": true,
+ "tfa.seq2seq.AttentionWrapper.set_weights": true,
+ "tfa.seq2seq.AttentionWrapper.state_size": true,
+ "tfa.seq2seq.AttentionWrapper.submodules": true,
+ "tfa.seq2seq.AttentionWrapper.trainable": true,
+ "tfa.seq2seq.AttentionWrapper.trainable_variables": true,
+ "tfa.seq2seq.AttentionWrapper.trainable_weights": true,
+ "tfa.seq2seq.AttentionWrapper.updates": true,
+ "tfa.seq2seq.AttentionWrapper.variables": true,
+ "tfa.seq2seq.AttentionWrapper.weights": true,
+ "tfa.seq2seq.AttentionWrapper.with_name_scope": true,
+ "tfa.seq2seq.AttentionWrapperState": false,
+ "tfa.seq2seq.AttentionWrapperState.__add__": true,
+ "tfa.seq2seq.AttentionWrapperState.__contains__": true,
+ "tfa.seq2seq.AttentionWrapperState.__eq__": true,
+ "tfa.seq2seq.AttentionWrapperState.__ge__": true,
+ "tfa.seq2seq.AttentionWrapperState.__getitem__": true,
+ "tfa.seq2seq.AttentionWrapperState.__gt__": true,
+ "tfa.seq2seq.AttentionWrapperState.__init__": true,
+ "tfa.seq2seq.AttentionWrapperState.__iter__": true,
+ "tfa.seq2seq.AttentionWrapperState.__le__": true,
+ "tfa.seq2seq.AttentionWrapperState.__len__": true,
+ "tfa.seq2seq.AttentionWrapperState.__lt__": true,
+ "tfa.seq2seq.AttentionWrapperState.__mul__": true,
+ "tfa.seq2seq.AttentionWrapperState.__ne__": true,
+ "tfa.seq2seq.AttentionWrapperState.__rmul__": true,
+ "tfa.seq2seq.AttentionWrapperState.alignment_history": true,
+ "tfa.seq2seq.AttentionWrapperState.alignments": true,
+ "tfa.seq2seq.AttentionWrapperState.attention": true,
+ "tfa.seq2seq.AttentionWrapperState.attention_state": true,
+ "tfa.seq2seq.AttentionWrapperState.cell_state": true,
+ "tfa.seq2seq.AttentionWrapperState.clone": true,
+ "tfa.seq2seq.AttentionWrapperState.count": true,
+ "tfa.seq2seq.AttentionWrapperState.index": true,
+ "tfa.seq2seq.AttentionWrapperState.time": true,
+ "tfa.seq2seq.BahdanauAttention": false,
+ "tfa.seq2seq.BahdanauAttention.__call__": true,
+ "tfa.seq2seq.BahdanauAttention.__init__": true,
+ "tfa.seq2seq.BahdanauAttention.activity_regularizer": true,
+ "tfa.seq2seq.BahdanauAttention.add_loss": true,
+ "tfa.seq2seq.BahdanauAttention.add_metric": true,
+ "tfa.seq2seq.BahdanauAttention.add_update": true,
+ "tfa.seq2seq.BahdanauAttention.add_variable": true,
+ "tfa.seq2seq.BahdanauAttention.add_weight": true,
+ "tfa.seq2seq.BahdanauAttention.alignments_size": true,
+ "tfa.seq2seq.BahdanauAttention.apply": true,
+ "tfa.seq2seq.BahdanauAttention.build": true,
+ "tfa.seq2seq.BahdanauAttention.call": true,
+ "tfa.seq2seq.BahdanauAttention.compute_mask": true,
+ "tfa.seq2seq.BahdanauAttention.compute_output_shape": true,
+ "tfa.seq2seq.BahdanauAttention.compute_output_signature": true,
+ "tfa.seq2seq.BahdanauAttention.count_params": true,
+ "tfa.seq2seq.BahdanauAttention.deserialize_inner_layer_from_config": true,
+ "tfa.seq2seq.BahdanauAttention.dtype": true,
+ "tfa.seq2seq.BahdanauAttention.dynamic": true,
+ "tfa.seq2seq.BahdanauAttention.from_config": true,
+ "tfa.seq2seq.BahdanauAttention.get_config": true,
+ "tfa.seq2seq.BahdanauAttention.get_input_at": true,
+ "tfa.seq2seq.BahdanauAttention.get_input_mask_at": true,
+ "tfa.seq2seq.BahdanauAttention.get_input_shape_at": true,
+ "tfa.seq2seq.BahdanauAttention.get_losses_for": true,
+ "tfa.seq2seq.BahdanauAttention.get_output_at": true,
+ "tfa.seq2seq.BahdanauAttention.get_output_mask_at": true,
+ "tfa.seq2seq.BahdanauAttention.get_output_shape_at": true,
+ "tfa.seq2seq.BahdanauAttention.get_updates_for": true,
+ "tfa.seq2seq.BahdanauAttention.get_weights": true,
+ "tfa.seq2seq.BahdanauAttention.initial_alignments": true,
+ "tfa.seq2seq.BahdanauAttention.initial_state": true,
+ "tfa.seq2seq.BahdanauAttention.input": true,
+ "tfa.seq2seq.BahdanauAttention.input_mask": true,
+ "tfa.seq2seq.BahdanauAttention.input_shape": true,
+ "tfa.seq2seq.BahdanauAttention.input_spec": true,
+ "tfa.seq2seq.BahdanauAttention.losses": true,
+ "tfa.seq2seq.BahdanauAttention.metrics": true,
+ "tfa.seq2seq.BahdanauAttention.name": true,
+ "tfa.seq2seq.BahdanauAttention.name_scope": true,
+ "tfa.seq2seq.BahdanauAttention.non_trainable_variables": true,
+ "tfa.seq2seq.BahdanauAttention.non_trainable_weights": true,
+ "tfa.seq2seq.BahdanauAttention.output": true,
+ "tfa.seq2seq.BahdanauAttention.output_mask": true,
+ "tfa.seq2seq.BahdanauAttention.output_shape": true,
+ "tfa.seq2seq.BahdanauAttention.set_weights": true,
+ "tfa.seq2seq.BahdanauAttention.state_size": true,
+ "tfa.seq2seq.BahdanauAttention.submodules": true,
+ "tfa.seq2seq.BahdanauAttention.trainable": true,
+ "tfa.seq2seq.BahdanauAttention.trainable_variables": true,
+ "tfa.seq2seq.BahdanauAttention.trainable_weights": true,
+ "tfa.seq2seq.BahdanauAttention.updates": true,
+ "tfa.seq2seq.BahdanauAttention.variables": true,
+ "tfa.seq2seq.BahdanauAttention.weights": true,
+ "tfa.seq2seq.BahdanauAttention.with_name_scope": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention": false,
+ "tfa.seq2seq.BahdanauMonotonicAttention.__call__": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.__init__": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.activity_regularizer": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.add_loss": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.add_metric": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.add_update": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.add_variable": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.add_weight": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.alignments_size": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.apply": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.build": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.call": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.compute_mask": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.compute_output_shape": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.compute_output_signature": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.count_params": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.deserialize_inner_layer_from_config": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.dtype": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.dynamic": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.from_config": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.get_config": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.get_input_at": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.get_input_mask_at": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.get_input_shape_at": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.get_losses_for": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.get_output_at": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.get_output_mask_at": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.get_output_shape_at": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.get_updates_for": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.get_weights": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.initial_alignments": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.initial_state": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.input": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.input_mask": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.input_shape": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.input_spec": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.losses": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.metrics": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.name": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.name_scope": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.non_trainable_variables": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.non_trainable_weights": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.output": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.output_mask": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.output_shape": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.set_weights": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.state_size": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.submodules": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.trainable": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.trainable_variables": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.trainable_weights": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.updates": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.variables": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.weights": true,
+ "tfa.seq2seq.BahdanauMonotonicAttention.with_name_scope": true,
+ "tfa.seq2seq.BaseDecoder": false,
+ "tfa.seq2seq.BaseDecoder.__call__": true,
+ "tfa.seq2seq.BaseDecoder.__init__": true,
+ "tfa.seq2seq.BaseDecoder.activity_regularizer": true,
+ "tfa.seq2seq.BaseDecoder.add_loss": true,
+ "tfa.seq2seq.BaseDecoder.add_metric": true,
+ "tfa.seq2seq.BaseDecoder.add_update": true,
+ "tfa.seq2seq.BaseDecoder.add_variable": true,
+ "tfa.seq2seq.BaseDecoder.add_weight": true,
+ "tfa.seq2seq.BaseDecoder.apply": true,
+ "tfa.seq2seq.BaseDecoder.batch_size": true,
+ "tfa.seq2seq.BaseDecoder.build": true,
+ "tfa.seq2seq.BaseDecoder.call": true,
+ "tfa.seq2seq.BaseDecoder.compute_mask": true,
+ "tfa.seq2seq.BaseDecoder.compute_output_shape": true,
+ "tfa.seq2seq.BaseDecoder.compute_output_signature": true,
+ "tfa.seq2seq.BaseDecoder.count_params": true,
+ "tfa.seq2seq.BaseDecoder.dtype": true,
+ "tfa.seq2seq.BaseDecoder.dynamic": true,
+ "tfa.seq2seq.BaseDecoder.finalize": true,
+ "tfa.seq2seq.BaseDecoder.from_config": true,
+ "tfa.seq2seq.BaseDecoder.get_config": true,
+ "tfa.seq2seq.BaseDecoder.get_input_at": true,
+ "tfa.seq2seq.BaseDecoder.get_input_mask_at": true,
+ "tfa.seq2seq.BaseDecoder.get_input_shape_at": true,
+ "tfa.seq2seq.BaseDecoder.get_losses_for": true,
+ "tfa.seq2seq.BaseDecoder.get_output_at": true,
+ "tfa.seq2seq.BaseDecoder.get_output_mask_at": true,
+ "tfa.seq2seq.BaseDecoder.get_output_shape_at": true,
+ "tfa.seq2seq.BaseDecoder.get_updates_for": true,
+ "tfa.seq2seq.BaseDecoder.get_weights": true,
+ "tfa.seq2seq.BaseDecoder.initialize": true,
+ "tfa.seq2seq.BaseDecoder.input": true,
+ "tfa.seq2seq.BaseDecoder.input_mask": true,
+ "tfa.seq2seq.BaseDecoder.input_shape": true,
+ "tfa.seq2seq.BaseDecoder.input_spec": true,
+ "tfa.seq2seq.BaseDecoder.losses": true,
+ "tfa.seq2seq.BaseDecoder.metrics": true,
+ "tfa.seq2seq.BaseDecoder.name": true,
+ "tfa.seq2seq.BaseDecoder.name_scope": true,
+ "tfa.seq2seq.BaseDecoder.non_trainable_variables": true,
+ "tfa.seq2seq.BaseDecoder.non_trainable_weights": true,
+ "tfa.seq2seq.BaseDecoder.output": true,
+ "tfa.seq2seq.BaseDecoder.output_dtype": true,
+ "tfa.seq2seq.BaseDecoder.output_mask": true,
+ "tfa.seq2seq.BaseDecoder.output_shape": true,
+ "tfa.seq2seq.BaseDecoder.output_size": true,
+ "tfa.seq2seq.BaseDecoder.set_weights": true,
+ "tfa.seq2seq.BaseDecoder.step": true,
+ "tfa.seq2seq.BaseDecoder.submodules": true,
+ "tfa.seq2seq.BaseDecoder.tracks_own_finished": true,
+ "tfa.seq2seq.BaseDecoder.trainable": true,
+ "tfa.seq2seq.BaseDecoder.trainable_variables": true,
+ "tfa.seq2seq.BaseDecoder.trainable_weights": true,
+ "tfa.seq2seq.BaseDecoder.updates": true,
+ "tfa.seq2seq.BaseDecoder.variables": true,
+ "tfa.seq2seq.BaseDecoder.weights": true,
+ "tfa.seq2seq.BaseDecoder.with_name_scope": true,
+ "tfa.seq2seq.BasicDecoder": false,
+ "tfa.seq2seq.BasicDecoder.__call__": true,
+ "tfa.seq2seq.BasicDecoder.__init__": true,
+ "tfa.seq2seq.BasicDecoder.activity_regularizer": true,
+ "tfa.seq2seq.BasicDecoder.add_loss": true,
+ "tfa.seq2seq.BasicDecoder.add_metric": true,
+ "tfa.seq2seq.BasicDecoder.add_update": true,
+ "tfa.seq2seq.BasicDecoder.add_variable": true,
+ "tfa.seq2seq.BasicDecoder.add_weight": true,
+ "tfa.seq2seq.BasicDecoder.apply": true,
+ "tfa.seq2seq.BasicDecoder.batch_size": true,
+ "tfa.seq2seq.BasicDecoder.build": true,
+ "tfa.seq2seq.BasicDecoder.call": true,
+ "tfa.seq2seq.BasicDecoder.compute_mask": true,
+ "tfa.seq2seq.BasicDecoder.compute_output_shape": true,
+ "tfa.seq2seq.BasicDecoder.compute_output_signature": true,
+ "tfa.seq2seq.BasicDecoder.count_params": true,
+ "tfa.seq2seq.BasicDecoder.dtype": true,
+ "tfa.seq2seq.BasicDecoder.dynamic": true,
+ "tfa.seq2seq.BasicDecoder.finalize": true,
+ "tfa.seq2seq.BasicDecoder.from_config": true,
+ "tfa.seq2seq.BasicDecoder.get_config": true,
+ "tfa.seq2seq.BasicDecoder.get_input_at": true,
+ "tfa.seq2seq.BasicDecoder.get_input_mask_at": true,
+ "tfa.seq2seq.BasicDecoder.get_input_shape_at": true,
+ "tfa.seq2seq.BasicDecoder.get_losses_for": true,
+ "tfa.seq2seq.BasicDecoder.get_output_at": true,
+ "tfa.seq2seq.BasicDecoder.get_output_mask_at": true,
+ "tfa.seq2seq.BasicDecoder.get_output_shape_at": true,
+ "tfa.seq2seq.BasicDecoder.get_updates_for": true,
+ "tfa.seq2seq.BasicDecoder.get_weights": true,
+ "tfa.seq2seq.BasicDecoder.initialize": true,
+ "tfa.seq2seq.BasicDecoder.input": true,
+ "tfa.seq2seq.BasicDecoder.input_mask": true,
+ "tfa.seq2seq.BasicDecoder.input_shape": true,
+ "tfa.seq2seq.BasicDecoder.input_spec": true,
+ "tfa.seq2seq.BasicDecoder.losses": true,
+ "tfa.seq2seq.BasicDecoder.metrics": true,
+ "tfa.seq2seq.BasicDecoder.name": true,
+ "tfa.seq2seq.BasicDecoder.name_scope": true,
+ "tfa.seq2seq.BasicDecoder.non_trainable_variables": true,
+ "tfa.seq2seq.BasicDecoder.non_trainable_weights": true,
+ "tfa.seq2seq.BasicDecoder.output": true,
+ "tfa.seq2seq.BasicDecoder.output_dtype": true,
+ "tfa.seq2seq.BasicDecoder.output_mask": true,
+ "tfa.seq2seq.BasicDecoder.output_shape": true,
+ "tfa.seq2seq.BasicDecoder.output_size": true,
+ "tfa.seq2seq.BasicDecoder.set_weights": true,
+ "tfa.seq2seq.BasicDecoder.step": true,
+ "tfa.seq2seq.BasicDecoder.submodules": true,
+ "tfa.seq2seq.BasicDecoder.tracks_own_finished": true,
+ "tfa.seq2seq.BasicDecoder.trainable": true,
+ "tfa.seq2seq.BasicDecoder.trainable_variables": true,
+ "tfa.seq2seq.BasicDecoder.trainable_weights": true,
+ "tfa.seq2seq.BasicDecoder.updates": true,
+ "tfa.seq2seq.BasicDecoder.variables": true,
+ "tfa.seq2seq.BasicDecoder.weights": true,
+ "tfa.seq2seq.BasicDecoder.with_name_scope": true,
+ "tfa.seq2seq.BasicDecoderOutput": false,
+ "tfa.seq2seq.BasicDecoderOutput.__add__": true,
+ "tfa.seq2seq.BasicDecoderOutput.__contains__": true,
+ "tfa.seq2seq.BasicDecoderOutput.__eq__": true,
+ "tfa.seq2seq.BasicDecoderOutput.__ge__": true,
+ "tfa.seq2seq.BasicDecoderOutput.__getitem__": true,
+ "tfa.seq2seq.BasicDecoderOutput.__gt__": true,
+ "tfa.seq2seq.BasicDecoderOutput.__init__": true,
+ "tfa.seq2seq.BasicDecoderOutput.__iter__": true,
+ "tfa.seq2seq.BasicDecoderOutput.__le__": true,
+ "tfa.seq2seq.BasicDecoderOutput.__len__": true,
+ "tfa.seq2seq.BasicDecoderOutput.__lt__": true,
+ "tfa.seq2seq.BasicDecoderOutput.__mul__": true,
+ "tfa.seq2seq.BasicDecoderOutput.__ne__": true,
+ "tfa.seq2seq.BasicDecoderOutput.__rmul__": true,
+ "tfa.seq2seq.BasicDecoderOutput.count": true,
+ "tfa.seq2seq.BasicDecoderOutput.index": true,
+ "tfa.seq2seq.BasicDecoderOutput.rnn_output": true,
+ "tfa.seq2seq.BasicDecoderOutput.sample_id": true,
+ "tfa.seq2seq.BeamSearchDecoder": false,
+ "tfa.seq2seq.BeamSearchDecoder.__call__": true,
+ "tfa.seq2seq.BeamSearchDecoder.__init__": true,
+ "tfa.seq2seq.BeamSearchDecoder.activity_regularizer": true,
+ "tfa.seq2seq.BeamSearchDecoder.add_loss": true,
+ "tfa.seq2seq.BeamSearchDecoder.add_metric": true,
+ "tfa.seq2seq.BeamSearchDecoder.add_update": true,
+ "tfa.seq2seq.BeamSearchDecoder.add_variable": true,
+ "tfa.seq2seq.BeamSearchDecoder.add_weight": true,
+ "tfa.seq2seq.BeamSearchDecoder.apply": true,
+ "tfa.seq2seq.BeamSearchDecoder.batch_size": true,
+ "tfa.seq2seq.BeamSearchDecoder.build": true,
+ "tfa.seq2seq.BeamSearchDecoder.call": true,
+ "tfa.seq2seq.BeamSearchDecoder.compute_mask": true,
+ "tfa.seq2seq.BeamSearchDecoder.compute_output_shape": true,
+ "tfa.seq2seq.BeamSearchDecoder.compute_output_signature": true,
+ "tfa.seq2seq.BeamSearchDecoder.count_params": true,
+ "tfa.seq2seq.BeamSearchDecoder.dtype": true,
+ "tfa.seq2seq.BeamSearchDecoder.dynamic": true,
+ "tfa.seq2seq.BeamSearchDecoder.finalize": true,
+ "tfa.seq2seq.BeamSearchDecoder.from_config": true,
+ "tfa.seq2seq.BeamSearchDecoder.get_config": true,
+ "tfa.seq2seq.BeamSearchDecoder.get_input_at": true,
+ "tfa.seq2seq.BeamSearchDecoder.get_input_mask_at": true,
+ "tfa.seq2seq.BeamSearchDecoder.get_input_shape_at": true,
+ "tfa.seq2seq.BeamSearchDecoder.get_losses_for": true,
+ "tfa.seq2seq.BeamSearchDecoder.get_output_at": true,
+ "tfa.seq2seq.BeamSearchDecoder.get_output_mask_at": true,
+ "tfa.seq2seq.BeamSearchDecoder.get_output_shape_at": true,
+ "tfa.seq2seq.BeamSearchDecoder.get_updates_for": true,
+ "tfa.seq2seq.BeamSearchDecoder.get_weights": true,
+ "tfa.seq2seq.BeamSearchDecoder.initialize": true,
+ "tfa.seq2seq.BeamSearchDecoder.input": true,
+ "tfa.seq2seq.BeamSearchDecoder.input_mask": true,
+ "tfa.seq2seq.BeamSearchDecoder.input_shape": true,
+ "tfa.seq2seq.BeamSearchDecoder.input_spec": true,
+ "tfa.seq2seq.BeamSearchDecoder.losses": true,
+ "tfa.seq2seq.BeamSearchDecoder.metrics": true,
+ "tfa.seq2seq.BeamSearchDecoder.name": true,
+ "tfa.seq2seq.BeamSearchDecoder.name_scope": true,
+ "tfa.seq2seq.BeamSearchDecoder.non_trainable_variables": true,
+ "tfa.seq2seq.BeamSearchDecoder.non_trainable_weights": true,
+ "tfa.seq2seq.BeamSearchDecoder.output": true,
+ "tfa.seq2seq.BeamSearchDecoder.output_dtype": true,
+ "tfa.seq2seq.BeamSearchDecoder.output_mask": true,
+ "tfa.seq2seq.BeamSearchDecoder.output_shape": true,
+ "tfa.seq2seq.BeamSearchDecoder.output_size": true,
+ "tfa.seq2seq.BeamSearchDecoder.set_weights": true,
+ "tfa.seq2seq.BeamSearchDecoder.step": true,
+ "tfa.seq2seq.BeamSearchDecoder.submodules": true,
+ "tfa.seq2seq.BeamSearchDecoder.tracks_own_finished": true,
+ "tfa.seq2seq.BeamSearchDecoder.trainable": true,
+ "tfa.seq2seq.BeamSearchDecoder.trainable_variables": true,
+ "tfa.seq2seq.BeamSearchDecoder.trainable_weights": true,
+ "tfa.seq2seq.BeamSearchDecoder.updates": true,
+ "tfa.seq2seq.BeamSearchDecoder.variables": true,
+ "tfa.seq2seq.BeamSearchDecoder.weights": true,
+ "tfa.seq2seq.BeamSearchDecoder.with_name_scope": true,
+ "tfa.seq2seq.BeamSearchDecoderOutput": false,
+ "tfa.seq2seq.BeamSearchDecoderOutput.__add__": true,
+ "tfa.seq2seq.BeamSearchDecoderOutput.__contains__": true,
+ "tfa.seq2seq.BeamSearchDecoderOutput.__eq__": true,
+ "tfa.seq2seq.BeamSearchDecoderOutput.__ge__": true,
+ "tfa.seq2seq.BeamSearchDecoderOutput.__getitem__": true,
+ "tfa.seq2seq.BeamSearchDecoderOutput.__gt__": true,
+ "tfa.seq2seq.BeamSearchDecoderOutput.__init__": true,
+ "tfa.seq2seq.BeamSearchDecoderOutput.__iter__": true,
+ "tfa.seq2seq.BeamSearchDecoderOutput.__le__": true,
+ "tfa.seq2seq.BeamSearchDecoderOutput.__len__": true,
+ "tfa.seq2seq.BeamSearchDecoderOutput.__lt__": true,
+ "tfa.seq2seq.BeamSearchDecoderOutput.__mul__": true,
+ "tfa.seq2seq.BeamSearchDecoderOutput.__ne__": true,
+ "tfa.seq2seq.BeamSearchDecoderOutput.__rmul__": true,
+ "tfa.seq2seq.BeamSearchDecoderOutput.count": true,
+ "tfa.seq2seq.BeamSearchDecoderOutput.index": true,
+ "tfa.seq2seq.BeamSearchDecoderOutput.parent_ids": true,
+ "tfa.seq2seq.BeamSearchDecoderOutput.predicted_ids": true,
+ "tfa.seq2seq.BeamSearchDecoderOutput.scores": true,
+ "tfa.seq2seq.BeamSearchDecoderState": false,
+ "tfa.seq2seq.BeamSearchDecoderState.__add__": true,
+ "tfa.seq2seq.BeamSearchDecoderState.__contains__": true,
+ "tfa.seq2seq.BeamSearchDecoderState.__eq__": true,
+ "tfa.seq2seq.BeamSearchDecoderState.__ge__": true,
+ "tfa.seq2seq.BeamSearchDecoderState.__getitem__": true,
+ "tfa.seq2seq.BeamSearchDecoderState.__gt__": true,
+ "tfa.seq2seq.BeamSearchDecoderState.__init__": true,
+ "tfa.seq2seq.BeamSearchDecoderState.__iter__": true,
+ "tfa.seq2seq.BeamSearchDecoderState.__le__": true,
+ "tfa.seq2seq.BeamSearchDecoderState.__len__": true,
+ "tfa.seq2seq.BeamSearchDecoderState.__lt__": true,
+ "tfa.seq2seq.BeamSearchDecoderState.__mul__": true,
+ "tfa.seq2seq.BeamSearchDecoderState.__ne__": true,
+ "tfa.seq2seq.BeamSearchDecoderState.__rmul__": true,
+ "tfa.seq2seq.BeamSearchDecoderState.accumulated_attention_probs": true,
+ "tfa.seq2seq.BeamSearchDecoderState.cell_state": true,
+ "tfa.seq2seq.BeamSearchDecoderState.count": true,
+ "tfa.seq2seq.BeamSearchDecoderState.finished": true,
+ "tfa.seq2seq.BeamSearchDecoderState.index": true,
+ "tfa.seq2seq.BeamSearchDecoderState.lengths": true,
+ "tfa.seq2seq.BeamSearchDecoderState.log_probs": true,
+ "tfa.seq2seq.CustomSampler": false,
+ "tfa.seq2seq.CustomSampler.__init__": true,
+ "tfa.seq2seq.CustomSampler.batch_size": true,
+ "tfa.seq2seq.CustomSampler.initialize": true,
+ "tfa.seq2seq.CustomSampler.next_inputs": true,
+ "tfa.seq2seq.CustomSampler.sample": true,
+ "tfa.seq2seq.CustomSampler.sample_ids_dtype": true,
+ "tfa.seq2seq.CustomSampler.sample_ids_shape": true,
+ "tfa.seq2seq.Decoder": false,
+ "tfa.seq2seq.Decoder.__init__": true,
+ "tfa.seq2seq.Decoder.batch_size": true,
+ "tfa.seq2seq.Decoder.finalize": true,
+ "tfa.seq2seq.Decoder.initialize": true,
+ "tfa.seq2seq.Decoder.output_dtype": true,
+ "tfa.seq2seq.Decoder.output_size": true,
+ "tfa.seq2seq.Decoder.step": true,
+ "tfa.seq2seq.Decoder.tracks_own_finished": true,
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput": false,
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__add__": true,
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__contains__": true,
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__eq__": true,
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__ge__": true,
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__getitem__": true,
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__gt__": true,
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__init__": true,
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__iter__": true,
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__le__": true,
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__len__": true,
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__lt__": true,
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__mul__": true,
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__ne__": true,
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.__rmul__": true,
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.beam_search_decoder_output": true,
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.count": true,
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.index": true,
+ "tfa.seq2seq.FinalBeamSearchDecoderOutput.predicted_ids": true,
+ "tfa.seq2seq.GreedyEmbeddingSampler": false,
+ "tfa.seq2seq.GreedyEmbeddingSampler.__init__": true,
+ "tfa.seq2seq.GreedyEmbeddingSampler.batch_size": true,
+ "tfa.seq2seq.GreedyEmbeddingSampler.initialize": true,
+ "tfa.seq2seq.GreedyEmbeddingSampler.next_inputs": true,
+ "tfa.seq2seq.GreedyEmbeddingSampler.sample": true,
+ "tfa.seq2seq.GreedyEmbeddingSampler.sample_ids_dtype": true,
+ "tfa.seq2seq.GreedyEmbeddingSampler.sample_ids_shape": true,
+ "tfa.seq2seq.InferenceSampler": false,
+ "tfa.seq2seq.InferenceSampler.__init__": true,
+ "tfa.seq2seq.InferenceSampler.batch_size": true,
+ "tfa.seq2seq.InferenceSampler.initialize": true,
+ "tfa.seq2seq.InferenceSampler.next_inputs": true,
+ "tfa.seq2seq.InferenceSampler.sample": true,
+ "tfa.seq2seq.InferenceSampler.sample_ids_dtype": true,
+ "tfa.seq2seq.InferenceSampler.sample_ids_shape": true,
+ "tfa.seq2seq.LuongAttention": false,
+ "tfa.seq2seq.LuongAttention.__call__": true,
+ "tfa.seq2seq.LuongAttention.__init__": true,
+ "tfa.seq2seq.LuongAttention.activity_regularizer": true,
+ "tfa.seq2seq.LuongAttention.add_loss": true,
+ "tfa.seq2seq.LuongAttention.add_metric": true,
+ "tfa.seq2seq.LuongAttention.add_update": true,
+ "tfa.seq2seq.LuongAttention.add_variable": true,
+ "tfa.seq2seq.LuongAttention.add_weight": true,
+ "tfa.seq2seq.LuongAttention.alignments_size": true,
+ "tfa.seq2seq.LuongAttention.apply": true,
+ "tfa.seq2seq.LuongAttention.build": true,
+ "tfa.seq2seq.LuongAttention.call": true,
+ "tfa.seq2seq.LuongAttention.compute_mask": true,
+ "tfa.seq2seq.LuongAttention.compute_output_shape": true,
+ "tfa.seq2seq.LuongAttention.compute_output_signature": true,
+ "tfa.seq2seq.LuongAttention.count_params": true,
+ "tfa.seq2seq.LuongAttention.deserialize_inner_layer_from_config": true,
+ "tfa.seq2seq.LuongAttention.dtype": true,
+ "tfa.seq2seq.LuongAttention.dynamic": true,
+ "tfa.seq2seq.LuongAttention.from_config": true,
+ "tfa.seq2seq.LuongAttention.get_config": true,
+ "tfa.seq2seq.LuongAttention.get_input_at": true,
+ "tfa.seq2seq.LuongAttention.get_input_mask_at": true,
+ "tfa.seq2seq.LuongAttention.get_input_shape_at": true,
+ "tfa.seq2seq.LuongAttention.get_losses_for": true,
+ "tfa.seq2seq.LuongAttention.get_output_at": true,
+ "tfa.seq2seq.LuongAttention.get_output_mask_at": true,
+ "tfa.seq2seq.LuongAttention.get_output_shape_at": true,
+ "tfa.seq2seq.LuongAttention.get_updates_for": true,
+ "tfa.seq2seq.LuongAttention.get_weights": true,
+ "tfa.seq2seq.LuongAttention.initial_alignments": true,
+ "tfa.seq2seq.LuongAttention.initial_state": true,
+ "tfa.seq2seq.LuongAttention.input": true,
+ "tfa.seq2seq.LuongAttention.input_mask": true,
+ "tfa.seq2seq.LuongAttention.input_shape": true,
+ "tfa.seq2seq.LuongAttention.input_spec": true,
+ "tfa.seq2seq.LuongAttention.losses": true,
+ "tfa.seq2seq.LuongAttention.metrics": true,
+ "tfa.seq2seq.LuongAttention.name": true,
+ "tfa.seq2seq.LuongAttention.name_scope": true,
+ "tfa.seq2seq.LuongAttention.non_trainable_variables": true,
+ "tfa.seq2seq.LuongAttention.non_trainable_weights": true,
+ "tfa.seq2seq.LuongAttention.output": true,
+ "tfa.seq2seq.LuongAttention.output_mask": true,
+ "tfa.seq2seq.LuongAttention.output_shape": true,
+ "tfa.seq2seq.LuongAttention.set_weights": true,
+ "tfa.seq2seq.LuongAttention.state_size": true,
+ "tfa.seq2seq.LuongAttention.submodules": true,
+ "tfa.seq2seq.LuongAttention.trainable": true,
+ "tfa.seq2seq.LuongAttention.trainable_variables": true,
+ "tfa.seq2seq.LuongAttention.trainable_weights": true,
+ "tfa.seq2seq.LuongAttention.updates": true,
+ "tfa.seq2seq.LuongAttention.variables": true,
+ "tfa.seq2seq.LuongAttention.weights": true,
+ "tfa.seq2seq.LuongAttention.with_name_scope": true,
+ "tfa.seq2seq.LuongMonotonicAttention": false,
+ "tfa.seq2seq.LuongMonotonicAttention.__call__": true,
+ "tfa.seq2seq.LuongMonotonicAttention.__init__": true,
+ "tfa.seq2seq.LuongMonotonicAttention.activity_regularizer": true,
+ "tfa.seq2seq.LuongMonotonicAttention.add_loss": true,
+ "tfa.seq2seq.LuongMonotonicAttention.add_metric": true,
+ "tfa.seq2seq.LuongMonotonicAttention.add_update": true,
+ "tfa.seq2seq.LuongMonotonicAttention.add_variable": true,
+ "tfa.seq2seq.LuongMonotonicAttention.add_weight": true,
+ "tfa.seq2seq.LuongMonotonicAttention.alignments_size": true,
+ "tfa.seq2seq.LuongMonotonicAttention.apply": true,
+ "tfa.seq2seq.LuongMonotonicAttention.build": true,
+ "tfa.seq2seq.LuongMonotonicAttention.call": true,
+ "tfa.seq2seq.LuongMonotonicAttention.compute_mask": true,
+ "tfa.seq2seq.LuongMonotonicAttention.compute_output_shape": true,
+ "tfa.seq2seq.LuongMonotonicAttention.compute_output_signature": true,
+ "tfa.seq2seq.LuongMonotonicAttention.count_params": true,
+ "tfa.seq2seq.LuongMonotonicAttention.deserialize_inner_layer_from_config": true,
+ "tfa.seq2seq.LuongMonotonicAttention.dtype": true,
+ "tfa.seq2seq.LuongMonotonicAttention.dynamic": true,
+ "tfa.seq2seq.LuongMonotonicAttention.from_config": true,
+ "tfa.seq2seq.LuongMonotonicAttention.get_config": true,
+ "tfa.seq2seq.LuongMonotonicAttention.get_input_at": true,
+ "tfa.seq2seq.LuongMonotonicAttention.get_input_mask_at": true,
+ "tfa.seq2seq.LuongMonotonicAttention.get_input_shape_at": true,
+ "tfa.seq2seq.LuongMonotonicAttention.get_losses_for": true,
+ "tfa.seq2seq.LuongMonotonicAttention.get_output_at": true,
+ "tfa.seq2seq.LuongMonotonicAttention.get_output_mask_at": true,
+ "tfa.seq2seq.LuongMonotonicAttention.get_output_shape_at": true,
+ "tfa.seq2seq.LuongMonotonicAttention.get_updates_for": true,
+ "tfa.seq2seq.LuongMonotonicAttention.get_weights": true,
+ "tfa.seq2seq.LuongMonotonicAttention.initial_alignments": true,
+ "tfa.seq2seq.LuongMonotonicAttention.initial_state": true,
+ "tfa.seq2seq.LuongMonotonicAttention.input": true,
+ "tfa.seq2seq.LuongMonotonicAttention.input_mask": true,
+ "tfa.seq2seq.LuongMonotonicAttention.input_shape": true,
+ "tfa.seq2seq.LuongMonotonicAttention.input_spec": true,
+ "tfa.seq2seq.LuongMonotonicAttention.losses": true,
+ "tfa.seq2seq.LuongMonotonicAttention.metrics": true,
+ "tfa.seq2seq.LuongMonotonicAttention.name": true,
+ "tfa.seq2seq.LuongMonotonicAttention.name_scope": true,
+ "tfa.seq2seq.LuongMonotonicAttention.non_trainable_variables": true,
+ "tfa.seq2seq.LuongMonotonicAttention.non_trainable_weights": true,
+ "tfa.seq2seq.LuongMonotonicAttention.output": true,
+ "tfa.seq2seq.LuongMonotonicAttention.output_mask": true,
+ "tfa.seq2seq.LuongMonotonicAttention.output_shape": true,
+ "tfa.seq2seq.LuongMonotonicAttention.set_weights": true,
+ "tfa.seq2seq.LuongMonotonicAttention.state_size": true,
+ "tfa.seq2seq.LuongMonotonicAttention.submodules": true,
+ "tfa.seq2seq.LuongMonotonicAttention.trainable": true,
+ "tfa.seq2seq.LuongMonotonicAttention.trainable_variables": true,
+ "tfa.seq2seq.LuongMonotonicAttention.trainable_weights": true,
+ "tfa.seq2seq.LuongMonotonicAttention.updates": true,
+ "tfa.seq2seq.LuongMonotonicAttention.variables": true,
+ "tfa.seq2seq.LuongMonotonicAttention.weights": true,
+ "tfa.seq2seq.LuongMonotonicAttention.with_name_scope": true,
+ "tfa.seq2seq.SampleEmbeddingSampler": false,
+ "tfa.seq2seq.SampleEmbeddingSampler.__init__": true,
+ "tfa.seq2seq.SampleEmbeddingSampler.batch_size": true,
+ "tfa.seq2seq.SampleEmbeddingSampler.initialize": true,
+ "tfa.seq2seq.SampleEmbeddingSampler.next_inputs": true,
+ "tfa.seq2seq.SampleEmbeddingSampler.sample": true,
+ "tfa.seq2seq.SampleEmbeddingSampler.sample_ids_dtype": true,
+ "tfa.seq2seq.SampleEmbeddingSampler.sample_ids_shape": true,
+ "tfa.seq2seq.Sampler": false,
+ "tfa.seq2seq.Sampler.__init__": true,
+ "tfa.seq2seq.Sampler.batch_size": true,
+ "tfa.seq2seq.Sampler.initialize": true,
+ "tfa.seq2seq.Sampler.next_inputs": true,
+ "tfa.seq2seq.Sampler.sample": true,
+ "tfa.seq2seq.Sampler.sample_ids_dtype": true,
+ "tfa.seq2seq.Sampler.sample_ids_shape": true,
+ "tfa.seq2seq.ScheduledEmbeddingTrainingSampler": false,
+ "tfa.seq2seq.ScheduledEmbeddingTrainingSampler.__init__": true,
+ "tfa.seq2seq.ScheduledEmbeddingTrainingSampler.batch_size": true,
+ "tfa.seq2seq.ScheduledEmbeddingTrainingSampler.initialize": true,
+ "tfa.seq2seq.ScheduledEmbeddingTrainingSampler.next_inputs": true,
+ "tfa.seq2seq.ScheduledEmbeddingTrainingSampler.sample": true,
+ "tfa.seq2seq.ScheduledEmbeddingTrainingSampler.sample_ids_dtype": true,
+ "tfa.seq2seq.ScheduledEmbeddingTrainingSampler.sample_ids_shape": true,
+ "tfa.seq2seq.ScheduledOutputTrainingSampler": false,
+ "tfa.seq2seq.ScheduledOutputTrainingSampler.__init__": true,
+ "tfa.seq2seq.ScheduledOutputTrainingSampler.batch_size": true,
+ "tfa.seq2seq.ScheduledOutputTrainingSampler.initialize": true,
+ "tfa.seq2seq.ScheduledOutputTrainingSampler.next_inputs": true,
+ "tfa.seq2seq.ScheduledOutputTrainingSampler.sample": true,
+ "tfa.seq2seq.ScheduledOutputTrainingSampler.sample_ids_dtype": true,
+ "tfa.seq2seq.ScheduledOutputTrainingSampler.sample_ids_shape": true,
+ "tfa.seq2seq.SequenceLoss": false,
+ "tfa.seq2seq.SequenceLoss.__call__": true,
+ "tfa.seq2seq.SequenceLoss.__init__": true,
+ "tfa.seq2seq.SequenceLoss.call": true,
+ "tfa.seq2seq.SequenceLoss.from_config": true,
+ "tfa.seq2seq.SequenceLoss.get_config": true,
+ "tfa.seq2seq.TrainingSampler": false,
+ "tfa.seq2seq.TrainingSampler.__init__": true,
+ "tfa.seq2seq.TrainingSampler.batch_size": true,
+ "tfa.seq2seq.TrainingSampler.initialize": true,
+ "tfa.seq2seq.TrainingSampler.next_inputs": true,
+ "tfa.seq2seq.TrainingSampler.sample": true,
+ "tfa.seq2seq.TrainingSampler.sample_ids_dtype": true,
+ "tfa.seq2seq.TrainingSampler.sample_ids_shape": true,
+ "tfa.seq2seq.absolute_import": true,
+ "tfa.seq2seq.attention_wrapper": false,
+ "tfa.seq2seq.attention_wrapper.AttentionMechanism": false,
+ "tfa.seq2seq.attention_wrapper.AttentionMechanism.__init__": true,
+ "tfa.seq2seq.attention_wrapper.AttentionMechanism.alignments_size": true,
+ "tfa.seq2seq.attention_wrapper.AttentionMechanism.state_size": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper": false,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.__call__": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.__init__": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.activity_regularizer": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.add_loss": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.add_metric": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.add_update": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.add_variable": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.add_weight": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.apply": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.build": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.call": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.compute_mask": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.compute_output_shape": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.compute_output_signature": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.count_params": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.dtype": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.dynamic": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.from_config": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.get_config": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.get_initial_state": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.get_input_at": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.get_input_mask_at": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.get_input_shape_at": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.get_losses_for": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.get_output_at": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.get_output_mask_at": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.get_output_shape_at": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.get_updates_for": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.get_weights": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.input": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.input_mask": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.input_shape": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.input_spec": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.losses": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.metrics": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.name": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.name_scope": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.non_trainable_variables": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.non_trainable_weights": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.output": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.output_mask": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.output_shape": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.output_size": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.set_weights": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.state_size": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.submodules": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.trainable": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.trainable_variables": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.trainable_weights": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.updates": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.variables": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.weights": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapper.with_name_scope": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState": false,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__add__": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__contains__": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__eq__": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__ge__": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__getitem__": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__gt__": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__init__": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__iter__": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__le__": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__len__": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__lt__": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__mul__": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__ne__": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.__rmul__": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.alignment_history": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.alignments": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.attention": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.attention_state": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.cell_state": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.clone": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.count": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.index": true,
+ "tfa.seq2seq.attention_wrapper.AttentionWrapperState.time": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention": false,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.__call__": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.__init__": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.activity_regularizer": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.add_loss": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.add_metric": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.add_update": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.add_variable": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.add_weight": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.alignments_size": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.apply": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.build": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.call": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.compute_mask": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.compute_output_shape": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.compute_output_signature": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.count_params": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.deserialize_inner_layer_from_config": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.dtype": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.dynamic": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.from_config": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.get_config": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.get_input_at": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.get_input_mask_at": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.get_input_shape_at": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.get_losses_for": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.get_output_at": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.get_output_mask_at": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.get_output_shape_at": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.get_updates_for": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.get_weights": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.initial_alignments": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.initial_state": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.input": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.input_mask": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.input_shape": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.input_spec": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.losses": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.metrics": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.name": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.name_scope": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.non_trainable_variables": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.non_trainable_weights": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.output": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.output_mask": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.output_shape": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.set_weights": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.state_size": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.submodules": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.trainable": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.trainable_variables": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.trainable_weights": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.updates": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.variables": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.weights": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauAttention.with_name_scope": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention": false,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.__call__": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.__init__": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.activity_regularizer": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.add_loss": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.add_metric": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.add_update": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.add_variable": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.add_weight": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.alignments_size": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.apply": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.build": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.call": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.compute_mask": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.compute_output_shape": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.compute_output_signature": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.count_params": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.deserialize_inner_layer_from_config": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.dtype": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.dynamic": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.from_config": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.get_config": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.get_input_at": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.get_input_mask_at": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.get_input_shape_at": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.get_losses_for": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.get_output_at": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.get_output_mask_at": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.get_output_shape_at": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.get_updates_for": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.get_weights": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.initial_alignments": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.initial_state": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.input": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.input_mask": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.input_shape": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.input_spec": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.losses": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.metrics": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.name": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.name_scope": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.non_trainable_variables": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.non_trainable_weights": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.output": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.output_mask": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.output_shape": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.set_weights": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.state_size": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.submodules": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.trainable": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.trainable_variables": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.trainable_weights": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.updates": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.variables": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.weights": true,
+ "tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention.with_name_scope": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention": false,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.__call__": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.__init__": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.activity_regularizer": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.add_loss": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.add_metric": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.add_update": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.add_variable": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.add_weight": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.alignments_size": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.apply": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.build": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.call": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.compute_mask": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.compute_output_shape": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.compute_output_signature": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.count_params": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.deserialize_inner_layer_from_config": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.dtype": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.dynamic": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.from_config": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.get_config": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.get_input_at": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.get_input_mask_at": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.get_input_shape_at": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.get_losses_for": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.get_output_at": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.get_output_mask_at": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.get_output_shape_at": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.get_updates_for": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.get_weights": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.initial_alignments": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.initial_state": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.input": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.input_mask": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.input_shape": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.input_spec": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.losses": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.metrics": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.name": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.name_scope": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.non_trainable_variables": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.non_trainable_weights": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.output": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.output_mask": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.output_shape": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.set_weights": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.state_size": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.submodules": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.trainable": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.trainable_variables": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.trainable_weights": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.updates": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.variables": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.weights": true,
+ "tfa.seq2seq.attention_wrapper.LuongAttention.with_name_scope": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention": false,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.__call__": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.__init__": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.activity_regularizer": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.add_loss": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.add_metric": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.add_update": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.add_variable": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.add_weight": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.alignments_size": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.apply": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.build": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.call": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.compute_mask": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.compute_output_shape": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.compute_output_signature": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.count_params": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.deserialize_inner_layer_from_config": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.dtype": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.dynamic": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.from_config": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.get_config": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.get_input_at": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.get_input_mask_at": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.get_input_shape_at": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.get_losses_for": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.get_output_at": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.get_output_mask_at": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.get_output_shape_at": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.get_updates_for": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.get_weights": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.initial_alignments": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.initial_state": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.input": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.input_mask": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.input_shape": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.input_spec": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.losses": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.metrics": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.name": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.name_scope": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.non_trainable_variables": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.non_trainable_weights": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.output": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.output_mask": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.output_shape": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.set_weights": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.state_size": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.submodules": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.trainable": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.trainable_variables": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.trainable_weights": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.updates": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.variables": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.weights": true,
+ "tfa.seq2seq.attention_wrapper.LuongMonotonicAttention.with_name_scope": true,
+ "tfa.seq2seq.attention_wrapper.absolute_import": true,
+ "tfa.seq2seq.attention_wrapper.division": true,
+ "tfa.seq2seq.attention_wrapper.hardmax": false,
+ "tfa.seq2seq.attention_wrapper.monotonic_attention": false,
+ "tfa.seq2seq.attention_wrapper.print_function": true,
+ "tfa.seq2seq.attention_wrapper.safe_cumprod": false,
+ "tfa.seq2seq.basic_decoder": false,
+ "tfa.seq2seq.basic_decoder.BasicDecoder": false,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.__call__": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.__init__": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.activity_regularizer": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.add_loss": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.add_metric": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.add_update": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.add_variable": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.add_weight": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.apply": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.batch_size": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.build": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.call": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.compute_mask": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.compute_output_shape": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.compute_output_signature": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.count_params": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.dtype": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.dynamic": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.finalize": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.from_config": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.get_config": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.get_input_at": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.get_input_mask_at": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.get_input_shape_at": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.get_losses_for": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.get_output_at": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.get_output_mask_at": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.get_output_shape_at": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.get_updates_for": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.get_weights": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.initialize": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.input": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.input_mask": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.input_shape": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.input_spec": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.losses": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.metrics": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.name": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.name_scope": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.non_trainable_variables": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.non_trainable_weights": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.output": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.output_dtype": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.output_mask": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.output_shape": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.output_size": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.set_weights": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.step": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.submodules": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.tracks_own_finished": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.trainable": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.trainable_variables": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.trainable_weights": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.updates": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.variables": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.weights": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoder.with_name_scope": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput": false,
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__add__": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__contains__": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__eq__": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__ge__": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__getitem__": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__gt__": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__init__": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__iter__": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__le__": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__len__": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__lt__": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__mul__": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__ne__": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.__rmul__": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.count": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.index": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.rnn_output": true,
+ "tfa.seq2seq.basic_decoder.BasicDecoderOutput.sample_id": true,
+ "tfa.seq2seq.basic_decoder.absolute_import": true,
+ "tfa.seq2seq.basic_decoder.division": true,
+ "tfa.seq2seq.basic_decoder.print_function": true,
+ "tfa.seq2seq.beam_search_decoder": false,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder": false,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.__call__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.__init__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.activity_regularizer": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.add_loss": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.add_metric": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.add_update": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.add_variable": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.add_weight": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.apply": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.batch_size": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.build": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.call": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.compute_mask": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.compute_output_shape": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.compute_output_signature": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.count_params": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.dtype": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.dynamic": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.finalize": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.from_config": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.get_config": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.get_input_at": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.get_input_mask_at": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.get_input_shape_at": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.get_losses_for": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.get_output_at": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.get_output_mask_at": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.get_output_shape_at": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.get_updates_for": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.get_weights": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.initialize": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.input": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.input_mask": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.input_shape": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.input_spec": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.losses": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.metrics": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.name": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.name_scope": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.non_trainable_variables": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.non_trainable_weights": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.output": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.output_dtype": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.output_mask": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.output_shape": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.output_size": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.set_weights": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.step": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.submodules": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.tracks_own_finished": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.trainable": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.trainable_variables": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.trainable_weights": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.updates": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.variables": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.weights": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoder.with_name_scope": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderMixin": false,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderMixin.__init__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderMixin.batch_size": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderMixin.finalize": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderMixin.output_size": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderMixin.step": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderMixin.tracks_own_finished": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput": false,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__add__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__contains__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__eq__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__ge__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__getitem__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__gt__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__init__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__iter__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__le__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__len__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__lt__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__mul__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__ne__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.__rmul__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.count": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.index": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.parent_ids": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.predicted_ids": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput.scores": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState": false,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__add__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__contains__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__eq__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__ge__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__getitem__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__gt__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__init__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__iter__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__le__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__len__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__lt__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__mul__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__ne__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.__rmul__": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.accumulated_attention_probs": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.cell_state": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.count": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.finished": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.index": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.lengths": true,
+ "tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState.log_probs": true,
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput": false,
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__add__": true,
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__contains__": true,
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__eq__": true,
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__ge__": true,
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__getitem__": true,
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__gt__": true,
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__init__": true,
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__iter__": true,
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__le__": true,
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__len__": true,
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__lt__": true,
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__mul__": true,
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__ne__": true,
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.__rmul__": true,
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.beam_search_decoder_output": true,
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.count": true,
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.index": true,
+ "tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput.predicted_ids": true,
+ "tfa.seq2seq.beam_search_decoder.absolute_import": true,
+ "tfa.seq2seq.beam_search_decoder.attention_probs_from_attn_state": false,
+ "tfa.seq2seq.beam_search_decoder.division": true,
+ "tfa.seq2seq.beam_search_decoder.gather_tree_from_array": false,
+ "tfa.seq2seq.beam_search_decoder.get_attention_probs": false,
+ "tfa.seq2seq.beam_search_decoder.print_function": true,
+ "tfa.seq2seq.beam_search_decoder.tile_batch": false,
+ "tfa.seq2seq.decoder": false,
+ "tfa.seq2seq.decoder.BaseDecoder": false,
+ "tfa.seq2seq.decoder.BaseDecoder.__call__": true,
+ "tfa.seq2seq.decoder.BaseDecoder.__init__": true,
+ "tfa.seq2seq.decoder.BaseDecoder.activity_regularizer": true,
+ "tfa.seq2seq.decoder.BaseDecoder.add_loss": true,
+ "tfa.seq2seq.decoder.BaseDecoder.add_metric": true,
+ "tfa.seq2seq.decoder.BaseDecoder.add_update": true,
+ "tfa.seq2seq.decoder.BaseDecoder.add_variable": true,
+ "tfa.seq2seq.decoder.BaseDecoder.add_weight": true,
+ "tfa.seq2seq.decoder.BaseDecoder.apply": true,
+ "tfa.seq2seq.decoder.BaseDecoder.batch_size": true,
+ "tfa.seq2seq.decoder.BaseDecoder.build": true,
+ "tfa.seq2seq.decoder.BaseDecoder.call": true,
+ "tfa.seq2seq.decoder.BaseDecoder.compute_mask": true,
+ "tfa.seq2seq.decoder.BaseDecoder.compute_output_shape": true,
+ "tfa.seq2seq.decoder.BaseDecoder.compute_output_signature": true,
+ "tfa.seq2seq.decoder.BaseDecoder.count_params": true,
+ "tfa.seq2seq.decoder.BaseDecoder.dtype": true,
+ "tfa.seq2seq.decoder.BaseDecoder.dynamic": true,
+ "tfa.seq2seq.decoder.BaseDecoder.finalize": true,
+ "tfa.seq2seq.decoder.BaseDecoder.from_config": true,
+ "tfa.seq2seq.decoder.BaseDecoder.get_config": true,
+ "tfa.seq2seq.decoder.BaseDecoder.get_input_at": true,
+ "tfa.seq2seq.decoder.BaseDecoder.get_input_mask_at": true,
+ "tfa.seq2seq.decoder.BaseDecoder.get_input_shape_at": true,
+ "tfa.seq2seq.decoder.BaseDecoder.get_losses_for": true,
+ "tfa.seq2seq.decoder.BaseDecoder.get_output_at": true,
+ "tfa.seq2seq.decoder.BaseDecoder.get_output_mask_at": true,
+ "tfa.seq2seq.decoder.BaseDecoder.get_output_shape_at": true,
+ "tfa.seq2seq.decoder.BaseDecoder.get_updates_for": true,
+ "tfa.seq2seq.decoder.BaseDecoder.get_weights": true,
+ "tfa.seq2seq.decoder.BaseDecoder.initialize": true,
+ "tfa.seq2seq.decoder.BaseDecoder.input": true,
+ "tfa.seq2seq.decoder.BaseDecoder.input_mask": true,
+ "tfa.seq2seq.decoder.BaseDecoder.input_shape": true,
+ "tfa.seq2seq.decoder.BaseDecoder.input_spec": true,
+ "tfa.seq2seq.decoder.BaseDecoder.losses": true,
+ "tfa.seq2seq.decoder.BaseDecoder.metrics": true,
+ "tfa.seq2seq.decoder.BaseDecoder.name": true,
+ "tfa.seq2seq.decoder.BaseDecoder.name_scope": true,
+ "tfa.seq2seq.decoder.BaseDecoder.non_trainable_variables": true,
+ "tfa.seq2seq.decoder.BaseDecoder.non_trainable_weights": true,
+ "tfa.seq2seq.decoder.BaseDecoder.output": true,
+ "tfa.seq2seq.decoder.BaseDecoder.output_dtype": true,
+ "tfa.seq2seq.decoder.BaseDecoder.output_mask": true,
+ "tfa.seq2seq.decoder.BaseDecoder.output_shape": true,
+ "tfa.seq2seq.decoder.BaseDecoder.output_size": true,
+ "tfa.seq2seq.decoder.BaseDecoder.set_weights": true,
+ "tfa.seq2seq.decoder.BaseDecoder.step": true,
+ "tfa.seq2seq.decoder.BaseDecoder.submodules": true,
+ "tfa.seq2seq.decoder.BaseDecoder.tracks_own_finished": true,
+ "tfa.seq2seq.decoder.BaseDecoder.trainable": true,
+ "tfa.seq2seq.decoder.BaseDecoder.trainable_variables": true,
+ "tfa.seq2seq.decoder.BaseDecoder.trainable_weights": true,
+ "tfa.seq2seq.decoder.BaseDecoder.updates": true,
+ "tfa.seq2seq.decoder.BaseDecoder.variables": true,
+ "tfa.seq2seq.decoder.BaseDecoder.weights": true,
+ "tfa.seq2seq.decoder.BaseDecoder.with_name_scope": true,
+ "tfa.seq2seq.decoder.Decoder": false,
+ "tfa.seq2seq.decoder.Decoder.__init__": true,
+ "tfa.seq2seq.decoder.Decoder.batch_size": true,
+ "tfa.seq2seq.decoder.Decoder.finalize": true,
+ "tfa.seq2seq.decoder.Decoder.initialize": true,
+ "tfa.seq2seq.decoder.Decoder.output_dtype": true,
+ "tfa.seq2seq.decoder.Decoder.output_size": true,
+ "tfa.seq2seq.decoder.Decoder.step": true,
+ "tfa.seq2seq.decoder.Decoder.tracks_own_finished": true,
+ "tfa.seq2seq.decoder.absolute_import": true,
+ "tfa.seq2seq.decoder.division": true,
+ "tfa.seq2seq.decoder.dynamic_decode": false,
+ "tfa.seq2seq.decoder.print_function": true,
+ "tfa.seq2seq.division": true,
+ "tfa.seq2seq.dynamic_decode": false,
+ "tfa.seq2seq.gather_tree_from_array": false,
+ "tfa.seq2seq.hardmax": false,
+ "tfa.seq2seq.loss": false,
+ "tfa.seq2seq.loss.SequenceLoss": false,
+ "tfa.seq2seq.loss.SequenceLoss.__call__": true,
+ "tfa.seq2seq.loss.SequenceLoss.__init__": true,
+ "tfa.seq2seq.loss.SequenceLoss.call": true,
+ "tfa.seq2seq.loss.SequenceLoss.from_config": true,
+ "tfa.seq2seq.loss.SequenceLoss.get_config": true,
+ "tfa.seq2seq.loss.absolute_import": true,
+ "tfa.seq2seq.loss.division": true,
+ "tfa.seq2seq.loss.print_function": true,
+ "tfa.seq2seq.loss.sequence_loss": false,
+ "tfa.seq2seq.monotonic_attention": false,
+ "tfa.seq2seq.print_function": true,
+ "tfa.seq2seq.safe_cumprod": false,
+ "tfa.seq2seq.sampler": false,
+ "tfa.seq2seq.sampler.CustomSampler": false,
+ "tfa.seq2seq.sampler.CustomSampler.__init__": true,
+ "tfa.seq2seq.sampler.CustomSampler.batch_size": true,
+ "tfa.seq2seq.sampler.CustomSampler.initialize": true,
+ "tfa.seq2seq.sampler.CustomSampler.next_inputs": true,
+ "tfa.seq2seq.sampler.CustomSampler.sample": true,
+ "tfa.seq2seq.sampler.CustomSampler.sample_ids_dtype": true,
+ "tfa.seq2seq.sampler.CustomSampler.sample_ids_shape": true,
+ "tfa.seq2seq.sampler.GreedyEmbeddingSampler": false,
+ "tfa.seq2seq.sampler.GreedyEmbeddingSampler.__init__": true,
+ "tfa.seq2seq.sampler.GreedyEmbeddingSampler.batch_size": true,
+ "tfa.seq2seq.sampler.GreedyEmbeddingSampler.initialize": true,
+ "tfa.seq2seq.sampler.GreedyEmbeddingSampler.next_inputs": true,
+ "tfa.seq2seq.sampler.GreedyEmbeddingSampler.sample": true,
+ "tfa.seq2seq.sampler.GreedyEmbeddingSampler.sample_ids_dtype": true,
+ "tfa.seq2seq.sampler.GreedyEmbeddingSampler.sample_ids_shape": true,
+ "tfa.seq2seq.sampler.InferenceSampler": false,
+ "tfa.seq2seq.sampler.InferenceSampler.__init__": true,
+ "tfa.seq2seq.sampler.InferenceSampler.batch_size": true,
+ "tfa.seq2seq.sampler.InferenceSampler.initialize": true,
+ "tfa.seq2seq.sampler.InferenceSampler.next_inputs": true,
+ "tfa.seq2seq.sampler.InferenceSampler.sample": true,
+ "tfa.seq2seq.sampler.InferenceSampler.sample_ids_dtype": true,
+ "tfa.seq2seq.sampler.InferenceSampler.sample_ids_shape": true,
+ "tfa.seq2seq.sampler.SampleEmbeddingSampler": false,
+ "tfa.seq2seq.sampler.SampleEmbeddingSampler.__init__": true,
+ "tfa.seq2seq.sampler.SampleEmbeddingSampler.batch_size": true,
+ "tfa.seq2seq.sampler.SampleEmbeddingSampler.initialize": true,
+ "tfa.seq2seq.sampler.SampleEmbeddingSampler.next_inputs": true,
+ "tfa.seq2seq.sampler.SampleEmbeddingSampler.sample": true,
+ "tfa.seq2seq.sampler.SampleEmbeddingSampler.sample_ids_dtype": true,
+ "tfa.seq2seq.sampler.SampleEmbeddingSampler.sample_ids_shape": true,
+ "tfa.seq2seq.sampler.Sampler": false,
+ "tfa.seq2seq.sampler.Sampler.__init__": true,
+ "tfa.seq2seq.sampler.Sampler.batch_size": true,
+ "tfa.seq2seq.sampler.Sampler.initialize": true,
+ "tfa.seq2seq.sampler.Sampler.next_inputs": true,
+ "tfa.seq2seq.sampler.Sampler.sample": true,
+ "tfa.seq2seq.sampler.Sampler.sample_ids_dtype": true,
+ "tfa.seq2seq.sampler.Sampler.sample_ids_shape": true,
+ "tfa.seq2seq.sampler.ScheduledEmbeddingTrainingSampler": false,
+ "tfa.seq2seq.sampler.ScheduledEmbeddingTrainingSampler.__init__": true,
+ "tfa.seq2seq.sampler.ScheduledEmbeddingTrainingSampler.batch_size": true,
+ "tfa.seq2seq.sampler.ScheduledEmbeddingTrainingSampler.initialize": true,
+ "tfa.seq2seq.sampler.ScheduledEmbeddingTrainingSampler.next_inputs": true,
+ "tfa.seq2seq.sampler.ScheduledEmbeddingTrainingSampler.sample": true,
+ "tfa.seq2seq.sampler.ScheduledEmbeddingTrainingSampler.sample_ids_dtype": true,
+ "tfa.seq2seq.sampler.ScheduledEmbeddingTrainingSampler.sample_ids_shape": true,
+ "tfa.seq2seq.sampler.ScheduledOutputTrainingSampler": false,
+ "tfa.seq2seq.sampler.ScheduledOutputTrainingSampler.__init__": true,
+ "tfa.seq2seq.sampler.ScheduledOutputTrainingSampler.batch_size": true,
+ "tfa.seq2seq.sampler.ScheduledOutputTrainingSampler.initialize": true,
+ "tfa.seq2seq.sampler.ScheduledOutputTrainingSampler.next_inputs": true,
+ "tfa.seq2seq.sampler.ScheduledOutputTrainingSampler.sample": true,
+ "tfa.seq2seq.sampler.ScheduledOutputTrainingSampler.sample_ids_dtype": true,
+ "tfa.seq2seq.sampler.ScheduledOutputTrainingSampler.sample_ids_shape": true,
+ "tfa.seq2seq.sampler.TrainingSampler": false,
+ "tfa.seq2seq.sampler.TrainingSampler.__init__": true,
+ "tfa.seq2seq.sampler.TrainingSampler.batch_size": true,
+ "tfa.seq2seq.sampler.TrainingSampler.initialize": true,
+ "tfa.seq2seq.sampler.TrainingSampler.next_inputs": true,
+ "tfa.seq2seq.sampler.TrainingSampler.sample": true,
+ "tfa.seq2seq.sampler.TrainingSampler.sample_ids_dtype": true,
+ "tfa.seq2seq.sampler.TrainingSampler.sample_ids_shape": true,
+ "tfa.seq2seq.sampler.absolute_import": true,
+ "tfa.seq2seq.sampler.bernoulli_sample": false,
+ "tfa.seq2seq.sampler.categorical_sample": false,
+ "tfa.seq2seq.sampler.division": true,
+ "tfa.seq2seq.sampler.print_function": true,
+ "tfa.seq2seq.sequence_loss": false,
+ "tfa.seq2seq.tile_batch": false,
+ "tfa.text": false,
+ "tfa.text.absolute_import": true,
+ "tfa.text.division": true,
+ "tfa.text.print_function": true,
+ "tfa.text.skip_gram_ops": false,
+ "tfa.text.skip_gram_ops.absolute_import": true,
+ "tfa.text.skip_gram_ops.division": true,
+ "tfa.text.skip_gram_ops.print_function": true,
+ "tfa.text.skip_gram_ops.skip_gram_sample": false,
+ "tfa.text.skip_gram_ops.skip_gram_sample_with_text_vocab": false,
+ "tfa.text.skip_gram_sample": false,
+ "tfa.text.skip_gram_sample_with_text_vocab": false
+ },
+ "py_module_names": [
+ "tfa"
+ ]
+}
\ No newline at end of file
diff --git a/docs/api_docs/python/tfa/activations.md b/docs/api_docs/python/tfa/activations.md
new file mode 100644
index 0000000000..f688c75501
--- /dev/null
+++ b/docs/api_docs/python/tfa/activations.md
@@ -0,0 +1,20 @@
+
+
+
+
+
+# Module: tfa.activations
+
+A module containing activation routines.
+
+
+
+Defined in [`activations/__init__.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/activations/__init__.py).
+
+
+
+
+## Functions
+
+[`sparsemax(...)`](../tfa/activations/sparsemax.md): Sparsemax activation function [1].
+
diff --git a/docs/api_docs/python/tfa/activations/sparsemax.md b/docs/api_docs/python/tfa/activations/sparsemax.md
new file mode 100644
index 0000000000..d3b19f68d7
--- /dev/null
+++ b/docs/api_docs/python/tfa/activations/sparsemax.md
@@ -0,0 +1,50 @@
+
+
+
+
+
+# tfa.activations.sparsemax
+
+Sparsemax activation function [1].
+
+### Aliases:
+
+* `tfa.activations.sparsemax`
+* `tfa.layers.sparsemax.sparsemax`
+
+``` python
+tfa.activations.sparsemax(
+ logits,
+ axis=-1,
+ name=None
+)
+```
+
+
+
+Defined in [`activations/sparsemax.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/activations/sparsemax.py).
+
+
+
+For each batch `i` and class `j` we have
+ $$\mathrm{sparsemax}[i, j] = \max(\mathrm{logits}[i, j] - \tau(\mathrm{logits}[i, :]), 0)$$
+
+[1]: https://arxiv.org/abs/1602.02068
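+
+#### Example:
+
+A minimal illustrative sketch (the values are chosen to show that
+sparsemax, unlike softmax, can produce exact zeros):
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+logits = tf.constant([[1.0, 2.0, 3.0]])
+probs = tfa.activations.sparsemax(logits)
+# probs ==> [[0.0, 0.0, 1.0]]
+```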
+
+#### Args:
+
+
+* `logits`: Input tensor.
+* `axis`: Integer, axis along which the sparsemax operation is applied.
+* `name`: A name for the operation (optional).
+
+#### Returns:
+
+Tensor, output of sparsemax transformation. Has the same type and
+shape as `logits`.
+
+
+#### Raises:
+
+
+* `ValueError`: In case `dim(logits) == 1`.
\ No newline at end of file
diff --git a/docs/api_docs/python/tfa/image.md b/docs/api_docs/python/tfa/image.md
new file mode 100644
index 0000000000..5ac3048ce8
--- /dev/null
+++ b/docs/api_docs/python/tfa/image.md
@@ -0,0 +1,46 @@
+
+
+
+
+
+# Module: tfa.image
+
+Image manipulation ops.
+
+
+
+Defined in [`image/__init__.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/image/__init__.py).
+
+
+
+
+## Modules
+
+[`distance_transform`](../tfa/image/distance_transform.md) module: Distance transform ops.
+
+[`distort_image_ops`](../tfa/image/distort_image_ops.md) module: Python layer for distort_image_ops.
+
+[`filters`](../tfa/image/filters.md) module
+
+[`transform_ops`](../tfa/image/transform_ops.md) module: Image transform ops.
+
+## Functions
+
+[`adjust_hsv_in_yiq(...)`](../tfa/image/adjust_hsv_in_yiq.md): Adjust hue, saturation, value of an RGB image in YIQ color space.
+
+[`dense_image_warp(...)`](../tfa/image/dense_image_warp.md): Image warping using per-pixel flow vectors.
+
+[`euclidean_dist_transform(...)`](../tfa/image/euclidean_dist_transform.md): Applies Euclidean distance transform(s) to the image(s).
+
+[`interpolate_bilinear(...)`](../tfa/image/interpolate_bilinear.md): Similar to Matlab's interp2 function.
+
+[`mean_filter2d(...)`](../tfa/image/mean_filter2d.md): Perform mean filtering on image(s).
+
+[`median_filter2d(...)`](../tfa/image/median_filter2d.md): Perform median filtering on image(s).
+
+[`random_hsv_in_yiq(...)`](../tfa/image/random_hsv_in_yiq.md): Adjust hue, saturation, value of an RGB image randomly in YIQ color space.
+
+[`rotate(...)`](../tfa/image/rotate.md): Rotate image(s) counterclockwise by the passed angle(s) in radians.
+
+[`transform(...)`](../tfa/image/transform.md): Applies the given transform(s) to the image(s).
+
diff --git a/docs/api_docs/python/tfa/image/adjust_hsv_in_yiq.md b/docs/api_docs/python/tfa/image/adjust_hsv_in_yiq.md
new file mode 100644
index 0000000000..fa5ff5eb71
--- /dev/null
+++ b/docs/api_docs/python/tfa/image/adjust_hsv_in_yiq.md
@@ -0,0 +1,55 @@
+
+
+
+
+
+# tfa.image.adjust_hsv_in_yiq
+
+Adjust hue, saturation, value of an RGB image in YIQ color space.
+
+### Aliases:
+
+* `tfa.image.adjust_hsv_in_yiq`
+* `tfa.image.distort_image_ops.adjust_hsv_in_yiq`
+
+``` python
+tfa.image.adjust_hsv_in_yiq(
+ image,
+ delta_hue=0,
+ scale_saturation=1,
+ scale_value=1,
+ name=None
+)
+```
+
+
+
+Defined in [`image/distort_image_ops.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/image/distort_image_ops.py).
+
+
+
+This is a convenience method that converts an RGB image to float
+representation, converts it to YIQ, rotates the color around the
+Y channel by delta_hue in radians, scales the chrominance channels
+(I, Q) by scale_saturation, scales all channels (Y, I, Q) by scale_value,
+converts back to RGB, and then back to the original data type.
+
+`image` is an RGB image. The image hue is adjusted by converting the
+image to YIQ, rotating around the luminance channel (Y) by
+`delta_hue` in radians, multiplying the chrominance channels (I, Q) by
+`scale_saturation`, and multiplying all channels (Y, I, Q) by
+`scale_value`. The image is then converted back to RGB.
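+
+#### Example:
+
+A minimal usage sketch (the adjustment amounts are illustrative):
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+image = tf.random.uniform([64, 64, 3])  # RGB image with values in [0, 1]
+adjusted = tfa.image.adjust_hsv_in_yiq(
+    image, delta_hue=0.2, scale_saturation=0.5, scale_value=0.8)
+```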
+
+#### Args:
+
+
+* `image`: RGB image or images. Size of the last dimension must be 3.
+* `delta_hue`: float, the hue rotation amount, in radians.
+* `scale_saturation`: float, factor to multiply the saturation by.
+* `scale_value`: float, factor to multiply the value by.
+* `name`: A name for this operation (optional).
+
+
+#### Returns:
+
+Adjusted image(s), same shape and dtype as `image`.
diff --git a/docs/api_docs/python/tfa/image/dense_image_warp.md b/docs/api_docs/python/tfa/image/dense_image_warp.md
new file mode 100644
index 0000000000..0650d89ee6
--- /dev/null
+++ b/docs/api_docs/python/tfa/image/dense_image_warp.md
@@ -0,0 +1,58 @@
+
+
+
+
+
+# tfa.image.dense_image_warp
+
+Image warping using per-pixel flow vectors.
+
+``` python
+tfa.image.dense_image_warp(
+ image,
+ flow,
+ name=None
+)
+```
+
+
+
+Defined in [`image/dense_image_warp.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/image/dense_image_warp.py).
+
+
+
+Apply a non-linear warp to the image, where the warp is specified by a
+dense flow field of offset vectors that define the correspondences of
+pixel values in the output image back to locations in the source image.
+Specifically, the pixel value at output[b, j, i, c] is
+images[b, j - flow[b, j, i, 0], i - flow[b, j, i, 1], c].
+
+The locations specified by this formula do not necessarily map to an int
+index. Therefore, the pixel value is obtained by bilinear
+interpolation of the 4 nearest pixels around
+(b, j - flow[b, j, i, 0], i - flow[b, j, i, 1]). For locations outside
+of the image, we use the nearest pixel values at the image boundary.
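+
+#### Example:
+
+A minimal sketch: a zero flow field leaves the image unchanged:
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+image = tf.random.uniform([1, 4, 4, 3])
+flow = tf.zeros([1, 4, 4, 2])  # zero offsets: output equals input
+warped = tfa.image.dense_image_warp(image, flow)
+```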
+
+#### Args:
+
+
+* `image`: 4-D float `Tensor` with shape `[batch, height, width, channels]`.
+* `flow`: A 4-D float `Tensor` with shape `[batch, height, width, 2]`.
+* `name`: A name for the operation (optional).
+
+Note that image and flow can be of type tf.half, tf.float32, or
+tf.float64, and do not necessarily have to be the same type.
+
+
+#### Returns:
+
+A 4-D float `Tensor` with shape `[batch, height, width, channels]`
+  and same type as input image.
+
+
+
+#### Raises:
+
+
+* `ValueError`: if height < 2 or width < 2 or the inputs have the wrong
+ number of dimensions.
\ No newline at end of file
diff --git a/docs/api_docs/python/tfa/image/distance_transform.md b/docs/api_docs/python/tfa/image/distance_transform.md
new file mode 100644
index 0000000000..3fe00d0dfc
--- /dev/null
+++ b/docs/api_docs/python/tfa/image/distance_transform.md
@@ -0,0 +1,20 @@
+
+
+
+
+
+# Module: tfa.image.distance_transform
+
+Distance transform ops.
+
+
+
+Defined in [`image/distance_transform.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/image/distance_transform.py).
+
+
+
+
+## Functions
+
+[`euclidean_dist_transform(...)`](../../tfa/image/euclidean_dist_transform.md): Applies Euclidean distance transform(s) to the image(s).
+
diff --git a/docs/api_docs/python/tfa/image/distort_image_ops.md b/docs/api_docs/python/tfa/image/distort_image_ops.md
new file mode 100644
index 0000000000..526cd491d4
--- /dev/null
+++ b/docs/api_docs/python/tfa/image/distort_image_ops.md
@@ -0,0 +1,22 @@
+
+
+
+
+
+# Module: tfa.image.distort_image_ops
+
+Python layer for distort_image_ops.
+
+
+
+Defined in [`image/distort_image_ops.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/image/distort_image_ops.py).
+
+
+
+
+## Functions
+
+[`adjust_hsv_in_yiq(...)`](../../tfa/image/adjust_hsv_in_yiq.md): Adjust hue, saturation, value of an RGB image in YIQ color space.
+
+[`random_hsv_in_yiq(...)`](../../tfa/image/random_hsv_in_yiq.md): Adjust hue, saturation, value of an RGB image randomly in YIQ color
+
diff --git a/docs/api_docs/python/tfa/image/euclidean_dist_transform.md b/docs/api_docs/python/tfa/image/euclidean_dist_transform.md
new file mode 100644
index 0000000000..f862c54f9a
--- /dev/null
+++ b/docs/api_docs/python/tfa/image/euclidean_dist_transform.md
@@ -0,0 +1,53 @@
+
+
+
+
+
+# tfa.image.euclidean_dist_transform
+
+Applies Euclidean distance transform(s) to the image(s).
+
+### Aliases:
+
+* `tfa.image.distance_transform.euclidean_dist_transform`
+* `tfa.image.euclidean_dist_transform`
+
+``` python
+tfa.image.euclidean_dist_transform(
+ images,
+ dtype=tf.float32,
+ name=None
+)
+```
+
+
+
+Defined in [`image/distance_transform.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/image/distance_transform.py).
+
+
+
+
+#### Args:
+
+
+* `images`: A tensor of shape (num_images, num_rows, num_columns, 1) (NHWC),
+  or (num_rows, num_columns, 1) (HWC). The rank must be statically known
+  (the shape is not `TensorShape(None)`).
+* `dtype`: DType of the output tensor.
+* `name`: The name of the op.
+
+
+#### Returns:
+
+Image(s) with the type `dtype` and same shape as `images`, with the
+transform applied. If a tensor of all ones is given as input, the
+output tensor will be filled with the max value of the `dtype`.
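+
+#### Example:
+
+A minimal sketch of the all-ones case described above:
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+# With no zero-valued (background) pixels, every output pixel is filled
+# with the maximum value of the output dtype.
+images = tf.ones([1, 5, 5, 1], dtype=tf.uint8)
+dist = tfa.image.euclidean_dist_transform(images, dtype=tf.float32)
+```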
+
+
+
+#### Raises:
+
+
+* `TypeError`: If `images` is not tf.uint8, or `dtype` is not floating point.
+* `ValueError`: If `images` has more than one channel, or `images` is not of
+  rank 3 or 4.
\ No newline at end of file
diff --git a/docs/api_docs/python/tfa/image/filters.md b/docs/api_docs/python/tfa/image/filters.md
new file mode 100644
index 0000000000..ee8f4931cf
--- /dev/null
+++ b/docs/api_docs/python/tfa/image/filters.md
@@ -0,0 +1,22 @@
+
+
+
+
+
+# Module: tfa.image.filters
+
+
+
+
+
+Defined in [`image/filters.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/image/filters.py).
+
+
+
+
+## Functions
+
+[`mean_filter2d(...)`](../../tfa/image/mean_filter2d.md): Perform mean filtering on image(s).
+
+[`median_filter2d(...)`](../../tfa/image/median_filter2d.md): Perform median filtering on image(s).
+
diff --git a/docs/api_docs/python/tfa/image/interpolate_bilinear.md b/docs/api_docs/python/tfa/image/interpolate_bilinear.md
new file mode 100644
index 0000000000..e2abbf5875
--- /dev/null
+++ b/docs/api_docs/python/tfa/image/interpolate_bilinear.md
@@ -0,0 +1,48 @@
+
+
+
+
+
+# tfa.image.interpolate_bilinear
+
+Similar to Matlab's interp2 function.
+
+``` python
+tfa.image.interpolate_bilinear(
+ grid,
+ query_points,
+ indexing='ij',
+ name=None
+)
+```
+
+
+
+Defined in [`image/dense_image_warp.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/image/dense_image_warp.py).
+
+
+
+Finds values for query points on a grid using bilinear interpolation.
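+
+#### Example:
+
+A minimal sketch querying the center of a 2x2 grid:
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+grid = tf.reshape(tf.range(4, dtype=tf.float32), [1, 2, 2, 1])
+# With "ij" indexing, (0.5, 0.5) is the center of the 2x2 grid.
+query_points = tf.constant([[[0.5, 0.5]]])
+values = tfa.image.interpolate_bilinear(grid, query_points)
+# values ==> [[[1.5]]], the average of the four corner values 0, 1, 2, 3
+```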
+
+#### Args:
+
+
+* `grid`: a 4-D float `Tensor` of shape `[batch, height, width, channels]`.
+* `query_points`: a 3-D float `Tensor` of N points with shape
+ `[batch, N, 2]`.
+* `indexing`: whether the query points are specified as row and column (ij),
+ or Cartesian coordinates (xy).
+* `name`: a name for the operation (optional).
+
+
+#### Returns:
+
+
+* `values`: a 3-D `Tensor` with shape `[batch, N, channels]`
+
+
+#### Raises:
+
+
+* `ValueError`: if the indexing mode is invalid, or if the shape of the
+  inputs is invalid.
\ No newline at end of file
diff --git a/docs/api_docs/python/tfa/image/mean_filter2d.md b/docs/api_docs/python/tfa/image/mean_filter2d.md
new file mode 100644
index 0000000000..dd6e3c8e95
--- /dev/null
+++ b/docs/api_docs/python/tfa/image/mean_filter2d.md
@@ -0,0 +1,58 @@
+
+
+
+
+
+# tfa.image.mean_filter2d
+
+Perform mean filtering on image(s).
+
+### Aliases:
+
+* `tfa.image.filters.mean_filter2d`
+* `tfa.image.mean_filter2d`
+
+``` python
+tfa.image.mean_filter2d(
+ image,
+ filter_shape=(3, 3),
+ padding='REFLECT',
+ constant_values=0,
+ name=None
+)
+```
+
+
+
+Defined in [`image/filters.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/image/filters.py).
+
+
+
+
+#### Args:
+
+
+* `image`: Either a 3-D `Tensor` of shape `[height, width, channels]`,
+ or a 4-D `Tensor` of shape `[batch_size, height, width, channels]`.
+* `filter_shape`: An `integer` or `tuple`/`list` of 2 integers, specifying
+ the height and width of the 2-D mean filter. Can be a single integer
+ to specify the same value for all spatial dimensions.
+* `padding`: A `string`, one of "REFLECT", "CONSTANT", or "SYMMETRIC".
+ The type of padding algorithm to use, which is compatible with
+ `mode` argument in `tf.pad`. For more details, please refer to
+ https://www.tensorflow.org/api_docs/python/tf/pad.
+* `constant_values`: A `scalar`, the pad value to use in "CONSTANT"
+ padding mode.
+* `name`: A name for this operation (optional).
+
+#### Returns:
+
+3-D or 4-D `Tensor` of the same dtype as input.
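+
+#### Example:
+
+A minimal usage sketch (shapes are illustrative):
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+image = tf.random.uniform([1, 28, 28, 3])
+smoothed = tfa.image.mean_filter2d(image, filter_shape=(3, 3),
+                                   padding='REFLECT')
+```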
+
+
+#### Raises:
+
+
+* `ValueError`: If `image` is not 3 or 4-dimensional,
+ if `padding` is other than "REFLECT", "CONSTANT" or "SYMMETRIC",
+ or if `filter_shape` is invalid.
\ No newline at end of file
diff --git a/docs/api_docs/python/tfa/image/median_filter2d.md b/docs/api_docs/python/tfa/image/median_filter2d.md
new file mode 100644
index 0000000000..5d250f4245
--- /dev/null
+++ b/docs/api_docs/python/tfa/image/median_filter2d.md
@@ -0,0 +1,45 @@
+
+
+
+
+
+# tfa.image.median_filter2d
+
+Perform median filtering on image(s).
+
+### Aliases:
+
+* `tfa.image.filters.median_filter2d`
+* `tfa.image.median_filter2d`
+
+``` python
+tfa.image.median_filter2d(
+ image,
+ filter_shape=(3, 3),
+ name=None
+)
+```
+
+
+
+Defined in [`image/filters.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/image/filters.py).
+
+
+The filter shape can be user-specified. This method accepts both images
+whose pixel values lie in [0, 255] and images whose pixel values lie in
+[0.0, 1.0].
+
+#### Args:
+
+
+* `image`: A 3-D `Tensor` of type `float32`, `int32`, `float64` or
+  `int64`, and of shape `[rows, columns, channels]`.
+* `filter_shape`: Optional. A tuple of 2 integers (R, C), where R is the
+  number of rows and C is the number of columns of the filter, giving a
+  filter of shape (R, C). Default value: (3, 3).
+* `name`: The name of the op.
+
+
+#### Returns:
+
+A 3-D median-filtered image tensor of shape `[rows, columns, channels]`
+and type `int32`. Pixel values of the returned tensor range from 0 to 255.
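+
+#### Example:
+
+A minimal usage sketch (values are illustrative):
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+image = tf.random.uniform([28, 28, 3], maxval=255, dtype=tf.int32)
+denoised = tfa.image.median_filter2d(image, filter_shape=(3, 3))
+```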
\ No newline at end of file
diff --git a/docs/api_docs/python/tfa/image/random_hsv_in_yiq.md b/docs/api_docs/python/tfa/image/random_hsv_in_yiq.md
new file mode 100644
index 0000000000..808ca51d6e
--- /dev/null
+++ b/docs/api_docs/python/tfa/image/random_hsv_in_yiq.md
@@ -0,0 +1,68 @@
+
+
+
+
+
+# tfa.image.random_hsv_in_yiq
+
+Adjust hue, saturation, value of an RGB image randomly in YIQ color space.
+
+### Aliases:
+
+* `tfa.image.distort_image_ops.random_hsv_in_yiq`
+* `tfa.image.random_hsv_in_yiq`
+
+``` python
+tfa.image.random_hsv_in_yiq(
+ image,
+ max_delta_hue=0,
+ lower_saturation=1,
+ upper_saturation=1,
+ lower_value=1,
+ upper_value=1,
+ seed=None,
+ name=None
+)
+```
+
+
+
+Defined in [`image/distort_image_ops.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/image/distort_image_ops.py).
+
+
+
+Equivalent to `adjust_hsv_in_yiq()` but uses a `delta_hue` randomly
+picked in the interval `[-max_delta_hue, max_delta_hue]`, a
+`scale_saturation` randomly picked in the interval
+`[lower_saturation, upper_saturation]`, and a `scale_value`
+randomly picked in the interval `[lower_value, upper_value]`.
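+
+#### Example:
+
+A minimal usage sketch (the bounds are illustrative):
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+image = tf.random.uniform([32, 32, 3])
+distorted = tfa.image.random_hsv_in_yiq(
+    image, max_delta_hue=0.5,
+    lower_saturation=0.5, upper_saturation=1.5,
+    lower_value=0.8, upper_value=1.2, seed=42)
+```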
+
+#### Args:
+
+
+* `image`: RGB image or images. Size of the last dimension must be 3.
+* `max_delta_hue`: float. Maximum value for the random delta_hue. Passing 0
+ disables adjusting hue.
+* `lower_saturation`: float. Lower bound for the random scale_saturation.
+* `upper_saturation`: float. Upper bound for the random scale_saturation.
+* `lower_value`: float. Lower bound for the random scale_value.
+* `upper_value`: float. Upper bound for the random scale_value.
+* `seed`: An operation-specific seed. It will be used in conjunction
+ with the graph-level seed to determine the real seeds that will be
+ used in this operation. Please see the documentation of
+ set_random_seed for its interaction with the graph-level random seed.
+* `name`: A name for this operation (optional).
+
+
+#### Returns:
+
+3-D float tensor of shape `[height, width, channels]`.
+
+
+
+#### Raises:
+
+
+* `ValueError`: if `max_delta_hue`, `lower_saturation`, `upper_saturation`,
+  `lower_value`, or `upper_value` is invalid.
\ No newline at end of file
diff --git a/docs/api_docs/python/tfa/image/rotate.md b/docs/api_docs/python/tfa/image/rotate.md
new file mode 100644
index 0000000000..63da52f409
--- /dev/null
+++ b/docs/api_docs/python/tfa/image/rotate.md
@@ -0,0 +1,57 @@
+
+
+
+
+
+# tfa.image.rotate
+
+Rotate image(s) counterclockwise by the passed angle(s) in radians.
+
+### Aliases:
+
+* `tfa.image.rotate`
+* `tfa.image.transform_ops.rotate`
+
+``` python
+tfa.image.rotate(
+ images,
+ angles,
+ interpolation='NEAREST',
+ name=None
+)
+```
+
+
+
+Defined in [`image/transform_ops.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/image/transform_ops.py).
+
+
+
+
+#### Args:
+
+
+* `images`: A tensor of shape
+ (num_images, num_rows, num_columns, num_channels)
+ (NHWC), (num_rows, num_columns, num_channels) (HWC), or
+ (num_rows, num_columns) (HW). The rank must be statically known (the
+  shape is not `TensorShape(None)`).
+* `angles`: A scalar angle to rotate all images by, or (if images has rank 4)
+ a vector of length num_images, with an angle for each image in the
+ batch.
+* `interpolation`: Interpolation mode. Supported values: "NEAREST",
+ "BILINEAR".
+* `name`: The name of the op.
+
+
+#### Returns:
+
+Image(s) with the same type and shape as `images`, rotated by the given
+angle(s). Empty space due to the rotation will be filled with zeros.
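+
+#### Example:
+
+A minimal usage sketch (the angle is illustrative):
+
+``` python
+import math
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+image = tf.random.uniform([1, 32, 32, 3])
+rotated = tfa.image.rotate(image, angles=math.pi / 4)  # 45 degrees CCW
+```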
+
+
+
+#### Raises:
+
+
+* `TypeError`: If `images` is an invalid type.
\ No newline at end of file
diff --git a/docs/api_docs/python/tfa/image/transform.md b/docs/api_docs/python/tfa/image/transform.md
new file mode 100644
index 0000000000..63bc05ac0e
--- /dev/null
+++ b/docs/api_docs/python/tfa/image/transform.md
@@ -0,0 +1,67 @@
+
+
+
+
+
+# tfa.image.transform
+
+Applies the given transform(s) to the image(s).
+
+### Aliases:
+
+* `tfa.image.transform`
+* `tfa.image.transform_ops.transform`
+
+``` python
+tfa.image.transform(
+ images,
+ transforms,
+ interpolation='NEAREST',
+ output_shape=None,
+ name=None
+)
+```
+
+
+
+Defined in [`image/transform_ops.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/image/transform_ops.py).
+
+
+
+
+#### Args:
+
+
+* `images`: A tensor of shape (num_images, num_rows, num_columns,
+ num_channels) (NHWC), (num_rows, num_columns, num_channels) (HWC), or
+ (num_rows, num_columns) (HW). The rank must be statically known (the
+  shape is not `TensorShape(None)`).
+* `transforms`: Projective transform matrix/matrices. A vector of length 8 or
+ tensor of size N x 8. If one row of transforms is
+ [a0, a1, a2, b0, b1, b2, c0, c1], then it maps the *output* point
+ `(x, y)` to a transformed *input* point
+ `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`,
+ where `k = c0 x + c1 y + 1`. The transforms are *inverted* compared to
+ the transform mapping input points to output points. Note that
+ gradients are not backpropagated into transformation parameters.
+* `interpolation`: Interpolation mode.
+ Supported values: "NEAREST", "BILINEAR".
+* `output_shape`: Output dimension after the transform, [height, width].
+  If None, output is the same size as input image.
+* `name`: The name of the op.
+
+
+#### Returns:
+
+Image(s) with the same type and shape as `images`, with the given
+transform(s) applied. Transformed coordinates outside of the input image
+will be filled with zeros.
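+
+#### Example:
+
+A minimal sketch applying the identity transform:
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+image = tf.random.uniform([1, 8, 8, 3])
+# [a0, a1, a2, b0, b1, b2, c0, c1] = [1, 0, 0, 0, 1, 0, 0, 0] maps every
+# output point to the same input point, so the image is unchanged.
+same = tfa.image.transform(image, [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0])
+```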
+
+
+
+#### Raises:
+
+
+* `TypeError`: If `images` is an invalid type.
+* `ValueError`: If output shape is not a 1-D int32 Tensor.
\ No newline at end of file
diff --git a/docs/api_docs/python/tfa/image/transform_ops.md b/docs/api_docs/python/tfa/image/transform_ops.md
new file mode 100644
index 0000000000..c163e8799e
--- /dev/null
+++ b/docs/api_docs/python/tfa/image/transform_ops.md
@@ -0,0 +1,30 @@
+
+
+
+
+
+# Module: tfa.image.transform_ops
+
+Image transform ops.
+
+
+
+Defined in [`image/transform_ops.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/image/transform_ops.py).
+
+
+
+
+## Functions
+
+[`angles_to_projective_transforms(...)`](../../tfa/image/transform_ops/angles_to_projective_transforms.md): Returns projective transform(s) for the given angle(s).
+
+[`compose_transforms(...)`](../../tfa/image/transform_ops/compose_transforms.md): Composes the transforms tensors.
+
+[`flat_transforms_to_matrices(...)`](../../tfa/image/transform_ops/flat_transforms_to_matrices.md): Converts projective transforms to affine matrices.
+
+[`matrices_to_flat_transforms(...)`](../../tfa/image/transform_ops/matrices_to_flat_transforms.md): Converts affine matrices to projective transforms.
+
+[`rotate(...)`](../../tfa/image/rotate.md): Rotate image(s) counterclockwise by the passed angle(s) in radians.
+
+[`transform(...)`](../../tfa/image/transform.md): Applies the given transform(s) to the image(s).
+
diff --git a/docs/api_docs/python/tfa/image/transform_ops/angles_to_projective_transforms.md b/docs/api_docs/python/tfa/image/transform_ops/angles_to_projective_transforms.md
new file mode 100644
index 0000000000..49421a051c
--- /dev/null
+++ b/docs/api_docs/python/tfa/image/transform_ops/angles_to_projective_transforms.md
@@ -0,0 +1,39 @@
+
+
+
+
+
+# tfa.image.transform_ops.angles_to_projective_transforms
+
+Returns projective transform(s) for the given angle(s).
+
+``` python
+tfa.image.transform_ops.angles_to_projective_transforms(
+ angles,
+ image_height,
+ image_width,
+ name=None
+)
+```
+
+
+
+Defined in [`image/transform_ops.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/image/transform_ops.py).
+
+
+
+
+#### Args:
+
+
+* `angles`: A scalar angle to rotate all images by, or (for batches of
+ images) a vector with an angle to rotate each image in the batch. The
+  rank must be statically known (the shape is not `TensorShape(None)`).
+* `image_height`: Height of the image(s) to be transformed.
+* `image_width`: Width of the image(s) to be transformed.
+* `name`: The name of the op (optional).
+
+
+#### Returns:
+
+A tensor of shape (num_images, 8). Projective transforms which can be
+given to `transform` op.
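+
+#### Example:
+
+A minimal sketch feeding the result into `transform`:
+
+``` python
+import math
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+image = tf.random.uniform([1, 16, 16, 3])
+transforms = tfa.image.transform_ops.angles_to_projective_transforms(
+    math.pi / 2, image_height=16, image_width=16)
+rotated = tfa.image.transform(image, transforms)
+```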
diff --git a/docs/api_docs/python/tfa/image/transform_ops/compose_transforms.md b/docs/api_docs/python/tfa/image/transform_ops/compose_transforms.md
new file mode 100644
index 0000000000..c574234aae
--- /dev/null
+++ b/docs/api_docs/python/tfa/image/transform_ops/compose_transforms.md
@@ -0,0 +1,38 @@
+
+
+
+
+
+# tfa.image.transform_ops.compose_transforms
+
+Composes the transforms tensors.
+
+``` python
+tfa.image.transform_ops.compose_transforms(
+ transforms,
+ name=None
+)
+```
+
+
+
+Defined in [`image/transform_ops.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/image/transform_ops.py).
+
+
+
+
+#### Args:
+
+
+* `transforms`: List of image projective transforms to be composed. Each
+ transform is length 8 (single transform) or shape (N, 8) (batched
+ transforms). The shapes of all inputs must be equal, and at least one
+ input must be given.
+* `name`: The name for the op.
+
+
+#### Returns:
+
+A composed transform tensor. When passed to `transform` op,
+ equivalent to applying each of the given transforms to the image in
+ order.
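+
+#### Example:
+
+A minimal sketch composing two flat transforms (values are illustrative):
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+identity = tf.constant([[1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]])
+shift = tf.constant([[1.0, 0.0, -2.0, 0.0, 1.0, 0.0, 0.0, 0.0]])
+# Equivalent to applying `identity`, then `shift`, to an image.
+combined = tfa.image.transform_ops.compose_transforms([identity, shift])
+```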
diff --git a/docs/api_docs/python/tfa/image/transform_ops/flat_transforms_to_matrices.md b/docs/api_docs/python/tfa/image/transform_ops/flat_transforms_to_matrices.md
new file mode 100644
index 0000000000..e16562a78f
--- /dev/null
+++ b/docs/api_docs/python/tfa/image/transform_ops/flat_transforms_to_matrices.md
@@ -0,0 +1,45 @@
+
+
+
+
+
+# tfa.image.transform_ops.flat_transforms_to_matrices
+
+Converts projective transforms to affine matrices.
+
+``` python
+tfa.image.transform_ops.flat_transforms_to_matrices(
+ transforms,
+ name=None
+)
+```
+
+
+
+Defined in [`image/transform_ops.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/image/transform_ops.py).
+
+
+
+Note that the output matrices map output coordinates to input coordinates.
+For the forward transformation matrix, call `tf.linalg.inv` on the result.
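+
+#### Example:
+
+A minimal sketch: the flat identity transform maps to the 3x3 identity
+matrix:
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+flat = tf.constant([[1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]])
+matrices = tfa.image.transform_ops.flat_transforms_to_matrices(flat)
+# matrices ==> [[[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]]]
+```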
+
+#### Args:
+
+
+* `transforms`: Vector of length 8, or batches of transforms with shape
+ `(N, 8)`.
+* `name`: The name for the op.
+
+
+#### Returns:
+
+3D tensor of matrices with shape `(N, 3, 3)`. The output matrices map the
+ *output coordinates* (in homogeneous coordinates) of each transform to
+ the corresponding *input coordinates*.
+
+
+
+#### Raises:
+
+
+* `ValueError`: If `transforms` have an invalid shape.
\ No newline at end of file
diff --git a/docs/api_docs/python/tfa/image/transform_ops/matrices_to_flat_transforms.md b/docs/api_docs/python/tfa/image/transform_ops/matrices_to_flat_transforms.md
new file mode 100644
index 0000000000..f7c79c8e2f
--- /dev/null
+++ b/docs/api_docs/python/tfa/image/transform_ops/matrices_to_flat_transforms.md
@@ -0,0 +1,46 @@
+
+
+
+
+
+# tfa.image.transform_ops.matrices_to_flat_transforms
+
+Converts affine matrices to projective transforms.
+
+``` python
+tfa.image.transform_ops.matrices_to_flat_transforms(
+ transform_matrices,
+ name=None
+)
+```
+
+
+
+Defined in [`image/transform_ops.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/image/transform_ops.py).
+
+
+
+Note that we expect matrices that map output coordinates to input
+coordinates. To convert forward transformation matrices,
+call `tf.linalg.inv` on the matrices and use the result here.
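+
+#### Example:
+
+A minimal sketch: the 3x3 identity matrix maps to the flat identity
+transform:
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+matrix = tf.eye(3)  # identity mapping in homogeneous coordinates
+flat = tfa.image.transform_ops.matrices_to_flat_transforms(matrix)
+# flat ==> [[1., 0., 0., 0., 1., 0., 0., 0.]]
+```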
+
+#### Args:
+
+
+* `transform_matrices`: One or more affine transformation matrices, for the
+ reverse transformation in homogeneous coordinates. Shape `(3, 3)` or
+ `(N, 3, 3)`.
+* `name`: The name for the op.
+
+
+#### Returns:
+
+2D tensor of flat transforms with shape `(N, 8)`, which may be passed
+into `transform` op.
+
+
+
+#### Raises:
+
+
+* `ValueError`: If `transform_matrices` have an invalid shape.
\ No newline at end of file
diff --git a/docs/api_docs/python/tfa/layers.md b/docs/api_docs/python/tfa/layers.md
new file mode 100644
index 0000000000..10f9af0f22
--- /dev/null
+++ b/docs/api_docs/python/tfa/layers.md
@@ -0,0 +1,42 @@
+
+
+
+
+
+# Module: tfa.layers
+
+Additional layers that conform to Keras API.
+
+
+
+Defined in [`layers/__init__.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/layers/__init__.py).
+
+
+
+
+## Modules
+
+[`maxout`](../tfa/layers/maxout.md) module: Implementing Maxout layer.
+
+[`normalizations`](../tfa/layers/normalizations.md) module
+
+[`poincare`](../tfa/layers/poincare.md) module: Implementing PoincareNormalize layer.
+
+[`sparsemax`](../tfa/layers/sparsemax.md) module
+
+[`wrappers`](../tfa/layers/wrappers.md) module
+
+## Classes
+
+[`class GroupNormalization`](../tfa/layers/GroupNormalization.md): Group normalization layer.
+
+[`class InstanceNormalization`](../tfa/layers/InstanceNormalization.md): Instance normalization layer.
+
+[`class Maxout`](../tfa/layers/Maxout.md): Applies Maxout to the input.
+
+[`class PoincareNormalize`](../tfa/layers/PoincareNormalize.md): Project into the Poincare ball with norm <= 1.0 - epsilon.
+
+[`class Sparsemax`](../tfa/layers/Sparsemax.md): Sparsemax activation function [1].
+
+[`class WeightNormalization`](../tfa/layers/WeightNormalization.md): This wrapper reparameterizes a layer by decoupling the weight's magnitude and direction.
+
diff --git a/docs/api_docs/python/tfa/layers/GroupNormalization.md b/docs/api_docs/python/tfa/layers/GroupNormalization.md
new file mode 100644
index 0000000000..0eb32cf473
--- /dev/null
+++ b/docs/api_docs/python/tfa/layers/GroupNormalization.md
@@ -0,0 +1,852 @@
+
+
+
+
+# tfa.layers.GroupNormalization
+
+## Class `GroupNormalization`
+
+Group normalization layer.
+
+
+
+### Aliases:
+
+* Class `tfa.layers.GroupNormalization`
+* Class `tfa.layers.normalizations.GroupNormalization`
+
+
+
+Defined in [`layers/normalizations.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/layers/normalizations.py).
+
+
+
+Group Normalization divides the channels into groups and computes
+within each group the mean and variance for normalization.
+Empirically, its accuracy is more stable than batch norm in a wide
+range of small batch sizes, if learning rate is adjusted linearly
+with batch sizes.
+
+Relation to Layer Normalization:
+If the number of groups is set to 1, then this operation becomes identical
+to Layer Normalization.
+
+Relation to Instance Normalization:
+If the number of groups is set to the
+input dimension (number of groups is equal
+to number of channels), then this operation becomes
+identical to Instance Normalization.
+
+#### Arguments:
+
+
+* `groups`: Integer, the number of groups for Group Normalization. Can
+  be in the range [1, N] where N is the input dimension. The input
+  dimension must be divisible by the number of groups.
+* `axis`: Integer, the axis that should be normalized.
+* `epsilon`: Small float added to variance to avoid dividing by zero.
+* `center`: If True, add offset of `beta` to normalized tensor.
+  If False, `beta` is ignored.
+* `scale`: If True, multiply by `gamma`. If False, `gamma` is not used.
+* `beta_initializer`: Initializer for the beta weight.
+* `gamma_initializer`: Initializer for the gamma weight.
+* `beta_regularizer`: Optional regularizer for the beta weight.
+* `gamma_regularizer`: Optional regularizer for the gamma weight.
+* `beta_constraint`: Optional constraint for the beta weight.
+* `gamma_constraint`: Optional constraint for the gamma weight.
+
+
+#### Input shape:
+
+Arbitrary. Use the keyword argument `input_shape`
+(tuple of integers, does not include the samples axis)
+when using this layer as the first layer in a model.
+
+
+#### Output shape:
+
+Same shape as input.
+
+
+#### References:
+
+- [Group Normalization](https://arxiv.org/abs/1803.08494)
+
+__init__
+
+``` python
+__init__(
+ groups=2,
+ axis=-1,
+ epsilon=0.001,
+ center=True,
+ scale=True,
+ beta_initializer='zeros',
+ gamma_initializer='ones',
+ beta_regularizer=None,
+ gamma_regularizer=None,
+ beta_constraint=None,
+ gamma_constraint=None,
+ **kwargs
+)
+```
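+
+A minimal usage sketch (layer sizes are illustrative; the channel count
+must be divisible by `groups`):
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+model = tf.keras.Sequential([
+    tf.keras.layers.Conv2D(16, 3, input_shape=(28, 28, 1)),
+    tfa.layers.GroupNormalization(groups=4, axis=-1),  # 4 groups of 4 channels
+])
+```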
+
+
+
+
+
+
+## Properties
+
+activity_regularizer
+
+Optional regularizer function for the output of this layer.
+
+
+dtype
+
+
+
+
+dynamic
+
+
+
+
+
+input
+
+Retrieves the input tensor(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input tensor or list of input tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layers.
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+* `AttributeError`: If no inbound nodes are found.
+
+
+input_mask
+
+Retrieves the input mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input mask tensor (potentially None) or list of input
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layers.
+
+
+input_shape
+
+Retrieves the input shape(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer, or if all inputs
+have the same shape.
+
+#### Returns:
+
+Input shape, as an integer shape tuple
+(or list of shape tuples, one tuple per input tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined input_shape.
+* `RuntimeError`: if called in Eager mode.
+
+
+
+
+
+
+losses
+
+Losses which are associated with this `Layer`.
+
+Variable regularization tensors are created when this property is accessed,
+so it is eager safe: accessing `losses` under a `tf.GradientTape` will
+propagate gradients back to the corresponding variables.
+
+#### Returns:
+
+A list of tensors.
+
+
+metrics
+
+
+
+
+name
+
+
+
+
+name_scope
+
+Returns a `tf.name_scope` instance for this class.
+
+
+non_trainable_variables
+
+
+
+
+non_trainable_weights
+
+
+
+
+output
+
+Retrieves the output tensor(s) of a layer.
+
+Only applicable if the layer has exactly one output,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output tensor or list of output tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to more than one incoming
+ layers.
+* `RuntimeError`: if called in Eager mode.
+
+output_mask
+
+Retrieves the output mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output mask tensor (potentially None) or list of output
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layers.
+
+output_shape
+
+Retrieves the output shape(s) of a layer.
+
+Only applicable if the layer has one output,
+or if all outputs have the same shape.
+
+#### Returns:
+
+Output shape, as an integer shape tuple
+(or list of shape tuples, one tuple per output tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined output shape.
+* `RuntimeError`: if called in Eager mode.
+
+submodules
+
+Sequence of all sub-modules.
+
+Submodules are modules which are properties of this module, or found as
+properties of modules which are properties of this module (and so on).
+
+```
+a = tf.Module()
+b = tf.Module()
+c = tf.Module()
+a.b = b
+b.c = c
+assert list(a.submodules) == [b, c]
+assert list(b.submodules) == [c]
+assert list(c.submodules) == []
+```
+
+#### Returns:
+
+A sequence of all submodules.
+
+
+trainable
+
+
+
+
+trainable_variables
+
+
+
+
+trainable_weights
+
+
+
+
+updates
+
+
+
+
+variables
+
+Returns the list of all layer variables/weights.
+
+Alias of `self.weights`.
+
+#### Returns:
+
+A list of variables.
+
+
+weights
+
+Returns the list of all layer variables/weights.
+
+
+#### Returns:
+
+A list of variables.
+
+
+
+
+## Methods
+
+__call__
+
+``` python
+__call__(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Wraps `call`, applying pre- and post-processing steps.
+
+
+#### Arguments:
+
+
+* `inputs`: input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+
+#### Note:
+
+- The following optional keyword arguments are reserved for specific uses:
+ * `training`: Boolean scalar tensor of Python boolean indicating
+ whether the `call` is meant for training or inference.
+ * `mask`: Boolean input mask.
+- If the layer's `call` method takes a `mask` argument (as some Keras
+  layers do), its default value will be set to the mask generated
+  for `inputs` by the previous layer (if `inputs` did come from
+  a layer that generated a corresponding mask, i.e. if it came from
+  a Keras layer with masking support).
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer's `call` method returns None (an invalid value).
+
+apply
+
+``` python
+apply(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Apply the layer on an input.
+
+This is an alias of `self.__call__`.
+
+#### Arguments:
+
+
+* `inputs`: Input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+build
+
+``` python
+build(input_shape)
+```
+
+
+
+
+compute_mask
+
+``` python
+compute_mask(
+ inputs,
+ mask=None
+)
+```
+
+Computes an output mask tensor.
+
+
+#### Arguments:
+
+
+* `inputs`: Tensor or list of tensors.
+* `mask`: Tensor or list of tensors.
+
+
+#### Returns:
+
+None or a tensor (or list of tensors,
+ one per output tensor of the layer).
+
+
+compute_output_shape
+
+``` python
+compute_output_shape(input_shape)
+```
+
+
+
+
+count_params
+
+``` python
+count_params()
+```
+
+Count the total number of scalars composing the weights.
+
+
+#### Returns:
+
+An integer count.
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer isn't yet built
+ (in which case its weights aren't yet defined).
+
+from_config
+
+``` python
+from_config(
+ cls,
+ config
+)
+```
+
+Creates a layer from its config.
+
+This method is the reverse of `get_config`,
+capable of instantiating the same layer from the config
+dictionary. It does not handle layer connectivity
+(handled by Network), nor weights (handled by `set_weights`).
+
+#### Arguments:
+
+
+* `config`: A Python dictionary, typically the
+ output of get_config.
+
+
+#### Returns:
+
+A layer instance.
+
+
+get_config
+
+``` python
+get_config()
+```
+
+
+
+
+
+get_input_at
+
+``` python
+get_input_at(node_index)
+```
+
+Retrieves the input tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+
+get_input_mask_at
+
+``` python
+get_input_mask_at(node_index)
+```
+
+Retrieves the input mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple inputs).
+
+
+
+get_input_shape_at
+
+``` python
+get_input_shape_at(node_index)
+```
+
+Retrieves the input shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_losses_for
+
+``` python
+get_losses_for(inputs)
+```
+
+Retrieves losses relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of loss tensors of the layer that depend on `inputs`.
+
+
+get_output_at
+
+``` python
+get_output_at(node_index)
+```
+
+Retrieves the output tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_output_mask_at
+
+``` python
+get_output_mask_at(node_index)
+```
+
+Retrieves the output mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple outputs).
+
+
+get_output_shape_at
+
+``` python
+get_output_shape_at(node_index)
+```
+
+Retrieves the output shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_updates_for
+
+``` python
+get_updates_for(inputs)
+```
+
+Retrieves updates relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of update ops of the layer that depend on `inputs`.
+
+
+get_weights
+
+``` python
+get_weights()
+```
+
+Returns the current weights of the layer.
+
+
+#### Returns:
+
+Weights values as a list of numpy arrays.
+
+
+set_weights
+
+``` python
+set_weights(weights)
+```
+
+Sets the weights of the layer, from Numpy arrays.
+
+
+#### Arguments:
+
+
+* `weights`: a list of Numpy arrays. The number
+ of arrays and their shape must match
+ number of the dimensions of the weights
+ of the layer (i.e. it should match the
+ output of `get_weights`).
+
+
+#### Raises:
+
+
+* `ValueError`: If the provided weights list does not match the
+ layer's specifications.
+
+with_name_scope
+
+``` python
+with_name_scope(
+ cls,
+ method
+)
+```
+
+Decorator to automatically enter the module name scope.
+
+```
+class MyModule(tf.Module):
+ @tf.Module.with_name_scope
+ def __call__(self, x):
+ if not hasattr(self, 'w'):
+ self.w = tf.Variable(tf.random.normal([x.shape[1], 64]))
+ return tf.matmul(x, self.w)
+```
+
+Using the above module would produce `tf.Variable`s and `tf.Tensor`s whose
+names include the module name:
+
+```
+mod = MyModule()
+mod(tf.ones([8, 32]))
+# ==> <tf.Tensor: shape=(8, 64), dtype=float32, ...>
+mod.w
+# ==> <tf.Variable 'my_module/w:0' shape=(32, 64), dtype=float32, ...>
+```
+
+#### Args:
+
+
+* `method`: The method to wrap.
+
+
+#### Returns:
+
+The original method wrapped such that it enters the module's name scope.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/layers/InstanceNormalization.md b/docs/api_docs/python/tfa/layers/InstanceNormalization.md
new file mode 100644
index 0000000000..8046e4bee7
--- /dev/null
+++ b/docs/api_docs/python/tfa/layers/InstanceNormalization.md
@@ -0,0 +1,828 @@
+
+
+
+
+# tfa.layers.InstanceNormalization
+
+## Class `InstanceNormalization`
+
+Instance normalization layer.
+
+Inherits From: [`GroupNormalization`](../../tfa/layers/GroupNormalization.md)
+
+### Aliases:
+
+* Class `tfa.layers.InstanceNormalization`
+* Class `tfa.layers.normalizations.InstanceNormalization`
+
+
+
+Defined in [`layers/normalizations.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/layers/normalizations.py).
+
+
+
+Instance Normalization is a specific case of `GroupNormalization`, since
+it normalizes all features of one channel. The group size is equal to the
+channel size. Empirically, its accuracy is more stable than batch norm in a
+wide range of small batch sizes, if learning rate is adjusted linearly
+with batch sizes.
+
+#### Arguments:
+
+
+* `axis`: Integer, the axis that should be normalized.
+* `epsilon`: Small float added to variance to avoid dividing by zero.
+* `center`: If True, add offset of `beta` to normalized tensor.
+  If False, `beta` is ignored.
+* `scale`: If True, multiply by `gamma`. If False, `gamma` is not used.
+* `beta_initializer`: Initializer for the beta weight.
+* `gamma_initializer`: Initializer for the gamma weight.
+* `beta_regularizer`: Optional regularizer for the beta weight.
+* `gamma_regularizer`: Optional regularizer for the gamma weight.
+* `beta_constraint`: Optional constraint for the beta weight.
+* `gamma_constraint`: Optional constraint for the gamma weight.
+
+
+#### Input shape:
+
+Arbitrary. Use the keyword argument `input_shape`
+(tuple of integers, does not include the samples axis)
+when using this layer as the first layer in a model.
+
+
+#### Output shape:
+
+Same shape as input.
+
+
+#### References:
+
+- [Instance Normalization: The Missing Ingredient for Fast Stylization](https://arxiv.org/abs/1607.08022)
+
+__init__
+
+``` python
+__init__(**kwargs)
+```
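+
+A minimal usage sketch (layer sizes are illustrative):
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+model = tf.keras.Sequential([
+    tf.keras.layers.Conv2D(8, 3, input_shape=(32, 32, 3)),
+    tfa.layers.InstanceNormalization(axis=-1),  # one group per channel
+])
+```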
+
+
+
+
+
+
+## Properties
+
+activity_regularizer
+
+Optional regularizer function for the output of this layer.
+
+
+dtype
+
+
+
+
+dynamic
+
+
+
+
+
+input
+
+Retrieves the input tensor(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input tensor or list of input tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layers.
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+* `AttributeError`: If no inbound nodes are found.
+
+
+input_mask
+
+Retrieves the input mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input mask tensor (potentially None) or list of input
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layers.
+
+
+input_shape
+
+Retrieves the input shape(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer, or if all inputs
+have the same shape.
+
+#### Returns:
+
+Input shape, as an integer shape tuple
+(or list of shape tuples, one tuple per input tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined input_shape.
+* `RuntimeError`: if called in Eager mode.
+
+
+
+
+
+
+losses
+
+Losses which are associated with this `Layer`.
+
+Variable regularization tensors are created when this property is accessed,
+so it is eager safe: accessing `losses` under a `tf.GradientTape` will
+propagate gradients back to the corresponding variables.
+
+#### Returns:
+
+A list of tensors.
+
+
+metrics
+
+
+
+
+name
+
+
+
+
+name_scope
+
+Returns a `tf.name_scope` instance for this class.
+
+
+non_trainable_variables
+
+
+
+
+non_trainable_weights
+
+
+
+
+output
+
+Retrieves the output tensor(s) of a layer.
+
+Only applicable if the layer has exactly one output,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output tensor or list of output tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to more than one incoming
+ layers.
+* `RuntimeError`: if called in Eager mode.
+
+output_mask
+
+Retrieves the output mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output mask tensor (potentially None) or list of output
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layers.
+
+output_shape
+
+Retrieves the output shape(s) of a layer.
+
+Only applicable if the layer has one output,
+or if all outputs have the same shape.
+
+#### Returns:
+
+Output shape, as an integer shape tuple
+(or list of shape tuples, one tuple per output tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined output shape.
+* `RuntimeError`: if called in Eager mode.
+
+submodules
+
+Sequence of all sub-modules.
+
+Submodules are modules which are properties of this module, or found as
+properties of modules which are properties of this module (and so on).
+
+```
+a = tf.Module()
+b = tf.Module()
+c = tf.Module()
+a.b = b
+b.c = c
+assert list(a.submodules) == [b, c]
+assert list(b.submodules) == [c]
+assert list(c.submodules) == []
+```
+
+#### Returns:
+
+A sequence of all submodules.
+
+
+trainable
+
+
+
+
+trainable_variables
+
+
+
+
+trainable_weights
+
+
+
+
+updates
+
+
+
+
+variables
+
+Returns the list of all layer variables/weights.
+
+Alias of `self.weights`.
+
+#### Returns:
+
+A list of variables.
+
+
+weights
+
+Returns the list of all layer variables/weights.
+
+
+#### Returns:
+
+A list of variables.
+
+
+
+
+## Methods
+
+__call__
+
+``` python
+__call__(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Wraps `call`, applying pre- and post-processing steps.
+
+
+#### Arguments:
+
+
+* `inputs`: input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+
+#### Note:
+
+- The following optional keyword arguments are reserved for specific uses:
+ * `training`: Boolean scalar tensor of Python boolean indicating
+ whether the `call` is meant for training or inference.
+ * `mask`: Boolean input mask.
+- If the layer's `call` method takes a `mask` argument (as some Keras
+  layers do), its default value will be set to the mask generated
+  for `inputs` by the previous layer (if `inputs` did come from
+  a layer that generated a corresponding mask, i.e. if it came from
+  a Keras layer with masking support).
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer's `call` method returns None (an invalid value).
+
+apply
+
+``` python
+apply(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Apply the layer on an input.
+
+This is an alias of `self.__call__`.
+
+#### Arguments:
+
+
+* `inputs`: Input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+build
+
+``` python
+build(input_shape)
+```
+
+
+
+
+compute_mask
+
+``` python
+compute_mask(
+ inputs,
+ mask=None
+)
+```
+
+Computes an output mask tensor.
+
+
+#### Arguments:
+
+
+* `inputs`: Tensor or list of tensors.
+* `mask`: Tensor or list of tensors.
+
+
+#### Returns:
+
+None or a tensor (or list of tensors,
+ one per output tensor of the layer).
+
+
+compute_output_shape
+
+``` python
+compute_output_shape(input_shape)
+```
+
+
+
+
+count_params
+
+``` python
+count_params()
+```
+
+Count the total number of scalars composing the weights.
+
+
+#### Returns:
+
+An integer count.
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer isn't yet built
+ (in which case its weights aren't yet defined).
+
+from_config
+
+``` python
+from_config(
+ cls,
+ config
+)
+```
+
+Creates a layer from its config.
+
+This method is the reverse of `get_config`,
+capable of instantiating the same layer from the config
+dictionary. It does not handle layer connectivity
+(handled by Network), nor weights (handled by `set_weights`).
+
+#### Arguments:
+
+
+* `config`: A Python dictionary, typically the
+ output of get_config.
+
+
+#### Returns:
+
+A layer instance.
+
+
+get_config
+
+``` python
+get_config()
+```
+
+
+
+
+
+get_input_at
+
+``` python
+get_input_at(node_index)
+```
+
+Retrieves the input tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+
+get_input_mask_at
+
+``` python
+get_input_mask_at(node_index)
+```
+
+Retrieves the input mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple inputs).
+
+
+
+get_input_shape_at
+
+``` python
+get_input_shape_at(node_index)
+```
+
+Retrieves the input shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_losses_for
+
+``` python
+get_losses_for(inputs)
+```
+
+Retrieves losses relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of loss tensors of the layer that depend on `inputs`.
+
+
+get_output_at
+
+``` python
+get_output_at(node_index)
+```
+
+Retrieves the output tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_output_mask_at
+
+``` python
+get_output_mask_at(node_index)
+```
+
+Retrieves the output mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple outputs).
+
+
+get_output_shape_at
+
+``` python
+get_output_shape_at(node_index)
+```
+
+Retrieves the output shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_updates_for
+
+``` python
+get_updates_for(inputs)
+```
+
+Retrieves updates relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of update ops of the layer that depend on `inputs`.
+
+
+get_weights
+
+``` python
+get_weights()
+```
+
+Returns the current weights of the layer.
+
+
+#### Returns:
+
+Weights values as a list of numpy arrays.
+
+
+set_weights
+
+``` python
+set_weights(weights)
+```
+
+Sets the weights of the layer, from Numpy arrays.
+
+
+#### Arguments:
+
+
+* `weights`: a list of Numpy arrays. The number
+ of arrays and their shape must match
+ number of the dimensions of the weights
+ of the layer (i.e. it should match the
+ output of `get_weights`).
+
+
+#### Raises:
+
+
+* `ValueError`: If the provided weights list does not match the
+ layer's specifications.
+
+with_name_scope
+
+``` python
+with_name_scope(
+ cls,
+ method
+)
+```
+
+Decorator to automatically enter the module name scope.
+
+```
+class MyModule(tf.Module):
+ @tf.Module.with_name_scope
+ def __call__(self, x):
+ if not hasattr(self, 'w'):
+ self.w = tf.Variable(tf.random.normal([x.shape[1], 64]))
+ return tf.matmul(x, self.w)
+```
+
+Using the above module would produce `tf.Variable`s and `tf.Tensor`s whose
+names included the module name:
+
+```
+mod = MyModule()
+mod(tf.ones([8, 32]))
+# ==> <tf.Tensor: ...>
+mod.w
+# ==> <tf.Variable ...'my_module/w:0'>
+```
+
+#### Args:
+
+
+* `method`: The method to wrap.
+
+
+#### Returns:
+
+The original method wrapped such that it enters the module's name scope.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/layers/Maxout.md b/docs/api_docs/python/tfa/layers/Maxout.md
new file mode 100644
index 0000000000..c5651e4bd1
--- /dev/null
+++ b/docs/api_docs/python/tfa/layers/Maxout.md
@@ -0,0 +1,839 @@
+
+
+# tfa.layers.Maxout
+
+## Class `Maxout`
+
+Applies Maxout to the input.
+
+
+
+### Aliases:
+
+* Class `tfa.layers.Maxout`
+* Class `tfa.layers.maxout.Maxout`
+
+
+
+Defined in [`layers/maxout.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/layers/maxout.py).
+
+
+
+"Maxout Networks" Ian J. Goodfellow, David Warde-Farley, Mehdi Mirza, Aaron
+Courville, Yoshua Bengio. https://arxiv.org/abs/1302.4389
+
+Usually the operation is performed in the filter/channel dimension. This
+can also be used after Dense layers to reduce number of features.
+
+#### Arguments:
+
+
+* `num_units`: Specifies how many features will remain after maxout
+ in the `axis` dimension (usually channel).
+ This must be a factor of the number of input features.
+* `axis`: The dimension where max pooling will be performed. Default is the
+ last dimension.
+
+
+#### Input shape:
+
+nD tensor with shape: `(batch_size, ..., axis_dim, ...)`.
+
+
+
+#### Output shape:
+
+nD tensor with shape: `(batch_size, ..., num_units, ...)`.
+
+
+__init__
+
+``` python
+__init__(
+ num_units,
+ axis=-1,
+ **kwargs
+)
+```
+
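+A minimal usage sketch (the alias `tfa` for `tensorflow_addons` and the
+shapes below are illustrative, not part of the API above):
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+# 8 input features with num_units=4: the max is taken over groups of 2.
+x = tf.random.normal([16, 8])
+y = tfa.layers.Maxout(num_units=4)(x)
+print(y.shape)  # (16, 4)
+```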
+
+
+
+
+
+## Properties
+
+activity_regularizer
+
+Optional regularizer function for the output of this layer.
+
+
+dtype
+
+
+
+
+dynamic
+
+
+
+
+
+input
+
+Retrieves the input tensor(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input tensor or list of input tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+* `RuntimeError`: If called in Eager mode.
+* `AttributeError`: If no inbound nodes are found.
+
+
+input_mask
+
+Retrieves the input mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input mask tensor (potentially None) or list of input
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+
+
+input_shape
+
+Retrieves the input shape(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer, or if all inputs
+have the same shape.
+
+#### Returns:
+
+Input shape, as an integer shape tuple
+(or list of shape tuples, one tuple per input tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined input_shape.
+* `RuntimeError`: if called in Eager mode.
+
+
+
+
+
+
+losses
+
+Losses which are associated with this `Layer`.
+
+Variable regularization tensors are created when this property is accessed,
+so it is eager safe: accessing `losses` under a `tf.GradientTape` will
+propagate gradients back to the corresponding variables.
+
+#### Returns:
+
+A list of tensors.
+
+
+metrics
+
+
+
+
+name
+
+
+
+
+name_scope
+
+Returns a `tf.name_scope` instance for this class.
+
+
+non_trainable_variables
+
+
+
+
+non_trainable_weights
+
+
+
+
+output
+
+Retrieves the output tensor(s) of a layer.
+
+Only applicable if the layer has exactly one output,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output tensor or list of output tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to more than one incoming
+ layer.
+* `RuntimeError`: if called in Eager mode.
+
+output_mask
+
+Retrieves the output mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output mask tensor (potentially None) or list of output
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+
+output_shape
+
+Retrieves the output shape(s) of a layer.
+
+Only applicable if the layer has one output,
+or if all outputs have the same shape.
+
+#### Returns:
+
+Output shape, as an integer shape tuple
+(or list of shape tuples, one tuple per output tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined output shape.
+* `RuntimeError`: if called in Eager mode.
+
+submodules
+
+Sequence of all sub-modules.
+
+Submodules are modules which are properties of this module, or found as
+properties of modules which are properties of this module (and so on).
+
+```
+a = tf.Module()
+b = tf.Module()
+c = tf.Module()
+a.b = b
+b.c = c
+assert list(a.submodules) == [b, c]
+assert list(b.submodules) == [c]
+assert list(c.submodules) == []
+```
+
+#### Returns:
+
+A sequence of all submodules.
+
+
+trainable
+
+
+
+
+trainable_variables
+
+
+
+
+trainable_weights
+
+
+
+
+updates
+
+
+
+
+variables
+
+Returns the list of all layer variables/weights.
+
+Alias of `self.weights`.
+
+#### Returns:
+
+A list of variables.
+
+
+weights
+
+Returns the list of all layer variables/weights.
+
+
+#### Returns:
+
+A list of variables.
+
+
+
+
+## Methods
+
+__call__
+
+``` python
+__call__(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Wraps `call`, applying pre- and post-processing steps.
+
+
+#### Arguments:
+
+
+* `inputs`: input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+
+#### Note:
+
+- The following optional keyword arguments are reserved for specific uses:
+ * `training`: Boolean scalar tensor or Python boolean indicating
+ whether the `call` is meant for training or inference.
+ * `mask`: Boolean input mask.
+- If the layer's `call` method takes a `mask` argument (as some Keras
+ layers do), its default value will be set to the mask generated
+ for `inputs` by the previous layer (if `inputs` did come from
+ a layer that generated a corresponding mask, i.e. if it came from
+ a Keras layer with masking support).
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer's `call` method returns None (an invalid value).
+
+apply
+
+``` python
+apply(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Applies the layer to an input.
+
+This is an alias of `self.__call__`.
+
+#### Arguments:
+
+
+* `inputs`: Input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+build
+
+``` python
+build(input_shape)
+```
+
+Creates the variables of the layer (optional, for subclass implementers).
+
+This is a method that implementers of subclasses of `Layer` or `Model`
+can override if they need a state-creation step in-between
+layer instantiation and layer call.
+
+This is typically used to create the weights of `Layer` subclasses.
+
+#### Arguments:
+
+
+* `input_shape`: Instance of `TensorShape`, or list of instances of
+ `TensorShape` if the layer expects a list of inputs
+ (one instance per input).
+
+compute_mask
+
+``` python
+compute_mask(
+ inputs,
+ mask=None
+)
+```
+
+Computes an output mask tensor.
+
+
+#### Arguments:
+
+
+* `inputs`: Tensor or list of tensors.
+* `mask`: Tensor or list of tensors.
+
+
+#### Returns:
+
+None or a tensor (or list of tensors,
+ one per output tensor of the layer).
+
+
+compute_output_shape
+
+``` python
+compute_output_shape(input_shape)
+```
+
+
+
+
+count_params
+
+``` python
+count_params()
+```
+
+Count the total number of scalars composing the weights.
+
+
+#### Returns:
+
+An integer count.
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer isn't yet built
+ (in which case its weights aren't yet defined).
+
+from_config
+
+``` python
+from_config(
+ cls,
+ config
+)
+```
+
+Creates a layer from its config.
+
+This method is the reverse of `get_config`,
+capable of instantiating the same layer from the config
+dictionary. It does not handle layer connectivity
+(handled by Network), nor weights (handled by `set_weights`).
+
+#### Arguments:
+
+
+* `config`: A Python dictionary, typically the
+ output of get_config.
+
+
+#### Returns:
+
+A layer instance.
+
+
+get_config
+
+``` python
+get_config()
+```
+
+
+
+
+
+get_input_at
+
+``` python
+get_input_at(node_index)
+```
+
+Retrieves the input tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+
+get_input_mask_at
+
+``` python
+get_input_mask_at(node_index)
+```
+
+Retrieves the input mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple inputs).
+
+
+
+get_input_shape_at
+
+``` python
+get_input_shape_at(node_index)
+```
+
+Retrieves the input shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_losses_for
+
+``` python
+get_losses_for(inputs)
+```
+
+Retrieves losses relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of loss tensors of the layer that depend on `inputs`.
+
+
+get_output_at
+
+``` python
+get_output_at(node_index)
+```
+
+Retrieves the output tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_output_mask_at
+
+``` python
+get_output_mask_at(node_index)
+```
+
+Retrieves the output mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple outputs).
+
+
+get_output_shape_at
+
+``` python
+get_output_shape_at(node_index)
+```
+
+Retrieves the output shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_updates_for
+
+``` python
+get_updates_for(inputs)
+```
+
+Retrieves updates relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of update ops of the layer that depend on `inputs`.
+
+
+get_weights
+
+``` python
+get_weights()
+```
+
+Returns the current weights of the layer.
+
+
+#### Returns:
+
+Weights values as a list of numpy arrays.
+
+
+set_weights
+
+``` python
+set_weights(weights)
+```
+
+Sets the weights of the layer, from Numpy arrays.
+
+
+#### Arguments:
+
+
+* `weights`: a list of Numpy arrays. The number
+ of arrays and their shapes must match the
+ number and shapes of the weights of the
+ layer (i.e. it should match the
+ output of `get_weights`).
+
+
+#### Raises:
+
+
+* `ValueError`: If the provided weights list does not match the
+ layer's specifications.
+
+with_name_scope
+
+``` python
+with_name_scope(
+ cls,
+ method
+)
+```
+
+Decorator to automatically enter the module name scope.
+
+```
+class MyModule(tf.Module):
+ @tf.Module.with_name_scope
+ def __call__(self, x):
+ if not hasattr(self, 'w'):
+ self.w = tf.Variable(tf.random.normal([x.shape[1], 64]))
+ return tf.matmul(x, self.w)
+```
+
+Using the above module would produce `tf.Variable`s and `tf.Tensor`s whose
+names included the module name:
+
+```
+mod = MyModule()
+mod(tf.ones([8, 32]))
+# ==> <tf.Tensor: ...>
+mod.w
+# ==> <tf.Variable ...'my_module/w:0'>
+```
+
+#### Args:
+
+
+* `method`: The method to wrap.
+
+
+#### Returns:
+
+The original method wrapped such that it enters the module's name scope.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/layers/PoincareNormalize.md b/docs/api_docs/python/tfa/layers/PoincareNormalize.md
new file mode 100644
index 0000000000..3e8713f410
--- /dev/null
+++ b/docs/api_docs/python/tfa/layers/PoincareNormalize.md
@@ -0,0 +1,833 @@
+
+
+# tfa.layers.PoincareNormalize
+
+## Class `PoincareNormalize`
+
+Project into the Poincare ball with norm <= 1.0 - epsilon.
+
+
+
+### Aliases:
+
+* Class `tfa.layers.PoincareNormalize`
+* Class `tfa.layers.poincare.PoincareNormalize`
+
+
+
+Defined in [`layers/poincare.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/layers/poincare.py).
+
+
+
+See: https://en.wikipedia.org/wiki/Poincare_ball_model
+
+Used in "Poincare Embeddings for Learning Hierarchical Representations"
+by Maximilian Nickel and Douwe Kiela: https://arxiv.org/pdf/1705.08039.pdf
+
+For a 1-D tensor with `axis = 0`, computes
+
+    output = (x * (1 - epsilon)) / ||x||    if ||x|| > 1 - epsilon
+    output = x                              otherwise
+
+For `x` with more dimensions, independently normalizes each 1-D slice along
+dimension `axis`.
+
+#### Arguments:
+
+
+* `axis`: Axis along which to normalize. A scalar or a vector of integers.
+* `epsilon`: A small deviation from the edge of the unit sphere for
+ numerical stability.
+
+__init__
+
+``` python
+__init__(
+ axis=1,
+ epsilon=1e-05,
+ **kwargs
+)
+```
+
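+A minimal sketch of the projection (the printed values are approximate
+and assume the default `epsilon`):
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+x = tf.constant([[3.0, 4.0]])  # ||x|| = 5 > 1 - epsilon
+y = tfa.layers.PoincareNormalize(axis=1)(x)
+print(y)                   # ~[[0.6, 0.8]], i.e. x rescaled to norm 1 - epsilon
+print(tf.norm(y, axis=1))  # ~0.99999
+```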
+
+
+
+
+
+## Properties
+
+activity_regularizer
+
+Optional regularizer function for the output of this layer.
+
+
+dtype
+
+
+
+
+dynamic
+
+
+
+
+
+input
+
+Retrieves the input tensor(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input tensor or list of input tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+* `RuntimeError`: If called in Eager mode.
+* `AttributeError`: If no inbound nodes are found.
+
+
+input_mask
+
+Retrieves the input mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input mask tensor (potentially None) or list of input
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+
+
+input_shape
+
+Retrieves the input shape(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer, or if all inputs
+have the same shape.
+
+#### Returns:
+
+Input shape, as an integer shape tuple
+(or list of shape tuples, one tuple per input tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined input_shape.
+* `RuntimeError`: if called in Eager mode.
+
+
+
+
+
+
+losses
+
+Losses which are associated with this `Layer`.
+
+Variable regularization tensors are created when this property is accessed,
+so it is eager safe: accessing `losses` under a `tf.GradientTape` will
+propagate gradients back to the corresponding variables.
+
+#### Returns:
+
+A list of tensors.
+
+
+metrics
+
+
+
+
+name
+
+
+
+
+name_scope
+
+Returns a `tf.name_scope` instance for this class.
+
+
+non_trainable_variables
+
+
+
+
+non_trainable_weights
+
+
+
+
+output
+
+Retrieves the output tensor(s) of a layer.
+
+Only applicable if the layer has exactly one output,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output tensor or list of output tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to more than one incoming
+ layer.
+* `RuntimeError`: if called in Eager mode.
+
+output_mask
+
+Retrieves the output mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output mask tensor (potentially None) or list of output
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+
+output_shape
+
+Retrieves the output shape(s) of a layer.
+
+Only applicable if the layer has one output,
+or if all outputs have the same shape.
+
+#### Returns:
+
+Output shape, as an integer shape tuple
+(or list of shape tuples, one tuple per output tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined output shape.
+* `RuntimeError`: if called in Eager mode.
+
+submodules
+
+Sequence of all sub-modules.
+
+Submodules are modules which are properties of this module, or found as
+properties of modules which are properties of this module (and so on).
+
+```
+a = tf.Module()
+b = tf.Module()
+c = tf.Module()
+a.b = b
+b.c = c
+assert list(a.submodules) == [b, c]
+assert list(b.submodules) == [c]
+assert list(c.submodules) == []
+```
+
+#### Returns:
+
+A sequence of all submodules.
+
+
+trainable
+
+
+
+
+trainable_variables
+
+
+
+
+trainable_weights
+
+
+
+
+updates
+
+
+
+
+variables
+
+Returns the list of all layer variables/weights.
+
+Alias of `self.weights`.
+
+#### Returns:
+
+A list of variables.
+
+
+weights
+
+Returns the list of all layer variables/weights.
+
+
+#### Returns:
+
+A list of variables.
+
+
+
+
+## Methods
+
+__call__
+
+``` python
+__call__(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Wraps `call`, applying pre- and post-processing steps.
+
+
+#### Arguments:
+
+
+* `inputs`: input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+
+#### Note:
+
+- The following optional keyword arguments are reserved for specific uses:
+ * `training`: Boolean scalar tensor or Python boolean indicating
+ whether the `call` is meant for training or inference.
+ * `mask`: Boolean input mask.
+- If the layer's `call` method takes a `mask` argument (as some Keras
+ layers do), its default value will be set to the mask generated
+ for `inputs` by the previous layer (if `inputs` did come from
+ a layer that generated a corresponding mask, i.e. if it came from
+ a Keras layer with masking support).
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer's `call` method returns None (an invalid value).
+
+apply
+
+``` python
+apply(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Applies the layer to an input.
+
+This is an alias of `self.__call__`.
+
+#### Arguments:
+
+
+* `inputs`: Input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+build
+
+``` python
+build(input_shape)
+```
+
+Creates the variables of the layer (optional, for subclass implementers).
+
+This is a method that implementers of subclasses of `Layer` or `Model`
+can override if they need a state-creation step in-between
+layer instantiation and layer call.
+
+This is typically used to create the weights of `Layer` subclasses.
+
+#### Arguments:
+
+
+* `input_shape`: Instance of `TensorShape`, or list of instances of
+ `TensorShape` if the layer expects a list of inputs
+ (one instance per input).
+
+compute_mask
+
+``` python
+compute_mask(
+ inputs,
+ mask=None
+)
+```
+
+Computes an output mask tensor.
+
+
+#### Arguments:
+
+
+* `inputs`: Tensor or list of tensors.
+* `mask`: Tensor or list of tensors.
+
+
+#### Returns:
+
+None or a tensor (or list of tensors,
+ one per output tensor of the layer).
+
+
+compute_output_shape
+
+``` python
+compute_output_shape(input_shape)
+```
+
+
+
+
+count_params
+
+``` python
+count_params()
+```
+
+Count the total number of scalars composing the weights.
+
+
+#### Returns:
+
+An integer count.
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer isn't yet built
+ (in which case its weights aren't yet defined).
+
+from_config
+
+``` python
+from_config(
+ cls,
+ config
+)
+```
+
+Creates a layer from its config.
+
+This method is the reverse of `get_config`,
+capable of instantiating the same layer from the config
+dictionary. It does not handle layer connectivity
+(handled by Network), nor weights (handled by `set_weights`).
+
+#### Arguments:
+
+
+* `config`: A Python dictionary, typically the
+ output of get_config.
+
+
+#### Returns:
+
+A layer instance.
+
+
+get_config
+
+``` python
+get_config()
+```
+
+
+
+
+
+get_input_at
+
+``` python
+get_input_at(node_index)
+```
+
+Retrieves the input tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+
+get_input_mask_at
+
+``` python
+get_input_mask_at(node_index)
+```
+
+Retrieves the input mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple inputs).
+
+
+
+get_input_shape_at
+
+``` python
+get_input_shape_at(node_index)
+```
+
+Retrieves the input shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_losses_for
+
+``` python
+get_losses_for(inputs)
+```
+
+Retrieves losses relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of loss tensors of the layer that depend on `inputs`.
+
+
+get_output_at
+
+``` python
+get_output_at(node_index)
+```
+
+Retrieves the output tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_output_mask_at
+
+``` python
+get_output_mask_at(node_index)
+```
+
+Retrieves the output mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple outputs).
+
+
+get_output_shape_at
+
+``` python
+get_output_shape_at(node_index)
+```
+
+Retrieves the output shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_updates_for
+
+``` python
+get_updates_for(inputs)
+```
+
+Retrieves updates relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of update ops of the layer that depend on `inputs`.
+
+
+get_weights
+
+``` python
+get_weights()
+```
+
+Returns the current weights of the layer.
+
+
+#### Returns:
+
+Weights values as a list of numpy arrays.
+
+
+set_weights
+
+``` python
+set_weights(weights)
+```
+
+Sets the weights of the layer, from Numpy arrays.
+
+
+#### Arguments:
+
+
+* `weights`: a list of Numpy arrays. The number
+ of arrays and their shapes must match the
+ number and shapes of the weights of the
+ layer (i.e. it should match the
+ output of `get_weights`).
+
+
+#### Raises:
+
+
+* `ValueError`: If the provided weights list does not match the
+ layer's specifications.
+
+with_name_scope
+
+``` python
+with_name_scope(
+ cls,
+ method
+)
+```
+
+Decorator to automatically enter the module name scope.
+
+```
+class MyModule(tf.Module):
+ @tf.Module.with_name_scope
+ def __call__(self, x):
+ if not hasattr(self, 'w'):
+ self.w = tf.Variable(tf.random.normal([x.shape[1], 64]))
+ return tf.matmul(x, self.w)
+```
+
+Using the above module would produce `tf.Variable`s and `tf.Tensor`s whose
+names included the module name:
+
+```
+mod = MyModule()
+mod(tf.ones([8, 32]))
+# ==> <tf.Tensor: ...>
+mod.w
+# ==> <tf.Variable ...'my_module/w:0'>
+```
+
+#### Args:
+
+
+* `method`: The method to wrap.
+
+
+#### Returns:
+
+The original method wrapped such that it enters the module's name scope.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/layers/Sparsemax.md b/docs/api_docs/python/tfa/layers/Sparsemax.md
new file mode 100644
index 0000000000..3c136b3676
--- /dev/null
+++ b/docs/api_docs/python/tfa/layers/Sparsemax.md
@@ -0,0 +1,820 @@
+
+
+# tfa.layers.Sparsemax
+
+## Class `Sparsemax`
+
+Sparsemax activation function [1].
+
+
+
+### Aliases:
+
+* Class `tfa.layers.Sparsemax`
+* Class `tfa.layers.sparsemax.Sparsemax`
+
+
+
+Defined in [`layers/sparsemax.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/layers/sparsemax.py).
+
+
+
+The output shape is the same as the input shape.
+
+[1]: https://arxiv.org/abs/1602.02068
+
+#### Arguments:
+
+
+* `axis`: Integer, axis along which the sparsemax normalization is applied.
+
+__init__
+
+``` python
+__init__(
+ axis=-1,
+ **kwargs
+)
+```
+
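+A minimal sketch (the expected output is worked out from the sparsemax
+definition in [1]; the logits are illustrative):
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+logits = tf.constant([[1.0, 1.5, 2.0]])
+probs = tfa.layers.Sparsemax()(logits)
+# Unlike softmax, sparsemax can assign exactly zero probability:
+print(probs)  # [[0.   0.25 0.75]]
+```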
+
+
+
+
+
+## Properties
+
+activity_regularizer
+
+Optional regularizer function for the output of this layer.
+
+
+dtype
+
+
+
+
+dynamic
+
+
+
+
+
+input
+
+Retrieves the input tensor(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input tensor or list of input tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+* `RuntimeError`: If called in Eager mode.
+* `AttributeError`: If no inbound nodes are found.
+
+
+input_mask
+
+Retrieves the input mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input mask tensor (potentially None) or list of input
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+
+
+input_shape
+
+Retrieves the input shape(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer, or if all inputs
+have the same shape.
+
+#### Returns:
+
+Input shape, as an integer shape tuple
+(or list of shape tuples, one tuple per input tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined input_shape.
+* `RuntimeError`: if called in Eager mode.
+
+
+
+
+
+
+losses
+
+Losses which are associated with this `Layer`.
+
+Variable regularization tensors are created when this property is accessed,
+so it is eager safe: accessing `losses` under a `tf.GradientTape` will
+propagate gradients back to the corresponding variables.
+
+#### Returns:
+
+A list of tensors.
+
+
+metrics
+
+
+
+
+name
+
+
+
+
+name_scope
+
+Returns a `tf.name_scope` instance for this class.
+
+
+non_trainable_variables
+
+
+
+
+non_trainable_weights
+
+
+
+
+output
+
+Retrieves the output tensor(s) of a layer.
+
+Only applicable if the layer has exactly one output,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output tensor or list of output tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to more than one incoming
+ layer.
+* `RuntimeError`: if called in Eager mode.
+
+output_mask
+
+Retrieves the output mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output mask tensor (potentially None) or list of output
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+
+output_shape
+
+Retrieves the output shape(s) of a layer.
+
+Only applicable if the layer has one output,
+or if all outputs have the same shape.
+
+#### Returns:
+
+Output shape, as an integer shape tuple
+(or list of shape tuples, one tuple per output tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined output shape.
+* `RuntimeError`: if called in Eager mode.
+
+submodules
+
+Sequence of all sub-modules.
+
+Submodules are modules which are properties of this module, or found as
+properties of modules which are properties of this module (and so on).
+
+```
+a = tf.Module()
+b = tf.Module()
+c = tf.Module()
+a.b = b
+b.c = c
+assert list(a.submodules) == [b, c]
+assert list(b.submodules) == [c]
+assert list(c.submodules) == []
+```
+
+#### Returns:
+
+A sequence of all submodules.
+
+
+trainable
+
+
+
+
+trainable_variables
+
+
+
+
+trainable_weights
+
+
+
+
+updates
+
+
+
+
+variables
+
+Returns the list of all layer variables/weights.
+
+Alias of `self.weights`.
+
+#### Returns:
+
+A list of variables.
+
+
+weights
+
+Returns the list of all layer variables/weights.
+
+
+#### Returns:
+
+A list of variables.
+
+
+
+
+## Methods
+
+__call__
+
+``` python
+__call__(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Wraps `call`, applying pre- and post-processing steps.
+
+
+#### Arguments:
+
+
+* `inputs`: input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+
+#### Note:
+
+- The following optional keyword arguments are reserved for specific uses:
+ * `training`: Boolean scalar tensor or Python boolean indicating
+ whether the `call` is meant for training or inference.
+ * `mask`: Boolean input mask.
+- If the layer's `call` method takes a `mask` argument (as some Keras
+ layers do), its default value will be set to the mask generated
+ for `inputs` by the previous layer (if `inputs` did come from
+ a layer that generated a corresponding mask, i.e. if it came from
+ a Keras layer with masking support).
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer's `call` method returns None (an invalid value).
+
+apply
+
+``` python
+apply(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Applies the layer to an input.
+
+This is an alias of `self.__call__`.
+
+#### Arguments:
+
+
+* `inputs`: Input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+build
+
+``` python
+build(input_shape)
+```
+
+Creates the variables of the layer (optional, for subclass implementers).
+
+This is a method that implementers of subclasses of `Layer` or `Model`
+can override if they need a state-creation step in-between
+layer instantiation and layer call.
+
+This is typically used to create the weights of `Layer` subclasses.
+
+#### Arguments:
+
+
+* `input_shape`: Instance of `TensorShape`, or list of instances of
+ `TensorShape` if the layer expects a list of inputs
+ (one instance per input).
+
+compute_mask
+
+``` python
+compute_mask(
+ inputs,
+ mask=None
+)
+```
+
+Computes an output mask tensor.
+
+
+#### Arguments:
+
+
+* `inputs`: Tensor or list of tensors.
+* `mask`: Tensor or list of tensors.
+
+
+#### Returns:
+
+None or a tensor (or list of tensors,
+ one per output tensor of the layer).
+
+
+compute_output_shape
+
+``` python
+compute_output_shape(input_shape)
+```
+
+
+
+
+count_params
+
+``` python
+count_params()
+```
+
+Count the total number of scalars composing the weights.
+
+
+#### Returns:
+
+An integer count.
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer isn't yet built
+ (in which case its weights aren't yet defined).
+
+from_config
+
+``` python
+from_config(
+ cls,
+ config
+)
+```
+
+Creates a layer from its config.
+
+This method is the reverse of `get_config`,
+capable of instantiating the same layer from the config
+dictionary. It does not handle layer connectivity
+(handled by Network), nor weights (handled by `set_weights`).
+
+#### Arguments:
+
+
+* `config`: A Python dictionary, typically the
+ output of get_config.
+
+
+#### Returns:
+
+A layer instance.
+
+
+get_config
+
+``` python
+get_config()
+```
+
+
+
+
+
+get_input_at
+
+``` python
+get_input_at(node_index)
+```
+
+Retrieves the input tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+
+get_input_mask_at
+
+``` python
+get_input_mask_at(node_index)
+```
+
+Retrieves the input mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple inputs).
+
+
+
+get_input_shape_at
+
+``` python
+get_input_shape_at(node_index)
+```
+
+Retrieves the input shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_losses_for
+
+``` python
+get_losses_for(inputs)
+```
+
+Retrieves losses relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of loss tensors of the layer that depend on `inputs`.
+
+
+get_output_at
+
+``` python
+get_output_at(node_index)
+```
+
+Retrieves the output tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_output_mask_at
+
+``` python
+get_output_mask_at(node_index)
+```
+
+Retrieves the output mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple outputs).
+
+
+get_output_shape_at
+
+``` python
+get_output_shape_at(node_index)
+```
+
+Retrieves the output shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_updates_for
+
+``` python
+get_updates_for(inputs)
+```
+
+Retrieves updates relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of update ops of the layer that depend on `inputs`.
+
+
+get_weights
+
+``` python
+get_weights()
+```
+
+Returns the current weights of the layer.
+
+
+#### Returns:
+
+Weights values as a list of numpy arrays.
+
+
+set_weights
+
+``` python
+set_weights(weights)
+```
+
+Sets the weights of the layer, from Numpy arrays.
+
+
+#### Arguments:
+
+
+* `weights`: a list of Numpy arrays. The number
+ of arrays and their shapes must match the
+ number and shapes of the weights of the
+ layer (i.e. it should match the
+ output of `get_weights`).
+
+
+#### Raises:
+
+
+* `ValueError`: If the provided weights list does not match the
+ layer's specifications.
+
+with_name_scope
+
+``` python
+with_name_scope(
+ cls,
+ method
+)
+```
+
+Decorator to automatically enter the module name scope.
+
+```
+class MyModule(tf.Module):
+ @tf.Module.with_name_scope
+ def __call__(self, x):
+ if not hasattr(self, 'w'):
+ self.w = tf.Variable(tf.random.normal([x.shape[1], 64]))
+ return tf.matmul(x, self.w)
+```
+
+Using the above module would produce `tf.Variable`s and `tf.Tensor`s whose
+names included the module name:
+
+```
+mod = MyModule()
+mod(tf.ones([8, 32]))
+# ==> <tf.Tensor: ...>
+mod.w
+# ==> <tf.Variable ...'my_module/w:0'>
+```
+
+#### Args:
+
+
+* `method`: The method to wrap.
+
+
+#### Returns:
+
+The original method wrapped such that it enters the module's name scope.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/layers/WeightNormalization.md b/docs/api_docs/python/tfa/layers/WeightNormalization.md
new file mode 100644
index 0000000000..0722840c90
--- /dev/null
+++ b/docs/api_docs/python/tfa/layers/WeightNormalization.md
@@ -0,0 +1,815 @@
+
+
+# tfa.layers.WeightNormalization
+
+## Class `WeightNormalization`
+
+This wrapper reparameterizes a layer by decoupling the weight's
+magnitude and direction.
+
+
+
+### Aliases:
+
+* Class `tfa.layers.WeightNormalization`
+* Class `tfa.layers.wrappers.WeightNormalization`
+
+
+
+Defined in [`layers/wrappers.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/layers/wrappers.py).
+
+
+This speeds up convergence by improving the
+conditioning of the optimization problem.
+
+See "Weight Normalization: A Simple Reparameterization to Accelerate
+Training of Deep Neural Networks" by Tim Salimans and Diederik P. Kingma
+(2016): https://arxiv.org/abs/1602.07868
+
+The `WeightNormalization` wrapper works for Keras and TF layers.
+```python
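+ # Sketch only: `x` is assumed to be an input tensor, e.g.
+ # x = tf.keras.Input(shape=(32, 32, 3)), and `n_classes` the number of
+ # output classes; neither is defined by this snippet.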
+ net = WeightNormalization(
+ tf.keras.layers.Conv2D(2, 2, activation='relu'),
+ input_shape=(32, 32, 3),
+ data_init=True)(x)
+ net = WeightNormalization(
+ tf.keras.layers.Conv2D(16, 5, activation='relu'),
+ data_init=True)(net)
+ net = WeightNormalization(
+ tf.keras.layers.Dense(120, activation='relu'),
+ data_init=True)(net)
+ net = WeightNormalization(
+ tf.keras.layers.Dense(n_classes),
+ data_init=True)(net)
+```
+#### Arguments:
+
+
+* `layer`: A layer instance.
+* `data_init`: If `True`, use data-dependent variable initialization.
+
+
+#### Raises:
+
+
+* `ValueError`: If not initialized with a `Layer` instance.
+* `ValueError`: If `Layer` does not contain a `kernel` of weights.
+* `NotImplementedError`: If `data_init` is `True` and running graph execution.
+
+__init__
+
+``` python
+__init__(
+ layer,
+ data_init=True,
+ **kwargs
+)
+```
+
+
+
+
+
+
+## Properties
+
+activity_regularizer
+
+
+
+
+dtype
+
+
+
+
+dynamic
+
+
+
+
+
+input
+
+Retrieves the input tensor(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input tensor or list of input tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+* `RuntimeError`: If called in Eager mode.
+* `AttributeError`: If no inbound nodes are found.
+
+
+input_mask
+
+Retrieves the input mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input mask tensor (potentially None) or list of input
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+
+
+input_shape
+
+Retrieves the input shape(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer, or if all inputs
+have the same shape.
+
+#### Returns:
+
+Input shape, as an integer shape tuple
+(or list of shape tuples, one tuple per input tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined input_shape.
+* `RuntimeError`: if called in Eager mode.
+
+
+
+
+
+
+losses
+
+Losses which are associated with this `Layer`.
+
+Variable regularization tensors are created when this property is accessed,
+so it is eager safe: accessing `losses` under a `tf.GradientTape` will
+propagate gradients back to the corresponding variables.
+
+#### Returns:
+
+A list of tensors.
+
+
+metrics
+
+
+
+
+name
+
+
+
+
+name_scope
+
+Returns a `tf.name_scope` instance for this class.
+
+
+non_trainable_variables
+
+
+
+
+non_trainable_weights
+
+
+
+
+output
+
+Retrieves the output tensor(s) of a layer.
+
+Only applicable if the layer has exactly one output,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output tensor or list of output tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to more than one incoming
+ layer.
+* `RuntimeError`: if called in Eager mode.
+
+output_mask
+
+Retrieves the output mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output mask tensor (potentially None) or list of output
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+
+output_shape
+
+Retrieves the output shape(s) of a layer.
+
+Only applicable if the layer has one output,
+or if all outputs have the same shape.
+
+#### Returns:
+
+Output shape, as an integer shape tuple
+(or list of shape tuples, one tuple per output tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined output shape.
+* `RuntimeError`: if called in Eager mode.
+
+submodules
+
+Sequence of all sub-modules.
+
+Submodules are modules which are properties of this module, or found as
+properties of modules which are properties of this module (and so on).
+
+```
+a = tf.Module()
+b = tf.Module()
+c = tf.Module()
+a.b = b
+b.c = c
+assert list(a.submodules) == [b, c]
+assert list(b.submodules) == [c]
+assert list(c.submodules) == []
+```
+
+#### Returns:
+
+A sequence of all submodules.
+
+
+trainable
+
+
+
+
+trainable_variables
+
+
+
+
+trainable_weights
+
+
+
+
+updates
+
+
+
+
+variables
+
+Returns the list of all layer variables/weights.
+
+Alias of `self.weights`.
+
+#### Returns:
+
+A list of variables.
+
+
+weights
+
+Returns the list of all layer variables/weights.
+
+
+#### Returns:
+
+A list of variables.
+
+
+
+
+## Methods
+
+__call__
+
+``` python
+__call__(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Wraps `call`, applying pre- and post-processing steps.
+
+
+#### Arguments:
+
+
+* `inputs`: input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+
+#### Note:
+
+- The following optional keyword arguments are reserved for specific uses:
+ * `training`: Boolean scalar tensor or Python boolean indicating
+ whether the `call` is meant for training or inference.
+ * `mask`: Boolean input mask.
+- If the layer's `call` method takes a `mask` argument (as some Keras
+ layers do), its default value will be set to the mask generated
+ for `inputs` by the previous layer (if `inputs` did come from
+ a layer that generated a corresponding mask, i.e. if it came from
+ a Keras layer with masking support).
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer's `call` method returns None (an invalid value).
+
+apply
+
+``` python
+apply(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Applies the layer to an input.
+
+This is an alias of `self.__call__`.
+
+#### Arguments:
+
+
+* `inputs`: Input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+build
+
+``` python
+build(input_shape)
+```
+
+Builds the wrapped `Layer`.
+
+
+compute_mask
+
+``` python
+compute_mask(
+ inputs,
+ mask=None
+)
+```
+
+Computes an output mask tensor.
+
+
+#### Arguments:
+
+
+* `inputs`: Tensor or list of tensors.
+* `mask`: Tensor or list of tensors.
+
+
+#### Returns:
+
+None or a tensor (or list of tensors,
+ one per output tensor of the layer).
+
+
+compute_output_shape
+
+``` python
+compute_output_shape(input_shape)
+```
+
+
+
+
+count_params
+
+``` python
+count_params()
+```
+
+Count the total number of scalars composing the weights.
+
+
+#### Returns:
+
+An integer count.
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer isn't yet built
+ (in which case its weights aren't yet defined).
+
+from_config
+
+``` python
+from_config(
+ cls,
+ config,
+ custom_objects=None
+)
+```
+
+
+
+
+get_config
+
+``` python
+get_config()
+```
+
+
+
+
+
+get_input_at
+
+``` python
+get_input_at(node_index)
+```
+
+Retrieves the input tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+
+get_input_mask_at
+
+``` python
+get_input_mask_at(node_index)
+```
+
+Retrieves the input mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple inputs).
+
+
+
+get_input_shape_at
+
+``` python
+get_input_shape_at(node_index)
+```
+
+Retrieves the input shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_losses_for
+
+``` python
+get_losses_for(inputs)
+```
+
+Retrieves losses relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of loss tensors of the layer that depend on `inputs`.
+
+
+get_output_at
+
+``` python
+get_output_at(node_index)
+```
+
+Retrieves the output tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_output_mask_at
+
+``` python
+get_output_mask_at(node_index)
+```
+
+Retrieves the output mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple outputs).
+
+
+get_output_shape_at
+
+``` python
+get_output_shape_at(node_index)
+```
+
+Retrieves the output shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_updates_for
+
+``` python
+get_updates_for(inputs)
+```
+
+Retrieves updates relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of update ops of the layer that depend on `inputs`.
+
+
+get_weights
+
+``` python
+get_weights()
+```
+
+Returns the current weights of the layer.
+
+
+#### Returns:
+
+Weights values as a list of numpy arrays.
+
+
+set_weights
+
+``` python
+set_weights(weights)
+```
+
+Sets the weights of the layer, from Numpy arrays.
+
+
+#### Arguments:
+
+
+* `weights`: a list of Numpy arrays. The number
+ of arrays and their shapes must match the
+ number and shapes of the weights of the
+ layer (i.e. it should match the
+ output of `get_weights`).
+
+
+#### Raises:
+
+
+* `ValueError`: If the provided weights list does not match the
+ layer's specifications.
+
+with_name_scope
+
+``` python
+with_name_scope(
+ cls,
+ method
+)
+```
+
+Decorator to automatically enter the module name scope.
+
+```
+class MyModule(tf.Module):
+ @tf.Module.with_name_scope
+ def __call__(self, x):
+ if not hasattr(self, 'w'):
+ self.w = tf.Variable(tf.random.normal([x.shape[1], 64]))
+ return tf.matmul(x, self.w)
+```
+
+Using the above module would produce `tf.Variable`s and `tf.Tensor`s whose
+names included the module name:
+
+```
+mod = MyModule()
+mod(tf.ones([8, 32]))
+# ==> <tf.Tensor: ...>
+mod.w
+# ==> <tf.Variable ...'my_module/w:0'>
+```
+
+#### Args:
+
+
+* `method`: The method to wrap.
+
+
+#### Returns:
+
+The original method wrapped such that it enters the module's name scope.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/layers/maxout.md b/docs/api_docs/python/tfa/layers/maxout.md
new file mode 100644
index 0000000000..549999941b
--- /dev/null
+++ b/docs/api_docs/python/tfa/layers/maxout.md
@@ -0,0 +1,20 @@
+
+
+
+
+
+# Module: tfa.layers.maxout
+
+Implementing Maxout layer.
+
+
+
+Defined in [`layers/maxout.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/layers/maxout.py).
+
+
+
+
+## Classes
+
+[`class Maxout`](../../tfa/layers/Maxout.md): Applies Maxout to the input.
+
diff --git a/docs/api_docs/python/tfa/layers/normalizations.md b/docs/api_docs/python/tfa/layers/normalizations.md
new file mode 100644
index 0000000000..e0daf2a090
--- /dev/null
+++ b/docs/api_docs/python/tfa/layers/normalizations.md
@@ -0,0 +1,22 @@
+
+
+
+
+
+# Module: tfa.layers.normalizations
+
+
+
+
+
+Defined in [`layers/normalizations.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/layers/normalizations.py).
+
+
+
+
+## Classes
+
+[`class GroupNormalization`](../../tfa/layers/GroupNormalization.md): Group normalization layer.
+
+[`class InstanceNormalization`](../../tfa/layers/InstanceNormalization.md): Instance normalization layer.
+
diff --git a/docs/api_docs/python/tfa/layers/poincare.md b/docs/api_docs/python/tfa/layers/poincare.md
new file mode 100644
index 0000000000..62db1071b0
--- /dev/null
+++ b/docs/api_docs/python/tfa/layers/poincare.md
@@ -0,0 +1,20 @@
+
+
+
+
+
+# Module: tfa.layers.poincare
+
+Implementing PoincareNormalize layer.
+
+
+
+Defined in [`layers/poincare.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/layers/poincare.py).
+
+
+
+
+## Classes
+
+[`class PoincareNormalize`](../../tfa/layers/PoincareNormalize.md): Project into the Poincare ball with norm <= 1.0 - epsilon.
+
diff --git a/docs/api_docs/python/tfa/layers/sparsemax.md b/docs/api_docs/python/tfa/layers/sparsemax.md
new file mode 100644
index 0000000000..9820c5c185
--- /dev/null
+++ b/docs/api_docs/python/tfa/layers/sparsemax.md
@@ -0,0 +1,24 @@
+
+
+
+
+
+# Module: tfa.layers.sparsemax
+
+
+
+
+
+Defined in [`layers/sparsemax.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/layers/sparsemax.py).
+
+
+
+
+## Classes
+
+[`class Sparsemax`](../../tfa/layers/Sparsemax.md): Sparsemax activation function [1].
+
+## Functions
+
+[`sparsemax(...)`](../../tfa/activations/sparsemax.md): Sparsemax activation function [1].
+
diff --git a/docs/api_docs/python/tfa/layers/wrappers.md b/docs/api_docs/python/tfa/layers/wrappers.md
new file mode 100644
index 0000000000..6ed1fdf9ee
--- /dev/null
+++ b/docs/api_docs/python/tfa/layers/wrappers.md
@@ -0,0 +1,20 @@
+
+
+
+
+
+# Module: tfa.layers.wrappers
+
+
+
+
+
+Defined in [`layers/wrappers.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/layers/wrappers.py).
+
+
+
+
+## Classes
+
+[`class WeightNormalization`](../../tfa/layers/WeightNormalization.md): This wrapper reparameterizes a layer by decoupling the weight's magnitude and direction.
+
diff --git a/docs/api_docs/python/tfa/losses.md b/docs/api_docs/python/tfa/losses.md
new file mode 100644
index 0000000000..07c74a8db3
--- /dev/null
+++ b/docs/api_docs/python/tfa/losses.md
@@ -0,0 +1,52 @@
+
+
+
+
+
+# Module: tfa.losses
+
+Additional losses that conform to Keras API.
+
+
+
+Defined in [`losses/__init__.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/losses/__init__.py).
+
+
+
+
+## Modules
+
+[`contrastive`](../tfa/losses/contrastive.md) module: Implements contrastive loss.
+
+[`focal_loss`](../tfa/losses/focal_loss.md) module: Implements Focal loss.
+
+[`lifted`](../tfa/losses/lifted.md) module: Implements lifted_struct_loss.
+
+[`metric_learning`](../tfa/losses/metric_learning.md) module: Functions of metric learning.
+
+[`triplet`](../tfa/losses/triplet.md) module: Implements triplet loss.
+
+## Classes
+
+[`class ContrastiveLoss`](../tfa/losses/ContrastiveLoss.md): Computes the contrastive loss between `y_true` and `y_pred`.
+
+[`class LiftedStructLoss`](../tfa/losses/LiftedStructLoss.md): Computes the lifted structured loss.
+
+[`class SigmoidFocalCrossEntropy`](../tfa/losses/SigmoidFocalCrossEntropy.md): Implements the focal loss function.
+
+[`class SparsemaxLoss`](../tfa/losses/SparsemaxLoss.md): Sparsemax loss function.
+
+[`class TripletSemiHardLoss`](../tfa/losses/TripletSemiHardLoss.md): Computes the triplet loss with semi-hard negative mining.
+
+## Functions
+
+[`contrastive_loss(...)`](../tfa/losses/contrastive_loss.md): Computes the contrastive loss between `y_true` and `y_pred`.
+
+[`lifted_struct_loss(...)`](../tfa/losses/lifted_struct_loss.md): Computes the lifted structured loss.
+
+[`sigmoid_focal_crossentropy(...)`](../tfa/losses/sigmoid_focal_crossentropy.md): Implements the focal loss function.
+
+[`sparsemax_loss(...)`](../tfa/losses/sparsemax_loss.md): Sparsemax loss function [1].
+
+[`triplet_semihard_loss(...)`](../tfa/losses/triplet_semihard_loss.md): Computes the triplet loss with semi-hard negative mining.
+
diff --git a/docs/api_docs/python/tfa/losses/ContrastiveLoss.md b/docs/api_docs/python/tfa/losses/ContrastiveLoss.md
new file mode 100644
index 0000000000..5c9e4dcf48
--- /dev/null
+++ b/docs/api_docs/python/tfa/losses/ContrastiveLoss.md
@@ -0,0 +1,149 @@
+
+
+# tfa.losses.ContrastiveLoss
+
+## Class `ContrastiveLoss`
+
+Computes the contrastive loss between `y_true` and `y_pred`.
+
+
+
+### Aliases:
+
+* Class `tfa.losses.ContrastiveLoss`
+* Class `tfa.losses.contrastive.ContrastiveLoss`
+
+
+
+Defined in [`losses/contrastive.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/losses/contrastive.py).
+
+
+
+This loss encourages the embeddings to be close to each other for
+samples of the same label, and to be at least the margin constant apart
+for samples of different labels.
+
+See: http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
+
+We expect labels `y_true` to be provided as a 1-D integer `Tensor`
+with shape [batch_size] of binary integer labels, and `y_pred` as a
+1-D float `Tensor` with shape [batch_size] of distances between two
+embedding matrices.
+
+The euclidean distances `y_pred` between two embedding matrices
+`a` and `b` with shape [batch_size, hidden_size] can be computed
+as follows:
+
+```python
+# y_pred = \sqrt (\sum_i (a[:, i] - b[:, i])^2)
+y_pred = tf.linalg.norm(a - b, axis=1)
+```
+
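+For illustration, a minimal end-to-end sketch; the embeddings, batch and
+feature sizes, and labels below are all made up:
+
+```python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+# Hypothetical paired embeddings, shape [batch_size, hidden_size].
+a = tf.random.normal([4, 128])
+b = tf.random.normal([4, 128])
+
+y_pred = tf.linalg.norm(a - b, axis=1)    # distances, shape [4]
+y_true = tf.constant([1, 1, 0, 0])        # 1 = same label, 0 = different
+
+loss = tfa.losses.ContrastiveLoss(margin=1.0)(y_true, y_pred)
+```
+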
+#### Args:
+
+
+* `margin`: `Float`, margin term in the loss definition.
+ Default value is 1.0.
+* `reduction`: (Optional) Type of `tf.keras.losses.Reduction` to apply.
+ Default value is `SUM_OVER_BATCH_SIZE`.
+* `name`: (Optional) name for the loss.
+
+__init__
+
+``` python
+__init__(
+ margin=1.0,
+ reduction=tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE,
+    name='contrastive_loss'
+)
+```
+
+
+
+
+
+
+## Methods
+
+__call__
+
+``` python
+__call__(
+ y_true,
+ y_pred,
+ sample_weight=None
+)
+```
+
+Invokes the `Loss` instance.
+
+
+#### Args:
+
+
+* `y_true`: Ground truth values.
+* `y_pred`: The predicted values.
+* `sample_weight`: Optional `Tensor` whose rank is either 0, or the same rank
+ as `y_true`, or is broadcastable to `y_true`. `sample_weight` acts as a
+ coefficient for the loss. If a scalar is provided, then the loss is
+ simply scaled by the given value. If `sample_weight` is a tensor of size
+ `[batch_size]`, then the total loss for each sample of the batch is
+ rescaled by the corresponding element in the `sample_weight` vector. If
+ the shape of `sample_weight` matches the shape of `y_pred`, then the
+ loss of each measurable element of `y_pred` is scaled by the
+ corresponding value of `sample_weight`.
+
+
+#### Returns:
+
+Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
+ shape as `y_true`; otherwise, it is scalar.
+
+
+
+#### Raises:
+
+
+* `ValueError`: If the shape of `sample_weight` is invalid.
+
+from_config
+
+``` python
+from_config(
+ cls,
+ config
+)
+```
+
+Instantiates a `Loss` from its config (output of `get_config()`).
+
+
+#### Args:
+
+
+* `config`: Output of `get_config()`.
+
+
+#### Returns:
+
+A `Loss` instance.
+
+
+get_config
+
+``` python
+get_config()
+```
+
+
+
+
+
+
diff --git a/docs/api_docs/python/tfa/losses/LiftedStructLoss.md b/docs/api_docs/python/tfa/losses/LiftedStructLoss.md
new file mode 100644
index 0000000000..2f7329e147
--- /dev/null
+++ b/docs/api_docs/python/tfa/losses/LiftedStructLoss.md
@@ -0,0 +1,131 @@
+
+
+
+
+
+
+
+
+
+# tfa.losses.LiftedStructLoss
+
+## Class `LiftedStructLoss`
+
+Computes the lifted structured loss.
+
+
+
+### Aliases:
+
+* Class `tfa.losses.LiftedStructLoss`
+* Class `tfa.losses.lifted.LiftedStructLoss`
+
+
+
+Defined in [`losses/lifted.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/losses/lifted.py).
+
+
+
+The loss encourages the positive distances (between a pair of embeddings
+with the same labels) to be smaller than any negative distances (between
+a pair of embeddings with different labels) in the mini-batch in a way
+that is differentiable with respect to the embedding vectors.
+See: https://arxiv.org/abs/1511.06452.
+
+#### Args:
+
+
+* `margin`: Float, margin term in the loss definition.
+* `name`: Optional name for the op.
+
+__init__
+
+``` python
+__init__(
+ margin=1.0,
+ name=None
+)
+```
+
+
+
+
+
+
+## Methods
+
+__call__
+
+``` python
+__call__(
+ y_true,
+ y_pred,
+ sample_weight=None
+)
+```
+
+Invokes the `Loss` instance.
+
+
+#### Args:
+
+
+* `y_true`: Ground truth values.
+* `y_pred`: The predicted values.
+* `sample_weight`: Optional `Tensor` whose rank is either 0, or the same rank
+ as `y_true`, or is broadcastable to `y_true`. `sample_weight` acts as a
+ coefficient for the loss. If a scalar is provided, then the loss is
+ simply scaled by the given value. If `sample_weight` is a tensor of size
+ `[batch_size]`, then the total loss for each sample of the batch is
+ rescaled by the corresponding element in the `sample_weight` vector. If
+ the shape of `sample_weight` matches the shape of `y_pred`, then the
+ loss of each measurable element of `y_pred` is scaled by the
+ corresponding value of `sample_weight`.
+
+
+#### Returns:
+
+Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
+ shape as `y_true`; otherwise, it is scalar.
+
+
+
+#### Raises:
+
+
+* `ValueError`: If the shape of `sample_weight` is invalid.
+
+from_config
+
+``` python
+from_config(
+ cls,
+ config
+)
+```
+
+Instantiates a `Loss` from its config (output of `get_config()`).
+
+
+#### Args:
+
+
+* `config`: Output of `get_config()`.
+
+
+#### Returns:
+
+A `Loss` instance.
+
+
+get_config
+
+``` python
+get_config()
+```
+
+
+
+
+
+
diff --git a/docs/api_docs/python/tfa/losses/SigmoidFocalCrossEntropy.md b/docs/api_docs/python/tfa/losses/SigmoidFocalCrossEntropy.md
new file mode 100644
index 0000000000..d63ac959fb
--- /dev/null
+++ b/docs/api_docs/python/tfa/losses/SigmoidFocalCrossEntropy.md
@@ -0,0 +1,168 @@
+
+
+
+
+
+
+
+
+
+# tfa.losses.SigmoidFocalCrossEntropy
+
+## Class `SigmoidFocalCrossEntropy`
+
+Implements the focal loss function.
+
+
+
+### Aliases:
+
+* Class `tfa.losses.SigmoidFocalCrossEntropy`
+* Class `tfa.losses.focal_loss.SigmoidFocalCrossEntropy`
+
+
+
+Defined in [`losses/focal_loss.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/losses/focal_loss.py).
+
+
+
+Focal loss was first introduced in the RetinaNet paper
+(https://arxiv.org/pdf/1708.02002.pdf). Focal loss is extremely useful for
+classification when you have highly imbalanced classes. It down-weights
+well-classified examples and focuses on hard examples. The loss value is
+much higher for a sample that is misclassified by the classifier than
+for a well-classified example. One of the
+best use-cases of focal loss is its usage in object detection where the
+imbalance between the background class and other classes is extremely high.
+
+#### Usage:
+
+
+
+```python
+fl = tfa.losses.SigmoidFocalCrossEntropy()
+loss = fl(
+    y_true=[[1.0], [1.0], [0.0]],
+    y_pred=[[0.97], [0.91], [0.03]])
+print('Loss: ', loss.numpy())
+# Loss: [[0.03045921]
+#        [0.09431068]
+#        [0.31471074]]
+```
+Usage with tf.keras API:
+
+```python
+model = tf.keras.Model(inputs, outputs)
+model.compile('sgd', loss=tfa.losses.SigmoidFocalCrossEntropy())
+```
+
+#### Args:
+
+
+* `alpha`: balancing factor, default value is 0.25.
+* `gamma`: modulating factor, default value is 2.0.
+
+#### Returns:
+
+Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
+ shape as `y_true`; otherwise, it is scalar.
+
+
+
+#### Raises:
+
+
+* `ValueError`: If the shape of `sample_weight` is invalid or value of
+ `gamma` is less than zero
+
+__init__
+
+``` python
+__init__(
+ from_logits=False,
+ alpha=0.25,
+ gamma=2.0,
+ reduction=tf.keras.losses.Reduction.NONE,
+ name='sigmoid_focal_crossentropy'
+)
+```
+
+
+
+
+
+
+## Methods
+
+__call__
+
+``` python
+__call__(
+ y_true,
+ y_pred,
+ sample_weight=None
+)
+```
+
+Invokes the `Loss` instance.
+
+
+#### Args:
+
+
+* `y_true`: Ground truth values.
+* `y_pred`: The predicted values.
+* `sample_weight`: Optional `Tensor` whose rank is either 0, or the same rank
+ as `y_true`, or is broadcastable to `y_true`. `sample_weight` acts as a
+ coefficient for the loss. If a scalar is provided, then the loss is
+ simply scaled by the given value. If `sample_weight` is a tensor of size
+ `[batch_size]`, then the total loss for each sample of the batch is
+ rescaled by the corresponding element in the `sample_weight` vector. If
+ the shape of `sample_weight` matches the shape of `y_pred`, then the
+ loss of each measurable element of `y_pred` is scaled by the
+ corresponding value of `sample_weight`.
+
+
+#### Returns:
+
+Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
+ shape as `y_true`; otherwise, it is scalar.
+
+
+
+#### Raises:
+
+
+* `ValueError`: If the shape of `sample_weight` is invalid.
+
+from_config
+
+``` python
+from_config(
+ cls,
+ config
+)
+```
+
+Instantiates a `Loss` from its config (output of `get_config()`).
+
+
+#### Args:
+
+
+* `config`: Output of `get_config()`.
+
+
+#### Returns:
+
+A `Loss` instance.
+
+
+get_config
+
+``` python
+get_config()
+```
+
+
+
+
+
+
diff --git a/docs/api_docs/python/tfa/losses/SparsemaxLoss.md b/docs/api_docs/python/tfa/losses/SparsemaxLoss.md
new file mode 100644
index 0000000000..d58aeafa86
--- /dev/null
+++ b/docs/api_docs/python/tfa/losses/SparsemaxLoss.md
@@ -0,0 +1,133 @@
+
+
+
+
+
+
+
+
+
+# tfa.losses.SparsemaxLoss
+
+## Class `SparsemaxLoss`
+
+Sparsemax loss function.
+
+
+
+
+
+Defined in [`losses/sparsemax_loss.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/losses/sparsemax_loss.py).
+
+
+
+Computes the generalized multi-label classification loss for the sparsemax
+function.
+
+Because the sparsemax loss function needs both the probability output and
+the logits to compute the loss value, `from_logits` must be `True`.
+
+Because it computes the generalized multi-label loss, the shape of both
+`y_pred` and `y_true` must be `[batch_size, num_classes]`.
+
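+A minimal compile-time sketch (the model below is hypothetical; note the
+linear output layer, since `from_logits=True` expects raw logits):
+
+```python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+# Hypothetical classifier emitting raw logits over 10 classes.
+model = tf.keras.Sequential([
+    tf.keras.layers.Dense(64, activation='relu', input_shape=(32,)),
+    tf.keras.layers.Dense(10)   # logits; no softmax/sparsemax here
+])
+model.compile(optimizer='sgd',
+              loss=tfa.losses.SparsemaxLoss(from_logits=True))
+```
+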
+#### Args:
+
+
+* `from_logits`: Whether `y_pred` is expected to be a logits tensor. Default
+ is `True`, meaning `y_pred` is the logits.
+* `reduction`: (Optional) Type of `tf.keras.losses.Reduction` to apply to
+ loss. Default value is `SUM_OVER_BATCH_SIZE`.
+* `name`: Optional name for the op.
+
+__init__
+
+``` python
+__init__(
+ from_logits=True,
+ reduction=tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE,
+ name='sparsemax_loss'
+)
+```
+
+
+
+
+
+
+## Methods
+
+__call__
+
+``` python
+__call__(
+ y_true,
+ y_pred,
+ sample_weight=None
+)
+```
+
+Invokes the `Loss` instance.
+
+
+#### Args:
+
+
+* `y_true`: Ground truth values.
+* `y_pred`: The predicted values.
+* `sample_weight`: Optional `Tensor` whose rank is either 0, or the same rank
+ as `y_true`, or is broadcastable to `y_true`. `sample_weight` acts as a
+ coefficient for the loss. If a scalar is provided, then the loss is
+ simply scaled by the given value. If `sample_weight` is a tensor of size
+ `[batch_size]`, then the total loss for each sample of the batch is
+ rescaled by the corresponding element in the `sample_weight` vector. If
+ the shape of `sample_weight` matches the shape of `y_pred`, then the
+ loss of each measurable element of `y_pred` is scaled by the
+ corresponding value of `sample_weight`.
+
+
+#### Returns:
+
+Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
+ shape as `y_true`; otherwise, it is scalar.
+
+
+
+#### Raises:
+
+
+* `ValueError`: If the shape of `sample_weight` is invalid.
+
+from_config
+
+``` python
+from_config(
+ cls,
+ config
+)
+```
+
+Instantiates a `Loss` from its config (output of `get_config()`).
+
+
+#### Args:
+
+
+* `config`: Output of `get_config()`.
+
+
+#### Returns:
+
+A `Loss` instance.
+
+
+get_config
+
+``` python
+get_config()
+```
+
+
+
+
+
+
diff --git a/docs/api_docs/python/tfa/losses/TripletSemiHardLoss.md b/docs/api_docs/python/tfa/losses/TripletSemiHardLoss.md
new file mode 100644
index 0000000000..34e9b9a48a
--- /dev/null
+++ b/docs/api_docs/python/tfa/losses/TripletSemiHardLoss.md
@@ -0,0 +1,136 @@
+
+
+
+
+
+
+
+
+
+# tfa.losses.TripletSemiHardLoss
+
+## Class `TripletSemiHardLoss`
+
+Computes the triplet loss with semi-hard negative mining.
+
+
+
+### Aliases:
+
+* Class `tfa.losses.TripletSemiHardLoss`
+* Class `tfa.losses.triplet.TripletSemiHardLoss`
+
+
+
+Defined in [`losses/triplet.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/losses/triplet.py).
+
+
+
+The loss encourages the positive distances (between a pair of embeddings
+with the same labels) to be smaller than the minimum negative distance
+among those negatives that are at least greater than the positive
+distance plus the margin constant (called semi-hard negatives) in the
+mini-batch. If no such negative exists, the largest negative distance is
+used instead. See: https://arxiv.org/abs/1503.03832.
+
+We expect labels `y_true` to be provided as a 1-D integer `Tensor` with
+shape [batch_size] of multi-class integer labels, and embeddings `y_pred`
+as a 2-D float `Tensor` of l2-normalized embedding vectors.
+
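+A minimal sketch with random embeddings, purely for illustration (in
+practice `y_pred` comes from a model and must be l2-normalized):
+
+```python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+# Illustrative only: 8 samples, 16-dim embeddings, 3 classes.
+y_true = tf.constant([0, 1, 2, 0, 1, 2, 0, 1])
+y_pred = tf.math.l2_normalize(tf.random.normal([8, 16]), axis=1)
+
+loss = tfa.losses.TripletSemiHardLoss(margin=1.0)(y_true, y_pred)
+```
+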
+#### Args:
+
+
+* `margin`: Float, margin term in the loss definition. Default value is 1.0.
+* `name`: Optional name for the op.
+
+__init__
+
+``` python
+__init__(
+ margin=1.0,
+ name=None
+)
+```
+
+
+
+
+
+
+## Methods
+
+__call__
+
+``` python
+__call__(
+ y_true,
+ y_pred,
+ sample_weight=None
+)
+```
+
+Invokes the `Loss` instance.
+
+
+#### Args:
+
+
+* `y_true`: Ground truth values.
+* `y_pred`: The predicted values.
+* `sample_weight`: Optional `Tensor` whose rank is either 0, or the same rank
+ as `y_true`, or is broadcastable to `y_true`. `sample_weight` acts as a
+ coefficient for the loss. If a scalar is provided, then the loss is
+ simply scaled by the given value. If `sample_weight` is a tensor of size
+ `[batch_size]`, then the total loss for each sample of the batch is
+ rescaled by the corresponding element in the `sample_weight` vector. If
+ the shape of `sample_weight` matches the shape of `y_pred`, then the
+ loss of each measurable element of `y_pred` is scaled by the
+ corresponding value of `sample_weight`.
+
+
+#### Returns:
+
+Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
+ shape as `y_true`; otherwise, it is scalar.
+
+
+
+#### Raises:
+
+
+* `ValueError`: If the shape of `sample_weight` is invalid.
+
+from_config
+
+``` python
+from_config(
+ cls,
+ config
+)
+```
+
+Instantiates a `Loss` from its config (output of `get_config()`).
+
+
+#### Args:
+
+
+* `config`: Output of `get_config()`.
+
+
+#### Returns:
+
+A `Loss` instance.
+
+
+get_config
+
+``` python
+get_config()
+```
+
+
+
+
+
+
diff --git a/docs/api_docs/python/tfa/losses/contrastive.md b/docs/api_docs/python/tfa/losses/contrastive.md
new file mode 100644
index 0000000000..2a46f15249
--- /dev/null
+++ b/docs/api_docs/python/tfa/losses/contrastive.md
@@ -0,0 +1,24 @@
+
+
+
+
+
+# Module: tfa.losses.contrastive
+
+Implements contrastive loss.
+
+
+
+Defined in [`losses/contrastive.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/losses/contrastive.py).
+
+
+
+
+## Classes
+
+[`class ContrastiveLoss`](../../tfa/losses/ContrastiveLoss.md): Computes the contrastive loss between `y_true` and `y_pred`.
+
+## Functions
+
+[`contrastive_loss(...)`](../../tfa/losses/contrastive_loss.md): Computes the contrastive loss between `y_true` and `y_pred`.
+
diff --git a/docs/api_docs/python/tfa/losses/contrastive_loss.md b/docs/api_docs/python/tfa/losses/contrastive_loss.md
new file mode 100644
index 0000000000..fbfcb80da7
--- /dev/null
+++ b/docs/api_docs/python/tfa/losses/contrastive_loss.md
@@ -0,0 +1,57 @@
+
+
+
+
+
+# tfa.losses.contrastive_loss
+
+Computes the contrastive loss between `y_true` and `y_pred`.
+
+### Aliases:
+
+* `tfa.losses.contrastive.contrastive_loss`
+* `tfa.losses.contrastive_loss`
+
+``` python
+tfa.losses.contrastive_loss(
+ y_true,
+ y_pred,
+ margin=1.0
+)
+```
+
+
+
+Defined in [`losses/contrastive.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/losses/contrastive.py).
+
+
+
+This loss encourages the embeddings to be close to each other for
+samples of the same label, and to be at least the margin constant apart
+for samples of different labels.
+
+The euclidean distances `y_pred` between two embedding matrices
+`a` and `b` with shape [batch_size, hidden_size] can be computed
+as follows:
+
+```python
+# y_pred = \sqrt (\sum_i (a[:, i] - b[:, i])^2)
+y_pred = tf.linalg.norm(a - b, axis=1)
+```
+
+See: http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
+
+#### Args:
+
+
+* `y_true`: 1-D integer `Tensor` with shape [batch_size] of
+ binary labels indicating positive vs negative pair.
+* `y_pred`: 1-D float `Tensor` with shape [batch_size] of
+ distances between two embedding matrices.
+* `margin`: margin term in the loss definition.
+
+
+#### Returns:
+
+
+* `contrastive_loss`: 1-D float `Tensor` with shape [batch_size].
\ No newline at end of file
diff --git a/docs/api_docs/python/tfa/losses/focal_loss.md b/docs/api_docs/python/tfa/losses/focal_loss.md
new file mode 100644
index 0000000000..7cee5cd250
--- /dev/null
+++ b/docs/api_docs/python/tfa/losses/focal_loss.md
@@ -0,0 +1,24 @@
+
+
+
+
+
+# Module: tfa.losses.focal_loss
+
+Implements Focal loss.
+
+
+
+Defined in [`losses/focal_loss.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/losses/focal_loss.py).
+
+
+
+
+## Classes
+
+[`class SigmoidFocalCrossEntropy`](../../tfa/losses/SigmoidFocalCrossEntropy.md): Implements the focal loss function.
+
+## Functions
+
+[`sigmoid_focal_crossentropy(...)`](../../tfa/losses/sigmoid_focal_crossentropy.md): Computes the sigmoid focal cross-entropy loss.
+
diff --git a/docs/api_docs/python/tfa/losses/lifted.md b/docs/api_docs/python/tfa/losses/lifted.md
new file mode 100644
index 0000000000..17ccfdaea1
--- /dev/null
+++ b/docs/api_docs/python/tfa/losses/lifted.md
@@ -0,0 +1,24 @@
+
+
+
+
+
+# Module: tfa.losses.lifted
+
+Implements lifted_struct_loss.
+
+
+
+Defined in [`losses/lifted.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/losses/lifted.py).
+
+
+
+
+## Classes
+
+[`class LiftedStructLoss`](../../tfa/losses/LiftedStructLoss.md): Computes the lifted structured loss.
+
+## Functions
+
+[`lifted_struct_loss(...)`](../../tfa/losses/lifted_struct_loss.md): Computes the lifted structured loss.
+
diff --git a/docs/api_docs/python/tfa/losses/lifted_struct_loss.md b/docs/api_docs/python/tfa/losses/lifted_struct_loss.md
new file mode 100644
index 0000000000..269e1759de
--- /dev/null
+++ b/docs/api_docs/python/tfa/losses/lifted_struct_loss.md
@@ -0,0 +1,43 @@
+
+
+
+
+
+# tfa.losses.lifted_struct_loss
+
+Computes the lifted structured loss.
+
+### Aliases:
+
+* `tfa.losses.lifted.lifted_struct_loss`
+* `tfa.losses.lifted_struct_loss`
+
+``` python
+tfa.losses.lifted_struct_loss(
+ labels,
+ embeddings,
+ margin=1.0
+)
+```
+
+
+
+Defined in [`losses/lifted.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/losses/lifted.py).
+
+
+
+
+#### Args:
+
+
+* `labels`: 1-D tf.int32 `Tensor` with shape [batch_size] of
+ multiclass integer labels.
+* `embeddings`: 2-D float `Tensor` of embedding vectors. Embeddings should
+ not be l2 normalized.
+* `margin`: Float, margin term in the loss definition.
+
+
+#### Returns:
+
+
+* `lifted_loss`: tf.float32 scalar.
\ No newline at end of file
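+
+A minimal sketch with random embeddings (illustrative only; the
+embeddings are deliberately left un-normalized, as required above):
+
+```python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+labels = tf.constant([0, 0, 1, 1], dtype=tf.int32)
+embeddings = tf.random.normal([4, 32])   # not l2-normalized
+loss = tfa.losses.lifted_struct_loss(labels, embeddings, margin=1.0)
+```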
diff --git a/docs/api_docs/python/tfa/losses/metric_learning.md b/docs/api_docs/python/tfa/losses/metric_learning.md
new file mode 100644
index 0000000000..87853e9490
--- /dev/null
+++ b/docs/api_docs/python/tfa/losses/metric_learning.md
@@ -0,0 +1,20 @@
+
+
+
+
+
+# Module: tfa.losses.metric_learning
+
+Functions of metric learning.
+
+
+
+Defined in [`losses/metric_learning.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/losses/metric_learning.py).
+
+
+
+
+## Functions
+
+[`pairwise_distance(...)`](../../tfa/losses/metric_learning/pairwise_distance.md): Computes the pairwise distance matrix with numerical stability.
+
diff --git a/docs/api_docs/python/tfa/losses/metric_learning/pairwise_distance.md b/docs/api_docs/python/tfa/losses/metric_learning/pairwise_distance.md
new file mode 100644
index 0000000000..0b2e1e459a
--- /dev/null
+++ b/docs/api_docs/python/tfa/losses/metric_learning/pairwise_distance.md
@@ -0,0 +1,35 @@
+
+
+
+
+
+# tfa.losses.metric_learning.pairwise_distance
+
+Computes the pairwise distance matrix with numerical stability.
+
+``` python
+tfa.losses.metric_learning.pairwise_distance(
+ feature,
+ squared=False
+)
+```
+
+
+
+Defined in [`losses/metric_learning.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/losses/metric_learning.py).
+
+
+
+output[i, j] = || feature[i, :] - feature[j, :] ||_2
+
+#### Args:
+
+
+* `feature`: 2-D Tensor of size [number of data, feature dimension].
+* `squared`: Boolean, whether or not to square the pairwise distances.
+
+
+#### Returns:
+
+
+* `pairwise_distances`: 2-D Tensor of size [number of data, number of data].
\ No newline at end of file
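+
+For a quick sanity check, a sketch with made-up coordinates (the two
+points form a 3-4-5 triangle, so their distance is 5):
+
+```python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+x = tf.constant([[0.0, 0.0],
+                 [3.0, 4.0]])   # two 2-D feature vectors
+d = tfa.losses.metric_learning.pairwise_distance(x)
+# d is approximately [[0., 5.], [5., 0.]]
+```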
diff --git a/docs/api_docs/python/tfa/losses/sigmoid_focal_crossentropy.md b/docs/api_docs/python/tfa/losses/sigmoid_focal_crossentropy.md
new file mode 100644
index 0000000000..fc27310f35
--- /dev/null
+++ b/docs/api_docs/python/tfa/losses/sigmoid_focal_crossentropy.md
@@ -0,0 +1,38 @@
+
+
+
+
+
+# tfa.losses.sigmoid_focal_crossentropy
+
+Computes the sigmoid focal cross-entropy loss.
+
+### Aliases:
+
+* `tfa.losses.focal_loss.sigmoid_focal_crossentropy`
+* `tfa.losses.sigmoid_focal_crossentropy`
+
+``` python
+tfa.losses.sigmoid_focal_crossentropy(
+ y_true,
+ y_pred,
+ alpha=0.25,
+ gamma=2.0,
+ from_logits=False
+)
+```
+
+
+
+Defined in [`losses/focal_loss.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/losses/focal_loss.py).
+
+
+#### Args:
+
+
+* `y_true`: true targets tensor.
+* `y_pred`: predictions tensor.
+* `alpha`: balancing factor.
+* `gamma`: modulating factor.
+
+#### Returns:
+
+Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the
+same shape as `y_true`; otherwise, it is scalar.
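+
+A minimal sketch with made-up targets and logits (here `from_logits=True`,
+so the second argument holds raw logits):
+
+```python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+y_true = tf.constant([[1.0], [1.0], [0.0]])
+logits = tf.constant([[2.0], [1.0], [-1.5]])   # hypothetical logits
+loss = tfa.losses.sigmoid_focal_crossentropy(
+    y_true, logits, alpha=0.25, gamma=2.0, from_logits=True)
+```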
diff --git a/docs/api_docs/python/tfa/losses/sparsemax_loss.md b/docs/api_docs/python/tfa/losses/sparsemax_loss.md
new file mode 100644
index 0000000000..5152b8ce68
--- /dev/null
+++ b/docs/api_docs/python/tfa/losses/sparsemax_loss.md
@@ -0,0 +1,44 @@
+
+
+
+
+
+# tfa.losses.sparsemax_loss
+
+Sparsemax loss function [1].
+
+``` python
+tfa.losses.sparsemax_loss(
+ logits,
+ sparsemax,
+ labels,
+ name=None
+)
+```
+
+
+
+Defined in [`losses/sparsemax_loss.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/losses/sparsemax_loss.py).
+
+
+
+Computes the generalized multi-label classification loss for the sparsemax
+function. The implementation is a reformulation of the original loss
+function such that it uses the sparsemax probability output instead of the
+internal τ variable. However, the output is identical to the original
+loss function.
+
+[1]: https://arxiv.org/abs/1602.02068
+
+#### Args:
+
+
+* `logits`: A `Tensor`. Must be one of the following types: `float32`,
+ `float64`.
+* `sparsemax`: A `Tensor`. Must have the same type as `logits`.
+* `labels`: A `Tensor`. Must have the same type as `logits`.
+* `name`: A name for the operation (optional).
+
+#### Returns:
+
+A `Tensor`. Has the same type as `logits`.
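+
+A minimal sketch; the `sparsemax` argument is the probability output
+obtained by applying `tfa.activations.sparsemax` to the same logits
+(values below are random and purely illustrative):
+
+```python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+logits = tf.random.normal([2, 5])
+probs = tfa.activations.sparsemax(logits)
+labels = tf.constant([[0., 0., 1., 0., 0.],
+                      [1., 0., 0., 0., 0.]])
+loss = tfa.losses.sparsemax_loss(logits, probs, labels)
+```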
diff --git a/docs/api_docs/python/tfa/losses/triplet.md b/docs/api_docs/python/tfa/losses/triplet.md
new file mode 100644
index 0000000000..0b392ac50e
--- /dev/null
+++ b/docs/api_docs/python/tfa/losses/triplet.md
@@ -0,0 +1,24 @@
+
+
+
+
+
+# Module: tfa.losses.triplet
+
+Implements triplet loss.
+
+
+
+Defined in [`losses/triplet.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/losses/triplet.py).
+
+
+
+
+## Classes
+
+[`class TripletSemiHardLoss`](../../tfa/losses/TripletSemiHardLoss.md): Computes the triplet loss with semi-hard negative mining.
+
+## Functions
+
+[`triplet_semihard_loss(...)`](../../tfa/losses/triplet_semihard_loss.md): Computes the triplet loss with semi-hard negative mining.
+
diff --git a/docs/api_docs/python/tfa/losses/triplet_semihard_loss.md b/docs/api_docs/python/tfa/losses/triplet_semihard_loss.md
new file mode 100644
index 0000000000..0c040e1f40
--- /dev/null
+++ b/docs/api_docs/python/tfa/losses/triplet_semihard_loss.md
@@ -0,0 +1,37 @@
+
+
+
+
+
+# tfa.losses.triplet_semihard_loss
+
+Computes the triplet loss with semi-hard negative mining.
+
+### Aliases:
+
+* `tfa.losses.triplet.triplet_semihard_loss`
+* `tfa.losses.triplet_semihard_loss`
+
+``` python
+tfa.losses.triplet_semihard_loss(
+ y_true,
+ y_pred,
+ margin=1.0
+)
+```
+
+
+
+Defined in [`losses/triplet.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/losses/triplet.py).
+
+
+
+
+#### Args:
+
+
+* `y_true`: 1-D integer `Tensor` with shape [batch_size] of
+ multiclass integer labels.
+* `y_pred`: 2-D float `Tensor` of embedding vectors. Embeddings should
+ be l2 normalized.
+* `margin`: Float, margin term in the loss definition.
\ No newline at end of file
diff --git a/docs/api_docs/python/tfa/metrics.md b/docs/api_docs/python/tfa/metrics.md
new file mode 100644
index 0000000000..ffe2d4da26
--- /dev/null
+++ b/docs/api_docs/python/tfa/metrics.md
@@ -0,0 +1,24 @@
+
+
+
+
+
+# Module: tfa.metrics
+
+A module containing metrics that conform to Keras API.
+
+
+
+Defined in [`metrics/__init__.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/metrics/__init__.py).
+
+
+
+
+## Modules
+
+[`cohens_kappa`](../tfa/metrics/cohens_kappa.md) module: Implements Cohen's Kappa.
+
+## Classes
+
+[`class CohenKappa`](../tfa/metrics/CohenKappa.md): Computes Kappa score between two raters.
+
diff --git a/docs/api_docs/python/tfa/metrics/CohenKappa.md b/docs/api_docs/python/tfa/metrics/CohenKappa.md
new file mode 100644
index 0000000000..b42fcfd6af
--- /dev/null
+++ b/docs/api_docs/python/tfa/metrics/CohenKappa.md
@@ -0,0 +1,911 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# tfa.metrics.CohenKappa
+
+## Class `CohenKappa`
+
+Computes Kappa score between two raters.
+
+
+
+### Aliases:
+
+* Class `tfa.metrics.CohenKappa`
+* Class `tfa.metrics.cohens_kappa.CohenKappa`
+
+
+
+Defined in [`metrics/cohens_kappa.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/metrics/cohens_kappa.py).
+
+
+
+The score lies in the range [-1, 1]. A score of -1 represents
+complete disagreement between two raters whereas a score of 1
+represents complete agreement between the two raters.
+A score of 0 means agreement by chance.
+
+Note: As of now, this implementation considers all labels
+while calculating the Cohen's Kappa score.
+
+#### Usage:
+
+
+```python
+actuals = np.array([4, 4, 3, 4, 2, 4, 1, 1], dtype=np.int32)
+preds = np.array([4, 4, 3, 4, 4, 2, 1, 1], dtype=np.int32)
+
+m = tfa.metrics.CohenKappa(num_classes=5, weightage="quadratic")
+m.update_state(actuals, preds)
+print('Final result: ', m.result().numpy()) # Result: 0.68932
+```
+Usage with tf.keras API:
+
+```python
+model = tf.keras.Model(inputs, outputs)
+model.add_metric(tfa.metrics.CohenKappa(num_classes=5)(outputs))
+model.compile('sgd', loss='mse')
+```
+
+#### Args:
+
+
+* `num_classes`: Number of unique classes in your dataset
+* `weightage`: Weighting to be considered for calculating
+ kappa statistics. A valid value is one of
+ [None, 'linear', 'quadratic']. Defaults to None.
+
+
+#### Returns:
+
+
+* `kappa_score`: float
+ The kappa statistic, which is a number between -1 and 1. The maximum
+ value means complete agreement; zero or lower means chance agreement.
+
+
+#### Raises:
+
+
+* `ValueError`: If the value passed for `weightage` is invalid,
+  i.e. not one of [None, 'linear', 'quadratic'].
+
+__init__
+
+``` python
+__init__(
+ num_classes,
+ name='cohen_kappa',
+ weightage=None,
+ dtype=tf.float32
+)
+```
+
+
+
+
+
+
+## Properties
+
+activity_regularizer
+
+Optional regularizer function for the output of this layer.
+
+
+dtype
+
+
+
+
+dynamic
+
+
+
+
+
+input
+
+Retrieves the input tensor(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input tensor or list of input tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layers.
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+* `AttributeError`: If no inbound nodes are found.
+
+
+input_mask
+
+Retrieves the input mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input mask tensor (potentially None) or list of input
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layers.
+
+
+input_shape
+
+Retrieves the input shape(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer, or if all inputs
+have the same shape.
+
+#### Returns:
+
+Input shape, as an integer shape tuple
+(or list of shape tuples, one tuple per input tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined input_shape.
+* `RuntimeError`: if called in Eager mode.
+
+
+
+
+
+
+losses
+
+Losses which are associated with this `Layer`.
+
+Variable regularization tensors are created when this property is accessed,
+so it is eager safe: accessing `losses` under a `tf.GradientTape` will
+propagate gradients back to the corresponding variables.
+
+#### Returns:
+
+A list of tensors.
+
+
+metrics
+
+
+
+
+name
+
+
+
+
+name_scope
+
+Returns a `tf.name_scope` instance for this class.
+
+
+non_trainable_variables
+
+
+
+
+non_trainable_weights
+
+
+
+
+output
+
+Retrieves the output tensor(s) of a layer.
+
+Only applicable if the layer has exactly one output,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output tensor or list of output tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to more than one incoming
+ layers.
+* `RuntimeError`: if called in Eager mode.
+
+output_mask
+
+Retrieves the output mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output mask tensor (potentially None) or list of output
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layers.
+
+output_shape
+
+Retrieves the output shape(s) of a layer.
+
+Only applicable if the layer has one output,
+or if all outputs have the same shape.
+
+#### Returns:
+
+Output shape, as an integer shape tuple
+(or list of shape tuples, one tuple per output tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined output shape.
+* `RuntimeError`: if called in Eager mode.
+
+submodules
+
+Sequence of all sub-modules.
+
+Submodules are modules which are properties of this module, or found as
+properties of modules which are properties of this module (and so on).
+
+```
+a = tf.Module()
+b = tf.Module()
+c = tf.Module()
+a.b = b
+b.c = c
+assert list(a.submodules) == [b, c]
+assert list(b.submodules) == [c]
+assert list(c.submodules) == []
+```
+
+#### Returns:
+
+A sequence of all submodules.
+
+
+trainable
+
+
+
+
+trainable_variables
+
+
+
+
+trainable_weights
+
+
+
+
+updates
+
+
+
+
+variables
+
+Returns the list of all layer variables/weights.
+
+Alias of `self.weights`.
+
+#### Returns:
+
+A list of variables.
+
+
+weights
+
+Returns the list of all layer variables/weights.
+
+
+#### Returns:
+
+A list of variables.
+
+
+
+
+## Methods
+
+__call__
+
+``` python
+__call__(
+ *args,
+ **kwargs
+)
+```
+
+Accumulates statistics and then computes metric result value.
+
+
+#### Args:
+
+
+* `*args`, `**kwargs`: A mini-batch of inputs to the Metric,
+  passed on to `update_state()`.
+
+
+#### Returns:
+
+The metric value tensor.
+
+
+apply
+
+``` python
+apply(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Apply the layer on an input.
+
+This is an alias of `self.__call__`.
+
+#### Arguments:
+
+
+* `inputs`: Input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+build
+
+``` python
+build(input_shape)
+```
+
+Creates the variables of the layer (optional, for subclass implementers).
+
+This is a method that implementers of subclasses of `Layer` or `Model`
+can override if they need a state-creation step in-between
+layer instantiation and layer call.
+
+This is typically used to create the weights of `Layer` subclasses.
+
+#### Arguments:
+
+
+* `input_shape`: Instance of `TensorShape`, or list of instances of
+ `TensorShape` if the layer expects a list of inputs
+ (one instance per input).
+
+compute_mask
+
+``` python
+compute_mask(
+ inputs,
+ mask=None
+)
+```
+
+Computes an output mask tensor.
+
+
+#### Arguments:
+
+
+* `inputs`: Tensor or list of tensors.
+* `mask`: Tensor or list of tensors.
+
+
+#### Returns:
+
+None or a tensor (or list of tensors,
+ one per output tensor of the layer).
+
+
+compute_output_shape
+
+``` python
+compute_output_shape(input_shape)
+```
+
+Computes the output shape of the layer.
+
+Assumes that the layer will be built
+to match the input shape provided.
+
+#### Arguments:
+
+
+* `input_shape`: Shape tuple (tuple of integers)
+ or list of shape tuples (one per output tensor of the layer).
+ Shape tuples can include None for free dimensions,
+ instead of an integer.
+
+
+#### Returns:
+
+An output shape tuple.
+
+
+count_params
+
+``` python
+count_params()
+```
+
+Count the total number of scalars composing the weights.
+
+
+#### Returns:
+
+An integer count.
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer isn't yet built
+ (in which case its weights aren't yet defined).
+
+from_config
+
+``` python
+from_config(
+ cls,
+ config
+)
+```
+
+Creates a layer from its config.
+
+This method is the reverse of `get_config`,
+capable of instantiating the same layer from the config
+dictionary. It does not handle layer connectivity
+(handled by Network), nor weights (handled by `set_weights`).
+
+#### Arguments:
+
+
+* `config`: A Python dictionary, typically the
+ output of get_config.
+
+
+#### Returns:
+
+A layer instance.
+
+
+get_config
+
+``` python
+get_config()
+```
+
+Returns the serializable config of the metric.
+
+
+
+get_input_at
+
+``` python
+get_input_at(node_index)
+```
+
+Retrieves the input tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+
+get_input_mask_at
+
+``` python
+get_input_mask_at(node_index)
+```
+
+Retrieves the input mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple inputs).
+
+
+
+get_input_shape_at
+
+``` python
+get_input_shape_at(node_index)
+```
+
+Retrieves the input shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_losses_for
+
+``` python
+get_losses_for(inputs)
+```
+
+Retrieves losses relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of loss tensors of the layer that depend on `inputs`.
+
+
+get_output_at
+
+``` python
+get_output_at(node_index)
+```
+
+Retrieves the output tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_output_mask_at
+
+``` python
+get_output_mask_at(node_index)
+```
+
+Retrieves the output mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple outputs).
+
+
+get_output_shape_at
+
+``` python
+get_output_shape_at(node_index)
+```
+
+Retrieves the output shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_updates_for
+
+``` python
+get_updates_for(inputs)
+```
+
+Retrieves updates relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of update ops of the layer that depend on `inputs`.
+
+
+get_weights
+
+``` python
+get_weights()
+```
+
+Returns the current weights of the layer.
+
+
+#### Returns:
+
+Weights values as a list of numpy arrays.
+
+
+reset_states
+
+``` python
+reset_states()
+```
+
+Resets all of the metric state variables.
+
+
+result
+
+``` python
+result()
+```
+
+
+
+
+set_weights
+
+``` python
+set_weights(weights)
+```
+
+Sets the weights of the layer, from Numpy arrays.
+
+
+#### Arguments:
+
+
+* `weights`: a list of Numpy arrays. The number
+ of arrays and their shape must match
+ number of the dimensions of the weights
+ of the layer (i.e. it should match the
+ output of `get_weights`).
+
+
+#### Raises:
+
+
+* `ValueError`: If the provided weights list does not match the
+ layer's specifications.
+
+update_state
+
+``` python
+update_state(
+ y_true,
+ y_pred,
+ sample_weight=None
+)
+```
+
+Accumulates the confusion matrix condition statistics.
+
+
+#### Args:
+
+
+* `y_true`: array, shape = [n_samples]
+ Labels assigned by the first annotator.
+* `y_pred`: array, shape = [n_samples]
+ Labels assigned by the second annotator. The kappa statistic
+ is symmetric, so swapping ``y_true`` and ``y_pred`` doesn't
+ change the value.
+* `sample_weight`: (Optional) For weighting labels in the confusion
+  matrix. Default is None. The dtype for weights should be the same
+  as the dtype for the confusion matrix. For more details,
+  please check `tf.math.confusion_matrix`.
+
+
+
+#### Returns:
+
+Update op.
+
+
+with_name_scope
+
+``` python
+with_name_scope(
+ cls,
+ method
+)
+```
+
+Decorator to automatically enter the module name scope.
+
+```
+class MyModule(tf.Module):
+ @tf.Module.with_name_scope
+ def __call__(self, x):
+ if not hasattr(self, 'w'):
+ self.w = tf.Variable(tf.random.normal([x.shape[1], 64]))
+ return tf.matmul(x, self.w)
+```
+
+Using the above module would produce `tf.Variable`s and `tf.Tensor`s whose
+names included the module name:
+
+```
+mod = MyModule()
+mod(tf.ones([8, 32]))
+# ==> <tf.Tensor: shape=(8, 64), ...>
+mod.w
+# ==> <tf.Variable 'my_module/w:0' shape=(32, 64), ...>
+```
+
+#### Args:
+
+
+* `method`: The method to wrap.
+
+
+#### Returns:
+
+The original method wrapped such that it enters the module's name scope.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/metrics/cohens_kappa.md b/docs/api_docs/python/tfa/metrics/cohens_kappa.md
new file mode 100644
index 0000000000..7c01111f0a
--- /dev/null
+++ b/docs/api_docs/python/tfa/metrics/cohens_kappa.md
@@ -0,0 +1,20 @@
+
+
+
+
+
+# Module: tfa.metrics.cohens_kappa
+
+Implements Cohen's Kappa.
+
+
+
+Defined in [`metrics/cohens_kappa.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/metrics/cohens_kappa.py).
+
+
+
+
+## Classes
+
+[`class CohenKappa`](../../tfa/metrics/CohenKappa.md): Computes Kappa score between two raters.
+
diff --git a/docs/api_docs/python/tfa/optimizers.md b/docs/api_docs/python/tfa/optimizers.md
new file mode 100644
index 0000000000..3dd4e6e411
--- /dev/null
+++ b/docs/api_docs/python/tfa/optimizers.md
@@ -0,0 +1,38 @@
+
+
+
+
+
+# Module: tfa.optimizers
+
+Additional optimizers that conform to Keras API.
+
+
+
+Defined in [`optimizers/__init__.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/optimizers/__init__.py).
+
+
+
+
+## Modules
+
+[`lazy_adam`](../tfa/optimizers/lazy_adam.md) module: Variant of the Adam optimizer that handles sparse updates more efficiently.
+
+[`moving_average`](../tfa/optimizers/moving_average.md) module
+
+[`weight_decay_optimizers`](../tfa/optimizers/weight_decay_optimizers.md) module: Base class to make optimizers weight decay ready.
+
+## Classes
+
+[`class AdamW`](../tfa/optimizers/AdamW.md): Optimizer that implements the Adam algorithm with weight decay.
+
+[`class LazyAdam`](../tfa/optimizers/LazyAdam.md): Variant of the Adam optimizer that handles sparse updates more efficiently.
+
+[`class MovingAverage`](../tfa/optimizers/MovingAverage.md): Optimizer that computes a moving average of the variables.
+
+[`class SGDW`](../tfa/optimizers/SGDW.md): Optimizer that implements the Momentum algorithm with weight_decay.
+
+## Functions
+
+[`extend_with_decoupled_weight_decay(...)`](../tfa/optimizers/extend_with_decoupled_weight_decay.md): Factory function returning an optimizer class with decoupled weight decay.
+
diff --git a/docs/api_docs/python/tfa/optimizers/AdamW.md b/docs/api_docs/python/tfa/optimizers/AdamW.md
new file mode 100644
index 0000000000..92265900d5
--- /dev/null
+++ b/docs/api_docs/python/tfa/optimizers/AdamW.md
@@ -0,0 +1,384 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# tfa.optimizers.AdamW
+
+## Class `AdamW`
+
+Optimizer that implements the Adam algorithm with weight decay.
+
+Inherits From: [`DecoupledWeightDecayExtension`](../../tfa/optimizers/weight_decay_optimizers/DecoupledWeightDecayExtension.md)
+
+### Aliases:
+
+* Class `tfa.optimizers.AdamW`
+* Class `tfa.optimizers.weight_decay_optimizers.AdamW`
+
+
+
+Defined in [`optimizers/weight_decay_optimizers.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/optimizers/weight_decay_optimizers.py).
+
+
+
+This is an implementation of the AdamW optimizer described in "Decoupled
+Weight Decay Regularization" by Loshchilov & Hutter
+(https://arxiv.org/abs/1711.05101)
+([pdf](https://arxiv.org/pdf/1711.05101.pdf)).
+
+It computes the update step of `tf.keras.optimizers.Adam` and additionally
+decays the variable. Note that this is different from adding L2
+regularization on the variables to the loss: it regularizes variables with
+large gradients more than L2 regularization would, which was shown to yield
+better training loss and generalization error in the paper above.
+
+For further information see the documentation of the Adam Optimizer.
+
+This optimizer can also be instantiated as
+```python
+extend_with_decoupled_weight_decay(tf.keras.optimizers.Adam,
+ weight_decay=weight_decay)
+```
+
+Note: when applying a decay to the learning rate, be sure to manually apply
+the decay to the `weight_decay` as well. For example:
+
+```python
+step = tf.Variable(0, trainable=False)
+schedule = tf.optimizers.schedules.PiecewiseConstantDecay(
+ [10000, 15000], [1e-0, 1e-1, 1e-2])
+# lr and wd can be a function or a tensor
+lr = 1e-1 * schedule(step)
+wd = lambda: 1e-4 * schedule(step)
+
+# ...
+
+optimizer = tfa.optimizers.AdamW(learning_rate=lr, weight_decay=wd)
+```
+
+__init__
+
+``` python
+__init__(
+ weight_decay,
+ learning_rate=0.001,
+ beta_1=0.9,
+ beta_2=0.999,
+ epsilon=1e-07,
+ amsgrad=False,
+ name='AdamW',
+ **kwargs
+)
+```
+
+Construct a new AdamW optimizer.
+
+For further information see the documentation of the Adam Optimizer.
+
+#### Args:
+
+
+* `weight_decay`: A Tensor or a floating point value. The weight decay.
+* `learning_rate`: A Tensor or a floating point value. The learning
+ rate.
+* `beta_1`: A float value or a constant float tensor. The exponential
+ decay rate for the 1st moment estimates.
+* `beta_2`: A float value or a constant float tensor. The exponential
+ decay rate for the 2nd moment estimates.
+* `epsilon`: A small constant for numerical stability. This epsilon is
+ "epsilon hat" in the Kingma and Ba paper (in the formula just
+ before Section 2.1), not the epsilon in Algorithm 1 of the
+ paper.
+* `amsgrad`: boolean. Whether to apply AMSGrad variant of this
+ algorithm from the paper "On the Convergence of Adam and
+ beyond".
+* `name`: Optional name for the operations created when applying
+ gradients. Defaults to "AdamW".
+* `**kwargs`: keyword arguments. Allowed to be {`clipnorm`,
+ `clipvalue`, `lr`, `decay`}. `clipnorm` is clip gradients by
+ norm; `clipvalue` is clip gradients by value, `decay` is
+ included for backward compatibility to allow time inverse decay
+ of learning rate. `lr` is included for backward compatibility,
+ recommended to use `learning_rate` instead.
+
+
+
+## Properties
+
+iterations
+
+Variable. The number of training steps this Optimizer has run.
+
+
+weights
+
+Returns variables of this Optimizer based on the order created.
+
+
+
+
+## Methods
+
+add_slot
+
+``` python
+add_slot(
+ var,
+ slot_name,
+ initializer='zeros'
+)
+```
+
+Add a new slot variable for `var`.
+
+
+add_weight
+
+``` python
+add_weight(
+ name,
+ shape,
+ dtype=None,
+ initializer='zeros',
+ trainable=None,
+ synchronization=tf_variables.VariableSynchronization.AUTO,
+ aggregation=tf_variables.VariableAggregation.NONE
+)
+```
+
+
+
+
+apply_gradients
+
+``` python
+apply_gradients(
+ grads_and_vars,
+ name=None,
+ decay_var_list=None
+)
+```
+
+Apply gradients to variables.
+
+This is the second part of `minimize()`. It returns an `Operation` that
+applies gradients.
+
+#### Args:
+
+
+* `grads_and_vars`: List of (gradient, variable) pairs.
+* `name`: Optional name for the returned operation. Defaults to the
+ name passed to the `Optimizer` constructor.
+* `decay_var_list`: Optional list of variables to be decayed. Defaults
+ to all variables in var_list.
+
+#### Returns:
+
+An `Operation` that applies the specified gradients. If
+`global_step` was not None, that operation also increments
+`global_step`.
+
+
+#### Raises:
+
+
+* `TypeError`: If `grads_and_vars` is malformed.
+* `ValueError`: If none of the variables have gradients.
+
+from_config
+
+``` python
+from_config(
+ cls,
+ config,
+ custom_objects=None
+)
+```
+
+Creates an optimizer from its config.
+
+This method is the reverse of `get_config`,
+capable of instantiating the same optimizer from the config
+dictionary.
+
+#### Arguments:
+
+
+* `config`: A Python dictionary, typically the output of get_config.
+* `custom_objects`: A Python dictionary mapping names to additional Python
+ objects used to create this optimizer, such as a function used for a
+ hyperparameter.
+
+
+#### Returns:
+
+An optimizer instance.
+
+
+get_config
+
+``` python
+get_config()
+```
+
+
+
+
+get_gradients
+
+``` python
+get_gradients(
+ loss,
+ params
+)
+```
+
+Returns gradients of `loss` with respect to `params`.
+
+
+#### Arguments:
+
+
+* `loss`: Loss tensor.
+* `params`: List of variables.
+
+
+#### Returns:
+
+List of gradient tensors.
+
+
+
+#### Raises:
+
+
+* `ValueError`: In case any gradient cannot be computed (e.g. if gradient
+ function not implemented).
+
+get_slot
+
+``` python
+get_slot(
+ var,
+ slot_name
+)
+```
+
+
+
+
+get_slot_names
+
+``` python
+get_slot_names()
+```
+
+A list of names for this optimizer's slots.
+
+
+get_updates
+
+``` python
+get_updates(
+ loss,
+ params
+)
+```
+
+
+
+
+get_weights
+
+``` python
+get_weights()
+```
+
+
+
+
+minimize
+
+``` python
+minimize(
+ loss,
+ var_list,
+ grad_loss=None,
+ name=None,
+ decay_var_list=None
+)
+```
+
+Minimize `loss` by updating `var_list`.
+
+This method simply computes gradient using `tf.GradientTape` and calls
+`apply_gradients()`. If you want to process the gradient before
+applying then call `tf.GradientTape` and `apply_gradients()` explicitly
+instead of using this function.
+
+#### Args:
+
+
+* `loss`: A callable taking no arguments which returns the value to
+ minimize.
+* `var_list`: list or tuple of `Variable` objects to update to
+ minimize `loss`, or a callable returning the list or tuple of
+ `Variable` objects. Use callable when the variable list would
+ otherwise be incomplete before `minimize` since the variables
+ are created at the first time `loss` is called.
+* `grad_loss`: Optional. A `Tensor` holding the gradient computed for
+ `loss`.
+* `decay_var_list`: Optional list of variables to be decayed. Defaults
+ to all variables in var_list.
+* `name`: Optional name for the returned operation.
+
+#### Returns:
+
+An Operation that updates the variables in `var_list`. If
+`global_step` was not `None`, that operation also increments
+`global_step`.
+
+
+#### Raises:
+
+
+* `ValueError`: If some of the variables are not `Variable` objects.
+
+set_weights
+
+``` python
+set_weights(weights)
+```
+
+
+
+
+variables
+
+``` python
+variables()
+```
+
+Returns variables of this Optimizer based on the order created.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/optimizers/LazyAdam.md b/docs/api_docs/python/tfa/optimizers/LazyAdam.md
new file mode 100644
index 0000000000..68439b093f
--- /dev/null
+++ b/docs/api_docs/python/tfa/optimizers/LazyAdam.md
@@ -0,0 +1,329 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# tfa.optimizers.LazyAdam
+
+## Class `LazyAdam`
+
+Variant of the Adam optimizer that handles sparse updates more efficiently.
+
+
+
+### Aliases:
+
+* Class `tfa.optimizers.LazyAdam`
+* Class `tfa.optimizers.lazy_adam.LazyAdam`
+
+
+
+Defined in [`optimizers/lazy_adam.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/optimizers/lazy_adam.py).
+
+
+
+The original Adam algorithm maintains two moving-average accumulators for
+each trainable variable; the accumulators are updated at every step.
+This class provides lazier handling of gradient updates for sparse
+variables. It only updates moving-average accumulators for sparse variable
+indices that appear in the current batch, rather than updating the
+accumulators for all indices. Compared with the original Adam optimizer,
+it can provide large improvements in model training throughput for some
+applications. However, it provides slightly different semantics than the
+original Adam algorithm, and may lead to different empirical results.
+
+Note: `amsgrad` is currently not supported, and the argument can only be
+`False`.
+
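+A minimal sketch of a setting where LazyAdam typically helps; the model
+below is hypothetical, and the sparse gradients come from the
+`Embedding` lookup:
+
+```python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+model = tf.keras.Sequential([
+    tf.keras.layers.Embedding(input_dim=10000, output_dim=64),
+    tf.keras.layers.GlobalAveragePooling1D(),
+    tf.keras.layers.Dense(1, activation='sigmoid'),
+])
+model.compile(optimizer=tfa.optimizers.LazyAdam(learning_rate=0.001),
+              loss='binary_crossentropy')
+```
+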
+__init__
+
+``` python
+__init__(
+ learning_rate=0.001,
+ beta_1=0.9,
+ beta_2=0.999,
+ epsilon=1e-07,
+ amsgrad=False,
+ name='LazyAdam',
+ **kwargs
+)
+```
+
+
+
+
+
+
+## Properties
+
+iterations
+
+Variable. The number of training steps this Optimizer has run.
+
+
+weights
+
+Returns variables of this Optimizer based on the order created.
+
+
+
+
+## Methods
+
+add_slot
+
+``` python
+add_slot(
+ var,
+ slot_name,
+ initializer='zeros'
+)
+```
+
+Add a new slot variable for `var`.
+
+
+add_weight
+
+``` python
+add_weight(
+ name,
+ shape,
+ dtype=None,
+ initializer='zeros',
+ trainable=None,
+ synchronization=tf_variables.VariableSynchronization.AUTO,
+ aggregation=tf_variables.VariableAggregation.NONE
+)
+```
+
+
+
+
+apply_gradients
+
+``` python
+apply_gradients(
+ grads_and_vars,
+ name=None
+)
+```
+
+Apply gradients to variables.
+
+This is the second part of `minimize()`. It returns an `Operation` that
+applies gradients.
+
+#### Args:
+
+
+* `grads_and_vars`: List of (gradient, variable) pairs.
+* `name`: Optional name for the returned operation. Defaults to the name
+ passed to the `Optimizer` constructor.
+
+
+#### Returns:
+
+An `Operation` that applies the specified gradients. If `global_step`
+was not None, that operation also increments `global_step`.
+
+
+
+#### Raises:
+
+
+* `TypeError`: If `grads_and_vars` is malformed.
+* `ValueError`: If none of the variables have gradients.
+
+from_config
+
+``` python
+from_config(
+ cls,
+ config,
+ custom_objects=None
+)
+```
+
+Creates an optimizer from its config.
+
+This method is the reverse of `get_config`,
+capable of instantiating the same optimizer from the config
+dictionary.
+
+#### Arguments:
+
+
+* `config`: A Python dictionary, typically the output of get_config.
+* `custom_objects`: A Python dictionary mapping names to additional Python
+ objects used to create this optimizer, such as a function used for a
+ hyperparameter.
+
+
+#### Returns:
+
+An optimizer instance.
+
+
+get_config
+
+``` python
+get_config()
+```
+
+
+
+
+get_gradients
+
+``` python
+get_gradients(
+ loss,
+ params
+)
+```
+
+Returns gradients of `loss` with respect to `params`.
+
+
+#### Arguments:
+
+
+* `loss`: Loss tensor.
+* `params`: List of variables.
+
+
+#### Returns:
+
+List of gradient tensors.
+
+
+
+#### Raises:
+
+
+* `ValueError`: In case any gradient cannot be computed (e.g. if gradient
+ function not implemented).
+
+get_slot
+
+``` python
+get_slot(
+ var,
+ slot_name
+)
+```
+
+
+
+
+get_slot_names
+
+``` python
+get_slot_names()
+```
+
+A list of names for this optimizer's slots.
+
+
+get_updates
+
+``` python
+get_updates(
+ loss,
+ params
+)
+```
+
+
+
+
+get_weights
+
+``` python
+get_weights()
+```
+
+
+
+
+minimize
+
+``` python
+minimize(
+ loss,
+ var_list,
+ grad_loss=None,
+ name=None
+)
+```
+
+Minimize `loss` by updating `var_list`.
+
+This method simply computes gradient using `tf.GradientTape` and calls
+`apply_gradients()`. If you want to process the gradient before applying
+then call `tf.GradientTape` and `apply_gradients()` explicitly instead
+of using this function.
+
+#### Args:
+
+
+* `loss`: A callable taking no arguments which returns the value to minimize.
+* `var_list`: list or tuple of `Variable` objects to update to minimize
+ `loss`, or a callable returning the list or tuple of `Variable` objects.
+ Use callable when the variable list would otherwise be incomplete before
+ `minimize` since the variables are created at the first time `loss` is
+ called.
+* `grad_loss`: Optional. A `Tensor` holding the gradient computed for `loss`.
+* `name`: Optional name for the returned operation.
+
+
+#### Returns:
+
+An Operation that updates the variables in `var_list`. If `global_step`
+was not `None`, that operation also increments `global_step`.
+
+
+
+#### Raises:
+
+
+* `ValueError`: If some of the variables are not `Variable` objects.
+
+set_weights
+
+``` python
+set_weights(weights)
+```
+
+
+
+
+variables
+
+``` python
+variables()
+```
+
+Returns variables of this Optimizer based on the order created.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/optimizers/MovingAverage.md b/docs/api_docs/python/tfa/optimizers/MovingAverage.md
new file mode 100644
index 0000000000..acd7601ee0
--- /dev/null
+++ b/docs/api_docs/python/tfa/optimizers/MovingAverage.md
@@ -0,0 +1,334 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# tfa.optimizers.MovingAverage
+
+## Class `MovingAverage`
+
+Optimizer that computes a moving average of the variables.
+
+
+
+### Aliases:
+
+* Class `tfa.optimizers.MovingAverage`
+* Class `tfa.optimizers.moving_average.MovingAverage`
+
+
+
+Defined in [`optimizers/moving_average.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/optimizers/moving_average.py).
+
+
+
+Empirically it has been found that using the moving average of the trained
+parameters of a deep network is better than using its trained parameters
+directly. This optimizer allows you to compute this moving average and swap
+the variables at save time so that any code outside of the training loop
+will use by default the average values instead of the original ones.
+
+#### Example of usage:
+
+
+
+```python
+opt = tf.keras.optimizers.SGD(learning_rate=0.01)
+opt = tfa.optimizers.MovingAverage(opt)
+```
+
+__init__
+
+``` python
+__init__(
+ optimizer,
+ average_decay=0.1,
+ num_updates=None,
+ sequential_update=True,
+ name='MovingAverage',
+ **kwargs
+)
+```
+
+
+
+
+
+
+## Properties
+
+iterations
+
+Variable. The number of training steps this Optimizer has run.
+
+
+weights
+
+
+
+
+
+
+## Methods
+
+add_slot
+
+``` python
+add_slot(
+ var,
+ slot_name,
+ initializer='zeros'
+)
+```
+
+Add a new slot variable for `var`.
+
+
+add_weight
+
+``` python
+add_weight(
+ name,
+ shape,
+ dtype=None,
+ initializer='zeros',
+ trainable=None,
+ synchronization=tf_variables.VariableSynchronization.AUTO,
+ aggregation=tf_variables.VariableAggregation.NONE
+)
+```
+
+
+
+
+apply_gradients
+
+``` python
+apply_gradients(
+ grads_and_vars,
+ name=None
+)
+```
+
+
+
+
+assign_average_vars
+
+``` python
+assign_average_vars(var_list)
+```
+
+Update variables in var_list with the running mean of the variables.
+
+
+#### Example:
+
+
+```python
+model = tf.keras.Sequential([...])
+opt = tfa.optimizers.MovingAverage(
+ tf.keras.optimizers.SGD(learning_rate=2.0), 0.5)
+
+model.compile(opt, ...)
+model.fit(x, y, ...)
+
+# Update the weights to their mean before saving
+opt.assign_average_vars(model.variables)
+
+model.save('model.h5')
+```
+
+### `from_config`
+
+``` python
+from_config(
+ cls,
+ config,
+ custom_objects=None
+)
+```
+
+Creates an optimizer from its config.
+
+This method is the reverse of `get_config`,
+capable of instantiating the same optimizer from the config
+dictionary.
+
+#### Arguments:
+
+
+* `config`: A Python dictionary, typically the output of get_config.
+* `custom_objects`: A Python dictionary mapping names to additional Python
+ objects used to create this optimizer, such as a function used for a
+ hyperparameter.
+
+
+#### Returns:
+
+An optimizer instance.
+
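+A minimal round-trip sketch (an assumption, not from the original
+docstring; it presumes the config serializes without `custom_objects`):
+
+```python
+config = opt.get_config()
+restored = opt.__class__.from_config(config)
+```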
+
+### `get_config`
+
+``` python
+get_config()
+```
+
+
+
+
+### `get_gradients`
+
+``` python
+get_gradients(
+ loss,
+ params
+)
+```
+
+Returns gradients of `loss` with respect to `params`.
+
+
+#### Arguments:
+
+
+* `loss`: Loss tensor.
+* `params`: List of variables.
+
+
+#### Returns:
+
+List of gradient tensors.
+
+
+
+#### Raises:
+
+
+* `ValueError`: In case any gradient cannot be computed (e.g. if gradient
+ function not implemented).
+
+### `get_slot`
+
+``` python
+get_slot(
+ var,
+ slot_name
+)
+```
+
+
+
+
+### `get_slot_names`
+
+``` python
+get_slot_names()
+```
+
+A list of names for this optimizer's slots.
+
+
+### `get_updates`
+
+``` python
+get_updates(
+ loss,
+ params
+)
+```
+
+
+
+
+### `get_weights`
+
+``` python
+get_weights()
+```
+
+
+
+
+### `minimize`
+
+``` python
+minimize(
+ loss,
+ var_list,
+ grad_loss=None,
+ name=None
+)
+```
+
+Minimize `loss` by updating `var_list`.
+
+This method simply computes the gradients using `tf.GradientTape` and calls
+`apply_gradients()`. If you want to process the gradients before applying
+them, call `tf.GradientTape` and `apply_gradients()` explicitly instead
+of using this function.
+
+#### Args:
+
+
+* `loss`: A callable taking no arguments which returns the value to minimize.
+* `var_list`: list or tuple of `Variable` objects to update to minimize
+ `loss`, or a callable returning the list or tuple of `Variable` objects.
+ Use a callable when the variable list would otherwise be incomplete
+ before `minimize`, since the variables are created the first time
+ `loss` is called.
+* `grad_loss`: Optional. A `Tensor` holding the gradient computed for `loss`.
+* `name`: Optional name for the returned operation.
+
+
+#### Returns:
+
+An Operation that updates the variables in `var_list`. If `global_step`
+was not `None`, that operation also increments `global_step`.
+
+
+
+#### Raises:
+
+
+* `ValueError`: If some of the variables are not `Variable` objects.
+
+### `set_weights`
+
+``` python
+set_weights(weights)
+```
+
+
+
+
+### `variables`
+
+``` python
+variables()
+```
+
+Returns variables of this Optimizer based on the order created.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/optimizers/SGDW.md b/docs/api_docs/python/tfa/optimizers/SGDW.md
new file mode 100644
index 0000000000..a564362933
--- /dev/null
+++ b/docs/api_docs/python/tfa/optimizers/SGDW.md
@@ -0,0 +1,372 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# tfa.optimizers.SGDW
+
+## Class `SGDW`
+
+Optimizer that implements the Momentum algorithm with weight_decay.
+
+Inherits From: [`DecoupledWeightDecayExtension`](../../tfa/optimizers/weight_decay_optimizers/DecoupledWeightDecayExtension.md)
+
+### Aliases:
+
+* Class `tfa.optimizers.SGDW`
+* Class `tfa.optimizers.weight_decay_optimizers.SGDW`
+
+
+
+Defined in [`optimizers/weight_decay_optimizers.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/optimizers/weight_decay_optimizers.py).
+
+
+
+This is an implementation of the SGDW optimizer described in "Decoupled
+Weight Decay Regularization" by Loshchilov & Hutter
+(https://arxiv.org/abs/1711.05101)
+([pdf](https://arxiv.org/pdf/1711.05101.pdf)).
+It computes the update step of `tf.keras.optimizers.SGD` and additionally
+decays the variable. Note that this is different from adding
+L2 regularization on the variables to the loss. Decoupling the weight decay
+from other hyperparameters (in particular the learning rate) simplifies
+hyperparameter search.
+
+For further information see the documentation of the SGD Optimizer.
+
+This optimizer can also be instantiated as
+```python
+SGDW = tfa.optimizers.extend_with_decoupled_weight_decay(
+ tf.keras.optimizers.SGD)
+opt = SGDW(weight_decay=weight_decay)
+```
+
+Note: when applying a decay to the learning rate, be sure to manually apply
+the decay to the `weight_decay` as well. For example:
+
+```python
+step = tf.Variable(0, trainable=False)
+schedule = tf.optimizers.schedules.PiecewiseConstantDecay(
+ [10000, 15000], [1e-0, 1e-1, 1e-2])
+# lr and wd can be a function or a tensor
+lr = 1e-1 * schedule(step)
+wd = lambda: 1e-4 * schedule(step)
+
+# ...
+
+optimizer = tfa.optimizers.SGDW(
+ learning_rate=lr, weight_decay=wd, momentum=0.9)
+```
+
+## `__init__`
+
+``` python
+__init__(
+ weight_decay,
+ learning_rate=0.001,
+ momentum=0.0,
+ nesterov=False,
+ name='SGDW',
+ **kwargs
+)
+```
+
+Construct a new SGDW optimizer.
+
+For further information see the documentation of the SGD Optimizer.
+
+#### Args:
+
+
+* `weight_decay`: A `Tensor` or a floating point value. The factor by
+ which a variable is decayed in the update step.
+* `learning_rate`: float hyperparameter >= 0. Learning rate.
+* `momentum`: float hyperparameter >= 0 that accelerates SGD in the
+ relevant direction and dampens oscillations.
+* `nesterov`: boolean. Whether to apply Nesterov momentum.
+* `name`: Optional name prefix for the operations created when applying
+ gradients. Defaults to 'SGDW'.
+* `**kwargs`: keyword arguments. Allowed to be {`clipnorm`,
+ `clipvalue`, `lr`, `decay`}. `clipnorm` is clip gradients by
+ norm; `clipvalue` is clip gradients by value, `decay` is
+ included for backward compatibility to allow time inverse decay
+ of learning rate. `lr` is included for backward compatibility,
+ recommended to use `learning_rate` instead.
+
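+A minimal construction sketch with illustrative hyperparameter values (not
+part of the original docstring):
+
+```python
+import tensorflow_addons as tfa
+
+opt = tfa.optimizers.SGDW(
+    weight_decay=1e-4, learning_rate=0.1, momentum=0.9, nesterov=True)
+```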
+
+
+## Properties
+
+### `iterations`
+
+Variable. The number of training steps this Optimizer has run.
+
+
+### `weights`
+
+Returns variables of this Optimizer based on the order created.
+
+
+
+
+## Methods
+
+### `add_slot`
+
+``` python
+add_slot(
+ var,
+ slot_name,
+ initializer='zeros'
+)
+```
+
+Add a new slot variable for `var`.
+
+
+### `add_weight`
+
+``` python
+add_weight(
+ name,
+ shape,
+ dtype=None,
+ initializer='zeros',
+ trainable=None,
+ synchronization=tf_variables.VariableSynchronization.AUTO,
+ aggregation=tf_variables.VariableAggregation.NONE
+)
+```
+
+
+
+
+### `apply_gradients`
+
+``` python
+apply_gradients(
+ grads_and_vars,
+ name=None,
+ decay_var_list=None
+)
+```
+
+Apply gradients to variables.
+
+This is the second part of `minimize()`. It returns an `Operation` that
+applies gradients.
+
+#### Args:
+
+
+* `grads_and_vars`: List of (gradient, variable) pairs.
+* `name`: Optional name for the returned operation. Default to the
+ name passed to the `Optimizer` constructor.
+* `decay_var_list`: Optional list of variables to be decayed. Defaults
+ to all variables in var_list.
+
+#### Returns:
+
+An `Operation` that applies the specified gradients. If
+`global_step` was not None, that operation also increments
+`global_step`.
+
+
+#### Raises:
+
+
+* `TypeError`: If `grads_and_vars` is malformed.
+* `ValueError`: If none of the variables have gradients.
+
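+A self-contained sketch of selective decay via `decay_var_list` (the
+variables and loss are illustrative assumptions, not from the original
+docstring):
+
+```python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+opt = tfa.optimizers.SGDW(weight_decay=1e-4, learning_rate=0.1)
+w = tf.Variable([[1.0, 2.0]])
+b = tf.Variable([0.0])
+with tf.GradientTape() as tape:
+    loss = tf.reduce_sum(w) + tf.reduce_sum(b)
+grads = tape.gradient(loss, [w, b])
+# Decay only `w`; `b` is updated from its gradient but not decayed.
+opt.apply_gradients(zip(grads, [w, b]), decay_var_list=[w])
+```
+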
+### `from_config`
+
+``` python
+from_config(
+ cls,
+ config,
+ custom_objects=None
+)
+```
+
+Creates an optimizer from its config.
+
+This method is the reverse of `get_config`,
+capable of instantiating the same optimizer from the config
+dictionary.
+
+#### Arguments:
+
+
+* `config`: A Python dictionary, typically the output of get_config.
+* `custom_objects`: A Python dictionary mapping names to additional Python
+ objects used to create this optimizer, such as a function used for a
+ hyperparameter.
+
+
+#### Returns:
+
+An optimizer instance.
+
+
+### `get_config`
+
+``` python
+get_config()
+```
+
+
+
+
+### `get_gradients`
+
+``` python
+get_gradients(
+ loss,
+ params
+)
+```
+
+Returns gradients of `loss` with respect to `params`.
+
+
+#### Arguments:
+
+
+* `loss`: Loss tensor.
+* `params`: List of variables.
+
+
+#### Returns:
+
+List of gradient tensors.
+
+
+
+#### Raises:
+
+
+* `ValueError`: In case any gradient cannot be computed (e.g. if gradient
+ function not implemented).
+
+### `get_slot`
+
+``` python
+get_slot(
+ var,
+ slot_name
+)
+```
+
+
+
+
+### `get_slot_names`
+
+``` python
+get_slot_names()
+```
+
+A list of names for this optimizer's slots.
+
+
+### `get_updates`
+
+``` python
+get_updates(
+ loss,
+ params
+)
+```
+
+
+
+
+### `get_weights`
+
+``` python
+get_weights()
+```
+
+
+
+
+### `minimize`
+
+``` python
+minimize(
+ loss,
+ var_list,
+ grad_loss=None,
+ name=None,
+ decay_var_list=None
+)
+```
+
+Minimize `loss` by updating `var_list`.
+
+This method simply computes the gradients using `tf.GradientTape` and
+calls `apply_gradients()`. If you want to process the gradients before
+applying them, call `tf.GradientTape` and `apply_gradients()` explicitly
+instead of using this function.
+
+#### Args:
+
+
+* `loss`: A callable taking no arguments which returns the value to
+ minimize.
+* `var_list`: list or tuple of `Variable` objects to update to
+ minimize `loss`, or a callable returning the list or tuple of
+ `Variable` objects. Use a callable when the variable list would
+ otherwise be incomplete before `minimize`, since the variables
+ are created the first time `loss` is called.
+* `grad_loss`: Optional. A `Tensor` holding the gradient computed for
+ `loss`.
+* `decay_var_list`: Optional list of variables to be decayed. Defaults
+ to all variables in var_list.
+* `name`: Optional name for the returned operation.
+
+#### Returns:
+
+An Operation that updates the variables in `var_list`. If
+`global_step` was not `None`, that operation also increments
+`global_step`.
+
+
+#### Raises:
+
+
+* `ValueError`: If some of the variables are not `Variable` objects.
+
+### `set_weights`
+
+``` python
+set_weights(weights)
+```
+
+
+
+
+### `variables`
+
+``` python
+variables()
+```
+
+Returns variables of this Optimizer based on the order created.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/optimizers/extend_with_decoupled_weight_decay.md b/docs/api_docs/python/tfa/optimizers/extend_with_decoupled_weight_decay.md
new file mode 100644
index 0000000000..833f2a8169
--- /dev/null
+++ b/docs/api_docs/python/tfa/optimizers/extend_with_decoupled_weight_decay.md
@@ -0,0 +1,83 @@
+
+
+
+
+
+# tfa.optimizers.extend_with_decoupled_weight_decay
+
+Factory function returning an optimizer class with decoupled weight decay.
+
+### Aliases:
+
+* `tfa.optimizers.extend_with_decoupled_weight_decay`
+* `tfa.optimizers.weight_decay_optimizers.extend_with_decoupled_weight_decay`
+
+``` python
+tfa.optimizers.extend_with_decoupled_weight_decay(base_optimizer)
+```
+
+
+
+Defined in [`optimizers/weight_decay_optimizers.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/optimizers/weight_decay_optimizers.py).
+
+
+
+Returns an optimizer class. An instance of the returned class computes the
+update step of `base_optimizer` and additionally decays the weights.
+E.g., the class returned by
+`extend_with_decoupled_weight_decay(tf.keras.optimizers.Adam)` is
+equivalent to `tfa.optimizers.AdamW`.
+
+The API of the new optimizer class slightly differs from the API of the
+base optimizer:
+- The first argument to the constructor is the weight decay rate.
+- `minimize` and `apply_gradients` accept the optional keyword argument
+ `decay_var_list`, which specifies the variables that should be decayed.
+ If `None`, all variables that are optimized are decayed.
+
+#### Usage example:
+
+
+```python
+# MyAdamW is a new class
+MyAdamW = extend_with_decoupled_weight_decay(tf.keras.optimizers.Adam)
+# Create a MyAdamW object
+optimizer = MyAdamW(weight_decay=0.001, learning_rate=0.001)
+# update var1, var2 but only decay var1
+optimizer.minimize(loss, var_list=[var1, var2], decay_var_list=[var1])
+```
+
+Note: this extension decays weights BEFORE applying the update based
+on the gradient, i.e. this extension only has the desired behaviour for
+optimizers which do not depend on the value of `var` in the update step!
+
+Note: when applying a decay to the learning rate, be sure to manually apply
+the decay to the `weight_decay` as well. For example:
+
+```python
+step = tf.Variable(0, trainable=False)
+schedule = tf.optimizers.schedules.PiecewiseConstantDecay(
+ [10000, 15000], [1e-0, 1e-1, 1e-2])
+# lr and wd can be a function or a tensor
+lr = 1e-1 * schedule(step)
+wd = lambda: 1e-4 * schedule(step)
+
+# ...
+
+optimizer = tfa.optimizers.AdamW(learning_rate=lr, weight_decay=wd)
+```
+
+Note: you might want to register your own custom optimizer using
+`tf.keras.utils.get_custom_objects()`.
+
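+A registration sketch for the note above (the class name is a hypothetical
+example; the registry call is the standard Keras custom-object mechanism):
+
+```python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+MyAdamW = tfa.optimizers.extend_with_decoupled_weight_decay(
+    tf.keras.optimizers.Adam)
+tf.keras.utils.get_custom_objects()['MyAdamW'] = MyAdamW
+```
+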
+#### Args:
+
+
+* `base_optimizer`: An optimizer class that inherits from
+ tf.optimizers.Optimizer.
+
+
+#### Returns:
+
+A new optimizer class that inherits from DecoupledWeightDecayExtension
+and base_optimizer.
diff --git a/docs/api_docs/python/tfa/optimizers/lazy_adam.md b/docs/api_docs/python/tfa/optimizers/lazy_adam.md
new file mode 100644
index 0000000000..d4c7a6e1c4
--- /dev/null
+++ b/docs/api_docs/python/tfa/optimizers/lazy_adam.md
@@ -0,0 +1,24 @@
+
+
+
+
+
+# Module: tfa.optimizers.lazy_adam
+
+Variant of the Adam optimizer that handles sparse updates more efficiently.
+
+
+
+Defined in [`optimizers/lazy_adam.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/optimizers/lazy_adam.py).
+
+
+
+Compared with the original Adam optimizer, the one in this file can
+provide a large improvement in model training throughput for some
+applications. However, it provides slightly different semantics than the
+original Adam algorithm, and may lead to different empirical results.
+
+## Classes
+
+[`class LazyAdam`](../../tfa/optimizers/LazyAdam.md): Variant of the Adam optimizer that handles sparse updates more efficiently.
+
diff --git a/docs/api_docs/python/tfa/optimizers/moving_average.md b/docs/api_docs/python/tfa/optimizers/moving_average.md
new file mode 100644
index 0000000000..e069a84e92
--- /dev/null
+++ b/docs/api_docs/python/tfa/optimizers/moving_average.md
@@ -0,0 +1,20 @@
+
+
+
+
+
+# Module: tfa.optimizers.moving_average
+
+
+
+
+
+Defined in [`optimizers/moving_average.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/optimizers/moving_average.py).
+
+
+
+
+## Classes
+
+[`class MovingAverage`](../../tfa/optimizers/MovingAverage.md): Optimizer that computes a moving average of the variables.
+
diff --git a/docs/api_docs/python/tfa/optimizers/weight_decay_optimizers.md b/docs/api_docs/python/tfa/optimizers/weight_decay_optimizers.md
new file mode 100644
index 0000000000..3029d1bb54
--- /dev/null
+++ b/docs/api_docs/python/tfa/optimizers/weight_decay_optimizers.md
@@ -0,0 +1,28 @@
+
+
+
+
+
+# Module: tfa.optimizers.weight_decay_optimizers
+
+Base class to make optimizers weight decay ready.
+
+
+
+Defined in [`optimizers/weight_decay_optimizers.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/optimizers/weight_decay_optimizers.py).
+
+
+
+
+## Classes
+
+[`class AdamW`](../../tfa/optimizers/AdamW.md): Optimizer that implements the Adam algorithm with weight decay.
+
+[`class DecoupledWeightDecayExtension`](../../tfa/optimizers/weight_decay_optimizers/DecoupledWeightDecayExtension.md): This class allows extending optimizers with decoupled weight decay.
+
+[`class SGDW`](../../tfa/optimizers/SGDW.md): Optimizer that implements the Momentum algorithm with weight_decay.
+
+## Functions
+
+[`extend_with_decoupled_weight_decay(...)`](../../tfa/optimizers/extend_with_decoupled_weight_decay.md): Factory function returning an optimizer class with decoupled weight decay.
+
diff --git a/docs/api_docs/python/tfa/optimizers/weight_decay_optimizers/DecoupledWeightDecayExtension.md b/docs/api_docs/python/tfa/optimizers/weight_decay_optimizers/DecoupledWeightDecayExtension.md
new file mode 100644
index 0000000000..e8ff936bde
--- /dev/null
+++ b/docs/api_docs/python/tfa/optimizers/weight_decay_optimizers/DecoupledWeightDecayExtension.md
@@ -0,0 +1,186 @@
+
+
+
+
+
+
+
+
+
+# tfa.optimizers.weight_decay_optimizers.DecoupledWeightDecayExtension
+
+## Class `DecoupledWeightDecayExtension`
+
+This class allows extending optimizers with decoupled weight decay.
+
+
+
+
+
+Defined in [`optimizers/weight_decay_optimizers.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/optimizers/weight_decay_optimizers.py).
+
+
+
+It implements the decoupled weight decay described by Loshchilov & Hutter
+(https://arxiv.org/pdf/1711.05101.pdf), in which the weight decay is
+decoupled from the optimization steps w.r.t. the loss function.
+For SGD variants, this simplifies hyperparameter search since it decouples
+the settings of weight decay and learning rate.
+For adaptive gradient algorithms, it regularizes variables with large
+gradients more than L2 regularization would, which was shown to yield
+better training loss and generalization error in the paper above.
+
+This class alone is not an optimizer but rather extends existing
+optimizers with decoupled weight decay. We explicitly define the two
+examples used in the above paper (SGDW and AdamW), but in general this
+can extend any `OptimizerX` by using
+`extend_with_decoupled_weight_decay(OptimizerX)` and passing
+`weight_decay` to the constructor of the returned class.
+In order for it to work, it must be the first class the Optimizer with
+weight decay inherits from, e.g.
+
+```python
+class AdamW(DecoupledWeightDecayExtension, tf.keras.optimizers.Adam):
+ def __init__(self, weight_decay, *args, **kwargs):
+ super(AdamW, self).__init__(weight_decay, *args, **kwargs)
+```
+
+Note: this extension decays weights BEFORE applying the update based
+on the gradient, i.e. this extension only has the desired behaviour for
+optimizers which do not depend on the value of `var` in the update step!
+
+Note: when applying a decay to the learning rate, be sure to manually apply
+the decay to the `weight_decay` as well. For example:
+
+```python
+step = tf.Variable(0, trainable=False)
+schedule = tf.optimizers.schedules.PiecewiseConstantDecay(
+ [10000, 15000], [1e-0, 1e-1, 1e-2])
+# lr and wd can be a function or a tensor
+lr = 1e-1 * schedule(step)
+wd = lambda: 1e-4 * schedule(step)
+
+# ...
+
+optimizer = tfa.optimizers.AdamW(learning_rate=lr, weight_decay=wd)
+```
+
+## `__init__`
+
+``` python
+__init__(
+ weight_decay,
+ **kwargs
+)
+```
+
+Extension class that adds weight decay to an optimizer.
+
+
+#### Args:
+
+
+* `weight_decay`: A `Tensor` or a floating point value, the factor by
+ which a variable is decayed in the update step.
+* `**kwargs`: Additional keyword arguments, forwarded to the constructor
+ of the wrapped optimizer.
+
+
+
+## Methods
+
+### `apply_gradients`
+
+``` python
+apply_gradients(
+ grads_and_vars,
+ name=None,
+ decay_var_list=None
+)
+```
+
+Apply gradients to variables.
+
+This is the second part of `minimize()`. It returns an `Operation` that
+applies gradients.
+
+#### Args:
+
+
+* `grads_and_vars`: List of (gradient, variable) pairs.
+* `name`: Optional name for the returned operation. Default to the
+ name passed to the `Optimizer` constructor.
+* `decay_var_list`: Optional list of variables to be decayed. Defaults
+ to all variables in var_list.
+
+#### Returns:
+
+An `Operation` that applies the specified gradients. If
+`global_step` was not None, that operation also increments
+`global_step`.
+
+
+#### Raises:
+
+
+* `TypeError`: If `grads_and_vars` is malformed.
+* `ValueError`: If none of the variables have gradients.
+
+### `get_config`
+
+``` python
+get_config()
+```
+
+
+
+
+### `minimize`
+
+``` python
+minimize(
+ loss,
+ var_list,
+ grad_loss=None,
+ name=None,
+ decay_var_list=None
+)
+```
+
+Minimize `loss` by updating `var_list`.
+
+This method simply computes the gradients using `tf.GradientTape` and
+calls `apply_gradients()`. If you want to process the gradients before
+applying them, call `tf.GradientTape` and `apply_gradients()` explicitly
+instead of using this function.
+
+#### Args:
+
+
+* `loss`: A callable taking no arguments which returns the value to
+ minimize.
+* `var_list`: list or tuple of `Variable` objects to update to
+ minimize `loss`, or a callable returning the list or tuple of
+ `Variable` objects. Use a callable when the variable list would
+ otherwise be incomplete before `minimize`, since the variables
+ are created the first time `loss` is called.
+* `grad_loss`: Optional. A `Tensor` holding the gradient computed for
+ `loss`.
+* `decay_var_list`: Optional list of variables to be decayed. Defaults
+ to all variables in var_list.
+* `name`: Optional name for the returned operation.
+
+#### Returns:
+
+An Operation that updates the variables in `var_list`. If
+`global_step` was not `None`, that operation also increments
+`global_step`.
+
+
+#### Raises:
+
+
+* `ValueError`: If some of the variables are not `Variable` objects.
+
+
+
diff --git a/docs/api_docs/python/tfa/rnn.md b/docs/api_docs/python/tfa/rnn.md
new file mode 100644
index 0000000000..07eac378ce
--- /dev/null
+++ b/docs/api_docs/python/tfa/rnn.md
@@ -0,0 +1,26 @@
+
+
+
+
+
+# Module: tfa.rnn
+
+Customized RNN cells.
+
+
+
+Defined in [`rnn/__init__.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/rnn/__init__.py).
+
+
+
+
+## Modules
+
+[`cell`](../tfa/rnn/cell.md) module: Module for RNN Cells.
+
+## Classes
+
+[`class LayerNormLSTMCell`](../tfa/rnn/LayerNormLSTMCell.md): LSTM cell with layer normalization and recurrent dropout.
+
+[`class NASCell`](../tfa/rnn/NASCell.md): Neural Architecture Search (NAS) recurrent network cell.
+
diff --git a/docs/api_docs/python/tfa/rnn/LayerNormLSTMCell.md b/docs/api_docs/python/tfa/rnn/LayerNormLSTMCell.md
new file mode 100644
index 0000000000..4d13c7dd0a
--- /dev/null
+++ b/docs/api_docs/python/tfa/rnn/LayerNormLSTMCell.md
@@ -0,0 +1,995 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# tfa.rnn.LayerNormLSTMCell
+
+## Class `LayerNormLSTMCell`
+
+LSTM cell with layer normalization and recurrent dropout.
+
+
+
+### Aliases:
+
+* Class `tfa.rnn.LayerNormLSTMCell`
+* Class `tfa.rnn.cell.LayerNormLSTMCell`
+
+
+
+Defined in [`rnn/cell.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/rnn/cell.py).
+
+
+
+This class adds layer normalization and recurrent dropout to an LSTM unit.
+Layer normalization implementation is based on:
+
+ https://arxiv.org/abs/1607.06450.
+
+"Layer Normalization" Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton
+
+and is applied before the internal nonlinearities.
+Recurrent dropout is based on:
+
+ https://arxiv.org/abs/1603.05118
+
+"Recurrent Dropout without Memory Loss"
+Stanislau Semeniuta, Aliaksei Severyn, Erhardt Barth.
+
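+A minimal usage sketch (not from the original docstring; the unit count and
+input shape are illustrative assumptions):
+
+```python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+cell = tfa.rnn.LayerNormLSTMCell(units=64, recurrent_dropout=0.1)
+layer = tf.keras.layers.RNN(cell)
+# (batch, time, features) -> (batch, units)
+outputs = layer(tf.random.normal([8, 10, 16]))
+```
+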
+## `__init__`
+
+``` python
+__init__(
+ units,
+ activation='tanh',
+ recurrent_activation='sigmoid',
+ use_bias=True,
+ kernel_initializer='glorot_uniform',
+ recurrent_initializer='orthogonal',
+ bias_initializer='zeros',
+ unit_forget_bias=True,
+ kernel_regularizer=None,
+ recurrent_regularizer=None,
+ bias_regularizer=None,
+ kernel_constraint=None,
+ recurrent_constraint=None,
+ bias_constraint=None,
+ dropout=0.0,
+ recurrent_dropout=0.0,
+ norm_gamma_initializer='ones',
+ norm_beta_initializer='zeros',
+ norm_epsilon=0.001,
+ **kwargs
+)
+```
+
+Initializes the LSTM cell.
+
+
+#### Args:
+
+
+* `units`: Positive integer, dimensionality of the output space.
+* `activation`: Activation function to use. Default: hyperbolic tangent
+ (`tanh`). If you pass `None`, no activation is applied (ie.
+ "linear" activation: `a(x) = x`).
+* `recurrent_activation`: Activation function to use for the recurrent
+ step. Default: sigmoid (`sigmoid`). If you pass `None`, no
+ activation is applied (ie. "linear" activation: `a(x) = x`).
+* `use_bias`: Boolean, whether the layer uses a bias vector.
+* `kernel_initializer`: Initializer for the `kernel` weights matrix, used
+ for the linear transformation of the inputs.
+* `recurrent_initializer`: Initializer for the `recurrent_kernel` weights
+ matrix, used for the linear transformation of the recurrent state.
+* `bias_initializer`: Initializer for the bias vector.
+* `unit_forget_bias`: Boolean. If True, add 1 to the bias of the forget
+ gate at initialization. Setting it to true will also force
+ `bias_initializer="zeros"`. This is recommended in [Jozefowicz et
+ al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf)
+* `kernel_regularizer`: Regularizer function applied to the `kernel`
+ weights matrix.
+* `recurrent_regularizer`: Regularizer function applied to
+ the `recurrent_kernel` weights matrix.
+* `bias_regularizer`: Regularizer function applied to the bias vector.
+* `kernel_constraint`: Constraint function applied to the `kernel`
+ weights matrix.
+* `recurrent_constraint`: Constraint function applied to the
+ `recurrent_kernel` weights matrix.
+* `bias_constraint`: Constraint function applied to the bias vector.
+* `dropout`: Float between 0 and 1. Fraction of the units to drop for the
+ linear transformation of the inputs.
+* `recurrent_dropout`: Float between 0 and 1. Fraction of the units to
+ drop for the linear transformation of the recurrent state.
+* `norm_gamma_initializer`: Initializer for the layer normalization gain
+ initial value.
+* `norm_beta_initializer`: Initializer for the layer normalization shift
+ initial value.
+* `norm_epsilon`: Float, the epsilon value for normalization layers.
+* `**kwargs`: Dict, the other keyword arguments for layer creation.
+
+
+
+## Properties
+
+### `activity_regularizer`
+
+Optional regularizer function for the output of this layer.
+
+
+### `dtype`
+
+
+
+
+### `dynamic`
+
+
+
+
+### `input`
+
+Retrieves the input tensor(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input tensor or list of input tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layers.
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+* `AttributeError`: If no inbound nodes are found.
+
+### `input_mask`
+
+Retrieves the input mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input mask tensor (potentially None) or list of input
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layers.
+
+### `input_shape`
+
+Retrieves the input shape(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer, or if all inputs
+have the same shape.
+
+#### Returns:
+
+Input shape, as an integer shape tuple
+(or list of shape tuples, one tuple per input tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined input_shape.
+* `RuntimeError`: if called in Eager mode.
+
+
+
+
+
+
+### `losses`
+
+Losses which are associated with this `Layer`.
+
+Variable regularization tensors are created when this property is accessed,
+so it is eager safe: accessing `losses` under a `tf.GradientTape` will
+propagate gradients back to the corresponding variables.
+
+#### Returns:
+
+A list of tensors.
+
+
+### `metrics`
+
+
+
+
+### `name`
+
+
+
+
+### `name_scope`
+
+Returns a `tf.name_scope` instance for this class.
+
+
+### `non_trainable_variables`
+
+
+
+
+### `non_trainable_weights`
+
+
+
+
+### `output`
+
+Retrieves the output tensor(s) of a layer.
+
+Only applicable if the layer has exactly one output,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output tensor or list of output tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to more than one incoming
+ layers.
+* `RuntimeError`: if called in Eager mode.
+
+### `output_mask`
+
+Retrieves the output mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output mask tensor (potentially None) or list of output
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layers.
+
+### `output_shape`
+
+Retrieves the output shape(s) of a layer.
+
+Only applicable if the layer has one output,
+or if all outputs have the same shape.
+
+#### Returns:
+
+Output shape, as an integer shape tuple
+(or list of shape tuples, one tuple per output tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined output shape.
+* `RuntimeError`: if called in Eager mode.
+
+### `submodules`
+
+Sequence of all sub-modules.
+
+Submodules are modules which are properties of this module, or found as
+properties of modules which are properties of this module (and so on).
+
+```
+a = tf.Module()
+b = tf.Module()
+c = tf.Module()
+a.b = b
+b.c = c
+assert list(a.submodules) == [b, c]
+assert list(b.submodules) == [c]
+assert list(c.submodules) == []
+```
+
+#### Returns:
+
+A sequence of all submodules.
+
+
+### `trainable`
+
+
+
+
+### `trainable_variables`
+
+
+
+
+### `trainable_weights`
+
+
+
+
+### `updates`
+
+
+
+
+### `variables`
+
+Returns the list of all layer variables/weights.
+
+Alias of `self.weights`.
+
+#### Returns:
+
+A list of variables.
+
+
+### `weights`
+
+Returns the list of all layer variables/weights.
+
+
+#### Returns:
+
+A list of variables.
+
+
+
+
+## Methods
+
+### `__call__`
+
+``` python
+__call__(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Wraps `call`, applying pre- and post-processing steps.
+
+
+#### Arguments:
+
+
+* `inputs`: input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+
+#### Note:
+
+- The following optional keyword arguments are reserved for specific uses:
+ * `training`: Boolean scalar tensor of Python boolean indicating
+ whether the `call` is meant for training or inference.
+ * `mask`: Boolean input mask.
+- If the layer's `call` method takes a `mask` argument (as some Keras
+ layers do), its default value will be set to the mask generated
+ for `inputs` by the previous layer (if `inputs` did come from
+ a layer that generated a corresponding mask, i.e. if it came from
+ a Keras layer with masking support).
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer's `call` method returns None (an invalid value).
+
+### `apply`
+
+``` python
+apply(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Apply the layer on an input.
+
+This is an alias of `self.__call__`.
+
+#### Arguments:
+
+
+* `inputs`: Input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+### `build`
+
+``` python
+build(input_shape)
+```
+
+
+
+
+### `compute_mask`
+
+``` python
+compute_mask(
+ inputs,
+ mask=None
+)
+```
+
+Computes an output mask tensor.
+
+
+#### Arguments:
+
+
+* `inputs`: Tensor or list of tensors.
+* `mask`: Tensor or list of tensors.
+
+
+#### Returns:
+
+None or a tensor (or list of tensors,
+ one per output tensor of the layer).
+
+
+### `compute_output_shape`
+
+``` python
+compute_output_shape(input_shape)
+```
+
+Computes the output shape of the layer.
+
+Assumes that the layer will be built
+to match the input shape provided.
+
+#### Arguments:
+
+
+* `input_shape`: Shape tuple (tuple of integers)
+ or list of shape tuples (one per output tensor of the layer).
+ Shape tuples can include None for free dimensions,
+ instead of an integer.
+
+
+#### Returns:
+
+An output shape tuple.
+
+
+### `count_params`
+
+``` python
+count_params()
+```
+
+Count the total number of scalars composing the weights.
+
+
+#### Returns:
+
+An integer count.
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer isn't yet built
+ (in which case its weights aren't yet defined).
+
+### `from_config`
+
+``` python
+from_config(
+ cls,
+ config
+)
+```
+
+Creates a layer from its config.
+
+This method is the reverse of `get_config`,
+capable of instantiating the same layer from the config
+dictionary. It does not handle layer connectivity
+(handled by Network), nor weights (handled by `set_weights`).
+
+#### Arguments:
+
+
+* `config`: A Python dictionary, typically the
+ output of get_config.
+
+
+#### Returns:
+
+A layer instance.
+
+
+### `get_config`
+
+``` python
+get_config()
+```
+
+
+
+
+### `get_dropout_mask_for_cell`
+
+``` python
+get_dropout_mask_for_cell(
+ inputs,
+ training,
+ count=1
+)
+```
+
+Get the dropout mask for RNN cell's input.
+
+It will create a mask based on context if there is no existing cached
+mask. If a new mask is generated, it will update the cache in the cell.
+
+#### Args:
+
+
+* `inputs`: the input tensor whose shape will be used to generate the
+ dropout mask.
+* `training`: boolean tensor, whether it's in training mode; dropout will
+ be ignored in non-training mode.
+* `count`: int, how many dropout masks will be generated. Useful for cells
+ that have internal weights fused together.
+
+#### Returns:
+
+List of mask tensors, generated or cached masks based on context.
+
+
+### `get_initial_state`
+
+``` python
+get_initial_state(
+ inputs=None,
+ batch_size=None,
+ dtype=None
+)
+```
+
+
+
+
+### `get_input_at`
+
+``` python
+get_input_at(node_index)
+```
+
+Retrieves the input tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+### `get_input_mask_at`
+
+``` python
+get_input_mask_at(node_index)
+```
+
+Retrieves the input mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple inputs).
+
+
+### `get_input_shape_at`
+
+``` python
+get_input_shape_at(node_index)
+```
+
+Retrieves the input shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+### `get_losses_for`
+
+``` python
+get_losses_for(inputs)
+```
+
+Retrieves losses relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of loss tensors of the layer that depend on `inputs`.
+
+
+### `get_output_at`
+
+``` python
+get_output_at(node_index)
+```
+
+Retrieves the output tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+### `get_output_mask_at`
+
+``` python
+get_output_mask_at(node_index)
+```
+
+Retrieves the output mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple outputs).
+
+
+### `get_output_shape_at`
+
+``` python
+get_output_shape_at(node_index)
+```
+
+Retrieves the output shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+### `get_recurrent_dropout_mask_for_cell`
+
+``` python
+get_recurrent_dropout_mask_for_cell(
+ inputs,
+ training,
+ count=1
+)
+```
+
+Get the recurrent dropout mask for RNN cell.
+
+It will create a mask based on context if there is no existing cached
+mask. If a new mask is generated, it will update the cache in the cell.
+
+#### Args:
+
+
+* `inputs`: the input tensor whose shape will be used to generate the
+ dropout mask.
+* `training`: boolean tensor, whether it's in training mode; dropout will
+ be ignored in non-training mode.
+* `count`: int, how many dropout masks will be generated. Useful for cells
+ that have internal weights fused together.
+
+#### Returns:
+
+List of mask tensors, generated or cached masks based on context.
+
+
+### `get_updates_for`
+
+``` python
+get_updates_for(inputs)
+```
+
+Retrieves updates relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of update ops of the layer that depend on `inputs`.
+
+
+### `get_weights`
+
+``` python
+get_weights()
+```
+
+Returns the current weights of the layer.
+
+
+#### Returns:
+
+Weights values as a list of numpy arrays.
+
+
+### `reset_dropout_mask`
+
+``` python
+reset_dropout_mask()
+```
+
+Reset the cached dropout masks if any.
+
+It is important for the RNN layer to invoke this in its `call()` method so
+that the cached mask is cleared before calling `cell.call()`. The mask
+should be cached across timesteps within the same batch, but shouldn't
+be cached between batches. Otherwise it will introduce unreasonable bias
+against certain indices of data within the batch.
+
+### `reset_recurrent_dropout_mask`
+
+``` python
+reset_recurrent_dropout_mask()
+```
+
+Reset the cached recurrent dropout masks if any.
+
+It is important for the RNN layer to invoke this in its `call()` method so
+that the cached mask is cleared before calling `cell.call()`. The mask
+should be cached across timesteps within the same batch, but shouldn't
+be cached between batches. Otherwise it will introduce unreasonable bias
+against certain indices of data within the batch.
+
+### `set_weights`
+
+``` python
+set_weights(weights)
+```
+
+Sets the weights of the layer, from Numpy arrays.
+
+
+#### Arguments:
+
+
+* `weights`: a list of Numpy arrays. The number
+ of arrays and their shape must match
+ number of the dimensions of the weights
+ of the layer (i.e. it should match the
+ output of `get_weights`).
+
+
+#### Raises:
+
+
+* `ValueError`: If the provided weights list does not match the
+ layer's specifications.
+
+### `with_name_scope`
+
+``` python
+with_name_scope(
+ cls,
+ method
+)
+```
+
+Decorator to automatically enter the module name scope.
+
+```
+class MyModule(tf.Module):
+ @tf.Module.with_name_scope
+ def __call__(self, x):
+ if not hasattr(self, 'w'):
+ self.w = tf.Variable(tf.random.normal([x.shape[1], 64]))
+ return tf.matmul(x, self.w)
+```
+
+Using the above module would produce `tf.Variable`s and `tf.Tensor`s whose
+names included the module name:
+
+```
+mod = MyModule()
+mod(tf.ones([8, 32]))
+# ==> <tf.Tensor: shape=(8, 64), dtype=float32, ...>
+mod.w
+# ==> <tf.Variable 'my_module/w:0' shape=(32, 64), ...>
+```
+
+#### Args:
+
+
+* `method`: The method to wrap.
+
+
+#### Returns:
+
+The original method wrapped such that it enters the module's name scope.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/rnn/NASCell.md b/docs/api_docs/python/tfa/rnn/NASCell.md
new file mode 100644
index 0000000000..eb101fabd8
--- /dev/null
+++ b/docs/api_docs/python/tfa/rnn/NASCell.md
@@ -0,0 +1,871 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# tfa.rnn.NASCell
+
+## Class `NASCell`
+
+Neural Architecture Search (NAS) recurrent network cell.
+
+
+
+### Aliases:
+
+* Class `tfa.rnn.NASCell`
+* Class `tfa.rnn.cell.NASCell`
+
+
+
+Defined in [`rnn/cell.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/rnn/cell.py).
+
+
+
+This implements the recurrent cell from the paper:
+
+ https://arxiv.org/abs/1611.01578
+
+Barret Zoph and Quoc V. Le.
+"Neural Architecture Search with Reinforcement Learning" Proc. ICLR 2017.
+
+The class uses an optional projection layer.
+
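+A minimal usage sketch (not from the original docstring; the sizes are
+illustrative assumptions). With `projection` set, the cell's output size is
+the projection dimension rather than `units`:
+
+```python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+cell = tfa.rnn.NASCell(units=128, projection=64)
+layer = tf.keras.layers.RNN(cell)
+# (batch, time, features) -> (batch, 64)
+outputs = layer(tf.random.normal([4, 20, 32]))
+```
+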
+## `__init__`
+
+``` python
+__init__(
+ units,
+ projection=None,
+ use_bias=False,
+ kernel_initializer='glorot_uniform',
+ recurrent_initializer='glorot_uniform',
+ projection_initializer='glorot_uniform',
+ bias_initializer='zeros',
+ **kwargs
+)
+```
+
+Initialize the parameters for a NAS cell.
+
+
+#### Args:
+
+
+* `units`: int, The number of units in the NAS cell.
+* `projection`: (optional) int, The output dimensionality for the
+ projection matrices. If None, no projection is performed.
+* `use_bias`: (optional) bool, If True then use biases within the cell.
+ This is False by default.
+* `kernel_initializer`: Initializer for kernel weight.
+* `recurrent_initializer`: Initializer for recurrent kernel weight.
+* `projection_initializer`: Initializer for projection weight, used when
+ projection is not None.
+* `bias_initializer`: Initializer for bias, used when use_bias is True.
+* `**kwargs`: Additional keyword arguments.
+
+
+
+## Properties
+
+### `activity_regularizer`
+
+Optional regularizer function for the output of this layer.
+
+
+### `dtype`
+
+
+
+
+### `dynamic`
+
+
+
+
+### `input`
+
+Retrieves the input tensor(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input tensor or list of input tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layers.
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+* `AttributeError`: If no inbound nodes are found.
+
+### `input_mask`
+
+Retrieves the input mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input mask tensor (potentially None) or list of input
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layers.
+
+### `input_shape`
+
+Retrieves the input shape(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer, or if all inputs
+have the same shape.
+
+#### Returns:
+
+Input shape, as an integer shape tuple
+(or list of shape tuples, one tuple per input tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined input_shape.
+* `RuntimeError`: if called in Eager mode.
+
+
+
+
+
+
+### `losses`
+
+Losses which are associated with this `Layer`.
+
+Variable regularization tensors are created when this property is accessed,
+so it is eager safe: accessing `losses` under a `tf.GradientTape` will
+propagate gradients back to the corresponding variables.
+
+#### Returns:
+
+A list of tensors.
+
+
+### `metrics`
+
+
+
+
+### `name`
+
+
+
+
+### `name_scope`
+
+Returns a `tf.name_scope` instance for this class.
+
+
+### `non_trainable_variables`
+
+
+
+
+### `non_trainable_weights`
+
+
+
+
+### `output`
+
+Retrieves the output tensor(s) of a layer.
+
+Only applicable if the layer has exactly one output,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output tensor or list of output tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to more than one incoming
+ layers.
+* `RuntimeError`: if called in Eager mode.
+
+### `output_mask`
+
+Retrieves the output mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output mask tensor (potentially None) or list of output
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layers.
+
+### `output_shape`
+
+Retrieves the output shape(s) of a layer.
+
+Only applicable if the layer has one output,
+or if all outputs have the same shape.
+
+#### Returns:
+
+Output shape, as an integer shape tuple
+(or list of shape tuples, one tuple per output tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined output shape.
+* `RuntimeError`: if called in Eager mode.
+
+### `output_size`
+
+
+
+
+### `state_size`
+
+
+
+
+### `submodules`
+
+Sequence of all sub-modules.
+
+Submodules are modules which are properties of this module, or found as
+properties of modules which are properties of this module (and so on).
+
+```
+a = tf.Module()
+b = tf.Module()
+c = tf.Module()
+a.b = b
+b.c = c
+assert list(a.submodules) == [b, c]
+assert list(b.submodules) == [c]
+assert list(c.submodules) == []
+```
+
+#### Returns:
+
+A sequence of all submodules.
+
+
+### `trainable`
+
+
+
+
+### `trainable_variables`
+
+
+
+
+### `trainable_weights`
+
+
+
+
+### `updates`
+
+
+
+
+### `variables`
+
+Returns the list of all layer variables/weights.
+
+Alias of `self.weights`.
+
+#### Returns:
+
+A list of variables.
+
+
+### `weights`
+
+Returns the list of all layer variables/weights.
+
+
+#### Returns:
+
+A list of variables.
+
+
+
+
+## Methods
+
+### `__call__`
+
+``` python
+__call__(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Wraps `call`, applying pre- and post-processing steps.
+
+
+#### Arguments:
+
+
+* `inputs`: input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+
+#### Note:
+
+- The following optional keyword arguments are reserved for specific uses:
+ * `training`: Boolean scalar tensor of Python boolean indicating
+ whether the `call` is meant for training or inference.
+ * `mask`: Boolean input mask.
+- If the layer's `call` method takes a `mask` argument (as some Keras
+ layers do), its default value will be set to the mask generated
+ for `inputs` by the previous layer (if `inputs` did come from
+ a layer that generated a corresponding mask, i.e. if it came from
+ a Keras layer with masking support).
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer's `call` method returns None (an invalid value).
+
+### `apply`
+
+``` python
+apply(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Apply the layer on an input.
+
+This is an alias of `self.__call__`.
+
+#### Arguments:
+
+
+* `inputs`: Input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+### `build`
+
+``` python
+build(inputs_shape)
+```
+
+
+
+
+### `compute_mask`
+
+``` python
+compute_mask(
+ inputs,
+ mask=None
+)
+```
+
+Computes an output mask tensor.
+
+
+#### Arguments:
+
+
+* `inputs`: Tensor or list of tensors.
+* `mask`: Tensor or list of tensors.
+
+
+#### Returns:
+
+None or a tensor (or list of tensors,
+ one per output tensor of the layer).
+
+
+### `compute_output_shape`
+
+``` python
+compute_output_shape(input_shape)
+```
+
+Computes the output shape of the layer.
+
+Assumes that the layer will be built
+to match the input shape provided.
+
+#### Arguments:
+
+
+* `input_shape`: Shape tuple (tuple of integers)
+ or list of shape tuples (one per output tensor of the layer).
+ Shape tuples can include None for free dimensions,
+ instead of an integer.
+
+
+#### Returns:
+
+An output shape tuple.
+
+
+### `count_params`
+
+``` python
+count_params()
+```
+
+Count the total number of scalars composing the weights.
+
+
+#### Returns:
+
+An integer count.
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer isn't yet built
+ (in which case its weights aren't yet defined).
+
+### `from_config`
+
+``` python
+from_config(
+ cls,
+ config
+)
+```
+
+Creates a layer from its config.
+
+This method is the reverse of `get_config`,
+capable of instantiating the same layer from the config
+dictionary. It does not handle layer connectivity
+(handled by Network), nor weights (handled by `set_weights`).
+
+#### Arguments:
+
+
+* `config`: A Python dictionary, typically the
+ output of get_config.
+
+
+#### Returns:
+
+A layer instance.
+
+
+### `get_config`
+
+``` python
+get_config()
+```
+
+
+
+
+### `get_initial_state`
+
+``` python
+get_initial_state(
+ inputs=None,
+ batch_size=None,
+ dtype=None
+)
+```
+
+
+
+
+### `get_input_at`
+
+``` python
+get_input_at(node_index)
+```
+
+Retrieves the input tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+### `get_input_mask_at`
+
+``` python
+get_input_mask_at(node_index)
+```
+
+Retrieves the input mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple inputs).
+
+
+### `get_input_shape_at`
+
+``` python
+get_input_shape_at(node_index)
+```
+
+Retrieves the input shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+### `get_losses_for`
+
+``` python
+get_losses_for(inputs)
+```
+
+Retrieves losses relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of loss tensors of the layer that depend on `inputs`.
+
+
+### `get_output_at`
+
+``` python
+get_output_at(node_index)
+```
+
+Retrieves the output tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+### `get_output_mask_at`
+
+``` python
+get_output_mask_at(node_index)
+```
+
+Retrieves the output mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple outputs).
+
+
+### `get_output_shape_at`
+
+``` python
+get_output_shape_at(node_index)
+```
+
+Retrieves the output shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+### `get_updates_for`
+
+``` python
+get_updates_for(inputs)
+```
+
+Retrieves updates relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of update ops of the layer that depend on `inputs`.
+
+
+### `get_weights`
+
+``` python
+get_weights()
+```
+
+Returns the current weights of the layer.
+
+
+#### Returns:
+
+Weights values as a list of numpy arrays.
+
+
+### `set_weights`
+
+``` python
+set_weights(weights)
+```
+
+Sets the weights of the layer, from Numpy arrays.
+
+
+#### Arguments:
+
+
+* `weights`: a list of Numpy arrays. The number
+ of arrays and their shape must match
+ number of the dimensions of the weights
+ of the layer (i.e. it should match the
+ output of `get_weights`).
+
+
+#### Raises:
+
+
+* `ValueError`: If the provided weights list does not match the
+ layer's specifications.
+
+### `with_name_scope`
+
+``` python
+with_name_scope(
+ cls,
+ method
+)
+```
+
+Decorator to automatically enter the module name scope.
+
+```
+class MyModule(tf.Module):
+ @tf.Module.with_name_scope
+ def __call__(self, x):
+ if not hasattr(self, 'w'):
+ self.w = tf.Variable(tf.random.normal([x.shape[1], 64]))
+ return tf.matmul(x, self.w)
+```
+
+Using the above module would produce `tf.Variable`s and `tf.Tensor`s whose
+names included the module name:
+
+```
+mod = MyModule()
+mod(tf.ones([8, 32]))
+# ==> <tf.Tensor: shape=(8, 64), dtype=float32, ...>
+mod.w
+# ==> <tf.Variable 'my_module/w:0' shape=(32, 64), ...>
+```
+
+#### Args:
+
+
+* `method`: The method to wrap.
+
+
+#### Returns:
+
+The original method wrapped such that it enters the module's name scope.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/rnn/cell.md b/docs/api_docs/python/tfa/rnn/cell.md
new file mode 100644
index 0000000000..d271c28ee1
--- /dev/null
+++ b/docs/api_docs/python/tfa/rnn/cell.md
@@ -0,0 +1,22 @@
+
+
+
+
+
+# Module: tfa.rnn.cell
+
+Module for RNN Cells.
+
+
+
+Defined in [`rnn/cell.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/rnn/cell.py).
+
+
+
+
+## Classes
+
+[`class LayerNormLSTMCell`](../../tfa/rnn/LayerNormLSTMCell.md): LSTM cell with layer normalization and recurrent dropout.
+
+[`class NASCell`](../../tfa/rnn/NASCell.md): Neural Architecture Search (NAS) recurrent network cell.
+
diff --git a/docs/api_docs/python/tfa/seq2seq.md b/docs/api_docs/python/tfa/seq2seq.md
new file mode 100644
index 0000000000..6b734a1491
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq.md
@@ -0,0 +1,96 @@
+
+
+
+
+
+# Module: tfa.seq2seq
+
+Ops for building neural network sequence to sequence decoders and losses.
+
+
+
+Defined in [`seq2seq/__init__.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/__init__.py).
+
+
+
+
+## Modules
+
+[`attention_wrapper`](../tfa/seq2seq/attention_wrapper.md) module: A powerful dynamic attention wrapper object.
+
+[`basic_decoder`](../tfa/seq2seq/basic_decoder.md) module: A class of Decoders that may sample to generate the next input.
+
+[`beam_search_decoder`](../tfa/seq2seq/beam_search_decoder.md) module: A decoder that performs beam search.
+
+[`decoder`](../tfa/seq2seq/decoder.md) module: Seq2seq layer operations for use in neural networks.
+
+[`loss`](../tfa/seq2seq/loss.md) module: Seq2seq loss operations for use in sequence models.
+
+[`sampler`](../tfa/seq2seq/sampler.md) module: A library of samplers for use with SamplingDecoders.
+
+## Classes
+
+[`class AttentionMechanism`](../tfa/seq2seq/AttentionMechanism.md)
+
+[`class AttentionWrapper`](../tfa/seq2seq/AttentionWrapper.md): Wraps another `RNNCell` with attention.
+
+[`class AttentionWrapperState`](../tfa/seq2seq/AttentionWrapperState.md): `namedtuple` storing the state of an `AttentionWrapper`.
+
+[`class BahdanauAttention`](../tfa/seq2seq/BahdanauAttention.md): Implements Bahdanau-style (additive) attention.
+
+[`class BahdanauMonotonicAttention`](../tfa/seq2seq/BahdanauMonotonicAttention.md): Monotonic attention mechanism with Bahdanau-style energy function.
+
+[`class BaseDecoder`](../tfa/seq2seq/BaseDecoder.md): An RNN Decoder that is based on a Keras layer.
+
+[`class BasicDecoder`](../tfa/seq2seq/BasicDecoder.md): Basic sampling decoder.
+
+[`class BasicDecoderOutput`](../tfa/seq2seq/BasicDecoderOutput.md)
+
+[`class BeamSearchDecoder`](../tfa/seq2seq/BeamSearchDecoder.md): BeamSearch sampling decoder.
+
+[`class BeamSearchDecoderOutput`](../tfa/seq2seq/BeamSearchDecoderOutput.md)
+
+[`class BeamSearchDecoderState`](../tfa/seq2seq/BeamSearchDecoderState.md)
+
+[`class CustomSampler`](../tfa/seq2seq/CustomSampler.md): Base abstract class that allows the user to customize sampling.
+
+[`class Decoder`](../tfa/seq2seq/Decoder.md): An RNN Decoder abstract interface object.
+
+[`class FinalBeamSearchDecoderOutput`](../tfa/seq2seq/FinalBeamSearchDecoderOutput.md): Final outputs returned by the beam search after all decoding is finished.
+
+[`class GreedyEmbeddingSampler`](../tfa/seq2seq/GreedyEmbeddingSampler.md): A sampler for use during inference.
+
+[`class InferenceSampler`](../tfa/seq2seq/InferenceSampler.md): A helper to use during inference with a custom sampling function.
+
+[`class LuongAttention`](../tfa/seq2seq/LuongAttention.md): Implements Luong-style (multiplicative) attention scoring.
+
+[`class LuongMonotonicAttention`](../tfa/seq2seq/LuongMonotonicAttention.md): Monotonic attention mechanism with Luong-style energy function.
+
+[`class SampleEmbeddingSampler`](../tfa/seq2seq/SampleEmbeddingSampler.md): A sampler for use during inference.
+
+[`class Sampler`](../tfa/seq2seq/Sampler.md): Interface for implementing sampling in seq2seq decoders.
+
+[`class ScheduledEmbeddingTrainingSampler`](../tfa/seq2seq/ScheduledEmbeddingTrainingSampler.md): A training sampler that adds scheduled sampling.
+
+[`class ScheduledOutputTrainingSampler`](../tfa/seq2seq/ScheduledOutputTrainingSampler.md): A training sampler that adds scheduled sampling directly to outputs.
+
+[`class SequenceLoss`](../tfa/seq2seq/SequenceLoss.md): Weighted cross-entropy loss for a sequence of logits.
+
+[`class TrainingSampler`](../tfa/seq2seq/TrainingSampler.md): A Sampler for use during training.
+
+## Functions
+
+[`dynamic_decode(...)`](../tfa/seq2seq/dynamic_decode.md): Perform dynamic decoding with `decoder`.
+
+[`gather_tree_from_array(...)`](../tfa/seq2seq/gather_tree_from_array.md): Calculates the full beams for `TensorArray`s.
+
+[`hardmax(...)`](../tfa/seq2seq/hardmax.md): Returns batched one-hot vectors.
+
+[`monotonic_attention(...)`](../tfa/seq2seq/monotonic_attention.md): Compute monotonic attention distribution from choosing probabilities.
+
+[`safe_cumprod(...)`](../tfa/seq2seq/safe_cumprod.md): Computes cumprod of x in logspace using cumsum to avoid underflow.
+
+[`sequence_loss(...)`](../tfa/seq2seq/sequence_loss.md): Weighted cross-entropy loss for a sequence of logits.
+
+[`tile_batch(...)`](../tfa/seq2seq/tile_batch.md): Tile the batch dimension of a (possibly nested structure of) tensor(s).
+
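+#### Example:
+
+A minimal sketch of wiring these pieces together for teacher-forced
+training (the cell choice, layer sizes, and input shapes below are
+illustrative assumptions, not part of this API reference):
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+vocab_size, embedding_dim, units = 100, 16, 32
+batch_size, max_time = 8, 11
+
+# Decoder cell, sampler, and output projection.
+cell = tf.keras.layers.LSTMCell(units)
+sampler = tfa.seq2seq.TrainingSampler()
+decoder = tfa.seq2seq.BasicDecoder(
+    cell, sampler, output_layer=tf.keras.layers.Dense(vocab_size))
+
+# Embedded (teacher-forced) decoder inputs and their lengths.
+inputs = tf.random.normal([batch_size, max_time, embedding_dim])
+sequence_length = tf.fill([batch_size], max_time)
+
+initial_state = cell.get_initial_state(
+    batch_size=batch_size, dtype=tf.float32)
+outputs, _, _ = decoder(
+    inputs, initial_state=initial_state, sequence_length=sequence_length)
+logits = outputs.rnn_output  # [batch_size, max_time, vocab_size]
+```
+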
diff --git a/docs/api_docs/python/tfa/seq2seq/AttentionMechanism.md b/docs/api_docs/python/tfa/seq2seq/AttentionMechanism.md
new file mode 100644
index 0000000000..ef8158bd10
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/AttentionMechanism.md
@@ -0,0 +1,41 @@
+
+
+
+
+
+
+
+# tfa.seq2seq.AttentionMechanism
+
+## Class `AttentionMechanism`
+
+
+
+
+
+### Aliases:
+
+* Class `tfa.seq2seq.AttentionMechanism`
+* Class `tfa.seq2seq.attention_wrapper.AttentionMechanism`
+
+
+
+Defined in [`seq2seq/attention_wrapper.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/attention_wrapper.py).
+
+
+
+
+## Properties
+
+alignments_size
+
+
+
+
+state_size
+
+
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/AttentionWrapper.md b/docs/api_docs/python/tfa/seq2seq/AttentionWrapper.md
new file mode 100644
index 0000000000..f45fe89c7b
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/AttentionWrapper.md
@@ -0,0 +1,1000 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# tfa.seq2seq.AttentionWrapper
+
+## Class `AttentionWrapper`
+
+Wraps another `RNNCell` with attention.
+
+
+
+### Aliases:
+
+* Class `tfa.seq2seq.AttentionWrapper`
+* Class `tfa.seq2seq.attention_wrapper.AttentionWrapper`
+
+
+
+Defined in [`seq2seq/attention_wrapper.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/attention_wrapper.py).
+
+
+
+
+__init__
+
+``` python
+__init__(
+ cell,
+ attention_mechanism,
+ attention_layer_size=None,
+ alignment_history=False,
+ cell_input_fn=None,
+ output_attention=True,
+ initial_cell_state=None,
+ name=None,
+ attention_layer=None,
+ attention_fn=None
+)
+```
+
+Construct the `AttentionWrapper`.
+
+**NOTE** If you are using the `BeamSearchDecoder` with a cell wrapped
+in `AttentionWrapper`, then you must ensure that:
+
+- The encoder output has been tiled to `beam_width` via
+  `tfa.seq2seq.tile_batch` (NOT `tf.tile`).
+- The `batch_size` argument passed to the `get_initial_state` method of
+ this wrapper is equal to `true_batch_size * beam_width`.
+- The initial state created with `get_initial_state` above contains a
+ `cell_state` value containing properly tiled final state from the
+ encoder.
+
+#### An example:
+
+
+
+```
+tiled_encoder_outputs = tfa.seq2seq.tile_batch(
+    encoder_outputs, multiplier=beam_width)
+tiled_encoder_final_state = tfa.seq2seq.tile_batch(
+    encoder_final_state, multiplier=beam_width)
+tiled_sequence_length = tfa.seq2seq.tile_batch(
+    sequence_length, multiplier=beam_width)
+attention_mechanism = MyFavoriteAttentionMechanism(
+    num_units=attention_depth,
+    memory=tiled_encoder_outputs,
+    memory_sequence_length=tiled_sequence_length)
+attention_cell = AttentionWrapper(cell, attention_mechanism, ...)
+decoder_initial_state = attention_cell.get_initial_state(
+ batch_size=true_batch_size * beam_width, dtype=dtype)
+decoder_initial_state = decoder_initial_state.clone(
+ cell_state=tiled_encoder_final_state)
+```
+
+#### Args:
+
+
+* `cell`: An instance of `RNNCell`.
+* `attention_mechanism`: A list of `AttentionMechanism` instances or a
+ single instance.
+* `attention_layer_size`: A list of Python integers or a single Python
+ integer, the depth of the attention (output) layer(s). If None
+ (default), use the context as attention at each time step.
+ Otherwise, feed the context and cell output into the attention
+ layer to generate attention at each time step. If
+ attention_mechanism is a list, attention_layer_size must be a list
+ of the same length. If attention_layer is set, this must be None.
+  If attention_fn is set, it must be guaranteed that the outputs of
+  attention_fn also meet the above requirements.
+* `alignment_history`: Python boolean, whether to store alignment history
+ from all time steps in the final output state (currently stored as
+ a time major `TensorArray` on which you must call `stack()`).
+* `cell_input_fn`: (optional) A `callable`. The default is:
+ `lambda inputs, attention:
+ tf.concat([inputs, attention], -1)`.
+* `output_attention`: Python bool. If `True` (default), the output at
+ each time step is the attention value. This is the behavior of
+ Luong-style attention mechanisms. If `False`, the output at each
+ time step is the output of `cell`. This is the behavior of
+  Bahdanau-style attention mechanisms. In both cases, the
+ `attention` tensor is propagated to the next time step via the
+ state and is used there. This flag only controls whether the
+ attention mechanism is propagated up to the next cell in an RNN
+ stack or to the top RNN output.
+* `initial_cell_state`: The initial state value to use for the cell when
+ the user calls `get_initial_state()`. Note that if this value is
+ provided now, and the user uses a `batch_size` argument of
+ `get_initial_state` which does not match the batch size of
+ `initial_cell_state`, proper behavior is not guaranteed.
+* `name`: Name to use when creating ops.
+* `attention_layer`: A list of `tf.keras.layers.Layer` instances or a
+  single `tf.keras.layers.Layer` instance taking the context
+ and cell output as inputs to generate attention at each time step.
+ If None (default), use the context as attention at each time step.
+ If attention_mechanism is a list, attention_layer must be a list of
+  the same length. If attention_layer_size is set, this must be
+ None.
+* `attention_fn`: An optional callable function that allows users to
+ provide their own customized attention function, which takes input
+ (attention_mechanism, cell_output, attention_state,
+ attention_layer) and outputs (attention, alignments,
+ next_attention_state). If provided, the attention_layer_size should
+ be the size of the outputs of attention_fn.
+
+
+#### Raises:
+
+
+* `TypeError`: `attention_layer_size` is not None and
+ (`attention_mechanism` is a list but `attention_layer_size` is not;
+ or vice versa).
+* `ValueError`: if `attention_layer_size` is not None,
+ `attention_mechanism` is a list, and its length does not match that
+ of `attention_layer_size`; if `attention_layer_size` and
+ `attention_layer` are set simultaneously.
+
+
+
+## Properties
+
+activity_regularizer
+
+Optional regularizer function for the output of this layer.
+
+
+dtype
+
+
+
+
+dynamic
+
+
+
+
+
+input
+
+Retrieves the input tensor(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input tensor or list of input tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+* `RuntimeError`: If called in Eager mode.
+* `AttributeError`: If no inbound nodes are found.
+
+
+input_mask
+
+Retrieves the input mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input mask tensor (potentially None) or list of input
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+
+
+input_shape
+
+Retrieves the input shape(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer, or if all inputs
+have the same shape.
+
+#### Returns:
+
+Input shape, as an integer shape tuple
+(or list of shape tuples, one tuple per input tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined input_shape.
+* `RuntimeError`: if called in Eager mode.
+
+
+
+
+
+
+losses
+
+Losses which are associated with this `Layer`.
+
+Variable regularization tensors are created when this property is accessed,
+so it is eager safe: accessing `losses` under a `tf.GradientTape` will
+propagate gradients back to the corresponding variables.
+
+#### Returns:
+
+A list of tensors.
+
+
+metrics
+
+
+
+
+name
+
+
+
+
+name_scope
+
+Returns a `tf.name_scope` instance for this class.
+
+
+non_trainable_variables
+
+
+
+
+non_trainable_weights
+
+
+
+
+output
+
+Retrieves the output tensor(s) of a layer.
+
+Only applicable if the layer has exactly one output,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output tensor or list of output tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to more than one incoming
+  layer.
+* `RuntimeError`: if called in Eager mode.
+
+output_mask
+
+Retrieves the output mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output mask tensor (potentially None) or list of output
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+
+output_shape
+
+Retrieves the output shape(s) of a layer.
+
+Only applicable if the layer has one output,
+or if all outputs have the same shape.
+
+#### Returns:
+
+Output shape, as an integer shape tuple
+(or list of shape tuples, one tuple per output tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined output shape.
+* `RuntimeError`: if called in Eager mode.
+
+output_size
+
+
+
+
+state_size
+
+The `state_size` property of `AttentionWrapper`.
+
+
+#### Returns:
+
+An `AttentionWrapperState` tuple containing shapes used
+by this object.
+
+
+submodules
+
+Sequence of all sub-modules.
+
+Submodules are modules which are properties of this module, or found as
+properties of modules which are properties of this module (and so on).
+
+```
+a = tf.Module()
+b = tf.Module()
+c = tf.Module()
+a.b = b
+b.c = c
+assert list(a.submodules) == [b, c]
+assert list(b.submodules) == [c]
+assert list(c.submodules) == []
+```
+
+#### Returns:
+
+A sequence of all submodules.
+
+
+trainable
+
+
+
+
+trainable_variables
+
+
+
+
+trainable_weights
+
+
+
+
+updates
+
+
+
+
+variables
+
+Returns the list of all layer variables/weights.
+
+Alias of `self.weights`.
+
+#### Returns:
+
+A list of variables.
+
+
+weights
+
+Returns the list of all layer variables/weights.
+
+
+#### Returns:
+
+A list of variables.
+
+
+
+
+## Methods
+
+__call__
+
+``` python
+__call__(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Wraps `call`, applying pre- and post-processing steps.
+
+
+#### Arguments:
+
+
+* `inputs`: input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+
+#### Note:
+
+- The following optional keyword arguments are reserved for specific uses:
+  * `training`: Boolean scalar tensor or Python boolean indicating
+ whether the `call` is meant for training or inference.
+ * `mask`: Boolean input mask.
+- If the layer's `call` method takes a `mask` argument (as some Keras
+ layers do), its default value will be set to the mask generated
+  for `inputs` by the previous layer (if `inputs` did come from
+  a layer that generated a corresponding mask, i.e. if it came from
+  a Keras layer with masking support).
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer's `call` method returns None (an invalid value).
+
+apply
+
+``` python
+apply(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Apply the layer on an input.
+
+This is an alias of `self.__call__`.
+
+#### Arguments:
+
+
+* `inputs`: Input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+build
+
+``` python
+build(input_shape)
+```
+
+Creates the variables of the layer (optional, for subclass implementers).
+
+This is a method that implementers of subclasses of `Layer` or `Model`
+can override if they need a state-creation step in-between
+layer instantiation and layer call.
+
+This is typically used to create the weights of `Layer` subclasses.
+
+#### Arguments:
+
+
+* `input_shape`: Instance of `TensorShape`, or list of instances of
+ `TensorShape` if the layer expects a list of inputs
+ (one instance per input).
+
+compute_mask
+
+``` python
+compute_mask(
+ inputs,
+ mask=None
+)
+```
+
+Computes an output mask tensor.
+
+
+#### Arguments:
+
+
+* `inputs`: Tensor or list of tensors.
+* `mask`: Tensor or list of tensors.
+
+
+#### Returns:
+
+None or a tensor (or list of tensors,
+ one per output tensor of the layer).
+
+
+compute_output_shape
+
+``` python
+compute_output_shape(input_shape)
+```
+
+Computes the output shape of the layer.
+
+Assumes that the layer will be built
+to match the input shape provided.
+
+#### Arguments:
+
+
+* `input_shape`: Shape tuple (tuple of integers)
+ or list of shape tuples (one per output tensor of the layer).
+ Shape tuples can include None for free dimensions,
+ instead of an integer.
+
+
+#### Returns:
+
+An output shape tuple.
+
+
+count_params
+
+``` python
+count_params()
+```
+
+Count the total number of scalars composing the weights.
+
+
+#### Returns:
+
+An integer count.
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer isn't yet built
+ (in which case its weights aren't yet defined).
+
+from_config
+
+``` python
+from_config(
+ cls,
+ config
+)
+```
+
+Creates a layer from its config.
+
+This method is the reverse of `get_config`,
+capable of instantiating the same layer from the config
+dictionary. It does not handle layer connectivity
+(handled by Network), nor weights (handled by `set_weights`).
+
+#### Arguments:
+
+
+* `config`: A Python dictionary, typically the
+ output of get_config.
+
+
+#### Returns:
+
+A layer instance.
+
+
+get_config
+
+``` python
+get_config()
+```
+
+Returns the config of the layer.
+
+A layer config is a Python dictionary (serializable)
+containing the configuration of a layer.
+The same layer can be reinstantiated later
+(without its trained weights) from this configuration.
+
+The config of a layer does not include connectivity
+information, nor the layer class name. These are handled
+by `Network` (one layer of abstraction above).
+
+#### Returns:
+
+Python dictionary.
+
+
+get_initial_state
+
+``` python
+get_initial_state(
+ inputs=None,
+ batch_size=None,
+ dtype=None
+)
+```
+
+Return an initial (zero) state tuple for this `AttentionWrapper`.
+
+**NOTE** Please see the initializer documentation for details of how
+to call `get_initial_state` if using an `AttentionWrapper` with a
+`BeamSearchDecoder`.
+
+#### Args:
+
+
+* `inputs`: The inputs that will be fed to this cell.
+* `batch_size`: `0D` integer tensor: the batch size.
+* `dtype`: The internal state data type.
+
+
+#### Returns:
+
+An `AttentionWrapperState` tuple containing zeroed out tensors and,
+possibly, empty `TensorArray` objects.
+
+
+
+#### Raises:
+
+
+* `ValueError`: (or, possibly at runtime, InvalidArgument), if
+ `batch_size` does not match the output size of the encoder passed
+ to the wrapper object at initialization time.
+
+
+get_input_at
+
+``` python
+get_input_at(node_index)
+```
+
+Retrieves the input tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+
+get_input_mask_at
+
+``` python
+get_input_mask_at(node_index)
+```
+
+Retrieves the input mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple inputs).
+
+
+
+get_input_shape_at
+
+``` python
+get_input_shape_at(node_index)
+```
+
+Retrieves the input shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_losses_for
+
+``` python
+get_losses_for(inputs)
+```
+
+Retrieves losses relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of loss tensors of the layer that depend on `inputs`.
+
+
+get_output_at
+
+``` python
+get_output_at(node_index)
+```
+
+Retrieves the output tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_output_mask_at
+
+``` python
+get_output_mask_at(node_index)
+```
+
+Retrieves the output mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple outputs).
+
+
+get_output_shape_at
+
+``` python
+get_output_shape_at(node_index)
+```
+
+Retrieves the output shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_updates_for
+
+``` python
+get_updates_for(inputs)
+```
+
+Retrieves updates relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of update ops of the layer that depend on `inputs`.
+
+
+get_weights
+
+``` python
+get_weights()
+```
+
+Returns the current weights of the layer.
+
+
+#### Returns:
+
+Weights values as a list of numpy arrays.
+
+
+set_weights
+
+``` python
+set_weights(weights)
+```
+
+Sets the weights of the layer, from Numpy arrays.
+
+
+#### Arguments:
+
+
+* `weights`: a list of Numpy arrays. The number
+  of arrays and their shapes must match the
+  number and shapes of the weights of the
+  layer (i.e. it should match the output of
+  `get_weights`).
+
+
+#### Raises:
+
+
+* `ValueError`: If the provided weights list does not match the
+ layer's specifications.
+
+with_name_scope
+
+``` python
+with_name_scope(
+ cls,
+ method
+)
+```
+
+Decorator to automatically enter the module name scope.
+
+```
+class MyModule(tf.Module):
+ @tf.Module.with_name_scope
+ def __call__(self, x):
+ if not hasattr(self, 'w'):
+ self.w = tf.Variable(tf.random.normal([x.shape[1], 64]))
+ return tf.matmul(x, self.w)
+```
+
+Using the above module would produce `tf.Variable`s and `tf.Tensor`s whose
+names included the module name:
+
+```
+mod = MyModule()
+mod(tf.ones([8, 32]))
+# ==> <tf.Tensor: ...>
+mod.w
+# ==> <tf.Variable ...'my_module/w:0'>
+```
+
+#### Args:
+
+
+* `method`: The method to wrap.
+
+
+#### Returns:
+
+The original method wrapped such that it enters the module's name scope.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/AttentionWrapperState.md b/docs/api_docs/python/tfa/seq2seq/AttentionWrapperState.md
new file mode 100644
index 0000000000..c1d6d70c41
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/AttentionWrapperState.md
@@ -0,0 +1,123 @@
+
+
+
+
+
+
+
+
+
+
+
+
+# tfa.seq2seq.AttentionWrapperState
+
+## Class `AttentionWrapperState`
+
+`namedtuple` storing the state of an `AttentionWrapper`.
+
+
+
+### Aliases:
+
+* Class `tfa.seq2seq.AttentionWrapperState`
+* Class `tfa.seq2seq.attention_wrapper.AttentionWrapperState`
+
+
+
+Defined in [`seq2seq/attention_wrapper.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/attention_wrapper.py).
+
+
+
+
+#### Contains:
+
+
+- `cell_state`: The state of the wrapped `RNNCell` at the previous time
+ step.
+- `attention`: The attention emitted at the previous time step.
+- `time`: int32 scalar containing the current time step.
+- `alignments`: A single or tuple of `Tensor`(s) containing the
+ alignments emitted at the previous time step for each attention
+ mechanism.
+- `alignment_history`: (if enabled) a single or tuple of `TensorArray`(s)
+ containing alignment matrices from all time steps for each attention
+ mechanism. Call `stack()` on each to convert to a `Tensor`.
+- `attention_state`: A single or tuple of nested objects
+ containing attention mechanism state for each attention mechanism.
+ The objects may contain Tensors or TensorArrays.
+
+
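+For example (a sketch, assuming `alignment_history=True` was set on the
+wrapper and `final_state` is the `AttentionWrapperState` produced by
+decoding), the stored history can be converted to a dense tensor:
+
+``` python
+# TensorArray of per-step alignments -> Tensor shaped
+# [max_time, batch_size, memory_max_time].
+alignments = final_state.alignment_history.stack()
+```
+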
+## Properties
+
+cell_state
+
+
+
+
+attention
+
+
+
+
+time
+
+
+
+
+alignments
+
+
+
+
+alignment_history
+
+
+
+
+attention_state
+
+
+
+
+
+
+## Methods
+
+clone
+
+``` python
+clone(**kwargs)
+```
+
+Clone this object, overriding components provided by kwargs.
+
+The shapes of the new state fields must match the shapes of the
+original state fields. This will be validated, and the original
+fields' shapes will be propagated to the new fields.
+
+#### Example:
+
+
+
+```python
+initial_state = attention_wrapper.get_initial_state(
+ batch_size=..., dtype=...)
+initial_state = initial_state.clone(cell_state=encoder_state)
+```
+
+#### Args:
+
+
+* `**kwargs`: Any properties of the state object to replace in the
+ returned `AttentionWrapperState`.
+
+
+#### Returns:
+
+A new `AttentionWrapperState` whose properties are the same as
+this one, except any overridden properties as provided in `kwargs`.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/BahdanauAttention.md b/docs/api_docs/python/tfa/seq2seq/BahdanauAttention.md
new file mode 100644
index 0000000000..bd20a0647e
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/BahdanauAttention.md
@@ -0,0 +1,927 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# tfa.seq2seq.BahdanauAttention
+
+## Class `BahdanauAttention`
+
+Implements Bahdanau-style (additive) attention.
+
+
+
+### Aliases:
+
+* Class `tfa.seq2seq.BahdanauAttention`
+* Class `tfa.seq2seq.attention_wrapper.BahdanauAttention`
+
+
+
+Defined in [`seq2seq/attention_wrapper.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/attention_wrapper.py).
+
+
+
+This attention has two forms. The first is Bahdanau attention,
+as described in:
+
+Dzmitry Bahdanau, Kyunghyun Cho, Yoshua Bengio.
+"Neural Machine Translation by Jointly Learning to Align and Translate."
+ICLR 2015. https://arxiv.org/abs/1409.0473
+
+The second is the normalized form. This form is inspired by the
+weight normalization article:
+
+Tim Salimans, Diederik P. Kingma.
+"Weight Normalization: A Simple Reparameterization to Accelerate
+ Training of Deep Neural Networks."
+https://arxiv.org/abs/1602.07868
+
+To enable the second form, construct the object with parameter
+`normalize=True`.
+
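+#### Example:
+
+A minimal construction sketch (the shapes and the pairing with an
+`LSTMCell` inside an `AttentionWrapper` are illustrative assumptions,
+not requirements of this class):
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+batch_size, max_time, depth, units = 4, 7, 16, 32
+
+# Encoder output used as the attention memory.
+memory = tf.random.normal([batch_size, max_time, depth])
+memory_sequence_length = tf.fill([batch_size], max_time)
+
+attention_mechanism = tfa.seq2seq.BahdanauAttention(
+    units=units,
+    memory=memory,
+    memory_sequence_length=memory_sequence_length,
+    normalize=True)  # enable the weight-normalized (second) form
+
+attention_cell = tfa.seq2seq.AttentionWrapper(
+    tf.keras.layers.LSTMCell(units),
+    attention_mechanism,
+    attention_layer_size=units)
+```
+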
+__init__
+
+``` python
+__init__(
+ units,
+ memory,
+ memory_sequence_length=None,
+ normalize=False,
+ probability_fn='softmax',
+ kernel_initializer='glorot_uniform',
+ dtype=None,
+ name='BahdanauAttention',
+ **kwargs
+)
+```
+
+Construct the Attention mechanism.
+
+
+#### Args:
+
+
+* `units`: The depth of the query mechanism.
+* `memory`: The memory to query; usually the output of an RNN encoder.
+ This tensor should be shaped `[batch_size, max_time, ...]`.
+* `memory_sequence_length`: (optional): Sequence lengths for the batch
+ entries in memory. If provided, the memory tensor rows are masked
+ with zeros for values past the respective sequence lengths.
+* `normalize`: Python boolean. Whether to normalize the energy term.
+* `probability_fn`: (optional) string, the name of the function used to
+  convert the attention score to probabilities. The default is
+  `'softmax'`, which corresponds to `tf.nn.softmax`. The other option
+  is `'hardmax'`, which uses the `hardmax()` function within this
+  module. Any other value will raise a validation error.
+* `kernel_initializer`: (optional), the name of the initializer for the
+ attention kernel.
+* `dtype`: The data type for the query and memory layers of the attention
+ mechanism.
+* `name`: Name to use when creating ops.
+* `**kwargs`: Dictionary that contains other common arguments for layer
+ creation.
+
+
+
+## Properties
+
+activity_regularizer
+
+Optional regularizer function for the output of this layer.
+
+
+alignments_size
+
+
+
+
+dtype
+
+
+
+
+dynamic
+
+
+
+
+
+input
+
+Retrieves the input tensor(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input tensor or list of input tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+* `RuntimeError`: If called in Eager mode.
+* `AttributeError`: If no inbound nodes are found.
+
+
+input_mask
+
+Retrieves the input mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input mask tensor (potentially None) or list of input
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+
+
+input_shape
+
+Retrieves the input shape(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer, or if all inputs
+have the same shape.
+
+#### Returns:
+
+Input shape, as an integer shape tuple
+(or list of shape tuples, one tuple per input tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined input_shape.
+* `RuntimeError`: if called in Eager mode.
+
+
+
+
+
+
+losses
+
+Losses which are associated with this `Layer`.
+
+Variable regularization tensors are created when this property is accessed,
+so it is eager safe: accessing `losses` under a `tf.GradientTape` will
+propagate gradients back to the corresponding variables.
+
+#### Returns:
+
+A list of tensors.
+
+
+metrics
+
+
+
+
+name
+
+
+
+
+name_scope
+
+Returns a `tf.name_scope` instance for this class.
+
+
+non_trainable_variables
+
+
+
+
+non_trainable_weights
+
+
+
+
+output
+
+Retrieves the output tensor(s) of a layer.
+
+Only applicable if the layer has exactly one output,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output tensor or list of output tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to more than one incoming
+  layer.
+* `RuntimeError`: if called in Eager mode.
+
+output_mask
+
+Retrieves the output mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output mask tensor (potentially None) or list of output
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+
+output_shape
+
+Retrieves the output shape(s) of a layer.
+
+Only applicable if the layer has one output,
+or if all outputs have the same shape.
+
+#### Returns:
+
+Output shape, as an integer shape tuple
+(or list of shape tuples, one tuple per output tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined output shape.
+* `RuntimeError`: if called in Eager mode.
+
+state_size
+
+
+
+
+submodules
+
+Sequence of all sub-modules.
+
+Submodules are modules which are properties of this module, or found as
+properties of modules which are properties of this module (and so on).
+
+```
+a = tf.Module()
+b = tf.Module()
+c = tf.Module()
+a.b = b
+b.c = c
+assert list(a.submodules) == [b, c]
+assert list(b.submodules) == [c]
+assert list(c.submodules) == []
+```
+
+#### Returns:
+
+A sequence of all submodules.
+
+
+trainable
+
+
+
+
+trainable_variables
+
+
+
+
+trainable_weights
+
+
+
+
+updates
+
+
+
+
+variables
+
+Returns the list of all layer variables/weights.
+
+Alias of `self.weights`.
+
+#### Returns:
+
+A list of variables.
+
+
+weights
+
+Returns the list of all layer variables/weights.
+
+
+#### Returns:
+
+A list of variables.
+
+
+
+
+## Methods
+
+__call__
+
+``` python
+__call__(
+ inputs,
+ **kwargs
+)
+```
+
+Preprocess the inputs before calling `base_layer.__call__()`.
+
+Note that there are two situations here: one for setting up the memory,
+and one with the actual query and state.
+1. When the memory has not been configured, we just pass all the
+   parameters to `base_layer.__call__()`, which will then invoke
+   `self.call()` with the proper inputs and allow this class to set up
+   the memory.
+2. When the memory has already been set up, the input should contain
+   the query and state, and optionally the processed memory. If the
+   processed memory is not included in the input, we have to append it
+   to the inputs and give it to `base_layer.__call__()`. The processed
+   memory is the output of the first invocation of `self.__call__()`.
+   If we don't add it here, then from the Keras perspective the graph
+   is disconnected, since the output from the previous call is never
+   used.
+
+#### Args:
+
+
+* `inputs`: the input tensors.
+* `**kwargs`: dict, other keyword arguments for `__call__()`.
+
+apply
+
+``` python
+apply(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Apply the layer on an input.
+
+This is an alias of `self.__call__`.
+
+#### Arguments:
+
+
+* `inputs`: Input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+build
+
+``` python
+build(input_shape)
+```
+
+
+
+
+compute_mask
+
+``` python
+compute_mask(
+ inputs,
+ mask=None
+)
+```
+
+
+
+
+compute_output_shape
+
+``` python
+compute_output_shape(input_shape)
+```
+
+Computes the output shape of the layer.
+
+Assumes that the layer will be built
+to match the input shape provided.
+
+#### Arguments:
+
+
+* `input_shape`: Shape tuple (tuple of integers)
+ or list of shape tuples (one per output tensor of the layer).
+ Shape tuples can include None for free dimensions,
+ instead of an integer.
+
+
+#### Returns:
+
+An output shape tuple.
+
+
+count_params
+
+``` python
+count_params()
+```
+
+Count the total number of scalars composing the weights.
+
+
+#### Returns:
+
+An integer count.
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer isn't yet built
+ (in which case its weights aren't yet defined).
+
+deserialize_inner_layer_from_config
+
+``` python
+deserialize_inner_layer_from_config(
+ cls,
+ config,
+ custom_objects
+)
+```
+
+Helper method that reconstructs the query and memory layers from the
+config.
+
+In the `get_config()` method, the query and memory layer configs are
+serialized into dicts for persistence; this method performs the reverse
+action to reconstruct the layers from the config.
+
+#### Args:
+
+
+* `config`: dict, the configs that will be used to reconstruct the
+ object.
+* `custom_objects`: dict mapping class names (or function names) of
+ custom (non-Keras) objects to class/functions.
+
+#### Returns:
+
+
+* `config`: dict, the config with layer instance created, which is ready
+ to be used as init parameters.
+
+from_config
+
+``` python
+@classmethod
+from_config(
+ cls,
+ config,
+ custom_objects=None
+)
+```
+
+
+
+
+get_config
+
+``` python
+get_config()
+```
+
+
+
+
+
+get_input_at
+
+``` python
+get_input_at(node_index)
+```
+
+Retrieves the input tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+
+get_input_mask_at
+
+``` python
+get_input_mask_at(node_index)
+```
+
+Retrieves the input mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple inputs).
+
+
+
+get_input_shape_at
+
+``` python
+get_input_shape_at(node_index)
+```
+
+Retrieves the input shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_losses_for
+
+``` python
+get_losses_for(inputs)
+```
+
+Retrieves losses relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of loss tensors of the layer that depend on `inputs`.
+
+
+get_output_at
+
+``` python
+get_output_at(node_index)
+```
+
+Retrieves the output tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_output_mask_at
+
+``` python
+get_output_mask_at(node_index)
+```
+
+Retrieves the output mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple outputs).
+
+
+get_output_shape_at
+
+``` python
+get_output_shape_at(node_index)
+```
+
+Retrieves the output shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_updates_for
+
+``` python
+get_updates_for(inputs)
+```
+
+Retrieves updates relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of update ops of the layer that depend on `inputs`.
+
+
+get_weights
+
+``` python
+get_weights()
+```
+
+Returns the current weights of the layer.
+
+
+#### Returns:
+
+Weights values as a list of numpy arrays.
+
+
+initial_alignments
+
+``` python
+initial_alignments(
+ batch_size,
+ dtype
+)
+```
+
+Creates the initial alignment values for the `AttentionWrapper`
+class.
+
+This is important for AttentionMechanisms that use the previous
+alignment to calculate the alignment at the next time step
+(e.g. monotonic attention).
+
+The default behavior is to return a tensor of all zeros.
+
+#### Args:
+
+
+* `batch_size`: `int32` scalar, the batch_size.
+* `dtype`: The `dtype`.
+
+
+#### Returns:
+
+A `dtype` tensor shaped `[batch_size, alignments_size]`
+(`alignments_size` is the values' `max_time`).
+
+
+initial_state
+
+``` python
+initial_state(
+ batch_size,
+ dtype
+)
+```
+
+Creates the initial state values for the `AttentionWrapper` class.
+
+This is important for AttentionMechanisms that use the previous
+alignment to calculate the alignment at the next time step
+(e.g. monotonic attention).
+
+The default behavior is to return the same output as
+initial_alignments.
+
+#### Args:
+
+
+* `batch_size`: `int32` scalar, the batch_size.
+* `dtype`: The `dtype`.
+
+
+#### Returns:
+
+A structure of all-zero tensors with shapes as described by
+`state_size`.
+
+
+set_weights
+
+``` python
+set_weights(weights)
+```
+
+Sets the weights of the layer, from Numpy arrays.
+
+
+#### Arguments:
+
+
+* `weights`: a list of Numpy arrays. The number
+  of arrays and their shapes must match the
+  number and shapes of the weights of the
+  layer (i.e. it should match the output of
+  `get_weights`).
+
+
+#### Raises:
+
+
+* `ValueError`: If the provided weights list does not match the
+ layer's specifications.
+
+with_name_scope
+
+``` python
+with_name_scope(
+ cls,
+ method
+)
+```
+
+Decorator to automatically enter the module name scope.
+
+```
+class MyModule(tf.Module):
+ @tf.Module.with_name_scope
+ def __call__(self, x):
+ if not hasattr(self, 'w'):
+ self.w = tf.Variable(tf.random.normal([x.shape[1], 64]))
+ return tf.matmul(x, self.w)
+```
+
+Using the above module would produce `tf.Variable`s and `tf.Tensor`s whose
+names included the module name:
+
+```
+mod = MyModule()
+mod(tf.ones([8, 32]))
+# ==> <tf.Tensor: ...>
+mod.w
+# ==> <tf.Variable ...'my_module/w:0'>
+```
+
+#### Args:
+
+
+* `method`: The method to wrap.
+
+
+#### Returns:
+
+The original method wrapped such that it enters the module's name scope.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/BahdanauMonotonicAttention.md b/docs/api_docs/python/tfa/seq2seq/BahdanauMonotonicAttention.md
new file mode 100644
index 0000000000..58f4e5b883
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/BahdanauMonotonicAttention.md
@@ -0,0 +1,925 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# tfa.seq2seq.BahdanauMonotonicAttention
+
+## Class `BahdanauMonotonicAttention`
+
+Monotonic attention mechanism with Bahdanau-style energy function.
+
+
+
+### Aliases:
+
+* Class `tfa.seq2seq.BahdanauMonotonicAttention`
+* Class `tfa.seq2seq.attention_wrapper.BahdanauMonotonicAttention`
+
+
+
+Defined in [`seq2seq/attention_wrapper.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/attention_wrapper.py).
+
+
+
+This type of attention enforces a monotonic constraint on the attention
+distributions; that is, once the model attends to a given point in the
+memory, it can't attend to any prior points at subsequent output
+timesteps. It achieves this by using `_monotonic_probability_fn` instead
+of softmax to construct its attention distributions. Since the attention
+scores are passed through a sigmoid, a learnable scalar bias parameter is
+applied after the score function and before the sigmoid. Otherwise, it is
+equivalent to BahdanauAttention. This approach is proposed in
+
+Colin Raffel, Minh-Thang Luong, Peter J. Liu, Ron J. Weiss, Douglas Eck,
+"Online and Linear-Time Attention by Enforcing Monotonic Alignments."
+ICML 2017. https://arxiv.org/abs/1704.00784
+
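+#### Example:
+
+A minimal construction sketch (the shapes and hyperparameter values are
+illustrative assumptions; `score_bias_init` is set negative as the
+argument documentation below recommends for long memories):
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+batch_size, max_time, depth, units = 4, 50, 16, 32
+
+# Encoder output used as the attention memory.
+memory = tf.random.normal([batch_size, max_time, depth])
+
+attention_mechanism = tfa.seq2seq.BahdanauMonotonicAttention(
+    units=units,
+    memory=memory,
+    sigmoid_noise=1.0,     # pre-sigmoid noise; see _monotonic_probability_fn
+    score_bias_init=-4.0,  # negative init for a long memory
+    mode='parallel')
+```
+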
+__init__
+
+``` python
+__init__(
+ units,
+ memory,
+ memory_sequence_length=None,
+ normalize=False,
+ sigmoid_noise=0.0,
+ sigmoid_noise_seed=None,
+ score_bias_init=0.0,
+ mode='parallel',
+ kernel_initializer='glorot_uniform',
+ dtype=None,
+ name='BahdanauMonotonicAttention',
+ **kwargs
+)
+```
+
+Construct the Attention mechanism.
+
+
+#### Args:
+
+
+* `units`: The depth of the query mechanism.
+* `memory`: The memory to query; usually the output of an RNN encoder.
+ This tensor should be shaped `[batch_size, max_time, ...]`.
+* `memory_sequence_length`: (optional): Sequence lengths for the batch
+ entries in memory. If provided, the memory tensor rows are masked
+ with zeros for values past the respective sequence lengths.
+* `normalize`: Python boolean. Whether to normalize the energy term.
+* `sigmoid_noise`: Standard deviation of pre-sigmoid noise. See the
+ docstring for `_monotonic_probability_fn` for more information.
+* `sigmoid_noise_seed`: (optional) Random seed for pre-sigmoid noise.
+* `score_bias_init`: Initial value for score bias scalar. It's
+ recommended to initialize this to a negative value when the length
+ of the memory is large.
+* `mode`: How to compute the attention distribution. Must be one of
+ 'recursive', 'parallel', or 'hard'. See the docstring for
+  `tfa.seq2seq.monotonic_attention` for more information.
+* `kernel_initializer`: (optional), the name of the initializer for the
+ attention kernel.
+* `dtype`: The data type for the query and memory layers of the attention
+ mechanism.
+* `name`: Name to use when creating ops.
+* `**kwargs`: Dictionary that contains other common arguments for layer
+ creation.
+
+
+
+## Properties
+
+activity_regularizer
+
+Optional regularizer function for the output of this layer.
+
+
+alignments_size
+
+
+
+
+dtype
+
+
+
+
+dynamic
+
+
+
+
+
+input
+
+Retrieves the input tensor(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input tensor or list of input tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+* `RuntimeError`: If called in Eager mode.
+* `AttributeError`: If no inbound nodes are found.
+
+
+input_mask
+
+Retrieves the input mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input mask tensor (potentially None) or list of input
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+
+
+input_shape
+
+Retrieves the input shape(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer, or if all inputs
+have the same shape.
+
+#### Returns:
+
+Input shape, as an integer shape tuple
+(or list of shape tuples, one tuple per input tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined input_shape.
+* `RuntimeError`: if called in Eager mode.
+
+
+
+
+
+
+losses
+
+Losses which are associated with this `Layer`.
+
+Variable regularization tensors are created when this property is accessed,
+so it is eager safe: accessing `losses` under a `tf.GradientTape` will
+propagate gradients back to the corresponding variables.
+
+#### Returns:
+
+A list of tensors.
+
+
+metrics
+
+
+
+
+name
+
+
+
+
+name_scope
+
+Returns a `tf.name_scope` instance for this class.
+
+
+non_trainable_variables
+
+
+
+
+non_trainable_weights
+
+
+
+
+output
+
+Retrieves the output tensor(s) of a layer.
+
+Only applicable if the layer has exactly one output,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output tensor or list of output tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to more than one incoming
+  layer.
+* `RuntimeError`: if called in Eager mode.
+
+output_mask
+
+Retrieves the output mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output mask tensor (potentially None) or list of output
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+
+output_shape
+
+Retrieves the output shape(s) of a layer.
+
+Only applicable if the layer has one output,
+or if all outputs have the same shape.
+
+#### Returns:
+
+Output shape, as an integer shape tuple
+(or list of shape tuples, one tuple per output tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined output shape.
+* `RuntimeError`: if called in Eager mode.
+
+state_size
+
+
+
+
+submodules
+
+Sequence of all sub-modules.
+
+Submodules are modules which are properties of this module, or found as
+properties of modules which are properties of this module (and so on).
+
+```
+a = tf.Module()
+b = tf.Module()
+c = tf.Module()
+a.b = b
+b.c = c
+assert list(a.submodules) == [b, c]
+assert list(b.submodules) == [c]
+assert list(c.submodules) == []
+```
+
+#### Returns:
+
+A sequence of all submodules.
+
+
+trainable
+
+
+
+
+trainable_variables
+
+
+
+
+trainable_weights
+
+
+
+
+updates
+
+
+
+
+variables
+
+Returns the list of all layer variables/weights.
+
+Alias of `self.weights`.
+
+#### Returns:
+
+A list of variables.
+
+
+weights
+
+Returns the list of all layer variables/weights.
+
+
+#### Returns:
+
+A list of variables.
+
+
+
+
+## Methods
+
+__call__
+
+``` python
+__call__(
+ inputs,
+ **kwargs
+)
+```
+
+Preprocess the inputs before calling `base_layer.__call__()`.
+
+Note that there are two situations here: one for setting up the memory,
+and one with the actual query and state.
+1. When the memory has not been configured, we just pass all the
+   parameters to `base_layer.__call__()`, which will then invoke
+   `self.call()` with the proper inputs and allow this class to set up
+   the memory.
+2. When the memory has already been set up, the input should contain
+   the query and state, and optionally the processed memory. If the
+   processed memory is not included in the input, we have to append it
+   to the inputs and give it to `base_layer.__call__()`. The processed
+   memory is the output of the first invocation of `self.__call__()`.
+   If we don't add it here, then from the Keras perspective the graph
+   is disconnected, since the output from the previous call is never
+   used.
+
+#### Args:
+
+
+* `inputs`: the input tensors.
+* `**kwargs`: dict, other keyword arguments for `__call__()`.
+
+apply
+
+``` python
+apply(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Apply the layer on an input.
+
+This is an alias of `self.__call__`.
+
+#### Arguments:
+
+
+* `inputs`: Input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+build
+
+``` python
+build(input_shape)
+```
+
+
+
+
+compute_mask
+
+``` python
+compute_mask(
+ inputs,
+ mask=None
+)
+```
+
+
+
+
+compute_output_shape
+
+``` python
+compute_output_shape(input_shape)
+```
+
+Computes the output shape of the layer.
+
+Assumes that the layer will be built
+to match the input shape provided.
+
+#### Arguments:
+
+
+* `input_shape`: Shape tuple (tuple of integers)
+ or list of shape tuples (one per output tensor of the layer).
+ Shape tuples can include None for free dimensions,
+ instead of an integer.
+
+
+#### Returns:
+
+An output shape tuple.
+
+
+count_params
+
+``` python
+count_params()
+```
+
+Count the total number of scalars composing the weights.
+
+
+#### Returns:
+
+An integer count.
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer isn't yet built
+ (in which case its weights aren't yet defined).
+
+deserialize_inner_layer_from_config
+
+``` python
+deserialize_inner_layer_from_config(
+ cls,
+ config,
+ custom_objects
+)
+```
+
+Helper method that reconstructs the query and memory layers from the
+config.
+
+In the `get_config()` method, the query and memory layer configs are
+serialized into dicts for persistence; this method performs the reverse
+action to reconstruct the layers from the config.
+
+#### Args:
+
+
+* `config`: dict, the configs that will be used to reconstruct the
+ object.
+* `custom_objects`: dict mapping class names (or function names) of
+ custom (non-Keras) objects to class/functions.
+
+#### Returns:
+
+
+* `config`: dict, the config with layer instance created, which is ready
+ to be used as init parameters.
+
+from_config
+
+``` python
+@classmethod
+from_config(
+ cls,
+ config,
+ custom_objects=None
+)
+```
+
+
+
+
+get_config
+
+``` python
+get_config()
+```
+
+
+
+
+
+get_input_at
+
+``` python
+get_input_at(node_index)
+```
+
+Retrieves the input tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+
+get_input_mask_at
+
+``` python
+get_input_mask_at(node_index)
+```
+
+Retrieves the input mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple inputs).
+
+
+
+get_input_shape_at
+
+``` python
+get_input_shape_at(node_index)
+```
+
+Retrieves the input shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_losses_for
+
+``` python
+get_losses_for(inputs)
+```
+
+Retrieves losses relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of loss tensors of the layer that depend on `inputs`.
+
+
+get_output_at
+
+``` python
+get_output_at(node_index)
+```
+
+Retrieves the output tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_output_mask_at
+
+``` python
+get_output_mask_at(node_index)
+```
+
+Retrieves the output mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple outputs).
+
+
+get_output_shape_at
+
+``` python
+get_output_shape_at(node_index)
+```
+
+Retrieves the output shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_updates_for
+
+``` python
+get_updates_for(inputs)
+```
+
+Retrieves updates relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of update ops of the layer that depend on `inputs`.
+
+
+get_weights
+
+``` python
+get_weights()
+```
+
+Returns the current weights of the layer.
+
+
+#### Returns:
+
+Weights values as a list of numpy arrays.
+
+
+initial_alignments
+
+``` python
+initial_alignments(
+ batch_size,
+ dtype
+)
+```
+
+Creates the initial alignment values for the monotonic attentions.
+
+Initializes to Dirac distributions, i.e.
+[1, 0, 0, ...memory length..., 0] for all entries in the batch.
+
+#### Args:
+
+
+* `batch_size`: `int32` scalar, the batch_size.
+* `dtype`: The `dtype`.
+
+
+#### Returns:
+
+A `dtype` tensor shaped `[batch_size, alignments_size]`
+(`alignments_size` is the values' `max_time`).
+
+
+initial_state
+
+``` python
+initial_state(
+ batch_size,
+ dtype
+)
+```
+
+Creates the initial state values for the `AttentionWrapper` class.
+
+This is important for AttentionMechanisms that use the previous
+alignment to calculate the alignment at the next time step
+(e.g. monotonic attention).
+
+The default behavior is to return the same output as
+initial_alignments.
+
+#### Args:
+
+
+* `batch_size`: `int32` scalar, the batch_size.
+* `dtype`: The `dtype`.
+
+
+#### Returns:
+
+A structure of all-zero tensors with shapes as described by
+`state_size`.
+
+
+set_weights
+
+``` python
+set_weights(weights)
+```
+
+Sets the weights of the layer, from Numpy arrays.
+
+
+#### Arguments:
+
+
+* `weights`: a list of Numpy arrays. The number
+  of arrays and their shapes must match the
+  number and shapes of the weights of the
+  layer (i.e. it should match the output of
+  `get_weights`).
+
+
+#### Raises:
+
+
+* `ValueError`: If the provided weights list does not match the
+ layer's specifications.
+
+with_name_scope
+
+``` python
+with_name_scope(
+ cls,
+ method
+)
+```
+
+Decorator to automatically enter the module name scope.
+
+```
+class MyModule(tf.Module):
+ @tf.Module.with_name_scope
+ def __call__(self, x):
+ if not hasattr(self, 'w'):
+ self.w = tf.Variable(tf.random.normal([x.shape[1], 64]))
+ return tf.matmul(x, self.w)
+```
+
+Using the above module would produce `tf.Variable`s and `tf.Tensor`s whose
+names included the module name:
+
+```
+mod = MyModule()
+mod(tf.ones([8, 32]))
+# ==> <tf.Tensor: ...>
+mod.w
+# ==> <tf.Variable ...'my_module/w:0'>
+```
+
+#### Args:
+
+
+* `method`: The method to wrap.
+
+
+#### Returns:
+
+The original method wrapped such that it enters the module's name scope.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/BaseDecoder.md b/docs/api_docs/python/tfa/seq2seq/BaseDecoder.md
new file mode 100644
index 0000000000..6a56b0c256
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/BaseDecoder.md
@@ -0,0 +1,977 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# tfa.seq2seq.BaseDecoder
+
+## Class `BaseDecoder`
+
+An RNN Decoder that is based on a Keras layer.
+
+
+
+### Aliases:
+
+* Class `tfa.seq2seq.BaseDecoder`
+* Class `tfa.seq2seq.decoder.BaseDecoder`
+
+
+
+Defined in [`seq2seq/decoder.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/decoder.py).
+
+
+
+Concepts used by this interface:
+- `inputs`: (structure of) tensors and TensorArrays that is passed as input
+ to the RNNCell composing the decoder, at each time step.
+- `state`: (structure of) tensors and TensorArrays that is passed to the
+ RNNCell instance as the state.
+- `memory`: (structure of) tensors that is usually the full output of the
+  encoder, which will be used by the attention wrapper for the RNNCell.
+- `finished`: boolean tensor telling whether each sequence in the batch is
+ finished.
+- `outputs`: Instance of BasicDecoderOutput. Result of the decoding, at
+ each time step.
+
+__init__
+
+``` python
+__init__(
+ output_time_major=False,
+ impute_finished=False,
+ maximum_iterations=None,
+ parallel_iterations=32,
+ swap_memory=False,
+ **kwargs
+)
+```
+
+
+
+
+
+
+## Properties
+
+activity_regularizer
+
+Optional regularizer function for the output of this layer.
+
+
+batch_size
+
+The batch size of input values.
+
+
+dtype
+
+
+
+
+dynamic
+
+
+
+
+
+
+Retrieves the input tensor(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input tensor or list of input tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layers.
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+* `AttributeError`: If no inbound nodes are found.
+
+
+
+Retrieves the input mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input mask tensor (potentially None) or list of input
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+
+input_shape
+
+Retrieves the input shape(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer, or if all inputs
+have the same shape.
+
+#### Returns:
+
+Input shape, as an integer shape tuple
+(or list of shape tuples, one tuple per input tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined input_shape.
+* `RuntimeError`: if called in Eager mode.
+
+
+
+
+
+
+losses
+
+Losses which are associated with this `Layer`.
+
+Variable regularization tensors are created when this property is accessed,
+so it is eager safe: accessing `losses` under a `tf.GradientTape` will
+propagate gradients back to the corresponding variables.
+
+#### Returns:
+
+A list of tensors.
+
+
+metrics
+
+
+
+
+name
+
+
+
+
+name_scope
+
+Returns a `tf.name_scope` instance for this class.
+
+
+non_trainable_variables
+
+
+
+
+non_trainable_weights
+
+
+
+
+output
+
+Retrieves the output tensor(s) of a layer.
+
+Only applicable if the layer has exactly one output,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output tensor or list of output tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to more than one incoming
+  layer.
+* `RuntimeError`: if called in Eager mode.
+
+output_dtype
+
+A (possibly nested tuple of...) dtype[s].
+
+
+output_mask
+
+Retrieves the output mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output mask tensor (potentially None) or list of output
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+
+output_shape
+
+Retrieves the output shape(s) of a layer.
+
+Only applicable if the layer has one output,
+or if all outputs have the same shape.
+
+#### Returns:
+
+Output shape, as an integer shape tuple
+(or list of shape tuples, one tuple per output tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined output shape.
+* `RuntimeError`: if called in Eager mode.
+
+output_size
+
+A (possibly nested tuple of...) integer[s] or `TensorShape`
+object[s].
+
+submodules
+
+Sequence of all sub-modules.
+
+Submodules are modules which are properties of this module, or found as
+properties of modules which are properties of this module (and so on).
+
+```
+a = tf.Module()
+b = tf.Module()
+c = tf.Module()
+a.b = b
+b.c = c
+assert list(a.submodules) == [b, c]
+assert list(b.submodules) == [c]
+assert list(c.submodules) == []
+```
+
+#### Returns:
+
+A sequence of all submodules.
+
+
+tracks_own_finished
+
+Describes whether the Decoder keeps track of finished states.
+
+Most decoders will emit a true/false `finished` value independently
+at each time step. In this case, the `dynamic_decode` function keeps
+track of which batch entries are already finished, and performs a
+logical OR to insert new batches to the finished set.
+
+Some decoders, however, shuffle batches / beams between time steps and
+`dynamic_decode` will mix up the finished state across these entries
+because it does not track the reshuffle across time steps. In this
+case, it is up to the decoder to declare that it will keep track of its
+own finished state by setting this property to `True`.
+
+#### Returns:
+
+Python bool.
+
+
+trainable
+
+
+
+
+trainable_variables
+
+
+
+
+trainable_weights
+
+
+
+
+updates
+
+
+
+
+variables
+
+Returns the list of all layer variables/weights.
+
+Alias of `self.weights`.
+
+#### Returns:
+
+A list of variables.
+
+
+weights
+
+Returns the list of all layer variables/weights.
+
+
+#### Returns:
+
+A list of variables.
+
+
+
+
+## Methods
+
+__call__
+
+``` python
+__call__(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Wraps `call`, applying pre- and post-processing steps.
+
+
+#### Arguments:
+
+
+* `inputs`: input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+
+#### Note:
+
+- The following optional keyword arguments are reserved for specific uses:
+  * `training`: Boolean scalar tensor or Python boolean indicating
+    whether the `call` is meant for training or inference.
+  * `mask`: Boolean input mask.
+- If the layer's `call` method takes a `mask` argument (as some Keras
+  layers do), its default value will be set to the mask generated
+  for `inputs` by the previous layer (if `inputs` did come from
+  a layer that generated a corresponding mask, i.e. if it came from
+  a Keras layer with masking support).
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer's `call` method returns None (an invalid value).
+
+apply
+
+``` python
+apply(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Apply the layer on an input.
+
+This is an alias of `self.__call__`.
+
+#### Arguments:
+
+
+* `inputs`: Input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+build
+
+``` python
+build(input_shape)
+```
+
+Creates the variables of the layer (optional, for subclass implementers).
+
+This is a method that implementers of subclasses of `Layer` or `Model`
+can override if they need a state-creation step in-between
+layer instantiation and layer call.
+
+This is typically used to create the weights of `Layer` subclasses.
+
+#### Arguments:
+
+
+* `input_shape`: Instance of `TensorShape`, or list of instances of
+ `TensorShape` if the layer expects a list of inputs
+ (one instance per input).
+
+compute_mask
+
+``` python
+compute_mask(
+ inputs,
+ mask=None
+)
+```
+
+Computes an output mask tensor.
+
+
+#### Arguments:
+
+
+* `inputs`: Tensor or list of tensors.
+* `mask`: Tensor or list of tensors.
+
+
+#### Returns:
+
+None or a tensor (or list of tensors,
+ one per output tensor of the layer).
+
+
+compute_output_shape
+
+``` python
+compute_output_shape(input_shape)
+```
+
+Computes the output shape of the layer.
+
+Assumes that the layer will be built
+to match the input shape provided.
+
+#### Arguments:
+
+
+* `input_shape`: Shape tuple (tuple of integers)
+ or list of shape tuples (one per output tensor of the layer).
+ Shape tuples can include None for free dimensions,
+ instead of an integer.
+
+
+#### Returns:
+
+An output shape tuple.
+
+
+count_params
+
+``` python
+count_params()
+```
+
+Count the total number of scalars composing the weights.
+
+
+#### Returns:
+
+An integer count.
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer isn't yet built
+ (in which case its weights aren't yet defined).
+
+finalize
+
+``` python
+finalize(
+ outputs,
+ final_state,
+ sequence_lengths
+)
+```
+
+
+
+
+from_config
+
+``` python
+from_config(
+ cls,
+ config
+)
+```
+
+Creates a layer from its config.
+
+This method is the reverse of `get_config`,
+capable of instantiating the same layer from the config
+dictionary. It does not handle layer connectivity
+(handled by Network), nor weights (handled by `set_weights`).
+
+#### Arguments:
+
+
+* `config`: A Python dictionary, typically the
+ output of get_config.
+
+
+#### Returns:
+
+A layer instance.
+
+
+get_config
+
+``` python
+get_config()
+```
+
+Returns the config of the layer.
+
+A layer config is a Python dictionary (serializable)
+containing the configuration of a layer.
+The same layer can be reinstantiated later
+(without its trained weights) from this configuration.
+
+The config of a layer does not include connectivity
+information, nor the layer class name. These are handled
+by `Network` (one layer of abstraction above).
+
+#### Returns:
+
+Python dictionary.
+
+
+get_input_at
+
+``` python
+get_input_at(node_index)
+```
+
+Retrieves the input tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_input_mask_at
+
+``` python
+get_input_mask_at(node_index)
+```
+
+Retrieves the input mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple inputs).
+
+
+get_input_shape_at
+
+``` python
+get_input_shape_at(node_index)
+```
+
+Retrieves the input shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_losses_for
+
+``` python
+get_losses_for(inputs)
+```
+
+Retrieves losses relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of loss tensors of the layer that depend on `inputs`.
+
+
+get_output_at
+
+``` python
+get_output_at(node_index)
+```
+
+Retrieves the output tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_output_mask_at
+
+``` python
+get_output_mask_at(node_index)
+```
+
+Retrieves the output mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple outputs).
+
+
+get_output_shape_at
+
+``` python
+get_output_shape_at(node_index)
+```
+
+Retrieves the output shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_updates_for
+
+``` python
+get_updates_for(inputs)
+```
+
+Retrieves updates relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of update ops of the layer that depend on `inputs`.
+
+
+get_weights
+
+``` python
+get_weights()
+```
+
+Returns the current weights of the layer.
+
+
+#### Returns:
+
+Weights values as a list of numpy arrays.
+
+
+initialize
+
+``` python
+initialize(
+ inputs,
+ initial_state=None,
+ **kwargs
+)
+```
+
+Called before any decoding iterations.
+
+This method must compute initial input values and the initial state.
+
+#### Args:
+
+
+* `inputs`: (structure of) tensors that contains the input for the
+  decoder. In the normal case, this is a tensor of shape
+  `[batch, timestep, embedding]`.
+* `initial_state`: (structure of) tensors that contains the initial state
+  for the RNNCell.
+* `**kwargs`: Other arguments passed in from the `layer.call()` method,
+  such as the input `sequence_length` or an input mask.
+
+
+#### Returns:
+
+`(finished, initial_inputs, initial_state)`: initial values of
+'finished' flags, inputs and state.
+
+
+set_weights
+
+``` python
+set_weights(weights)
+```
+
+Sets the weights of the layer, from Numpy arrays.
+
+
+#### Arguments:
+
+
+* `weights`: a list of Numpy arrays. The number
+  of arrays and their shapes must match the
+  number and shapes of the layer's weights
+  (i.e. it should match the output of
+  `get_weights`).
+
+
+#### Raises:
+
+
+* `ValueError`: If the provided weights list does not match the
+ layer's specifications.
+
+step
+
+``` python
+step(
+ time,
+ inputs,
+ state
+)
+```
+
+Called per step of decoding (but only once for dynamic decoding).
+
+
+#### Args:
+
+
+* `time`: Scalar `int32` tensor. Current step number.
+* `inputs`: RNNCell input (possibly nested tuple of) tensor[s] for this
+ time step.
+* `state`: RNNCell state (possibly nested tuple of) tensor[s] from
+ previous time step.
+
+
+#### Returns:
+
+`(outputs, next_state, next_inputs, finished)`: `outputs` is an
+object containing the decoder output, `next_state` is a
+(structure of) state tensors and TensorArrays, `next_inputs` is the
+tensor that should be used as input for the next step, `finished` is
+a boolean tensor telling whether the sequence is complete, for each
+sequence in the batch.
+
+
+with_name_scope
+
+``` python
+with_name_scope(
+ cls,
+ method
+)
+```
+
+Decorator to automatically enter the module name scope.
+
+```
+class MyModule(tf.Module):
+ @tf.Module.with_name_scope
+ def __call__(self, x):
+ if not hasattr(self, 'w'):
+ self.w = tf.Variable(tf.random.normal([x.shape[1], 64]))
+ return tf.matmul(x, self.w)
+```
+
+Using the above module would produce `tf.Variable`s and `tf.Tensor`s whose
+names include the module name:
+
+```
+mod = MyModule()
+mod(tf.ones([8, 32]))
+# ==> <tf.Tensor: shape=(8, 64), ...>
+mod.w
+# ==> <tf.Variable 'my_module/w:0' shape=(32, 64), ...>
+```
+
+#### Args:
+
+
+* `method`: The method to wrap.
+
+
+#### Returns:
+
+The original method wrapped such that it enters the module's name scope.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/BasicDecoder.md b/docs/api_docs/python/tfa/seq2seq/BasicDecoder.md
new file mode 100644
index 0000000000..053bb24011
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/BasicDecoder.md
@@ -0,0 +1,954 @@
+
+# tfa.seq2seq.BasicDecoder
+
+## Class `BasicDecoder`
+
+Basic sampling decoder.
+
+Inherits From: [`BaseDecoder`](../../tfa/seq2seq/BaseDecoder.md)
+
+### Aliases:
+
+* Class `tfa.seq2seq.BasicDecoder`
+* Class `tfa.seq2seq.basic_decoder.BasicDecoder`
+
+
+
+Defined in [`seq2seq/basic_decoder.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/basic_decoder.py).
+
+
+
+
+__init__
+
+``` python
+__init__(
+ cell,
+ sampler,
+ output_layer=None,
+ **kwargs
+)
+```
+
+Initialize BasicDecoder.
+
+
+#### Args:
+
+
+* `cell`: An `RNNCell` instance.
+* `sampler`: A `Sampler` instance.
+* `output_layer`: (Optional) An instance of `tf.keras.layers.Layer`,
+  e.g., `tf.keras.layers.Dense`. Optional layer to apply to the RNN
+  output prior to storing the result or sampling.
+* `**kwargs`: Other keyword arguments for layer creation.
+
+
+#### Raises:
+
+
+* `TypeError`: if `cell`, `sampler` or `output_layer` have an incorrect
+type.
+
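+
+A usage sketch (assuming a toy 10-token vocabulary; `TrainingSampler`
+reads the ground-truth `inputs` back at each step):
+
+```
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+batch_size, max_time, embed_dim, units = 4, 7, 16, 32
+inputs = tf.random.normal([batch_size, max_time, embed_dim])
+
+decoder = tfa.seq2seq.BasicDecoder(
+    cell=tf.keras.layers.LSTMCell(units),
+    sampler=tfa.seq2seq.TrainingSampler(),
+    output_layer=tf.keras.layers.Dense(10))
+
+# Calling the decoder runs dynamic decoding end to end.
+outputs, state, lengths = decoder(
+    inputs,
+    initial_state=decoder.cell.get_initial_state(
+        batch_size=batch_size, dtype=tf.float32),
+    sequence_length=tf.fill([batch_size], max_time))
+outputs.rnn_output  # ==> [batch_size, max_time, 10] logits
+```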
+
+
+## Properties
+
+activity_regularizer
+
+Optional regularizer function for the output of this layer.
+
+
+batch_size
+
+
+
+
+dtype
+
+
+
+
+dynamic
+
+
+
+
+input
+
+Retrieves the input tensor(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input tensor or list of input tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+  more than one incoming layer.
+* `RuntimeError`: If called in Eager mode.
+* `AttributeError`: If no inbound nodes are found.
+
+input_mask
+
+Retrieves the input mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input mask tensor (potentially None) or list of input
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+
+input_shape
+
+Retrieves the input shape(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer, or if all inputs
+have the same shape.
+
+#### Returns:
+
+Input shape, as an integer shape tuple
+(or list of shape tuples, one tuple per input tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined input_shape.
+* `RuntimeError`: if called in Eager mode.
+
+
+
+
+
+
+losses
+
+Losses which are associated with this `Layer`.
+
+Variable regularization tensors are created when this property is accessed,
+so it is eager safe: accessing `losses` under a `tf.GradientTape` will
+propagate gradients back to the corresponding variables.
+
+#### Returns:
+
+A list of tensors.
+
+
+metrics
+
+
+
+
+name
+
+
+
+
+name_scope
+
+Returns a `tf.name_scope` instance for this class.
+
+
+non_trainable_variables
+
+
+
+
+non_trainable_weights
+
+
+
+
+output
+
+Retrieves the output tensor(s) of a layer.
+
+Only applicable if the layer has exactly one output,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output tensor or list of output tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to more than one incoming
+  layer.
+* `RuntimeError`: if called in Eager mode.
+
+output_dtype
+
+
+
+
+output_mask
+
+Retrieves the output mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output mask tensor (potentially None) or list of output
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+
+output_shape
+
+Retrieves the output shape(s) of a layer.
+
+Only applicable if the layer has one output,
+or if all outputs have the same shape.
+
+#### Returns:
+
+Output shape, as an integer shape tuple
+(or list of shape tuples, one tuple per output tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined output shape.
+* `RuntimeError`: if called in Eager mode.
+
+output_size
+
+
+
+
+submodules
+
+Sequence of all sub-modules.
+
+Submodules are modules which are properties of this module, or found as
+properties of modules which are properties of this module (and so on).
+
+```
+a = tf.Module()
+b = tf.Module()
+c = tf.Module()
+a.b = b
+b.c = c
+assert list(a.submodules) == [b, c]
+assert list(b.submodules) == [c]
+assert list(c.submodules) == []
+```
+
+#### Returns:
+
+A sequence of all submodules.
+
+
+tracks_own_finished
+
+Describes whether the Decoder keeps track of finished states.
+
+Most decoders will emit a true/false `finished` value independently
+at each time step. In this case, the `dynamic_decode` function keeps
+track of which batch entries are already finished, and performs a
+logical OR to insert new batches to the finished set.
+
+Some decoders, however, shuffle batches / beams between time steps and
+`dynamic_decode` will mix up the finished state across these entries
+because it does not track the reshuffle across time steps. In this
+case, it is up to the decoder to declare that it will keep track of its
+own finished state by setting this property to `True`.
+
+#### Returns:
+
+Python bool.
+
+
+trainable
+
+
+
+
+trainable_variables
+
+
+
+
+trainable_weights
+
+
+
+
+updates
+
+
+
+
+variables
+
+Returns the list of all layer variables/weights.
+
+Alias of `self.weights`.
+
+#### Returns:
+
+A list of variables.
+
+
+weights
+
+Returns the list of all layer variables/weights.
+
+
+#### Returns:
+
+A list of variables.
+
+
+
+
+## Methods
+
+__call__
+
+``` python
+__call__(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Wraps `call`, applying pre- and post-processing steps.
+
+
+#### Arguments:
+
+
+* `inputs`: input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+
+#### Note:
+
+- The following optional keyword arguments are reserved for specific uses:
+  * `training`: Boolean scalar tensor or Python boolean indicating
+    whether the `call` is meant for training or inference.
+  * `mask`: Boolean input mask.
+- If the layer's `call` method takes a `mask` argument (as some Keras
+  layers do), its default value will be set to the mask generated
+  for `inputs` by the previous layer (if `inputs` did come from
+  a layer that generated a corresponding mask, i.e. if it came from
+  a Keras layer with masking support).
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer's `call` method returns None (an invalid value).
+
+apply
+
+``` python
+apply(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Apply the layer on an input.
+
+This is an alias of `self.__call__`.
+
+#### Arguments:
+
+
+* `inputs`: Input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+build
+
+``` python
+build(input_shape)
+```
+
+Creates the variables of the layer (optional, for subclass implementers).
+
+This is a method that implementers of subclasses of `Layer` or `Model`
+can override if they need a state-creation step in-between
+layer instantiation and layer call.
+
+This is typically used to create the weights of `Layer` subclasses.
+
+#### Arguments:
+
+
+* `input_shape`: Instance of `TensorShape`, or list of instances of
+ `TensorShape` if the layer expects a list of inputs
+ (one instance per input).
+
+compute_mask
+
+``` python
+compute_mask(
+ inputs,
+ mask=None
+)
+```
+
+Computes an output mask tensor.
+
+
+#### Arguments:
+
+
+* `inputs`: Tensor or list of tensors.
+* `mask`: Tensor or list of tensors.
+
+
+#### Returns:
+
+None or a tensor (or list of tensors,
+ one per output tensor of the layer).
+
+
+compute_output_shape
+
+``` python
+compute_output_shape(input_shape)
+```
+
+Computes the output shape of the layer.
+
+Assumes that the layer will be built
+to match the input shape provided.
+
+#### Arguments:
+
+
+* `input_shape`: Shape tuple (tuple of integers)
+ or list of shape tuples (one per output tensor of the layer).
+ Shape tuples can include None for free dimensions,
+ instead of an integer.
+
+
+#### Returns:
+
+An output shape tuple.
+
+
+count_params
+
+``` python
+count_params()
+```
+
+Count the total number of scalars composing the weights.
+
+
+#### Returns:
+
+An integer count.
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer isn't yet built
+ (in which case its weights aren't yet defined).
+
+finalize
+
+``` python
+finalize(
+ outputs,
+ final_state,
+ sequence_lengths
+)
+```
+
+
+
+
+from_config
+
+``` python
+from_config(
+ cls,
+ config
+)
+```
+
+Creates a layer from its config.
+
+This method is the reverse of `get_config`,
+capable of instantiating the same layer from the config
+dictionary. It does not handle layer connectivity
+(handled by Network), nor weights (handled by `set_weights`).
+
+#### Arguments:
+
+
+* `config`: A Python dictionary, typically the
+ output of get_config.
+
+
+#### Returns:
+
+A layer instance.
+
+
+get_config
+
+``` python
+get_config()
+```
+
+Returns the config of the layer.
+
+A layer config is a Python dictionary (serializable)
+containing the configuration of a layer.
+The same layer can be reinstantiated later
+(without its trained weights) from this configuration.
+
+The config of a layer does not include connectivity
+information, nor the layer class name. These are handled
+by `Network` (one layer of abstraction above).
+
+#### Returns:
+
+Python dictionary.
+
+
+get_input_at
+
+``` python
+get_input_at(node_index)
+```
+
+Retrieves the input tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_input_mask_at
+
+``` python
+get_input_mask_at(node_index)
+```
+
+Retrieves the input mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple inputs).
+
+
+get_input_shape_at
+
+``` python
+get_input_shape_at(node_index)
+```
+
+Retrieves the input shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_losses_for
+
+``` python
+get_losses_for(inputs)
+```
+
+Retrieves losses relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of loss tensors of the layer that depend on `inputs`.
+
+
+get_output_at
+
+``` python
+get_output_at(node_index)
+```
+
+Retrieves the output tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_output_mask_at
+
+``` python
+get_output_mask_at(node_index)
+```
+
+Retrieves the output mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple outputs).
+
+
+get_output_shape_at
+
+``` python
+get_output_shape_at(node_index)
+```
+
+Retrieves the output shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_updates_for
+
+``` python
+get_updates_for(inputs)
+```
+
+Retrieves updates relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of update ops of the layer that depend on `inputs`.
+
+
+get_weights
+
+``` python
+get_weights()
+```
+
+Returns the current weights of the layer.
+
+
+#### Returns:
+
+Weights values as a list of numpy arrays.
+
+
+initialize
+
+``` python
+initialize(
+ inputs,
+ initial_state=None,
+ **kwargs
+)
+```
+
+Initialize the decoder.
+
+
+set_weights
+
+``` python
+set_weights(weights)
+```
+
+Sets the weights of the layer, from Numpy arrays.
+
+
+#### Arguments:
+
+
+* `weights`: a list of Numpy arrays. The number
+  of arrays and their shapes must match the
+  number and shapes of the layer's weights
+  (i.e. it should match the output of
+  `get_weights`).
+
+
+#### Raises:
+
+
+* `ValueError`: If the provided weights list does not match the
+ layer's specifications.
+
+step
+
+``` python
+step(
+ time,
+ inputs,
+ state
+)
+```
+
+Perform a decoding step.
+
+
+#### Args:
+
+
+* `time`: scalar `int32` tensor.
+* `inputs`: A (structure of) input tensors.
+* `state`: A (structure of) state tensors and TensorArrays.
+
+
+#### Returns:
+
+`(outputs, next_state, next_inputs, finished)`.
+
+
+with_name_scope
+
+``` python
+with_name_scope(
+ cls,
+ method
+)
+```
+
+Decorator to automatically enter the module name scope.
+
+```
+class MyModule(tf.Module):
+ @tf.Module.with_name_scope
+ def __call__(self, x):
+ if not hasattr(self, 'w'):
+ self.w = tf.Variable(tf.random.normal([x.shape[1], 64]))
+ return tf.matmul(x, self.w)
+```
+
+Using the above module would produce `tf.Variable`s and `tf.Tensor`s whose
+names include the module name:
+
+```
+mod = MyModule()
+mod(tf.ones([8, 32]))
+# ==> <tf.Tensor: shape=(8, 64), ...>
+mod.w
+# ==> <tf.Variable 'my_module/w:0' shape=(32, 64), ...>
+```
+
+#### Args:
+
+
+* `method`: The method to wrap.
+
+
+#### Returns:
+
+The original method wrapped such that it enters the module's name scope.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/BasicDecoderOutput.md b/docs/api_docs/python/tfa/seq2seq/BasicDecoderOutput.md
new file mode 100644
index 0000000000..7008d26ef0
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/BasicDecoderOutput.md
@@ -0,0 +1,41 @@
+
+# tfa.seq2seq.BasicDecoderOutput
+
+## Class `BasicDecoderOutput`
+
+
+
+
+
+### Aliases:
+
+* Class `tfa.seq2seq.BasicDecoderOutput`
+* Class `tfa.seq2seq.basic_decoder.BasicDecoderOutput`
+
+
+
+Defined in [`seq2seq/basic_decoder.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/basic_decoder.py).
+
+
+
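+
+`BasicDecoderOutput` is a namedtuple-style structure emitted by
+`BasicDecoder` at each step (and stacked across time by dynamic decoding).
+A small sketch of constructing and reading one:
+
+```
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+out = tfa.seq2seq.BasicDecoderOutput(
+    rnn_output=tf.zeros([4, 10]),          # per-step logits
+    sample_id=tf.zeros([4], tf.int32))     # per-step sampled token ids
+out.rnn_output.shape, out.sample_id.shape  # ==> (4, 10), (4,)
+```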
+
+## Properties
+
+rnn_output
+
+
+
+
+sample_id
+
+
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/BeamSearchDecoder.md b/docs/api_docs/python/tfa/seq2seq/BeamSearchDecoder.md
new file mode 100644
index 0000000000..406be683f0
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/BeamSearchDecoder.md
@@ -0,0 +1,1043 @@
+
+# tfa.seq2seq.BeamSearchDecoder
+
+## Class `BeamSearchDecoder`
+
+BeamSearch sampling decoder.
+
+Inherits From: [`BeamSearchDecoderMixin`](../../tfa/seq2seq/beam_search_decoder/BeamSearchDecoderMixin.md), [`BaseDecoder`](../../tfa/seq2seq/BaseDecoder.md)
+
+### Aliases:
+
+* Class `tfa.seq2seq.BeamSearchDecoder`
+* Class `tfa.seq2seq.beam_search_decoder.BeamSearchDecoder`
+
+
+
+Defined in [`seq2seq/beam_search_decoder.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/beam_search_decoder.py).
+
+
+
+**NOTE** If you are using the `BeamSearchDecoder` with a cell wrapped in
+`AttentionWrapper`, then you must ensure that:
+
+- The encoder output has been tiled to `beam_width` via
+  `tfa.seq2seq.tile_batch` (NOT `tf.tile`).
+- The `batch_size` argument passed to the `get_initial_state` method of
+ this wrapper is equal to `true_batch_size * beam_width`.
+- The initial state created with `get_initial_state` above contains a
+ `cell_state` value containing properly tiled final state from the
+ encoder.
+
+#### An example:
+
+
+
+```
+tiled_encoder_outputs = tfa.seq2seq.tile_batch(
+    encoder_outputs, multiplier=beam_width)
+tiled_encoder_final_state = tfa.seq2seq.tile_batch(
+    encoder_final_state, multiplier=beam_width)
+tiled_sequence_length = tfa.seq2seq.tile_batch(
+    sequence_length, multiplier=beam_width)
+attention_mechanism = MyFavoriteAttentionMechanism(
+    num_units=attention_depth,
+    memory=tiled_encoder_outputs,
+    memory_sequence_length=tiled_sequence_length)
+attention_cell = AttentionWrapper(cell, attention_mechanism, ...)
+decoder_initial_state = attention_cell.get_initial_state(
+ batch_size=true_batch_size * beam_width, dtype=dtype)
+decoder_initial_state = decoder_initial_state.clone(
+ cell_state=tiled_encoder_final_state)
+```
+
+When using `AttentionWrapper`, a coverage penalty is also suggested when
+computing scores (https://arxiv.org/pdf/1609.08144.pdf); it encourages
+the decoding to cover all inputs.
+
+__init__
+
+``` python
+__init__(
+ cell,
+ beam_width,
+ embedding_fn=None,
+ output_layer=None,
+ length_penalty_weight=0.0,
+ coverage_penalty_weight=0.0,
+ reorder_tensor_arrays=True,
+ **kwargs
+)
+```
+
+Initialize the BeamSearchDecoder.
+
+
+#### Args:
+
+
+* `cell`: An `RNNCell` instance.
+* `beam_width`: Python integer, the number of beams.
+* `embedding_fn`: A callable that takes a vector tensor of `ids`
+ (argmax ids).
+* `output_layer`: (Optional) An instance of `tf.keras.layers.Layer`,
+ i.e., `tf.keras.layers.Dense`. Optional layer to apply to the RNN
+ output prior to storing the result or sampling.
+* `length_penalty_weight`: Float weight to penalize length. Disabled with
+ 0.0.
+* `coverage_penalty_weight`: Float weight to penalize the coverage of
+ source sentence. Disabled with 0.0.
+* `reorder_tensor_arrays`: If `True`, `TensorArray`s' elements within the
+ cell state will be reordered according to the beam search path. If
+ the `TensorArray` can be reordered, the stacked form will be
+ returned. Otherwise, the `TensorArray` will be returned as is. Set
+ this flag to `False` if the cell state contains `TensorArray`s that
+ are not amenable to reordering.
+* `**kwargs`: Dict, other keyword arguments for initialization.
+
+
+#### Raises:
+
+
+* `TypeError`: if `cell` is not an instance of `RNNCell`,
+ or `output_layer` is not an instance of `tf.keras.layers.Layer`.
+
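+
+A construction sketch (the sizes and the start/end token ids 1 and 2 are
+made up for illustration):
+
+```
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+vocab_size, units, batch_size, beam_width = 20, 32, 2, 3
+cell = tf.keras.layers.LSTMCell(units)
+embedding = tf.random.normal([vocab_size, units])  # stand-in embeddings
+
+decoder = tfa.seq2seq.BeamSearchDecoder(
+    cell,
+    beam_width=beam_width,
+    output_layer=tf.keras.layers.Dense(vocab_size),
+    maximum_iterations=10)
+
+# Note: the state is created for batch_size * beam_width entries.
+initial_state = cell.get_initial_state(
+    batch_size=batch_size * beam_width, dtype=tf.float32)
+
+outputs, state, lengths = decoder(
+    embedding, tf.fill([batch_size], 1), 2, initial_state)
+outputs.predicted_ids  # ==> [batch_size, T, beam_width], best beam first
+```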
+
+
+## Properties
+
+activity_regularizer
+
+Optional regularizer function for the output of this layer.
+
+
+batch_size
+
+
+
+
+dtype
+
+
+
+
+dynamic
+
+
+
+
+input
+
+Retrieves the input tensor(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input tensor or list of input tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+  more than one incoming layer.
+* `RuntimeError`: If called in Eager mode.
+* `AttributeError`: If no inbound nodes are found.
+
+input_mask
+
+Retrieves the input mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input mask tensor (potentially None) or list of input
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+
+input_shape
+
+Retrieves the input shape(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer, or if all inputs
+have the same shape.
+
+#### Returns:
+
+Input shape, as an integer shape tuple
+(or list of shape tuples, one tuple per input tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined input_shape.
+* `RuntimeError`: if called in Eager mode.
+
+
+
+
+
+
+losses
+
+Losses which are associated with this `Layer`.
+
+Variable regularization tensors are created when this property is accessed,
+so it is eager safe: accessing `losses` under a `tf.GradientTape` will
+propagate gradients back to the corresponding variables.
+
+#### Returns:
+
+A list of tensors.
+
+
+metrics
+
+
+
+
+name
+
+
+
+
+name_scope
+
+Returns a `tf.name_scope` instance for this class.
+
+
+non_trainable_variables
+
+
+
+
+non_trainable_weights
+
+
+
+
+output
+
+Retrieves the output tensor(s) of a layer.
+
+Only applicable if the layer has exactly one output,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output tensor or list of output tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to more than one incoming
+  layer.
+* `RuntimeError`: if called in Eager mode.
+
+output_dtype
+
+
+
+
+output_mask
+
+Retrieves the output mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output mask tensor (potentially None) or list of output
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layer.
+
+output_shape
+
+Retrieves the output shape(s) of a layer.
+
+Only applicable if the layer has one output,
+or if all outputs have the same shape.
+
+#### Returns:
+
+Output shape, as an integer shape tuple
+(or list of shape tuples, one tuple per output tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined output shape.
+* `RuntimeError`: if called in Eager mode.
+
+output_size
+
+
+
+
+submodules
+
+Sequence of all sub-modules.
+
+Submodules are modules which are properties of this module, or found as
+properties of modules which are properties of this module (and so on).
+
+```
+a = tf.Module()
+b = tf.Module()
+c = tf.Module()
+a.b = b
+b.c = c
+assert list(a.submodules) == [b, c]
+assert list(b.submodules) == [c]
+assert list(c.submodules) == []
+```
+
+#### Returns:
+
+A sequence of all submodules.
+
+
+tracks_own_finished
+
+The BeamSearchDecoder shuffles its beams and their finished state.
+
+For this reason, it conflicts with the `dynamic_decode` function's
+tracking of finished states. Setting this property to true avoids
+early stopping of decoding due to mismanagement of the finished state
+in `dynamic_decode`.
+
+#### Returns:
+
+`True`.
+
+
+trainable
+
+
+
+
+trainable_variables
+
+
+
+
+trainable_weights
+
+
+
+
+updates
+
+
+
+
+variables
+
+Returns the list of all layer variables/weights.
+
+Alias of `self.weights`.
+
+#### Returns:
+
+A list of variables.
+
+
+weights
+
+Returns the list of all layer variables/weights.
+
+
+#### Returns:
+
+A list of variables.
+
+
+
+
+## Methods
+
+__call__
+
+``` python
+__call__(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Wraps `call`, applying pre- and post-processing steps.
+
+
+#### Arguments:
+
+
+* `inputs`: input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+
+#### Note:
+
+- The following optional keyword arguments are reserved for specific uses:
+  * `training`: Boolean scalar tensor or Python boolean indicating
+    whether the `call` is meant for training or inference.
+  * `mask`: Boolean input mask.
+- If the layer's `call` method takes a `mask` argument (as some Keras
+  layers do), its default value will be set to the mask generated
+  for `inputs` by the previous layer (if `inputs` did come from
+  a layer that generated a corresponding mask, i.e. if it came from
+  a Keras layer with masking support).
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer's `call` method returns None (an invalid value).
+
+apply
+
+``` python
+apply(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Apply the layer on an input.
+
+This is an alias of `self.__call__`.
+
+#### Arguments:
+
+
+* `inputs`: Input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+build
+
+``` python
+build(input_shape)
+```
+
+Creates the variables of the layer (optional, for subclass implementers).
+
+This is a method that implementers of subclasses of `Layer` or `Model`
+can override if they need a state-creation step in-between
+layer instantiation and layer call.
+
+This is typically used to create the weights of `Layer` subclasses.
+
+#### Arguments:
+
+
+* `input_shape`: Instance of `TensorShape`, or list of instances of
+ `TensorShape` if the layer expects a list of inputs
+ (one instance per input).
+
+compute_mask
+
+``` python
+compute_mask(
+ inputs,
+ mask=None
+)
+```
+
+Computes an output mask tensor.
+
+
+#### Arguments:
+
+
+* `inputs`: Tensor or list of tensors.
+* `mask`: Tensor or list of tensors.
+
+
+#### Returns:
+
+None or a tensor (or list of tensors,
+ one per output tensor of the layer).
+
+
+compute_output_shape
+
+``` python
+compute_output_shape(input_shape)
+```
+
+Computes the output shape of the layer.
+
+Assumes that the layer will be built
+to match the input shape provided.
+
+#### Arguments:
+
+
+* `input_shape`: Shape tuple (tuple of integers)
+ or list of shape tuples (one per output tensor of the layer).
+ Shape tuples can include None for free dimensions,
+ instead of an integer.
+
+
+#### Returns:
+
+An output shape tuple.
+
+
+count_params
+
+``` python
+count_params()
+```
+
+Count the total number of scalars composing the weights.
+
+
+#### Returns:
+
+An integer count.
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer isn't yet built
+ (in which case its weights aren't yet defined).
+
+finalize
+
+``` python
+finalize(
+ outputs,
+ final_state,
+ sequence_lengths
+)
+```
+
+Finalize and return the predicted_ids.
+
+
+#### Args:
+
+
+* `outputs`: An instance of BeamSearchDecoderOutput.
+* `final_state`: An instance of BeamSearchDecoderState. Passed through to
+ the output.
+* `sequence_lengths`: An `int64` tensor shaped
+ `[batch_size, beam_width]`. The sequence lengths determined for
+ each beam during decode. **NOTE** These are ignored; the updated
+ sequence lengths are stored in `final_state.lengths`.
+
+
+#### Returns:
+
+
+* `outputs`: An instance of `FinalBeamSearchDecoderOutput` where the
+ predicted_ids are the result of calling _gather_tree.
+* `final_state`: The same input instance of `BeamSearchDecoderState`.
+
+from_config
+
+``` python
+from_config(
+ cls,
+ config
+)
+```
+
+Creates a layer from its config.
+
+This method is the reverse of `get_config`,
+capable of instantiating the same layer from the config
+dictionary. It does not handle layer connectivity
+(handled by Network), nor weights (handled by `set_weights`).
+
+#### Arguments:
+
+
+* `config`: A Python dictionary, typically the
+ output of get_config.
+
+
+#### Returns:
+
+A layer instance.
+
+
+get_config
+
+``` python
+get_config()
+```
+
+Returns the config of the layer.
+
+A layer config is a Python dictionary (serializable)
+containing the configuration of a layer.
+The same layer can be reinstantiated later
+(without its trained weights) from this configuration.
+
+The config of a layer does not include connectivity
+information, nor the layer class name. These are handled
+by `Network` (one layer of abstraction above).
+
+#### Returns:
+
+Python dictionary.
+
+
+get_input_at
+
+``` python
+get_input_at(node_index)
+```
+
+Retrieves the input tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_input_mask_at
+
+``` python
+get_input_mask_at(node_index)
+```
+
+Retrieves the input mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple inputs).
+
+
+get_input_shape_at
+
+``` python
+get_input_shape_at(node_index)
+```
+
+Retrieves the input shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_losses_for
+
+``` python
+get_losses_for(inputs)
+```
+
+Retrieves losses relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of loss tensors of the layer that depend on `inputs`.
+
+
+get_output_at
+
+``` python
+get_output_at(node_index)
+```
+
+Retrieves the output tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_output_mask_at
+
+``` python
+get_output_mask_at(node_index)
+```
+
+Retrieves the output mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple outputs).
+
+
+get_output_shape_at
+
+``` python
+get_output_shape_at(node_index)
+```
+
+Retrieves the output shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_updates_for
+
+``` python
+get_updates_for(inputs)
+```
+
+Retrieves updates relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of update ops of the layer that depend on `inputs`.
+
+
+get_weights
+
+``` python
+get_weights()
+```
+
+Returns the current weights of the layer.
+
+
+#### Returns:
+
+Weights values as a list of numpy arrays.
+
+
+initialize
+
+``` python
+initialize(
+ embedding,
+ start_tokens,
+ end_token,
+ initial_state
+)
+```
+
+Initialize the decoder.
+
+
+#### Args:
+
+
+* `embedding`: A tensor from the embedding layer output, which is the
+ `params` argument for `embedding_lookup`.
+* `start_tokens`: `int32` vector shaped `[batch_size]`, the start tokens.
+* `end_token`: `int32` scalar, the token that marks end of decoding.
+* `initial_state`: A (possibly nested tuple of...) tensors and
+  TensorArrays.
+
+#### Returns:
+
+`(finished, start_inputs, initial_state)`.
+
+
+#### Raises:
+
+
+* `ValueError`: If `start_tokens` is not a vector or `end_token` is not a
+ scalar.
+
+set_weights
+
+``` python
+set_weights(weights)
+```
+
+Sets the weights of the layer, from Numpy arrays.
+
+
+#### Arguments:
+
+
+* `weights`: a list of Numpy arrays. The number
+  of arrays and their shapes must match the
+  number and shapes of the layer's weights
+  (i.e. it should match the output of
+  `get_weights`).
+
+
+#### Raises:
+
+
+* `ValueError`: If the provided weights list does not match the
+ layer's specifications.
+
+step
+
+``` python
+step(
+ time,
+ inputs,
+ state,
+ name=None
+)
+```
+
+Perform a decoding step.
+
+
+#### Args:
+
+
+* `time`: scalar `int32` tensor.
+* `inputs`: A (structure of) input tensors.
+* `state`: A (structure of) state tensors and TensorArrays.
+* `name`: Name scope for any created operations.
+
+
+#### Returns:
+
+`(outputs, next_state, next_inputs, finished)`.
+
+
+with_name_scope
+
+``` python
+with_name_scope(
+ cls,
+ method
+)
+```
+
+Decorator to automatically enter the module name scope.
+
+```
+class MyModule(tf.Module):
+ @tf.Module.with_name_scope
+ def __call__(self, x):
+ if not hasattr(self, 'w'):
+ self.w = tf.Variable(tf.random.normal([x.shape[1], 64]))
+ return tf.matmul(x, self.w)
+```
+
+Using the above module would produce `tf.Variable`s and `tf.Tensor`s whose
+names include the module name:
+
+```
+mod = MyModule()
+mod(tf.ones([8, 32]))
+# ==> <tf.Tensor: shape=(8, 64), ...>
+mod.w
+# ==> <tf.Variable 'my_module/w:0' shape=(32, 64), ...>
+```
+
+#### Args:
+
+
+* `method`: The method to wrap.
+
+
+#### Returns:
+
+The original method wrapped such that it enters the module's name scope.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/BeamSearchDecoderOutput.md b/docs/api_docs/python/tfa/seq2seq/BeamSearchDecoderOutput.md
new file mode 100644
index 0000000000..c7a3ed1ba3
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/BeamSearchDecoderOutput.md
@@ -0,0 +1,47 @@
+
+# tfa.seq2seq.BeamSearchDecoderOutput
+
+## Class `BeamSearchDecoderOutput`
+
+
+
+
+
+### Aliases:
+
+* Class `tfa.seq2seq.BeamSearchDecoderOutput`
+* Class `tfa.seq2seq.beam_search_decoder.BeamSearchDecoderOutput`
+
+
+
+Defined in [`seq2seq/beam_search_decoder.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/beam_search_decoder.py).
+
+
+
+
+## Properties
+
+scores
+
+
+
+
+predicted_ids
+
+
+
+
+parent_ids
+
+
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/BeamSearchDecoderState.md b/docs/api_docs/python/tfa/seq2seq/BeamSearchDecoderState.md
new file mode 100644
index 0000000000..ee2ff86db4
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/BeamSearchDecoderState.md
@@ -0,0 +1,59 @@
+
+# tfa.seq2seq.BeamSearchDecoderState
+
+## Class `BeamSearchDecoderState`
+
+
+
+
+
+### Aliases:
+
+* Class `tfa.seq2seq.BeamSearchDecoderState`
+* Class `tfa.seq2seq.beam_search_decoder.BeamSearchDecoderState`
+
+
+
+Defined in [`seq2seq/beam_search_decoder.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/beam_search_decoder.py).
+
+
+
+
+## Properties
+
+cell_state
+
+
+
+
+log_probs
+
+
+
+
+finished
+
+
+
+
+lengths
+
+
+
+
+accumulated_attention_probs
+
+
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/CustomSampler.md b/docs/api_docs/python/tfa/seq2seq/CustomSampler.md
new file mode 100644
index 0000000000..be8b08328b
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/CustomSampler.md
@@ -0,0 +1,127 @@
+
+# tfa.seq2seq.CustomSampler
+
+## Class `CustomSampler`
+
+Base abstract class that allows the user to customize sampling.
+
+Inherits From: [`Sampler`](../../tfa/seq2seq/Sampler.md)
+
+### Aliases:
+
+* Class `tfa.seq2seq.CustomSampler`
+* Class `tfa.seq2seq.sampler.CustomSampler`
+
+
+
+Defined in [`seq2seq/sampler.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/sampler.py).
+
+
+
+
+__init__
+
+``` python
+__init__(
+ initialize_fn,
+ sample_fn,
+ next_inputs_fn,
+ sample_ids_shape=None,
+ sample_ids_dtype=None
+)
+```
+
+Initializer.
+
+
+#### Args:
+
+
+* `initialize_fn`: callable that returns `(finished, next_inputs)` for
+ the first iteration.
+* `sample_fn`: callable that takes `(time, outputs, state)` and emits
+ tensor `sample_ids`.
+* `next_inputs_fn`: callable that takes
+ `(time, outputs, state, sample_ids)` and emits
+ `(finished, next_inputs, next_state)`.
+* `sample_ids_shape`: Either a list of integers, or a 1-D Tensor of type
+ `int32`, the shape of each value in the `sample_ids` batch.
+ Defaults to a scalar.
+* `sample_ids_dtype`: The dtype of the `sample_ids` tensor. Defaults to
+ int32.
+
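+
+A construction sketch; the callables follow the signatures documented
+above (the fixed decoding length of 5 steps and the one-hot next inputs
+are arbitrary choices for illustration):
+
+```
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+batch_size, vocab_size = 4, 10
+
+def initialize_fn(inputs):
+    # Assumption: initialize() forwards its `inputs` to this callable.
+    # Returns (finished, next_inputs) for the first iteration.
+    return tf.fill([batch_size], False), inputs
+
+def sample_fn(time, outputs, state):
+    # Greedy: pick the argmax of the decoder output as the sample id.
+    return tf.argmax(outputs, axis=-1, output_type=tf.int32)
+
+def next_inputs_fn(time, outputs, state, sample_ids):
+    # Stop after 5 steps; feed the sampled ids back as one-hot vectors.
+    finished = tf.fill([batch_size], time >= 4)
+    return finished, tf.one_hot(sample_ids, vocab_size), state
+
+sampler = tfa.seq2seq.CustomSampler(initialize_fn, sample_fn, next_inputs_fn)
+```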
+
+
+## Properties
+
+batch_size
+
+
+
+
+sample_ids_dtype
+
+
+
+
+sample_ids_shape
+
+
+
+
+
+
+## Methods
+
+initialize
+
+``` python
+initialize(
+ inputs,
+ **kwargs
+)
+```
+
+
+
+
+next_inputs
+
+``` python
+next_inputs(
+ time,
+ outputs,
+ state,
+ sample_ids
+)
+```
+
+
+
+
+sample
+
+``` python
+sample(
+ time,
+ outputs,
+ state
+)
+```
+
+
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/Decoder.md b/docs/api_docs/python/tfa/seq2seq/Decoder.md
new file mode 100644
index 0000000000..da123f73dc
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/Decoder.md
@@ -0,0 +1,154 @@
+
+# tfa.seq2seq.Decoder
+
+## Class `Decoder`
+
+An RNN Decoder abstract interface object.
+
+
+
+### Aliases:
+
+* Class `tfa.seq2seq.Decoder`
+* Class `tfa.seq2seq.decoder.Decoder`
+
+
+
+Defined in [`seq2seq/decoder.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/decoder.py).
+
+
+
+Concepts used by this interface:
+- `inputs`: (structure of) tensors and TensorArrays that is passed as input
+ to the RNNCell composing the decoder, at each time step.
+- `state`: (structure of) tensors and TensorArrays that is passed to the
+ RNNCell instance as the state.
+- `finished`: boolean tensor telling whether each sequence in the batch is
+ finished.
+- `outputs`: Instance of BasicDecoderOutput. Result of the decoding, at
+ each time step.
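+
+A shape-only outline of the interface (`MyDecoder` is hypothetical, and
+the abstract properties are omitted for brevity):
+
+```
+import tensorflow_addons as tfa
+
+class MyDecoder(tfa.seq2seq.Decoder):
+
+    def initialize(self, name=None):
+        # Returns (finished, initial_inputs, initial_state).
+        raise NotImplementedError
+
+    def step(self, time, inputs, state, name=None):
+        # Returns (outputs, next_state, next_inputs, finished).
+        raise NotImplementedError
+```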
+
+## Properties
+
+batch_size
+
+The batch size of input values.
+
+
+output_dtype
+
+A (possibly nested tuple of...) dtype[s].
+
+
+output_size
+
+A (possibly nested tuple of...) integer[s] or `TensorShape`
+object[s].
+
+tracks_own_finished
+
+Describes whether the Decoder keeps track of finished states.
+
+Most decoders will emit a true/false `finished` value independently
+at each time step. In this case, the `dynamic_decode` function keeps
+track of which batch entries are already finished, and performs a
+logical OR to insert new batches to the finished set.
+
+Some decoders, however, shuffle batches / beams between time steps and
+`dynamic_decode` will mix up the finished state across these entries
+because it does not track the reshuffle across time steps. In this
+case, it is up to the decoder to declare that it will keep track of its
+own finished state by setting this property to `True`.
+
+#### Returns:
+
+Python bool.
+
+
+
+
+## Methods
+
+finalize
+
+``` python
+finalize(
+ outputs,
+ final_state,
+ sequence_lengths
+)
+```
+
+
+
+
+initialize
+
+``` python
+initialize(name=None)
+```
+
+Called before any decoding iterations.
+
+This method must compute initial input values and the initial state.
+
+#### Args:
+
+
+* `name`: Name scope for any created operations.
+
+
+#### Returns:
+
+`(finished, initial_inputs, initial_state)`: initial values of
+'finished' flags, inputs and state.
+
+
+step
+
+``` python
+step(
+ time,
+ inputs,
+ state,
+ name=None
+)
+```
+
+Called per step of decoding (but only once for dynamic decoding).
+
+
+#### Args:
+
+
+* `time`: Scalar `int32` tensor. Current step number.
+* `inputs`: RNNCell input (possibly nested tuple of) tensor[s] for this
+ time step.
+* `state`: RNNCell state (possibly nested tuple of) tensor[s] from
+ previous time step.
+* `name`: Name scope for any created operations.
+
+
+#### Returns:
+
+`(outputs, next_state, next_inputs, finished)`: `outputs` is an
+object containing the decoder output, `next_state` is a (structure
+of) state tensors and TensorArrays, `next_inputs` is the tensor that
+should be used as input for the next step, `finished` is a boolean
+tensor telling whether the sequence is complete, for each sequence in
+the batch.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/FinalBeamSearchDecoderOutput.md b/docs/api_docs/python/tfa/seq2seq/FinalBeamSearchDecoderOutput.md
new file mode 100644
index 0000000000..90d2b3712f
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/FinalBeamSearchDecoderOutput.md
@@ -0,0 +1,50 @@
+
+# tfa.seq2seq.FinalBeamSearchDecoderOutput
+
+## Class `FinalBeamSearchDecoderOutput`
+
+Final outputs returned by the beam search after all decoding is finished.
+
+
+
+### Aliases:
+
+* Class `tfa.seq2seq.FinalBeamSearchDecoderOutput`
+* Class `tfa.seq2seq.beam_search_decoder.FinalBeamSearchDecoderOutput`
+
+
+
+Defined in [`seq2seq/beam_search_decoder.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/beam_search_decoder.py).
+
+
+#### Args:
+
+
+* `predicted_ids`: The final prediction. A tensor of shape
+ `[batch_size, T, beam_width]` (or `[T, batch_size, beam_width]` if
+ `output_time_major` is True). Beams are ordered from best to worst.
+* `beam_search_decoder_output`: An instance of `BeamSearchDecoderOutput` that
+ describes the state of the beam search.
+
+## Properties
+
+predicted_ids
+
+
+
+
+beam_search_decoder_output
+
+
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/GreedyEmbeddingSampler.md b/docs/api_docs/python/tfa/seq2seq/GreedyEmbeddingSampler.md
new file mode 100644
index 0000000000..7f7d726744
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/GreedyEmbeddingSampler.md
@@ -0,0 +1,138 @@
+
+
+
+
+
+
+
+
+
+
+
+
+# tfa.seq2seq.GreedyEmbeddingSampler
+
+## Class `GreedyEmbeddingSampler`
+
+A sampler for use during inference.
+
+Inherits From: [`Sampler`](../../tfa/seq2seq/Sampler.md)
+
+### Aliases:
+
+* Class `tfa.seq2seq.GreedyEmbeddingSampler`
+* Class `tfa.seq2seq.sampler.GreedyEmbeddingSampler`
+
+
+
+Defined in [`seq2seq/sampler.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/sampler.py).
+
+
+
+Uses the argmax of the output (treated as logits) and passes the
+result through an embedding layer to get the next input.
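+
+A usage sketch with `BasicDecoder` (the embedding matrix and the
+start/end token ids 1 and 2 are made up for illustration):
+
+```
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+vocab_size, units, batch_size = 20, 32, 4
+embeddings = tf.random.normal([vocab_size, units])  # stand-in embeddings
+
+decoder = tfa.seq2seq.BasicDecoder(
+    cell=tf.keras.layers.LSTMCell(units),
+    sampler=tfa.seq2seq.GreedyEmbeddingSampler(),
+    output_layer=tf.keras.layers.Dense(vocab_size),
+    maximum_iterations=10)
+
+outputs, state, lengths = decoder(
+    embeddings,  # forwarded to the sampler's initialize()
+    initial_state=decoder.cell.get_initial_state(
+        batch_size=batch_size, dtype=tf.float32),
+    start_tokens=tf.fill([batch_size], 1),
+    end_token=2)
+```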
+
+__init__
+
+``` python
+__init__(embedding_fn=None)
+```
+
+Initializer.
+
+
+#### Args:
+
+
+* `embedding_fn`: An optional callable that takes a vector tensor of `ids`
+  (argmax ids), or the `params` argument for `embedding_lookup`. The
+  returned tensor will be passed to the decoder input. Defaults to using
+  `tf.nn.embedding_lookup`.
+
+
+
+## Properties
+
+batch_size
+
+
+
+
+sample_ids_dtype
+
+
+
+
+sample_ids_shape
+
+
+
+
+
+
+## Methods
+
+initialize
+
+``` python
+initialize(
+ embedding,
+ start_tokens=None,
+ end_token=None
+)
+```
+
+Initialize the GreedyEmbeddingSampler.
+
+
+#### Args:
+
+
+* `embedding`: A tensor that contains the embedding states matrix. It
+  will be used to generate outputs with `start_tokens` and `end_token`.
+  The embedding will be ignored if `embedding_fn` was provided
+  at `__init__()`.
+* `start_tokens`: `int32` vector shaped `[batch_size]`, the start tokens.
+* `end_token`: `int32` scalar, the token that marks end of decoding.
+
+
+#### Returns:
+
+Tuple of two items: `(finished, self.start_inputs)`.
+
+
+#### Raises:
+
+
+* `ValueError`: if `start_tokens` is not a 1D tensor or `end_token` is
+ not a scalar.
+
+
+next_inputs
+
+``` python
+next_inputs(
+ time,
+ outputs,
+ state,
+ sample_ids
+)
+```
+
+next_inputs_fn for the GreedyEmbeddingSampler.
+
+
+sample
+
+``` python
+sample(
+ time,
+ outputs,
+ state
+)
+```
+
+sample for the GreedyEmbeddingSampler.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/InferenceSampler.md b/docs/api_docs/python/tfa/seq2seq/InferenceSampler.md
new file mode 100644
index 0000000000..5c2d4bd043
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/InferenceSampler.md
@@ -0,0 +1,124 @@
+
+
+
+
+
+
+
+
+
+
+
+
+# tfa.seq2seq.InferenceSampler
+
+## Class `InferenceSampler`
+
+A helper to use during inference with a custom sampling function.
+
+Inherits From: [`Sampler`](../../tfa/seq2seq/Sampler.md)
+
+### Aliases:
+
+* Class `tfa.seq2seq.InferenceSampler`
+* Class `tfa.seq2seq.sampler.InferenceSampler`
+
+
+
+Defined in [`seq2seq/sampler.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/sampler.py).
+
+
+
+
+__init__
+
+``` python
+__init__(
+ sample_fn,
+ sample_shape,
+ sample_dtype,
+ end_fn,
+ next_inputs_fn=None
+)
+```
+
+Initializer.
+
+
+#### Args:
+
+
+* `sample_fn`: A callable that takes `outputs` and emits tensor
+ `sample_ids`.
+* `sample_shape`: Either a list of integers, or a 1-D Tensor of type
+  `int32`, the shape of each sample in the batch returned by
+ `sample_fn`.
+* `sample_dtype`: the dtype of the sample returned by `sample_fn`.
+* `end_fn`: A callable that takes `sample_ids` and emits a `bool` vector
+ shaped `[batch_size]` indicating whether each sample is an end
+ token.
+* `next_inputs_fn`: (Optional) A callable that takes `sample_ids` and
+ returns the next batch of inputs. If not provided, `sample_ids` is
+ used as the next batch of inputs.
+
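+A short, hedged sketch of a custom sampling function follows; the
+rounding-based `sample_fn` and the stop condition are illustrative
+assumptions, not behavior prescribed by the API.
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+sampler = tfa.seq2seq.InferenceSampler(
+    sample_fn=lambda outputs: tf.round(outputs[:, :1]),  # assumed rule
+    sample_shape=[1],             # one sampled value per batch entry
+    sample_dtype=tf.float32,
+    end_fn=lambda sample_ids: tf.reduce_all(
+        sample_ids > 0.5, axis=-1))  # assumed per-sample end condition
+```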
+
+
+## Properties
+
+batch_size
+
+
+
+
+sample_ids_dtype
+
+
+
+
+sample_ids_shape
+
+
+
+
+
+
+## Methods
+
+initialize
+
+``` python
+initialize(start_inputs)
+```
+
+
+
+
+
+next_inputs
+
+``` python
+next_inputs(
+ time,
+ outputs,
+ state,
+ sample_ids
+)
+```
+
+
+
+
+sample
+
+``` python
+sample(
+ time,
+ outputs,
+ state
+)
+```
+
+
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/LuongAttention.md b/docs/api_docs/python/tfa/seq2seq/LuongAttention.md
new file mode 100644
index 0000000000..b727880f34
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/LuongAttention.md
@@ -0,0 +1,918 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# tfa.seq2seq.LuongAttention
+
+## Class `LuongAttention`
+
+Implements Luong-style (multiplicative) attention scoring.
+
+
+
+### Aliases:
+
+* Class `tfa.seq2seq.LuongAttention`
+* Class `tfa.seq2seq.attention_wrapper.LuongAttention`
+
+
+
+Defined in [`seq2seq/attention_wrapper.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/attention_wrapper.py).
+
+
+
+This attention has two forms. The first is standard Luong attention,
+as described in:
+
+Minh-Thang Luong, Hieu Pham, Christopher D. Manning.
+[Effective Approaches to Attention-based Neural Machine Translation.
+EMNLP 2015.](https://arxiv.org/abs/1508.04025)
+
+The second is the scaled form inspired partly by the normalized form of
+Bahdanau attention.
+
+To enable the second form, construct the object with parameter
+`scale=True`.
+
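+As a hedged sketch, the scaled form can be enabled as follows; the
+encoder output tensor and its dimensions are assumed for illustration.
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+batch_size, max_time, depth = 4, 7, 32
+encoder_outputs = tf.random.normal([batch_size, max_time, depth])
+
+attention = tfa.seq2seq.LuongAttention(
+    units=depth,
+    memory=encoder_outputs,
+    memory_sequence_length=tf.fill([batch_size], max_time),
+    scale=True)  # enables the scaled (second) form
+```
+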
+__init__
+
+``` python
+__init__(
+ units,
+ memory,
+ memory_sequence_length=None,
+ scale=False,
+ probability_fn='softmax',
+ dtype=None,
+ name='LuongAttention',
+ **kwargs
+)
+```
+
+Construct the attention mechanism.
+
+
+#### Args:
+
+
+* `units`: The depth of the attention mechanism.
+* `memory`: The memory to query; usually the output of an RNN encoder.
+ This tensor should be shaped `[batch_size, max_time, ...]`.
+* `memory_sequence_length`: (optional) Sequence lengths for the batch
+ entries in memory. If provided, the memory tensor rows are masked
+ with zeros for values past the respective sequence lengths.
+* `scale`: Python boolean. Whether to scale the energy term.
+* `probability_fn`: (optional) string, the name of the function used to
+  convert the attention score to probabilities. The default is
+  `softmax`, which maps to `tf.nn.softmax`. The other option is
+  `hardmax`, which uses the hardmax() defined within this module. Any
+  other value will result in a validation error.
+* `dtype`: The data type for the memory layer of the attention mechanism.
+* `name`: Name to use when creating ops.
+* `**kwargs`: Dictionary that contains other common arguments for layer
+ creation.
+
+
+
+## Properties
+
+activity_regularizer
+
+Optional regularizer function for the output of this layer.
+
+
+alignments_size
+
+
+
+
+dtype
+
+
+
+
+dynamic
+
+
+
+
+
+input
+
+Retrieves the input tensor(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input tensor or list of input tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layers.
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+* `AttributeError`: If no inbound nodes are found.
+
+
+input_mask
+
+Retrieves the input mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input mask tensor (potentially None) or list of input
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layers.
+
+
+input_shape
+
+Retrieves the input shape(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer, or if all inputs
+have the same shape.
+
+#### Returns:
+
+Input shape, as an integer shape tuple
+(or list of shape tuples, one tuple per input tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined input_shape.
+* `RuntimeError`: if called in Eager mode.
+
+
+
+
+
+
+losses
+
+Losses which are associated with this `Layer`.
+
+Variable regularization tensors are created when this property is accessed,
+so it is eager safe: accessing `losses` under a `tf.GradientTape` will
+propagate gradients back to the corresponding variables.
+
+#### Returns:
+
+A list of tensors.
+
+
+metrics
+
+
+
+
+name
+
+
+
+
+name_scope
+
+Returns a `tf.name_scope` instance for this class.
+
+
+non_trainable_variables
+
+
+
+
+non_trainable_weights
+
+
+
+
+output
+
+Retrieves the output tensor(s) of a layer.
+
+Only applicable if the layer has exactly one output,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output tensor or list of output tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to more than one incoming
+ layers.
+* `RuntimeError`: if called in Eager mode.
+
+output_mask
+
+Retrieves the output mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output mask tensor (potentially None) or list of output
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layers.
+
+output_shape
+
+Retrieves the output shape(s) of a layer.
+
+Only applicable if the layer has one output,
+or if all outputs have the same shape.
+
+#### Returns:
+
+Output shape, as an integer shape tuple
+(or list of shape tuples, one tuple per output tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined output shape.
+* `RuntimeError`: if called in Eager mode.
+
+state_size
+
+
+
+
+submodules
+
+Sequence of all sub-modules.
+
+Submodules are modules which are properties of this module, or found as
+properties of modules which are properties of this module (and so on).
+
+```
+a = tf.Module()
+b = tf.Module()
+c = tf.Module()
+a.b = b
+b.c = c
+assert list(a.submodules) == [b, c]
+assert list(b.submodules) == [c]
+assert list(c.submodules) == []
+```
+
+#### Returns:
+
+A sequence of all submodules.
+
+
+trainable
+
+
+
+
+trainable_variables
+
+
+
+
+trainable_weights
+
+
+
+
+updates
+
+
+
+
+variables
+
+Returns the list of all layer variables/weights.
+
+Alias of `self.weights`.
+
+#### Returns:
+
+A list of variables.
+
+
+weights
+
+Returns the list of all layer variables/weights.
+
+
+#### Returns:
+
+A list of variables.
+
+
+
+
+## Methods
+
+__call__
+
+``` python
+__call__(
+ inputs,
+ **kwargs
+)
+```
+
+Preprocess the inputs before calling `base_layer.__call__()`.
+
+Note that there are two situations here: one for setting up the memory,
+and one with the actual query and state.
+1. When the memory has not been configured, we just pass all the
+   parameters to base_layer.__call__(), which will then invoke
+   self.call() with the proper inputs, allowing this class to set up
+   the memory.
+2. When the memory has already been set up, the input should contain
+   the query and state, and optionally the processed memory. If the
+   processed memory is not included in the input, we have to append it
+   to the inputs and give it to base_layer.__call__(). The processed
+   memory is the output of the first invocation of self.__call__(). If
+   we don't add it here, then from the Keras perspective the graph is
+   disconnected, since the output from the previous call is never used.
+
+#### Args:
+
+
+* `inputs`: the input tensors.
+* `**kwargs`: dict, other keyword arguments for `__call__()`.
+
+apply
+
+``` python
+apply(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Apply the layer on an input.
+
+This is an alias of `self.__call__`.
+
+#### Arguments:
+
+
+* `inputs`: Input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+build
+
+``` python
+build(input_shape)
+```
+
+
+
+
+compute_mask
+
+``` python
+compute_mask(
+ inputs,
+ mask=None
+)
+```
+
+
+
+
+compute_output_shape
+
+``` python
+compute_output_shape(input_shape)
+```
+
+Computes the output shape of the layer.
+
+Assumes that the layer will be built to match the input shape provided.
+
+#### Arguments:
+
+
+* `input_shape`: Shape tuple (tuple of integers)
+ or list of shape tuples (one per output tensor of the layer).
+ Shape tuples can include None for free dimensions,
+ instead of an integer.
+
+
+#### Returns:
+
+An output shape tuple.
+
+
+count_params
+
+``` python
+count_params()
+```
+
+Count the total number of scalars composing the weights.
+
+
+#### Returns:
+
+An integer count.
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer isn't yet built
+ (in which case its weights aren't yet defined).
+
+deserialize_inner_layer_from_config
+
+``` python
+deserialize_inner_layer_from_config(
+ cls,
+ config,
+ custom_objects
+)
+```
+
+Helper method that reconstructs the query and memory from the config.
+
+In the get_config() method, the query and memory layer configs are
+serialized into dicts for persistence; this method performs the reverse
+action to reconstruct the layer from the config.
+
+#### Args:
+
+
+* `config`: dict, the configs that will be used to reconstruct the
+ object.
+* `custom_objects`: dict mapping class names (or function names) of
+ custom (non-Keras) objects to class/functions.
+
+#### Returns:
+
+
+* `config`: dict, the config with layer instance created, which is ready
+ to be used as init parameters.
+
+from_config
+
+``` python
+@classmethod
+from_config(
+ cls,
+ config,
+ custom_objects=None
+)
+```
+
+
+
+
+get_config
+
+``` python
+get_config()
+```
+
+
+
+
+
+get_input_at
+
+``` python
+get_input_at(node_index)
+```
+
+Retrieves the input tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+
+get_input_mask_at
+
+``` python
+get_input_mask_at(node_index)
+```
+
+Retrieves the input mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple inputs).
+
+
+
+get_input_shape_at
+
+``` python
+get_input_shape_at(node_index)
+```
+
+Retrieves the input shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_losses_for
+
+``` python
+get_losses_for(inputs)
+```
+
+Retrieves losses relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of loss tensors of the layer that depend on `inputs`.
+
+
+get_output_at
+
+``` python
+get_output_at(node_index)
+```
+
+Retrieves the output tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_output_mask_at
+
+``` python
+get_output_mask_at(node_index)
+```
+
+Retrieves the output mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple outputs).
+
+
+get_output_shape_at
+
+``` python
+get_output_shape_at(node_index)
+```
+
+Retrieves the output shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_updates_for
+
+``` python
+get_updates_for(inputs)
+```
+
+Retrieves updates relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of update ops of the layer that depend on `inputs`.
+
+
+get_weights
+
+``` python
+get_weights()
+```
+
+Returns the current weights of the layer.
+
+
+#### Returns:
+
+Weights values as a list of numpy arrays.
+
+
+initial_alignments
+
+``` python
+initial_alignments(
+ batch_size,
+ dtype
+)
+```
+
+Creates the initial alignment values for the `AttentionWrapper`
+class.
+
+This is important for AttentionMechanisms that use the previous
+alignment to calculate the alignment at the next time step
+(e.g. monotonic attention).
+
+The default behavior is to return a tensor of all zeros.
+
+#### Args:
+
+
+* `batch_size`: `int32` scalar, the batch_size.
+* `dtype`: The `dtype`.
+
+
+#### Returns:
+
+A `dtype` tensor shaped `[batch_size, alignments_size]`
+(`alignments_size` is the values' `max_time`).
+
+
+initial_state
+
+``` python
+initial_state(
+ batch_size,
+ dtype
+)
+```
+
+Creates the initial state values for the `AttentionWrapper` class.
+
+This is important for AttentionMechanisms that use the previous
+alignment to calculate the alignment at the next time step
+(e.g. monotonic attention).
+
+The default behavior is to return the same output as
+initial_alignments.
+
+#### Args:
+
+
+* `batch_size`: `int32` scalar, the batch_size.
+* `dtype`: The `dtype`.
+
+
+#### Returns:
+
+A structure of all-zero tensors with shapes as described by
+`state_size`.
+
+
+set_weights
+
+``` python
+set_weights(weights)
+```
+
+Sets the weights of the layer, from Numpy arrays.
+
+
+#### Arguments:
+
+
+* `weights`: a list of Numpy arrays. The number
+ of arrays and their shape must match
+ number of the dimensions of the weights
+ of the layer (i.e. it should match the
+ output of `get_weights`).
+
+
+#### Raises:
+
+
+* `ValueError`: If the provided weights list does not match the
+ layer's specifications.
+
+with_name_scope
+
+``` python
+with_name_scope(
+ cls,
+ method
+)
+```
+
+Decorator to automatically enter the module name scope.
+
+```
+class MyModule(tf.Module):
+ @tf.Module.with_name_scope
+ def __call__(self, x):
+ if not hasattr(self, 'w'):
+ self.w = tf.Variable(tf.random.normal([x.shape[1], 64]))
+ return tf.matmul(x, self.w)
+```
+
+Using the above module would produce `tf.Variable`s and `tf.Tensor`s whose
+names include the module name:
+
+```
+mod = MyModule()
+mod(tf.ones([8, 32]))
+# ==>
+mod.w
+# ==>
+```
+
+#### Args:
+
+
+* `method`: The method to wrap.
+
+
+#### Returns:
+
+The original method wrapped such that it enters the module's name scope.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/LuongMonotonicAttention.md b/docs/api_docs/python/tfa/seq2seq/LuongMonotonicAttention.md
new file mode 100644
index 0000000000..c1d6721f9c
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/LuongMonotonicAttention.md
@@ -0,0 +1,920 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# tfa.seq2seq.LuongMonotonicAttention
+
+## Class `LuongMonotonicAttention`
+
+Monotonic attention mechanism with Luong-style energy function.
+
+
+
+### Aliases:
+
+* Class `tfa.seq2seq.LuongMonotonicAttention`
+* Class `tfa.seq2seq.attention_wrapper.LuongMonotonicAttention`
+
+
+
+Defined in [`seq2seq/attention_wrapper.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/attention_wrapper.py).
+
+
+
+This type of attention enforces a monotonic constraint on the attention
+distributions; that is, once the model attends to a given point in the
+memory, it can't attend to any prior points at subsequent output timesteps.
+It achieves this by using the _monotonic_probability_fn instead of softmax
+to construct its attention distributions. Otherwise, it is equivalent to
+LuongAttention. This approach is proposed in
+
+[Colin Raffel, Minh-Thang Luong, Peter J. Liu, Ron J. Weiss, Douglas Eck,
+"Online and Linear-Time Attention by Enforcing Monotonic Alignments."
+ICML 2017.](https://arxiv.org/abs/1704.00784)
+
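+A minimal construction sketch follows; the encoder output tensor is an
+assumed placeholder and the hyperparameter values are illustrative only.
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+batch_size, max_time, depth = 4, 7, 32
+encoder_outputs = tf.random.normal([batch_size, max_time, depth])
+
+attention = tfa.seq2seq.LuongMonotonicAttention(
+    units=depth,
+    memory=encoder_outputs,
+    sigmoid_noise=1.0,       # pre-sigmoid noise, often used while training
+    score_bias_init=-4.0,    # negative init suggested for long memories
+    mode='parallel')
+```
+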
+__init__
+
+``` python
+__init__(
+ units,
+ memory,
+ memory_sequence_length=None,
+ scale=False,
+ sigmoid_noise=0.0,
+ sigmoid_noise_seed=None,
+ score_bias_init=0.0,
+ mode='parallel',
+ dtype=None,
+ name='LuongMonotonicAttention',
+ **kwargs
+)
+```
+
+Construct the Attention mechanism.
+
+
+#### Args:
+
+
+* `units`: The depth of the query mechanism.
+* `memory`: The memory to query; usually the output of an RNN encoder.
+ This tensor should be shaped `[batch_size, max_time, ...]`.
+* `memory_sequence_length`: (optional) Sequence lengths for the batch
+ entries in memory. If provided, the memory tensor rows are masked
+ with zeros for values past the respective sequence lengths.
+* `scale`: Python boolean. Whether to scale the energy term.
+* `sigmoid_noise`: Standard deviation of pre-sigmoid noise. See the
+ docstring for `_monotonic_probability_fn` for more information.
+* `sigmoid_noise_seed`: (optional) Random seed for pre-sigmoid noise.
+* `score_bias_init`: Initial value for score bias scalar. It's
+ recommended to initialize this to a negative value when the length
+ of the memory is large.
+* `mode`: How to compute the attention distribution. Must be one of
+ 'recursive', 'parallel', or 'hard'. See the docstring for
+  `tfa.seq2seq.monotonic_attention` for more information.
+* `dtype`: The data type for the query and memory layers of the attention
+ mechanism.
+* `name`: Name to use when creating ops.
+* `**kwargs`: Dictionary that contains other common arguments for layer
+ creation.
+
+
+
+## Properties
+
+activity_regularizer
+
+Optional regularizer function for the output of this layer.
+
+
+alignments_size
+
+
+
+
+dtype
+
+
+
+
+dynamic
+
+
+
+
+
+input
+
+Retrieves the input tensor(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input tensor or list of input tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layers.
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+* `AttributeError`: If no inbound nodes are found.
+
+
+input_mask
+
+Retrieves the input mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Input mask tensor (potentially None) or list of input
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layers.
+
+
+input_shape
+
+Retrieves the input shape(s) of a layer.
+
+Only applicable if the layer has exactly one input,
+i.e. if it is connected to one incoming layer, or if all inputs
+have the same shape.
+
+#### Returns:
+
+Input shape, as an integer shape tuple
+(or list of shape tuples, one tuple per input tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined input_shape.
+* `RuntimeError`: if called in Eager mode.
+
+
+
+
+
+
+losses
+
+Losses which are associated with this `Layer`.
+
+Variable regularization tensors are created when this property is accessed,
+so it is eager safe: accessing `losses` under a `tf.GradientTape` will
+propagate gradients back to the corresponding variables.
+
+#### Returns:
+
+A list of tensors.
+
+
+metrics
+
+
+
+
+name
+
+
+
+
+name_scope
+
+Returns a `tf.name_scope` instance for this class.
+
+
+non_trainable_variables
+
+
+
+
+non_trainable_weights
+
+
+
+
+output
+
+Retrieves the output tensor(s) of a layer.
+
+Only applicable if the layer has exactly one output,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output tensor or list of output tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to more than one incoming
+ layers.
+* `RuntimeError`: if called in Eager mode.
+
+output_mask
+
+Retrieves the output mask tensor(s) of a layer.
+
+Only applicable if the layer has exactly one inbound node,
+i.e. if it is connected to one incoming layer.
+
+#### Returns:
+
+Output mask tensor (potentially None) or list of output
+mask tensors.
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer is connected to
+more than one incoming layers.
+
+output_shape
+
+Retrieves the output shape(s) of a layer.
+
+Only applicable if the layer has one output,
+or if all outputs have the same shape.
+
+#### Returns:
+
+Output shape, as an integer shape tuple
+(or list of shape tuples, one tuple per output tensor).
+
+
+
+#### Raises:
+
+
+* `AttributeError`: if the layer has no defined output shape.
+* `RuntimeError`: if called in Eager mode.
+
+state_size
+
+
+
+
+submodules
+
+Sequence of all sub-modules.
+
+Submodules are modules which are properties of this module, or found as
+properties of modules which are properties of this module (and so on).
+
+```
+a = tf.Module()
+b = tf.Module()
+c = tf.Module()
+a.b = b
+b.c = c
+assert list(a.submodules) == [b, c]
+assert list(b.submodules) == [c]
+assert list(c.submodules) == []
+```
+
+#### Returns:
+
+A sequence of all submodules.
+
+
+trainable
+
+
+
+
+trainable_variables
+
+
+
+
+trainable_weights
+
+
+
+
+updates
+
+
+
+
+variables
+
+Returns the list of all layer variables/weights.
+
+Alias of `self.weights`.
+
+#### Returns:
+
+A list of variables.
+
+
+weights
+
+Returns the list of all layer variables/weights.
+
+
+#### Returns:
+
+A list of variables.
+
+
+
+
+## Methods
+
+__call__
+
+``` python
+__call__(
+ inputs,
+ **kwargs
+)
+```
+
+Preprocess the inputs before calling `base_layer.__call__()`.
+
+Note that there are two situations here: one for setting up the memory,
+and one with the actual query and state.
+1. When the memory has not been configured, we just pass all the
+   parameters to base_layer.__call__(), which will then invoke
+   self.call() with the proper inputs, allowing this class to set up
+   the memory.
+2. When the memory has already been set up, the input should contain
+   the query and state, and optionally the processed memory. If the
+   processed memory is not included in the input, we have to append it
+   to the inputs and give it to base_layer.__call__(). The processed
+   memory is the output of the first invocation of self.__call__(). If
+   we don't add it here, then from the Keras perspective the graph is
+   disconnected, since the output from the previous call is never used.
+
+#### Args:
+
+
+* `inputs`: the input tensors.
+* `**kwargs`: dict, other keyword arguments for `__call__()`.
+
+apply
+
+``` python
+apply(
+ inputs,
+ *args,
+ **kwargs
+)
+```
+
+Apply the layer on an input.
+
+This is an alias of `self.__call__`.
+
+#### Arguments:
+
+
+* `inputs`: Input tensor(s).
+* `*args`: additional positional arguments to be passed to `self.call`.
+* `**kwargs`: additional keyword arguments to be passed to `self.call`.
+
+
+#### Returns:
+
+Output tensor(s).
+
+
+build
+
+``` python
+build(input_shape)
+```
+
+
+
+
+compute_mask
+
+``` python
+compute_mask(
+ inputs,
+ mask=None
+)
+```
+
+
+
+
+compute_output_shape
+
+``` python
+compute_output_shape(input_shape)
+```
+
+Computes the output shape of the layer.
+
+Assumes that the layer will be built to match the input shape provided.
+
+#### Arguments:
+
+
+* `input_shape`: Shape tuple (tuple of integers)
+ or list of shape tuples (one per output tensor of the layer).
+ Shape tuples can include None for free dimensions,
+ instead of an integer.
+
+
+#### Returns:
+
+An output shape tuple.
+
+
+count_params
+
+``` python
+count_params()
+```
+
+Count the total number of scalars composing the weights.
+
+
+#### Returns:
+
+An integer count.
+
+
+
+#### Raises:
+
+
+* `ValueError`: if the layer isn't yet built
+ (in which case its weights aren't yet defined).
+
+deserialize_inner_layer_from_config
+
+``` python
+deserialize_inner_layer_from_config(
+ cls,
+ config,
+ custom_objects
+)
+```
+
+Helper method that reconstructs the query and memory from the config.
+
+In the get_config() method, the query and memory layer configs are
+serialized into dicts for persistence; this method performs the reverse
+action to reconstruct the layer from the config.
+
+#### Args:
+
+
+* `config`: dict, the configs that will be used to reconstruct the
+ object.
+* `custom_objects`: dict mapping class names (or function names) of
+ custom (non-Keras) objects to class/functions.
+
+#### Returns:
+
+
+* `config`: dict, the config with layer instance created, which is ready
+ to be used as init parameters.
+
+from_config
+
+``` python
+@classmethod
+from_config(
+ cls,
+ config,
+ custom_objects=None
+)
+```
+
+
+
+
+get_config
+
+``` python
+get_config()
+```
+
+
+
+
+
+get_input_at
+
+``` python
+get_input_at(node_index)
+```
+
+Retrieves the input tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+
+get_input_mask_at
+
+``` python
+get_input_mask_at(node_index)
+```
+
+Retrieves the input mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple inputs).
+
+
+
+get_input_shape_at
+
+``` python
+get_input_shape_at(node_index)
+```
+
+Retrieves the input shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple inputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_losses_for
+
+``` python
+get_losses_for(inputs)
+```
+
+Retrieves losses relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of loss tensors of the layer that depend on `inputs`.
+
+
+get_output_at
+
+``` python
+get_output_at(node_index)
+```
+
+Retrieves the output tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A tensor (or list of tensors if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_output_mask_at
+
+``` python
+get_output_mask_at(node_index)
+```
+
+Retrieves the output mask tensor(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A mask tensor
+(or list of tensors if the layer has multiple outputs).
+
+
+get_output_shape_at
+
+``` python
+get_output_shape_at(node_index)
+```
+
+Retrieves the output shape(s) of a layer at a given node.
+
+
+#### Arguments:
+
+
+* `node_index`: Integer, index of the node
+ from which to retrieve the attribute.
+ E.g. `node_index=0` will correspond to the
+ first time the layer was called.
+
+
+#### Returns:
+
+A shape tuple
+(or list of shape tuples if the layer has multiple outputs).
+
+
+
+#### Raises:
+
+
+* `RuntimeError`: If called in Eager mode.
+
+get_updates_for
+
+``` python
+get_updates_for(inputs)
+```
+
+Retrieves updates relevant to a specific set of inputs.
+
+
+#### Arguments:
+
+
+* `inputs`: Input tensor or list/tuple of input tensors.
+
+
+#### Returns:
+
+List of update ops of the layer that depend on `inputs`.
+
+
+get_weights
+
+``` python
+get_weights()
+```
+
+Returns the current weights of the layer.
+
+
+#### Returns:
+
+Weights values as a list of numpy arrays.
+
+
+initial_alignments
+
+``` python
+initial_alignments(
+ batch_size,
+ dtype
+)
+```
+
+Creates the initial alignment values for the monotonic attentions.
+
+Initializes to dirac distributions, i.e.
+[1, 0, 0, ...memory length..., 0] for all entries in the batch.
+
+#### Args:
+
+
+* `batch_size`: `int32` scalar, the batch_size.
+* `dtype`: The `dtype`.
+
+
+#### Returns:
+
+A `dtype` tensor shaped `[batch_size, alignments_size]`
+(`alignments_size` is the values' `max_time`).
+
+
+initial_state
+
+``` python
+initial_state(
+ batch_size,
+ dtype
+)
+```
+
+Creates the initial state values for the `AttentionWrapper` class.
+
+This is important for AttentionMechanisms that use the previous
+alignment to calculate the alignment at the next time step
+(e.g. monotonic attention).
+
+The default behavior is to return the same output as
+initial_alignments.
+
+#### Args:
+
+
+* `batch_size`: `int32` scalar, the batch_size.
+* `dtype`: The `dtype`.
+
+
+#### Returns:
+
+A structure of all-zero tensors with shapes as described by
+`state_size`.
+
+
+set_weights
+
+``` python
+set_weights(weights)
+```
+
+Sets the weights of the layer, from Numpy arrays.
+
+
+#### Arguments:
+
+
+* `weights`: a list of Numpy arrays. The number
+ of arrays and their shape must match
+ number of the dimensions of the weights
+ of the layer (i.e. it should match the
+ output of `get_weights`).
+
+
+#### Raises:
+
+
+* `ValueError`: If the provided weights list does not match the
+ layer's specifications.
+
+with_name_scope
+
+``` python
+with_name_scope(
+ cls,
+ method
+)
+```
+
+Decorator to automatically enter the module name scope.
+
+```
+class MyModule(tf.Module):
+ @tf.Module.with_name_scope
+ def __call__(self, x):
+ if not hasattr(self, 'w'):
+ self.w = tf.Variable(tf.random.normal([x.shape[1], 64]))
+ return tf.matmul(x, self.w)
+```
+
+Using the above module would produce `tf.Variable`s and `tf.Tensor`s whose
+names include the module name:
+
+```
+mod = MyModule()
+mod(tf.ones([8, 32]))
+# ==>
+mod.w
+# ==>
+```
+
+#### Args:
+
+
+* `method`: The method to wrap.
+
+
+#### Returns:
+
+The original method wrapped such that it enters the module's name scope.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/SampleEmbeddingSampler.md b/docs/api_docs/python/tfa/seq2seq/SampleEmbeddingSampler.md
new file mode 100644
index 0000000000..0e5c21769f
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/SampleEmbeddingSampler.md
@@ -0,0 +1,155 @@
+
+
+
+
+
+
+
+
+
+
+
+
+# tfa.seq2seq.SampleEmbeddingSampler
+
+## Class `SampleEmbeddingSampler`
+
+A sampler for use during inference.
+
+Inherits From: [`GreedyEmbeddingSampler`](../../tfa/seq2seq/GreedyEmbeddingSampler.md)
+
+### Aliases:
+
+* Class `tfa.seq2seq.SampleEmbeddingSampler`
+* Class `tfa.seq2seq.sampler.SampleEmbeddingSampler`
+
+
+
+Defined in [`seq2seq/sampler.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/sampler.py).
+
+
+
+Uses sampling (from a distribution) instead of argmax and passes the
+result through an embedding layer to get the next input.
+
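+The sketch below shows how a temperature might be supplied; the value
+0.5 is purely an example, not a recommended setting.
+
+``` python
+import tensorflow_addons as tfa
+
+# Temperatures below 1.0 push sampling towards the argmax; values above
+# 1.0 flatten the distribution and yield more random samples.
+sampler = tfa.seq2seq.SampleEmbeddingSampler(softmax_temperature=0.5)
+```
+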
+__init__
+
+``` python
+__init__(
+ embedding_fn=None,
+ softmax_temperature=None,
+ seed=None
+)
+```
+
+Initializer.
+
+
+#### Args:
+
+
+* `embedding_fn`: (Optional) A callable that takes a vector tensor of
+ `ids` (argmax ids), or the `params` argument for
+ `embedding_lookup`. The returned tensor will be passed to the
+ decoder input.
+* `softmax_temperature`: (Optional) `float32` scalar, value to divide the
+ logits by before computing the softmax. Larger values (above 1.0)
+ result in more random samples, while smaller values push the
+ sampling distribution towards the argmax. Must be strictly greater
+ than 0. Defaults to 1.0.
+* `seed`: (Optional) The sampling seed.
+
+
+#### Raises:
+
+
+* `ValueError`: if `start_tokens` is not a 1D tensor or `end_token` is
+ not a scalar.
+
+
+
+## Properties
+
+batch_size
+
+
+
+
+sample_ids_dtype
+
+
+
+
+sample_ids_shape
+
+
+
+
+
+
+## Methods
+
+initialize
+
+``` python
+initialize(
+ embedding,
+ start_tokens=None,
+ end_token=None
+)
+```
+
+Initialize the GreedyEmbeddingSampler.
+
+
+#### Args:
+
+
+* `embedding`: A tensor that contains the embedding states matrix. It
+  will be used to generate outputs with `start_tokens` and `end_token`.
+  The embedding will be ignored if `embedding_fn` was provided
+  at `__init__()`.
+* `start_tokens`: `int32` vector shaped `[batch_size]`, the start tokens.
+* `end_token`: `int32` scalar, the token that marks end of decoding.
+
+
+#### Returns:
+
+Tuple of two items: `(finished, self.start_inputs)`.
+
+
+#### Raises:
+
+
+* `ValueError`: if `start_tokens` is not a 1D tensor or `end_token` is
+ not a scalar.
+
+
+next_inputs
+
+``` python
+next_inputs(
+ time,
+ outputs,
+ state,
+ sample_ids
+)
+```
+
+next_inputs_fn for the GreedyEmbeddingSampler.
+
+
+sample
+
+``` python
+sample(
+ time,
+ outputs,
+ state
+)
+```
+
+sample for the SampleEmbeddingSampler.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/Sampler.md b/docs/api_docs/python/tfa/seq2seq/Sampler.md
new file mode 100644
index 0000000000..cfd9219217
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/Sampler.md
@@ -0,0 +1,128 @@
+
+
+
+
+
+
+
+
+
+
+
+# tfa.seq2seq.Sampler
+
+## Class `Sampler`
+
+Interface for implementing sampling in seq2seq decoders.
+
+
+
+### Aliases:
+
+* Class `tfa.seq2seq.Sampler`
+* Class `tfa.seq2seq.sampler.Sampler`
+
+
+
+Defined in [`seq2seq/sampler.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/sampler.py).
+
+
+
+Sampler instances are used by `BasicDecoder`. The normal usage of a
+sampler looks like this:
+
+``` python
+sampler = Sampler(init_args)
+(initial_finished, initial_inputs) = sampler.initialize(input_tensors)
+for time_step in range(time):
+    cell_output, cell_state = cell.call(cell_input, previous_state)
+    sample_ids = sampler.sample(time_step, cell_output, cell_state)
+    (finished, next_inputs, next_state) = sampler.next_inputs(
+        time_step, cell_output, cell_state, sample_ids)
+```
+
+Note that tensor inputs should not be fed to the Sampler as `__init__()`
+parameters; instead, they should be fed by decoders via `initialize()`.
+
+## Properties
+
+batch_size
+
+Batch size of tensor returned by `sample`.
+
+Returns a scalar int32 tensor. The return value might not be available
+before the invocation of initialize(); in that case, a ValueError is
+raised.
+
+sample_ids_dtype
+
+DType of tensor returned by `sample`.
+
+Returns a DType. The return value might not be available before the
+invocation of initialize().
+
+sample_ids_shape
+
+Shape of tensor returned by `sample`, excluding the batch dimension.
+
+Returns a `TensorShape`. The return value might not be available
+before the invocation of initialize().
+
+
+
+## Methods
+
+initialize
+
+``` python
+initialize(
+ inputs,
+ **kwargs
+)
+```
+
+Initialize the sampler with the input tensors.
+
+This method is supposed to be invoked only once, before calling any
+other methods of the Sampler.
+
+#### Args:
+
+
+* `inputs`: A (structure of) input tensors; it could be a nested tuple
+  or a single tensor.
+* `**kwargs`: Other kwargs for initialization. It could contain tensors
+  like a mask for the inputs, or non-tensor parameters.
+
+
+#### Returns:
+
+`(initial_finished, initial_inputs)`.
+
+
+
+next_inputs
+
+``` python
+next_inputs(
+ time,
+ outputs,
+ state,
+ sample_ids
+)
+```
+
+Returns `(finished, next_inputs, next_state)`.
+
+
+sample
+
+``` python
+sample(
+ time,
+ outputs,
+ state
+)
+```
+
+Returns `sample_ids`.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/ScheduledEmbeddingTrainingSampler.md b/docs/api_docs/python/tfa/seq2seq/ScheduledEmbeddingTrainingSampler.md
new file mode 100644
index 0000000000..5f4f46f74d
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/ScheduledEmbeddingTrainingSampler.md
@@ -0,0 +1,133 @@
+
+
+
+
+
+
+
+
+
+
+
+
+# tfa.seq2seq.ScheduledEmbeddingTrainingSampler
+
+## Class `ScheduledEmbeddingTrainingSampler`
+
+A training sampler that adds scheduled sampling.
+
+Inherits From: [`TrainingSampler`](../../tfa/seq2seq/TrainingSampler.md)
+
+### Aliases:
+
+* Class `tfa.seq2seq.ScheduledEmbeddingTrainingSampler`
+* Class `tfa.seq2seq.sampler.ScheduledEmbeddingTrainingSampler`
+
+
+
+Defined in [`seq2seq/sampler.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/sampler.py).
+
+
+
+Returns -1s for sample_ids where no sampling took place; valid
+sample id values elsewhere.
+
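+As a hedged sketch, a sampler that samples from the output ids half of
+the time could be constructed as below; the probability and the one-hot
+`embedding_fn` are illustrative assumptions.
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+sampler = tfa.seq2seq.ScheduledEmbeddingTrainingSampler(
+    sampling_probability=tf.constant(0.5),                # assumed schedule
+    embedding_fn=lambda ids: tf.one_hot(ids, depth=100))  # assumed lookup
+```
+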
+__init__
+
+``` python
+__init__(
+ sampling_probability,
+ embedding_fn=None,
+ time_major=False,
+ seed=None,
+ scheduling_seed=None
+)
+```
+
+Initializer.
+
+
+#### Args:
+
+
+* `sampling_probability`: A `float32` 0-D or 1-D tensor: the probability
+ of sampling categorically from the output ids instead of reading
+ directly from the inputs.
+* `embedding_fn`: A callable that takes a vector tensor of `ids`
+ (argmax ids), or the `params` argument for `embedding_lookup`.
+* `time_major`: Python bool. Whether the tensors in `inputs` are time
+ major. If `False` (default), they are assumed to be batch major.
+* `seed`: The sampling seed.
+* `scheduling_seed`: The schedule decision rule sampling seed.
+
+
+#### Raises:
+
+
+* `ValueError`: if `sampling_probability` is not a scalar or vector.
+
+
+
+## Properties
+
+batch_size
+
+
+
+
+sample_ids_dtype
+
+
+
+
+sample_ids_shape
+
+
+
+
+
+
+## Methods
+
+initialize
+
+``` python
+initialize(
+ inputs,
+ sequence_length=None,
+ embedding=None
+)
+```
+
+
+
+
+
+next_inputs
+
+``` python
+next_inputs(
+ time,
+ outputs,
+ state,
+ sample_ids
+)
+```
+
+
+
+
+sample
+
+``` python
+sample(
+ time,
+ outputs,
+ state
+)
+```
+
+
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/ScheduledOutputTrainingSampler.md b/docs/api_docs/python/tfa/seq2seq/ScheduledOutputTrainingSampler.md
new file mode 100644
index 0000000000..fdeaa44a86
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/ScheduledOutputTrainingSampler.md
@@ -0,0 +1,132 @@
+
+
+
+
+
+
+
+
+
+
+
+
+# tfa.seq2seq.ScheduledOutputTrainingSampler
+
+## Class `ScheduledOutputTrainingSampler`
+
+A training sampler that adds scheduled sampling directly to outputs.
+
+Inherits From: [`TrainingSampler`](../../tfa/seq2seq/TrainingSampler.md)
+
+### Aliases:
+
+* Class `tfa.seq2seq.ScheduledOutputTrainingSampler`
+* Class `tfa.seq2seq.sampler.ScheduledOutputTrainingSampler`
+
+
+
+Defined in [`seq2seq/sampler.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/sampler.py).
+
+
+
+Returns False for sample_ids where no sampling took place; True
+elsewhere.
+
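+A minimal construction sketch, with an assumed sampling probability:
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+# With probability 0.25 the RNN outputs are fed back as the next inputs;
+# otherwise the ground-truth inputs are read. The value is illustrative.
+sampler = tfa.seq2seq.ScheduledOutputTrainingSampler(
+    sampling_probability=tf.constant(0.25))
+```
+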
+__init__
+
+``` python
+__init__(
+ sampling_probability,
+ time_major=False,
+ seed=None,
+ next_inputs_fn=None
+)
+```
+
+Initializer.
+
+
+#### Args:
+
+
+* `sampling_probability`: A `float32` scalar tensor: the probability of
+ sampling from the outputs instead of reading directly from the
+ inputs.
+* `time_major`: Python bool. Whether the tensors in `inputs` are time
+ major. If `False` (default), they are assumed to be batch major.
+* `seed`: The sampling seed.
+* `next_inputs_fn`: (Optional) callable to apply to the RNN outputs to
+ create the next input when sampling. If `None` (default), the RNN
+ outputs will be used as the next inputs.
+
+
+#### Raises:
+
+
+* `ValueError`: if `sampling_probability` is not a scalar or vector.
+
+
+
+## Properties
+
+batch_size
+
+
+
+
+sample_ids_dtype
+
+
+
+
+sample_ids_shape
+
+
+
+
+
+
+## Methods
+
+initialize
+
+``` python
+initialize(
+ inputs,
+ sequence_length=None,
+ auxiliary_inputs=None
+)
+```
+
+
+
+
+
+next_inputs
+
+``` python
+next_inputs(
+ time,
+ outputs,
+ state,
+ sample_ids
+)
+```
+
+
+
+
+sample
+
+``` python
+sample(
+ time,
+ outputs,
+ state
+)
+```
+
+
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/SequenceLoss.md b/docs/api_docs/python/tfa/seq2seq/SequenceLoss.md
new file mode 100644
index 0000000000..fe49b5b542
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/SequenceLoss.md
@@ -0,0 +1,96 @@
+
+
+
+
+
+
+
+
+
+# tfa.seq2seq.SequenceLoss
+
+## Class `SequenceLoss`
+
+Weighted cross-entropy loss for a sequence of logits.
+
+
+
+### Aliases:
+
+* Class `tfa.seq2seq.SequenceLoss`
+* Class `tfa.seq2seq.loss.SequenceLoss`
+
+
+
+Defined in [`seq2seq/loss.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/loss.py).
+
+
+
+
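+A hedged usage sketch follows; the logits, targets, and weights are
+random placeholders chosen only to show the expected shapes.
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+batch_size, max_time, vocab_size = 4, 7, 100
+logits = tf.random.normal([batch_size, max_time, vocab_size])
+targets = tf.random.uniform(
+    [batch_size, max_time], maxval=vocab_size, dtype=tf.int64)
+weights = tf.ones([batch_size, max_time])  # zero out padded positions here
+
+loss_fn = tfa.seq2seq.SequenceLoss()
+loss = loss_fn(targets, logits, sample_weight=weights)  # scalar tensor
+```
+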
+__init__
+
+``` python
+__init__(
+ average_across_timesteps=False,
+ average_across_batch=False,
+ sum_over_timesteps=True,
+ sum_over_batch=True,
+ softmax_loss_function=None,
+ name=None
+)
+```
+
+
+
+
+
+
+## Methods
+
+__call__
+
+``` python
+__call__(
+ y_true,
+ y_pred,
+ sample_weight=None
+)
+```
+
+Overrides the parent `__call__` to provide customized reduction
+behavior.
+
+from_config
+
+``` python
+from_config(
+ cls,
+ config
+)
+```
+
+Instantiates a `Loss` from its config (output of `get_config()`).
+
+
+#### Args:
+
+
+* `config`: Output of `get_config()`.
+
+
+#### Returns:
+
+A `Loss` instance.
+
+
+get_config
+
+``` python
+get_config()
+```
+
+
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/TrainingSampler.md b/docs/api_docs/python/tfa/seq2seq/TrainingSampler.md
new file mode 100644
index 0000000000..df8e1300be
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/TrainingSampler.md
@@ -0,0 +1,135 @@
+
+
+
+
+
+
+
+
+
+
+
+
+# tfa.seq2seq.TrainingSampler
+
+## Class `TrainingSampler`
+
+A Sampler for use during training.
+
+Inherits From: [`Sampler`](../../tfa/seq2seq/Sampler.md)
+
+### Aliases:
+
+* Class `tfa.seq2seq.TrainingSampler`
+* Class `tfa.seq2seq.sampler.TrainingSampler`
+
+
+
+Defined in [`seq2seq/sampler.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/sampler.py).
+
+
+
+Only reads inputs.
+
+Returned sample_ids are the argmax of the RNN output logits.
+
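+A short sketch of the training-time flow, with assumed tensor shapes:
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+batch_size, max_time, depth = 4, 7, 16
+decoder_inputs = tf.random.normal([batch_size, max_time, depth])  # assumed
+
+sampler = tfa.seq2seq.TrainingSampler()
+finished, first_inputs = sampler.initialize(
+    decoder_inputs,
+    sequence_length=tf.fill([batch_size], max_time))
+```
+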
+__init__
+
+``` python
+__init__(time_major=False)
+```
+
+Initializer.
+
+
+#### Args:
+
+
+* `time_major`: Python bool. Whether the tensors in `inputs` are time
+ major. If `False` (default), they are assumed to be batch major.
+
+
+#### Raises:
+
+
+* `ValueError`: if `sequence_length` is not a 1D tensor.
+
+
+
+## Properties
+
+batch_size
+
+
+
+
+sample_ids_dtype
+
+
+
+
+sample_ids_shape
+
+
+
+
+
+
+## Methods
+
+initialize
+
+``` python
+initialize(
+ inputs,
+ sequence_length=None
+)
+```
+
+Initialize the TrainingSampler.
+
+
+#### Args:
+
+
+* `inputs`: A (structure of) input tensors.
+* `sequence_length`: An int32 vector tensor.
+
+
+#### Returns:
+
+`(finished, next_inputs)`, a tuple of two items. The first item is a
+  boolean vector indicating whether each item in the batch has
+  finished. The second item is the first slice of the input data along
+  the timestep dimension (usually the second dim of the input).
+
+
+
+next_inputs
+
+``` python
+next_inputs(
+ time,
+ outputs,
+ state,
+ sample_ids
+)
+```
+
+
+
+
+sample
+
+``` python
+sample(
+ time,
+ outputs,
+ state
+)
+```
+
+
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/attention_wrapper.md b/docs/api_docs/python/tfa/seq2seq/attention_wrapper.md
new file mode 100644
index 0000000000..23a464ed62
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/attention_wrapper.md
@@ -0,0 +1,40 @@
+
+
+
+
+
+# Module: tfa.seq2seq.attention_wrapper
+
+A powerful dynamic attention wrapper object.
+
+
+
+Defined in [`seq2seq/attention_wrapper.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/attention_wrapper.py).
+
+
+
+
+## Classes
+
+[`class AttentionMechanism`](../../tfa/seq2seq/AttentionMechanism.md)
+
+[`class AttentionWrapper`](../../tfa/seq2seq/AttentionWrapper.md): Wraps another `RNNCell` with attention.
+
+[`class AttentionWrapperState`](../../tfa/seq2seq/AttentionWrapperState.md): `namedtuple` storing the state of a `AttentionWrapper`.
+
+[`class BahdanauAttention`](../../tfa/seq2seq/BahdanauAttention.md): Implements Bahdanau-style (additive) attention.
+
+[`class BahdanauMonotonicAttention`](../../tfa/seq2seq/BahdanauMonotonicAttention.md): Monotonic attention mechanism with Bahdanau-style energy function.
+
+[`class LuongAttention`](../../tfa/seq2seq/LuongAttention.md): Implements Luong-style (multiplicative) attention scoring.
+
+[`class LuongMonotonicAttention`](../../tfa/seq2seq/LuongMonotonicAttention.md): Monotonic attention mechanism with Luong-style energy function.
+
+## Functions
+
+[`hardmax(...)`](../../tfa/seq2seq/hardmax.md): Returns batched one-hot vectors.
+
+[`monotonic_attention(...)`](../../tfa/seq2seq/monotonic_attention.md): Compute monotonic attention distribution from choosing probabilities.
+
+[`safe_cumprod(...)`](../../tfa/seq2seq/safe_cumprod.md): Computes cumprod of x in logspace using cumsum to avoid underflow.
+
diff --git a/docs/api_docs/python/tfa/seq2seq/basic_decoder.md b/docs/api_docs/python/tfa/seq2seq/basic_decoder.md
new file mode 100644
index 0000000000..8cc74c9532
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/basic_decoder.md
@@ -0,0 +1,22 @@
+
+
+
+
+
+# Module: tfa.seq2seq.basic_decoder
+
+A class of Decoders that may sample to generate the next input.
+
+
+
+Defined in [`seq2seq/basic_decoder.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/basic_decoder.py).
+
+
+
+
+## Classes
+
+[`class BasicDecoder`](../../tfa/seq2seq/BasicDecoder.md): Basic sampling decoder.
+
+[`class BasicDecoderOutput`](../../tfa/seq2seq/BasicDecoderOutput.md)
+
diff --git a/docs/api_docs/python/tfa/seq2seq/beam_search_decoder.md b/docs/api_docs/python/tfa/seq2seq/beam_search_decoder.md
new file mode 100644
index 0000000000..0a359ea60e
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/beam_search_decoder.md
@@ -0,0 +1,38 @@
+
+
+
+
+
+# Module: tfa.seq2seq.beam_search_decoder
+
+A decoder that performs beam search.
+
+
+
+Defined in [`seq2seq/beam_search_decoder.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/beam_search_decoder.py).
+
+
+
+
+## Classes
+
+[`class BeamSearchDecoder`](../../tfa/seq2seq/BeamSearchDecoder.md): BeamSearch sampling decoder.
+
+[`class BeamSearchDecoderMixin`](../../tfa/seq2seq/beam_search_decoder/BeamSearchDecoderMixin.md): BeamSearchDecoderMixin contains the common methods for BeamSearchDecoder.
+
+[`class BeamSearchDecoderOutput`](../../tfa/seq2seq/BeamSearchDecoderOutput.md)
+
+[`class BeamSearchDecoderState`](../../tfa/seq2seq/BeamSearchDecoderState.md)
+
+[`class FinalBeamSearchDecoderOutput`](../../tfa/seq2seq/FinalBeamSearchDecoderOutput.md): Final outputs returned by the beam search after all decoding is finished.
+
+## Functions
+
+[`attention_probs_from_attn_state(...)`](../../tfa/seq2seq/beam_search_decoder/attention_probs_from_attn_state.md): Calculates the average attention probabilities.
+
+[`gather_tree_from_array(...)`](../../tfa/seq2seq/gather_tree_from_array.md): Calculates the full beams for `TensorArray`s.
+
+[`get_attention_probs(...)`](../../tfa/seq2seq/beam_search_decoder/get_attention_probs.md): Get attention probabilities from the cell state.
+
+[`tile_batch(...)`](../../tfa/seq2seq/tile_batch.md): Tile the batch dimension of a (possibly nested structure of) tensor(s) `t`.
+
diff --git a/docs/api_docs/python/tfa/seq2seq/beam_search_decoder/BeamSearchDecoderMixin.md b/docs/api_docs/python/tfa/seq2seq/beam_search_decoder/BeamSearchDecoderMixin.md
new file mode 100644
index 0000000000..52efa9249f
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/beam_search_decoder/BeamSearchDecoderMixin.md
@@ -0,0 +1,168 @@
+
+
+
+
+
+
+
+
+
+
+
+# tfa.seq2seq.beam_search_decoder.BeamSearchDecoderMixin
+
+## Class `BeamSearchDecoderMixin`
+
+BeamSearchDecoderMixin contains the common methods for BeamSearchDecoder.
+
+
+
+
+
+Defined in [`seq2seq/beam_search_decoder.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/beam_search_decoder.py).
+
+
+
+It is expected to be used as a base class for a concrete
+BeamSearchDecoder. Since this is a mixin class, it is expected to be
+used together with another class as the base.
+
+__init__
+
+``` python
+__init__(
+ cell,
+ beam_width,
+ output_layer=None,
+ length_penalty_weight=0.0,
+ coverage_penalty_weight=0.0,
+ reorder_tensor_arrays=True,
+ **kwargs
+)
+```
+
+Initialize the BeamSearchDecoderMixin.
+
+
+#### Args:
+
+
+* `cell`: An `RNNCell` instance.
+* `beam_width`: Python integer, the number of beams.
+* `output_layer`: (Optional) An instance of `tf.keras.layers.Layer`,
+ i.e., `tf.keras.layers.Dense`. Optional layer to apply to the RNN
+ output prior to storing the result or sampling.
+* `length_penalty_weight`: Float weight to penalize length. Disabled with
+ 0.0.
+* `coverage_penalty_weight`: Float weight to penalize the coverage of
+ source sentence. Disabled with 0.0.
+* `reorder_tensor_arrays`: If `True`, `TensorArray`s' elements within the
+ cell state will be reordered according to the beam search path. If
+ the `TensorArray` can be reordered, the stacked form will be
+ returned. Otherwise, the `TensorArray` will be returned as is. Set
+ this flag to `False` if the cell state contains `TensorArray`s that
+ are not amenable to reordering.
+* `**kwargs`: Dict, other keyword arguments for parent class.
+
+
+#### Raises:
+
+
+* `TypeError`: if `cell` is not an instance of `RNNCell`,
+ or `output_layer` is not an instance of `tf.keras.layers.Layer`.
+
+
+
+## Properties
+
+batch_size
+
+
+
+
+output_size
+
+
+
+
+tracks_own_finished
+
+The BeamSearchDecoder shuffles its beams and their finished state.
+
+For this reason, it conflicts with the `dynamic_decode` function's
+tracking of finished states. Setting this property to true avoids
+early stopping of decoding due to mismanagement of the finished state
+in `dynamic_decode`.
+
+#### Returns:
+
+`True`.
+
+
+
+
+## Methods
+
+finalize
+
+``` python
+finalize(
+ outputs,
+ final_state,
+ sequence_lengths
+)
+```
+
+Finalize and return the predicted_ids.
+
+
+#### Args:
+
+
+* `outputs`: An instance of BeamSearchDecoderOutput.
+* `final_state`: An instance of BeamSearchDecoderState. Passed through to
+ the output.
+* `sequence_lengths`: An `int64` tensor shaped
+ `[batch_size, beam_width]`. The sequence lengths determined for
+ each beam during decode. **NOTE** These are ignored; the updated
+ sequence lengths are stored in `final_state.lengths`.
+
+
+#### Returns:
+
+
+* `outputs`: An instance of `FinalBeamSearchDecoderOutput` where the
+ predicted_ids are the result of calling _gather_tree.
+* `final_state`: The same input instance of `BeamSearchDecoderState`.
+
+step
+
+``` python
+step(
+ time,
+ inputs,
+ state,
+ name=None
+)
+```
+
+Perform a decoding step.
+
+
+#### Args:
+
+
+* `time`: scalar `int32` tensor.
+* `inputs`: A (structure of) input tensors.
+* `state`: A (structure of) state tensors and TensorArrays.
+* `name`: Name scope for any created operations.
+
+
+#### Returns:
+
+`(outputs, next_state, next_inputs, finished)`.
+
+
+
+
diff --git a/docs/api_docs/python/tfa/seq2seq/beam_search_decoder/attention_probs_from_attn_state.md b/docs/api_docs/python/tfa/seq2seq/beam_search_decoder/attention_probs_from_attn_state.md
new file mode 100644
index 0000000000..c5e8495d04
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/beam_search_decoder/attention_probs_from_attn_state.md
@@ -0,0 +1,31 @@
+
+
+
+
+
+# tfa.seq2seq.beam_search_decoder.attention_probs_from_attn_state
+
+Calculates the average attention probabilities.
+
+``` python
+tfa.seq2seq.beam_search_decoder.attention_probs_from_attn_state(attention_state)
+```
+
+
+
+Defined in [`seq2seq/beam_search_decoder.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/beam_search_decoder.py).
+
+
+
+
+#### Args:
+
+
+* `attention_state`: An instance of `AttentionWrapperState`.
+
+
+#### Returns:
+
+The attention probabilities in the given AttentionWrapperState.
+If there are multiple attention mechanisms, the average value across all
+attention mechanisms is returned.
diff --git a/docs/api_docs/python/tfa/seq2seq/beam_search_decoder/get_attention_probs.md b/docs/api_docs/python/tfa/seq2seq/beam_search_decoder/get_attention_probs.md
new file mode 100644
index 0000000000..b21fd9b665
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/beam_search_decoder/get_attention_probs.md
@@ -0,0 +1,44 @@
+
+
+
+
+
+# tfa.seq2seq.beam_search_decoder.get_attention_probs
+
+Get attention probabilities from the cell state.
+
+``` python
+tfa.seq2seq.beam_search_decoder.get_attention_probs(
+ next_cell_state,
+ coverage_penalty_weight
+)
+```
+
+
+
+Defined in [`seq2seq/beam_search_decoder.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/beam_search_decoder.py).
+
+
+
+
+#### Args:
+
+
+* `next_cell_state`: The next state from the cell, e.g. an instance of
+ AttentionWrapperState if the cell is attentional.
+* `coverage_penalty_weight`: Float weight to penalize the coverage of source
+ sentence. Disabled with 0.0.
+
+
+#### Returns:
+
+The attention probabilities with shape
+ `[batch_size, beam_width, max_time]` if coverage penalty is enabled.
+ Otherwise, returns None.
+
+
+
+#### Raises:
+
+
+* `ValueError`: If no cell is attentional but coverage penalty is enabled.
\ No newline at end of file
diff --git a/docs/api_docs/python/tfa/seq2seq/decoder.md b/docs/api_docs/python/tfa/seq2seq/decoder.md
new file mode 100644
index 0000000000..7537ec70f1
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/decoder.md
@@ -0,0 +1,26 @@
+
+
+
+
+
+# Module: tfa.seq2seq.decoder
+
+Seq2seq layer operations for use in neural networks.
+
+
+
+Defined in [`seq2seq/decoder.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/decoder.py).
+
+
+
+
+## Classes
+
+[`class BaseDecoder`](../../tfa/seq2seq/BaseDecoder.md): An RNN Decoder that is based on a Keras layer.
+
+[`class Decoder`](../../tfa/seq2seq/Decoder.md): An RNN Decoder abstract interface object.
+
+## Functions
+
+[`dynamic_decode(...)`](../../tfa/seq2seq/dynamic_decode.md): Perform dynamic decoding with `decoder`.
+
diff --git a/docs/api_docs/python/tfa/seq2seq/dynamic_decode.md b/docs/api_docs/python/tfa/seq2seq/dynamic_decode.md
new file mode 100644
index 0000000000..005c496a9c
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/dynamic_decode.md
@@ -0,0 +1,70 @@
+
+
+
+
+
+# tfa.seq2seq.dynamic_decode
+
+Perform dynamic decoding with `decoder`.
+
+### Aliases:
+
+* `tfa.seq2seq.decoder.dynamic_decode`
+* `tfa.seq2seq.dynamic_decode`
+
+``` python
+tfa.seq2seq.dynamic_decode(
+ decoder,
+ output_time_major=False,
+ impute_finished=False,
+ maximum_iterations=None,
+ parallel_iterations=32,
+ swap_memory=False,
+ scope=None,
+ **kwargs
+)
+```
+
+
+
+Defined in [`seq2seq/decoder.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/decoder.py).
+
+
+
+Calls initialize() once and step() repeatedly on the Decoder object.
+
+#### Args:
+
+
+* `decoder`: A `Decoder` instance.
+* `output_time_major`: Python boolean. Default: `False` (batch major). If
+ `True`, outputs are returned as time major tensors (this mode is
+ faster). Otherwise, outputs are returned as batch major tensors (this
+ adds extra time to the computation).
+* `impute_finished`: Python boolean. If `True`, then states for batch
+ entries which are marked as finished get copied through and the
+ corresponding outputs get zeroed out. This causes some slowdown at
+ each time step, but ensures that the final state and outputs have
+ the correct values and that backprop ignores time steps that were
+ marked as finished.
+* `maximum_iterations`: `int32` scalar, maximum allowed number of decoding
+ steps. Default is `None` (decode until the decoder is fully done).
+* `parallel_iterations`: Argument passed to `tf.while_loop`.
+* `swap_memory`: Argument passed to `tf.while_loop`.
+* `scope`: Optional variable scope to use.
+* `**kwargs`: dict, other keyword arguments for `dynamic_decode`. It may
+ contain arguments used to initialize a `BaseDecoder`, which takes all
+ tensor inputs during `call()`.
+
+
+#### Returns:
+
+`(final_outputs, final_state, final_sequence_lengths)`.
+
+
+
+#### Raises:
+
+
+* `TypeError`: if `decoder` is not an instance of `Decoder`.
+* `ValueError`: if `maximum_iterations` is provided but is not a scalar.
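+
+#### Example:
+
+A minimal sketch, assuming a `tfa.seq2seq.BasicDecoder` with a
+`TrainingSampler`, and assuming that `BaseDecoder` initialization arguments
+are forwarded through `**kwargs` as `decoder_init_input` /
+`decoder_init_kwargs` (names and sizes here are illustrative):
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+batch_size, max_time, units, vocab_size = 4, 7, 16, 100
+
+sampler = tfa.seq2seq.TrainingSampler()
+cell = tf.keras.layers.LSTMCell(units)
+decoder = tfa.seq2seq.BasicDecoder(
+    cell, sampler, output_layer=tf.keras.layers.Dense(vocab_size))
+
+inputs = tf.random.normal([batch_size, max_time, units])
+initial_state = cell.get_initial_state(
+    batch_size=batch_size, dtype=tf.float32)
+
+final_outputs, final_state, final_lengths = tfa.seq2seq.dynamic_decode(
+    decoder,
+    maximum_iterations=max_time,
+    decoder_init_input=inputs,
+    decoder_init_kwargs={
+        'initial_state': initial_state,
+        'sequence_length': tf.fill([batch_size], max_time),
+    })
+# final_outputs.rnn_output has shape [batch_size, max_time, vocab_size].
+```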
\ No newline at end of file
diff --git a/docs/api_docs/python/tfa/seq2seq/gather_tree_from_array.md b/docs/api_docs/python/tfa/seq2seq/gather_tree_from_array.md
new file mode 100644
index 0000000000..8c884c4258
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/gather_tree_from_array.md
@@ -0,0 +1,44 @@
+
+
+
+
+
+# tfa.seq2seq.gather_tree_from_array
+
+Calculates the full beams for `TensorArray`s.
+
+### Aliases:
+
+* `tfa.seq2seq.beam_search_decoder.gather_tree_from_array`
+* `tfa.seq2seq.gather_tree_from_array`
+
+``` python
+tfa.seq2seq.gather_tree_from_array(
+ t,
+ parent_ids,
+ sequence_length
+)
+```
+
+
+
+Defined in [`seq2seq/beam_search_decoder.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/beam_search_decoder.py).
+
+
+
+
+#### Args:
+
+
+* `t`: A stacked `TensorArray` of size `max_time` that contains `Tensor`s of
+ shape `[batch_size, beam_width, s]` or `[batch_size * beam_width, s]`
+ where `s` is the depth shape.
+* `parent_ids`: The parent ids of shape `[max_time, batch_size, beam_width]`.
+* `sequence_length`: The sequence length of shape `[batch_size, beam_width]`.
+
+
+#### Returns:
+
+A `Tensor` which is a stacked `TensorArray` of the same size and type as
+`t` and where beams are sorted in each `Tensor` according to
+`parent_ids`.
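+
+#### Example:
+
+A shape-level sketch (sizes are hypothetical; all-zero `parent_ids` simply
+keep each beam in place):
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+max_time, batch_size, beam_width, depth = 3, 2, 4, 5
+t = tf.random.normal([max_time, batch_size, beam_width, depth])
+parent_ids = tf.zeros([max_time, batch_size, beam_width], dtype=tf.int32)
+sequence_length = tf.fill([batch_size, beam_width], max_time)
+
+sorted_t = tfa.seq2seq.gather_tree_from_array(t, parent_ids, sequence_length)
+# sorted_t has the same shape as t, with beams in each Tensor reordered
+# according to parent_ids.
+```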
diff --git a/docs/api_docs/python/tfa/seq2seq/hardmax.md b/docs/api_docs/python/tfa/seq2seq/hardmax.md
new file mode 100644
index 0000000000..ca6aed2e15
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/hardmax.md
@@ -0,0 +1,38 @@
+
+
+
+
+
+# tfa.seq2seq.hardmax
+
+Returns batched one-hot vectors.
+
+### Aliases:
+
+* `tfa.seq2seq.attention_wrapper.hardmax`
+* `tfa.seq2seq.hardmax`
+
+``` python
+tfa.seq2seq.hardmax(
+ logits,
+ name=None
+)
+```
+
+
+
+Defined in [`seq2seq/attention_wrapper.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/attention_wrapper.py).
+
+
+
+The depth index containing the `1` is that of the maximum logit value.
+
+#### Args:
+
+
+* `logits`: A batch tensor of logit values.
+* `name`: Name to use when creating ops.
+
+#### Returns:
+
+A batched one-hot tensor.
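+
+#### Example:
+
+A quick sketch of the behavior (the commented values are what one would
+expect, not captured output):
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+logits = tf.constant([[1.0, 3.0, 2.0],
+                      [4.0, 0.5, 0.5]])
+tfa.seq2seq.hardmax(logits)
+# -> [[0., 1., 0.],
+#     [1., 0., 0.]]
+```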
diff --git a/docs/api_docs/python/tfa/seq2seq/loss.md b/docs/api_docs/python/tfa/seq2seq/loss.md
new file mode 100644
index 0000000000..1408f71d38
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/loss.md
@@ -0,0 +1,24 @@
+
+
+
+
+
+# Module: tfa.seq2seq.loss
+
+Seq2seq loss operations for use in sequence models.
+
+
+
+Defined in [`seq2seq/loss.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/loss.py).
+
+
+
+
+## Classes
+
+[`class SequenceLoss`](../../tfa/seq2seq/SequenceLoss.md): Weighted cross-entropy loss for a sequence of logits.
+
+## Functions
+
+[`sequence_loss(...)`](../../tfa/seq2seq/sequence_loss.md): Weighted cross-entropy loss for a sequence of logits.
+
diff --git a/docs/api_docs/python/tfa/seq2seq/monotonic_attention.md b/docs/api_docs/python/tfa/seq2seq/monotonic_attention.md
new file mode 100644
index 0000000000..2392214a12
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/monotonic_attention.md
@@ -0,0 +1,73 @@
+
+
+
+
+
+# tfa.seq2seq.monotonic_attention
+
+Compute monotonic attention distribution from choosing probabilities.
+
+### Aliases:
+
+* `tfa.seq2seq.attention_wrapper.monotonic_attention`
+* `tfa.seq2seq.monotonic_attention`
+
+``` python
+tfa.seq2seq.monotonic_attention(
+ p_choose_i,
+ previous_attention,
+ mode
+)
+```
+
+
+
+Defined in [`seq2seq/attention_wrapper.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/attention_wrapper.py).
+
+
+
+Monotonic attention implies that the input sequence is processed in an
+explicitly left-to-right manner when generating the output sequence. In
+addition, once an input sequence element is attended to at a given output
+timestep, elements occurring before it cannot be attended to at subsequent
+output timesteps. This function generates attention distributions
+according to these assumptions. For more information, see `Online and
+Linear-Time Attention by Enforcing Monotonic Alignments`.
+
+#### Args:
+
+
+* `p_choose_i`: Probability of choosing input sequence/memory element i.
+ Should be of shape (batch_size, input_sequence_length), and should all
+ be in the range [0, 1].
+* `previous_attention`: The attention distribution from the previous output
+ timestep. Should be of shape (batch_size, input_sequence_length). For
+ the first output timestep, previous_attention[n] should be
+ [1, 0, 0, ..., 0] for all n in [0, ... batch_size - 1].
+* `mode`: How to compute the attention distribution. Must be one of
+ 'recursive', 'parallel', or 'hard'.
+ * 'recursive' uses tf.scan to recursively compute the distribution.
+ This is slowest but is exact, general, and does not suffer from
+ numerical instabilities.
+ * 'parallel' uses parallelized cumulative-sum and cumulative-product
+ operations to compute a closed-form solution to the recurrence
+ relation defining the attention distribution. This makes it more
+ efficient than 'recursive', but it requires numerical checks which
+ make the distribution non-exact. This can be a problem in
+ particular when input_sequence_length is long and/or p_choose_i has
+ entries very close to 0 or 1.
+ * 'hard' requires that the probabilities in p_choose_i are all either
+ 0 or 1, and subsequently uses a more efficient and exact solution.
+
+
+#### Returns:
+
+A tensor of shape (batch_size, input_sequence_length) representing the
+attention distributions for each sequence in the batch.
+
+
+
+#### Raises:
+
+
+* `ValueError`: If `mode` is not one of 'recursive', 'parallel', or 'hard'.
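+
+#### Example:
+
+A minimal sketch for a first output timestep, using the 'parallel' mode
+(batch size and sequence length are hypothetical):
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+batch_size, seq_len = 2, 4
+p_choose_i = tf.random.uniform([batch_size, seq_len])  # values in [0, 1]
+# At the first output timestep, attention is [1, 0, ..., 0] per batch entry.
+previous_attention = tf.one_hot(
+    tf.zeros([batch_size], dtype=tf.int32), seq_len)
+
+attention = tfa.seq2seq.monotonic_attention(
+    p_choose_i, previous_attention, mode='parallel')
+# attention has shape [batch_size, seq_len], one distribution per sequence.
+```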
\ No newline at end of file
diff --git a/docs/api_docs/python/tfa/seq2seq/safe_cumprod.md b/docs/api_docs/python/tfa/seq2seq/safe_cumprod.md
new file mode 100644
index 0000000000..ceee331bb7
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/safe_cumprod.md
@@ -0,0 +1,44 @@
+
+
+
+
+
+# tfa.seq2seq.safe_cumprod
+
+Computes cumprod of x in logspace using cumsum to avoid underflow.
+
+### Aliases:
+
+* `tfa.seq2seq.attention_wrapper.safe_cumprod`
+* `tfa.seq2seq.safe_cumprod`
+
+``` python
+tfa.seq2seq.safe_cumprod(
+ x,
+ *args,
+ **kwargs
+)
+```
+
+
+
+Defined in [`seq2seq/attention_wrapper.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/attention_wrapper.py).
+
+
+
+The cumprod function and its gradient can result in numerical instabilities
+when its argument has very small and/or zero values. As long as the
+argument is all positive, we can instead compute the cumulative product as
+exp(cumsum(log(x))). This function can be called identically to
+tf.cumprod.
+
+#### Args:
+
+
+* `x`: Tensor to take the cumulative product of.
+* `*args`: Passed on to cumsum; these are identical to those in cumprod.
+* `**kwargs`: Passed on to cumsum; these are identical to those in cumprod.
+
+#### Returns:
+
+Cumulative product of x.
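+
+#### Example:
+
+A small sketch; extra arguments mirror those of `tf.cumprod` and are passed
+on to the underlying cumsum:
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+x = tf.constant([0.9, 1e-7, 0.5, 0.3])
+
+# Approximately tf.math.cumprod(x), but computed as exp(cumsum(log(x)))
+# so that tiny positive values do not underflow.
+tfa.seq2seq.safe_cumprod(x)
+
+# The usual cumprod-style keyword arguments are accepted:
+tfa.seq2seq.safe_cumprod(x, exclusive=True)
+```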
diff --git a/docs/api_docs/python/tfa/seq2seq/sampler.md b/docs/api_docs/python/tfa/seq2seq/sampler.md
new file mode 100644
index 0000000000..c544e9d4ec
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/sampler.md
@@ -0,0 +1,40 @@
+
+
+
+
+
+# Module: tfa.seq2seq.sampler
+
+A library of samplers for use with SamplingDecoders.
+
+
+
+Defined in [`seq2seq/sampler.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/sampler.py).
+
+
+
+
+## Classes
+
+[`class CustomSampler`](../../tfa/seq2seq/CustomSampler.md): Base abstract class that allows the user to customize sampling.
+
+[`class GreedyEmbeddingSampler`](../../tfa/seq2seq/GreedyEmbeddingSampler.md): A sampler for use during inference.
+
+[`class InferenceSampler`](../../tfa/seq2seq/InferenceSampler.md): A helper to use during inference with a custom sampling function.
+
+[`class SampleEmbeddingSampler`](../../tfa/seq2seq/SampleEmbeddingSampler.md): A sampler for use during inference.
+
+[`class Sampler`](../../tfa/seq2seq/Sampler.md): Interface for implementing sampling in seq2seq decoders.
+
+[`class ScheduledEmbeddingTrainingSampler`](../../tfa/seq2seq/ScheduledEmbeddingTrainingSampler.md): A training sampler that adds scheduled sampling.
+
+[`class ScheduledOutputTrainingSampler`](../../tfa/seq2seq/ScheduledOutputTrainingSampler.md): A training sampler that adds scheduled sampling directly to outputs.
+
+[`class TrainingSampler`](../../tfa/seq2seq/TrainingSampler.md): A Sampler for use during training.
+
+## Functions
+
+[`bernoulli_sample(...)`](../../tfa/seq2seq/sampler/bernoulli_sample.md): Samples from Bernoulli distribution.
+
+[`categorical_sample(...)`](../../tfa/seq2seq/sampler/categorical_sample.md): Samples from categorical distribution.
+
diff --git a/docs/api_docs/python/tfa/seq2seq/sampler/bernoulli_sample.md b/docs/api_docs/python/tfa/seq2seq/sampler/bernoulli_sample.md
new file mode 100644
index 0000000000..bca0a10698
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/sampler/bernoulli_sample.md
@@ -0,0 +1,24 @@
+
+
+
+
+
+# tfa.seq2seq.sampler.bernoulli_sample
+
+Samples from Bernoulli distribution.
+
+``` python
+tfa.seq2seq.sampler.bernoulli_sample(
+ probs=None,
+ logits=None,
+ dtype=tf.int32,
+ sample_shape=(),
+ seed=None
+)
+```
+
+
+
+Defined in [`seq2seq/sampler.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/sampler.py).
+
+
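+#### Example:
+
+A minimal usage sketch (values are illustrative; one of `probs` or `logits`
+is expected):
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+probs = tf.constant([0.1, 0.5, 0.9])
+samples = tfa.seq2seq.sampler.bernoulli_sample(probs=probs, seed=42)
+# samples is an int32 tensor of 0s and 1s, one draw per probability.
+```
+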
diff --git a/docs/api_docs/python/tfa/seq2seq/sampler/categorical_sample.md b/docs/api_docs/python/tfa/seq2seq/sampler/categorical_sample.md
new file mode 100644
index 0000000000..d13ef6b34e
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/sampler/categorical_sample.md
@@ -0,0 +1,23 @@
+
+
+
+
+
+# tfa.seq2seq.sampler.categorical_sample
+
+Samples from categorical distribution.
+
+``` python
+tfa.seq2seq.sampler.categorical_sample(
+ logits,
+ dtype=tf.int32,
+ sample_shape=(),
+ seed=None
+)
+```
+
+
+
+Defined in [`seq2seq/sampler.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/sampler.py).
+
+
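+#### Example:
+
+A minimal usage sketch (logits are illustrative):
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+logits = tf.math.log([[0.1, 0.3, 0.6],
+                      [0.8, 0.1, 0.1]])
+ids = tfa.seq2seq.sampler.categorical_sample(logits=logits, seed=7)
+# ids is an int32 tensor with one sampled class id per row of logits.
+```
+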
diff --git a/docs/api_docs/python/tfa/seq2seq/sequence_loss.md b/docs/api_docs/python/tfa/seq2seq/sequence_loss.md
new file mode 100644
index 0000000000..5fe50b724a
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/sequence_loss.md
@@ -0,0 +1,95 @@
+
+
+
+
+
+# tfa.seq2seq.sequence_loss
+
+Weighted cross-entropy loss for a sequence of logits.
+
+### Aliases:
+
+* `tfa.seq2seq.loss.sequence_loss`
+* `tfa.seq2seq.sequence_loss`
+
+``` python
+tfa.seq2seq.sequence_loss(
+ logits,
+ targets,
+ weights,
+ average_across_timesteps=True,
+ average_across_batch=True,
+ sum_over_timesteps=False,
+ sum_over_batch=False,
+ softmax_loss_function=None,
+ name=None
+)
+```
+
+
+
+Defined in [`seq2seq/loss.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/loss.py).
+
+
+
+Depending on the values of `average_across_timesteps` /
+`sum_over_timesteps` and `average_across_batch` / `sum_over_batch`, the
+returned Tensor will have rank 0, 1, or 2 as these arguments reduce the
+cross-entropy at each target, which has shape
+`[batch_size, sequence_length]`, over their respective dimensions. For
+example, if `average_across_timesteps` is `True` and `average_across_batch`
+is `False`, then the returned Tensor will have shape `[batch_size]`.
+
+Note that `average_across_timesteps` and `sum_over_timesteps` cannot both
+be `True` at the same time; the same holds for `average_across_batch` and
+`sum_over_batch`.
+
+The recommended loss reduction in TF 2.0 is summation over elements
+instead of a weighted average. Users are therefore encouraged to use
+`sum_over_timesteps` and `sum_over_batch` for reduction.
+
+#### Args:
+
+
+* `logits`: A Tensor of shape
+ `[batch_size, sequence_length, num_decoder_symbols]` and dtype float.
+ The logits correspond to the prediction across all classes at each
+ timestep.
+* `targets`: A Tensor of shape `[batch_size, sequence_length]` and dtype
+ int. The target represents the true class at each timestep.
+* `weights`: A Tensor of shape `[batch_size, sequence_length]` and dtype
+ float. `weights` constitutes the weighting of each prediction in the
+ sequence. When using `weights` as masking, set all valid timesteps to 1
+ and all padded timesteps to 0, e.g. a mask returned by
+ `tf.sequence_mask`.
+* `average_across_timesteps`: If set, sum the cost across the sequence
+ dimension and divide the cost by the total label weight across
+ timesteps.
+* `average_across_batch`: If set, sum the cost across the batch dimension and
+ divide the returned cost by the batch size.
+* `sum_over_timesteps`: If set, sum the cost across the sequence dimension
+ and divide by the size of the sequence. Note that any element with 0
+ weights will be excluded from size calculation.
+* `sum_over_batch`: If set, sum the cost across the batch dimension and
+ divide the total cost by the batch size. Note that any element with 0
+ weights will be excluded from size calculation.
+* `softmax_loss_function`: Function (labels, logits) -> loss-batch
+ to be used instead of the standard softmax (the default if this is
+ None). **Note that to avoid confusion, it is required for the function
+ to accept named arguments.**
+* `name`: Optional name for this operation, defaults to "sequence_loss".
+
+
+#### Returns:
+
+A float Tensor of rank 0, 1, or 2 depending on the
+`average_across_timesteps` and `average_across_batch` arguments. By
+default, it has rank 0 (scalar) and is the weighted average cross-entropy
+(log-perplexity) per symbol.
+
+
+
+#### Raises:
+
+
+* `ValueError`: If `logits` does not have 3 dimensions, `targets` does not
+ have 2 dimensions, or `weights` does not have 2 dimensions.
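+
+#### Example:
+
+A minimal sketch using the TF 2.0-style `sum_over` reduction (sizes are
+hypothetical):
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+batch_size, seq_len, vocab_size = 2, 5, 10
+logits = tf.random.normal([batch_size, seq_len, vocab_size])
+targets = tf.random.uniform(
+    [batch_size, seq_len], maxval=vocab_size, dtype=tf.int32)
+# Mask: the first sequence has 5 valid steps, the second has 3.
+weights = tf.sequence_mask([5, 3], maxlen=seq_len, dtype=tf.float32)
+
+loss = tfa.seq2seq.sequence_loss(
+    logits, targets, weights,
+    average_across_timesteps=False,
+    average_across_batch=False,
+    sum_over_timesteps=True,
+    sum_over_batch=True)
+# loss is a rank-0 (scalar) Tensor.
+```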
\ No newline at end of file
diff --git a/docs/api_docs/python/tfa/seq2seq/tile_batch.md b/docs/api_docs/python/tfa/seq2seq/tile_batch.md
new file mode 100644
index 0000000000..16a849718a
--- /dev/null
+++ b/docs/api_docs/python/tfa/seq2seq/tile_batch.md
@@ -0,0 +1,56 @@
+
+
+
+
+
+# tfa.seq2seq.tile_batch
+
+Tile the batch dimension of a (possibly nested structure of) tensor(s) `t`.
+
+### Aliases:
+
+* `tfa.seq2seq.beam_search_decoder.tile_batch`
+* `tfa.seq2seq.tile_batch`
+
+``` python
+tfa.seq2seq.tile_batch(
+ t,
+ multiplier,
+ name=None
+)
+```
+
+
+
+Defined in [`seq2seq/beam_search_decoder.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/seq2seq/beam_search_decoder.py).
+
+
+
+For each tensor t in a (possibly nested structure) of tensors,
+this function takes a tensor t shaped `[batch_size, s0, s1, ...]` composed
+of minibatch entries `t[0], ..., t[batch_size - 1]` and tiles it to have a
+shape `[batch_size * multiplier, s0, s1, ...]` composed of minibatch
+entries `t[0], t[0], ..., t[1], t[1], ...` where each minibatch entry is
+repeated `multiplier` times.
+
+#### Args:
+
+
+* `t`: `Tensor` shaped `[batch_size, ...]`.
+* `multiplier`: Python int.
+* `name`: Name scope for any created operations.
+
+
+#### Returns:
+
+A (possibly nested structure of) `Tensor` shaped
+`[batch_size * multiplier, ...]`.
+
+
+
+#### Raises:
+
+
+* `ValueError`: if tensor(s) `t` do not have a statically known rank or
+the rank is < 1.
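+
+#### Example:
+
+A small sketch (the commented result is what one would expect from the
+description above):
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+t = tf.constant([[1, 2],
+                 [3, 4]])              # batch_size = 2
+tfa.seq2seq.tile_batch(t, multiplier=3)
+# -> [[1, 2], [1, 2], [1, 2],
+#     [3, 4], [3, 4], [3, 4]]          # shape [6, 2]
+```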
\ No newline at end of file
diff --git a/docs/api_docs/python/tfa/text.md b/docs/api_docs/python/tfa/text.md
new file mode 100644
index 0000000000..8b5f6fbbda
--- /dev/null
+++ b/docs/api_docs/python/tfa/text.md
@@ -0,0 +1,26 @@
+
+
+
+
+
+# Module: tfa.text
+
+Text-processing ops.
+
+
+
+Defined in [`text/__init__.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/text/__init__.py).
+
+
+
+
+## Modules
+
+[`skip_gram_ops`](../tfa/text/skip_gram_ops.md) module: Skip-gram sampling ops from https://arxiv.org/abs/1301.3781.
+
+## Functions
+
+[`skip_gram_sample(...)`](../tfa/text/skip_gram_sample.md): Generates skip-gram token and label paired Tensors from the input tensor.
+
+[`skip_gram_sample_with_text_vocab(...)`](../tfa/text/skip_gram_sample_with_text_vocab.md): Skip-gram sampling with a text vocabulary file.
+
diff --git a/docs/api_docs/python/tfa/text/skip_gram_ops.md b/docs/api_docs/python/tfa/text/skip_gram_ops.md
new file mode 100644
index 0000000000..a8d6cf949f
--- /dev/null
+++ b/docs/api_docs/python/tfa/text/skip_gram_ops.md
@@ -0,0 +1,22 @@
+
+
+
+
+
+# Module: tfa.text.skip_gram_ops
+
+Skip-gram sampling ops from https://arxiv.org/abs/1301.3781.
+
+
+
+Defined in [`text/skip_gram_ops.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/text/skip_gram_ops.py).
+
+
+
+
+## Functions
+
+[`skip_gram_sample(...)`](../../tfa/text/skip_gram_sample.md): Generates skip-gram token and label paired Tensors from the input tensor.
+
+[`skip_gram_sample_with_text_vocab(...)`](../../tfa/text/skip_gram_sample_with_text_vocab.md): Skip-gram sampling with a text vocabulary file.
+
diff --git a/docs/api_docs/python/tfa/text/skip_gram_sample.md b/docs/api_docs/python/tfa/text/skip_gram_sample.md
new file mode 100644
index 0000000000..763cd12b20
--- /dev/null
+++ b/docs/api_docs/python/tfa/text/skip_gram_sample.md
@@ -0,0 +1,143 @@
+
+
+
+
+
+# tfa.text.skip_gram_sample
+
+Generates skip-gram token and label paired Tensors from the input tensor.
+
+### Aliases:
+
+* `tfa.text.skip_gram_ops.skip_gram_sample`
+* `tfa.text.skip_gram_sample`
+
+``` python
+tfa.text.skip_gram_sample(
+ input_tensor,
+ min_skips=1,
+ max_skips=5,
+ start=0,
+ limit=-1,
+ emit_self_as_target=False,
+ vocab_freq_table=None,
+ vocab_min_count=None,
+ vocab_subsampling=None,
+ corpus_size=None,
+ batch_size=None,
+ batch_capacity=None,
+ seed=None,
+ name=None
+)
+```
+
+
+
+Defined in [`text/skip_gram_ops.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/text/skip_gram_ops.py).
+
+
+
+Generates skip-gram `("token", "label")` pairs using each element in the
+rank-1 `input_tensor` as a token. The window size used for each token will
+be randomly selected from the range specified by `[min_skips, max_skips]`,
+inclusive. See https://arxiv.org/abs/1301.3781 for more details about
+skip-gram.
+
+For example, given `input_tensor = ["the", "quick", "brown", "fox",
+"jumps"]`, `min_skips = 1`, `max_skips = 2`, `emit_self_as_target = False`,
+the output `(tokens, labels)` pairs for the token "quick" will be randomly
+selected from either `(tokens=["quick", "quick"], labels=["the", "brown"])`
+for 1 skip, or `(tokens=["quick", "quick", "quick"],
+labels=["the", "brown", "fox"])` for 2 skips.
+
+If `emit_self_as_target = True`, each token will also be emitted as a label
+for itself. From the previous example, the output will be either
+`(tokens=["quick", "quick", "quick"], labels=["the", "quick", "brown"])`
+for 1 skip, or `(tokens=["quick", "quick", "quick", "quick"],
+labels=["the", "quick", "brown", "fox"])` for 2 skips.
+
+The same process is repeated for each element of `input_tensor` and
+concatenated together into the two output rank-1 `Tensors` (one for all the
+tokens, another for all the labels).
+
+If `vocab_freq_table` is specified, tokens in `input_tensor` that are not
+present in the vocabulary are discarded. Tokens whose frequency counts are
+below `vocab_min_count` are also discarded. Tokens whose frequency
+proportions in the corpus exceed `vocab_subsampling` may be randomly
+down-sampled. See Eq. 5 in http://arxiv.org/abs/1310.4546 for more details
+about subsampling.
+
+Due to the random window sizes used for each token, the lengths of the
+outputs are non-deterministic, unless `batch_size` is specified to batch
+the outputs to always return `Tensors` of length `batch_size`.
+
+#### Args:
+
+
+* `input_tensor`: A rank-1 `Tensor` from which to generate skip-gram
+ candidates.
+* `min_skips`: `int` or scalar `Tensor` specifying the minimum window size to
+ randomly use for each token. Must be >= 0 and <= `max_skips`. If
+ `min_skips` and `max_skips` are both 0, the only label output will be
+ the token itself when `emit_self_as_target = True`; otherwise, no
+ output is produced.
+* `max_skips`: `int` or scalar `Tensor` specifying the maximum window size to
+ randomly use for each token. Must be >= 0.
+* `start`: `int` or scalar `Tensor` specifying the position in
+ `input_tensor` from which to start generating skip-gram candidates.
+* `limit`: `int` or scalar `Tensor` specifying the maximum number of
+ elements in `input_tensor` to use in generating skip-gram candidates.
+ -1 means to use the rest of the `Tensor` after `start`.
+* `emit_self_as_target`: `bool` or scalar `Tensor` specifying whether to emit
+ each token as a label for itself.
+* `vocab_freq_table`: (Optional) A lookup table (subclass of
+ `lookup.InitializableLookupTableBase`) that maps tokens to their raw
+ frequency counts. If specified, any token in `input_tensor` that is not
+ found in `vocab_freq_table` will be filtered out before generating
+ skip-gram candidates. While this will typically map to integer raw
+ frequency counts, it could also map to float frequency proportions.
+ `vocab_min_count` and `corpus_size` should be in the same units
+ as this.
+* `vocab_min_count`: (Optional) `int`, `float`, or scalar `Tensor` specifying
+ minimum frequency threshold (from `vocab_freq_table`) for a token to be
+ kept in `input_tensor`. If this is specified, `vocab_freq_table` must
+ also be specified - and they should both be in the same units.
+* `vocab_subsampling`: (Optional) `float` specifying frequency proportion
+ threshold for tokens from `input_tensor`. Tokens that occur more
+ frequently (based on the ratio of the token's `vocab_freq_table` value
+ to the `corpus_size`) will be randomly down-sampled. Reasonable
+ starting values may be around 1e-3 or 1e-5. If this is specified, both
+ `vocab_freq_table` and `corpus_size` must also be specified. See Eq. 5
+ in http://arxiv.org/abs/1310.4546 for more details.
+* `corpus_size`: (Optional) `int`, `float`, or scalar `Tensor` specifying the
+ total number of tokens in the corpus (e.g., sum of all the frequency
+ counts of `vocab_freq_table`). Used with `vocab_subsampling` for
+ down-sampling frequently occurring tokens. If this is specified,
+ `vocab_freq_table` and `vocab_subsampling` must also be specified.
+* `batch_size`: (Optional) `int` specifying batch size of returned `Tensors`.
+* `batch_capacity`: (Optional) `int` specifying batch capacity for the queue
+ used for batching returned `Tensors`. Only has an effect if
+ `batch_size` > 0. Defaults to 100 * `batch_size` if not specified.
+* `seed`: (Optional) `int` used to create a random seed for window size and
+ subsampling. See `set_random_seed` docs for behavior.
+* `name`: (Optional) A `string` name or a name scope for the operations.
+
+
+#### Returns:
+
+A `tuple` containing (token, label) `Tensors`. Each output `Tensor` is of
+rank-1 and has the same type as `input_tensor`. The `Tensors` will be of
+length `batch_size`; if `batch_size` is not specified, they will be of
+random length, though they will be in sync with each other as long as
+they are evaluated together.
+
+
+
+#### Raises:
+
+
+* `ValueError`: If `vocab_freq_table` is not provided, but `vocab_min_count`,
+ `vocab_subsampling`, or `corpus_size` is specified. Also raised if only
+ one of `vocab_subsampling` and `corpus_size` is specified (they must be
+ both present or both absent).
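+
+#### Example:
+
+A minimal sketch; the emitted pairs vary with the randomly chosen window
+sizes:
+
+``` python
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+input_tensor = tf.constant(["the", "quick", "brown", "fox", "jumps"])
+tokens, labels = tfa.text.skip_gram_sample(
+    input_tensor, min_skips=1, max_skips=2, seed=42)
+# tokens and labels are aligned rank-1 string tensors, e.g.
+# tokens = ["the", "the", "quick", ...]
+# labels = ["quick", "brown", "the", ...]
+```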
\ No newline at end of file
diff --git a/docs/api_docs/python/tfa/text/skip_gram_sample_with_text_vocab.md b/docs/api_docs/python/tfa/text/skip_gram_sample_with_text_vocab.md
new file mode 100644
index 0000000000..e5543eee6f
--- /dev/null
+++ b/docs/api_docs/python/tfa/text/skip_gram_sample_with_text_vocab.md
@@ -0,0 +1,138 @@
+
+
+
+
+
+# tfa.text.skip_gram_sample_with_text_vocab
+
+Skip-gram sampling with a text vocabulary file.
+
+### Aliases:
+
+* `tfa.text.skip_gram_ops.skip_gram_sample_with_text_vocab`
+* `tfa.text.skip_gram_sample_with_text_vocab`
+
+``` python
+tfa.text.skip_gram_sample_with_text_vocab(
+ input_tensor,
+ vocab_freq_file,
+ vocab_token_index=0,
+ vocab_token_dtype=tf.dtypes.string,
+ vocab_freq_index=1,
+ vocab_freq_dtype=tf.dtypes.float64,
+ vocab_delimiter=',',
+ vocab_min_count=0,
+ vocab_subsampling=None,
+ corpus_size=None,
+ min_skips=1,
+ max_skips=5,
+ start=0,
+ limit=-1,
+ emit_self_as_target=False,
+ batch_size=None,
+ batch_capacity=None,
+ seed=None,
+ name=None
+)
+```
+
+
+
+Defined in [`text/skip_gram_ops.py`](https://github.com/tensorflow/addons/tree/0.4-release/tensorflow_addons/text/skip_gram_ops.py).
+
+
+
+Wrapper around `skip_gram_sample()` for use with a text vocabulary file.
+The vocabulary file is expected to be a plain-text file, with lines of
+`vocab_delimiter`-separated columns. The `vocab_token_index` column should
+contain the vocabulary term, while the `vocab_freq_index` column should
+contain the number of times that term occurs in the corpus. For example,
+with a text vocabulary file of:
+
+```
+bonjour,fr,42
+hello,en,777
+hola,es,99
+```
+
+You should set `vocab_delimiter=","`, `vocab_token_index=0`, and
+`vocab_freq_index=2`.
+
+See `skip_gram_sample()` documentation for more details about the skip-gram
+sampling process.
+
+#### Args:
+
+
+* `input_tensor`: A rank-1 `Tensor` from which to generate skip-gram candidates.
+* `vocab_freq_file`: `string` specifying full file path to the text vocab file.
+* `vocab_token_index`: `int` specifying which column in the text vocab file
+ contains the tokens.
+* `vocab_token_dtype`: `DType` specifying the format of the tokens in the text vocab file.
+* `vocab_freq_index`: `int` specifying which column in the text vocab file
+ contains the frequency counts of the tokens.
+* `vocab_freq_dtype`: `DType` specifying the format of the frequency counts
+ in the text vocab file.
+* `vocab_delimiter`: `string` specifying the delimiter used in the text vocab
+ file.
+* `vocab_min_count`: `int`, `float`, or scalar `Tensor` specifying
+ minimum frequency threshold (from `vocab_freq_file`) for a token to be
+ kept in `input_tensor`. This should correspond with `vocab_freq_dtype`.
+* `vocab_subsampling`: (Optional) `float` specifying frequency proportion
+ threshold for tokens from `input_tensor`. Tokens that occur more
+ frequently will be randomly down-sampled. Reasonable starting values
+ may be around 1e-3 or 1e-5. See Eq. 5 in http://arxiv.org/abs/1310.4546
+ for more details.
+* `corpus_size`: (Optional) `int`, `float`, or scalar `Tensor` specifying the
+ total number of tokens in the corpus (e.g., sum of all the frequency
+ counts of `vocab_freq_file`). Used with `vocab_subsampling` for
+ down-sampling frequently occurring tokens. If this is specified,
+ `vocab_freq_file` and `vocab_subsampling` must also be specified.
+ If `corpus_size` is needed but not supplied, then it will be calculated
+ from `vocab_freq_file`. You might want to supply your own value if you
+ have already eliminated infrequent tokens from your vocabulary files
+ (where frequency < vocab_min_count) to save memory in the internal
+ token lookup table. Otherwise, the unused tokens' variables will waste
+ memory. The user-supplied `corpus_size` value must be greater than or
+ equal to the sum of all the frequency counts of `vocab_freq_file`.
+* `min_skips`: `int` or scalar `Tensor` specifying the minimum window size to
+ randomly use for each token. Must be >= 0 and <= `max_skips`. If
+ `min_skips` and `max_skips` are both 0, the only label output will
+ be the token itself.
+* `max_skips`: `int` or scalar `Tensor` specifying the maximum window size to
+ randomly use for each token. Must be >= 0.
+* `start`: `int` or scalar `Tensor` specifying the position in `input_tensor`
+ from which to start generating skip-gram candidates.
+* `limit`: `int` or scalar `Tensor` specifying the maximum number of elements
+ in `input_tensor` to use in generating skip-gram candidates. -1 means
+ to use the rest of the `Tensor` after `start`.
+* `emit_self_as_target`: `bool` or scalar `Tensor` specifying whether to emit
+ each token as a label for itself.
+* `batch_size`: (Optional) `int` specifying batch size of returned `Tensors`.
+* `batch_capacity`: (Optional) `int` specifying batch capacity for the queue
+ used for batching returned `Tensors`. Only has an effect if
+ `batch_size` > 0. Defaults to 100 * `batch_size` if not specified.
+* `seed`: (Optional) `int` used to create a random seed for window size and
+ subsampling. See `set_random_seed` docs for behavior.
+* `name`: (Optional) A `string` name or a name scope for the operations.
+
+
+#### Returns:
+
+A `tuple` containing (token, label) `Tensors`. Each output `Tensor` is of
+rank-1 and has the same type as `input_tensor`. The `Tensors` will be of
+length `batch_size`; if `batch_size` is not specified, they will be of
+random length, though they will be in sync with each other as long as
+they are evaluated together.
+
+
+
+#### Raises:
+
+
+* `ValueError`: If `vocab_token_index` or `vocab_freq_index` is less than 0
+ or exceeds the number of columns in `vocab_freq_file`.
+ If `vocab_token_index` and `vocab_freq_index` are both set to the same
+ column. If any token in `vocab_freq_file` has a negative frequency.
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 8b3c4f69bc..0349c9f771 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1 @@
-tf-nightly-2.0-preview
\ No newline at end of file
+tensorflow==2.0.0b1
\ No newline at end of file
diff --git a/tensorflow_addons/version.py b/tensorflow_addons/version.py
index 008d683d2e..1c210762c6 100644
--- a/tensorflow_addons/version.py
+++ b/tensorflow_addons/version.py
@@ -27,7 +27,7 @@
# stable release (indicated by `_VERSION_SUFFIX = ''`). Outside the context of a
# release branch, the current version is by default assumed to be a
# 'development' version, labeled 'dev'.
-_VERSION_SUFFIX = 'dev'
+_VERSION_SUFFIX = ''
# Example, '0.1.0-dev'
__version__ = '.'.join([
diff --git a/tools/ci_build/builds/release_linux.sh b/tools/ci_build/builds/release_linux.sh
index e6403153e6..873ae704d4 100755
--- a/tools/ci_build/builds/release_linux.sh
+++ b/tools/ci_build/builds/release_linux.sh
@@ -38,7 +38,7 @@ for version in ${PYTHON_VERSIONS}; do
build_pip_pkg
# Package Whl
- bazel-bin/build_pip_pkg artifacts --nightly
+ bazel-bin/build_pip_pkg artifacts
# Uncomment and use this command for release branches
#bazel-bin/build_pip_pkg artifacts
diff --git a/tools/ci_build/builds/release_macos.sh b/tools/ci_build/builds/release_macos.sh
index 4baeba68db..9e8c97ad04 100644
--- a/tools/ci_build/builds/release_macos.sh
+++ b/tools/ci_build/builds/release_macos.sh
@@ -41,7 +41,7 @@ for version in ${PYTHON_VERSIONS}; do
build_pip_pkg
# Package Whl
- bazel-bin/build_pip_pkg artifacts --nightly
+ bazel-bin/build_pip_pkg artifacts
# Uncomment and use this command for release branches
#bazel-bin/build_pip_pkg artifacts