Skip to content

Commit

Permalink
Remove Optional pytype qualifier for parameters which are not actually optional (should not be None), they just have a default value.
Browse files Browse the repository at this point in the history

PiperOrigin-RevId: 395659884
  • Loading branch information
zoyahav authored and tf-transform-team committed Sep 9, 2021
1 parent d0cf9ff commit 3a3f999
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 36 deletions.
29 changes: 14 additions & 15 deletions tensorflow_transform/analyzers.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
# `collections.namedtuple` or `typing.NamedTuple` once the Spark issue is
# resolved.
from tfx_bsl.types import tfx_namedtuple
from typing_extensions import Literal

from google.protobuf import descriptor_pb2

Expand All @@ -73,7 +74,7 @@
builtin_min = min


DEFAULT_VOCABULARY_FILE_FORMAT = 'text'
DEFAULT_VOCABULARY_FILE_FORMAT: Literal['text'] = 'text'
ALLOWED_VOCABULARY_FILE_FORMATS = ('text', 'tfrecord_gzip')

VOCAB_FILENAME_PREFIX = 'vocab_'
Expand Down Expand Up @@ -931,7 +932,7 @@ def tukey_scale(x: common_types.TensorType,

@common.log_api_use(common.ANALYZER_COLLECTION)
def tukey_h_params(x: common_types.TensorType,
reduce_instance_dims: Optional[bool] = True,
reduce_instance_dims: bool = True,
output_dtype: Optional[tf.DType] = None,
name: Optional[str] = None) -> Tuple[tf.Tensor, tf.Tensor]:
"""Computes the h parameters of the values of a `Tensor` over the dataset.
Expand Down Expand Up @@ -968,7 +969,7 @@ def tukey_h_params(x: common_types.TensorType,

def _tukey_parameters(
x: common_types.TensorType,
reduce_instance_dims: Optional[bool] = True,
reduce_instance_dims: bool = True,
output_dtype: Optional[tf.DType] = None
) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]:
"""Efficient computation of L-moments."""
Expand Down Expand Up @@ -1110,8 +1111,8 @@ class WeightedMeanAndVarCombiner(analyzer_nodes.Combiner):
def __init__(self,
output_numpy_dtype,
output_shape: Optional[Collection[Optional[int]]] = None,
compute_variance: Optional[bool] = True,
compute_weighted: Optional[bool] = False):
compute_variance: bool = True,
compute_weighted: bool = False):
"""Init method for WeightedMeanAndVarCombiner.
Args:
Expand Down Expand Up @@ -1605,12 +1606,10 @@ class _VocabOrderingType:
MUTUAL_INFORMATION = 5


def _register_vocab(
sanitized_filename: str,
vocabulary_key: Optional[str] = None,
file_format: Optional[
common_types.VocabularyFileFormatType] = DEFAULT_VOCABULARY_FILE_FORMAT
):
def _register_vocab(sanitized_filename: str,
vocabulary_key: Optional[str] = None,
file_format: common_types
.VocabularyFileFormatType = DEFAULT_VOCABULARY_FILE_FORMAT):
"""Register the specified vocab within the asset map.
Args:
Expand Down Expand Up @@ -1640,14 +1639,14 @@ def vocabulary(
store_frequency: Optional[bool] = False,
weights: Optional[tf.Tensor] = None,
labels: Optional[tf.Tensor] = None,
use_adjusted_mutual_info: Optional[bool] = False,
use_adjusted_mutual_info: bool = False,
min_diff_from_avg: Optional[int] = None,
coverage_top_k: Optional[int] = None,
coverage_frequency_threshold: Optional[int] = None,
key_fn: Optional[Callable[[Any], Any]] = None,
fingerprint_shuffle: Optional[bool] = False,
file_format: Optional[
common_types.VocabularyFileFormatType] = DEFAULT_VOCABULARY_FILE_FORMAT,
file_format: common_types
.VocabularyFileFormatType = DEFAULT_VOCABULARY_FILE_FORMAT,
name: Optional[str] = None) -> common_types.TemporaryAnalyzerOutputType:
r"""Computes the unique values of a `Tensor` over the whole dataset.
Expand Down Expand Up @@ -2112,7 +2111,7 @@ def quantiles(x: tf.Tensor,
num_buckets: int,
epsilon: float,
weights: Optional[tf.Tensor] = None,
reduce_instance_dims: Optional[bool] = True,
reduce_instance_dims: bool = True,
name: Optional[str] = None) -> tf.Tensor:
"""Computes the quantile boundaries of a `Tensor` over the whole dataset.
Expand Down
23 changes: 12 additions & 11 deletions tensorflow_transform/mappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,7 @@ def _scale_to_gaussian_internal(
def sparse_tensor_to_dense_with_shape(
x: tf.SparseTensor,
shape: Union[tf.TensorShape, Iterable[int]],
default_value: Optional[Union[tf.Tensor, int, float,
str]] = 0) -> tf.Tensor:
default_value: Union[tf.Tensor, int, float, str] = 0) -> tf.Tensor:
"""Converts a `SparseTensor` into a dense tensor and sets its shape.
Args:
Expand Down Expand Up @@ -916,22 +915,23 @@ def _count_docs_with_term(term_frequency: tf.SparseTensor) -> tf.Tensor:
@common.log_api_use(common.MAPPER_COLLECTION)
def compute_and_apply_vocabulary(
x: common_types.ConsistentTensorType,
default_value: Optional[Any] = -1,
default_value: Any = -1,
top_k: Optional[int] = None,
frequency_threshold: Optional[int] = None,
num_oov_buckets: Optional[int] = 0,
num_oov_buckets: int = 0,
vocab_filename: Optional[str] = None,
weights: Optional[tf.Tensor] = None,
labels: Optional[tf.Tensor] = None,
use_adjusted_mutual_info: bool = False,
min_diff_from_avg: Optional[float] = 0.0,
min_diff_from_avg: float = 0.0,
coverage_top_k: Optional[int] = None,
coverage_frequency_threshold: Optional[int] = None,
key_fn: Optional[Callable[[Any], Any]] = None,
fingerprint_shuffle: bool = False,
file_format: Optional[common_types.VocabularyFileFormatType] = analyzers
file_format: common_types.VocabularyFileFormatType = analyzers
.DEFAULT_VOCABULARY_FILE_FORMAT,
name: Optional[str] = None) -> common_types.ConsistentTensorType: # TODO(b/64987151): Remove # pytype: disable=annotation-type-mismatch
name: Optional[str] = None
) -> common_types.ConsistentTensorType: # TODO(b/64987151, b/197716941): Remove # pytype: disable=annotation-type-mismatch
r"""Generates a vocabulary for `x` and maps it to an integer with this vocab.
In case one of the tokens contains the '\n' or '\r' characters or is empty it
Expand Down Expand Up @@ -1042,13 +1042,14 @@ def compute_and_apply_vocabulary(
def apply_vocabulary(
x: common_types.ConsistentTensorType,
deferred_vocab_filename_tensor: common_types.TemporaryAnalyzerOutputType,
default_value: Optional[Any] = -1,
num_oov_buckets: Optional[int] = 0,
default_value: Any = -1,
num_oov_buckets: int = 0,
lookup_fn: Optional[Callable[[common_types.TensorType, tf.Tensor],
Tuple[tf.Tensor, tf.Tensor]]] = None,
file_format: Optional[common_types.VocabularyFileFormatType] = analyzers
file_format: common_types.VocabularyFileFormatType = analyzers
.DEFAULT_VOCABULARY_FILE_FORMAT,
name: Optional[str] = None) -> common_types.ConsistentTensorType: # TODO(b/64987151): Remove # pytype: disable=annotation-type-mismatch
name: Optional[str] = None
) -> common_types.ConsistentTensorType:
r"""Maps `x` to a vocabulary specified by the deferred tensor.
This function also writes domain statistics about the vocabulary min and max
Expand Down
2 changes: 1 addition & 1 deletion tensorflow_transform/output_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ def transform_features_layer(self) -> tf.keras.Model:
def transform_raw_features(
self,
raw_features: Mapping[str, common_types.InputTensorType],
drop_unused_features: Optional[bool] = True # LEGACY_VALUE=False
drop_unused_features: bool = True # LEGACY_VALUE=False
) -> Dict[str, common_types.InputTensorType]:
"""Takes a dict of tensors representing raw features and transforms them.
Expand Down
17 changes: 8 additions & 9 deletions tensorflow_transform/tf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,22 +467,21 @@ def is_vocabulary_tfrecord_supported():
tf.version.VERSION >= '2.4')


def apply_bucketize_op(
x: tf.Tensor,
boundaries: tf.Tensor,
remove_leftmost_boundary: Optional[bool] = False) -> tf.Tensor:
def apply_bucketize_op(x: tf.Tensor,
boundaries: tf.Tensor,
remove_leftmost_boundary: bool = False) -> tf.Tensor:
"""Applies the bucketize op to every value in x.
x and boundaries are expected to be in final form (before turning to lists).
Args:
x: a `Tensor` of dtype float32 with no more than one dimension.
boundaries: The bucket boundaries represented as a rank 2 `Tensor`
of tf.int32|64. Should be sorted.
boundaries: The bucket boundaries represented as a rank 2 `Tensor` of
tf.int32|64. Should be sorted.
remove_leftmost_boundary (Optional): Remove lowest boundary if True.
BoostedTreesBucketize op assigns according to upper bound, and therefore
the leftmost boundary is assumed to be the upper bound of the first
bucket. If a lower bound is present, the indexes will be off by 1.
BoostedTreesBucketize op assigns according to upper bound, and therefore
the leftmost boundary is assumed to be the upper bound of the first
bucket. If a lower bound is present, the indexes will be off by 1.
Returns:
A `Tensor` of dtype int64 with the same shape as `x`, and each element in
Expand Down

0 comments on commit 3a3f999

Please sign in to comment.