Skip to content

Commit

Permalink
Remove Optional pytype qualifier for parameters which are not actually optional (should not be None), they just have a default value.
Browse files Browse the repository at this point in the history

PiperOrigin-RevId: 395659884
  • Loading branch information
zoyahav authored and tf-transform-team committed Sep 9, 2021
1 parent d0cf9ff commit 3a3f999
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 36 deletions.
29 changes: 14 additions & 15 deletions tensorflow_transform/analyzers.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
# `collections.namedtuple` or `typing.NamedTuple` once the Spark issue is
# resolved.
from tfx_bsl.types import tfx_namedtuple
from typing_extensions import Literal

from google.protobuf import descriptor_pb2

Expand All @@ -73,7 +74,7 @@
builtin_min = min


DEFAULT_VOCABULARY_FILE_FORMAT = 'text'
DEFAULT_VOCABULARY_FILE_FORMAT: Literal['text'] = 'text'
ALLOWED_VOCABULARY_FILE_FORMATS = ('text', 'tfrecord_gzip')

VOCAB_FILENAME_PREFIX = 'vocab_'
Expand Down Expand Up @@ -931,7 +932,7 @@ def tukey_scale(x: common_types.TensorType,

@common.log_api_use(common.ANALYZER_COLLECTION)
def tukey_h_params(x: common_types.TensorType,
reduce_instance_dims: Optional[bool] = True,
reduce_instance_dims: bool = True,
output_dtype: Optional[tf.DType] = None,
name: Optional[str] = None) -> Tuple[tf.Tensor, tf.Tensor]:
"""Computes the h parameters of the values of a `Tensor` over the dataset.
Expand Down Expand Up @@ -968,7 +969,7 @@ def tukey_h_params(x: common_types.TensorType,

def _tukey_parameters(
x: common_types.TensorType,
reduce_instance_dims: Optional[bool] = True,
reduce_instance_dims: bool = True,
output_dtype: Optional[tf.DType] = None
) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]:
"""Efficient computation of L-moments."""
Expand Down Expand Up @@ -1110,8 +1111,8 @@ class WeightedMeanAndVarCombiner(analyzer_nodes.Combiner):
def __init__(self,
output_numpy_dtype,
output_shape: Optional[Collection[Optional[int]]] = None,
compute_variance: Optional[bool] = True,
compute_weighted: Optional[bool] = False):
compute_variance: bool = True,
compute_weighted: bool = False):
"""Init method for WeightedMeanAndVarCombiner.
Args:
Expand Down Expand Up @@ -1605,12 +1606,10 @@ class _VocabOrderingType:
MUTUAL_INFORMATION = 5


def _register_vocab(
sanitized_filename: str,
vocabulary_key: Optional[str] = None,
file_format: Optional[
common_types.VocabularyFileFormatType] = DEFAULT_VOCABULARY_FILE_FORMAT
):
def _register_vocab(sanitized_filename: str,
vocabulary_key: Optional[str] = None,
file_format: common_types
.VocabularyFileFormatType = DEFAULT_VOCABULARY_FILE_FORMAT):
"""Register the specified vocab within the asset map.
Args:
Expand Down Expand Up @@ -1640,14 +1639,14 @@ def vocabulary(
store_frequency: Optional[bool] = False,
weights: Optional[tf.Tensor] = None,
labels: Optional[tf.Tensor] = None,
use_adjusted_mutual_info: Optional[bool] = False,
use_adjusted_mutual_info: bool = False,
min_diff_from_avg: Optional[int] = None,
coverage_top_k: Optional[int] = None,
coverage_frequency_threshold: Optional[int] = None,
key_fn: Optional[Callable[[Any], Any]] = None,
fingerprint_shuffle: Optional[bool] = False,
file_format: Optional[
common_types.VocabularyFileFormatType] = DEFAULT_VOCABULARY_FILE_FORMAT,
file_format: common_types
.VocabularyFileFormatType = DEFAULT_VOCABULARY_FILE_FORMAT,
name: Optional[str] = None) -> common_types.TemporaryAnalyzerOutputType:
r"""Computes the unique values of a `Tensor` over the whole dataset.
Expand Down Expand Up @@ -2112,7 +2111,7 @@ def quantiles(x: tf.Tensor,
num_buckets: int,
epsilon: float,
weights: Optional[tf.Tensor] = None,
reduce_instance_dims: Optional[bool] = True,
reduce_instance_dims: bool = True,
name: Optional[str] = None) -> tf.Tensor:
"""Computes the quantile boundaries of a `Tensor` over the whole dataset.
Expand Down
23 changes: 12 additions & 11 deletions tensorflow_transform/mappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,7 @@ def _scale_to_gaussian_internal(
def sparse_tensor_to_dense_with_shape(
x: tf.SparseTensor,
shape: Union[tf.TensorShape, Iterable[int]],
default_value: Optional[Union[tf.Tensor, int, float,
str]] = 0) -> tf.Tensor:
default_value: Union[tf.Tensor, int, float, str] = 0) -> tf.Tensor:
"""Converts a `SparseTensor` into a dense tensor and sets its shape.
Args:
Expand Down Expand Up @@ -916,22 +915,23 @@ def _count_docs_with_term(term_frequency: tf.SparseTensor) -> tf.Tensor:
@common.log_api_use(common.MAPPER_COLLECTION)
def compute_and_apply_vocabulary(
x: common_types.ConsistentTensorType,
default_value: Optional[Any] = -1,
default_value: Any = -1,
top_k: Optional[int] = None,
frequency_threshold: Optional[int] = None,
num_oov_buckets: Optional[int] = 0,
num_oov_buckets: int = 0,
vocab_filename: Optional[str] = None,
weights: Optional[tf.Tensor] = None,
labels: Optional[tf.Tensor] = None,
use_adjusted_mutual_info: bool = False,
min_diff_from_avg: Optional[float] = 0.0,
min_diff_from_avg: float = 0.0,
coverage_top_k: Optional[int] = None,
coverage_frequency_threshold: Optional[int] = None,
key_fn: Optional[Callable[[Any], Any]] = None,
fingerprint_shuffle: bool = False,
file_format: Optional[common_types.VocabularyFileFormatType] = analyzers
file_format: common_types.VocabularyFileFormatType = analyzers
.DEFAULT_VOCABULARY_FILE_FORMAT,
name: Optional[str] = None) -> common_types.ConsistentTensorType: # TODO(b/64987151): Remove # pytype: disable=annotation-type-mismatch
name: Optional[str] = None
) -> common_types.ConsistentTensorType: # TODO(b/64987151, b/197716941): Remove # pytype: disable=annotation-type-mismatch
r"""Generates a vocabulary for `x` and maps it to an integer with this vocab.
In case one of the tokens contains the '\n' or '\r' characters or is empty it
Expand Down Expand Up @@ -1042,13 +1042,14 @@ def compute_and_apply_vocabulary(
def apply_vocabulary(
x: common_types.ConsistentTensorType,
deferred_vocab_filename_tensor: common_types.TemporaryAnalyzerOutputType,
default_value: Optional[Any] = -1,
num_oov_buckets: Optional[int] = 0,
default_value: Any = -1,
num_oov_buckets: int = 0,
lookup_fn: Optional[Callable[[common_types.TensorType, tf.Tensor],
Tuple[tf.Tensor, tf.Tensor]]] = None,
file_format: Optional[common_types.VocabularyFileFormatType] = analyzers
file_format: common_types.VocabularyFileFormatType = analyzers
.DEFAULT_VOCABULARY_FILE_FORMAT,
name: Optional[str] = None) -> common_types.ConsistentTensorType: # TODO(b/64987151): Remove # pytype: disable=annotation-type-mismatch
name: Optional[str] = None
) -> common_types.ConsistentTensorType:
r"""Maps `x` to a vocabulary specified by the deferred tensor.
This function also writes domain statistics about the vocabulary min and max
Expand Down
2 changes: 1 addition & 1 deletion tensorflow_transform/output_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ def transform_features_layer(self) -> tf.keras.Model:
def transform_raw_features(
self,
raw_features: Mapping[str, common_types.InputTensorType],
drop_unused_features: Optional[bool] = True # LEGACY_VALUE=False
drop_unused_features: bool = True # LEGACY_VALUE=False
) -> Dict[str, common_types.InputTensorType]:
"""Takes a dict of tensors representing raw features and transforms them.
Expand Down
17 changes: 8 additions & 9 deletions tensorflow_transform/tf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,22 +467,21 @@ def is_vocabulary_tfrecord_supported():
tf.version.VERSION >= '2.4')


def apply_bucketize_op(
x: tf.Tensor,
boundaries: tf.Tensor,
remove_leftmost_boundary: Optional[bool] = False) -> tf.Tensor:
def apply_bucketize_op(x: tf.Tensor,
boundaries: tf.Tensor,
remove_leftmost_boundary: bool = False) -> tf.Tensor:
"""Applies the bucketize op to every value in x.
x and boundaries are expected to be in final form (before turning to lists).
Args:
x: a `Tensor` of dtype float32 with no more than one dimension.
boundaries: The bucket boundaries represented as a rank 2 `Tensor`
of tf.int32|64. Should be sorted.
boundaries: The bucket boundaries represented as a rank 2 `Tensor` of
tf.int32|64. Should be sorted.
remove_leftmost_boundary (Optional): Remove lowest boundary if True.
BoostedTreesBucketize op assigns according to upper bound, and therefore
the leftmost boundary is assumed to be the upper bound of the first
bucket. If a lower bound is present, the indexes will be off by 1.
BoostedTreesBucketize op assigns according to upper bound, and therefore
the leftmost boundary is assumed to be the upper bound of the first
bucket. If a lower bound is present, the indexes will be off by 1.
Returns:
A `Tensor` of dtype int64 with the same shape as `x`, and each element in
Expand Down

0 comments on commit 3a3f999

Please sign in to comment.