Do not compute metrics for slices with example count less than 'k' (where the value of 'k' is configurable and set to 1 by default).

PiperOrigin-RevId: 249854302
tf-model-analysis-team committed May 24, 2019
1 parent cf5b1fb commit 545106a
Showing 9 changed files with 268 additions and 33 deletions.
2 changes: 2 additions & 0 deletions RELEASE.md
@@ -34,6 +34,8 @@
* Added support for mean absolute error post export metric.
* Added support for mean squared error and root mean squared error post export
metric.
* Added support for not computing metrics for slices with fewer than a given
number of examples.

## Bug fixes and other changes
* Cast / convert labels for precision / recall at K so that they work even if
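For readers skimming the release note, here is a minimal usage sketch of the new option, modeled on the tests further down in this commit. The model and data paths are placeholders, and the top-level `tfma` aliases are assumed to match the package layout at this revision:

```python
import tensorflow_model_analysis as tfma

# Placeholder paths; substitute a real exported EvalSavedModel and a TFRecord
# file of serialized tf.Examples.
eval_shared_model = tfma.default_eval_shared_model(
    eval_saved_model_path='/path/to/eval_saved_model',
    example_weight_key='age')

eval_result = tfma.run_model_analysis(
    eval_shared_model,
    data_location='/path/to/examples.tfrecord',
    slice_spec=[tfma.slicer.SingleSliceSpec(columns=['language'])],
    # New in this change: slices with fewer than 2 examples get an 'error'
    # entry instead of aggregated metrics.
    k_anonymization_count=2)
```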
28 changes: 23 additions & 5 deletions tensorflow_model_analysis/api/model_eval_lib.py
@@ -274,7 +274,8 @@ def default_extractors( # pylint: disable=invalid-name
def default_evaluators( # pylint: disable=invalid-name
eval_shared_model: types.EvalSharedModel,
desired_batch_size: Optional[int] = None,
num_bootstrap_samples: Optional[int] = None) -> List[evaluator.Evaluator]:
num_bootstrap_samples: Optional[int] = None,
k_anonymization_count: int = 1) -> List[evaluator.Evaluator]:
"""Returns the default evaluators for use in ExtractAndEvaluate.
Args:
@@ -283,12 +284,17 @@ def default_evaluators( # pylint: disable=invalid-name
num_bootstrap_samples: Number of bootstrap samples to draw. If more than 1,
confidence intervals will be computed for metrics. Suggested value is at
least 20.
k_anonymization_count: If the number of examples in a specific slice is less
than k_anonymization_count, then an error will be returned for that slice
instead of the computed metrics. This helps preserve privacy by not
displaying aggregated data for slices with too few examples.
"""
return [
metrics_and_plots_evaluator.MetricsAndPlotsEvaluator(
eval_shared_model,
desired_batch_size,
num_bootstrap_samples=num_bootstrap_samples)
num_bootstrap_samples=num_bootstrap_samples,
k_anonymization_count=k_anonymization_count)
]


@@ -479,7 +485,8 @@ def ExtractEvaluateAndWriteResults( # pylint: disable=invalid-name
evaluators: Optional[List[evaluator.Evaluator]] = None,
writers: Optional[List[writer.Writer]] = None,
write_config: Optional[bool] = True,
num_bootstrap_samples: Optional[int] = 1) -> beam.pvalue.PDone:
num_bootstrap_samples: Optional[int] = 1,
k_anonymization_count: int = 1) -> beam.pvalue.PDone:
"""PTransform for performing extraction, evaluation, and writing results.
Users who want to construct their own Beam pipelines instead of using the
@@ -533,6 +540,10 @@ def ExtractEvaluateAndWriteResults( # pylint: disable=invalid-name
write_config: True to write the config along with the results.
num_bootstrap_samples: Optional, set to at least 20 in order to calculate
metrics with confidence intervals.
k_anonymization_count: If the number of examples in a specific slice is less
than k_anonymization_count, then an error will be returned for that slice
instead of the computed metrics. This helps preserve privacy by not
displaying aggregated data for slices with too few examples.
Raises:
ValueError: If matching Extractor not found for an Evaluator.
@@ -551,7 +562,8 @@ def ExtractEvaluateAndWriteResults( # pylint: disable=invalid-name
evaluators = default_evaluators(
eval_shared_model=eval_shared_model,
desired_batch_size=desired_batch_size,
num_bootstrap_samples=num_bootstrap_samples)
num_bootstrap_samples=num_bootstrap_samples,
k_anonymization_count=k_anonymization_count)

for v in evaluators:
evaluator.verify_evaluator(v, extractors)
@@ -602,6 +614,7 @@ def run_model_analysis(
write_config: Optional[bool] = True,
pipeline_options: Optional[Any] = None,
num_bootstrap_samples: Optional[int] = 1,
k_anonymization_count: int = 1,
) -> EvalResult:
"""Runs TensorFlow model analysis.
@@ -646,6 +659,10 @@
whether to run directly.
num_bootstrap_samples: Optional, set to at least 20 in order to calculate
metrics with confidence intervals.
k_anonymization_count: If the number of examples in a specific slice is less
than k_anonymization_count, then an error will be returned for that slice
instead of the computed metrics. This helps preserve privacy by not
displaying aggregated data for slices with too few examples.
Returns:
An EvalResult that can be used with the TFMA visualization functions.
@@ -682,7 +699,8 @@ def run_model_analysis(
evaluators=evaluators,
writers=writers,
write_config=write_config,
num_bootstrap_samples=num_bootstrap_samples))
num_bootstrap_samples=num_bootstrap_samples,
k_anonymization_count=k_anonymization_count))
# pylint: enable=no-value-for-parameter

eval_result = load_eval_result(output_path=output_path)
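The threshold itself is enforced inside metrics_and_plots_evaluator, which is not part of this excerpt. As a rough mental model only, the behavior resembles the hypothetical post-processing helper below; the names and data structures are illustrative (they match the error entry asserted in the tests further down), not TFMA's actual implementation:

```python
from typing import Any, Dict, Tuple

SliceKey = Tuple[Tuple[str, Any], ...]


def apply_k_anonymization(
    metrics_per_slice: Dict[SliceKey, Dict[str, Any]],
    example_count_per_slice: Dict[SliceKey, int],
    k_anonymization_count: int = 1) -> Dict[SliceKey, Dict[str, Any]]:
  """Hypothetical helper: suppress metrics for slices with < k examples."""
  result = {}
  for slice_key, metrics in metrics_per_slice.items():
    if example_count_per_slice.get(slice_key, 0) < k_anonymization_count:
      # Mirrors the error entry asserted in model_eval_lib_test.py below.
      result[slice_key] = {
          'error': {
              'debugMessage':
                  'Example count for this slice key is lower than the '
                  'minimum required value: %d. No data is aggregated for '
                  'this slice.' % k_anonymization_count
          }
      }
    else:
      result[slice_key] = metrics
  return result
```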
28 changes: 24 additions & 4 deletions tensorflow_model_analysis/api/model_eval_lib_test.py
@@ -123,18 +123,28 @@ def testRunModelAnalysis(self):
self._makeExample(age=3.0, language='english', label=1.0),
self._makeExample(age=3.0, language='chinese', label=0.0),
self._makeExample(age=4.0, language='english', label=1.0),
self._makeExample(age=5.0, language='chinese', label=1.0)
self._makeExample(age=5.0, language='chinese', label=1.0),
self._makeExample(age=5.0, language='hindi', label=1.0)
]
data_location = self._writeTFExamplesToTFRecords(examples)
slice_spec = [slicer.SingleSliceSpec(columns=['language'])]
eval_result = model_eval_lib.run_model_analysis(
model_eval_lib.default_eval_shared_model(
eval_saved_model_path=model_location, example_weight_key='age'),
data_location,
slice_spec=slice_spec)
slice_spec=slice_spec,
k_anonymization_count=2)
# We only check some of the metrics to ensure that the end-to-end
# pipeline works.
expected = {
(('language', b'hindi'),): {
u'error': {
'debugMessage':
u'Example count for this slice key is lower than the '
u'minimum required value: 2. No data is aggregated for '
u'this slice.'
},
},
(('language', b'chinese'),): {
'accuracy': {
'doubleValue': 0.5
@@ -177,7 +187,8 @@ def testRunModelAnalysisWithUncertainty(self):
self._makeExample(age=3.0, language='english', label=1.0),
self._makeExample(age=3.0, language='chinese', label=0.0),
self._makeExample(age=4.0, language='english', label=1.0),
self._makeExample(age=5.0, language='chinese', label=1.0)
self._makeExample(age=5.0, language='chinese', label=1.0),
self._makeExample(age=5.0, language='hindi', label=1.0)
]
data_location = self._writeTFExamplesToTFRecords(examples)
slice_spec = [slicer.SingleSliceSpec(columns=['language'])]
@@ -186,10 +197,19 @@
eval_saved_model_path=model_location, example_weight_key='age'),
data_location,
slice_spec=slice_spec,
num_bootstrap_samples=20)
num_bootstrap_samples=20,
k_anonymization_count=2)
# We only check some of the metrics to ensure that the end-to-end
# pipeline works.
expected = {
(('language', b'hindi'),): {
u'error': {
'debugMessage':
u'Example count for this slice key is lower than the '
u'minimum required value: 2. No data is aggregated for '
u'this slice.'
},
},
(('language', b'chinese'),): {
metric_keys.EXAMPLE_WEIGHT: {
'doubleValue': 8.0
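On the consuming side, callers reading an EvalResult should expect suppressed slices to carry an 'error' entry instead of metric values. A hedged sketch, assuming eval_result.slicing_metrics yields (slice_key, metrics) pairs as in this version of the API:

```python
# eval_result as returned by tfma.run_model_analysis in the earlier sketch.
for slice_key, metrics in eval_result.slicing_metrics:
  if 'error' in metrics:
    # Slice was suppressed by k_anonymization_count; no aggregated data.
    print('Skipping %s: %s' % (slice_key, metrics['error']['debugMessage']))
    continue
  print(slice_key, metrics.get('accuracy'))
```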
4 changes: 2 additions & 2 deletions tensorflow_model_analysis/api/tfma_unit.py
@@ -283,7 +283,7 @@ def check_metrics(got):

with beam.Pipeline() as pipeline:
# pylint: disable=no-value-for-parameter
metrics, _ = (
(metrics, _), _ = (
pipeline
| 'CreateExamples' >> beam.Create(serialized_examples)
| 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
@@ -371,7 +371,7 @@ def check_metrics(got):
eval_shared_model=eval_shared_model, slice_spec=slice_spec)

# pylint: disable=no-value-for-parameter
metrics, _ = (
(metrics, _), _ = (
examples_pcollection
| 'InputsToExtracts' >> model_eval_lib.InputsToExtracts()
| 'Extract' >> Extract(extractors=extractors)
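The tfma_unit.py change above appears to be a consequence of the evaluation step now emitting an additional output alongside the (metrics, plots) pair, so the test helpers must unpack one level deeper. A toy illustration of the unpacking pattern only, with plain strings standing in for the Beam PCollections:

```python
# The evaluation result is now a nested tuple; only the metrics output is
# kept here, everything else is discarded.
evaluation = (('metrics_pcoll', 'plots_pcoll'), 'extra_output')
(metrics, _), _ = evaluation
assert metrics == 'metrics_pcoll'
```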
