diff --git a/sdmetrics/single_table/privacy/disclosure_protection.py b/sdmetrics/single_table/privacy/disclosure_protection.py index ea81c920..cbed6f22 100644 --- a/sdmetrics/single_table/privacy/disclosure_protection.py +++ b/sdmetrics/single_table/privacy/disclosure_protection.py @@ -14,7 +14,7 @@ CategoricalZeroCAP, ) -MAX_NUM_ROWS = 50000 +MAX_NUM_ROWS = 10000 CAP_METHODS = { 'CAP': CategoricalCAP, @@ -212,7 +212,7 @@ def compute_breakdown( computation_method = computation_method.upper() if len(real_data) > MAX_NUM_ROWS or len(synthetic_data) > MAX_NUM_ROWS: warnings.warn( - f'Data exceeds {MAX_NUM_ROWS} rows, perfomance may be slow.' + f'Data exceeds {MAX_NUM_ROWS} rows, perfomance may be slow. ' 'Consider using the `DisclosureProtectionEstimate` for faster computation.' ) @@ -238,7 +238,7 @@ def compute_breakdown( ) if baseline_protection == 0: - score = 0 if cap_protection == 0 else 1 + score = np.nan else: score = min(cap_protection / baseline_protection, 1) @@ -363,7 +363,7 @@ def _compute_estimated_cap_metric( estimated_score_sum += estimated_cap_protection average_computed_score = estimated_score_sum / (i + 1.0) if baseline_protection == 0: - average_score = 0 if average_computed_score == 0 else 1 + average_score = np.nan else: average_score = min(average_computed_score / baseline_protection, 1) diff --git a/tests/unit/single_table/privacy/test_disclosure_protection.py b/tests/unit/single_table/privacy/test_disclosure_protection.py index b65cb13b..ffa9c45f 100644 --- a/tests/unit/single_table/privacy/test_disclosure_protection.py +++ b/tests/unit/single_table/privacy/test_disclosure_protection.py @@ -271,15 +271,7 @@ def test_compute_breakdown_zero_baseline(self, CAPMethodsMock): CAPMethodsMock.get.return_value = CAPMock # Run - score_breakdown_with_cap = DisclosureProtection.compute_breakdown( - real_data=real_data, - synthetic_data=synthetic_data, - known_column_names=['col1'], - sensitive_column_names=['col2'], - ) - - CAPMock._compute.return_value = 0 - score_breakdown_no_cap = DisclosureProtection.compute_breakdown( + score_breakdown = DisclosureProtection.compute_breakdown( real_data=real_data, synthetic_data=synthetic_data, known_column_names=['col1'], @@ -287,12 +279,11 @@ def test_compute_breakdown_zero_baseline(self, CAPMethodsMock): ) # Assert - assert score_breakdown_with_cap == { - 'score': 1, + assert score_breakdown == { + 'score': np.nan, 'baseline_protection': 0, 'cap_protection': 0.5, } - assert score_breakdown_no_cap == {'score': 0, 'baseline_protection': 0, 'cap_protection': 0} @patch('sdmetrics.single_table.privacy.disclosure_protection.CAP_METHODS') @patch( @@ -323,7 +314,7 @@ def test_compute_breakdown_warns_too_large( # Run expected_warning = re.escape( - 'Data exceeds 50000 rows, perfomance may be slow.' + 'Data exceeds 10000 rows, perfomance may be slow. ' 'Consider using the `DisclosureProtectionEstimate` for faster computation.' ) with pytest.warns(UserWarning, match=expected_warning): @@ -486,7 +477,7 @@ def test__compute_estimated_cap_metric_zero_baseline(self, CAPMethodsMock): ) # Assert - assert avg_score == 1 + assert np.isnan(avg_score) assert avg_computed_score == 0.38 @patch(