From 41c803d1653f1ec70e57f2f127a4cc1b031befbc Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Thu, 14 Nov 2024 09:35:01 -0500 Subject: [PATCH 1/3] fix --- sdmetrics/column_pairs/statistical/contingency_similarity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdmetrics/column_pairs/statistical/contingency_similarity.py b/sdmetrics/column_pairs/statistical/contingency_similarity.py index 5d2c801d..e41075e0 100644 --- a/sdmetrics/column_pairs/statistical/contingency_similarity.py +++ b/sdmetrics/column_pairs/statistical/contingency_similarity.py @@ -44,7 +44,7 @@ def compute(cls, real_data, synthetic_data): contingency_synthetic = synthetic.groupby(list(columns), dropna=False).size() / len( synthetic ) - combined_index = contingency_real.index.union(contingency_synthetic.index) + combined_index = contingency_real.index.union(contingency_synthetic.index, sort=False) contingency_synthetic = contingency_synthetic.reindex(combined_index, fill_value=0) contingency_real = contingency_real.reindex(combined_index, fill_value=0) diff = abs(contingency_real - contingency_synthetic).fillna(0) From 3a6c0dbbda3c64b4f9ddc0fc49479edef5781767 Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Thu, 14 Nov 2024 09:35:09 -0500 Subject: [PATCH 2/3] tests --- .../reports/multi_table/test_quality_report.py | 2 +- .../statistical/test_contingency_similarity.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/integration/reports/multi_table/test_quality_report.py b/tests/integration/reports/multi_table/test_quality_report.py index 9c3b79b1..fb571e81 100644 --- a/tests/integration/reports/multi_table/test_quality_report.py +++ b/tests/integration/reports/multi_table/test_quality_report.py @@ -342,7 +342,7 @@ def test_quality_report_with_errors(): None, ], }) - assert score == 0.7249603174603174 + assert score == 0.7249603174603175 pd.testing.assert_frame_equal(properties, expected_properties) pd.testing.assert_frame_equal(details_column_shapes, expected_details) diff --git a/tests/unit/column_pairs/statistical/test_contingency_similarity.py b/tests/unit/column_pairs/statistical/test_contingency_similarity.py index 713aab7d..dac7e428 100644 --- a/tests/unit/column_pairs/statistical/test_contingency_similarity.py +++ b/tests/unit/column_pairs/statistical/test_contingency_similarity.py @@ -1,6 +1,7 @@ from unittest.mock import patch import pandas as pd +import pytest from sdmetrics.column_pairs.statistical import ContingencySimilarity @@ -53,3 +54,16 @@ def test_normalize(self, normalize_mock): # Assert normalize_mock.assert_called_once_with(raw_score) assert result == normalize_mock.return_value + + @pytest.mark.filterwarnings('error:.*The values in the array are unorderable.*:RuntimeWarning') + def test_no_warning_raised(self): + """Test that no warning is raised when the metric is instantiated.""" + # Setup + real_data = pd.DataFrame(data={'A': ['value'] * 4, 'B': ['1', '2', '3', pd.NA]}) + + synthetic_data = pd.DataFrame(data={'A': ['value'] * 3, 'B': ['1', '2', pd.NA]}) + + # Run and Assert + ContingencySimilarity.compute( + real_data=real_data[['A', 'B']], synthetic_data=synthetic_data[['A', 'B']] + ) From e6d606632a94f79ee50b33502968dc16a619dcf0 Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Thu, 14 Nov 2024 09:46:37 -0500 Subject: [PATCH 3/3] docstring --- .../column_pairs/statistical/test_contingency_similarity.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/unit/column_pairs/statistical/test_contingency_similarity.py b/tests/unit/column_pairs/statistical/test_contingency_similarity.py index dac7e428..dc7bafdd 100644 --- a/tests/unit/column_pairs/statistical/test_contingency_similarity.py +++ b/tests/unit/column_pairs/statistical/test_contingency_similarity.py @@ -56,11 +56,10 @@ def test_normalize(self, normalize_mock): assert result == normalize_mock.return_value @pytest.mark.filterwarnings('error:.*The values in the array are unorderable.*:RuntimeWarning') - def test_no_warning_raised(self): - """Test that no warning is raised when the metric is instantiated.""" + def test_no_runtime_warning_raised(self): + """Test that no RuntimeWarning warning is raised when the metric is computed.""" # Setup real_data = pd.DataFrame(data={'A': ['value'] * 4, 'B': ['1', '2', '3', pd.NA]}) - synthetic_data = pd.DataFrame(data={'A': ['value'] * 3, 'B': ['1', '2', pd.NA]}) # Run and Assert