From ab7b940f5d0fbb64f3d0061e6b0db4b78a1c714e Mon Sep 17 00:00:00 2001
From: Wenhuan
Date: Sun, 18 Dec 2022 18:25:51 +0800
Subject: [PATCH] fix metric_missing=0 is ignored in Scorecard._fit #226

---
 optbinning/scorecard/scorecard.py |  2 +-
 tests/test_scorecard.py           | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/optbinning/scorecard/scorecard.py b/optbinning/scorecard/scorecard.py
index fe6174a..3ea2652 100644
--- a/optbinning/scorecard/scorecard.py
+++ b/optbinning/scorecard/scorecard.py
@@ -601,7 +601,7 @@ def _fit(self, X, y, sample_weight, metric_special, metric_missing,
 
                 binning_table.loc[
                     nt-1-n_specials:nt-2, "Points"] = metric_special * c
-            elif metric_missing != 'empirical':
+            if metric_missing != 'empirical':
                 binning_table.loc[nt-1, "Points"] = metric_missing * c
 
             binning_table.index.names = ['Bin id']
diff --git a/tests/test_scorecard.py b/tests/test_scorecard.py
index a4aed27..f02a224 100644
--- a/tests/test_scorecard.py
+++ b/tests/test_scorecard.py
@@ -450,3 +450,27 @@ def test_verbose():
     with open("tests/results/test_scorecard_verbose.txt", "w") as f:
         with redirect_stdout(f):
             scorecard.fit(X, y)
+
+
+def test_missing_metrics():
+    n = 100
+    data = pd.DataFrame(
+        {'target': np.hstack(
+            (np.random.choice([0, 1], n, p=[0.1, 0.9]),
+             np.random.choice([0, 1], n, p=[0.9, 0.1])
+             )
+        ),
+         'var': [np.nan] * n+['A'] * n}
+    )
+
+    binning_process = BinningProcess(['var'])
+    scaling_method_params = {'min': 0, 'max': 100}
+
+    scorecard = Scorecard(
+        binning_process=binning_process,
+        estimator=LogisticRegression(),
+        scaling_method="min_max",
+        scaling_method_params=scaling_method_params
+    ).fit(data, data.target)
+
+    assert scorecard.table()['Points'].iloc[-1] == 0