From 07d7db1165a75f10adc961489b3a9431ff498ca3 Mon Sep 17 00:00:00 2001 From: WhiteWolf47 Date: Mon, 24 Apr 2023 06:41:59 +0000 Subject: [PATCH 01/13] calibrated the regressor model --- bugbug/models/regressor.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bugbug/models/regressor.py b/bugbug/models/regressor.py index 6b4266f2c1..c2774f9984 100644 --- a/bugbug/models/regressor.py +++ b/bugbug/models/regressor.py @@ -18,6 +18,7 @@ from bugbug import bugzilla, commit_features, db, feature_cleanup, repository, utils from bugbug.model import CommitModel +from bugbug.model_calibration import IsotonicRegressionCalibrator logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -128,8 +129,11 @@ def __init__( ] ) - self.clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()) - self.clf.set_params(predictor="cpu_predictor") + base_clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()) + base_clf.set_params(predictor="cpu_predictor") + self.clf = IsotonicRegressionCalibrator(base_clf) + + self.calculate_importance = False def get_labels(self): classes = {} From 84e1b69ed6abb1f28b28518710a4564023e9baa8 Mon Sep 17 00:00:00 2001 From: WhiteWolf47 Date: Mon, 24 Apr 2023 06:58:27 +0000 Subject: [PATCH 02/13] calibrated the regressor model --- bugbug/models/regressor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bugbug/models/regressor.py b/bugbug/models/regressor.py index c2774f9984..9d0d3d8c61 100644 --- a/bugbug/models/regressor.py +++ b/bugbug/models/regressor.py @@ -133,6 +133,7 @@ def __init__( base_clf.set_params(predictor="cpu_predictor") self.clf = IsotonicRegressionCalibrator(base_clf) + # this is a temporary workaround for now self.calculate_importance = False def get_labels(self): From 3dd35cb69d0bf3d5ab21e38bb6d7dfc512bd5c94 Mon Sep 17 00:00:00 2001 From: WhiteWolf47 Date: Mon, 24 Apr 2023 07:54:15 +0000 Subject: [PATCH 03/13] calibrated the regressor model --- bugbug/models/regressor.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bugbug/models/regressor.py b/bugbug/models/regressor.py index 9d0d3d8c61..0836f72586 100644 --- a/bugbug/models/regressor.py +++ b/bugbug/models/regressor.py @@ -132,8 +132,7 @@ def __init__( base_clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()) base_clf.set_params(predictor="cpu_predictor") self.clf = IsotonicRegressionCalibrator(base_clf) - - # this is a temporary workaround for now + # this is a temporary workaround self.calculate_importance = False def get_labels(self): From 58ac8f36d0e738d007b726a9edf54b1ec3eb7572 Mon Sep 17 00:00:00 2001 From: WhiteWolf47 Date: Mon, 24 Apr 2023 07:57:12 +0000 Subject: [PATCH 04/13] calibrated the regressor model --- bugbug/models/regressor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bugbug/models/regressor.py b/bugbug/models/regressor.py index 0836f72586..85cb8d3d78 100644 --- a/bugbug/models/regressor.py +++ b/bugbug/models/regressor.py @@ -132,7 +132,7 @@ def __init__( base_clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()) base_clf.set_params(predictor="cpu_predictor") self.clf = IsotonicRegressionCalibrator(base_clf) - # this is a temporary workaround + # this is a temporary workaround for now self.calculate_importance = False def get_labels(self): From c4c80a058639e48588ddd51210d04568e6b669c1 Mon Sep 17 00:00:00 2001 From: WhiteWolf47 Date: Mon, 24 Apr 2023 08:00:47 +0000 Subject: [PATCH 05/13] calibrated the regressor model --- bugbug/models/regressor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bugbug/models/regressor.py b/bugbug/models/regressor.py index 85cb8d3d78..0836f72586 100644 --- a/bugbug/models/regressor.py +++ b/bugbug/models/regressor.py @@ -132,7 +132,7 @@ def __init__( base_clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()) base_clf.set_params(predictor="cpu_predictor") self.clf = IsotonicRegressionCalibrator(base_clf) - # this is a temporary workaround for now + # this is a temporary workaround self.calculate_importance = False def get_labels(self): From c9499e85ecea6d195cc4effbaf5900ea38b82b72 Mon Sep 17 00:00:00 2001 From: WhiteWolf47 Date: Mon, 24 Apr 2023 08:02:07 +0000 Subject: [PATCH 06/13] calibrated the regressor model --- bugbug/models/regressor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bugbug/models/regressor.py b/bugbug/models/regressor.py index 0836f72586..85cb8d3d78 100644 --- a/bugbug/models/regressor.py +++ b/bugbug/models/regressor.py @@ -132,7 +132,7 @@ def __init__( base_clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()) base_clf.set_params(predictor="cpu_predictor") self.clf = IsotonicRegressionCalibrator(base_clf) - # this is a temporary workaround + # this is a temporary workaround for now self.calculate_importance = False def get_labels(self): From 79484390581695201362c69b9a7b25231bd93e39 Mon Sep 17 00:00:00 2001 From: WhiteWolf47 Date: Mon, 24 Apr 2023 08:22:42 +0000 Subject: [PATCH 07/13] calibrated regressor model --- bugbug/models/regressor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bugbug/models/regressor.py b/bugbug/models/regressor.py index 85cb8d3d78..0836f72586 100644 --- a/bugbug/models/regressor.py +++ b/bugbug/models/regressor.py @@ -132,7 +132,7 @@ def __init__( base_clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()) base_clf.set_params(predictor="cpu_predictor") self.clf = IsotonicRegressionCalibrator(base_clf) - # this is a temporary workaround for now + # this is a temporary workaround self.calculate_importance = False def get_labels(self): From ee9bff6a6bf2e1f4e07f575886538c253facf7dd Mon Sep 17 00:00:00 2001 From: WhiteWolf47 Date: Mon, 24 Apr 2023 08:23:38 +0000 Subject: [PATCH 08/13] calibrated regressor model --- bugbug/models/regressor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bugbug/models/regressor.py b/bugbug/models/regressor.py index 0836f72586..9d0d3d8c61 100644 --- a/bugbug/models/regressor.py +++ b/bugbug/models/regressor.py @@ -132,7 +132,8 @@ def __init__( base_clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()) base_clf.set_params(predictor="cpu_predictor") self.clf = IsotonicRegressionCalibrator(base_clf) - # this is a temporary workaround + + # this is a temporary workaround for now self.calculate_importance = False def get_labels(self): From 81a3f16d08fa970179eb901b6953f1d01a2714fd Mon Sep 17 00:00:00 2001 From: WhiteWolf47 Date: Mon, 24 Apr 2023 15:33:42 +0000 Subject: [PATCH 09/13] explained what the workaround is regarding --- bugbug/models/regressor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bugbug/models/regressor.py b/bugbug/models/regressor.py index 9d0d3d8c61..38cedc4f0a 100644 --- a/bugbug/models/regressor.py +++ b/bugbug/models/regressor.py @@ -133,7 +133,7 @@ def __init__( base_clf.set_params(predictor="cpu_predictor") self.clf = IsotonicRegressionCalibrator(base_clf) - # this is a temporary workaround for now + # this is a temporary workaround for the error - "shap.utils._exceptions.InvalidModelError: Model type not yet supported by TreeExplainer: " self.calculate_importance = False def get_labels(self): From ddc3cbf243cda30b04a10457edcc841b23def649 Mon Sep 17 00:00:00 2001 From: WhiteWolf47 Date: Mon, 24 Apr 2023 17:24:22 +0000 Subject: [PATCH 10/13] explained what the workaround is regarding --- bugbug/models/regressor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bugbug/models/regressor.py b/bugbug/models/regressor.py index 38cedc4f0a..d7b7799633 100644 --- a/bugbug/models/regressor.py +++ b/bugbug/models/regressor.py @@ -133,7 +133,7 @@ def __init__( base_clf.set_params(predictor="cpu_predictor") self.clf = IsotonicRegressionCalibrator(base_clf) - # this is a temporary workaround for the error - "shap.utils._exceptions.InvalidModelError: Model type not yet supported by TreeExplainer: " + # This is a temporary workaround for the error: "Model type not yet supported by TreeExplainer" self.calculate_importance = False def get_labels(self): From dc0ef29734fc15ebfb9822bb5dab29b4e9d05378 Mon Sep 17 00:00:00 2001 From: WhiteWolf47 Date: Wed, 3 May 2023 16:16:41 +0000 Subject: [PATCH 11/13] added a constructor to enable/disable calibration --- bugbug/models/regressor.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/bugbug/models/regressor.py b/bugbug/models/regressor.py index d7b7799633..09d32cc8fd 100644 --- a/bugbug/models/regressor.py +++ b/bugbug/models/regressor.py @@ -128,13 +128,16 @@ def __init__( ("union", ColumnTransformer(column_transformers)), ] ) - - base_clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()) - base_clf.set_params(predictor="cpu_predictor") - self.clf = IsotonicRegressionCalibrator(base_clf) - - # This is a temporary workaround for the error: "Model type not yet supported by TreeExplainer" - self.calculate_importance = False + self.calibration = True + if self.calibration: + base_clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()) + base_clf.set_params(predictor="cpu_predictor") + self.clf = IsotonicRegressionCalibrator(base_clf) + # This is a temporary workaround for the error : "Model type not yet supported by TreeExplainer" + self.calculate_importance = False + else: + self.clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()) + self.clf.set_params(predictor="cpu_predictor") def get_labels(self): classes = {} From 648919f90fc31a0e186d1d166a2bba0806d073d1 Mon Sep 17 00:00:00 2001 From: WhiteWolf47 Date: Thu, 4 May 2023 05:08:25 +0000 Subject: [PATCH 12/13] did some suggested changes --- bugbug/models/regressor.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/bugbug/models/regressor.py b/bugbug/models/regressor.py index 09d32cc8fd..d57a2b7cf5 100644 --- a/bugbug/models/regressor.py +++ b/bugbug/models/regressor.py @@ -52,6 +52,7 @@ class RegressorModel(CommitModel): def __init__( self, + calibration: bool = True, lemmatization: bool = False, interpretable: bool = True, use_finder: bool = False, @@ -128,16 +129,14 @@ def __init__( ("union", ColumnTransformer(column_transformers)), ] ) - self.calibration = True - if self.calibration: - base_clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()) - base_clf.set_params(predictor="cpu_predictor") + base_clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()) + base_clf.set_params(predictor="cpu_predictor") + if calibration: self.clf = IsotonicRegressionCalibrator(base_clf) # This is a temporary workaround for the error : "Model type not yet supported by TreeExplainer" self.calculate_importance = False else: - self.clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()) - self.clf.set_params(predictor="cpu_predictor") + self.clf = base_clf def get_labels(self): classes = {} From 98f5a1755c3606313d6af1e55d336018250229c2 Mon Sep 17 00:00:00 2001 From: WhiteWolf47 Date: Sun, 7 May 2023 07:31:21 +0000 Subject: [PATCH 13/13] did some suggested changes --- bugbug/models/regressor.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/bugbug/models/regressor.py b/bugbug/models/regressor.py index d57a2b7cf5..c3e16517d9 100644 --- a/bugbug/models/regressor.py +++ b/bugbug/models/regressor.py @@ -129,14 +129,12 @@ def __init__( ("union", ColumnTransformer(column_transformers)), ] ) - base_clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()) - base_clf.set_params(predictor="cpu_predictor") + self.clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()) + self.clf.set_params(predictor="cpu_predictor") if calibration: - self.clf = IsotonicRegressionCalibrator(base_clf) + self.clf = IsotonicRegressionCalibrator(self.clf) # This is a temporary workaround for the error : "Model type not yet supported by TreeExplainer" self.calculate_importance = False - else: - self.clf = base_clf def get_labels(self): classes = {}