diff --git a/supervised/algorithms/catboost.py b/supervised/algorithms/catboost.py
index 216254f6..81dc87f4 100644
--- a/supervised/algorithms/catboost.py
+++ b/supervised/algorithms/catboost.py
@@ -35,10 +35,12 @@ def catboost_eval_metric(ml_task, eval_metric):
             "logloss": "Logloss",
             "f1": "F1",
             "average_precision": "average_precision",
+            "accuracy": "Accuracy",
         },
         MULTICLASS_CLASSIFICATION: {
             "logloss": "MultiClass",
             "f1": "TotalF1:average=Micro",
+            "accuracy": "Accuracy",
         },
         REGRESSION: {
             "rmse": "RMSE",
@@ -245,7 +247,7 @@ def fit(
                     model_init.evals_result_["validation"].get(self.log_metric_name)
                     + validation_scores
                 )
-            iteration = None
+            iteration = None
             if train_scores is not None:
                 iteration = range(len(validation_scores))
             elif validation_scores is not None:
@@ -314,6 +316,8 @@ def get_metric_name(self):
             return "mape"
         elif metric in ["F1", "TotalF1:average=Micro"]:
             return "f1"
+        elif metric == "Accuracy":
+            return "accuracy"
         return metric
 
 
diff --git a/supervised/algorithms/lightgbm.py b/supervised/algorithms/lightgbm.py
index 3aed3301..1c6ab134 100644
--- a/supervised/algorithms/lightgbm.py
+++ b/supervised/algorithms/lightgbm.py
@@ -20,6 +20,7 @@
     lightgbm_eval_metric_pearson,
     lightgbm_eval_metric_f1,
     lightgbm_eval_metric_average_precision,
+    lightgbm_eval_metric_accuracy,
 )
 from supervised.utils.config import LOG_LEVEL
 
@@ -45,8 +46,13 @@ def lightgbm_eval_metric(ml_task, automl_eval_metric):
             "logloss": "binary_logloss",
             "f1": "custom",
             "average_precision": "custom",
+            "accuracy": "custom",
+        },
+        MULTICLASS_CLASSIFICATION: {
+            "logloss": "multi_logloss",
+            "f1": "custom",
+            "accuracy": "custom",
         },
-        MULTICLASS_CLASSIFICATION: {"logloss": "multi_logloss", "f1": "custom"},
         REGRESSION: {
             "rmse": "rmse",
             "mae": "mae",
@@ -60,7 +66,14 @@ def lightgbm_eval_metric(ml_task, automl_eval_metric):
     metric = metric_name_mapping[ml_task][automl_eval_metric]
 
     custom_eval_metric = None
-    if automl_eval_metric in ["r2", "spearman", "pearson", "f1", "average_precision"]:
+    if automl_eval_metric in [
+        "r2",
+        "spearman",
+        "pearson",
+        "f1",
+        "average_precision",
+        "accuracy",
+    ]:
         custom_eval_metric = automl_eval_metric
 
     return metric, custom_eval_metric
@@ -133,6 +146,8 @@ def __init__(self, params):
             self.custom_eval_metric = lightgbm_eval_metric_f1
         elif self.params["custom_eval_metric_name"] == "average_precision":
             self.custom_eval_metric = lightgbm_eval_metric_average_precision
+        elif self.params["custom_eval_metric_name"] == "accuracy":
+            self.custom_eval_metric = lightgbm_eval_metric_accuracy
 
         logger.debug("LightgbmLearner __init__")
 
diff --git a/supervised/algorithms/xgboost.py b/supervised/algorithms/xgboost.py
index 7c29c5b7..e69c776d 100644
--- a/supervised/algorithms/xgboost.py
+++ b/supervised/algorithms/xgboost.py
@@ -19,6 +19,7 @@
     xgboost_eval_metric_pearson,
     xgboost_eval_metric_f1,
     xgboost_eval_metric_average_precision,
+    xgboost_eval_metric_accuracy,
 )
 from supervised.utils.config import LOG_LEVEL
 
@@ -116,6 +117,8 @@ def __init__(self, params):
             self.custom_eval_metric = xgboost_eval_metric_f1
         elif self.params.get("eval_metric", "") == "average_precision":
             self.custom_eval_metric = xgboost_eval_metric_average_precision
+        elif self.params.get("eval_metric", "") == "accuracy":
+            self.custom_eval_metric = xgboost_eval_metric_accuracy
 
         self.best_ntree_limit = 0
         logger.debug("XgbLearner __init__")
@@ -217,7 +220,14 @@ def fit(
         # it a is custom metric
         # that is always minimized
         # we need to revert it
-        if metric_name in ["r2", "spearman", "pearson", "f1", "average_precision"]:
+        if metric_name in [
+            "r2",
+            "spearman",
+            "pearson",
+            "f1",
+            "average_precision",
+            "accuracy",
+        ]:
             result["train"] *= -1.0
             result["validation"] *= -1.0
 
diff --git a/supervised/base_automl.py b/supervised/base_automl.py
index 1ab95640..ffa935ef 100644
--- a/supervised/base_automl.py
+++ b/supervised/base_automl.py
@@ -1780,18 +1780,19 @@ def _validate_eval_metric(self):
             "auc",
             "f1",
             "average_precision",
+            "accuracy"
         ]:
             raise ValueError(
                 f"Metric {self.eval_metric} is not allowed in ML task: {self._get_ml_task()}. \
-                    Use 'logloss', 'auc', 'f1', or 'average_precision'"
+                    Use 'logloss', 'auc', 'f1', 'average_precision', or 'accuracy'"
             )
 
         elif (
             self._get_ml_task() == MULTICLASS_CLASSIFICATION
-        ) and self.eval_metric not in ["logloss", "f1"]:
+        ) and self.eval_metric not in ["logloss", "f1", "accuracy"]:
             raise ValueError(
                 f"Metric {self.eval_metric} is not allowed in ML task: {self._get_ml_task()}. \
-                    Use 'logloss', or 'f1'"
+                    Use 'logloss', 'f1', or 'accuracy'"
             )
 
         elif self._get_ml_task() == REGRESSION and self.eval_metric not in [
diff --git a/supervised/tuner/optuna/lightgbm.py b/supervised/tuner/optuna/lightgbm.py
index 1309ac93..c8527b9a 100644
--- a/supervised/tuner/optuna/lightgbm.py
+++ b/supervised/tuner/optuna/lightgbm.py
@@ -9,6 +9,7 @@
     lightgbm_eval_metric_pearson,
     lightgbm_eval_metric_f1,
     lightgbm_eval_metric_average_precision,
+    lightgbm_eval_metric_accuracy
 )
 from supervised.algorithms.registry import BINARY_CLASSIFICATION
 from supervised.algorithms.registry import MULTICLASS_CLASSIFICATION
@@ -82,6 +83,8 @@ def __init__(
             self.custom_eval_metric = lightgbm_eval_metric_f1
         elif self.eval_metric.name == "average_precision":
             self.custom_eval_metric = lightgbm_eval_metric_average_precision
+        elif self.eval_metric.name == "accuracy":
+            self.custom_eval_metric = lightgbm_eval_metric_accuracy
 
         self.num_class = (
             len(np.unique(y_train)) if ml_task == MULTICLASS_CLASSIFICATION else None
diff --git a/supervised/tuner/optuna/tuner.py b/supervised/tuner/optuna/tuner.py
index f2ccdb10..c306908e 100644
--- a/supervised/tuner/optuna/tuner.py
+++ b/supervised/tuner/optuna/tuner.py
@@ -37,6 +37,7 @@ def __init__(
             "pearson",
             "f1",
             "average_precision",
+            "accuracy"
         ]:
             raise AutoMLException(f"Metric {eval_metric.name} is not supported")
 
diff --git a/supervised/tuner/optuna/xgboost.py b/supervised/tuner/optuna/xgboost.py
index 8f43e2fa..dbf5d11c 100644
--- a/supervised/tuner/optuna/xgboost.py
+++ b/supervised/tuner/optuna/xgboost.py
@@ -9,6 +9,7 @@
     xgboost_eval_metric_pearson,
     xgboost_eval_metric_f1,
     xgboost_eval_metric_average_precision,
+    xgboost_eval_metric_accuracy
 )
 from supervised.algorithms.registry import BINARY_CLASSIFICATION
 from supervised.algorithms.registry import MULTICLASS_CLASSIFICATION
@@ -67,6 +68,8 @@ def __init__(
             self.custom_eval_metric = xgboost_eval_metric_f1
         elif self.eval_metric_name == "average_precision":
             self.custom_eval_metric = xgboost_eval_metric_average_precision
+        elif self.eval_metric_name == "accuracy":
+            self.custom_eval_metric = xgboost_eval_metric_accuracy
 
     def __call__(self, trial):
         param = {
diff --git a/supervised/utils/metric.py b/supervised/utils/metric.py
index 10f7704a..0d1d3baa 100644
--- a/supervised/utils/metric.py
+++ b/supervised/utils/metric.py
@@ -15,6 +15,7 @@
 from sklearn.metrics import mean_squared_log_error
 from sklearn.metrics import f1_score
 from sklearn.metrics import average_precision_score
+from sklearn.metrics import accuracy_score
 
 
 def logloss(y_true, y_predicted, sample_weight=None):
@@ -52,8 +53,11 @@ def negative_f1(y_true, y_predicted, sample_weight=None):
     if isinstance(y_predicted, pd.DataFrame):
         y_predicted = np.array(y_predicted)
 
+    if len(y_predicted.shape) == 2 and y_predicted.shape[1] == 1:
+        y_predicted = y_predicted.ravel()
+
     average = None
-    if len(y_predicted.shape) == 1:
+    if len(y_predicted.shape) == 1 or (len(y_predicted.shape) == 2 and y_predicted.shape[1] == 1):
         y_predicted = (y_predicted > 0.5).astype(int)
         average = "binary"
     else:
@@ -65,6 +69,26 @@ def negative_f1(y_true, y_predicted, sample_weight=None):
     return -val
 
 
+def negative_accuracy(y_true, y_predicted, sample_weight=None):
+
+    if isinstance(y_true, pd.DataFrame):
+        y_true = np.array(y_true)
+    if isinstance(y_predicted, pd.DataFrame):
+        y_predicted = np.array(y_predicted)
+
+    if len(y_predicted.shape) == 2 and y_predicted.shape[1] == 1:
+        y_predicted = y_predicted.ravel()
+
+    if len(y_predicted.shape) == 1:
+        y_predicted = (y_predicted > 0.5).astype(int)
+    else:
+        y_predicted = np.argmax(y_predicted, axis=1)
+
+    val = accuracy_score(y_true, y_predicted, sample_weight=sample_weight)
+
+    return -val
+
+
 def negative_average_precision(y_true, y_predicted, sample_weight=None):
 
     if isinstance(y_true, pd.DataFrame):
@@ -137,6 +161,15 @@ def xgboost_eval_metric_average_precision(preds, dtrain):
     return "average_precision", negative_average_precision(target, preds, weight)
 
 
+def xgboost_eval_metric_accuracy(preds, dtrain):
+    # Xgboost needs to minimize eval_metric
+    target = dtrain.get_label()
+    weight = dtrain.get_weight()
+    if len(weight) == 0:
+        weight = None
+    return "accuracy", negative_accuracy(target, preds, weight)
+
+
 def lightgbm_eval_metric_r2(preds, dtrain):
     target = dtrain.get_label()
     weight = dtrain.get_weight()
@@ -159,8 +192,10 @@ def lightgbm_eval_metric_f1(preds, dtrain):
 
     unique_targets = np.unique(target)
     if len(unique_targets) > 2:
-        preds = preds.reshape(-1, len(unique_targets))
-
+        cols = len(unique_targets)
+        rows = int(preds.shape[0] / len(unique_targets))
+        preds = np.reshape(preds, (rows, cols), order="F")
+
     return "f1", -negative_f1(target, preds, weight), True
 
 
@@ -171,6 +206,19 @@ def lightgbm_eval_metric_average_precision(preds, dtrain):
     target = dtrain.get_label()
     weight = dtrain.get_weight()
 
     return "average_precision", -negative_average_precision(target, preds, weight), True
 
 
+def lightgbm_eval_metric_accuracy(preds, dtrain):
+    target = dtrain.get_label()
+    weight = dtrain.get_weight()
+
+    unique_targets = np.unique(target)
+    if len(unique_targets) > 2:
+        cols = len(unique_targets)
+        rows = int(preds.shape[0] / len(unique_targets))
+        preds = np.reshape(preds, (rows, cols), order="F")
+
+    return "accuracy", -negative_accuracy(target, preds, weight), True
+
+
 class CatBoostEvalMetricSpearman(object):
     def get_final_error(self, error, weight):
         return error
@@ -244,6 +292,7 @@ def __init__(self, params):
             "pearson",  # negative
             "f1",  # negative
             "average_precision",  # negative
+            "accuracy",  # negative
         ]
         if self.name == "logloss":
             self.metric = logloss
@@ -269,6 +318,8 @@ def __init__(self, params):
             self.metric = negative_f1
         elif self.name == "average_precision":
             self.metric = negative_average_precision
+        elif self.name == "accuracy":
+            self.metric = negative_accuracy
         # elif self.name == "rmsle": # need to update target preprocessing
         #     self.metric = rmsle  # to assure that target is not negative ...
         else:
@@ -304,6 +355,7 @@ def is_negative(self):
             "pearson",
             "f1",
             "average_precision",
+            "accuracy",
         ]
 
     @staticmethod
@@ -315,4 +367,5 @@ def optimize_negative(metric_name):
             "pearson",
             "f1",
             "average_precision",
+            "accuracy",
         ]
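
Note on the reshape change in lightgbm_eval_metric_f1 and lightgbm_eval_metric_accuracy (reviewer addition, not part of the patch): for multiclass models, LightGBM hands custom eval functions the raw scores as a flat 1-D array laid out class-major — all rows' scores for class 0, then all rows' scores for class 1, and so on — at least in the LightGBM versions this patch targets. The previous preds.reshape(-1, len(unique_targets)) read that buffer row-major and interleaved scores from different classes into the same row; np.reshape(preds, (rows, cols), order="F") reads it column-major and recovers the correct (n_rows, n_classes) matrix. A minimal sketch with toy numbers:

    import numpy as np

    # 3 rows, 2 classes, flattened class-major the way LightGBM delivers it:
    # scores for class 0 first, then scores for class 1.
    preds = np.array([0.9, 0.2, 0.6,   # class 0 score per row
                      0.1, 0.8, 0.4])  # class 1 score per row

    wrong = preds.reshape(-1, 2)                  # old code: row-major read
    right = np.reshape(preds, (3, 2), order="F")  # new code: column-major read

    print(np.argmax(wrong, axis=1))  # [0 0 0] -- class scores scrambled across rows
    print(np.argmax(right, axis=1))  # [0 1 0] -- correct per-row predicted classes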
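
End to end, the feature surfaces through the eval_metric argument of AutoML, which now accepts "accuracy" for binary and multiclass tasks; internally it is minimized as negative_accuracy, which is why the booster wrappers above flip the sign when reporting. A quick smoke test, assuming the standard mljar-supervised public API (the dataset, algorithm list, and time limit are arbitrary choices, not from the patch):

    from sklearn.datasets import load_iris
    from supervised import AutoML  # mljar-supervised

    X, y = load_iris(return_X_y=True)

    # "accuracy" is now a valid eval_metric for classification tasks;
    # each booster optimizes -accuracy, since they can only minimize.
    automl = AutoML(
        algorithms=["LightGBM", "Xgboost", "CatBoost"],
        eval_metric="accuracy",
        total_time_limit=60,
    )
    automl.fit(X, y)
    print(automl.predict(X)[:5])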