Skip to content

Commit

Permalink
Merge pull request #3537 from pycaret/fix_zero_metrics_in_predict_model
Browse files (browse the repository at this point in the history)
Fix metrics being zero in `predict_model`
  • Loading branch information
Yard1 committed May 11, 2023
2 parents 3f3ac4b + 360d65d commit 8623007
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 13 deletions.
24 changes: 11 additions & 13 deletions pycaret/internal/pycaret_experiment/supervised_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -4851,11 +4851,11 @@ def predict_model(
"""

def replace_labels_in_column(label_encoder, labels: pd.Series) -> pd.Series:
def encode_labels(label_encoder, labels: pd.Series) -> pd.Series:
# Check if there is a LabelEncoder in the pipeline
if label_encoder:
return pd.Series(
data=label_encoder.inverse_transform(labels),
data=label_encoder.transform(labels),
name=labels.name,
index=labels.index,
)
Expand Down Expand Up @@ -4979,8 +4979,6 @@ def replace_labels_in_column(label_encoder, labels: pd.Series) -> pd.Series:
# Need to convert labels back to numbers
# TODO optimize
label_encoder = get_label_encoder(pipeline)
if label_encoder:
pred = label_encoder.transform(pred)
if isinstance(pred, pd.Series):
pred = pred.values

Expand All @@ -4993,17 +4991,18 @@ def replace_labels_in_column(label_encoder, labels: pd.Series) -> pd.Series:
else:
pred_prob = score

y_test_metrics = y_test_

except Exception:
# This is not a classifier
score = None
pred_prob = None
y_test_metrics = y_test_untransformed

y_test_metrics = y_test_untransformed

if probability_threshold is not None and pred_prob is not None:
try:
pred = (pred_prob >= probability_threshold).astype(int)
if label_encoder:
pred = label_encoder.inverse_transform(pred)
except Exception:
pass

Expand All @@ -5029,20 +5028,19 @@ def replace_labels_in_column(label_encoder, labels: pd.Series) -> pd.Series:
except Exception:
pass

if not encoded_labels:
label[LABEL_COLUMN] = replace_labels_in_column(
label_encoder, label[LABEL_COLUMN]
)
if encoded_labels:
label[LABEL_COLUMN] = encode_labels(label_encoder, label[LABEL_COLUMN])
else:
y_test_untransformed = y_test_
old_index = X_test_untransformed.index
X_test_ = pd.concat([X_test_untransformed, y_test_untransformed, label], axis=1)
X_test_.index = old_index

if score is not None:
pred = pred.astype(int)

if not raw_score:
if label_encoder:
pred = label_encoder.transform(pred)

score = pd.DataFrame(
data=[s[pred[i]] for i, s in enumerate(score)],
index=X_test_.index,
Expand Down
4 changes: 4 additions & 0 deletions pycaret/utils/generic.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import functools
import inspect
import traceback
import warnings
from collections.abc import Mapping
from copy import deepcopy
Expand Down Expand Up @@ -527,6 +528,7 @@ def _calculate_unsupervised_metric(
try:
calculated_metric = score_func(target, labels, **container.args)
except Exception:
warnings.warn(traceback.format_exc())
calculated_metric = 0

return (display_name, calculated_metric)
Expand Down Expand Up @@ -581,9 +583,11 @@ def _calculate_metric(
try:
calculated_metric = score_func(y_test, target, sample_weight=weights, **kwargs)
except Exception:

try:
calculated_metric = score_func(y_test, target, **kwargs)
except Exception:
warnings.warn(traceback.format_exc())
calculated_metric = 0

return display_name, calculated_metric
Expand Down
62 changes: 62 additions & 0 deletions tests/test_supervised_predict_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,70 @@ def test_classification_predict_model():
)
lr_model = pycaret.classification.create_model("lr")
predictions = pycaret.classification.predict_model(lr_model, data=unseen_data)
metrics = pycaret.classification.pull()
# Check that columns of raw data are contained in columns of returned dataframe
assert all(item in predictions.columns for item in unseen_data.columns)
assert all(metrics[metric][0] for metric in metrics.columns)

predictions = pycaret.classification.predict_model(lr_model)
metrics = pycaret.classification.pull()
assert set(predictions["prediction_label"].unique()) == set(
data["Purchase"].unique()
)
assert all(metrics[metric][0] for metric in metrics.columns)

predictions = pycaret.classification.predict_model(lr_model, raw_score=True)
metrics = pycaret.classification.pull()
assert all(metrics[metric][0] for metric in metrics.columns)

predictions = pycaret.classification.predict_model(lr_model, encoded_labels=True)
metrics = pycaret.classification.pull()
assert set(predictions["prediction_label"].unique()) == {0, 1}
assert all(metrics[metric][0] for metric in metrics.columns)


def test_multiclass_predict_model():
    """Check ``predict_model`` labels and metrics on the multiclass iris data.

    Runs ``predict_model`` four ways (on held-out rows, with defaults, with
    ``raw_score=True`` and with ``encoded_labels=True``) and, after each run,
    asserts that every value in the first row of the pulled metrics table is
    truthy.
    """
    clf = pycaret.classification

    def first_metrics_row_is_truthy():
        # Pull the most recent metrics table and test each column's first entry.
        pulled = clf.pull()
        return all(pulled[column][0] for column in pulled.columns)

    # Load the dataset and carve out the rows used as "unseen" input.
    data = pycaret.datasets.get_data("iris")
    assert isinstance(data, pd.DataFrame)

    sampled = data.sample(frac=0.90)
    unseen_data = data.drop(sampled.index)

    # Initialise the experiment.
    setup_options = {
        "target": "species",
        "remove_multicollinearity": True,
        "multicollinearity_threshold": 0.95,
        "html": False,
        "session_id": 123,
        "n_jobs": 1,
    }
    clf.setup(data, **setup_options)
    lr_model = clf.create_model("lr")

    # 1) Explicit unseen data: every raw input column must be returned.
    scored = clf.predict_model(lr_model, data=unseen_data)
    absent = [name for name in unseen_data.columns if name not in scored.columns]
    assert not absent
    assert first_metrics_row_is_truthy()

    # 2) Defaults: predicted labels are the original species values.
    scored = clf.predict_model(lr_model)
    predicted_labels = set(scored["prediction_label"].unique())
    original_labels = set(data["species"].unique())
    assert predicted_labels == original_labels
    assert first_metrics_row_is_truthy()

    # 3) Raw scores requested: only the metrics are checked.
    scored = clf.predict_model(lr_model, raw_score=True)
    assert first_metrics_row_is_truthy()

    # 4) Encoded labels: predictions are the integers 0..n_classes-1.
    scored = clf.predict_model(lr_model, encoded_labels=True)
    predicted_labels = set(scored["prediction_label"].unique())
    class_count = len(data["species"].unique())
    assert predicted_labels == set(range(class_count))
    assert first_metrics_row_is_truthy()


def test_regression_predict_model():
Expand Down

0 comments on commit 8623007

Please sign in to comment.