Skip to content

Commit

Permalink
Merge pull request #3621 from pycaret/fix_3606
Browse files Browse the repository at this point in the history
Force np.float64 for CBLOF
  • Loading branch information
Yard1 committed Jun 26, 2023
2 parents e394f54 + 78aa65a commit 82c9c50
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 4 deletions.
4 changes: 2 additions & 2 deletions pycaret/containers/models/anomaly.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ class CBLOFAnomalyContainer(AnomalyContainer):
def __init__(self, experiment):
get_logger()
np.random.seed(experiment.seed)
from pyod.models.cblof import CBLOF
from pycaret.internal.patches.pyod import CBLOFForceToDouble

args = {
"random_state": experiment.seed,
Expand All @@ -194,7 +194,7 @@ def __init__(self, experiment):
super().__init__(
id="cluster",
name="Clustering-Based Local Outlier",
class_def=CBLOF,
class_def=CBLOFForceToDouble,
args=args,
tune_grid=tune_grid,
tune_distribution=tune_distributions,
Expand Down
30 changes: 30 additions & 0 deletions pycaret/internal/patches/pyod.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import numpy as np
import pandas as pd
from pyod.models.cblof import CBLOF


def convert_to_fp64(X):
    """Upcast half- and single-precision float data to ``np.float64``.

    Parameters
    ----------
    X : pandas.DataFrame, numpy.ndarray or array-like
        Data to convert. For a DataFrame only the columns whose dtype is
        ``float16`` or ``float32`` are cast; all other columns keep their
        dtype. For any other object exposing a ``dtype`` attribute (for
        example a NumPy array) the whole object is cast when its dtype is
        ``float16`` or ``float32``.

    Returns
    -------
    Same kind of object as ``X``
        The ``astype`` result when a conversion applies (a DataFrame always
        goes through ``astype``, even if no column needs casting); otherwise
        ``X`` itself, unchanged. Objects without a ``dtype`` attribute, such
        as plain lists, are returned as-is.
    """
    # Both branches must agree on what counts as "low precision".
    low_precision = (np.float32, np.float16)

    if isinstance(X, pd.DataFrame):
        # Cast column by column so non-float columns are left alone.
        return X.astype(
            {
                col: np.float64
                for col in X.columns
                if X.dtypes[col] in low_precision
            }
        )

    # getattr keeps dtype-less array-likes (lists, tuples) from raising
    # AttributeError; they are passed through untouched.
    if getattr(X, "dtype", None) in low_precision:
        return X.astype(np.float64)
    return X


# Fixes https://github.com/pycaret/pycaret/issues/3606
class CBLOFForceToDouble(CBLOF):
    """CBLOF variant that runs its input through ``convert_to_fp64`` first.

    Both ``fit`` and ``decision_function`` convert ``X`` and then delegate
    to the parent ``CBLOF`` implementation.
    """

    def fit(self, X, y=None):
        """Convert ``X`` with ``convert_to_fp64`` and fit the parent CBLOF."""
        return super().fit(convert_to_fp64(X), y)

    def decision_function(self, X):
        """Convert ``X`` with ``convert_to_fp64`` and score it with the parent CBLOF."""
        return super().decision_function(convert_to_fp64(X))
Original file line number Diff line number Diff line change
Expand Up @@ -1099,10 +1099,10 @@ def _create_model(
)
model_fit_start = time.time()
pipeline_with_model.fit(data_X, **fit_kwargs)
except Exception:
except Exception as e:
raise RuntimeError(
"Could not form valid cluster separation. Try a different dataset or model."
)
) from e
else:
pipeline_with_model.fit(data_X, **fit_kwargs)
model_fit_end = time.time()
Expand Down
6 changes: 6 additions & 0 deletions tests/test_anomaly.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ def test_anomaly(data):
# create model
iforest = pycaret.anomaly.create_model("iforest", experiment_custom_tags={"tag": 1})
knn = pycaret.anomaly.create_model("knn", experiment_custom_tags={"tag": 1})
# https://github.com/pycaret/pycaret/issues/3606
cluster = pycaret.anomaly.create_model("cluster", experiment_custom_tags={"tag": 1})

# Plot model
pycaret.anomaly.plot_model(iforest)
Expand All @@ -38,14 +40,18 @@ def test_anomaly(data):
# assign model
iforest_results = pycaret.anomaly.assign_model(iforest)
knn_results = pycaret.anomaly.assign_model(knn)
cluster_results = pycaret.anomaly.assign_model(cluster)
assert isinstance(iforest_results, pd.DataFrame)
assert isinstance(knn_results, pd.DataFrame)
assert isinstance(cluster_results, pd.DataFrame)

# predict model
iforest_predictions = pycaret.anomaly.predict_model(model=iforest, data=data)
knn_predictions = pycaret.anomaly.predict_model(model=knn, data=data)
cluster_predictions = pycaret.anomaly.predict_model(model=cluster, data=data)
assert isinstance(iforest_predictions, pd.DataFrame)
assert isinstance(knn_predictions, pd.DataFrame)
assert isinstance(cluster_predictions, pd.DataFrame)

# get config
X = pycaret.anomaly.get_config("X")
Expand Down

0 comments on commit 82c9c50

Please sign in to comment.