# test_clustering.py -- end-to-end test of the pycaret.clustering functional API.
import uuid
import pandas as pd
import pytest
from mlflow.tracking.client import MlflowClient
import pycaret.clustering
import pycaret.datasets
@pytest.fixture(scope="module")
def data():
    """Provide the ``jewellery`` sample dataset to the tests in this module."""
    jewellery = pycaret.datasets.get_data("jewellery")
    return jewellery
def test_clustering(data):
    """Exercise the ``pycaret.clustering`` functional API end to end.

    The test runs ``setup`` with MLflow logging enabled, creates ``kmeans``
    and ``kmodes`` models, plots them, assigns clusters, predicts, reads and
    writes experiment config, verifies that the custom tag was recorded on
    every MLflow run of the experiment, and finally round-trips the kmeans
    model through ``save_model`` / ``load_model``.

    Args:
        data: Dataset supplied by the module-level ``data`` fixture.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    # A random experiment name isolates the MLflow tag check below from
    # runs logged by other tests or earlier invocations.
    experiment_name = uuid.uuid4().hex
    pycaret.clustering.setup(
        data,
        normalize=True,
        log_experiment=True,
        experiment_name=experiment_name,
        experiment_custom_tags={"tag": 1},
        log_plots=True,
        html=False,
        session_id=123,
        n_jobs=1,
    )

    # create model
    kmeans = pycaret.clustering.create_model(
        "kmeans", experiment_custom_tags={"tag": 1}
    )
    kmodes = pycaret.clustering.create_model(
        "kmodes", experiment_custom_tags={"tag": 1}
    )

    # plot model
    pycaret.clustering.plot_model(kmeans)
    pycaret.clustering.plot_model(kmodes)

    # assign model
    kmeans_results = pycaret.clustering.assign_model(kmeans)
    kmodes_results = pycaret.clustering.assign_model(kmodes)
    assert isinstance(kmeans_results, pd.DataFrame)
    assert isinstance(kmodes_results, pd.DataFrame)

    # predict model
    kmeans_predictions = pycaret.clustering.predict_model(model=kmeans, data=data)
    assert isinstance(kmeans_predictions, pd.DataFrame)

    # returns table of models
    all_models = pycaret.clustering.models()
    assert isinstance(all_models, pd.DataFrame)

    # get config
    X = pycaret.clustering.get_config("X")
    seed = pycaret.clustering.get_config("seed")
    assert isinstance(X, pd.DataFrame)
    assert isinstance(seed, int)

    # set config
    pycaret.clustering.set_config("seed", 124)
    seed = pycaret.clustering.get_config("seed")
    assert seed == 124

    # Assert the custom tags are created.
    client = MlflowClient()
    experiment = client.get_experiment_by_name(experiment_name)
    # get_experiment_by_name returns None for an unknown name; fail with a
    # clear message instead of an AttributeError on the next line.
    assert experiment is not None, "MLflow experiment was not created"
    # MlflowClient.list_run_infos was removed in MLflow 2.0; search_runs is
    # available in both 1.x and 2.x and returns full Run objects, so no
    # follow-up get_run call is needed.
    runs = client.search_runs(experiment_ids=[experiment.experiment_id])
    # Without this check the loop below would pass vacuously on zero runs.
    assert runs, "no MLflow runs were logged for the experiment"
    for run in runs:
        # MLflow stores tag values as strings, hence "1" rather than 1.
        assert run.data.tags.get("tag") == "1"

    # Save into a temporary directory so the test does not leave a model
    # file behind in the current working directory.
    with tempfile.TemporaryDirectory() as tmp_dir:
        model_path = os.path.join(tmp_dir, "kmeans_model_23122019")

        # save model
        pycaret.clustering.save_model(kmeans, model_path)

        # reset: swap in a fresh experiment so the load/predict below does
        # not rely on state left over from the setup() call above.
        pycaret.clustering.set_current_experiment(
            pycaret.clustering.ClusteringExperiment()
        )

        # load model
        kmeans = pycaret.clustering.load_model(model_path)

        # predict model
        kmeans_predictions = pycaret.clustering.predict_model(
            model=kmeans, data=data
        )
        assert isinstance(kmeans_predictions, pd.DataFrame)
if __name__ == "__main__":
    # test_clustering requires the ``data`` fixture, so calling it with no
    # arguments raises TypeError. Hand the file to pytest so the fixture is
    # injected, and propagate pytest's exit code to the shell.
    raise SystemExit(pytest.main([__file__]))