-
Notifications
You must be signed in to change notification settings - Fork 1.7k
/
test_persistence_experiment.py
152 lines (131 loc) · 4.38 KB
/
test_persistence_experiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import os
import joblib
import pytest
import pycaret.anomaly
import pycaret.classification
import pycaret.clustering
import pycaret.datasets
import pycaret.regression
import pycaret.time_series
from pycaret.anomaly import AnomalyExperiment
from pycaret.classification import ClassificationExperiment
from pycaret.clustering import ClusteringExperiment
from pycaret.regression import RegressionExperiment
from pycaret.time_series import TSForecastingExperiment
def check_experiment_equality(exp, new_exp):
    """Assert that every entry of ``exp.variables`` matches ``new_exp.variables``.

    Each value is first compared with ``==``. If that comparison (or the
    assertion on its result) raises for any reason, the check falls back to
    comparing ``joblib.hash`` digests of the two values.

    Only the keys present in ``exp.variables`` are inspected; additional keys
    in ``new_exp.variables`` are not looked at.
    """
    for name, expected in exp.variables.items():
        try:
            same = expected == new_exp.variables[name]
            assert same
        except Exception:
            # Fallback for values whose ``==`` result has no single truth
            # value (e.g. numpy arrays): compare content hashes instead.
            expected_digest = joblib.hash(expected)
            actual_digest = joblib.hash(new_exp.variables[name])
            assert expected_digest == actual_digest
@pytest.mark.parametrize("preprocess_data", (True, False))
def test_anomaly_persistence(tmpdir, preprocess_data):
    """Save an ``AnomalyExperiment``, load it back and compare both experiments.

    Parametrized over ``preprocess_data``: when true, the raw dataset is
    handed to ``load_experiment``; otherwise the original experiment's
    ``data`` attribute is passed instead.
    """
    dataset = pycaret.datasets.get_data("anomaly")
    setup_options = {
        "normalize": True,
        "html": False,
        "session_id": 123,
        "n_jobs": 1,
    }

    original = AnomalyExperiment()
    original.setup(dataset, **setup_options)

    saved_path = os.path.join(tmpdir, "exp.pkl")
    original.save_experiment(saved_path)

    if preprocess_data:
        data_for_load = dataset
    else:
        data_for_load = original.data

    restored = AnomalyExperiment.load_experiment(
        saved_path,
        data=data_for_load,
        preprocess_data=preprocess_data,
    )

    # The normalization step requested in setup must be present after loading.
    assert "normalize" in restored.pipeline.named_steps
    check_experiment_equality(original, restored)
@pytest.mark.parametrize("preprocess_data", (True, False))
def test_clustering_persistence(tmpdir, preprocess_data):
    """Save a ``ClusteringExperiment``, load it back and compare both experiments.

    Parametrized over ``preprocess_data``: when true, the raw dataset is
    handed to ``load_experiment``; otherwise the original experiment's
    ``data`` attribute is passed instead.
    """
    dataset = pycaret.datasets.get_data("jewellery")
    setup_options = {
        "normalize": True,
        "html": False,
        "session_id": 123,
        "n_jobs": 1,
    }

    original = ClusteringExperiment()
    original.setup(dataset, **setup_options)

    saved_path = os.path.join(tmpdir, "exp.pkl")
    original.save_experiment(saved_path)

    if preprocess_data:
        data_for_load = dataset
    else:
        data_for_load = original.data

    restored = ClusteringExperiment.load_experiment(
        saved_path,
        data=data_for_load,
        preprocess_data=preprocess_data,
    )

    # The normalization step requested in setup must be present after loading.
    assert "normalize" in restored.pipeline.named_steps
    check_experiment_equality(original, restored)
@pytest.mark.parametrize("preprocess_data", (True, False))
def test_classification_persistence(tmpdir, preprocess_data):
    """Save a ``ClassificationExperiment``, load it back and compare both.

    Parametrized over ``preprocess_data``: when true, the raw dataset is
    handed to ``load_experiment``; otherwise the original experiment's
    ``data`` attribute is passed instead.
    """
    dataset = pycaret.datasets.get_data("juice")
    setup_options = {
        "target": "Purchase",
        "normalize": True,
        "html": False,
        "session_id": 123,
        "n_jobs": 1,
    }

    original = ClassificationExperiment()
    original.setup(dataset, **setup_options)

    saved_path = os.path.join(tmpdir, "exp.pkl")
    original.save_experiment(saved_path)

    if preprocess_data:
        data_for_load = dataset
    else:
        data_for_load = original.data

    restored = ClassificationExperiment.load_experiment(
        saved_path,
        data=data_for_load,
        preprocess_data=preprocess_data,
    )

    # The normalization step requested in setup must be present after loading.
    assert "normalize" in restored.pipeline.named_steps
    check_experiment_equality(original, restored)
@pytest.mark.parametrize("preprocess_data", (True, False))
def test_regression_persistence(tmpdir, preprocess_data):
    """Save a ``RegressionExperiment``, load it back and compare both experiments.

    Parametrized over ``preprocess_data``: when true, the raw dataset is
    handed to ``load_experiment``; otherwise the original experiment's
    ``data`` attribute is passed instead.
    """
    dataset = pycaret.datasets.get_data("boston")
    setup_options = {
        "target": "medv",
        "normalize": True,
        "html": False,
        "session_id": 123,
        "n_jobs": 1,
    }

    original = RegressionExperiment()
    original.setup(dataset, **setup_options)

    saved_path = os.path.join(tmpdir, "exp.pkl")
    original.save_experiment(saved_path)

    if preprocess_data:
        data_for_load = dataset
    else:
        data_for_load = original.data

    restored = RegressionExperiment.load_experiment(
        saved_path,
        data=data_for_load,
        preprocess_data=preprocess_data,
    )

    # The normalization step requested in setup must be present after loading.
    assert "normalize" in restored.pipeline.named_steps
    check_experiment_equality(original, restored)
@pytest.mark.parametrize("preprocess_data", (True, False))
def test_time_series_persistence(tmpdir, load_pos_and_neg_data, preprocess_data):
    """Save a ``TSForecastingExperiment``, load it back and compare model output.

    Parametrized over ``preprocess_data``: when true, the fixture data is
    handed to ``load_experiment``; otherwise the original experiment's
    ``data`` attribute is passed instead.

    Instead of comparing experiment variables, an "ets" model is created in
    both experiments and the pulled results and predictions are compared.
    """

    def _create_and_predict(experiment):
        # Same call order for both experiments: create, pull, then predict.
        fitted = experiment.create_model("ets")
        pulled = experiment.pull()
        predictions = experiment.predict_model(fitted)
        return pulled, predictions

    series = load_pos_and_neg_data
    setup_options = {
        "transform_target": "sqrt",
        "html": False,
        "session_id": 123,
        "n_jobs": 1,
    }

    original = TSForecastingExperiment()
    original.setup(series, **setup_options)

    saved_path = os.path.join(tmpdir, "exp.pkl")
    original.save_experiment(saved_path)

    if preprocess_data:
        data_for_load = series
    else:
        data_for_load = original.data

    restored = TSForecastingExperiment.load_experiment(
        saved_path,
        data=data_for_load,
        preprocess_data=preprocess_data,
    )

    # Checking experiment equality does not work for time series because of
    # sktime, so the outputs of both experiments are compared instead.
    results, preds = _create_and_predict(original)
    new_results, new_preds = _create_and_predict(restored)

    assert preds.equals(new_preds)
    assert results.equals(new_results)