# test_probability_threshold.py
import pandas as pd
import pycaret.classification
import pycaret.datasets
from pycaret.internal.meta_estimators import CustomProbabilityThresholdClassifier
def _assert_threshold_wrapper(model, probability_threshold):
    """Assert *model* is a threshold wrapper carrying *probability_threshold*."""
    assert isinstance(model, CustomProbabilityThresholdClassifier)
    assert model.probability_threshold == probability_threshold


def test_probability_threshold():
    """Check that a custom probability threshold survives the whole workflow.

    Runs a classification experiment on the "juice" dataset with a
    threshold of 0.75 and verifies that the models returned by compare,
    tune, ensemble, blend, stack, calibrate, automl, finalize and
    save/load are all ``CustomProbabilityThresholdClassifier`` instances
    carrying that threshold. It also verifies that ``predict_model`` uses
    the model's own threshold unless one is passed explicitly.
    """
    # loading dataset
    data = pycaret.datasets.get_data("juice")
    assert isinstance(data, pd.DataFrame)

    # init setup
    pycaret.classification.setup(
        data,
        target="Purchase",
        log_experiment=True,
        html=False,
        session_id=123,
        n_jobs=1,
    )

    probability_threshold = 0.75

    # compare models
    top3 = pycaret.classification.compare_models(
        n_select=100, exclude=["catboost"], probability_threshold=probability_threshold
    )[:3]
    assert isinstance(top3, list)
    _assert_threshold_wrapper(top3[0], probability_threshold)

    # tune model (no threshold passed: it must be carried over from the input)
    tuned_top3 = [pycaret.classification.tune_model(i, n_iter=3) for i in top3]
    assert isinstance(tuned_top3, list)
    _assert_threshold_wrapper(tuned_top3[0], probability_threshold)

    # ensemble model
    bagged_top3 = [
        pycaret.classification.ensemble_model(
            i, probability_threshold=probability_threshold
        )
        for i in tuned_top3
    ]
    assert isinstance(bagged_top3, list)
    _assert_threshold_wrapper(bagged_top3[0], probability_threshold)

    # blend models
    blender = pycaret.classification.blend_models(
        top3, probability_threshold=probability_threshold
    )
    _assert_threshold_wrapper(blender, probability_threshold)

    # stack models
    stacker = pycaret.classification.stack_models(
        estimator_list=top3[1:],
        meta_model=top3[0],
        probability_threshold=probability_threshold,
    )
    _assert_threshold_wrapper(stacker, probability_threshold)

    # calibrate model (no threshold passed: it must be carried over from the input)
    calibrated = pycaret.classification.calibrate_model(estimator=top3[0])
    _assert_threshold_wrapper(calibrated, probability_threshold)

    # plot model
    lr = pycaret.classification.create_model(
        "lr", probability_threshold=probability_threshold
    )
    pycaret.classification.plot_model(
        lr, save=True
    )  # scale removed because build failed due to large image size

    # select best model
    best = pycaret.classification.automl()
    # Validate the automl result itself; the original re-checked `calibrated`
    # here, which left `best` unverified.
    _assert_threshold_wrapper(best, probability_threshold)

    # hold out predictions: the default must match the model's own threshold
    # (0.75) and differ from an explicit override of 0.5
    predict_holdout = pycaret.classification.predict_model(lr)
    predict_holdout_0_5 = pycaret.classification.predict_model(
        lr, probability_threshold=0.5
    )
    predict_holdout_0_75 = pycaret.classification.predict_model(
        lr, probability_threshold=probability_threshold
    )
    assert isinstance(predict_holdout, pd.DataFrame)
    assert predict_holdout.equals(predict_holdout_0_75)
    assert not predict_holdout.equals(predict_holdout_0_5)

    # predictions on new dataset (same expectations as for the hold-out set)
    predict_holdout = pycaret.classification.predict_model(
        lr, data=data.drop("Purchase", axis=1)
    )
    predict_holdout_0_5 = pycaret.classification.predict_model(
        lr, data=data.drop("Purchase", axis=1), probability_threshold=0.5
    )
    predict_holdout_0_75 = pycaret.classification.predict_model(
        lr,
        data=data.drop("Purchase", axis=1),
        probability_threshold=probability_threshold,
    )
    assert isinstance(predict_holdout, pd.DataFrame)
    assert predict_holdout.equals(predict_holdout_0_75)
    assert not predict_holdout.equals(predict_holdout_0_5)

    # finalize model
    final_best = pycaret.classification.finalize_model(best)
    _assert_threshold_wrapper(final_best._final_estimator, probability_threshold)

    # save model
    pycaret.classification.save_model(best, "best_model_23122019")

    # load model
    saved_best = pycaret.classification.load_model("best_model_23122019")
    _assert_threshold_wrapper(saved_best._final_estimator, probability_threshold)
# Allow running this test directly as a script, without going through pytest.
if __name__ == "__main__":
    test_probability_threshold()