-
Notifications
You must be signed in to change notification settings - Fork 1.7k
/
test_pipeline.py
72 lines (55 loc) · 2.17 KB
/
test_pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# coding: utf-8
"""
Package: PyCaret
Author: Mavs
Description: Unit tests for pipeline.py
"""
import pandas as pd
import pytest
from sklearn.preprocessing import LabelEncoder, StandardScaler
import pycaret.classification
import pycaret.regression
from pycaret.datasets import get_data
@pytest.fixture
def pipeline():
    """Return the preprocessing pipeline of a PyCaret classification setup.

    The setup is run on the "juice" dataset with degree-2 polynomial
    features enabled and verbose output turned off; the ``pipeline``
    attribute of the object returned by ``setup`` is handed to the test.
    """
    experiment = pycaret.classification.setup(
        data=get_data("juice", verbose=False),
        polynomial_features=True,
        polynomial_degree=2,
        verbose=False,
    )
    return experiment.pipeline
def test_fit(pipeline):
    """Check that fitting succeeds and records the input feature names."""
    juice = get_data("juice", verbose=False)
    # Every column but the last is a feature; the last column is the target.
    features, target = juice.iloc[:, :-1], juice.iloc[:, -1]

    fitted = pipeline.fit(features, target)
    assert fitted
    assert isinstance(pipeline.feature_names_in_, list)
def test_transforms_only_y():
    """Check that fit_transform works when only the target is passed."""
    bank = get_data("bank", verbose=False)
    encoder_step = ("label_encoder", LabelEncoder())
    experiment = pycaret.classification.setup(
        data=bank,
        preprocess=False,
        custom_pipeline=encoder_step,
    )

    # Only ``y`` (the last column) is supplied; no feature matrix is given.
    target = bank.iloc[:, -1]
    transformed = experiment.pipeline.fit_transform(y=target)
    assert isinstance(transformed, pd.Series)
def test_transform(pipeline):
    """Check the return types of transform with and without a target."""
    juice = get_data("juice", verbose=False)
    features = juice.iloc[:, :-1]
    target = juice.iloc[:, -1]
    pipeline.fit(features, target)

    # Features only -> a single dataframe is expected back.
    features_only = pipeline.transform(features)
    assert isinstance(features_only, pd.DataFrame)

    # Features and target -> a tuple is expected back.
    with_target = pipeline.transform(features, target)
    assert isinstance(with_target, tuple)
def test_fit_transform(pipeline):
    """Check that fit_transform returns a (DataFrame, Series) pair."""
    juice = get_data("juice", verbose=False)

    # Add an extra ("test", "passthrough") step to the end of the pipeline
    # before calling fit_transform on it.
    pipeline.steps.append(("test", "passthrough"))

    result = pipeline.fit_transform(juice.iloc[:, :-1], juice.iloc[:, -1])
    X, y = result
    assert isinstance(X, pd.DataFrame)
    assert isinstance(y, pd.Series)
def test_transform_imbalancer(pipeline):
    """Check that transform keeps the number of rows unchanged.

    The test name refers to an imbalancer step being skipped at transform
    time; what is asserted here is that the transformed output has as many
    rows as the input dataset.
    """
    # NOTE(review): the `pipeline` fixture does not visibly enable any
    # imbalance-fixing option in its setup call -- confirm that an imbalancer
    # step is actually present, otherwise this only checks row preservation.
    juice = get_data("juice", verbose=False)
    pipeline.fit(juice.iloc[:, :-1], juice.iloc[:, -1])

    transformed = pipeline.transform(juice.iloc[:, :-1])
    assert len(transformed) == len(juice)