Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions sklearn_pandas/pipeline.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import six
from sklearn.pipeline import _name_estimators, Pipeline
from sklearn.utils import tosequence


def _call_fit(fit_method, X, y=None, **kwargs):
Expand Down Expand Up @@ -36,13 +35,17 @@ class TransformerPipeline(Pipeline):
"""

def __init__(self, steps):
# Convert to list first to handle generators/iterators
# (replacement for removed sklearn.utils.tosequence)
steps = list(steps)

names, estimators = zip(*steps)
if len(dict(steps)) != len(steps):
raise ValueError(
"Provided step names are not unique: %s" % (names,))

# shallow copy of steps
self.steps = tosequence(steps)
self.steps = steps
estimator = estimators[-1]

for e in estimators:
Expand Down
70 changes: 70 additions & 0 deletions tests/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,73 @@ def test_raises_type_error(mock_fit):
"""
with pytest.raises(TypeError):
_call_fit(Trans().fit, 'X', 'y', kwarg='kwarg')


def test_transformer_pipeline_accepts_list():
    """
    A plain list of (name, transformer) pairs is accepted as ``steps``.

    Regression test for issue #267 (tosequence removal in sklearn 1.7.0).
    """
    pipeline = TransformerPipeline([('trans1', Trans()), ('trans2', Trans())])

    # Comparing the whole list of names pins both the step count and order.
    step_names = [step[0] for step in pipeline.steps]
    assert step_names == ['trans1', 'trans2']
    assert isinstance(pipeline.steps, list)


def test_transformer_pipeline_accepts_tuple():
    """
    A tuple of (name, transformer) pairs is accepted as ``steps``.

    The stored ``steps`` attribute is expected to be a list even though a
    tuple was passed in.
    Regression test for issue #267 (tosequence removal in sklearn 1.7.0).
    """
    tuple_steps = (('trans1', Trans()), ('trans2', Trans()))
    pipeline = TransformerPipeline(tuple_steps)

    # Comparing the whole list of names pins both the step count and order.
    step_names = [step[0] for step in pipeline.steps]
    assert step_names == ['trans1', 'trans2']
    assert isinstance(pipeline.steps, list)


def test_transformer_pipeline_accepts_generator():
    """
    A one-shot generator of (name, transformer) pairs is accepted as ``steps``.

    Regression test for issue #267 (tosequence removal in sklearn 1.7.0).
    """
    # A generator expression can only be consumed once, so the pipeline has
    # to materialise it before inspecting the steps.
    lazy_steps = ((name, Trans()) for name in ('trans1', 'trans2'))
    pipeline = TransformerPipeline(lazy_steps)

    step_names = [step[0] for step in pipeline.steps]
    assert step_names == ['trans1', 'trans2']
    assert isinstance(pipeline.steps, list)


def test_transformer_pipeline_steps_is_list():
    """
    The ``steps`` attribute is a list regardless of the input container.

    Covers list, tuple and iterator inputs to confirm that the list()
    conversion (replacing tosequence) works for each of them.
    Regression test for issue #267 (tosequence removal in sklearn 1.7.0).
    """

    def assert_steps_stored_as_list(steps):
        # Build a pipeline from ``steps`` and check the stored container type.
        assert isinstance(TransformerPipeline(steps).steps, list)

    # list input
    assert_steps_stored_as_list([('trans', Trans())])
    # tuple input
    assert_steps_stored_as_list((('trans', Trans()),))
    # iterator input
    assert_steps_stored_as_list(iter([('trans', Trans())]))