From cd33eaa6efde0aa1dc57b49facbf27d6c2ceb36b Mon Sep 17 00:00:00 2001 From: Aditya Mehra Date: Fri, 17 Oct 2025 23:44:58 -0400 Subject: [PATCH] Fix ImportError with sklearn 1.7.0+ by replacing tosequence - Remove deprecated sklearn.utils.tosequence import (removed in sklearn 1.7) - Convert steps to list at start of __init__ to handle all input types - Replace tosequence(steps) with direct list assignment - Fixes compatibility with scikit-learn >= 1.7.0 - Maintains backward compatibility with older sklearn versions - Add regression tests for list, tuple, and generator inputs - Verify steps attribute is always a list type The tosequence utility was deprecated in sklearn 1.5 and removed in 1.7. Using list() directly provides the same functionality without depending on sklearn internals. Fixes #267 --- sklearn_pandas/pipeline.py | 7 ++-- tests/test_pipeline.py | 70 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/sklearn_pandas/pipeline.py b/sklearn_pandas/pipeline.py index e8aae87..4b80fe7 100644 --- a/sklearn_pandas/pipeline.py +++ b/sklearn_pandas/pipeline.py @@ -1,6 +1,5 @@ import six from sklearn.pipeline import _name_estimators, Pipeline -from sklearn.utils import tosequence def _call_fit(fit_method, X, y=None, **kwargs): @@ -36,13 +35,17 @@ class TransformerPipeline(Pipeline): """ def __init__(self, steps): + # Convert to list first to handle generators/iterators + # (replacement for removed sklearn.utils.tosequence) + steps = list(steps) + names, estimators = zip(*steps) if len(dict(steps)) != len(steps): raise ValueError( "Provided step names are not unique: %s" % (names,)) # shallow copy of steps - self.steps = tosequence(steps) + self.steps = steps estimator = estimators[-1] for e in estimators: diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index ee57b57..dd300c6 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -98,3 +98,73 @@ def test_raises_type_error(mock_fit): """ with pytest.raises(TypeError): _call_fit(Trans().fit, 'X', 'y', kwarg='kwarg') + + +def test_transformer_pipeline_accepts_list(): + """ + Test that TransformerPipeline accepts list of steps. + Regression test for issue #267 (tosequence removal in sklearn 1.7.0) + """ + steps = [ + ('trans1', Trans()), + ('trans2', Trans()) + ] + + pipeline = TransformerPipeline(steps) + assert len(pipeline.steps) == 2 + assert pipeline.steps[0][0] == 'trans1' + assert pipeline.steps[1][0] == 'trans2' + assert isinstance(pipeline.steps, list) + + +def test_transformer_pipeline_accepts_tuple(): + """ + Test that TransformerPipeline accepts tuple of steps. + Regression test for issue #267 (tosequence removal in sklearn 1.7.0) + """ + steps = ( + ('trans1', Trans()), + ('trans2', Trans()) + ) + + pipeline = TransformerPipeline(steps) + assert len(pipeline.steps) == 2 + assert pipeline.steps[0][0] == 'trans1' + assert pipeline.steps[1][0] == 'trans2' + assert isinstance(pipeline.steps, list) + + +def test_transformer_pipeline_accepts_generator(): + """ + Test that TransformerPipeline accepts generator of steps. + Regression test for issue #267 (tosequence removal in sklearn 1.7.0) + """ + + def step_generator(): + yield ('trans1', Trans()) + yield ('trans2', Trans()) + + pipeline = TransformerPipeline(step_generator()) + assert len(pipeline.steps) == 2 + assert pipeline.steps[0][0] == 'trans1' + assert pipeline.steps[1][0] == 'trans2' + assert isinstance(pipeline.steps, list) + + +def test_transformer_pipeline_steps_is_list(): + """ + Test that steps attribute is always a list after initialization. + This ensures list() conversion works correctly (replacing tosequence). + Regression test for issue #267 (tosequence removal in sklearn 1.7.0) + """ + # Test with list input + pipeline1 = TransformerPipeline([('trans', Trans())]) + assert isinstance(pipeline1.steps, list) + + # Test with tuple input + pipeline2 = TransformerPipeline((('trans', Trans()),)) + assert isinstance(pipeline2.steps, list) + + # Test with iterator input + pipeline3 = TransformerPipeline(iter([('trans', Trans())])) + assert isinstance(pipeline3.steps, list) \ No newline at end of file