diff --git a/crates/polars-core/src/frame/explode.rs b/crates/polars-core/src/frame/explode.rs index eb4fe1e7cf6d..38fdfca1e262 100644 --- a/crates/polars-core/src/frame/explode.rs +++ b/crates/polars-core/src/frame/explode.rs @@ -34,6 +34,22 @@ pub struct MeltArgs { pub streamable: bool, } +impl MeltArgs { + pub fn is_valid_schema(&self, schema: &Schema) -> PolarsResult<()> { + let mut provided = PlHashSet::new(); + for name in self.id_vars.iter().chain(self.value_vars.iter()) { + if !schema.contains(name) { + polars_bail!(ColumnNotFound: "{}", name) + } + + if !provided.insert(name) { + polars_bail!(Duplicate: "column name '{}' provided more than once in melt", name) + } + } + Ok(()) + } +} + impl DataFrame { pub fn explode_impl(&self, mut columns: Vec) -> PolarsResult { polars_ensure!(!columns.is_empty(), InvalidOperation: "no columns provided in explode"); @@ -256,6 +272,9 @@ impl DataFrame { /// Similar to melt, but without generics. This may be easier if you want to pass /// an empty `id_vars` or empty `value_vars`. pub fn melt2(&self, args: MeltArgs) -> PolarsResult { + let schema = self.schema(); + args.is_valid_schema(&schema)?; + let id_vars = args.id_vars; let mut value_vars = args.value_vars; @@ -293,7 +312,6 @@ impl DataFrame { } // values will all be placed in single column, so we must find their supertype - let schema = self.schema(); let mut iter = value_vars.iter().map(|v| { schema .get(v) diff --git a/crates/polars-plan/src/logical_plan/builder.rs b/crates/polars-plan/src/logical_plan/builder.rs index 7129be8534e5..4543b10a9853 100644 --- a/crates/polars-plan/src/logical_plan/builder.rs +++ b/crates/polars-plan/src/logical_plan/builder.rs @@ -857,8 +857,9 @@ impl LogicalPlanBuilder { } pub fn melt(self, args: Arc) -> Self { - let schema = try_delayed!(self.0.schema(), &self.0, into); - let schema = det_melt_schema(&args, &schema); + let input_schema = try_delayed!(self.0.schema(), &self.0, into); + try_delayed!(args.is_valid_schema(&input_schema), &self.0, into); + let schema = det_melt_schema(&args, &input_schema); LogicalPlan::MapFunction { input: Arc::new(self.0), function: FunctionNode::Melt { args, schema }, diff --git a/py-polars/tests/unit/operations/test_melt.py b/py-polars/tests/unit/operations/test_melt.py index 2d75ab480c1f..a2579154482b 100644 --- a/py-polars/tests/unit/operations/test_melt.py +++ b/py-polars/tests/unit/operations/test_melt.py @@ -1,3 +1,5 @@ +import pytest + import polars as pl import polars.selectors as cs from polars.testing import assert_frame_equal @@ -48,6 +50,34 @@ def test_melt() -> None: ] +def test_melt_duplicates() -> None: + df = pl.DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]}) + + with pytest.raises(pl.DuplicateError): + df.melt(id_vars=["B", "B"]) + with pytest.raises(pl.DuplicateError): + df.lazy().melt(id_vars=["B", "B"]).collect() + + with pytest.raises(pl.DuplicateError): + df.melt(id_vars=["A", "B"], value_vars=["C", "A"]) + with pytest.raises(pl.DuplicateError): + df.lazy().melt(id_vars=["A", "B"], value_vars=["C", "A"]).collect() + + +def test_melt_missing_columns() -> None: + df = pl.DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]}) + + with pytest.raises(pl.ColumnNotFoundError): + df.melt(id_vars=["A", "D"]) + with pytest.raises(pl.ColumnNotFoundError): + df.lazy().melt(id_vars=["A", "D"]).collect() + + with pytest.raises(pl.ColumnNotFoundError): + df.melt(id_vars=["A"], value_vars=["D"]) + with pytest.raises(pl.ColumnNotFoundError): + df.lazy().melt(id_vars=["A"], value_vars=["D"]).collect() + + def test_melt_projection_pd_7747() -> None: df = pl.LazyFrame( { @@ -81,3 +111,14 @@ def test_melt_no_value_vars() -> None: schema={"a": pl.Int64, "variable": pl.String, "value": pl.Null} ) assert_frame_equal(result, expected) + + +# https://github.com/pola-rs/polars/issues/13493 +def test_melt_missing_column_13493() -> None: + with pytest.raises(pl.ColumnNotFoundError): + ( + pl.DataFrame({"col0": [0], "col1": ["a"], "col2": ["b"]}) + .lazy() + .melt(id_vars="row_nr") + .collect() + )