Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

fix(python, rust): Validate schema in melt #15514

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
20 changes: 19 additions & 1 deletion crates/polars-core/src/frame/explode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,22 @@ pub struct MeltArgs {
pub streamable: bool,
}

impl MeltArgs {
pub fn is_valid_schema(&self, schema: &Schema) -> PolarsResult<()> {
let mut provided = PlHashSet::new();
for name in self.id_vars.iter().chain(self.value_vars.iter()) {
if !schema.contains(name) {
polars_bail!(ColumnNotFound: "{}", name)
}

if !provided.insert(name) {
polars_bail!(Duplicate: "column name '{}' provided more than once in melt", name)
}
}
Ok(())
}
}

impl DataFrame {
pub fn explode_impl(&self, mut columns: Vec<Series>) -> PolarsResult<DataFrame> {
polars_ensure!(!columns.is_empty(), InvalidOperation: "no columns provided in explode");
Expand Down Expand Up @@ -256,6 +272,9 @@ impl DataFrame {
/// Similar to melt, but without generics. This may be easier if you want to pass
/// an empty `id_vars` or empty `value_vars`.
pub fn melt2(&self, args: MeltArgs) -> PolarsResult<Self> {
let schema = self.schema();
args.is_valid_schema(&schema)?;

let id_vars = args.id_vars;
let mut value_vars = args.value_vars;

Expand Down Expand Up @@ -293,7 +312,6 @@ impl DataFrame {
}

// values will all be placed in a single column, so we must find their supertype
let schema = self.schema();
let mut iter = value_vars.iter().map(|v| {
schema
.get(v)
Expand Down
5 changes: 3 additions & 2 deletions crates/polars-plan/src/logical_plan/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -857,8 +857,9 @@ impl LogicalPlanBuilder {
}

pub fn melt(self, args: Arc<MeltArgs>) -> Self {
let schema = try_delayed!(self.0.schema(), &self.0, into);
let schema = det_melt_schema(&args, &schema);
let input_schema = try_delayed!(self.0.schema(), &self.0, into);
try_delayed!(args.is_valid_schema(&input_schema), &self.0, into);
let schema = det_melt_schema(&args, &input_schema);
LogicalPlan::MapFunction {
input: Arc::new(self.0),
function: FunctionNode::Melt { args, schema },
Expand Down
41 changes: 41 additions & 0 deletions py-polars/tests/unit/operations/test_melt.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pytest

import polars as pl
import polars.selectors as cs
from polars.testing import assert_frame_equal
Expand Down Expand Up @@ -48,6 +50,34 @@ def test_melt() -> None:
]


def test_melt_duplicates() -> None:
    """A column name passed more than once to ``melt`` raises ``DuplicateError``.

    Exercised through both the eager and the lazy API, for a name repeated
    inside ``id_vars`` and for a name shared by ``id_vars`` and ``value_vars``.
    """
    eager = pl.DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]})
    lazy = eager.lazy()

    # "B" listed twice within id_vars.
    repeated_ids = ["B", "B"]
    with pytest.raises(pl.DuplicateError):
        eager.melt(id_vars=repeated_ids)
    with pytest.raises(pl.DuplicateError):
        lazy.melt(id_vars=repeated_ids).collect()

    # "A" appears in both id_vars and value_vars.
    ids = ["A", "B"]
    values = ["C", "A"]
    with pytest.raises(pl.DuplicateError):
        eager.melt(id_vars=ids, value_vars=values)
    with pytest.raises(pl.DuplicateError):
        lazy.melt(id_vars=ids, value_vars=values).collect()


def test_melt_missing_columns() -> None:
    """Referring to a column absent from the frame raises ``ColumnNotFoundError``.

    Exercised through both the eager and the lazy API, with the unknown name
    "D" placed first in ``id_vars`` and then in ``value_vars``.
    """
    eager = pl.DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]})
    lazy = eager.lazy()

    # Unknown column among id_vars.
    ids_with_unknown = ["A", "D"]
    with pytest.raises(pl.ColumnNotFoundError):
        eager.melt(id_vars=ids_with_unknown)
    with pytest.raises(pl.ColumnNotFoundError):
        lazy.melt(id_vars=ids_with_unknown).collect()

    # Unknown column among value_vars.
    ids = ["A"]
    values_with_unknown = ["D"]
    with pytest.raises(pl.ColumnNotFoundError):
        eager.melt(id_vars=ids, value_vars=values_with_unknown)
    with pytest.raises(pl.ColumnNotFoundError):
        lazy.melt(id_vars=ids, value_vars=values_with_unknown).collect()


def test_melt_projection_pd_7747() -> None:
df = pl.LazyFrame(
{
Expand Down Expand Up @@ -81,3 +111,14 @@ def test_melt_no_value_vars() -> None:
schema={"a": pl.Int64, "variable": pl.String, "value": pl.Null}
)
assert_frame_equal(result, expected)


# https://github.com/pola-rs/polars/issues/13493
def test_melt_missing_column_13493() -> None:
    """Lazy ``melt`` on a non-existent ``id_vars`` column raises ``ColumnNotFoundError``."""
    lazy = pl.DataFrame({"col0": [0], "col1": ["a"], "col2": ["b"]}).lazy()

    # "row_nr" is not one of col0/col1/col2.
    with pytest.raises(pl.ColumnNotFoundError):
        lazy.melt(id_vars="row_nr").collect()