Skip to content

Commit

Permalink
fix(python): improve handling of dict-type "columns" param on frame-init (#6045)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed Jan 5, 2023
1 parent b4ffc80 commit b3a2c9c
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 4 deletions.
9 changes: 8 additions & 1 deletion py-polars/polars/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,14 @@ def dict_to_pydf(
) -> PyDataFrame:
"""Construct a PyDataFrame from a dictionary of sequences."""
if columns is not None:
# the columns arg may also set the dtype of the series
# the columns arg may also set the dtype/column order of the series
if isinstance(columns, dict) and data:
if not all((col in columns) for col in data):
raise ValueError(
"The given column-schema names do not match the data dictionary"
)
data = {col: data[col] for col in columns}

columns, dtypes = _unpack_columns(columns, lookup_names=data.keys())

if not data and dtypes:
Expand Down
6 changes: 3 additions & 3 deletions py-polars/polars/internals/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,9 @@ class DataFrame:
data : dict, Sequence, ndarray, Series, or pandas.DataFrame
Two-dimensional data in various forms. dict must contain Sequences.
Sequence may contain Series or other Sequences.
columns : Sequence of str or (str,DataType) pairs, default None
Column labels to use for resulting DataFrame. If specified, overrides any
labels already present in the data. Must match data dimensions.
columns : Sequence of str, (str,DataType) pairs, or {str:DataType,} dict
Column labels (with optional type) to use for resulting DataFrame. If specified,
overrides any labels already present in the data. Must match data dimensions.
orient : {'col', 'row'}, default None
Whether to interpret two-dimensional data as columns or as rows. If None,
the orientation is inferred by matching the columns and data dimensions. If
Expand Down
21 changes: 21 additions & 0 deletions py-polars/tests/unit/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,27 @@ def test_from_arrow() -> None:
assert df.rows() == []


def test_from_dict_with_dict_columns() -> None:
    """Check frame-init from dict data combined with a dict-type ``columns`` schema.

    The schema dict is expected to drive both the column order and the column
    dtypes of the resulting frame; a schema whose names do not cover the data
    keys is expected to be rejected with a ``ValueError``.
    """
    # expect schema order (and dtypes) to take precedence over the data dict
    schema = {"a": pl.UInt8, "b": pl.UInt32}
    df = pl.DataFrame({"b": [3, 4], "a": [1, 2]}, columns=schema)
    # ┌─────┬─────┐
    # │ a   ┆ b   │
    # │ --- ┆ --- │
    # │ u8  ┆ u32 │
    # ╞═════╪═════╡
    # │ 1   ┆ 3   │
    # │ 2   ┆ 4   │
    # └─────┴─────┘
    assert df.columns == ["a", "b"]
    # pin the dtypes too, so a regression that only reorders columns is caught
    assert df.schema == schema
    assert df.rows() == [(1, 3), (2, 4)]

    # expected error: data key "a" is absent from the schema names
    mismatched_schema = {"x": pl.UInt8, "b": pl.UInt32}
    # match on the message so an unrelated ValueError cannot satisfy the test
    with pytest.raises(ValueError, match="do not match the data dictionary"):
        pl.DataFrame({"b": [3, 4], "a": [1, 2]}, columns=mismatched_schema)


def test_from_dict_with_scalars() -> None:
import polars as pl

Expand Down

0 comments on commit b3a2c9c

Please sign in to comment.