Skip to content

Commit

Permalink
from_numpy orientation (#3960)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed Aug 8, 2022
1 parent 76cb9d2 commit e0f52d1
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 57 deletions.
64 changes: 32 additions & 32 deletions py-polars/polars/internals/construction.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

import sys
import warnings
from contextlib import suppress
from datetime import date, datetime, time, timedelta
from itertools import zip_longest
Expand Down Expand Up @@ -527,55 +526,56 @@ def numpy_to_pydf(
) -> PyDataFrame:
"""Construct a PyDataFrame from a numpy ndarray."""
shape = data.shape
n_columns = (
0
if shape == (0,)
else (
1
if len(shape) == 1
else (shape[1] if orient in ("row", None) else shape[0])

# Unpack columns
if shape == (0,):
n_columns = 0

elif len(shape) == 1:
n_columns = 1

elif len(shape) == 2:
# Infer orientation
if orient is None and columns is not None:
orient = "col" if len(columns) == shape[0] else "row"

if orient == "row":
n_columns = shape[1]
elif orient == "col" or orient is None:
n_columns = shape[0]
else:
raise ValueError(
f"orient must be one of {{'col', 'row', None}}, got {orient} instead."
)

else:
raise ValueError(
"Cannot create DataFrame from numpy array with more than two dimensions."
)
)
columns, dtypes = _unpack_columns(columns, n_expected=n_columns)
if columns and len(columns) != n_columns:

if columns is not None and len(columns) != n_columns:
raise ValueError("Dimensions of columns arg must match data dimensions.")

columns, dtypes = _unpack_columns(columns, n_expected=n_columns)

# Convert data to series
if shape == (0,):
data_series = []

elif len(shape) == 1:
data_series = [pli.Series(columns[0], data, dtypes.get(columns[0])).inner()]

elif len(shape) == 2:
# Infer orientation
if orient is None:
warnings.warn(
"Default orientation for constructing DataFrame from numpy "
'array will change from "row" to "column" in a future version. '
"Specify orientation explicitly to silence this warning.",
DeprecationWarning,
stacklevel=2,
)
orient = "row"
# Exchange if-block above for block below when removing warning
# if orientation is None and columns is not None:
# orientation = "col" if len(columns) == shape[0] else "row"
else:
if orient == "row":
data_series = [
pli.Series(columns[i], data[:, i], dtypes.get(columns[i])).inner()
for i in range(n_columns)
]
elif orient == "col":
else:
data_series = [
pli.Series(columns[i], data[i], dtypes.get(columns[i])).inner()
for i in range(n_columns)
]
else:
raise ValueError(
f"orient must be one of {{'col', 'row', None}}, got {orient} instead."
)
else:
raise ValueError("A numpy array should not have more than two dimensions.")

data_series = _handle_columns_arg(data_series, columns=columns)
return PyDataFrame(data_series)
Expand Down
46 changes: 21 additions & 25 deletions py-polars/tests/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,21 +124,20 @@ def test_init_ndarray() -> None:
assert df.rows() == [(True, 2, "a"), (None, None, None)]
assert df.schema == {"x": pl.Boolean, "y": pl.Int32, "z": pl.Utf8}

# TODO: Uncomment tests below when removing deprecation warning
# # 2D array - default to column orientation
# df = pl.DataFrame(np.array([[1, 2], [3, 4]]))
# truth = pl.DataFrame({"column_0": [1, 2], "column_1": [3, 4]})
# assert df.frame_equal(truth)

# # 2D array - row orientation inferred
# df = pl.DataFrame(np.array([[1, 2, 3], [4, 5, 6]]), columns=["a", "b", "c"])
# truth = pl.DataFrame({"a": [1, 4], "b": [2, 5], "c": [3, 6]})
# assert df.frame_equal(truth)

# # 2D array - column orientation inferred
# df = pl.DataFrame(np.array([[1, 2, 3], [4, 5, 6]]), columns=["a", "b"])
# truth = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
# assert df.frame_equal(truth)
# 2D array - default to column orientation
df = pl.DataFrame(np.array([[1, 2], [3, 4]]))
truth = pl.DataFrame({"column_0": [1, 2], "column_1": [3, 4]})
assert df.frame_equal(truth)

# 2D array - row orientation inferred
df = pl.DataFrame(np.array([[1, 2, 3], [4, 5, 6]]), columns=["a", "b", "c"])
truth = pl.DataFrame({"a": [1, 4], "b": [2, 5], "c": [3, 6]})
assert df.frame_equal(truth)

# 2D array - column orientation inferred
df = pl.DataFrame(np.array([[1, 2, 3], [4, 5, 6]]), columns=["a", "b"])
truth = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
assert df.frame_equal(truth)

# 2D array - orientation conflicts with columns
with pytest.raises(ValueError):
Expand All @@ -161,21 +160,18 @@ def test_init_ndarray() -> None:
orient="wrong", # type: ignore[arg-type]
)

# numpy not available
# Dimensions mismatch
with pytest.raises(ValueError):
_ = pl.DataFrame(np.array([1, 2, 3]), columns=[])
with pytest.raises(ValueError):
_ = pl.DataFrame(np.array([[1, 2], [3, 4]]), columns=["a"])

# NumPy not available
with patch("polars.internals.frame._NUMPY_AVAILABLE", False):
with pytest.raises(ValueError):
pl.DataFrame(np.array([1, 2, 3]), columns=["a"])


# TODO: Remove this test case when removing deprecated behaviour
def test_init_ndarray_deprecated() -> None:
    """Check the deprecated default orientation for a 2D numpy array.

    Building a DataFrame from a 2D array without an explicit orientation is
    expected to trigger a deprecation warning and to interpret the array
    row-wise, so each inner list becomes one row of the frame.
    """
    with pytest.deprecated_call():
        # 2D array - default to row orientation
        matrix = np.array([[1, 2], [3, 4]])
        result = pl.DataFrame(matrix)
        expected = pl.DataFrame({"column_0": [1, 3], "column_1": [2, 4]})
        frames_match = result.frame_equal(expected)
        assert frames_match


def test_init_arrow() -> None:
# Handle unnamed column
df = pl.DataFrame(pa.table({"a": [1, 2], None: [3, 4]}))
Expand Down

0 comments on commit e0f52d1

Please sign in to comment.