Skip to content

Commit

Permalink
Remove DataFrame setitem (#4331)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed Aug 9, 2022
1 parent eafdaf8 commit 4a1ea48
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 131 deletions.
59 changes: 0 additions & 59 deletions py-polars/polars/internals/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1861,65 +1861,6 @@ def __getitem__(
f" of type: '{type(item)}'."
)

def __setitem__(
    self, key: str | list[Any] | tuple[Any, str | int], value: Any
) -> None:  # pragma: no cover
    """
    Assign ``value`` into this DataFrame by indexing (deprecated).

    Every call emits a ``DeprecationWarning`` that points callers to
    ``DataFrame.with_column``.

    Parameters
    ----------
    key
        Selects what is assigned:

        * ``str``: a column name. The column is replaced; if replacing fails,
          the values are appended as a new column instead.
        * ``list``: column names. ``value`` is converted with ``np.array`` and
          must be a 2D matrix with exactly one matrix column per name.
        * ``tuple``: ``(row_selection, col_selection)`` where ``col_selection``
          is a column name (``str``) or a column position (``int``). The
          row-level assignment is delegated to the selected Series.
    value
        The data to assign.

    Raises
    ------
    ImportError
        If ``key`` is a list and numpy is not available.
    ValueError
        If a list ``key`` is paired with a value that is not a 2D matrix of
        matching width, if a tuple ``key`` does not have exactly two items, if
        the column selection of a tuple ``key`` is neither ``str`` nor ``int``,
        or if ``key`` has an unsupported type.
    """
    # stacklevel=2 attributes the warning to the caller's `df[...] = ...`
    # statement instead of to this library frame.
    warnings.warn(
        "setting a DataFrame by indexing is deprecated; Consider using"
        " DataFrame.with_column",
        DeprecationWarning,
        stacklevel=2,
    )
    # df["foo"] = series
    if isinstance(key, str):
        try:
            self.replace(key, pli.Series(key, value))
        except Exception:
            # Deliberate best-effort fallback: when the in-place replace fails
            # (presumably because the column does not exist yet), append the
            # values as a new column.
            self.hstack([pli.Series(key, value)], in_place=True)
    # df[["C", "D"]]
    elif isinstance(key, list):
        # TODO: Use python sequence constructors
        if not _NUMPY_AVAILABLE:
            raise ImportError("'numpy' is required for this functionality.")
        value = np.array(value)
        if value.ndim != 2:
            raise ValueError("can only set multiple columns with 2D matrix")
        if value.shape[1] != len(key):
            raise ValueError(
                "matrix columns should be equal to list use to determine column"
                " names"
            )
        # Each matrix column is assigned through the single-column (str) branch
        # above, which means every recursive assignment warns again.
        for i, name in enumerate(key):
            self[name] = value[:, i]

    # df[a, b]
    elif isinstance(key, tuple):
        # Unpacking raises ValueError unless the tuple has exactly two items.
        row_selection, col_selection = key

        # get series column selection
        if isinstance(col_selection, str):
            s = self.__getitem__(col_selection)
        elif isinstance(col_selection, int):
            s = self[:, col_selection]
        else:
            raise ValueError(f"column selection not understood: {col_selection}")

        # dispatch to __setitem__ of Series to do modification
        s[row_selection] = value

        # now find the location to place series
        # df[idx]
        if isinstance(col_selection, int):
            self.replace_at_idx(col_selection, s)
        # df["foo"]
        elif isinstance(col_selection, str):
            self.replace(col_selection, s)
    else:
        raise ValueError(
            f"Cannot __setitem__ on DataFrame with key: '{key}' "
            f"of type: '{type(key)}' and value: '{value}' "
            f"of type: '{type(value)}'."
        )

def __len__(self) -> int:
    """Return ``self.height`` so that ``len(df)`` works on the frame."""
    height = self.height
    return height

Expand Down
3 changes: 1 addition & 2 deletions py-polars/tests/test_datelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1238,7 +1238,6 @@ def test_sum_duration() -> None:
}


@pytest.mark.filterwarnings("ignore:setting a DataFrame by indexing:DeprecationWarning")
def test_supertype_timezones_4174() -> None:
df = pl.DataFrame(
{
Expand All @@ -1252,7 +1251,7 @@ def test_supertype_timezones_4174() -> None:

# test if this runs without error
date_to_fill = df["dt_London"][0]
df["dt_London"] = df["dt_London"].shift_and_fill(1, date_to_fill)
df.with_column(df["dt_London"].shift_and_fill(1, date_to_fill))


def test_weekday() -> None:
Expand Down
69 changes: 0 additions & 69 deletions py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,26 +237,6 @@ def test_replace_at_idx() -> None:
assert_frame_equal(expected_df, df)


def test_indexing_set() -> None:
    """Check that ``df[row, column] = value`` still assigns, but warns each time."""
    # This is deprecated behaviour
    frame = pl.DataFrame({"bool": [True, True], "str": ["N/A", "N/A"], "nr": [1, 2]})

    # One (column, new value) pair per dtype; every single assignment has to
    # raise its own deprecation warning.
    updates = [("bool", False), ("nr", 100), ("str", "foo")]
    for column, new_value in updates:
        with pytest.deprecated_call():
            frame[0, column] = new_value

    # Only the first row of each column may have changed.
    expected = {
        "bool": [False, True],
        "str": ["foo", "N/A"],
        "nr": [100, 2],
    }
    assert frame.to_dict(False) == expected


def test_to_series() -> None:
df = pl.DataFrame({"x": [1, 2, 3], "y": [2, 3, 4], "z": [3, 4, 5]})

Expand Down Expand Up @@ -642,55 +622,6 @@ def test_read_missing_file() -> None:
pl.read_csv(f)


def test_set() -> None:
"""
Exercise the deprecated ``df[...] = value`` assignment paths.

Setting a dataframe using indices is deprecated. We keep these tests because we
only generate a warning.

Covers assignment by column name, by list of column names (2D values), and by
``(row, column)`` tuple, plus the ``ValueError`` cases for unsupported keys.
"""
# NOTE(review): indentation is not visible in this view, so it cannot be told
# how many of the statements below belong to the ``deprecated_call`` block --
# confirm against the upstream file before restructuring.
with pytest.deprecated_call():
# fixed seed keeps the random columns reproducible
np.random.seed(1)
df = pl.DataFrame(
{"foo": np.random.rand(10), "bar": np.arange(10), "ham": ["h"] * 10}
)
# "new" is not among the constructed columns, so this adds a column by name
df["new"] = np.random.rand(10)
# boolean-mask row selection combined with a column name
df[df["new"] > 0.5, "new"] = 1

# set 2D
df = pl.DataFrame({"b": [0, 0]})
df[["A", "B"]] = [[1, 2], [1, 2]]
assert df["A"] == [1, 1]
assert df["B"] == [2, 2]

# a scalar, a 1D list, and a matrix with three columns for two names must all fail
with pytest.raises(ValueError):
df[["C", "D"]] = 1
with pytest.raises(ValueError):
df[["C", "D"]] = [1, 1]
with pytest.raises(ValueError):
df[["C", "D"]] = [[1, 2, 3], [1, 2, 3]]

# set tuple
df = pl.DataFrame({"b": [0, 0]})
df[0, "b"] = 1
assert df[0, "b"] == 1

# an integer column selection addresses the same column by position
df[0, 0] = 2
assert df[0, "b"] == 2

# row and col selection have to be int or str
with pytest.raises(ValueError):
df[:, [1]] = 1 # type: ignore[index]
with pytest.raises(ValueError):
df[True, :] = 1 # type: ignore[index]

# needs to be a 2 element tuple
with pytest.raises(ValueError):
df[(1, 2, 3)] = 1 # type: ignore[index]

# we cannot index with any type, such as bool
with pytest.raises(ValueError):
df[True] = 1 # type: ignore[index]


def test_melt() -> None:
df = pl.DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]})
melted = df.melt(id_vars="A", value_vars=["B", "C"])
Expand Down
4 changes: 3 additions & 1 deletion py-polars/tests/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,5 +139,7 @@ def test_getitem_errs() -> None:
):
df["a"][{"strange"}]

with pytest.raises(ValueError, match="Cannot __setitem__ on DataFrame with key:.*"):
with pytest.raises(
TypeError, match="'DataFrame' object does not support item assignment"
):
df[{"some"}] = "foo"

0 comments on commit 4a1ea48

Please sign in to comment.