Skip to content

Commit

Permalink
python allow set by string (#4118)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jul 22, 2022
1 parent 2dece04 commit fe7af78
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 77 deletions.
4 changes: 2 additions & 2 deletions py-polars/polars/internals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ def __getitem__(self, item: int | Series | range | slice) -> Any:
def __setitem__(
self, key: int | Series | np.ndarray | list | tuple, value: Any
) -> None:
if isinstance(value, Sequence):
if isinstance(value, Sequence) and not isinstance(value, str):
raise ValueError("cannot set with list/tuple as value; use a scalar value")
if isinstance(key, Series):
if key.dtype == Boolean:
Expand Down Expand Up @@ -2343,7 +2343,7 @@ def to_pandas(self) -> pd.Series:
)
return self.to_arrow().to_pandas()

def set(self, filter: Series, value: int | float) -> Series:
def set(self, filter: Series, value: int | float | str) -> Series:
"""
Set masked values.
Expand Down
87 changes: 12 additions & 75 deletions py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,18 @@ def test_replace_at_idx() -> None:
assert_frame_equal(expected_df, df)


def test_indexing_set() -> None:
    """Assign scalar values of three kinds via ``df[row, column] = value``."""
    frame = pl.DataFrame({"bool": [True, True], "str": ["N/A", "N/A"], "nr": [1, 2]})

    # Overwrite row 0 of every column: a boolean, an integer and a string.
    for column, value in (("bool", False), ("nr", 100), ("str", "foo")):
        frame[0, column] = value

    # Row 1 must be untouched; only the first entry of each column changes.
    expected = {
        "bool": [False, True],
        "str": ["foo", "N/A"],
        "nr": [100, 2],
    }
    assert frame.to_dict(False) == expected


def test_to_series() -> None:
df = pl.DataFrame({"x": [1, 2, 3], "y": [2, 3, 4], "z": [3, 4, 5]})

Expand Down Expand Up @@ -362,81 +374,6 @@ def test_groupby() -> None:
assert df.groupby("b").agg_list().shape == (2, 3)


def test_join() -> None:
    """Exercise ``join`` with the default, left and outer strategies.

    Also checks that incomplete join-key arguments raise ``ValueError`` and
    that an eager join and its lazy counterpart produce the same shape.
    """
    left = pl.DataFrame(
        {
            "a": ["a", "b", "a", "z"],
            "b": [1, 2, 3, 4],
            "c": [6, 5, 4, 3],
        }
    )
    right = pl.DataFrame(
        {
            "a": ["b", "c", "b", "a"],
            "k": [0, 3, 9, 6],
            "c": [1, 0, 2, 1],
        }
    )

    # Join without an explicit `how`, sorted on the key for a stable order.
    default_join = left.join(right, left_on="a", right_on="a").sort("a")
    assert default_join["b"].series_equal(pl.Series("b", [1, 3, 2, 2]))

    # Left join: exactly one row ends up with a null in the right-hand "c".
    left_join = left.join(right, left_on="a", right_on="a", how="left").sort("a")
    assert left_join["c_right"].is_null().sum() == 1
    assert left_join["b"].series_equal(pl.Series("b", [1, 3, 2, 2, 4]))

    # Outer join: expected null count per column.
    outer_join = left.join(right, left_on="a", right_on="a", how="outer").sort("a")
    expected_nulls = {"c_right": 1, "c": 1, "b": 1, "k": 1, "a": 0}
    for column, null_total in expected_nulls.items():
        assert outer_join[column].null_count() == null_total

    # we need to pass in a column to join on, either by supplying `on`, or both `left_on` and `right_on`
    for incomplete_keys in ({}, {"right_on": "a"}, {"left_on": "a"}):
        with pytest.raises(ValueError):
            left.join(right, **incomplete_keys)

    numbers = pl.DataFrame({"a": [1, 2, 1, 1], "b": ["a", "b", "c", "c"]})
    words = pl.DataFrame(
        {"foo": [1, 1, 1], "bar": ["a", "c", "c"], "ham": ["let", "var", "const"]}
    )

    # just check if join on multiple columns runs
    numbers.join(words, left_on=["a", "b"], right_on=["foo", "bar"])

    # The eager and lazy code paths must agree on the result shape.
    eager_result = numbers.join(words, left_on="a", right_on="foo")
    lazy_result = (
        numbers.lazy().join(words.lazy(), left_on="a", right_on="foo").collect()
    )
    assert lazy_result.shape == eager_result.shape


def test_joins_dispatch() -> None:
    """Self-join one frame on several key combinations for each join strategy.

    There are no assertions; the test passes when none of the joins raises.
    """
    # this just flexes the dispatch a bit

    # don't change the data of this dataframe, this triggered:
    # https://github.com/pola-rs/polars/issues/1688
    raw = pl.DataFrame(
        {
            "a": ["a", "b", "c", "a"],
            "b": [1, 2, 3, 1],
            "date": ["2021-01-01", "2021-01-02", "2021-01-03", "2021-01-01"],
            "datetime": [13241324, 12341256, 12341234, 13241324],
        }
    )
    parsed_date = pl.col("date").str.strptime(pl.Date)
    as_datetime = pl.col("datetime").cast(pl.Datetime)
    frame = raw.with_columns([parsed_date, as_datetime])

    # Key combinations to join on, in the order they are exercised.
    key_sets = (
        ("a", "b", "date", "datetime"),
        ("date", "datetime"),
        ("date", "datetime", "a"),
        ("date", "a"),
        ("a", "datetime"),
        ("date",),
    )
    for how in ["left", "inner", "outer"]:
        for keys in key_sets:
            # Pass a fresh list on every call.
            frame.join(frame, on=list(keys), how=how)


@pytest.mark.parametrize(
"stack,exp_shape,exp_columns",
[
Expand Down
75 changes: 75 additions & 0 deletions py-polars/tests/test_joins.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,3 +210,78 @@ def test_join_on_expressions() -> None:
assert df_a.join(df_b, left_on=(pl.col("a") ** 2).cast(int), right_on=pl.col("b"))[
"a"
].to_list() == [1, 4, 9, 9]


def test_join() -> None:
    """Exercise ``join`` with the default, left and outer strategies.

    Also checks that incomplete join-key arguments raise ``ValueError`` and
    that an eager join and its lazy counterpart produce the same shape.
    """
    left = pl.DataFrame(
        {
            "a": ["a", "b", "a", "z"],
            "b": [1, 2, 3, 4],
            "c": [6, 5, 4, 3],
        }
    )
    right = pl.DataFrame(
        {
            "a": ["b", "c", "b", "a"],
            "k": [0, 3, 9, 6],
            "c": [1, 0, 2, 1],
        }
    )

    # Join without an explicit `how`, sorted on the key for a stable order.
    default_join = left.join(right, left_on="a", right_on="a").sort("a")
    assert default_join["b"].series_equal(pl.Series("b", [1, 3, 2, 2]))

    # Left join: exactly one row ends up with a null in the right-hand "c".
    left_join = left.join(right, left_on="a", right_on="a", how="left").sort("a")
    assert left_join["c_right"].is_null().sum() == 1
    assert left_join["b"].series_equal(pl.Series("b", [1, 3, 2, 2, 4]))

    # Outer join: expected null count per column.
    outer_join = left.join(right, left_on="a", right_on="a", how="outer").sort("a")
    expected_nulls = {"c_right": 1, "c": 1, "b": 1, "k": 1, "a": 0}
    for column, null_total in expected_nulls.items():
        assert outer_join[column].null_count() == null_total

    # we need to pass in a column to join on, either by supplying `on`, or both `left_on` and `right_on`
    for incomplete_keys in ({}, {"right_on": "a"}, {"left_on": "a"}):
        with pytest.raises(ValueError):
            left.join(right, **incomplete_keys)

    numbers = pl.DataFrame({"a": [1, 2, 1, 1], "b": ["a", "b", "c", "c"]})
    words = pl.DataFrame(
        {"foo": [1, 1, 1], "bar": ["a", "c", "c"], "ham": ["let", "var", "const"]}
    )

    # just check if join on multiple columns runs
    numbers.join(words, left_on=["a", "b"], right_on=["foo", "bar"])

    # The eager and lazy code paths must agree on the result shape.
    eager_result = numbers.join(words, left_on="a", right_on="foo")
    lazy_result = (
        numbers.lazy().join(words.lazy(), left_on="a", right_on="foo").collect()
    )
    assert lazy_result.shape == eager_result.shape


def test_joins_dispatch() -> None:
    """Self-join one frame on several key combinations for each join strategy.

    There are no assertions; the test passes when none of the joins raises.
    """
    # this just flexes the dispatch a bit

    # don't change the data of this dataframe, this triggered:
    # https://github.com/pola-rs/polars/issues/1688
    raw = pl.DataFrame(
        {
            "a": ["a", "b", "c", "a"],
            "b": [1, 2, 3, 1],
            "date": ["2021-01-01", "2021-01-02", "2021-01-03", "2021-01-01"],
            "datetime": [13241324, 12341256, 12341234, 13241324],
        }
    )
    parsed_date = pl.col("date").str.strptime(pl.Date)
    as_datetime = pl.col("datetime").cast(pl.Datetime)
    frame = raw.with_columns([parsed_date, as_datetime])

    # Key combinations to join on, in the order they are exercised.
    key_sets = (
        ("a", "b", "date", "datetime"),
        ("date", "datetime"),
        ("date", "datetime", "a"),
        ("date", "a"),
        ("a", "datetime"),
        ("date",),
    )
    for how in ["left", "inner", "outer"]:
        for keys in key_sets:
            # Pass a fresh list on every call.
            frame.join(frame, on=list(keys), how=how)

0 comments on commit fe7af78

Please sign in to comment.