Skip to content

Commit

Permalink
document dropping null values
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Oct 29, 2021
1 parent 1f1a949 commit b883e53
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 1 deletion.
64 changes: 63 additions & 1 deletion py-polars/polars/eager/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1645,7 +1645,7 @@ def drop_nulls(self, subset: Optional[tp.List[str]] = None) -> "DataFrame":
"""
Return a new DataFrame where the null values are dropped.
Examples
Examples
--------
>>> df = pl.DataFrame({
>>> "foo": [1, 2, 3],
Expand All @@ -1664,6 +1664,68 @@ def drop_nulls(self, subset: Optional[tp.List[str]] = None) -> "DataFrame":
│ 3 ┆ 8 ┆ "c" │
└─────┴─────┴─────┘
See Also
--------
This method only drops rows in which at least one value is null.
Below are some example snippets that show how you could drop null values based on other
conditions:
>>> df = pl.DataFrame(
>>> {
>>> "a": [None, None, None, None],
>>> "b": [1, 2, None, 1],
>>> "c": [1, None, None, 1],
>>> }
>>> )
>>> df
shape: (4, 3)
┌──────┬──────┬──────┐
│ a ┆ b ┆ c │
│ --- ┆ --- ┆ --- │
│ f64 ┆ i64 ┆ i64 │
╞══════╪══════╪══════╡
│ null ┆ 1 ┆ 1 │
├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ null ┆ 2 ┆ null │
├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ null ┆ null ┆ null │
├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ null ┆ 1 ┆ 1 │
└──────┴──────┴──────┘
>>> # drop a row only if all values are null
>>> df.filter(~pl.fold(acc=True, f=lambda acc, s: acc & s.is_null(), exprs=pl.all()))
shape: (3, 3)
┌──────┬─────┬──────┐
│ a ┆ b ┆ c │
│ --- ┆ --- ┆ --- │
│ f64 ┆ i64 ┆ i64 │
╞══════╪═════╪══════╡
│ null ┆ 1 ┆ 1 │
├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
│ null ┆ 2 ┆ null │
├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
│ null ┆ 1 ┆ 1 │
└──────┴─────┴──────┘
>>> # drop a column if all values are null
>>> df[:, [not (s.null_count() == df.height) for s in df]]
shape: (4, 2)
┌──────┬──────┐
│ b ┆ c │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞══════╪══════╡
│ 1 ┆ 1 │
├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 2 ┆ null │
├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ null ┆ null │
├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 1 ┆ 1 │
└──────┴──────┘
"""
if subset is not None and isinstance(subset, str):
subset = [subset]
Expand Down
65 changes: 65 additions & 0 deletions py-polars/polars/lazy/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -893,6 +893,71 @@ def drop_nulls(
) -> "LazyFrame":
"""
Drop rows with null values from this LazyFrame.
Examples
--------
>>> df = pl.DataFrame({
>>> "foo": [1, 2, 3],
>>> "bar": [6, None, 8],
>>> "ham": ['a', 'b', 'c']
>>> })
>>> df.lazy().drop_nulls().collect()
shape: (2, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str │
╞═════╪═════╪═════╡
│ 1 ┆ 6 ┆ "a" │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ 3 ┆ 8 ┆ "c" │
└─────┴─────┴─────┘
See Also
--------
This method only drops rows in which at least one value is null.
Below are some example snippets that show how you could drop null values based on other
conditions:
>>> df = pl.DataFrame(
>>> {
>>> "a": [None, None, None, None],
>>> "b": [1, 2, None, 1],
>>> "c": [1, None, None, 1],
>>> }
>>> )
>>> df
shape: (4, 3)
┌──────┬──────┬──────┐
│ a ┆ b ┆ c │
│ --- ┆ --- ┆ --- │
│ f64 ┆ i64 ┆ i64 │
╞══════╪══════╪══════╡
│ null ┆ 1 ┆ 1 │
├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ null ┆ 2 ┆ null │
├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ null ┆ null ┆ null │
├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ null ┆ 1 ┆ 1 │
└──────┴──────┴──────┘
>>> # drop a row only if all values are null
>>> df.filter(~pl.fold(acc=True, f=lambda acc, s: acc & s.is_null(), exprs=pl.all()))
shape: (3, 3)
┌──────┬─────┬──────┐
│ a ┆ b ┆ c │
│ --- ┆ --- ┆ --- │
│ f64 ┆ i64 ┆ i64 │
╞══════╪═════╪══════╡
│ null ┆ 1 ┆ 1 │
├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
│ null ┆ 2 ┆ null │
├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
│ null ┆ 1 ┆ 1 │
└──────┴─────┴──────┘
"""
if subset is not None and not isinstance(subset, list):
subset = [subset]
Expand Down

0 comments on commit b883e53

Please sign in to comment.