Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] updates to update_where and find_replace functions #673

Merged
merged 24 commits into from Jun 20, 2020
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
28 changes: 21 additions & 7 deletions janitor/functions.py
Expand Up @@ -3099,7 +3099,7 @@ def _find_replace(
if match.lower() == "regex":
for k, v in mapper.items():
condition = df[column_name].str.contains(k, regex=True)
df = df.update_where(condition, column_name, v)
df.loc[condition, column_name] = v
return df


Expand All @@ -3111,7 +3111,8 @@ def update_where(
target_column_name: Hashable,
target_val: Any,
) -> pd.DataFrame:
"""Add multiple conditions to update a column in the dataframe.
"""
Add multiple conditions to update a column in the dataframe.

This method mutates the original DataFrame.

Expand All @@ -3129,7 +3130,7 @@ def update_where(
df = (
df
.update_where(
condition=(df['a'] > 2) & (df['b'] < 8),
conditions="a > 2 and b < 8",
target_column_name='c',
target_val=10)
)
Expand All @@ -3140,17 +3141,30 @@ def update_where(
# 4 8 0

:param df: The pandas DataFrame object.
:param conditions: conditions used to update a target column and target
value
:param conditions: Conditions used to update a target column
and target value.
:param target_column_name: Column to be updated. If column does not exist
in dataframe, a new column will be created; note that entries that do
not get set in the new column will be null.
:param target_val: Value to be updated
:returns: An updated pandas DataFrame.
:raises: IndexError if **conditions** does not have the same length as
**df**.
**df**.
ericmjl marked this conversation as resolved.
Show resolved Hide resolved
:raises: TypeError if **conditions** is not a pandas-compatible string
ericmjl marked this conversation as resolved.
Show resolved Hide resolved
query.
ericmjl marked this conversation as resolved.
Show resolved Hide resolved
"""
df.loc[conditions, target_column_name] = target_val

# use query mode if a string expression is passed
if isinstance(conditions, str):
# get the index that meets the conditions criteria
conditions_index = df.query(conditions).index

# pass target_val to dataframe
df.loc[conditions_index, target_column_name] = target_val

else:
df.loc[conditions, target_column_name] = target_val
samukweku marked this conversation as resolved.
Show resolved Hide resolved

return df


Expand Down
20 changes: 20 additions & 0 deletions tests/functions/test_update_where.py
@@ -1,5 +1,8 @@
import pandas as pd
import pytest
from pandas._testing import assert_frame_equal
hectormz marked this conversation as resolved.
Show resolved Hide resolved

from janitor.functions import update_where


@pytest.mark.functions
Expand All @@ -16,3 +19,20 @@ def test_update_where(dataframe):
),
dataframe.replace("Cambridge", "Durham"),
)


def test_update_where_query():
    """
    Test that update_where works with a pandas query-style string expression.

    Only the row satisfying ``a > 2 and b < 8`` (a=3, b=7) should have
    column ``c`` set to the target value; all other cells stay unchanged.
    """
    df = pd.DataFrame(
        {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8], "c": [0, 0, 0, 0]}
    )
    # The last row has a=4 but b=8, so it fails ``b < 8`` and keeps c == 0.
    expected = pd.DataFrame(
        {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8], "c": [0, 0, 10, 0]}
    )
    result = update_where(
        df, conditions="a > 2 and b < 8", target_column_name="c", target_val=10
    )

    assert_frame_equal(result, expected)