Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Pyjanitor fill method #704

Merged
merged 33 commits into from
Aug 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
f4f3b65
forward and backward fill methods
Jul 26, 2020
4c8f93a
updates to fill method
Jul 26, 2020
6ad127f
updates to fill method
Jul 26, 2020
4e44eb7
updates to fill_method
Jul 26, 2020
2562938
updates to fill method
Jul 26, 2020
b98ac93
updates to fill method
Jul 26, 2020
3c06a72
updates to fill method
Jul 26, 2020
13b49ec
updates to fill method
Jul 26, 2020
914d363
updates to fill method
Jul 26, 2020
ea43252
update to changelog
Jul 26, 2020
d6be2a6
Update tests/functions/test_fill.py
samukweku Jul 29, 2020
cf9678d
updates to fill_method
Jul 29, 2020
3d71da6
updates to fill_method
Jul 30, 2020
4ffd4a6
update fill_method
Jul 30, 2020
1a685b1
update fill_method
Jul 30, 2020
13487ab
updates to fill_method
Jul 30, 2020
7932965
update fill_method
Jul 30, 2020
5a83f8f
updates to fill_method
Jul 30, 2020
be5b286
updates to fill_method
Aug 2, 2020
6c442d2
updates to fill_method
Aug 2, 2020
cf65898
updates to fill_method
Aug 2, 2020
4741546
Update janitor/functions.py
samukweku Aug 4, 2020
36200e3
Update janitor/functions.py
samukweku Aug 4, 2020
98b2e0c
Update tests/functions/test_fill_direction.py
samukweku Aug 4, 2020
c4c40cd
Update tests/functions/test_fill_direction.py
samukweku Aug 4, 2020
f56c573
Update tests/functions/test_fill_direction.py
samukweku Aug 4, 2020
5267a44
Update tests/functions/test_fill_direction.py
samukweku Aug 4, 2020
480d5da
Update tests/functions/test_fill_direction.py
samukweku Aug 4, 2020
35d3f9e
Update tests/functions/test_fill_direction.py
samukweku Aug 4, 2020
62247d8
Update tests/functions/test_fill_direction.py
samukweku Aug 4, 2020
52c5c89
Update tests/functions/test_fill_direction.py
samukweku Aug 4, 2020
5caede8
Update janitor/functions.py
samukweku Aug 4, 2020
306eb80
docstring updates
Aug 4, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ new version (on deck)
- [ENH] Added ``process_text`` wrapper function for all Pandas string methods. @samukweku
- [TST] Only skip tests for non-installed libraries on local machine. @hectormz
- [DOC] Fix minor issues in documentation. @hectormz
- [ENH] Added ``fill_direction`` function for forward/backward fills on missing values
for selected columns in a dataframe. @samukweku


v0.20.7
Expand Down
3 changes: 2 additions & 1 deletion docs/reference/general_functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,15 @@ Modify columns
groupby_agg
join_apply
drop_duplicate_columns
process_text

Modify values
~~~~~~~~~~~~~
.. autosummary::
:toctree: janitor.functions/

fill_empty
fill_direction
convert_excel_date
convert_matlab_date
convert_unix_date
Expand Down Expand Up @@ -86,5 +88,4 @@ Other
move
toset
unionize_dataframe_categories
process_text

141 changes: 141 additions & 0 deletions janitor/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4104,3 +4104,144 @@ def process_text(
df[column] = getattr(df[column].str, string_function)(*args, **kwargs)

return df


@pf.register_dataframe_method
def fill_direction(
df: pd.DataFrame,
directions: Dict[Hashable, str],
limit: Optional[int] = None,
) -> pd.DataFrame:
"""Provide a method-chainable function for filling missing values
in selected columns.

Missing values are filled using the next or previous entry.
The columns are paired with the directions in a dictionary.
It is a wrapper for ``pd.Series.ffill`` and ``pd.Series.bfill``.

.. code-block:: python

import pandas as pd
import janitor as jn

df = pd.DataFrame({"text": ["ragnar", np.nan, "sammywemmy",
np.nan, "ginger"],
"code" : [np.nan, 2, 3, np.nan, 5]})

# Single column :
df.fill_direction({"text" : "up"})
# text | code
# ragnar | NaN
# sammywemmy | 2
# sammywemmy | 3
# ginger | NaN
# ginger | 5

# Multiple columns :
df.fill_direction({"text" : "down", "code" : "down"})

# text | code
# ragnar | NaN
# ragnar | 2
# sammywemmy | 3
# sammywemmy | 3
# ginger | 5

# Multiple columns in different directions.
df.fill_direction({"text" : "up", "code" : "down"})

# text | code
# ragnar | NaN
# sammywemmy | 2
# sammywemmy | 3
# ginger | 3
# ginger | 5

Functional usage syntax:

.. code-block:: python

import pandas as pd
import janitor as jn

df = pd.DataFrame(...)
df = jn.fill_direction(
df = df,
directions = {column_1 : direction_1, column_2 : direction_2, ...},
limit = None # limit must be greater than 0
)

Method-chaining usage syntax:

.. code-block:: python

import pandas as pd
import janitor as jn

df = (
pd.DataFrame(...)
.fill_direction(
directions = {column_1 : direction_1, column_2 : direction_2, ...},
limit = None # limit must be greater than 0
)
)

:param df: A pandas dataframe.
:param directions: Key - value pairs of columns and directions. Directions
can be either `down`(default), `up`, `updown`(fill up then down) and
`downup` (fill down then up).
:param limit: number of consecutive null values to forward/backward fill.
Value must be greater than 0.
:returns: A pandas dataframe with modified column(s).
:raises: ValueError if ``directions`` dictionary is empty.
:raises: ValueError if column supplied is not in the dataframe.
:raises: ValueError if direction supplied is not one of `down`,`up`,
`updown`, or `downup`.
"""

# check that dictionary is not empty
if not directions:
raise ValueError("A mapping of columns with directions is required.")

# check that the right columns are provided
# should be removed once the minimum Pandas version is 1.1,
# as Pandas loc will raise a KeyError if columns provided do not exist
wrong_columns_provided = set(directions).difference(df.columns)
if any(wrong_columns_provided):
if len(wrong_columns_provided) > 1:
outcome = ", ".join(f"'{word}'" for word in wrong_columns_provided)
raise ValueError(
f"Columns {outcome} do not exist in the dataframe."
)
outcome = "".join(wrong_columns_provided)
raise ValueError(f"Column {outcome} does not exist in the dataframe.")

# check that the right directions are provided
set_directions = {"up", "down", "updown", "downup"}

# linter throws an error when I use dictionary.values()
# it assumes that dictionary is a dataframe
directions_values = [value for key, value in directions.items()]
wrong_directions_provided = set(directions_values).difference(
set_directions
)
if any(wrong_directions_provided):
raise ValueError(
"""The direction should be a string and should be one of `up`,
`down`, `updown`, or `downup`."""
)

for column, direction in directions.items():
if direction == "up":
df.loc[:, column] = df.loc[:, column].bfill(limit=limit)
elif direction == "down":
df.loc[:, column] = df.loc[:, column].ffill(limit=limit)
elif direction == "updown":
df.loc[:, column] = (
df.loc[:, column].bfill(limit=limit).ffill(limit=limit)
)
elif direction == "downup":
df.loc[:, column] = (
df.loc[:, column].ffill(limit=limit).bfill(limit=limit)
)
return df
145 changes: 145 additions & 0 deletions tests/functions/test_fill_direction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
import numpy as np
import pandas as pd
import pytest
from pandas.testing import assert_frame_equal


@pytest.fixture
def df():
return pd.DataFrame(
[
{
"rank": 1,
"pet_type": np.nan,
"breed": "Boston Terrier",
"owner": "sam",
},
{
"rank": 2,
"pet_type": np.nan,
"breed": "Retrievers (Labrador)",
"owner": "ogor",
},
{
"rank": 3,
"pet_type": np.nan,
"breed": "Retrievers (Golden)",
"owner": "nathan",
},
{
"rank": 4,
"pet_type": np.nan,
"breed": "French Bulldogs",
"owner": np.nan,
},
{
"rank": 5,
"pet_type": np.nan,
"breed": "Bulldogs",
"owner": np.nan,
},
{
"rank": 6,
"pet_type": "Dog",
"breed": "Beagles",
"owner": np.nan,
},
{
"rank": 1,
"pet_type": np.nan,
"breed": "Persian",
"owner": np.nan,
},
{
"rank": 2,
"pet_type": np.nan,
"breed": "Maine Coon",
"owner": "ragnar",
},
{
"rank": 3,
"pet_type": np.nan,
"breed": "Ragdoll",
"owner": np.nan,
},
{
"rank": 4,
"pet_type": np.nan,
"breed": "Exotic",
"owner": np.nan,
},
{
"rank": 5,
"pet_type": np.nan,
"breed": "Siamese",
"owner": np.nan,
},
{
"rank": 6,
"pet_type": "Cat",
"breed": "American Short",
"owner": "adaora",
},
]
)


def test_fill_column(df):
"""Fill down on a single column."""
expected = df.copy()
expected.loc[:, "pet_type"] = expected.loc[:, "pet_type"].ffill()
assert_frame_equal(df.fill_direction({"pet_type": "down"}), expected)


def test_fill_column_up(df):
"""Fill up on a single column."""
expected = df.copy()
expected.loc[:, "pet_type"] = expected.loc[:, "pet_type"].bfill()
assert_frame_equal(df.fill_direction({"pet_type": "up"}), expected)


def test_fill_column_updown(df):
"""Fill upwards, then downwards on a single column."""
expected = df.copy()
expected.loc[:, "pet_type"] = expected.loc[:, "pet_type"].bfill().ffill()
assert_frame_equal(df.fill_direction({"pet_type": "updown"}), expected)


def test_fill_column_down_up(df):
"""Fill downwards, then upwards on a single column."""
expected = df.copy()
expected.loc[:, "pet_type"] = expected.loc[:, "pet_type"].ffill().bfill()
assert_frame_equal(df.fill_direction({"pet_type": "downup"}), expected)


def test_fill_multiple_columns(df):
"""Fill on multiple columns with a single direction."""
expected = df.copy()
expected.loc[:, ["pet_type", "owner"]] = expected.loc[
:, ["pet_type", "owner"]
].ffill()
assert_frame_equal(
df.fill_direction({"pet_type": "down", "owner": "down"}), expected
)


def test_fill_multiple_columns_multiple_directions(df):
"""Fill on multiple columns with different directions."""
expected = df.copy()
expected.loc[:, "pet_type"] = expected.loc[:, "pet_type"].ffill()
expected.loc[:, "owner"] = expected.loc[:, "owner"].bfill()
assert_frame_equal(
df.fill_direction({"pet_type": "down", "owner": "up"}), expected
)


def test_wrong_column_name(df):
"""Raise Value Error if wrong column name is provided."""
with pytest.raises(ValueError):
df.fill_direction({"PetType": "down"})


def test_wrong_direction(df):
"""Raise Value Error if wrong direction is provided."""
with pytest.raises(ValueError):
df.fill_direction({"pet_type": "upanddawn"})
3 changes: 3 additions & 0 deletions tests/functions/test_filter_date.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
import pytest


@pytest.mark.xfail
# pandas 1.1 raises a KeyError if columns/indices passed to loc does not exist
# pandas <1.1 raises a TypeError
def test_filter_date_column_name(date_dataframe):
df = date_dataframe
# `column_name` wasn't a string
Expand Down