Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move series.remove_na to core.dtypes.missing.remove_na_arraylike #16935

Merged
merged 2 commits into from
Jul 15, 2017
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,3 +394,10 @@ def na_value_for_dtype(dtype):
elif is_bool_dtype(dtype):
return False
return np.nan


def remove_na_arraylike(series):
"""
Return series containing only true/non-NaN values, possibly empty.
"""
Copy link
Member

@gfyoung gfyoung Jul 15, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know you and @jreback discussed this briefly, but I'd like to understand this:

Why are you saying "array-like" when the function only accepts Series (or is only meant for Series)? If it isn't meant just for Series, we should update the documentation to reflect this new behavior.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add tests for this function as well, just to make sure it behaves the way we want it to.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK with tests. The name is fine: it's descriptive and actually works on arrays as well.

Yes, if you can throw in a couple of tests (with parametrize), that would be great.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In that case, we should rename the variable to something like array_like and add a description of the accepted data types.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, you can name it arr (our typical name).

return series[notnull(lib.values_from_object(series))]
12 changes: 2 additions & 10 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
maybe_upcast, infer_dtype_from_scalar,
maybe_convert_platform,
maybe_cast_to_datetime, maybe_castable)
from pandas.core.dtypes.missing import isnull, notnull
from pandas.core.dtypes.missing import isnull, notnull, remove_na_arraylike

from pandas.core.common import (is_bool_indexer,
_default_index,
Expand Down Expand Up @@ -2749,7 +2749,7 @@ def dropna(self, axis=0, inplace=False, **kwargs):
axis = self._get_axis_number(axis or 0)

if self._can_hold_na:
result = remove_na(self)
result = remove_na_arraylike(self)
if inplace:
self._update_inplace(result)
else:
Expand Down Expand Up @@ -2887,14 +2887,6 @@ def _dir_additions(self):
# -----------------------------------------------------------------------------
# Supplementary functions


def remove_na(series):
"""
Return series containing only true/non-NaN values, possibly empty.
"""
return series[notnull(_values_from_object(series))]


def _sanitize_index(data, index, copy=False):
""" sanitize an index type to return an ndarray of the underlying, pass
thru a non-Index
Expand Down
12 changes: 6 additions & 6 deletions pandas/plotting/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from pandas.util._decorators import cache_readonly
from pandas.core.base import PandasObject
from pandas.core.dtypes.missing import notnull
from pandas.core.dtypes.missing import notnull, remove_na_arraylike
from pandas.core.dtypes.common import (
is_list_like,
is_integer,
Expand All @@ -21,7 +21,7 @@
from pandas.core.common import AbstractMethodError, isnull, _try_sort
from pandas.core.generic import _shared_docs, _shared_doc_kwargs
from pandas.core.index import Index, MultiIndex
from pandas.core.series import Series, remove_na
from pandas.core.series import Series
from pandas.core.indexes.period import PeriodIndex
from pandas.compat import range, lrange, map, zip, string_types
import pandas.compat as compat
Expand Down Expand Up @@ -1376,7 +1376,7 @@ def _plot(cls, ax, y, style=None, bw_method=None, ind=None,
from scipy.stats import gaussian_kde
from scipy import __version__ as spv

y = remove_na(y)
y = remove_na_arraylike(y)

if LooseVersion(spv) >= '0.11.0':
gkde = gaussian_kde(y, bw_method=bw_method)
Expand Down Expand Up @@ -1495,13 +1495,13 @@ def _args_adjust(self):
@classmethod
def _plot(cls, ax, y, column_num=None, return_type='axes', **kwds):
if y.ndim == 2:
y = [remove_na(v) for v in y]
y = [remove_na_arraylike(v) for v in y]
# Boxplot fails with empty arrays, so need to add a NaN
# if any cols are empty
# GH 8181
y = [v if v.size > 0 else np.array([np.nan]) for v in y]
else:
y = remove_na(y)
y = remove_na_arraylike(y)
bp = ax.boxplot(y, **kwds)

if return_type == 'dict':
Expand Down Expand Up @@ -1969,7 +1969,7 @@ def maybe_color_bp(bp):

def plot_group(keys, values, ax):
keys = [pprint_thing(x) for x in keys]
values = [remove_na(v) for v in values]
values = [remove_na_arraylike(v) for v in values]
bp = ax.boxplot(values, **kwds)
if fontsize is not None:
ax.tick_params(axis='both', labelsize=fontsize)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/test_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
import pandas as pd

from pandas.core.dtypes.common import is_float_dtype
from pandas.core.dtypes.missing import remove_na_arraylike
from pandas import (Series, DataFrame, Index, date_range, isnull, notnull,
pivot, MultiIndex)
from pandas.core.nanops import nanall, nanany
from pandas.core.panel import Panel
from pandas.core.series import remove_na

from pandas.io.formats.printing import pprint_thing
from pandas import compat
Expand Down Expand Up @@ -155,7 +155,7 @@ def _check_stat_op(self, name, alternative, obj=None, has_skipna=True):
if has_skipna:

def skipna_wrapper(x):
nona = remove_na(x)
nona = remove_na_arraylike(x)
if len(nona) == 0:
return np.nan
return alternative(nona)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/test_panel4d.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
import numpy as np

from pandas.core.dtypes.common import is_float_dtype
from pandas.core.dtypes.missing import remove_na_arraylike
from pandas import Series, Index, isnull, notnull
from pandas.core.panel import Panel
from pandas.core.panel4d import Panel4D
from pandas.core.series import remove_na
from pandas.tseries.offsets import BDay

from pandas.util.testing import (assert_frame_equal, assert_series_equal,
Expand Down Expand Up @@ -118,7 +118,7 @@ def _check_stat_op(self, name, alternative, obj=None, has_skipna=True):

if has_skipna:
def skipna_wrapper(x):
nona = remove_na(x)
nona = remove_na_arraylike(x)
if len(nona) == 0:
return np.nan
return alternative(nona)
Expand Down