Skip to content

Commit

Permalink
BUG: fix initialization of Series with dict containing NaN as key
Browse files Browse the repository at this point in the history
closes #18480
  • Loading branch information
toobaz committed Nov 28, 2017
1 parent 2a0e54b commit d50f170
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 17 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.22.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -208,4 +208,5 @@ Other

- Improved error message when attempting to use a Python keyword as an identifier in a numexpr query (:issue:`18221`)
- Fixed a bug where creating a Series from an array that contains both tz-naive and tz-aware values resulted in a Series whose dtype was tz-aware instead of object (:issue:`16406`)
- Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as a key (:issue:`18480`)
-
2 changes: 1 addition & 1 deletion pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -875,7 +875,7 @@ def _map_values(self, mapper, na_action=None):
# we specify the keys here to handle the
# possibility that they are tuples
from pandas import Series, Index
index = Index(mapper, tupleize_cols=False)
index = Index(mapper, tupleize_cols=True)
mapper = Series(mapper, index=index)

if isinstance(mapper, ABCSeries):
Expand Down
33 changes: 20 additions & 13 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
_default_index,
_asarray_tuplesafe,
_values_from_object,
_try_sort,
_maybe_match_name,
SettingWithCopyError,
_maybe_box_datetimelike,
Expand Down Expand Up @@ -198,18 +197,9 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
data = data.reindex(index, copy=copy)
data = data._data
elif isinstance(data, dict):
if index is None:
if isinstance(data, OrderedDict):
index = Index(data)
else:
index = Index(_try_sort(data))

try:
data = index._get_values_from_dict(data)
except TypeError:
data = ([data.get(i, np.nan) for i in index]
if data else np.nan)

data, index = self._init_from_dict(data, index, dtype)
dtype = None
copy = False
elif isinstance(data, SingleBlockManager):
if index is None:
index = data.index
Expand Down Expand Up @@ -303,6 +293,23 @@ def _can_hold_na(self):

_index = None

def _init_from_dict(self, data, index, dtype):
    """
    Derive the ``_data`` and ``index`` of a new Series from a dict.

    Parameters
    ----------
    data : dict
        Mapping whose keys become the labels and whose values become
        the values of the intermediate Series.
    index : Index or None
        Requested labels. When given and not identical to the dict
        keys, the intermediate Series is reindexed to it.
    dtype : dtype or None
        Forwarded unchanged to the ``Series`` constructor.

    Returns
    -------
    tuple
        ``(_data, index)`` taken from the intermediate Series.
    """
    # Per-key lookups are unreliable for NaN keys
    # ({np.nan : 1}[float('nan')] raises KeyError), so the dict is
    # unpacked in a single pass and alignment is left to the Series.
    if not data:
        keys, values = [], []
    else:
        keys, values = zip(*compat.iteritems(data))

    series = Series(values, index=keys, dtype=dtype)

    if index is None or index.identical(keys):
        # No realignment needed; a dict that is not an OrderedDict
        # gets its labels sorted when that is possible.
        if not isinstance(data, OrderedDict):
            try:
                series = series.sort_index()
            except TypeError:
                # Best effort: keep the dict's iteration order
                # (presumably the labels are not mutually orderable).
                pass
    else:
        series = series.reindex(index)

    return series._data, series.index

def _set_axis(self, axis, labels, fastpath=False):
""" override generic, we want to set the _typ here """

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/series/test_combine_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def test_concat_empty_series_dtypes(self):
# categorical
assert pd.concat([Series(dtype='category'),
Series(dtype='category')]).dtype == 'category'
assert pd.concat([Series(dtype='category'),
assert pd.concat([Series(np.array([]), dtype='category'),
Series(dtype='float64')]).dtype == 'float64'
assert pd.concat([Series(dtype='category'),
Series(dtype='object')]).dtype == 'object'
Expand Down
28 changes: 26 additions & 2 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,32 @@ def test_constructor_dict(self):
expected.iloc[1] = 1
assert_series_equal(result, expected)

def test_constructor_dict_nan_key(self):
    """Series built from a dict keeps entries keyed by NaN (GH 18480)."""
    # A single NaN key next to integer keys.
    mapping = {1: 'a', 2: 'b', np.nan: 'c'}
    expected = Series(['a', 'b', 'c'], index=[1, 2, np.nan])
    assert_series_equal(Series(mapping).sort_index(), expected)

    # Two separately created NaN objects used as keys in one dict.
    mapping = {1: 'a', 2: 'b', float('nan'): 'c', float('nan'): 'd'}
    expected = Series(['a', 'b', 'c', 'd'], index=[1, 2, np.nan, np.nan])
    assert_series_equal(Series(mapping).sort_values(), expected)

    # NaN and None side by side as keys.
    mapping = {1: 'a', 2: 'b', np.nan: 'c', None: 'd'}
    expected = Series(['a', 'b', 'c', 'd'], index=[1, 2, np.nan, None])
    assert_series_equal(Series(mapping).sort_values(), expected)

    # Tuple keys (MultiIndex), one of them containing NaN.
    mapping = {(1, 1): 'a', (2, 2): 'b', (3, np.nan): 'c'}
    expected_index = Index([(1, 1), (2, 2), (3, np.nan)])
    expected = Series(['a', 'b', 'c'], index=expected_index)
    assert_series_equal(Series(mapping).sort_values(), expected)

def test_constructor_dict_datetime64_index(self):
# GH 9456

Expand Down Expand Up @@ -658,8 +684,6 @@ def test_constructor_tuple_of_tuples(self):
s = Series(data)
assert tuple(s) == data

@pytest.mark.xfail(reason='GH 18480 (Series initialization from dict with '
'NaN keys')
def test_constructor_dict_of_tuples(self):
data = {(1, 2): 3,
(None, 5): 6}
Expand Down

0 comments on commit d50f170

Please sign in to comment.