Skip to content

Commit

Permalink
BUG: fix initialization of Series with dict containing NaN key
Browse files Browse the repository at this point in the history
  • Loading branch information
toobaz committed Nov 26, 2017
1 parent b45325e commit 04fb7a2
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 25 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.22.0.txt
Expand Up @@ -196,4 +196,5 @@ Other

- Improved error message when attempting to use a Python keyword as an identifier in a numexpr query (:issue:`18221`)
- Fixed a bug where creating a Series from an array that contains both tz-naive and tz-aware values resulted in a Series whose dtype was tz-aware instead of object (:issue:`16406`)
- Fixed initialization of Series from a dict containing NaN as a key (:issue:`18480`)
-
56 changes: 31 additions & 25 deletions pandas/core/series.py
Expand Up @@ -198,32 +198,39 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
data = data.reindex(index, copy=copy)
data = data._data
elif isinstance(data, dict):
if index is None:
if isinstance(data, OrderedDict):
index = Index(data)
else:
index = Index(_try_sort(data))
try:
if isinstance(index, DatetimeIndex):
if len(data):
# coerce back to datetime objects for lookup
data = _dict_compat(data)
data = lib.fast_multiget(data,
index.asobject.values,
default=np.nan)
if data:
keys, values = zip(*compat.iteritems(data))
keys = Index(list(keys), tupleize_cols=True)
values = np.asarray(values, dtype='object')
try:
values = lib.maybe_convert_objects(values)
except:
pass
if index is None:
if isinstance(data, OrderedDict):
order = np.arange(len(keys))
else:
data = np.nan
# GH #12169
elif isinstance(index, (PeriodIndex, TimedeltaIndex)):
data = ([data.get(i, np.nan) for i in index]
if data else np.nan)
try:
order = keys.argsort()
except TypeError:
order = np.arange(len(keys))
index = keys[order]
else:
data = lib.fast_multiget(data, index.values,
default=np.nan)
except TypeError:
data = ([data.get(i, np.nan) for i in index]
if data else np.nan)

locs = index.get_indexer(keys)
order = - np.ones(len(index), dtype=int)
order[locs] = np.arange(len(keys))
data = values[order]
nan_idxs = np.where(order == -1)[0]
if len(nan_idxs):
if is_integer_dtype(data):
data = data.astype(float)
data[nan_idxs] = np.nan
else:
if index is None:
index = Index([])
data = np.array([np.nan] * len(index))
if any([is_list_like(item) for item in data]):
data = list(data)
elif isinstance(data, SingleBlockManager):
if index is None:
index = data.index
Expand Down Expand Up @@ -263,7 +270,6 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
else:
data = _sanitize_array(data, index, dtype, copy,
raise_cast_failure=True)

data = SingleBlockManager(data, index, fastpath=True)

generic.NDFrame.__init__(self, data, fastpath=True)
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/series/test_constructors.py
Expand Up @@ -625,6 +625,18 @@ def test_constructor_dict(self):
expected.iloc[1] = 1
assert_series_equal(result, expected)

# GH 18480 - NaN key
d = {1: 'a', 2: 'b', np.nan: 'c'}
result = Series(d).sort_index()
expected = Series(['a', 'b', 'c'], index=[1, 2, np.nan])
assert_series_equal(result, expected)

# Different NaNs:
d = {1: 'a', 2: 'b', float('nan'): 'c', float('nan'): 'd'}
result = Series(d).sort_values()
expected = Series(['a', 'b', 'c', 'd'], index=[1, 2, np.nan, np.nan])
assert_series_equal(result, expected)

def test_constructor_dict_datetime64_index(self):
# GH 9456

Expand Down

0 comments on commit 04fb7a2

Please sign in to comment.