Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

Already on GitHub? Sign in to your account

BUG/DTYPES: preserve bools in convert_objects #7416

Merged
merged 1 commit into from Jun 10, 2014
Jump to file or symbol
Failed to load files and symbols.
+48 −29
Split
View
@@ -216,3 +216,5 @@ Bug Fixes
(:issue:`7408`)
- Bug where ``NaT`` wasn't repr'd correctly in a ``MultiIndex`` (:issue:`7406`,
:issue:`7409`).
+- Bug where bool objects were converted to ``nan`` in ``convert_objects``
+ (:issue:`7416`).
View
@@ -427,86 +427,91 @@ cdef extern from "parse_helper.h":
cdef double fINT64_MAX = <double> INT64_MAX
cdef double fINT64_MIN = <double> INT64_MIN
-def maybe_convert_numeric(ndarray[object] values, set na_values,
- convert_empty=True, coerce_numeric=False):
+
+def maybe_convert_numeric(object[:] values, set na_values,
+ bint convert_empty=True, bint coerce_numeric=False):
'''
Type inference function-- convert strings to numeric (potentially) and
convert to proper dtype array
'''
cdef:
int status
- Py_ssize_t i, n
- ndarray[float64_t] floats
- ndarray[complex128_t] complexes
- ndarray[int64_t] ints
- bint seen_float = 0
- bint seen_complex = 0
+ Py_ssize_t i, n = values.size
+ ndarray[float64_t] floats = np.empty(n, dtype='f8')
+ ndarray[complex128_t] complexes = np.empty(n, dtype='c16')
+ ndarray[int64_t] ints = np.empty(n, dtype='i8')
+ ndarray[uint8_t] bools = np.empty(n, dtype='u1')
+ bint seen_float = False
+ bint seen_complex = False
+ bint seen_int = False
+ bint seen_bool = False
object val
float64_t fval
- n = len(values)
-
- floats = np.empty(n, dtype='f8')
- complexes = np.empty(n, dtype='c16')
- ints = np.empty(n, dtype='i8')
-
- for i from 0 <= i < n:
+ for i in range(n):
val = values[i]
if val in na_values:
floats[i] = complexes[i] = nan
- seen_float = 1
+ seen_float = True
elif util.is_float_object(val):
floats[i] = complexes[i] = val
- seen_float = 1
+ seen_float = True
elif util.is_integer_object(val):
floats[i] = ints[i] = val
- seen_int = 1
+ seen_int = True
+ elif util.is_bool_object(val):
+ floats[i] = ints[i] = bools[i] = val
+ seen_bool = True
elif val is None:
floats[i] = complexes[i] = nan
- seen_float = 1
- elif hasattr(val,'__len__') and len(val) == 0:
+ seen_float = True
+ elif hasattr(val, '__len__') and len(val) == 0:
if convert_empty or coerce_numeric:
floats[i] = complexes[i] = nan
- seen_float = 1
+ seen_float = True
else:
raise ValueError('Empty string encountered')
elif util.is_complex_object(val):
complexes[i] = val
- seen_complex = 1
+ seen_complex = True
else:
try:
status = floatify(val, &fval)
floats[i] = fval
if not seen_float:
if '.' in val or fval == INF or fval == NEGINF:
- seen_float = 1
+ seen_float = True
elif 'inf' in val: # special case to handle +/-inf
- seen_float = 1
+ seen_float = True
elif fval < fINT64_MAX and fval > fINT64_MIN:
try:
ints[i] = int(val)
except ValueError:
ints[i] = <int64_t> fval
else:
- seen_float = 1
+ seen_float = True
except:
if not coerce_numeric:
raise
floats[i] = nan
- seen_float = 1
-
+ seen_float = True
if seen_complex:
return complexes
elif seen_float:
return floats
- else:
+ elif seen_int:
return ints
+ elif seen_bool:
+ return bools.view(np.bool_)
+ return ints
+
def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
- bint safe=0, bint convert_datetime=0, bint convert_timedelta=0):
+ bint safe=0, bint convert_datetime=0,
+ bint convert_timedelta=0):
'''
Type inference function-- convert object array to proper dtype
'''
@@ -5025,6 +5025,18 @@ def test_convert_objects(self):
result = s.convert_objects(convert_dates='coerce')
assert_series_equal(result, s)
+ def test_convert_objects_preserve_bool(self):
+ s = Series([1, True, 3, 5], dtype=object)
+ r = s.convert_objects(convert_numeric=True)
+ e = Series([1, 1, 3, 5], dtype='i8')
+ tm.assert_series_equal(r, e)
+
+ def test_convert_objects_preserve_all_bool(self):
+ s = Series([False, True, False, False], dtype=object)
+ r = s.convert_objects(convert_numeric=True)
+ e = Series([False, True, False, False], dtype=bool)
+ tm.assert_series_equal(r, e)
+
def test_apply_args(self):
s = Series(['foo,bar'])