Skip to content

Commit

Permalink
ENH: Declare a BoolBlock as a NumericBlock
Browse files Browse the repository at this point in the history
BUG: GH2641 fixes "df.describe() with boolean column"

This change makes all numeric operations on boolean data work by
transparently treating the values as the integers 1 and 0. This is
not pandas-specific behavior; it is the default behavior of both
NumPy and Python.
  • Loading branch information
Dan Birken committed Apr 1, 2013
1 parent ac7dd25 commit bc46b49
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 8 deletions.
3 changes: 3 additions & 0 deletions RELEASE.rst
Expand Up @@ -124,6 +124,8 @@ pandas 0.11.0
knows how many columns to expect in the result) (GH2981_)
- On a mixed DataFrame, allow setting with indexers with ndarray/DataFrame
on rhs (GH3216_)
- Treat boolean values as integers (values 1 and 0) for numeric
operations. (GH2641_)

**API Changes**

Expand Down Expand Up @@ -350,6 +352,7 @@ pandas 0.11.0
.. _GH2747: https://github.com/pydata/pandas/issues/2747
.. _GH2816: https://github.com/pydata/pandas/issues/2816
.. _GH3216: https://github.com/pydata/pandas/issues/3216
.. _GH2641: https://github.com/pydata/pandas/issues/2641

pandas 0.10.1
=============
Expand Down
4 changes: 4 additions & 0 deletions doc/source/v0.11.0.txt
Expand Up @@ -304,6 +304,9 @@ Enhancements
- added option `display.with_wmp_style` providing a sleeker visual style
for plots. Based on https://gist.github.com/huyng/816622 (GH3075_).

- Treat boolean values as integers (values 1 and 0) for numeric
operations. (GH2641_)

See the `full release notes
<https://github.com/pydata/pandas/blob/master/RELEASE.rst>`__ or issue tracker
on GitHub for a complete list.
Expand All @@ -328,3 +331,4 @@ on GitHub for a complete list.
.. _GH3059: https://github.com/pydata/pandas/issues/3059
.. _GH3070: https://github.com/pydata/pandas/issues/3070
.. _GH3075: https://github.com/pydata/pandas/issues/3075
.. _GH2641: https://github.com/pydata/pandas/issues/2641
5 changes: 1 addition & 4 deletions pandas/core/internals.py
Expand Up @@ -628,7 +628,7 @@ def should_store(self, value):
return com.is_integer_dtype(value) and value.dtype == self.dtype


class BoolBlock(Block):
class BoolBlock(NumericBlock):
is_bool = True
_can_hold_na = False

Expand All @@ -641,9 +641,6 @@ def _try_cast(self, element):
except: # pragma: no cover
return element

def _try_cast_result(self, result):
return _possibly_downcast_to_dtype(result, self.dtype)

def should_store(self, value):
return issubclass(value.dtype.type, np.bool_)

Expand Down
29 changes: 26 additions & 3 deletions pandas/tests/test_frame.py
Expand Up @@ -7899,15 +7899,14 @@ def test_dataframe_clip(self):
def test_get_X_columns(self):
# numeric and object columns

# Booleans get casted to float in DataFrame, so skip for now
df = DataFrame({'a': [1, 2, 3],
# 'b' : [True, False, True],
'b' : [True, False, True],
'c': ['foo', 'bar', 'baz'],
'd': [None, None, None],
'e': [3.14, 0.577, 2.773]})

self.assert_(np.array_equal(df._get_numeric_data().columns,
['a', 'e']))
['a', 'b', 'e']))

def test_get_numeric_data(self):
intname = np.dtype(np.int_).name
Expand Down Expand Up @@ -7939,6 +7938,30 @@ def test_get_numeric_data(self):
expected = df.ix[:, []]
assert_frame_equal(result, expected)

def test_bool_describe_in_mixed_frame(self):
df = DataFrame({
'string_data': ['a', 'b', 'c', 'd', 'e'],
'bool_data': [True, True, False, False, False],
'int_data': [10, 20, 30, 40, 50],
})

# Boolean data and integer data is included in .describe() output, string data isn't
self.assert_(np.array_equal(df.describe().columns, ['bool_data', 'int_data']))

bool_describe = df.describe()['bool_data']

# Both the min and the max values should stay booleans
self.assert_(bool_describe['min'].dtype == np.bool_)
self.assert_(bool_describe['max'].dtype == np.bool_)

self.assert_(bool_describe['min'] == False)
self.assert_(bool_describe['max'] == True)

# For numeric operations, like mean or median, the values True/False are cast to
# the integer values 1 and 0
assert_almost_equal(bool_describe['mean'], 0.4)
assert_almost_equal(bool_describe['50%'], 0)

def test_count(self):
f = lambda s: notnull(s).sum()
self._check_stat_op('count', f,
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/test_internals.py
Expand Up @@ -497,7 +497,7 @@ def test_get_numeric_data(self):
'bool': bool_ser, 'obj': obj_ser,
'dt': dt_ser})
xp = DataFrame({'int': int_ser, 'float': float_ser,
'complex': complex_ser})
'complex': complex_ser, 'bool': bool_ser})
rs = DataFrame(df._data.get_numeric_data())
assert_frame_equal(xp, rs)

Expand Down

0 comments on commit bc46b49

Please sign in to comment.