Skip to content

Commit

Permalink
Fixes Bugs in Describe (#140)
Browse files: browse the repository at this point in the history.
* fixing describe bug

* formatting

* formatting change

* removed comments
  • Loading branch information
osalpekar authored and devin-petersohn committed Oct 10, 2018
1 parent 00ee9e4 commit 5b34421
Showing 1 changed file with 31 additions and 9 deletions.
40 changes: 31 additions & 9 deletions modin/data_management/data_manager.py
Original file line number | Diff line number | Diff line change
Expand Up
@@ -159,15 +159,17 @@ def helper(df, internal_indices=[]):

return helper

def numeric_columns(self):
def numeric_columns(self, include_bool=True):
"""Returns the numeric columns of the Manager.
Returns:
List of index names.
"""
columns = []
for col, dtype in zip(self.columns, self.dtypes):
if is_numeric_dtype(dtype):
if is_numeric_dtype(dtype) and (
include_bool or (not include_bool and dtype != np.bool_)
):
columns.append(col)
return columns

Expand Down / Expand Up
@@ -1379,25 +1381,45 @@ def describe(self, **kwargs):
Returns:
DataFrame object containing the descriptive statistics of the DataFrame.
"""
# Only describe numeric if there are numeric
# Only describe numeric if there are numeric columns
# Otherwise, describe all
columns_for_describe = self.numeric_columns()
if len(columns_for_describe) != 0 and "object" in kwargs["exclude"]:
new_columns = self.numeric_columns(include_bool=False)
if len(new_columns) != 0:
numeric = True
exclude = kwargs.get("exclude", None)
if is_list_like(exclude):
exclude.append([np.timedelta64, np.datetime64])
else:
exclude = [exclude, np.timedelta64, np.datetime64]
kwargs["exclude"] = exclude
else:
numeric = False
# If no numeric dtypes, then do all
columns_for_describe = self.columns
# If only timedelta and datetime objects, only do the timedelta
# columns
if all(
(
dtype
for dtype in self.dtypes
if dtype == np.datetime64 or dtype == np.timedelta64
)
):
new_columns = [
self.columns[i]
for i in range(len(self.columns))
if self.dtypes[i] != np.dtype("datetime64[ns]")
]
else:
# Describe all columns
new_columns = self.columns

def describe_builder(df, **kwargs):
return pandas.DataFrame.describe(df, **kwargs)

# Apply describe and update indices, columns, and dtypes
func = self._prepare_method(describe_builder, **kwargs)
new_data = self.full_axis_reduce_along_select_indices(
func, 0, columns_for_describe, False
func, 0, new_columns, False
)
new_columns = columns_for_describe
new_index = self.compute_index(0, new_data, False)
if numeric:
new_dtypes = pandas.Series(
Expand Down

0 comments on commit 5b34421

Please sign in to comment.