Skip to content

Commit

Permalink
Fixed Min and Max (#161)
Browse files Browse the repository at this point in the history
* fixes for min

* fixed min and max

* formatting
  • Loading branch information
osalpekar authored and devin-petersohn committed Oct 16, 2018
1 parent a770e15 commit 65d5aca
Showing 1 changed file with 42 additions and 10 deletions.
52 changes: 42 additions & 10 deletions modin/data_management/data_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -903,6 +903,46 @@ def full_reduce(self, axis, map_func, reduce_func=None, numeric_only=False):
result.index = data_manager.index
return result

def _process_min_max(self, func, **kwargs):
    """Shared implementation behind the min and max reductions.

    Args:
        func: Callable applied to each non-empty frame as
            ``func(df, **kwargs)``; ``max`` and ``min`` pass
            ``pandas.DataFrame.max`` and ``pandas.DataFrame.min``.
        **kwargs: Forwarded to ``func`` through ``self._prepare_method``.
            ``axis`` (default 0) and ``numeric_only`` are also read here.

    Return:
        Whatever ``self.full_reduce`` produces for the prepared function;
        per the original contract, a Pandas series containing the min or
        max values from each column or row.

    Raises:
        TypeError: If ``axis`` is truthy, ``numeric_only`` is explicitly
            False, the dtypes are a mix of numeric and non-numeric, and no
            column is ``datetime64[ns]`` or ``timedelta64[ns]``.
    """
    # Pandas default is 0 (though not mentioned in docs)
    axis = kwargs.get("axis", 0)

    # Only an explicit ``numeric_only=False`` asks for the strict check.
    # A value of None is allowed through, because it falls back to True
    # when the operation fails on mixed dtypes.
    explicitly_not_numeric_only = kwargs.get("numeric_only", None) is False

    if axis and explicitly_not_numeric_only:
        numeric_flags = [is_numeric_dtype(dtype) for dtype in self.dtypes]
        # Two distinct flags means numeric and non-numeric dtypes coexist,
        # and comparing across them does not make sense.
        if np.unique(numeric_flags).size == 2:
            datetime_dtype = np.dtype("datetime64[ns]")
            timedelta_dtype = np.dtype("timedelta64[ns]")

            def is_not_time_like(dtype):
                return dtype != datetime_dtype and dtype != timedelta_dtype

            # Datetime and timedelta columns are the exception: when any
            # is present the comparison goes ahead instead of raising.
            if all(map(is_not_time_like, self.dtypes)):
                raise TypeError("Cannot compare Numeric and Non-Numeric Types")

    # A truthy axis always reduces over numeric data only; otherwise the
    # caller's setting is honoured, defaulting to False.
    if axis:
        numeric_only = True
    else:
        numeric_only = kwargs.get("numeric_only", False)

    def min_max_builder(df, **inner_kwargs):
        # Empty frames are skipped; the builder then yields None.
        if df.empty:
            return None
        return func(df, **inner_kwargs)

    map_func = self._prepare_method(min_max_builder, **kwargs)
    return self.full_reduce(axis, map_func, numeric_only=numeric_only)

def count(self, **kwargs):
"""Counts the number of non-NaN objects for each column or row.
Expand All @@ -921,11 +961,7 @@ def max(self, **kwargs):
Return:
Pandas series with the maximum values from each column or row.
"""
# Pandas default is 0 (though not mentioned in docs)
axis = kwargs.get("axis", 0)
numeric_only = True if axis else kwargs.get("numeric_only", False)
func = self._prepare_method(pandas.DataFrame.max, **kwargs)
return self.full_reduce(axis, func, numeric_only=numeric_only)
return self._process_min_max(pandas.DataFrame.max, **kwargs)

def mean(self, **kwargs):
"""Returns the mean for each numerical column or row.
Expand All @@ -944,11 +980,7 @@ def min(self, **kwargs):
Return:
Pandas series with the minimum value from each column or row.
"""
# Pandas default is 0 (though not mentioned in docs)
axis = kwargs.get("axis", 0)
numeric_only = True if axis else kwargs.get("numeric_only", False)
func = self._prepare_method(pandas.DataFrame.min, **kwargs)
return self.full_reduce(axis, func, numeric_only=numeric_only)
return self._process_min_max(pandas.DataFrame.min, **kwargs)

def prod(self, **kwargs):
"""Returns the product of each numerical column or row.
Expand Down

0 comments on commit 65d5aca

Please sign in to comment.