Skip to content

Commit

Permalink
Fix numeric functions (#133)
Browse files Browse the repository at this point in the history
* Fixed and cleaned up rest of numeric functions

* formatting

* Removing excess vertical whitespace
  • Loading branch information
williamma12 authored and devin-petersohn committed Oct 8, 2018
1 parent 1c01b0a commit 07f3cbc
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 97 deletions.
181 changes: 84 additions & 97 deletions modin/data_management/data_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -1216,6 +1216,20 @@ def last_valid_index_builder(df):

return self.index[first_result.max()]

def median(self, **kwargs):
    """Compute the median along the requested axis.

    Args:
        **kwargs: Keyword arguments handed to ``self._prepare_method``
            together with ``pandas.DataFrame.median``. ``axis`` is also
            read here and falls back to 0 when it is not supplied.

    Returns:
        Series containing the median of each column or row.
    """
    # Pandas default is 0 (though not mentioned in docs)
    axis = kwargs.get("axis", 0)
    early_result, numeric_manager = self.numeric_function_clean_dataframe(axis)
    if early_result is None:
        # NOTE(review): `var` builds its function through the manager
        # returned by the cleaning helper rather than through `self`;
        # confirm which of the two is intended.
        reducer = self._prepare_method(pandas.DataFrame.median, **kwargs)
        return numeric_manager.full_axis_reduce(reducer, axis)
    # The cleaning helper already produced the final answer.
    return early_result

def memory_usage(self, **kwargs):
"""Returns the memory usage of each column.
Expand All @@ -1240,6 +1254,62 @@ def nunique(self, **kwargs):
func = self._prepare_method(pandas.DataFrame.nunique, **kwargs)
return self.full_axis_reduce(func, axis)

def quantile_for_single_value(self, **kwargs):
    """Compute a single quantile along the requested axis.

    Args:
        **kwargs: Keyword arguments handed to ``self._prepare_method``
            together with the quantile builder. The keys read directly
            here are:
            axis: Axis to reduce over; defaults to 0.
            q: The quantile to compute, a real number; defaults to 0.5.
            numeric_only: When truthy (the default), the data first goes
                through ``self.numeric_function_clean_dataframe``.

    Returns:
        Series containing the quantile of each column or row. When the
        reduction runs, the Series' ``name`` is set to ``q``; a result
        produced by the cleaning helper is returned unchanged.

    Raises:
        TypeError: If ``q`` is not a real number.
    """
    import numbers

    axis = kwargs.get("axis", 0)
    q = kwargs.get("q", 0.5)
    numeric_only = kwargs.get("numeric_only", True)
    # Validate explicitly instead of `assert type(q) is float`: asserts are
    # stripped under `python -O`, and the exact-type test rejected valid
    # quantiles such as 0, 1 or numpy.float64(0.5).
    if not isinstance(q, numbers.Real):
        raise TypeError(
            "q must be a real number, got {}".format(type(q).__name__)
        )
    if numeric_only:
        result, data_manager = self.numeric_function_clean_dataframe(axis)
        if result is not None:
            return result
    else:
        data_manager = self

    def quantile_builder(df, **kwargs):
        try:
            return pandas.DataFrame.quantile(df, **kwargs)
        except ValueError:
            # Best-effort fallback kept from the original code; presumably
            # covers partitions with nothing to compute -- TODO confirm.
            return pandas.Series()

    func = self._prepare_method(quantile_builder, **kwargs)
    result = data_manager.full_axis_reduce(func, axis)
    # Label the reduced Series with the requested quantile.
    result.name = q
    return result

def skew(self, **kwargs):
    """Compute the skew along the requested axis.

    Args:
        **kwargs: Keyword arguments handed to ``self._prepare_method``
            together with ``pandas.DataFrame.skew``. ``axis`` is also read
            here and falls back to 0 when it is not supplied.

    Returns:
        Series containing the skew of each column or row.
    """
    # Pandas default is 0 (though not mentioned in docs)
    axis = kwargs.get("axis", 0)
    early_result, numeric_manager = self.numeric_function_clean_dataframe(axis)
    if early_result is None:
        # NOTE(review): `var` builds its function through the manager
        # returned by the cleaning helper rather than through `self`;
        # confirm which of the two is intended.
        reducer = self._prepare_method(pandas.DataFrame.skew, **kwargs)
        return numeric_manager.full_axis_reduce(reducer, axis)
    # The cleaning helper already produced the final answer.
    return early_result

def std(self, **kwargs):
    """Compute the standard deviation along the requested axis.

    Args:
        **kwargs: Keyword arguments handed to ``self._prepare_method``
            together with ``pandas.DataFrame.std``. ``axis`` is also read
            here and falls back to 0 when it is not supplied.

    Returns:
        Series containing the standard deviation of each column or row.
    """
    # Pandas default is 0 (though not mentioned in docs)
    axis = kwargs.get("axis", 0)
    early_result, numeric_manager = self.numeric_function_clean_dataframe(axis)
    if early_result is None:
        # NOTE(review): `var` builds its function through the manager
        # returned by the cleaning helper rather than through `self`;
        # confirm which of the two is intended.
        reducer = self._prepare_method(pandas.DataFrame.std, **kwargs)
        return numeric_manager.full_axis_reduce(reducer, axis)
    # The cleaning helper already produced the final answer.
    return early_result

def to_datetime(self, **kwargs):
"""Converts the Manager to a Series of DateTime objects.
Expand All @@ -1255,6 +1325,20 @@ def to_datetime_builder(df, **kwargs):
func = self._prepare_method(to_datetime_builder, **kwargs)
return self.full_axis_reduce(func, 1)

def var(self, **kwargs):
    """Compute the variance along the requested axis.

    Args:
        **kwargs: Keyword arguments handed to the cleaned manager's
            ``_prepare_method`` together with ``pandas.DataFrame.var``.
            ``axis`` is also read here and falls back to 0 when it is not
            supplied.

    Returns:
        Series containing the variance of each column or row.
    """
    # Pandas default is 0 (though not mentioned in docs)
    axis = kwargs.get("axis", 0)
    early_result, numeric_manager = self.numeric_function_clean_dataframe(axis)
    if early_result is None:
        # Both the prepared function and the reduction come from the
        # manager returned by the cleaning helper.
        reducer = numeric_manager._prepare_method(pandas.DataFrame.var, **kwargs)
        return numeric_manager.full_axis_reduce(reducer, axis)
    # The cleaning helper already produced the final answer.
    return early_result

# END Column/Row partitions reduce operations

# Column/Row partitions reduce operations over select indices
Expand Down Expand Up @@ -1325,103 +1409,6 @@ def describe_builder(df, **kwargs):
)
return self.__constructor__(new_data, new_index, new_columns, new_dtypes)

def median(self, **kwargs):
    """Compute the median along the requested axis.

    Args:
        **kwargs: Keyword arguments handed to ``self._prepare_method``
            together with a thin wrapper around
            ``pandas.DataFrame.median``. ``axis`` is also read here and
            falls back to 0 when it is not supplied.

    Returns:
        Series containing the median of each column or row.
    """
    # Pandas default is 0 (though not mentioned in docs)
    axis = kwargs.get("axis", 0)
    precomputed, manager = self.numeric_function_clean_dataframe(axis)
    if precomputed is not None:
        # The cleaning helper already produced the final answer.
        return precomputed

    def _median_of(df, **options):
        # Plain pass-through to the pandas implementation.
        return pandas.DataFrame.median(df, **options)

    prepared = self._prepare_method(_median_of, **kwargs)
    return manager.full_axis_reduce(prepared, axis)

def skew(self, **kwargs):
    """Compute the skew along the requested axis.

    Args:
        **kwargs: Keyword arguments handed to ``self._prepare_method``
            together with a thin wrapper around ``pandas.DataFrame.skew``.
            ``axis`` is also read here and falls back to 0 when it is not
            supplied.

    Returns:
        Series containing the skew of each column or row.
    """
    # Pandas default is 0 (though not mentioned in docs)
    axis = kwargs.get("axis", 0)
    precomputed, manager = self.numeric_function_clean_dataframe(axis)
    if precomputed is not None:
        # The cleaning helper already produced the final answer.
        return precomputed

    def _skew_of(df, **options):
        # Plain pass-through to the pandas implementation.
        return pandas.DataFrame.skew(df, **options)

    prepared = self._prepare_method(_skew_of, **kwargs)
    return manager.full_axis_reduce(prepared, axis)

def std(self, **kwargs):
    """Compute the standard deviation along the requested axis.

    Args:
        **kwargs: Keyword arguments handed to ``self._prepare_method``
            together with a thin wrapper around ``pandas.DataFrame.std``.
            ``axis`` is also read here and falls back to 0 when it is not
            supplied.

    Returns:
        Series containing the standard deviation of each column or row.
    """
    # Pandas default is 0 (though not mentioned in docs)
    axis = kwargs.get("axis", 0)
    precomputed, manager = self.numeric_function_clean_dataframe(axis)
    if precomputed is not None:
        # The cleaning helper already produced the final answer.
        return precomputed

    def _std_of(df, **options):
        # Plain pass-through to the pandas implementation.
        return pandas.DataFrame.std(df, **options)

    prepared = self._prepare_method(_std_of, **kwargs)
    return manager.full_axis_reduce(prepared, axis)

def var(self, **kwargs):
    """Compute the variance along the requested axis.

    Args:
        **kwargs: Keyword arguments handed to the cleaned manager's
            ``_prepare_method`` together with ``pandas.DataFrame.var``.
            ``axis`` is also read here and falls back to 0 when it is not
            supplied.

    Returns:
        Series containing the variance of each column or row.
    """
    # Pandas default is 0 (though not mentioned in docs)
    axis = kwargs.get("axis", 0)
    precomputed, manager = self.numeric_function_clean_dataframe(axis)
    if precomputed is not None:
        # The cleaning helper already produced the final answer.
        return precomputed

    # Both the prepared function and the reduction come from the manager
    # returned by the cleaning helper.
    prepared = manager._prepare_method(pandas.DataFrame.var, **kwargs)
    return manager.full_axis_reduce(prepared, axis)

def quantile_for_single_value(self, **kwargs):
    """Compute a single quantile along the requested axis.

    Args:
        **kwargs: Keyword arguments handed to ``self._prepare_method``
            together with the quantile builder. The keys read directly
            here are:
            axis: Axis to reduce over; defaults to 0.
            q: The quantile to compute, a real number; defaults to 0.5.
            numeric_only: When truthy (the default), the data first goes
                through ``self.numeric_function_clean_dataframe``.

    Returns:
        Series containing the quantile of each column or row. When the
        reduction runs, the Series' ``name`` is set to ``q``; a result
        produced by the cleaning helper is returned unchanged.

    Raises:
        TypeError: If ``q`` is not a real number.
    """
    import numbers

    axis = kwargs.get("axis", 0)
    q = kwargs.get("q", 0.5)
    numeric_only = kwargs.get("numeric_only", True)
    # Validate explicitly instead of `assert type(q) is float`: asserts are
    # stripped under `python -O`, and the exact-type test rejected valid
    # quantiles such as 0, 1 or numpy.float64(0.5).
    if not isinstance(q, numbers.Real):
        raise TypeError(
            "q must be a real number, got {}".format(type(q).__name__)
        )
    if numeric_only:
        result, data_manager = self.numeric_function_clean_dataframe(axis)
        if result is not None:
            return result
    else:
        data_manager = self

    def quantile_builder(df, **kwargs):
        try:
            return pandas.DataFrame.quantile(df, **kwargs)
        except ValueError:
            # Best-effort fallback kept from the original code; presumably
            # covers partitions with nothing to compute -- TODO confirm.
            return pandas.Series()

    func = self._prepare_method(quantile_builder, **kwargs)
    result = data_manager.full_axis_reduce(func, axis)
    # Label the reduced Series with the requested quantile.
    result.name = q
    return result

# END Column/Row partitions reduce operations over select indices

# Map across rows/columns
Expand Down
14 changes: 14 additions & 0 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2185,6 +2185,9 @@ def median(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
The median of the DataFrame. (Pandas series)
"""
axis = pandas.DataFrame()._get_axis_number(axis) if axis is not None else 0
if numeric_only is not None and not numeric_only:
self._validate_dtypes(numeric_only=True)

return self._data_manager.median(
axis=axis, skipna=skipna, level=level, numeric_only=numeric_only, **kwargs
)
Expand Down Expand Up @@ -2653,6 +2656,7 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, interpolation="linear"):
index is the columns of self and the values
are the quantiles.
"""
axis = pandas.DataFrame()._get_axis_number(axis) if axis is not None else 0

def check_dtype(t):
return is_numeric_dtype(t) or is_datetime_or_timedelta_dtype(t)
Expand Down Expand Up @@ -3419,6 +3423,10 @@ def skew(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs):
Returns:
skew : Series or DataFrame (if level specified)
"""
axis = pandas.DataFrame()._get_axis_number(axis) if axis is not None else 0
if numeric_only is not None and not numeric_only:
self._validate_dtypes(numeric_only=True)

return self._data_manager.skew(
axis=axis, skipna=skipna, level=level, numeric_only=numeric_only, **kwargs
)
Expand Down Expand Up @@ -3560,6 +3568,9 @@ def std(
The std of the DataFrame (Pandas Series)
"""
axis = pandas.DataFrame()._get_axis_number(axis) if axis is not None else 0
if numeric_only is not None and not numeric_only:
self._validate_dtypes(numeric_only=True)

return self._data_manager.std(
axis=axis,
skipna=skipna,
Expand Down Expand Up @@ -4118,6 +4129,9 @@ def var(
The variance of the DataFrame.
"""
axis = pandas.DataFrame()._get_axis_number(axis) if axis is not None else 0
if numeric_only is not None and not numeric_only:
self._validate_dtypes(numeric_only=True)

return self._data_manager.var(
axis=axis,
skipna=skipna,
Expand Down

0 comments on commit 07f3cbc

Please sign in to comment.