Skip to content

Commit

Permalink
Merge pull request #94 from sklam/enh/ddof
Browse files Browse the repository at this point in the history
Add a ddof option and make it default to 1 (like pandas)
  • Loading branch information
seibert authored Dec 13, 2017
2 parents ad098b0 + 93ff769 commit 0e3f774
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 9 deletions.
5 changes: 3 additions & 2 deletions pygdf/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,12 +143,13 @@ def sum(self):
def mean(self):
    """Return the sum divided by ``self.valid_count``.

    The result of ``self.sum()`` is cast to ``'f8'`` first, so the
    division is carried out in 64-bit floating point.
    """
    total = self.sum().astype('f8')
    return total / self.valid_count

def mean_var(self):
def mean_var(self, ddof=1):
x = self.astype('f8')
mu = x.mean()
n = x.valid_count
asum = _gdf.apply_reduce(libgdf.gdf_sum_squared_generic, x)
var = asum / n - mu ** 2
div = n - ddof
var = asum / div - (mu ** 2) * n / div
return mu, var

def applymap(self, udf, out_dtype=None):
Expand Down
12 changes: 6 additions & 6 deletions pygdf/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,21 +655,21 @@ def mean(self):
"""
return self._column.mean()

def std(self):
def std(self, ddof=1):
"""Compute the standard deviation of the series
"""
return np.sqrt(self.var())
return np.sqrt(self.var(ddof=ddof))

def var(self):
def var(self, ddof=1):
"""Compute the variance of the series
"""
mu, var = self.mean_var()
mu, var = self.mean_var(ddof=ddof)
return var

def mean_var(self):
def mean_var(self, ddof=1):
"""Compute mean and variance at the same time.
"""
mu, var = self._column.mean_var()
mu, var = self._column.mean_var(ddof=ddof)
return mu, var

def unique_k(self, k):
Expand Down
16 changes: 15 additions & 1 deletion pygdf/tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,27 @@ def test_series_reductions(method, dtype):

def call_test(sr):
fn = getattr(sr, method)
return fn()
if method in ['std', 'var']:
return fn(ddof=1)
else:
return fn()

expect, got = call_test(arr2), call_test(sr)
print(expect, got)
np.testing.assert_approx_equal(expect, got)


@pytest.mark.parametrize('ddof', range(3))
def test_series_std(ddof):
    """Check that ``Series.std(ddof=...)`` agrees with the pandas result.

    The input is 100 seeded random values shifted by -0.5; the test is
    parametrized over ``ddof`` values 0, 1 and 2.
    """
    # Fixed seed keeps the sample identical across runs.
    np.random.seed(0)
    data = np.random.random(100) - 0.5
    series = Series(data)
    # Named ``pandas_series`` rather than ``pd`` so the conventional
    # pandas module alias is not shadowed.
    pandas_series = series.to_pandas()
    actual = series.std(ddof=ddof)
    expected = pandas_series.std(ddof=ddof)
    np.testing.assert_approx_equal(expected, actual)


def test_series_unique():
for size in [10 ** x for x in range(5)]:
arr = np.random.randint(low=-1, high=10, size=size)
Expand Down

0 comments on commit 0e3f774

Please sign in to comment.