Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP

Loading…

Fix for Issue #1798 #1958

Closed
wants to merge 1 commit into from

2 participants

@dnmiller

Added comments to specify how variance is normalized.

#1798

@wesm
Owner

thanks!

@wesm wesm closed this
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
This page is out of date. Refresh to see the latest.
View
10 doc/source/gotchas.rst
@@ -302,3 +302,13 @@ of the new set of columns rather than the original ones:
:suppress:
os.remove('tmp.csv')
+
+
+Differences with NumPy
+----------------------
+For Series and DataFrame objects, ``var`` normalizes by ``N-1`` by default to
+produce an unbiased estimate of the population variance, while NumPy's ``var``
+normalizes by ``N`` by default, which measures the variance of the sample
+itself. Note that ``cov`` normalizes by ``N-1`` by default in both pandas and
+NumPy.
+
+
View
13 pandas/core/frame.py
@@ -4046,6 +4046,9 @@ def cov(self):
Returns
-------
y : DataFrame
+
+ y contains the pairwise covariance matrix of the DataFrame's numeric
+ columns. The covariance is normalized by N-1 (unbiased estimator).
"""
numeric_df = self._get_numeric_data()
cols = numeric_df.columns
@@ -4362,7 +4365,10 @@ def mad(self, axis=0, skipna=True, level=None):
@Substitution(name='variance', shortname='var',
na_action=_doc_exclude_na, extras='')
- @Appender(_stat_doc)
+ @Appender(_stat_doc +
+ """
+ Normalized by N-1 by default (ddof=1), giving an unbiased estimator.
+ """)
def var(self, axis=0, skipna=True, level=None, ddof=1):
if level is not None:
return self._agg_by_level('var', axis=axis, level=level,
@@ -4372,7 +4378,10 @@ def var(self, axis=0, skipna=True, level=None, ddof=1):
@Substitution(name='standard deviation', shortname='std',
na_action=_doc_exclude_na, extras='')
- @Appender(_stat_doc)
+ @Appender(_stat_doc +
+ """
+ Normalized by N-1 by default (ddof=1), i.e. the square root of the
+ unbiased variance estimate.
+ """)
def std(self, axis=0, skipna=True, level=None, ddof=1):
if level is not None:
return self._agg_by_level('std', axis=axis, level=level,
View
12 pandas/core/series.py
@@ -1140,7 +1140,10 @@ def max(self, axis=None, out=None, skipna=True, level=None):
@Substitution(name='standard deviation', shortname='stdev',
na_action=_doc_exclude_na, extras='')
- @Appender(_stat_doc)
+ @Appender(_stat_doc +
+ """
+ Normalized by N-1 by default (ddof=1), i.e. the square root of the
+ unbiased variance estimate.
+ """)
def std(self, axis=None, dtype=None, out=None, ddof=1, skipna=True,
level=None):
if level is not None:
@@ -1150,7 +1153,10 @@ def std(self, axis=None, dtype=None, out=None, ddof=1, skipna=True,
@Substitution(name='variance', shortname='var',
na_action=_doc_exclude_na, extras='')
- @Appender(_stat_doc)
+ @Appender(_stat_doc +
+ """
+ Normalized by N-1 by default (ddof=1), giving an unbiased estimator.
+ """)
def var(self, axis=None, dtype=None, out=None, ddof=1, skipna=True,
level=None):
if level is not None:
@@ -1463,6 +1469,8 @@ def cov(self, other):
Returns
-------
covariance : float
+
+ Normalized by N-1 (unbiased estimator).
"""
this, other = self.align(other, join='inner')
if len(this) == 0:
Something went wrong with that request. Please try again.