# public pydata/pandas

### Subversion checkout URL

You can clone with HTTPS or Subversion.

# Fix for Issue #1798 (#1958)

Closed
wants to merge 1 commit into from
 +31 4

### 2 participants

#1798

 dnmiller DOC: Adding details on normalization for variance functions. 061884b
Owner

thanks!

closed this

Showing 1 unique commit by 1 author.

Sep 23, 2012
DOC: Adding details on normalization for variance functions. 061884b

Showing 3 changed files with 31 additions and 4 deletions.

1. doc/source/gotchas.rst
2. pandas/core/frame.py
3. pandas/core/series.py
10  doc/source/gotchas.rst
```diff
@@ -302,3 +302,13 @@ of the new set of columns rather than the original ones:
    :suppress:
 
    os.remove('tmp.csv')
+
+
+Differences with NumPy
+----------------------
+For Series and DataFrame objects, ``var`` normalizes by ``N-1`` to produce
+unbiased estimates of the sample variance, while NumPy's ``var`` normalizes
+by N, which measures the variance of the sample. Note that ``cov``
+normalizes by ``N-1`` in both pandas and NumPy.
+
+
```
13  pandas/core/frame.py
```diff
@@ -4046,6 +4046,9 @@ def cov(self):
         Returns
         -------
         y : DataFrame
+
+        y contains the covariance matrix of the DataFrame's time series.
+        The covariance is normalized by N-1 (unbiased estimator).
         """
         numeric_df = self._get_numeric_data()
         cols = numeric_df.columns
@@ -4362,7 +4365,10 @@ def mad(self, axis=0, skipna=True, level=None):
 
     @Substitution(name='variance', shortname='var',
                   na_action=_doc_exclude_na, extras='')
-    @Appender(_stat_doc)
+    @Appender(_stat_doc +
+        """
+        Normalized by N-1 (unbiased estimator).
+        """)
     def var(self, axis=0, skipna=True, level=None, ddof=1):
         if level is not None:
             return self._agg_by_level('var', axis=axis, level=level,
@@ -4372,7 +4378,10 @@ def var(self, axis=0, skipna=True, level=None, ddof=1):
 
     @Substitution(name='standard deviation', shortname='std',
                   na_action=_doc_exclude_na, extras='')
-    @Appender(_stat_doc)
+    @Appender(_stat_doc +
+        """
+        Normalized by N-1 (unbiased estimator).
+        """)
     def std(self, axis=0, skipna=True, level=None, ddof=1):
         if level is not None:
             return self._agg_by_level('std', axis=axis, level=level,
```
12  pandas/core/series.py
```diff
@@ -1140,7 +1140,10 @@ def max(self, axis=None, out=None, skipna=True, level=None):
 
     @Substitution(name='standard deviation', shortname='stdev',
                   na_action=_doc_exclude_na, extras='')
-    @Appender(_stat_doc)
+    @Appender(_stat_doc +
+        """
+        Normalized by N-1 (unbiased estimator).
+        """)
     def std(self, axis=None, dtype=None, out=None, ddof=1, skipna=True,
             level=None):
         if level is not None:
@@ -1150,7 +1153,10 @@ def std(self, axis=None, dtype=None, out=None, ddof=1, skipna=True,
 
     @Substitution(name='variance', shortname='var',
                   na_action=_doc_exclude_na, extras='')
-    @Appender(_stat_doc)
+    @Appender(_stat_doc +
+        """
+        Normalized by N-1 (unbiased estimator).
+        """)
     def var(self, axis=None, dtype=None, out=None, ddof=1, skipna=True,
             level=None):
         if level is not None:
@@ -1463,6 +1469,8 @@ def cov(self, other):
         Returns
         -------
         covariance : float
+
+        Normalized by N-1 (unbiased estimator).
         """
         this, other = self.align(other, join='inner')
         if len(this) == 0:
```

Tip: You can add notes to lines in a file. Hover to the left of a line to make a note

Something went wrong with that request. Please try again.