In [1]:
from __future__ import division
import numpy as np
import scipy.stats as stats

Draw N samples from the standard normal distribution.

In [2]:
# Seed the legacy global NumPy RNG so that "Restart Kernel -> Run All" draws the
# same samples on every run; without a seed every printed value below changes
# from one execution to the next.
SEED = 0
np.random.seed(SEED)

# Sample size.
N = 15
# N independent draws from the standard normal distribution.
s = np.random.normal(size=N)

Reproduce `numpy.mean`

In [3]:
# Arithmetic mean by hand: add up every draw, then divide by the sample size.
total = sum(s)
mu = total / N

mean_report = "Mean: Numpy = {0}, Reproduced = {1}"
print(mean_report.format(np.mean(s), mu))

Mean: Numpy = 0.0395781375689, Reproduced = 0.0395781375689


Reproduce `numpy.var`

In [4]:
# Squared deviations from the sample mean, shared by both estimators.
sq_dev = (s - mu)**2

# Second central moment with 1/N normalisation (the biased variance estimate).
mu2_biased = np.mean(sq_dev)

# Variance estimate with 1/(N - 1) normalisation (the unbiased one).
mu2_unbiased = sum(sq_dev) / (N - 1)

biased_report = "Biased variance: Numpy = {0}, Reproduced = {1}"
unbiased_report = "Unbiased variance: Numpy = {0}, Reproduced = {1}"
print(biased_report.format(np.var(s), mu2_biased))
print(unbiased_report.format(np.var(s, ddof=1), mu2_unbiased))

Biased variance: Numpy = 0.813473419943, Reproduced = 0.813473419943
Unbiased variance: Numpy = 0.871578664225, Reproduced = 0.871578664225


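Reproduce `numpy.cov`. Note that the correlation coefficient (`numpy.corrcoef`) is insensitive to the biased/unbiased choice, because the normalization factor cancels between the covariance and the standard deviations.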

In [5]:
# Two independent samples of size N from the standard normal distribution.
x = np.random.normal(size=N)
y = np.random.normal(size=N)

# Biased estimate of covariance: mean of the products of deviations (1/N).
cov_biased = np.mean((x - x.mean())*(y - y.mean()))
# np.cov returns the 2x2 covariance matrix of (x, y); the off-diagonal entry
# [0, 1] is cov(x, y). ddof=0 selects the 1/N normalisation.
np_cov_biased = np.cov(x, y, ddof=0)[0, 1]

# Unbiased estimate of covariance: same sum of products, divided by N - 1.
cov_unbiased = np.sum((x - x.mean())*(y - y.mean())) / (N - 1)
# With no ddof argument np.cov uses the 1/(N - 1) normalisation.
np_cov_unbiased = np.cov(x, y)[0, 1]

print("Biased covariance: Numpy = {0}, Reproduced = {1}".format(np_cov_biased, cov_biased))
# Compare NumPy's value against the hand-rolled one; the "Numpy" slot must show
# np_cov_unbiased, otherwise the check compares cov_unbiased with itself.
print("Unbiased covariance: Numpy = {0}, Reproduced = {1}".format(np_cov_unbiased, cov_unbiased))

Biased covariance: Numpy = -0.239269244548, Reproduced = -0.239269244548
Unbiased covariance: Numpy = -0.256359904873, Reproduced = -0.256359904873


Reproduce `scipy.stats.skew`

In [6]:
# Cubed deviations from the sample mean, reused by both moment estimates.
cubed_dev = (s - mu)**3

# Third central moment: biased (1/N) and unbiased (N / ((N-1)(N-2))) estimates.
mu3_biased = np.mean(cubed_dev)
mu3_unbiased = sum(cubed_dev) * N/((N-1)*(N-2))

# Biased skewness: biased third moment over the biased variance to the power 1.5.
skew_biased = mu3_biased / mu2_biased**1.5
# "Unbiased" skewness: the numerator and the variance in the denominator are
# each unbiased, but the resulting ratio need not be.
skew_unbiased = mu3_unbiased / mu2_unbiased**1.5

biased_report = "Biased skewness: Scipy = {0}, Reproduced = {1}"
unbiased_report = '"Unbiased" skewness: Scipy = {0}, Reproduced = {1}'
print(biased_report.format(stats.skew(s), skew_biased))
print(unbiased_report.format(stats.skew(s, bias=False), skew_unbiased))

Biased skewness: Scipy = 0.0791253091256, Reproduced = 0.0791253091256
"Unbiased" skewness: Scipy = 0.0882026665152, Reproduced = 0.0882026665152


Reproduce `scipy.stats.kurtosis`

In [7]:
# Fourth central moment with 1/N normalisation (biased).
mu4_biased = np.mean((s - mu)**4)

# Biased excess kurtosis: standardised fourth moment minus 3.
kurt_biased = mu4_biased / mu2_biased**2 - 3

# Unbiased estimate of the fourth cumulant, written as a weighted combination
# of the biased fourth moment and the squared biased variance.
m4_weight = N**2*(N+1)/((N-1)*(N-2)*(N-3))
m2sq_weight = 3*N**2/((N-2)*(N-3))
k4_unbiased = m4_weight*mu4_biased - m2sq_weight*mu2_biased**2

# "Unbiased" excess kurtosis: the numerator and the variance in the denominator
# are each unbiased, but the resulting ratio need not be.
kurt_unbiased = k4_unbiased / mu2_unbiased**2

biased_report = "Biased excess kurtosis: Scipy = {0}, Reproduced = {1}"
unbiased_report = '"Unbiased" excess kurtosis: Scipy = {0}, Reproduced = {1}'
print(biased_report.format(stats.kurtosis(s), kurt_biased))
print(unbiased_report.format(stats.kurtosis(s, bias=False), kurt_unbiased))

Biased excess kurtosis: Scipy = -0.426522495553, Reproduced = -0.426522495553
"Unbiased" excess kurtosis: Scipy = -0.0739810192549, Reproduced = -0.0739810192549
