In [1]:
import math
import statistics
import numpy as np
import scipy.stats
import pandas as pd

In [2]:
x = [8.0, 1, 2.5, 4, 28.0]
x_with_nan = [8.0, 1, 2.5, math.nan, 4, 28.0]
x

[8.0, 1, 2.5, 4, 28.0]

In [3]:
x_with_nan

[8.0, 1, 2.5, nan, 4, 28.0]

In [4]:
math.isnan(np.nan), np.isnan(math.nan)

(True, True)

In [6]:
math.isnan(x_with_nan[3]), np.isnan(x_with_nan[3])

(True, True)

In [7]:
y, y_with_nan = np.array(x), np.array(x_with_nan)

In [8]:
z, z_with_nan = pd.Series(x), pd.Series(x_with_nan)

In [9]:
y

array([ 8. ,  1. ,  2.5,  4. , 28. ])

In [10]:
z

0     8.0
1     1.0
2     2.5
3     4.0
4    28.0
dtype: float64

In [11]:
mean_ = sum(x) / len(x)

In [12]:
mean_

8.7

In [13]:
mean_ = statistics.mean(x)

In [14]:
mean_

8.7

In [15]:
mean_ = statistics.fmean(x)

In [16]:
mean_

8.7

In [17]:
mean_ = statistics.mean(x_with_nan)
mean_

nan

In [18]:
mean_ = y.mean()

In [19]:
mean_

8.7

In [20]:
np.nanmean(y_with_nan)

8.7

In [21]:
z_with_nan.mean()

8.7

In [22]:
x = [8.0, 1, 2.5, 4, 28.0]
w = [0.1, 0.2, 0.3, 0.25, 0.15]
wmean = sum(w[i] * x[i] for i in range(len(x))) / sum(w)
wmean

6.95

In [23]:
y, z, w = np.array(x), pd.Series(x), np.array(w)
wmean = np.average(y, weights=w)

In [24]:
wmean

6.95

In [25]:
hmean = len(x) / sum(1 / item for item in x)

In [26]:
hmean

2.7613412228796843

In [27]:
hmean = statistics.harmonic_mean(x)

In [28]:
hmean

2.7613412228796843

In [29]:
statistics.harmonic_mean([1, 0, 2])

0

In [31]:
statistics.harmonic_mean([1, 2, 2])

1.5

In [32]:
scipy.stats.hmean(y)


2.7613412228796843

In [33]:
median_ = statistics.median(x)

In [35]:
statistics.median_low(x[:-1])

2.5

In [36]:
statistics.median_high(x[:-1])

4

In [37]:
np.nanmedian(y_with_nan)

4.0

In [39]:
u = [2, 3, 2, 8, 12]
mode_ = max((u.count(item), item) for item in set(u))[1]
mode_

2

In [40]:
mode_ = statistics.mode(u)

In [41]:
mode_

2

In [42]:
mode_ = statistics.multimode(u)

In [43]:
mode_

[2]

In [44]:
n = len(x)
mean_ = sum(x) / n
var_ = sum((item - mean_)**2 for item in x) / (n - 1)
var_

123.19999999999999

In [45]:
var_ = statistics.variance(x)

In [46]:
var_

123.2

In [47]:
var_ = np.var(y, ddof=1)

In [48]:
var_

123.19999999999999

In [49]:
std_ = var_ ** 0.5

In [50]:
std_

11.099549540409285

In [51]:
std_ = statistics.stdev(x)

In [52]:
std_

11.099549540409287

In [53]:
np.std(y, ddof=1)

11.099549540409285

In [54]:
y.std(ddof=1)

11.099549540409285

In [55]:
np.std(y_with_nan, ddof=1)

nan

In [56]:
y_with_nan.std(ddof=1)

nan

In [57]:
np.nanstd(y_with_nan, ddof=1)

11.099549540409285

In [58]:
z.std(ddof=1)

11.099549540409285

In [59]:
z_with_nan.std(ddof=1)

11.099549540409285

In [60]:
x = [8.0, 1, 2.5, 4, 28.0]
n = len(x)
mean_ = sum(x) / n
var_ = sum((item - mean_)**2 for item in x) / (n - 1)
std_ = var_ ** 0.5
skew_ = (sum((item - mean_)**3 for item in x) * n / ((n - 1) * (n - 2) * std_**3))
skew_

1.9470432273905929

In [61]:
y, y_with_nan = np.array(x), np.array(x_with_nan)
scipy.stats.skew(y, bias=False)

1.9470432273905927

In [62]:
scipy.stats.skew(y_with_nan, bias=False)

nan

In [63]:
z, z_with_nan = pd.Series(x), pd.Series(x_with_nan)

In [64]:
z.skew()

1.9470432273905924

In [65]:
z_with_nan.skew()

1.9470432273905924

In [68]:
x = [-5.0, -1.1, 0.1, 2.0, 8.0, 12.8, 21.0, 25.8, 41.0]
statistics.quantiles(x, n=2)
statistics.quantiles(x, n=4, method='inclusive')

[0.1, 8.0, 21.0]

In [69]:
y = np.array(x)
np.percentile(y, 5)
np.percentile(y, 95)

34.919999999999995

In [71]:
np.percentile(y, [25, 50, 75])
np.median(y)

8.0

In [72]:
y_with_nan = np.insert(y, 2, np.nan)
y_with_nan


array([-5. , -1.1,  nan,  0.1,  2. ,  8. , 12.8, 21. , 25.8, 41. ])

In [73]:
np.nanpercentile(y_with_nan, [25, 50, 75])

array([ 0.1,  8. , 21. ])

In [74]:
np.quantile(y, 0.05)


-3.44

In [75]:
np.quantile(y, 0.95)

34.919999999999995

In [76]:
np.quantile(y, [0.25, 0.5, 0.75])

array([ 0.1,  8. , 21. ])

In [77]:
np.nanquantile(y_with_nan, [0.25, 0.5, 0.75])

array([ 0.1,  8. , 21. ])

In [78]:
z, z_with_nan = pd.Series(y), pd.Series(y_with_nan)

In [79]:
z.quantile(0.05)

-3.44

In [80]:
z.quantile(0.95)

34.919999999999995

In [81]:
z_with_nan.quantile([0.25, 0.5, 0.75])

0.25     0.1
0.50     8.0
0.75    21.0
dtype: float64

In [82]:
np.ptp(y)

46.0

In [83]:
np.amax(y) - np.amin(y)

46.0

In [84]:
quartiles = np.quantile(y, [0.25, 0.75])

In [85]:
quartiles[1] - quartiles[0]

20.9

In [86]:
quartiles = z.quantile([0.25, 0.75])

In [87]:
quartiles[0.75] - quartiles[0.25]

20.9

In [88]:
result = scipy.stats.describe(y, ddof=1, bias=False)

In [89]:
result

DescribeResult(nobs=9, minmax=(-5.0, 41.0), mean=11.622222222222222, variance=228.75194444444446, skewness=0.9249043136685094, kurtosis=0.14770623629658886)

In [90]:
result.nobs

9

In [91]:
result.minmax[0]

-5.0

In [92]:
result.minmax[1]

41.0

In [93]:
result.mean

11.622222222222222

In [94]:
result.variance

228.75194444444446

In [95]:
result.skewness

0.9249043136685094

In [96]:
result.kurtosis

0.14770623629658886

In [97]:
result = z.describe()

In [99]:
result

count     9.000000
mean     11.622222
std      15.124548
min      -5.000000
25%       0.100000
50%       8.000000
75%      21.000000
max      41.000000
dtype: float64

In [100]:
x = list(range(-10, 11))
y = [0, 2, 2, 2, 2, 3, 3, 6, 7, 4, 7, 6, 6, 9, 4, 5, 5, 10, 11, 12, 14]
x_, y_ = np.array(x), np.array(y)
x__, y__ = pd.Series(x_), pd.Series(y_)

In [101]:
n = len(x)

In [102]:
mean_x, mean_y = sum(x) / n, sum(y) / n

In [104]:
cov_xy = (sum((x[k] - mean_x) * (y[k] - mean_y) for k in range(n))/ (n - 1))

In [105]:
cov_xy

19.95

In [106]:
cov_matrix = np.cov(x_, y_)
cov_matrix

array([[38.5       , 19.95      ],
       [19.95      , 13.91428571]])

In [107]:
x_.var(ddof=1)
y_.var(ddof=1)

13.914285714285711

In [108]:
cov_xy = cov_matrix[0, 1]

In [109]:
cov_xy = x__.cov(y__)

In [110]:
cov_xy

19.95

In [111]:
cov_xy = y__.cov(x__)

In [112]:
cov_xy

19.95

In [113]:
var_x = sum((item - mean_x)**2 for item in x) / (n - 1)
var_y = sum((item - mean_y)**2 for item in y) / (n - 1)
std_x, std_y = var_x ** 0.5, var_y ** 0.5
r = cov_xy / (std_x * std_y)
r

0.861950005631606

In [114]:
r, p = scipy.stats.pearsonr(x_, y_)

In [115]:
p

5.122760847201171e-07

In [116]:
 corr_matrix = np.corrcoef(x_, y_)

In [117]:
corr_matrix

array([[1.        , 0.86195001],
       [0.86195001, 1.        ]])

In [118]:
r = corr_matrix[0, 1]

In [119]:
r

0.8619500056316061

In [120]:
scipy.stats.linregress(x_, y_)

LinregressResult(slope=0.5181818181818181, intercept=5.714285714285714, rvalue=0.861950005631606, pvalue=5.122760847201164e-07, stderr=0.06992387660074979, intercept_stderr=0.4234100995002589)

In [121]:
result = scipy.stats.linregress(x_, y_)

In [122]:
result

LinregressResult(slope=0.5181818181818181, intercept=5.714285714285714, rvalue=0.861950005631606, pvalue=5.122760847201164e-07, stderr=0.06992387660074979, intercept_stderr=0.4234100995002589)

In [123]:
r = result.rvalue

In [124]:
r

0.861950005631606

In [125]:
r = x__.corr(y__)

In [126]:
r = y__.corr(x__)

In [127]:
a = np.array([[1, 1, 1],
               [2, 3, 1],
               [4, 9, 2],
               [8, 27, 4],
               [16, 1, 1]])

In [128]:
a

array([[ 1,  1,  1],
       [ 2,  3,  1],
       [ 4,  9,  2],
       [ 8, 27,  4],
       [16,  1,  1]])

In [129]:
np.mean(a)

5.4

In [130]:
a.mean()

5.4

In [131]:
np.median(a)

2.0

In [133]:
a.var(ddof=1)

53.40000000000001

In [134]:
np.mean(a, axis=0)

array([6.2, 8.2, 1.8])

In [135]:
a.mean(axis=0)

array([6.2, 8.2, 1.8])

In [136]:
np.mean(a, axis=1)

array([ 1.,  2.,  5., 13.,  6.])

In [137]:
a.mean(axis=1)

array([ 1.,  2.,  5., 13.,  6.])

In [138]:
np.median(a, axis=0)

array([4., 3., 1.])

In [139]:
np.median(a, axis=1)

array([1., 2., 4., 8., 1.])

In [140]:
a.var(axis=0, ddof=1)

array([ 37.2, 121.2,   1.7])

In [141]:
a.var(axis=1, ddof=1)

array([  0.,   1.,  13., 151.,  75.])

In [142]:
scipy.stats.gmean(a)

array([4.        , 3.73719282, 1.51571657])

In [143]:
scipy.stats.gmean(a, axis=0)

array([4.        , 3.73719282, 1.51571657])

In [144]:
scipy.stats.gmean(a, axis=1)

array([1.        , 1.81712059, 4.16016765, 9.52440631, 2.5198421 ])

In [145]:
scipy.stats.gmean(a, axis=None)

2.829705017016332

In [146]:
scipy.stats.describe(a, axis=None, ddof=1, bias=False)

DescribeResult(nobs=15, minmax=(1, 27), mean=5.4, variance=53.40000000000001, skewness=2.264965290423389, kurtosis=5.212690982795767)

In [147]:
scipy.stats.describe(a, ddof=1, bias=False)

DescribeResult(nobs=5, minmax=(array([1, 1, 1]), array([16, 27,  4])), mean=array([6.2, 8.2, 1.8]), variance=array([ 37.2, 121.2,   1.7]), skewness=array([1.32531471, 1.79809454, 1.71439233]), kurtosis=array([1.30376344, 3.14969121, 2.66435986]))

In [148]:
scipy.stats.describe(a, axis=1, ddof=1, bias=False)

DescribeResult(nobs=3, minmax=(array([1, 1, 2, 4, 1]), array([ 1,  3,  9, 27, 16])), mean=array([ 1.,  2.,  5., 13.,  6.]), variance=array([  0.,   1.,  13., 151.,  75.]), skewness=array([0.        , 0.        , 1.15206964, 1.52787436, 1.73205081]), kurtosis=array([-3. , -1.5, -1.5, -1.5, -1.5]))