In [2]:
import math
import statistics
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats

In [3]:
# Small sample with one large value (28.0), plus a copy that has a missing
# value (NaN) inserted after the third element.
x = [8.0, 1, 2.5, 4, 28.0]
x_with_nan = x[:3] + [math.nan] + x[3:]
for sample in (x, x_with_nan):
    print(sample)

[8.0, 1, 2.5, 4, 28.0]
[8.0, 1, 2.5, nan, 4, 28.0]


In [4]:
# NumPy array and pandas Series versions of both samples.
y = np.array(x)
y_with_nan = np.array(x_with_nan)
z = pd.Series(x)
z_with_nan = pd.Series(x_with_nan)

# z is built but not printed; only the two arrays and the NaN Series are shown.
for container in (y, y_with_nan, z_with_nan):
    print(container)

[ 8.   1.   2.5  4.  28. ]
[ 8.   1.   2.5  nan  4.  28. ]
0     8.0
1     1.0
2     2.5
3     NaN
4     4.0
5    28.0
dtype: float64


In [5]:
# Arithmetic mean in pure Python: total of the values divided by their count.
mean_ = sum(x)/len(x)
mean_

8.7

In [6]:
# Same mean using the standard-library statistics module.
mean_ = statistics.mean(x)
mean_

8.7

In [7]:
# A single NaN in the input makes statistics.mean return nan.
mean_ = statistics.mean(x_with_nan)
mean_

nan

In [8]:
# NumPy mean, function form.
mean_ = np.mean(y)
mean_

8.7

In [9]:
# NumPy mean, method form; gives the same value as np.mean(y).
mean_ = y.mean()
mean_

8.7

In [10]:
# Both NumPy forms propagate NaN: each prints nan for the array with a missing value.
print(np.mean(y_with_nan))
print(y_with_nan.mean())

nan
nan


In [11]:
# np.nanmean skips NaN entries, so the result is the mean of the five valid values.
np.nanmean(y_with_nan)

8.7

In [13]:
# pandas Series mean.
mean_ = z.mean()
mean_

8.7

In [14]:
# Unlike the NumPy calls above, Series.mean skips the NaN (pandas' default
# skipna=True), so this still gives the mean of the valid values.
z_with_nan.mean()

8.7

In [15]:
# Median in pure Python: sort once, then take the middle value (odd n)
# or the average of the two middle values (even n).
n = len(x)
x_ord = sorted(x)
index = n // 2  # integer index of the middle (odd n) or upper-middle (even n) element
if n % 2:
    median_ = x_ord[index]
else:
    median_ = 0.5*(x_ord[index-1] + x_ord[index])

median_

4

In [16]:
# Show the sample again: the next two cells take medians of x[:-1] (the first four values).
x

[8.0, 1, 2.5, 4, 28.0]

In [17]:
# x[:-1] has an even number of values (8.0, 1, 2.5, 4); median_low returns the
# lower of the two middle values instead of averaging them.
statistics.median_low(x[:-1])

2.5

In [18]:
# Counterpart of median_low: returns the higher of the two middle values.
statistics.median_high(x[:-1])

4

In [20]:
# CAUTION: these results are not meaningful. The median functions rely on
# sorting, and NaN does not order consistently against other floats, so the
# "middle" values picked here are arbitrary (the median of the five valid
# values is 4, not 6.0). Remove NaN first, e.g.
# [v for v in x_with_nan if not math.isnan(v)], before taking a median.
print(statistics.median(x_with_nan))
print(statistics.median_low(x_with_nan))
print(statistics.median_high(x_with_nan))

6.0
4
8.0


In [22]:
# NumPy median of the full five-value array.
median_ = np.median(y)
median_

4.0

In [23]:
# With an even number of values np.median averages the two middle ones
# (2.5 and 4 here), unlike median_low / median_high above.
median_ = np.median(y[:-1])
print(median_)

3.25


In [24]:
u = [2, 3, 2, 8, 12]
v = [12, 15, 12, 15, 21, 15, 12]  # used by the SciPy / pandas mode cells further down

# Mode in pure Python: tally every value in one pass and take the most common.
# Counter.most_common keeps first-seen order among equal counts, so a tie goes
# to the value that appears first in the data -- the same rule statistics.mode
# uses -- rather than to the numerically largest tied value.
mode_ = Counter(u).most_common(1)[0][0]
mode_

2

In [25]:
# Standard-library mode of u.
mode_ = statistics.mode(u)
mode_

2

In [30]:
# Convert the lists to NumPy arrays (u and v are rebound to a new type here)
# and compute the mode of u with SciPy; the result carries both the modal value and its count.
# NOTE(review): the array-valued ModeResult shown in the output looks like it
# comes from an older SciPy; newer releases are documented to return scalars
# by default (keepdims=False) -- confirm against the installed version.
u, v = np.array(u), np.array(v)
mode_ = scipy.stats.mode(u)
mode_

ModeResult(mode=array([2]), count=array([2]))

In [31]:
# In v both 12 and 15 occur three times; scipy.stats.mode reports only one of
# them (12, the smaller value).
mode_ = scipy.stats.mode(v)
mode_

ModeResult(mode=array([12]), count=array([3]))

In [32]:
# The ModeResult fields can be read individually by attribute name.
print(mode_.mode)
print(mode_.count)

[12]
[3]


In [33]:
# pandas mode: u and v are rebound to Series, and w adds a sample containing NaN.
# Series.mode returns every tied mode (12 and 15 for v) and ignores the NaN in w.
u = pd.Series(u)
v = pd.Series(v)
w = pd.Series([2, 2, math.nan])
for series in (u, v, w):
    print(series.mode())

0    2
dtype: int64
0    12
1    15
dtype: int64
0    2.0
dtype: float64


In [34]:
# Sample variance in pure Python: sum of squared deviations from the mean,
# divided by n - 1.
n = len(x)
mean_ = sum(x) / n
squared_deviations = [(value - mean_) ** 2 for value in x]
var_ = sum(squared_deviations) / (n - 1)
var_

123.19999999999999

In [36]:
# Standard-library sample variance; agrees with the manual n - 1 formula above
# apart from floating-point rounding in the last digits.
var_ = statistics.variance(x)
var_

123.2

In [37]:
# NumPy variance, function form; ddof=1 makes the divisor n - 1 (sample variance),
# matching the manual calculation above.
var_ = np.var(y, ddof=1)
var_

123.19999999999999

In [39]:
# NumPy variance, method form, again with ddof=1 for the sample variance.
var_ = y.var(ddof=1)
var_

123.19999999999999

In [40]:
# Display the array for reference.
y

array([ 8. ,  1. ,  2.5,  4. , 28. ])

In [41]:
# pandas sample variance (ddof=1 passed explicitly).
z.var(ddof=1)

123.19999999999999

In [42]:
# Standard deviation as the square root of the sample variance currently held
# in var_ (assigned by the variance cells above).
std_ = var_**0.5
std_

11.099549540409285

In [43]:
# Standard-library sample standard deviation; differs from var_**0.5 only in
# the last printed digit.
std_ = statistics.stdev(x)
std_

11.099549540409287

In [44]:
# NumPy sample standard deviation (ddof=1).
y.std(ddof=1)

11.099549540409285

In [45]:
# pandas sample standard deviation (ddof=1).
z.std(ddof=1)

11.099549540409285

In [46]:
# Display the sample again before computing its skewness.
x

[8.0, 1, 2.5, 4, 28.0]

In [47]:
# Sample skewness in pure Python: sum of cubed deviations from the mean,
# scaled by n / ((n - 1)(n - 2)) and by the cube of the sample standard deviation.
n = len(x)
mean_ = sum(x) / n
var_ = sum([(value - mean_) ** 2 for value in x]) / (n - 1)
std_ = var_ ** 0.5

cubed_deviation_sum = sum([(value - mean_) ** 3 for value in x])
skew_ = cubed_deviation_sum * n / ((n - 1) * (n - 2) * std_ ** 3)

skew_

1.9470432273905929

In [48]:
# Display the array used for the SciPy skewness call below.
y

array([ 8. ,  1. ,  2.5,  4. , 28. ])

In [49]:
# Display the NaN-containing array used for the SciPy skewness call below.
y_with_nan

array([ 8. ,  1. ,  2.5,  nan,  4. , 28. ])

In [50]:
# bias=False applies the sample-size correction, so the result agrees with the
# hand-computed skew_ above up to floating-point rounding.
scipy.stats.skew(y, bias=False)

1.9470432273905927

In [51]:
# With a NaN in the data and no further arguments, scipy.stats.skew returns nan.
scipy.stats.skew(y_with_nan, bias=False)

nan

In [53]:
# pandas skewness; matches the bias-corrected values above up to rounding.
z.skew()

1.9470432273905924

In [54]:
# Series.skew skips the NaN, giving the same value as for z.
z_with_nan.skew()

1.9470432273905924

In [55]:
# NOTE: x is rebound here to a new nine-value sample; every cell from this
# point on works with this data, not the five-value list used above.
x = [-5.0,-1.1,0.1,2.0,8.0,12.8,21.0,25.8,41.0]
# n=2 asks for a single cut point splitting the data into two equal halves,
# i.e. the median.
statistics.quantiles(x,n=2)

[8.0]

In [56]:
# n=4 returns the three quartile cut points. With method='inclusive' they
# coincide with the np.percentile results shown in the cells below.
statistics.quantiles(x,n=4,method='inclusive')

[0.1, 8.0, 21.0]

In [57]:
# y is rebound to an array of the new sample.
y = np.array(x)
# 5th percentile; the result lies between the two smallest values (-5.0 and -1.1)
# because NumPy interpolates linearly between neighbouring data points.
np.percentile(y,5)

-3.44

In [58]:
# 95th percentile, interpolated between the two largest values (25.8 and 41.0).
np.percentile(y,95)

34.919999999999995

In [59]:
# Passing a sequence of percentages returns all requested percentiles at once
# (here the three quartiles).
np.percentile(y,[25,50,75])

array([ 0.1,  8. , 21. ])

In [60]:
# The median equals the 50th percentile computed above.
np.median(y)

8.0

In [61]:
# np.insert returns a new array with NaN placed at index 2; y itself is left unchanged.
y_with_nan = np.insert(y,2,np.nan)
y_with_nan

array([-5. , -1.1,  nan,  0.1,  2. ,  8. , 12.8, 21. , 25.8, 41. ])

In [62]:
# np.nanpercentile ignores the NaN and reproduces the quartiles of y.
np.nanpercentile(y_with_nan,[25,50,75])

array([ 0.1,  8. , 21. ])

In [63]:
# np.quantile is the same calculation with fractions in [0, 1] instead of percentages.
np.quantile(y,[0.25,0.5,0.75])

array([ 0.1,  8. , 21. ])

In [64]:
# Rebind z and z_with_nan to Series built from the new arrays.
z, z_with_nan = pd.Series(y), pd.Series(y_with_nan)

In [65]:
# pandas quantile takes a fraction; 0.05 matches np.percentile(y, 5).
z.quantile(0.05)

-3.44

In [66]:
# 0.95 matches np.percentile(y, 95).
z.quantile(0.95)

34.919999999999995

In [67]:
# A list of fractions returns a Series indexed by those fractions.
z.quantile([0.25,0.5,0.75])

0.25     0.1
0.50     8.0
0.75    21.0
dtype: float64

In [70]:
# Series.quantile skips the NaN, so the quartiles are the same as for z.
z_with_nan.quantile([0.25,0.5,0.75])

0.25     0.1
0.50     8.0
0.75    21.0
dtype: float64

In [72]:
# Range (peak to peak = max - min); the NaN in the array propagates to the result.
np.ptp(y_with_nan)

nan

In [73]:
# np.ptp also accepts a Series: 41.0 - (-5.0) = 46.0.
np.ptp(z)

46.0

In [78]:
# np.ptp on the NaN-containing Series yields nan, unlike the pandas max()/min()
# methods used further down, which skip NaN.
np.ptp(z_with_nan)

nan

In [76]:
# Display the Series to show where the NaN sits (index 2).
z_with_nan

0    -5.0
1    -1.1
2     NaN
3     0.1
4     2.0
5     8.0
6    12.8
7    21.0
8    25.8
9    41.0
dtype: float64

In [77]:
# Display the array counterpart of the Series above.
y_with_nan

array([-5. , -1.1,  nan,  0.1,  2. ,  8. , 12.8, 21. , 25.8, 41. ])

In [79]:
# Range computed explicitly as maximum minus minimum of the NaN-free array.
np.max(y) - np.min(y)

46.0

In [80]:
# Series.max and Series.min skip NaN, so this yields the range of the valid
# values (46.0) where np.ptp(z_with_nan) gives nan.
z_with_nan.max()-z_with_nan.min()

46.0

In [83]:
# NOTE(review): same call as the earlier np.ptp(z_with_nan) cell, repeated for
# contrast with the max()-min() result above; it still returns nan.
np.ptp(z_with_nan)

nan

In [86]:
# Interquartile range with NumPy: distance between the 25% and 75% quantiles.
quartiles = np.quantile(y, [0.25, 0.75])
lower_quartile, upper_quartile = quartiles
upper_quartile - lower_quartile

20.9

In [87]:
# Interquartile range with pandas: the result of quantile() is indexed by the
# requested fractions, so the two quartiles are looked up by label.
quartiles = z.quantile([0.25, 0.75])
quartiles.loc[0.75] - quartiles.loc[0.25]

20.9

In [88]:
# One-call summary: count, mean, std, min, quartiles and max of the Series.
z.describe()

count     9.000000
mean     11.622222
std      15.124548
min      -5.000000
25%       0.100000
50%       8.000000
75%      21.000000
max      41.000000
dtype: float64

In [89]:
# 5x3 integer array used as two-dimensional example data.
a = np.array([[1,1,1],
             [2,3,1],
             [4,9,2],
             [8,27,4],
             [16,1,1]])
a

array([[ 1,  1,  1],
       [ 2,  3,  1],
       [ 4,  9,  2],
       [ 8, 27,  4],
       [16,  1,  1]])

In [90]:
# Wrap the array in a DataFrame with one label per row and per column.
row_names = ['first','second','third','fourth','fifth']
col_names = ['A','B','C']

df = pd.DataFrame(a,index=row_names, columns=col_names)
df

Unnamed: 0,A,B,C
first,1,1,1
second,2,3,1
third,4,9,2
fourth,8,27,4
fifth,16,1,1


In [96]:
# describe() on a DataFrame reports the same summary statistics separately for each column.
df.describe()

Unnamed: 0,A,B,C
count,5.0,5.0,5.0
mean,6.2,8.2,1.8
std,6.09918,11.009087,1.30384
min,1.0,1.0,1.0
25%,2.0,1.0,1.0
50%,4.0,3.0,1.0
75%,8.0,9.0,2.0
max,16.0,27.0,4.0
