# Measures of Central Tendency

In [1]:
import numpy as np
import pandas as pd
from sklearn import datasets as dt

In [41]:
import scipy.stats.mstats as sts

In [4]:
iris = dt.load_iris()

In [5]:
iris

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [6]:
type(iris)

sklearn.utils.Bunch

In [9]:
df = pd.DataFrame(iris.data)
df.columns = iris.feature_names

In [12]:
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [20]:
feature = pd.DataFrame(iris.target)

### Mean

In [13]:
df.mean()

sepal length (cm)    5.843333
sepal width (cm)     3.057333
petal length (cm)    3.758000
petal width (cm)     1.199333
dtype: float64

In [15]:
np.average(df['sepal length (cm)'])

5.843333333333334

In [16]:
## Average of whole dataframe
np.average(df)

3.4644999999999997

### Weighted mean

In [34]:
weights = np.reshape(np.asarray(feature),(150,))

In [35]:
np.average(df['sepal length (cm)'],weights=weights)

6.370666666666666

In [36]:
def weighted_average(data, weights):
    return sum([data[i]*weights[i] for i in range(len(data))])/sum(weights)

weighted_average(df['sepal length (cm)'], weights)

6.370666666666665

### Geometric mean

In [38]:
from scipy.stats.mstats import gmean

In [39]:
gmean(df['sepal length (cm)'])

5.785720390427728

### Harmonic mean

In [42]:
sts.hmean(df['sepal length (cm)'])

5.728905057850834

### Trimmed mean

Unlike trimmed mean in R which takes proportion of the values to be trimmed, here the paramter consists of a tuple of is lower and upper values to be includes, and a tuple indicating whether to include the limits in the calculations

In [44]:
sts.tmean(df['sepal length (cm)'],limits = (2,5), inclusive = (True,False))

4.690909090909091

For a R-like function, use the below function

In [52]:
sts.trimmed_mean(df['sepal length (cm)'], limits = (0.025,0.97), inclusive = (False,True),relative = True)

4.5

### Median

In [53]:
df.median()

sepal length (cm)    5.80
sepal width (cm)     3.00
petal length (cm)    4.35
petal width (cm)     1.30
dtype: float64

### Mode

In [56]:
df.mode(axis = 0)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.0,3.0,1.4,0.2
1,,,1.5,


In [57]:
sts.mode(df['sepal length (cm)'])

ModeResult(mode=array([5.]), count=array([10.]))

In [58]:
a = sts.mode(df['sepal length (cm)'])

In [67]:
a.mode

array([5.])

In [68]:
a.count

array([10.])

### quartiles

In [69]:
sts.mquantiles((df['sepal length (cm)']))

array([5.1, 5.8, 6.4])

In [72]:
df.quantile(q=[0.25,0.5,0.75])

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0.25,5.1,2.8,1.6,0.3
0.5,5.8,3.0,4.35,1.3
0.75,6.4,3.3,5.1,1.8


### Percentiles

In [74]:
df.quantile(q=0.01)

sepal length (cm)    4.400
sepal width (cm)     2.200
petal length (cm)    1.149
petal width (cm)     0.100
Name: 0.01, dtype: float64

In [76]:
np.percentile(df['sepal length (cm)'],q=.01)

4.301489999999999