## Descriptive Statistics
<h3> Calculate Measures of Dispersion using Python </h3>

Calculate measures of the spread of the data (range, IQR, standard deviation, variance) and of the shape of its distribution (skewness, kurtosis).

In [1]:
import pandas as pd
import numpy as np         #for calculating range, std. deviation, variance
import scipy.stats         #for calculating IQR, skewness, kurtosis

In [2]:
# Load the mtcars dataset from a CSV file in the current working directory.
df = pd.read_csv("mtcars.csv")

In [3]:
# Preview the first five rows to confirm the columns were parsed as expected.
df.head()

Unnamed: 0,model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column.
df.describe()

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
count,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0
mean,20.090625,6.1875,230.721875,146.6875,3.596563,3.21725,17.84875,0.4375,0.40625,3.6875,2.8125
std,6.026948,1.785922,123.938694,68.562868,0.534679,0.978457,1.786943,0.504016,0.498991,0.737804,1.6152
min,10.4,4.0,71.1,52.0,2.76,1.513,14.5,0.0,0.0,3.0,1.0
25%,15.425,4.0,120.825,96.5,3.08,2.58125,16.8925,0.0,0.0,3.0,2.0
50%,19.2,6.0,196.3,123.0,3.695,3.325,17.71,0.0,0.0,4.0,2.0
75%,22.8,8.0,326.0,180.0,3.92,3.61,18.9,1.0,1.0,4.0,4.0
max,33.9,8.0,472.0,335.0,4.93,5.424,22.9,1.0,1.0,5.0,8.0


### Range

In [5]:
# Range = max - min; np.ptp ("peak to peak") returns it in one call.
mpg_range, disp_range = (np.ptp(df[col], axis=0) for col in ("mpg", "disp"))

In [6]:
# Print each range on its own line, label first.
for label, value in (("mpg_range: ", mpg_range), ("disp_range: ", disp_range)):
    print(label, value)

mpg_range:  23.5
disp_range:  400.9


In [7]:
# Cross-check: the range computed by hand as maximum minus minimum.
df["mpg"].max() - df["mpg"].min()

23.5

### IQR 
Use the `iqr` function from the `scipy.stats` library.

In [8]:
from scipy.stats import iqr

In [9]:
# Summary of mpg; the 25% and 75% rows are the quartiles the IQR is built from.
df['mpg'].describe()

count    32.000000
mean     20.090625
std       6.026948
min      10.400000
25%      15.425000
50%      19.200000
75%      22.800000
max      33.900000
Name: mpg, dtype: float64

In [10]:
# Interquartile range of mpg: the 75th percentile minus the 25th percentile.
iqr(df['mpg'])

7.375

In [11]:
# Column-wise IQR: axis=0 reduces over the rows, giving one value per column.
# (iqr is already imported in an earlier cell, so it is not re-imported here.)
iqr(df[['mpg', 'disp', 'wt']], axis=0)

array([  7.375  , 205.175  ,   1.02875])

### Standard deviation 

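Use the `std` method of a pandas Series/DataFrame (sample standard deviation, `ddof=1` by default) or `np.std` from numpy (population standard deviation, `ddof=0` by default).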

In [12]:
# Sample standard deviation: pandas divides by n - 1 (ddof=1) by default.
df['mpg'].std(ddof=1)

6.026948052089105

In [13]:
# Population standard deviation per column: np.std divides by n (ddof=0) by
# default, so these values are slightly smaller than the pandas .std() results.
np.std(df[['mpg', 'disp', 'wt']], axis=0, ddof=0)

mpg       5.932030
disp    121.986781
wt        0.963048
dtype: float64

### Variance

In [14]:
# Sample variance: pandas divides by n - 1 (ddof=1) by default.
df['mpg'].var(ddof=1)

36.32410282258065

In [15]:
# Population variance: np.var divides by n (ddof=0) by default, which is why
# this is smaller than the pandas .var() result.
np.var(df['mpg'], ddof=0)

35.188974609375

In [16]:
# Population variance (ddof=0) for several columns at once; axis=0 gives one
# value per column.
np.var(df[['mpg', 'disp', 'wt']], axis=0, ddof=0)

mpg        35.188975
disp    14880.774834
wt          0.927461
dtype: float64

In [17]:
# Smallest mpg value in the dataset.
df['mpg'].min()

10.4

In [18]:
# Largest mpg value in the dataset.
df['mpg'].max()

33.9

In [19]:
# Frequency of each distinct mpg value, most frequent first.
df['mpg'].value_counts()

21.0    2
19.2    2
30.4    2
22.8    2
15.2    2
10.4    2
21.4    2
33.9    1
19.7    1
15.8    1
26.0    1
27.3    1
13.3    1
15.5    1
21.5    1
24.4    1
14.3    1
32.4    1
14.7    1
18.7    1
18.1    1
17.3    1
16.4    1
17.8    1
15.0    1
Name: mpg, dtype: int64

In [20]:
# Arithmetic mean of mpg.
df['mpg'].mean()

20.090624999999996

In [21]:
from scipy.stats import skew

In [22]:
# Skewness of mpg; bias=True (scipy's default) applies no bias correction.
# A positive value indicates a longer right tail.
skew(df['mpg'], bias=True)

0.6404398640318834

### Skewness

In [23]:
# Column-wise skewness: axis=0 gives one value per column.
# (skew is already imported in an earlier cell, so it is not re-imported here.)
skew(df[['mpg', 'disp', 'wt']], axis=0)

array([0.64043986, 0.40027245, 0.44378554])

### Kurtosis

In [24]:
from scipy.stats import kurtosis

# fisher=False returns Pearson's kurtosis, for which a normal distribution
# scores 3; axis=0 gives one value per column.
kurtosis(df[['mpg', 'disp', 'wt']], axis=0, fisher=False)


array([2.79946679, 1.91031734, 3.17247054])

In [25]:
# Largest wt value in the dataset.
df["wt"].max()

5.424

In [26]:
# Smallest wt value in the dataset.
df["wt"].min()

1.513

In [27]:
# Arithmetic mean of wt.
df["wt"].mean()

3.2172499999999995

In [28]:
# Pearson's kurtosis (fisher=False) of wt alone; a normal distribution scores 3.
kurtosis(df["wt"], axis=0, fisher=False)

3.172470540158735

In [29]:
# fisher=True returns excess kurtosis (Pearson's value minus 3), so a normal
# distribution scores 0; axis=0 gives one value per column.
kurtosis(df[['mpg', 'disp', 'wt']], axis=0, fisher=True)

array([-0.20053321, -1.08968266,  0.17247054])