## Experiment 2
### Demonstration of descriptive statistical measures.

In [2]:
# importing required libraries
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as snss

import scipy
from scipy import stats
from scipy.stats import zscore, kurtosis, variation, scoreatpercentile
from scipy.stats.mstats import gmean

In [3]:
# Read the data
data = pd.read_csv('iris_dataset.csv')

In [4]:
# Drop the Id column
data = data.drop('Id', axis=1)

In [5]:
# Print the first 5 rows of the data
print(data.head().to_markdown())

|    |   SepalLengthCm |   SepalWidthCm |   PetalLengthCm |   PetalWidthCm | Species     |
|---:|----------------:|---------------:|----------------:|---------------:|:------------|
|  0 |             5.1 |            3.5 |             1.4 |            0.2 | Iris-setosa |
|  1 |             4.9 |            3   |             1.4 |            0.2 | Iris-setosa |
|  2 |             4.7 |            3.2 |             1.3 |            0.2 | Iris-setosa |
|  3 |             4.6 |            3.1 |             1.5 |            0.2 | Iris-setosa |
|  4 |             5   |            3.6 |             1.4 |            0.2 | Iris-setosa |


In [6]:
# Information about the data
print(data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   SepalLengthCm  150 non-null    float64
 1   SepalWidthCm   150 non-null    float64
 2   PetalLengthCm  150 non-null    float64
 3   PetalWidthCm   150 non-null    float64
 4   Species        150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB
None


In [7]:
# Describe the data
print(data.groupby('Species').describe())

                SepalLengthCm                                               
                        count   mean       std  min    25%  50%  75%  max   
Species                                                                     
Iris-setosa              50.0  5.006  0.352490  4.3  4.800  5.0  5.2  5.8  \
Iris-versicolor          50.0  5.936  0.516171  4.9  5.600  5.9  6.3  7.0   
Iris-virginica           50.0  6.588  0.635880  4.9  6.225  6.5  6.9  7.9   

                SepalWidthCm         ... PetalLengthCm      PetalWidthCm   
                       count   mean  ...           75%  max        count   
Species                              ...                                   
Iris-setosa             50.0  3.418  ...         1.575  1.9         50.0  \
Iris-versicolor         50.0  2.770  ...         4.600  5.1         50.0   
Iris-virginica          50.0  2.974  ...         5.875  6.9         50.0   

                                                           
                  me

Mean, Median, Mode


In [8]:
# Mean of the data
print(data.groupby('Species').mean().to_markdown())

| Species         |   SepalLengthCm |   SepalWidthCm |   PetalLengthCm |   PetalWidthCm |
|:----------------|----------------:|---------------:|----------------:|---------------:|
| Iris-setosa     |           5.006 |          3.418 |           1.464 |          0.244 |
| Iris-versicolor |           5.936 |          2.77  |           4.26  |          1.326 |
| Iris-virginica  |           6.588 |          2.974 |           5.552 |          2.026 |


In [9]:
# Median of the data
print(data.groupby('Species').median().to_markdown())

| Species         |   SepalLengthCm |   SepalWidthCm |   PetalLengthCm |   PetalWidthCm |
|:----------------|----------------:|---------------:|----------------:|---------------:|
| Iris-setosa     |             5   |            3.4 |            1.5  |            0.2 |
| Iris-versicolor |             5.9 |            2.8 |            4.35 |            1.3 |
| Iris-virginica  |             6.5 |            3   |            5.55 |            2   |


In [10]:
# Mode of the data
data.groupby('Species').value_counts()

Species         SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm
Iris-setosa     4.9            3.1           1.5            0.1             3
                4.3            3.0           1.1            0.1             1
                4.4            2.9           1.4            0.2             1
                5.0            3.4           1.5            0.2             1
                                             1.6            0.4             1
                                                                           ..
Iris-virginica  6.5            3.0           5.8            2.2             1
                               3.2           5.1            2.0             1
                6.7            2.5           5.8            1.8             1
                               3.0           5.2            2.3             1
                7.9            3.8           6.4            2.0             1
Name: count, Length: 147, dtype: int64

Geometric Mean and Harmonic Mean


In [11]:
# gemetric mean of the data
print(data.groupby('Species').apply(gmean).to_markdown())

# Harmonic mean of the data
print(data.groupby('Species').apply(scipy.stats.hmean).to_markdown())

| Species         | 0                                             |
|:----------------|:----------------------------------------------|
| Iris-setosa     | [4.99384106 3.3969062  1.45373856 0.22346247] |
| Iris-versicolor | [5.91397936 2.75187353 4.23308089 1.31118738] |
| Iris-virginica  | [6.55779452 2.95701356 5.52578887 2.00721413] |
| Species         | 0                                             |
|:----------------|:----------------------------------------------|
| Iris-setosa     | [4.9816814  3.37529787 1.44318022 0.20449898] |
| Iris-versicolor | [5.89193597 2.73301053 4.20449935 1.29602855] |
| Iris-virginica  | [6.52735823 2.94008724 5.50023704 1.98786729] |


  (len(row) >= 1 and row[0] == SEPARATING_LINE)
  or (len(row) >= 2 and row[1] == SEPARATING_LINE)
  (len(row) >= 1 and row[0] == SEPARATING_LINE)
  or (len(row) >= 2 and row[1] == SEPARATING_LINE)


Standard Devation


In [12]:
# Standard Deviation of the data
print(data.groupby('Species').std().to_markdown())

| Species         |   SepalLengthCm |   SepalWidthCm |   PetalLengthCm |   PetalWidthCm |
|:----------------|----------------:|---------------:|----------------:|---------------:|
| Iris-setosa     |        0.35249  |       0.381024 |        0.173511 |       0.10721  |
| Iris-versicolor |        0.516171 |       0.313798 |        0.469911 |       0.197753 |
| Iris-virginica  |        0.63588  |       0.322497 |        0.551895 |       0.27465  |


In [13]:
# Standard Deviation of the data using numpy
print(data.groupby('Species').agg(np.std).to_markdown())

| Species         |   SepalLengthCm |   SepalWidthCm |   PetalLengthCm |   PetalWidthCm |
|:----------------|----------------:|---------------:|----------------:|---------------:|
| Iris-setosa     |        0.35249  |       0.381024 |        0.173511 |       0.10721  |
| Iris-versicolor |        0.516171 |       0.313798 |        0.469911 |       0.197753 |
| Iris-virginica  |        0.63588  |       0.322497 |        0.551895 |       0.27465  |


In [14]:
# Standard Deviation Using Formula
variance = data.groupby('Species').var()
standard_deviation = np.sqrt(variance)
print(standard_deviation.to_markdown())

| Species         |   SepalLengthCm |   SepalWidthCm |   PetalLengthCm |   PetalWidthCm |
|:----------------|----------------:|---------------:|----------------:|---------------:|
| Iris-setosa     |        0.35249  |       0.381024 |        0.173511 |       0.10721  |
| Iris-versicolor |        0.516171 |       0.313798 |        0.469911 |       0.197753 |
| Iris-virginica  |        0.63588  |       0.322497 |        0.551895 |       0.27465  |


Variance


In [15]:
# Variance of the data
print(data.groupby('Species').var().to_markdown())

| Species         |   SepalLengthCm |   SepalWidthCm |   PetalLengthCm |   PetalWidthCm |
|:----------------|----------------:|---------------:|----------------:|---------------:|
| Iris-setosa     |        0.124249 |      0.14518   |       0.0301061 |      0.0114939 |
| Iris-versicolor |        0.266433 |      0.0984694 |       0.220816  |      0.0391061 |
| Iris-virginica  |        0.404343 |      0.104004  |       0.304588  |      0.0754327 |


In [16]:
# Variance of the data using numpy
print(data.groupby('Species').agg(np.var).to_markdown())

| Species         |   SepalLengthCm |   SepalWidthCm |   PetalLengthCm |   PetalWidthCm |
|:----------------|----------------:|---------------:|----------------:|---------------:|
| Iris-setosa     |        0.124249 |      0.14518   |       0.0301061 |      0.0114939 |
| Iris-versicolor |        0.266433 |      0.0984694 |       0.220816  |      0.0391061 |
| Iris-virginica  |        0.404343 |      0.104004  |       0.304588  |      0.0754327 |


In [17]:
# Variance of the data using formula
standard_deviation = data.groupby('Species').std()
variance = np.square(standard_deviation)
print(variance.to_markdown())

| Species         |   SepalLengthCm |   SepalWidthCm |   PetalLengthCm |   PetalWidthCm |
|:----------------|----------------:|---------------:|----------------:|---------------:|
| Iris-setosa     |        0.124249 |      0.14518   |       0.0301061 |      0.0114939 |
| Iris-versicolor |        0.266433 |      0.0984694 |       0.220816  |      0.0391061 |
| Iris-virginica  |        0.404343 |      0.104004  |       0.304588  |      0.0754327 |
