# Iris Data Analysis

In [4]:
import pandas as pd

# Load the Iris CSV (path is relative to the notebook's working directory)
# and preview the first rows to confirm the columns parsed as expected.
file_path = "../datasets/Iris/iris.csv"
df = pd.read_csv(file_path, sep=",")
df.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


In [5]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

(150, 5)

In [6]:
# Print a per-column summary: non-null counts, dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal.length  150 non-null    float64
 1   sepal.width   150 non-null    float64
 2   petal.length  150 non-null    float64
 3   petal.width   150 non-null    float64
 4   variety       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [9]:
# List the distinct species labels present in the `variety` column.
df["variety"].unique()

array(['Setosa', 'Versicolor', 'Virginica'], dtype=object)

In [11]:
# Create one subset per species by boolean-masking on the `variety` column.
setosa_df = df[df['variety'] == 'Setosa']
versicolor_df = df[df['variety'] == 'Versicolor']
virginica_df = df[df['variety'] == 'Virginica']

# Report each subset's own row count. Every label must be paired with the
# length of its matching frame; printing len(setosa_df) under all three
# labels would hide an empty or mis-filtered Versicolor/Virginica subset.
print("Setosa subset count : ", len(setosa_df))
print("Versicolor subset count : ", len(versicolor_df))
print("Virginica subset count : ", len(virginica_df))

setosa_df.head()

Setosa subset count :  50
Versicolor subset count :  50
Virginica subset count :  50


Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


In [12]:
# Stack the Versicolor and Virginica subsets vertically; ignore_index=True
# discards the original row labels and renumbers the result from 0.
merged_df = pd.concat(
    (versicolor_df, virginica_df),
    ignore_index=True,
)

print("Merged Versicolor and Virginca count : ", len(merged_df))

merged_df.head()

Merged Versicolor and Virginca count :  100


Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,7.0,3.2,4.7,1.4,Versicolor
1,6.4,3.2,4.5,1.5,Versicolor
2,6.9,3.1,4.9,1.5,Versicolor
3,5.5,2.3,4.0,1.3,Versicolor
4,6.5,2.8,4.6,1.5,Versicolor


In [16]:
# Order the rows by petal length, largest first. The original row labels
# are kept, so the index shows where each row sat in the unsorted frame.
sorted_df = df.sort_values('petal.length', ascending=False)

sorted_df.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
118,7.7,2.6,6.9,2.3,Virginica
122,7.7,2.8,6.7,2.0,Virginica
117,7.7,3.8,6.7,2.2,Virginica
105,7.6,3.0,6.6,2.1,Virginica
131,7.9,3.8,6.4,2.0,Virginica


In [17]:
# Swap rows and columns: each column name becomes a row label and each
# original row label becomes a column.
transposed_df = df.T

transposed_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,140,141,142,143,144,145,146,147,148,149
sepal.length,5.1,4.9,4.7,4.6,5.0,5.4,4.6,5.0,4.4,4.9,...,6.7,6.9,5.8,6.8,6.7,6.7,6.3,6.5,6.2,5.9
sepal.width,3.5,3.0,3.2,3.1,3.6,3.9,3.4,3.4,2.9,3.1,...,3.1,3.1,2.7,3.2,3.3,3.0,2.5,3.0,3.4,3.0
petal.length,1.4,1.4,1.3,1.5,1.4,1.7,1.4,1.5,1.4,1.5,...,5.6,5.1,5.1,5.9,5.7,5.2,5.0,5.2,5.4,5.1
petal.width,0.2,0.2,0.2,0.2,0.2,0.4,0.3,0.2,0.2,0.1,...,2.4,2.3,1.9,2.3,2.5,2.3,1.9,2.0,2.3,1.8
variety,Setosa,Setosa,Setosa,Setosa,Setosa,Setosa,Setosa,Setosa,Setosa,Setosa,...,Virginica,Virginica,Virginica,Virginica,Virginica,Virginica,Virginica,Virginica,Virginica,Virginica


In [23]:
# Reshape wide -> long: keep `variety` as the identifier and unpivot the four
# measurement columns into (Measurement, Value) pairs, one row per pair.
melted_df = df.melt(
    id_vars=['variety'],
    value_vars=['sepal.length', 'sepal.width', 'petal.length', 'petal.width'],
    var_name='Measurement',
    value_name='Value',
)
melted_df

Unnamed: 0,variety,Measurement,Value
0,Setosa,sepal.length,5.1
1,Setosa,sepal.length,4.9
2,Setosa,sepal.length,4.7
3,Setosa,sepal.length,4.6
4,Setosa,sepal.length,5.0
...,...,...,...
595,Virginica,petal.width,2.3
596,Virginica,petal.width,1.9
597,Virginica,petal.width,2.0
598,Virginica,petal.width,2.3


In [27]:
# Reshape long -> wide: one row per variety and one column per measurement.
# Values sharing a (variety, Measurement) cell are averaged, so this yields
# per-species means rather than the original per-flower rows.
cast_df = (
    melted_df
    .pivot_table(
        index='variety',
        columns='Measurement',
        values='Value',
        aggfunc='mean',
    )
    .reset_index()
)
cast_df.head()

Measurement,variety,petal.length,petal.width,sepal.length,sepal.width
0,Setosa,1.462,0.246,5.006,3.428
1,Versicolor,4.26,1.326,5.936,2.77
2,Virginica,5.552,2.026,6.588,2.974
