📝 **Author:** Amirhossein Heydari - 📧 **Email:** AmirhosseinHeydari78@gmail.com - 📍 **Linktree:** [linktr.ee/mr_pylin](https://linktr.ee/mr_pylin)

---

# Dependencies

In [1]:
import itertools

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Load Iris Dataset

<figure style="text-align: center;">
    <img src="../../assets/images/third_party/misc/01_08.png" alt="01_08.png" style="width: 50%;">
    <figcaption style="text-align: center;">©️ Image: <a href= "https://github.com/rasbt/machine-learning-book/blob/main/ch01/figures/01_08.png">Machine Learning with PyTorch and Scikit-Learn</a></figcaption>
</figure>

In [2]:
# remote CSV file that holds the Iris measurements
iris_dataset_url = r"https://raw.githubusercontent.com/mr-pylin/datasets/refs/heads/main/data/tabular-data/iris/dataset.csv"

# read the CSV straight from the URL into a pandas data-frame
df = pd.read_csv(
    filepath_or_buffer=iris_dataset_url,
    encoding='utf-8',
)

# preview the first rows (rendered as the cell output)
df.head(n=5)

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


## Species Count/Distribution

In [None]:
# number of rows per distinct value of the 'class' column
species = df['class'].value_counts()

# one row, two panels: absolute counts (left) and relative shares (right)
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(8, 4), layout='compressed')
bar_ax, pie_ax = axs

# left panel: bar chart
bar_ax.bar(species.index, species.values, color='skyblue')
bar_ax.set_title("Bar chart of Species Counts")
bar_ax.set_ylabel("Count")

# right panel: pie chart with percentage annotations
pie_ax.pie(species, labels=species.index, autopct='%1.1f%%', startangle=90)
pie_ax.set_title("Pie Chart of Species Distribution")

plt.show()

## Histogram for Feature Distribution

In [None]:
# every column except the last one is plotted as a feature
feature_names = df.columns[:-1]

fig, axs = plt.subplots(nrows=1, ncols=4, figsize=(16, 4), layout='compressed')

# one histogram panel per feature
for position, feature in enumerate(feature_names):
    panel = axs[position]
    panel.hist(df[feature], bins=20, edgecolor='black', alpha=0.7)
    panel.set_title(f"Histogram of {feature}")
    panel.set_xlabel(f"{feature} (cm)")
    panel.set_ylabel("Frequency")

plt.show()

## Box Plot to Check for Outliers

In [None]:
# feature columns only (everything except the trailing 'class' column)
features = df.iloc[:, :-1]

plt.figure(figsize=(10, 5))

# convert explicitly to a float array: `df.values` on a frame that mixes numbers
# and strings yields an object-dtype array, which is a fragile input for the
# statistics computed by boxplot
plt.boxplot(features.to_numpy(dtype=float))

# boxes are drawn at positions 1..N by default; label them with the feature names
# instead of leaving the bare position numbers on the axis
plt.xticks(ticks=list(range(1, features.shape[1] + 1)), labels=list(features.columns))
plt.ylabel('Value (cm)')
plt.title('Boxplot of Features')
plt.show()

## Scatter Plot for Pairs of Features

In [None]:
species_idx = df['class'].unique()

# fixed colour / marker per class label
class_colors = dict(zip(species_idx, ['blue', 'green', 'red']))
class_markers = dict(zip(species_idx, ['o', 's', 'x']))

# split the frame per class once instead of re-filtering it for every subplot
class_frames = {cls: df[df['class'] == cls] for cls in species_idx}

# every unordered pair of feature columns (the last column is excluded);
# combinations() yields them in the same order as the nested i < j loops would
feature_pairs = itertools.combinations(df.columns[:-1], 2)

# plot
fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(14, 8), layout='compressed')
axs = axs.flatten()

# indexing (rather than zip) keeps a loud IndexError if there are more pairs than panels
for idx, (x_name, y_name) in enumerate(feature_pairs):
    ax = axs[idx]

    for cls, cls_data in class_frames.items():
        ax.scatter(cls_data[x_name], cls_data[y_name], color=class_colors[cls], marker=class_markers[cls], alpha=0.7, label=cls)

    ax.set(xlabel=x_name, ylabel=y_name)
    ax.legend(shadow=True, fancybox=True)

plt.show()

## Violin Plot for Feature Distribution by Species

In [None]:
fig, axs = plt.subplots(nrows=1, ncols=4, figsize=(16, 4), layout='compressed')

# one violin panel per feature column, grouped by the 'class' column on the x-axis
feature_names = df.columns[:-1]
for position, feature in enumerate(feature_names):
    panel = axs[position]
    sns.violinplot(data=df, x='class', y=feature, ax=panel)
    panel.set_title(f"Violin plot of {feature} by species")

plt.show()

## Correlation Heatmap

In [None]:
# pairwise correlation matrix of all columns except the last one
feature_frame = df.iloc[:, :-1]
corr = feature_frame.corr()

# plot: annotated heatmap, values printed with two decimals
plt.figure(figsize=(8, 6))
sns.heatmap(data=corr, cmap='coolwarm', annot=True, fmt='.2f')
plt.title('Feature Correlation Heatmap')
plt.show()

## 3D Scatter Plot for Triplet Features

In [None]:
species_idx = df['class'].unique()
class_colors = dict(zip(species_idx, ['blue', 'green', 'red']))
class_markers = dict(zip(species_idx, ['o', 's', 'x']))

# split the frame per class once instead of re-filtering it inside the loops
class_frames = {cls: df[df['class'] == cls] for cls in species_idx}

# axis label for each feature column; deriving labels from the plotted column
# keeps data and label in sync (the last panel previously said "Petal Height"
# while plotting 'petal-length')
axis_labels = {
    'sepal-length': "Sepal Length (cm)",
    'sepal-width': "Sepal Width (cm)",
    'petal-length': "Petal Length (cm)",
    'petal-width': "Petal Width (cm)",
}

# (x, y, z) columns shown in each of the four panels, left to right
feature_triplets = [
    ('sepal-length', 'sepal-width', 'petal-length'),
    ('sepal-length', 'sepal-width', 'petal-width'),
    ('sepal-length', 'petal-length', 'petal-width'),
    ('sepal-width', 'petal-length', 'petal-width'),
]

# plot
fig, axs = plt.subplots(nrows=1, ncols=4, figsize=(20, 6), subplot_kw={'projection': '3d'})

for ax, (x_name, y_name, z_name) in zip(axs, feature_triplets):
    for cls, cls_data in class_frames.items():
        ax.scatter(cls_data[x_name], cls_data[y_name], cls_data[z_name], color=class_colors[cls], marker=class_markers[cls])

    ax.set(xlabel=axis_labels[x_name], ylabel=axis_labels[y_name], zlabel=axis_labels[z_name])

plt.show()

## Hexbin Plot

In [None]:
# the two sepal measurements that are binned against each other
sepal_length = df['sepal-length']
sepal_width = df['sepal-width']

# hexagonal 2D histogram plus a colorbar for the bin colours
plt.hexbin(sepal_length, sepal_width, gridsize=30, cmap='Blues')
plt.colorbar()

# annotate the axes
plt.xlabel('Sepal Length (cm)')
plt.ylabel('Sepal Width (cm)')
plt.title('Hexbin Plot of Sepal Length vs Sepal Width')
plt.show()