In [65]:
# import required libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [71]:
# Fetch the Iris dataset that ships with scikit-learn
from sklearn.datasets import load_iris

iris = load_iris()

# Feature matrix as a DataFrame, with the target array attached as 'species'
df = pd.DataFrame(data=iris.data, columns=iris.feature_names).assign(species=iris.target)

In [None]:
# Preview the first five rows of the frame
df.head(n=5)

In [None]:
# Pairplot visualization of the original dataset.
# `vars` pins the plotted variables to the original feature columns so that
# re-running this cell after later cells have added 'cluster', 'pca1' and
# 'pca2' to df does not pull those derived columns into the grid.
sns.pairplot(
    df,
    vars=iris.feature_names,
    hue='species',
    palette="Set2",
    diag_kind="kde",
)
# y slightly above 1 places the title above the top row of the grid
plt.suptitle("Iris Data Distribution", y=1.02)
plt.show()

In [74]:
from sklearn.cluster import KMeans

# Apply K-Means clustering with 3 clusters (one per Iris species).
# The feature columns are selected by name rather than by position: once
# 'cluster' (and later 'pca1'/'pca2') have been added to df, the positional
# slice `df.iloc[:, :-1]` no longer excludes 'species', so re-running the cell
# would leak the labels and derived columns into the clustering.
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
df['cluster'] = kmeans.fit_predict(df[iris.feature_names])

In [75]:
from sklearn.decomposition import PCA

# Project the feature columns onto two principal components for plotting.
# Columns are selected by name rather than with `df.iloc[:, :-2]`: after this
# cell has run once, 'pca1' and 'pca2' are the last two columns, so the
# positional slice would then include 'species' and 'cluster' on a re-run.
pca = PCA(n_components=2)
df_pca = pca.fit_transform(df[iris.feature_names])

# Store each component as its own column so the plotting cell can refer to it by name
df['pca1'] = df_pca[:, 0]
df['pca2'] = df_pca[:, 1]

In [None]:
# Draw the samples in the 2-D PCA plane, coloured by their K-Means cluster
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=df, x='pca1', y='pca2', hue='cluster', palette='Set1', ax=ax)
ax.set_title("K-Means Clustering of Iris Dataset")
plt.show()

In [None]:
# Contingency table: rows are the values of df['species'],
# columns are the values of df['cluster']
species_vs_cluster = pd.crosstab(df['species'], df['cluster'])

# Show the counts as an annotated heatmap
fig, ax = plt.subplots(figsize=(8, 4))
sns.heatmap(species_vs_cluster, annot=True, cmap="coolwarm", ax=ax)
ax.set_xlabel("Cluster Labels")
ax.set_ylabel("Actual Species")
ax.set_title("Cluster vs Actual Species")
plt.show()