In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans

In [None]:
# --- 1. Load the Iris dataset ---
# `iris` keeps the full scikit-learn dataset object; `x` is the feature table
# built from its `data` array, with `feature_names` as the column labels.
iris = load_iris()
x = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Preview: a heading line followed by the first five rows of the features
print("First 5 rows of Iris dataset:", x.head(), sep="\n")

# --- 2. Determine optimal number of clusters using Elbow Method ---
# Candidate cluster counts. Defined once so the fitting step and the plot's
# x-axis always describe the same values of k.
k_values = range(1, 11)

# Within-cluster sum of squares (KMeans `inertia_`) for every candidate k.
# - n_init is passed explicitly: its default changed from 10 to 'auto' in
#   scikit-learn 1.4 (and 1.2/1.3 warn about it), so leaving it out makes the
#   curve depend on the installed version.
# - The estimators are created inside the comprehension so no throwaway k=10
#   model is left bound to `kmeans`; that name is reserved for the final model.
wcss = [
    KMeans(n_clusters=k, init='k-means++', n_init=10, random_state=42).fit(x).inertia_
    for k in k_values
]

# Plot the Elbow graph
plt.figure(figsize=(8,5))
plt.plot(k_values, wcss, marker='o')
plt.xticks(k_values)  # one tick per candidate k so the elbow position is readable
plt.title('Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.show()

In [None]:
# --- 3. Fit K-Means with optimal clusters (k=3) ---
# n_init is set explicitly because its default differs between scikit-learn
# releases (10 before 1.4, 'auto' from 1.4 on); pinning it keeps the cluster
# assignment the same regardless of the installed version.
kmeans = KMeans(n_clusters=3, init='k-means++', n_init=10, random_state=42)

# y_kmeans holds one cluster id (0, 1 or 2) per row of x.
y_kmeans = kmeans.fit_predict(x)

In [None]:
# --- 4. Visualize clusters (using first two features: Sepal Length & Sepal Width) ---
# Points, centroids, title, legend and plt.show() are kept in ONE cell: the
# inline backend renders and closes the current figure at the end of each
# cell, so drawing the centroids in a later cell would put them on a new,
# empty figure instead of on top of the clusters.
plt.figure(figsize=(8,5))

# K-Means cluster ids are arbitrary, so a fixed "cluster 0 = setosa" legend can
# be wrong. Each cluster is instead labelled with the species that makes up
# the majority of its members according to the true labels in iris.target.
cluster_colors = ['red', 'blue', 'green']
for cluster_id, color in enumerate(cluster_colors):
    in_cluster = y_kmeans == cluster_id
    species_counts = np.bincount(iris.target[in_cluster], minlength=len(iris.target_names))
    majority_species = iris.target_names[species_counts.argmax()]
    plt.scatter(
        x.iloc[in_cluster, 0], x.iloc[in_cluster, 1],
        s=100, c=color,
        label=f'Cluster {cluster_id + 1} (mostly {majority_species})',
    )

# Plot centroids (columns 0 and 1 are the same two features used for the points)
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=200, c='yellow', label='Centroids', marker='X')
plt.title('Clusters of Iris Flowers')
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
plt.legend()
plt.show()

In [None]:
# --- 5. Conclusion ---
# Closing summary of the notebook, written out as two lines of plain text.
conclusion_lines = (
    "K-Means clustering has grouped the Iris dataset into 3 clusters corresponding closely to the actual species.",
    "The Elbow method confirmed that 3 clusters are optimal.",
)
print(*conclusion_lines, sep="\n")