<a href="https://colab.research.google.com/github/priyansuapk/ml-lab-exam/blob/main/kmeans.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris

# Fetch the bundled Iris dataset; keep the full target vector around as `y`
iris = load_iris()
y = iris.target

# Restrict the feature matrix to the samples labelled 0 or 1 (boolean row
# mask) and to the first two feature columns (sepal length, sepal width)
X = iris.data[y < 2, :2]

# K-Means implementation
class KMeansScratch:
    """Minimal K-Means clusterer (Lloyd's algorithm) built on NumPy.

    Parameters
    ----------
    n_clusters : int, default 2
        Number of clusters (and centroids) to fit.
    max_iter : int, default 100
        Upper bound on the number of assign/update iterations.
    tol : float, default 1e-4
        Convergence threshold: iteration stops once every centroid
        coordinate changes by less than ``tol`` between two iterations.

    Attributes
    ----------
    centroids : ndarray of shape (n_clusters, n_features)
        Cluster centres, set by :meth:`fit`.
    labels : ndarray of shape (n_samples,)
        Index of the nearest centroid for each training sample, set by
        :meth:`fit`.
    """

    def __init__(self, n_clusters=2, max_iter=100, tol=1e-4):
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.tol = tol

    def fit(self, X):
        """Fit the centroids to ``X`` and store per-sample labels.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        Returns
        -------
        KMeansScratch
            ``self``, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional or ``n_clusters`` is not in
            ``1..n_samples``.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-dimensional (n_samples, n_features), got ndim={X.ndim}"
            )
        if not 1 <= self.n_clusters <= X.shape[0]:
            raise ValueError(
                f"n_clusters={self.n_clusters} must be between 1 and "
                f"n_samples={X.shape[0]}"
            )

        # A private generator seeded with 42 produces the same permutation as
        # seeding the global generator would, but leaves the caller's global
        # random state untouched.
        rng = np.random.RandomState(42)
        random_indices = rng.permutation(X.shape[0])[:self.n_clusters]
        self.centroids = X[random_indices]

        for _ in range(self.max_iter):
            # Assign clusters
            self.labels = self._assign_clusters(X)

            # Calculate new centroids
            new_centroids = self._compute_centroids(X)

            # Check for convergence
            if np.all(np.abs(new_centroids - self.centroids) < self.tol):
                break

            self.centroids = new_centroids
        else:
            # No convergence within max_iter (or max_iter == 0): the centroids
            # were updated after the last assignment, so refresh the labels to
            # keep them consistent with the stored centroids.
            self.labels = self._assign_clusters(X)

        return self

    def _assign_clusters(self, X):
        """Return, for each row of ``X``, the index of the nearest centroid."""
        # Broadcast (n_samples, 1, n_features) against (n_clusters, n_features)
        # to get an (n_samples, n_clusters) matrix of Euclidean distances.
        distances = np.linalg.norm(X[:, np.newaxis] - self.centroids, axis=2)
        return np.argmin(distances, axis=1)

    def _compute_centroids(self, X):
        """Return the mean of each cluster's members under ``self.labels``.

        A cluster with no members keeps its current centroid; taking the mean
        of an empty slice would yield NaN and corrupt later assignments.
        """
        new_centroids = np.array(self.centroids, dtype=float)  # copy
        for i in range(self.n_clusters):
            members = X[self.labels == i]
            if len(members):
                new_centroids[i] = members.mean(axis=0)
        return new_centroids

    def predict(self, X):
        """Return the index of the nearest fitted centroid for each row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to label; :meth:`fit` must have been called first.
        """
        return self._assign_clusters(np.asarray(X, dtype=float))

# Cluster the selected samples with the from-scratch implementation
kmeans = KMeansScratch(n_clusters=2)
kmeans.fit(X)

# Draw every cluster in its own colour, then overlay the centroids
plt.figure(figsize=(8, 6))
for i, color in enumerate(('red', 'blue')):
    in_cluster = kmeans.labels == i
    plt.scatter(
        X[in_cluster, 0],
        X[in_cluster, 1],
        color=color,
        label=f"Cluster {i}",
    )
plt.scatter(
    kmeans.centroids[:, 0],
    kmeans.centroids[:, 1],
    s=200,
    c='yellow',
    marker='X',
    label="Centroids",
)

# Axis labels, title, legend and grid
plt.xlabel("Feature 1 (Sepal Length)")
plt.ylabel("Feature 2 (Sepal Width)")
plt.title("K-Means Clustering (2 Features, 2 Classes)")
plt.legend()
plt.grid()
plt.show()
