# Kmeans1

In [3]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
class KMeans:
    """K-means clustering (Lloyd's algorithm) with Euclidean distance.

    Args:
        k: Number of clusters.
        epochs: Maximum number of assign/update iterations run by ``fit``.
        ep: Convergence tolerance; iteration stops once the norm of the
            centroid displacement between two iterations drops below it.
        seed: Seed for the instance-private random generator used to pick
            initial centroids and to re-seed empty clusters.

    Attributes:
        centroids: ``(k, n_features)`` array of cluster centres, or ``None``
            before ``fit`` has been called.
        labels: ``(n_samples,)`` array of cluster indices in ``[0, k)``, or
            ``None`` before ``fit`` has been called.
        j_hist: Objective value (rounded to 4 decimals) recorded at every
            iteration of the most recent ``fit``.
    """

    def __init__(self, k=2, epochs=100, ep=1e-4, seed=42):
        self.k = k
        self.ep = ep
        self.epochs = epochs
        # A private generator instead of np.random.seed(seed): it produces the
        # same stream, but creating another instance (or any other use of
        # np.random) can no longer change this model's initialisation, and the
        # process-wide random state is left alone.
        self._rng = np.random.RandomState(seed)
        self.centroids = None
        self.labels = None
        self.j_hist = []

    def init_centroids(self, X):
        """Return ``k`` distinct rows of ``X`` chosen at random.

        Raises:
            ValueError: If ``k`` is larger than the number of rows in ``X``.
        """
        n_samples = X.shape[0]
        if self.k > n_samples:
            raise ValueError(
                f"k={self.k} exceeds the number of samples ({n_samples})"
            )
        idxs = self._rng.choice(n_samples, self.k, replace=False)
        return X[idxs]

    def assign_cluster(self, X, centroids):
        """Return, for every row of ``X``, the index of its nearest centroid.

        Ties are resolved in favour of the lowest centroid index.
        """
        X = np.asarray(X)
        centroids = np.asarray(centroids)
        # (n_samples, 1, d) - (1, k, d) broadcasts to every sample/centroid
        # pair, replacing the per-point Python loop.
        distances = np.linalg.norm(
            X[:, None, :] - centroids[None, :, :], axis=2
        )
        return np.argmin(distances, axis=1)

    def _dist(self, a, b):
        """Return the Euclidean distance between points ``a`` and ``b``."""
        return np.sqrt(np.sum((a - b) ** 2))

    def update_centroids(self, X, labels):
        """Return the mean of each cluster as a ``(k, n_features)`` array.

        A cluster that received no points is re-seeded with a randomly chosen
        row of ``X`` so that the model always keeps ``k`` centroids.
        """
        centroids = np.zeros((self.k, X.shape[1]))
        for i in range(self.k):
            members = X[labels == i]
            if len(members) > 0:
                centroids[i] = members.mean(axis=0)
            else:
                centroids[i] = X[self._rng.choice(X.shape[0])]
        return centroids

    def compute_objective(self, X, labels, centroids):
        """Return the mean squared distance of each point to its centroid."""
        labels = np.asarray(labels)
        total = 0.0
        for i in range(self.k):
            total += np.sum((X[labels == i] - centroids[i]) ** 2)
        return total / X.shape[0]

    def fit(self, X):
        """Cluster the rows of ``X`` and store the result on the instance.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.

        Returns:
            The fitted instance, so calls can be chained.
        """
        X = np.asarray(X)
        # Start a fresh history so refitting does not append to the old run.
        self.j_hist = []
        self.centroids = self.init_centroids(X)
        labels = None
        for _ in range(self.epochs):
            labels = self.assign_cluster(X, self.centroids)
            new_centroids = self.update_centroids(X, labels)
            j = self.compute_objective(X, labels, new_centroids)
            self.j_hist.append(round(j, 4))
            if np.linalg.norm(new_centroids - self.centroids) < self.ep:
                break
            self.centroids = new_centroids
        if labels is None:
            # epochs <= 0: no iteration ran, so label against the initial
            # centroids instead of failing on an unbound variable.
            labels = self.assign_cluster(X, self.centroids)
        self.labels = labels
        return self

    def plot_convergence(self):
        """Plot the recorded objective value against the iteration number."""
        plt.figure()
        plt.plot(range(1, len(self.j_hist) + 1), self.j_hist, marker='o')
        plt.title('Convergence of KMeans Objective j')
        plt.xlabel('epochs')
        plt.ylabel('J (Average Squared Distance)')
        plt.grid(True)
        plt.show()

    def plot_clusters(self, X):
        """Scatter-plot the first two columns of ``X`` coloured by cluster.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.labels is None or self.centroids is None:
            raise RuntimeError("call fit() before plot_clusters()")
        X = np.asarray(X)
        plt.figure()
        colors = ['red', 'blue', 'green', 'purple', 'orange', 'cyan']
        for i in range(self.k):
            pts = X[self.labels == i]
            # Colours are reused cyclically when k exceeds the palette size.
            plt.scatter(pts[:, 0], pts[:, 1], s=50, c=colors[i % len(colors)], label=f"Cluster {i + 1}")
        plt.scatter(self.centroids[:, 0], self.centroids[:, 1], c='black', s=150, marker='x', label='centroids')
        plt.title('K Means clustering Result')
        plt.xlabel('X')
        plt.ylabel('Y')
        plt.legend()
        plt.show()

# DBSCAN

In [6]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
class DBSCAN:
    """Density-based clustering (DBSCAN) over a full pairwise distance matrix.

    Args:
        eps: Neighbourhood radius; two points are neighbours when their
            (optionally rounded) Euclidean distance is ``<= eps``.
        minPts: Minimum neighbourhood size, counting the point itself, for a
            point to be a core point.
        decimals: Number of decimals the distances are rounded to before they
            are compared with ``eps``. The default of 2 keeps the historical
            behaviour, under which a pair at distance ``eps + 0.004`` still
            counts as neighbours; pass ``None`` to compare exact distances.

    Attributes set by ``fit``:
        X: The input data as a NumPy array.
        dist_matrix: ``(n, n)`` pairwise distance matrix.
        neighbors: For each point, the array of indices within ``eps``.
        core_points: Indices of the core points, in ascending order.
        cluster_labels: Cluster id per point; ``0`` marks noise and clusters
            are numbered from 1.
    """

    def __init__(self, eps=3, minPts=2, decimals=2):
        self.eps = eps
        self.minPts = minPts
        self.decimals = decimals

    def fit(self, X):
        """Cluster the rows of ``X``.

        Args:
            X: Array-like whose first axis indexes the samples.

        Returns:
            The fitted instance, so calls can be chained.
        """
        X = np.asarray(X)
        self.X = X
        n = len(X)
        self.visited = np.zeros(n, dtype=bool)
        self.cluster_labels = np.zeros(n, dtype=int)
        self.cluster_id = 0

        # One vectorised row per point instead of an n*n Python loop. Samples
        # are flattened so the norm is taken over all of their components.
        flat = X.reshape(n, -1) if n else X
        self.dist_matrix = np.zeros((n, n))
        for i in range(n):
            self.dist_matrix[i] = np.linalg.norm(flat - flat[i], axis=1)
        if self.decimals is not None:
            self.dist_matrix = np.round(self.dist_matrix, self.decimals)

        within_eps = self.dist_matrix <= self.eps
        self.neighbors = [np.flatnonzero(row) for row in within_eps]
        # Boolean mask gives O(1) core-point checks during expansion; the
        # list form is kept as the public attribute.
        self._is_core = within_eps.sum(axis=1) >= self.minPts
        self.core_points = np.flatnonzero(self._is_core).tolist()

        for i in self.core_points:
            if not self.visited[i]:
                self.cluster_id += 1
                self.expand_cluster(i)
        return self

    def expand_cluster(self, i):
        """Label every point density-reachable from core point ``i``.

        Performs a breadth-first traversal that assigns the current
        ``cluster_id``; only core points propagate the cluster further, so
        border points are labelled but not expanded.
        """
        from collections import deque

        queue = deque([i])
        while queue:
            point = queue.popleft()
            if self.visited[point]:
                continue
            self.visited[point] = True
            self.cluster_labels[point] = self.cluster_id
            if self._is_core[point]:
                for neigh in self.neighbors[point]:
                    # Points already claimed by a cluster are not revisited.
                    if self.cluster_labels[neigh] == 0:
                        queue.append(neigh)

    def noise_ratio(self):
        """Return the fraction of points labelled as noise (0.0 if no data)."""
        total = len(self.X)
        if total == 0:
            return 0.0
        noise_count = np.sum(self.cluster_labels == 0)
        return noise_count / total

    def print_results(self):
        """Print core points (1-based), cluster labels and the noise ratio."""
        print("Core points:", ' '.join(str(i+1) for i in self.core_points))
        print("Cluster Assignments:", ' '.join(str(lbl) for lbl in self.cluster_labels))
        print("Noise Ratio =", round(self.noise_ratio(), 2))

    def plot_clusters(self):
        """Scatter-plot the first two columns of the data coloured by cluster.

        Noise points are drawn as black crosses; cluster colours are reused
        cyclically when there are more clusters than palette entries.
        """
        plt.figure(figsize=(6, 5))
        colors = ['red', 'blue', 'green', 'orange', 'purple', 'cyan']
        for label in np.unique(self.cluster_labels):
            pts = self.X[self.cluster_labels == label]
            if label == 0:
                plt.scatter(pts[:, 0], pts[:, 1], marker='x', color='k', label='Noise')
            else:
                plt.scatter(pts[:, 0], pts[:, 1], color=colors[label % len(colors)], label=f'Cluster {label}')
        plt.title(f'DBSCAN Clustering (eps={self.eps}, minPts={self.minPts})')
        plt.xlabel('x1')
        plt.ylabel('x2')
        plt.legend()
        plt.show()