In [1]:
# https://towardsdatascience.com/understanding-dbscan-algorithm-and-implementation-from-scratch-c256289479c5?gi=769a841f4ce1
# https://lifewithdata.com/2023/05/28/implementing-the-dbscan-algorithm-from-scratch-in-python/

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
class DBSCAN:
    """Minimal DBSCAN clustering using Euclidean distance.

    A point is a *core* point when its epsilon-neighborhood (the point itself
    included) contains at least ``min_pts`` points. Clusters are grown outward
    from core points; points that end up in no cluster are reported as noise.

    Attributes:
        epsilon: Neighborhood radius (inclusive).
        min_pts: Minimum neighborhood size for a point to count as core.
        clusters: Result of the most recent ``fit``: a list of clusters, each
            a list of row indices into the fitted data.
        noise: Result of the most recent ``fit``: row indices that belong to
            no cluster.
    """

    def __init__(self, epsilon=1, min_pts=5):
        """Store the hyper-parameters and start with empty results."""
        self.epsilon = epsilon
        self.min_pts = min_pts
        self.clusters = []
        self.noise = []

    @staticmethod
    def _euclidean_distance(point1, point2):
        """Return the Euclidean (L2) distance between two points."""
        return np.sqrt(np.sum((point1 - point2) ** 2))

    def _get_neighbors(self, X, point):
        """Return indices of every row of ``X`` within ``epsilon`` of ``point``.

        The comparison is inclusive (``<=``), following the usual DBSCAN
        definition of the epsilon-neighborhood, so points lying exactly
        ``epsilon`` apart are neighbors. ``point`` itself is included whenever
        it is a row of ``X``.
        """
        return [
            index
            for index, candidate in enumerate(X)
            if self._euclidean_distance(point, candidate) <= self.epsilon
        ]

    def fit(self, X):
        """Cluster the rows of ``X``.

        Args:
            X: Array-like of points, one point per row. It is converted to a
                float array so nested lists work and integer inputs cannot
                wrap around when subtracted.

        Returns:
            A ``(clusters, noise)`` tuple; the same lists are also stored on
            ``self.clusters`` and ``self.noise``.
        """
        X = np.asarray(X, dtype=float)

        # Start from a clean slate so repeated fit() calls do not accumulate
        # results from earlier runs.
        self.clusters = []
        self.noise = []

        assigned = set()  # indices already placed in some cluster
        visited = [False] * len(X)

        for index, point in enumerate(X):
            if visited[index]:
                continue
            visited[index] = True
            neighbors = self._get_neighbors(X, point)
            if len(neighbors) < self.min_pts:
                # Only tentatively noise: a later core point may still claim
                # this point as a border member of its cluster.
                self.noise.append(index)
            else:
                self._expand_cluster(X, visited, index, neighbors, assigned)

        # Drop tentative noise points that were absorbed into a cluster.
        self.noise = [index for index in self.noise if index not in assigned]
        return self.clusters, self.noise

    def _expand_cluster(self, X, visited, index, neighbors, assigned=None):
        """Start a new cluster at core point ``index`` and grow it.

        Args:
            X: The data being clustered.
            visited: Per-row flags; rows examined here are marked in place.
            index: Row index of the core point that seeds the cluster.
            neighbors: Indices in the epsilon-neighborhood of ``index``.
                The list is not modified.
            assigned: Optional set of indices that already belong to a
                cluster; updated in place. When omitted it is rebuilt from
                ``self.clusters``.
        """
        if assigned is None:
            assigned = {
                member for cluster in self.clusters for member in cluster
            }

        cluster = [index]
        self.clusters.append(cluster)
        assigned.add(index)

        # Work on a copy so the caller's list is untouched, and remember what
        # has been queued so each index is enqueued at most once.
        queue = list(neighbors)
        queued = set(queue)

        position = 0
        while position < len(queue):
            next_index = queue[position]
            position += 1

            if not visited[next_index]:
                visited[next_index] = True
                next_neighbors = self._get_neighbors(X, X[next_index])
                # Only core points propagate the cluster further.
                if len(next_neighbors) >= self.min_pts:
                    for neighbor in next_neighbors:
                        if neighbor not in queued:
                            queued.add(neighbor)
                            queue.append(neighbor)

            # Reachable points join this cluster unless another cluster
            # already owns them.
            if next_index not in assigned:
                assigned.add(next_index)
                cluster.append(next_index)
    
    