In [6]:
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances, manhattan_distances

In [97]:
class DBScan:
    """Density-based clustering (DBSCAN) on top of a pairwise distance matrix.

    A point is a *core point* when at least ``minPts`` points lie within
    ``eps`` of it. The point itself is counted whenever its self-distance
    is <= ``eps``. Core points that are neighbours of each other are
    chained into one cluster; a non-core point within ``eps`` of a core
    point joins the first cluster that reaches it; every other point keeps
    the label ``-1`` (outlier).

    All fitted state is stored per instance and rebuilt on every ``fit``
    call, so models do not influence each other and refitting is safe.

    Parameters
    ----------
    eps : float
        Neighbourhood radius (inclusive).
    minPts : int
        Minimum neighbourhood size for a point to be a core point.
    metrics : {"euclidean", "manhattan"}, default "euclidean"
        Selects the pairwise distance function.

    Raises
    ------
    ValueError
        If ``metrics`` is not one of the supported names.
    """

    def __init__(self, eps, minPts, metrics="euclidean"):
        if metrics == "euclidean":
            self.distanceFunction = euclidean_distances
        elif metrics == "manhattan":
            self.distanceFunction = manhattan_distances
        else:
            # ValueError is still caught by callers using `except Exception`.
            raise ValueError(
                "Doesn't support " + str(metrics)
                + " metrics. Try euclidean or manhattan"
            )

        self.eps = eps
        self.minPts = minPts

        # Per-instance state. These used to be class-level lists mutated in
        # place, which made every instance share (and keep growing) them.
        self.__neighborhood = []
        self.__centroids = []
        self.__curr_cluster_label = 0
        self.__labels = []

    def _pre_set_neighbor(self, data):
        """Rebuild the neighbour lists for ``data``.

        Distances come from ``self.distanceFunction(data, data)``. After the
        call, entry ``i`` of the neighbour table is the ascending list of
        indices ``j`` whose distance to point ``i`` is <= ``eps``.
        """
        distances = np.asarray(self.distanceFunction(data, data))
        # Assign a fresh list instead of appending so a refit never sees
        # neighbours left over from an earlier fit.
        self.__neighborhood = [
            np.flatnonzero(row <= self.eps).tolist() for row in distances
        ]

    def _find_centroids(self):
        """Rebuild the list of core points.

        A point is a core point when its neighbour list holds at least
        ``minPts`` indices. Indices are stored in ascending order.
        """
        self.__centroids = [
            index
            for index, neighbours in enumerate(self.__neighborhood)
            if len(neighbours) >= self.minPts
        ]

    def _set_label(self, labels):
        """Assign a cluster label to every point reachable from a core point.

        ``labels`` must hold ``-1`` for every point that is still unassigned;
        it is modified in place and also returned. Points never reached from
        a core point keep ``-1``.
        """
        core_points = set(self.__centroids)  # O(1) membership test

        for seed in self.__centroids:
            if labels[seed] != -1:
                continue  # already absorbed by an earlier cluster

            cluster = self.__curr_cluster_label
            labels[seed] = cluster
            pending = [seed]

            while pending:
                point = pending.pop()
                if point not in core_points:
                    # Border point: it belongs to the cluster but does not
                    # extend it.
                    continue
                for neighbour in self.__neighborhood[point]:
                    if labels[neighbour] == -1:
                        # Label on push so each point is queued at most once.
                        labels[neighbour] = cluster
                        pending.append(neighbour)

            self.__curr_cluster_label += 1

        return labels

    def fit(self, data):
        """Cluster ``data`` and store the labels for ``getLabels``.

        ``data`` is passed to the distance function twice
        (``distanceFunction(data, data)``) and must support ``len``.
        Any state from a previous fit is discarded first.
        """
        self.__curr_cluster_label = 0  # numbering restarts on every fit
        self._pre_set_neighbor(data)
        self._find_centroids()

        labels = [-1] * len(data)  # every point starts as an outlier
        self.__labels = self._set_label(labels)

    def getLabels(self):
        """Return the labels of the most recent fit as a NumPy array."""
        return np.array(self.__labels)

In [82]:
from sklearn.datasets import load_iris

# Keep only the feature matrix; the target column is not used for clustering.
iris = load_iris().data

In [98]:
# Cluster the iris features with eps=0.5 and a minimum of 4 points per core neighbourhood.
model = DBScan(eps=0.5, minPts=4)
model.fit(iris)
print(model.getLabels())

[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 -1  0  0  0  0  0  0
  0  0  1  1  1  1  1  1  1  2  1  1  2  1  1  1  1  1  1  1 -1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 -1  1  1  1  1  1  2  1  1
  1  1  2  1  1  1  1  1  1 -1 -1  1 -1 -1  1  1  1  1  1  1  1 -1 -1  1
  1  1 -1  1  1  1  1  1  1  1  1 -1  1  1 -1 -1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1]


In [60]:
from sklearn.cluster import DBSCAN as DBSCAN_SKLEARN

In [90]:
# Reference run: scikit-learn's DBSCAN with the same eps and minimum sample count.
dbscan_sklearn = DBSCAN_SKLEARN(eps=0.5, min_samples=4)
dbscan_sklearn.fit(iris)
print(dbscan_sklearn.labels_)

[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 -1  0  0  0  0  0  0
  0  0  1  1  1  1  1  1  1  2  1  1  2  1  1  1  1  1  1  1 -1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 -1  1  1  1  1  1  2  1  1
  1  1  2  1  1  1  1  1  1 -1 -1  1 -1 -1  1  1  1  1  1  1  1 -1 -1  1
  1  1 -1  1  1  1  1  1  1  1  1 -1  1  1 -1 -1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1]


### Check that our labels match scikit-learn's

In [99]:
# Element-wise comparison of our labels against the scikit-learn reference labels.
print(model.getLabels() == dbscan_sklearn.labels_)

[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True]
