In [4]:
import numpy as np
import pandas as pd
import scipy.spatial.distance as dist
from collections import defaultdict

In [2]:
%%javascript
// Replace the output area's scroll check with one that always answers "no".
// NOTE(review): presumably this keeps long cell output from being collapsed
// into a scrollable box in the classic notebook UI - confirm against the
// notebook front-end in use.
IPython.OutputArea.prototype._should_scroll = (lines) => false;

<IPython.core.display.Javascript object>

In [150]:
class DBSCAN:
    """Verbose, step-by-step DBSCAN for small worked examples.

    The class prints every intermediate step (proximity matrix, neighbourhoods,
    density, traversal trace) while it clusters ``points``.

    Parameters
    ----------
    points : list
        The observations. Each point is used as a dictionary key and as a
        DataFrame label, so it must be hashable (a number or a tuple of
        numbers). Points are assumed to be distinct; duplicates would share
        one neighbourhood entry.
    eps : float
        Neighbourhood radius; two points are neighbours when their (rounded)
        distance is ``<= eps``.
    min_pts : int
        Minimum density for a core point. Density counts the point itself
        (number of neighbours + 1).
    distance : str, default 'Euclidean'
        Either ``'Euclidean'`` or ``'Manhattan'``.
    rounding_digit : int, default 2
        Number of decimals every distance is rounded to before it is stored
        and compared with ``eps``.

    Attributes set by :meth:`cluster`
    ---------------------------------
    proximity_matrix, neighbourhood_matrix : pandas.DataFrame
    neighbourhood_dict : defaultdict(set)
    core_points, border_points, noise_points, visited : list
    clusters : list of list
    """

    def __init__(self, points, eps, min_pts, distance='Euclidean', rounding_digit = 2):
        self.points = points
        self.distance = distance
        self.eps = eps
        self.min_pts = min_pts
        self.rounding_digit = rounding_digit

    def cal_distance(self, x1, x2):
        """Return the distance between ``x1`` and ``x2`` rounded to ``rounding_digit``.

        Scalars are treated as one-dimensional points. Both metrics are rounded
        the same way so that floating-point noise (e.g. ``0.1 + 0.2``) cannot
        push a distance just above ``eps``.

        Raises
        ------
        ValueError
            If the two points differ in dimensionality, or if ``self.distance``
            is not ``'Euclidean'`` or ``'Manhattan'``.
        """
        # Normalise to 1-D float vectors so scalar points work for both metrics.
        first = np.atleast_1d(np.asarray(x1, dtype=float))
        second = np.atleast_1d(np.asarray(x2, dtype=float))
        if first.shape != second.shape:
            raise ValueError(
                f'Points {x1!r} and {x2!r} do not have the same dimensionality')

        if self.distance == 'Euclidean':
            raw = dist.euclidean(first, second)
        elif self.distance == 'Manhattan':
            raw = sum(abs(i - j) for i, j in zip(first, second))
        else:
            # Previously fell through and returned None, which only failed
            # later as an obscure comparison error.
            raise ValueError(
                f"Unsupported distance {self.distance!r}; "
                "expected 'Euclidean' or 'Manhattan'")
        return round(raw, self.rounding_digit)

    @staticmethod
    def _show(frame):
        """Render ``frame`` richly inside IPython, or print it as text elsewhere."""
        try:
            renderer = display  # injected into builtins by IPython/Jupyter
        except NameError:
            print(frame.to_string())
            return
        renderer(frame)

    def cluster(self):
        """Run DBSCAN on ``self.points`` and print a full trace of the procedure.

        Results are stored on the instance (``clusters``, ``core_points``,
        ``border_points``, ``noise_points``, ``proximity_matrix``,
        ``neighbourhood_matrix``); the method itself returns ``None``.
        """
        total = len(self.points)

        print('Points are:\n')
        self.pt_list = [f'P{i+1}' for i in range(total)]
        self.points_df = pd.Series(self.points, index=self.pt_list)
        print(self.points_df.to_string())

        # Lower-triangular distance table; the upper triangle stays blank.
        # dtype=object lets the blanks ('') and the numeric distances coexist.
        self.proximity_matrix = pd.DataFrame(
            '', index=self.points, columns=self.points, dtype=object)
        for i in range(total):
            for j in range(i + 1):
                self.proximity_matrix.iloc[i, j] = self.cal_distance(
                    self.points[i], self.points[j])

        print(f'\neps = {self.eps}, MinPts = {self.min_pts}')
        print(f'\nProximity matrix based on {self.distance} distance')
        self._show(self.proximity_matrix)

        # Symmetric eps-neighbourhoods (a point is never its own neighbour).
        self.neighbourhood_dict = defaultdict(set)
        for i in range(total):
            for j in range(i):
                if self.proximity_matrix.iloc[i, j] <= self.eps:
                    self.neighbourhood_dict[self.points[i]].add(self.points[j])
                    self.neighbourhood_dict[self.points[j]].add(self.points[i])

        neighbour_sets = [self.neighbourhood_dict[p] for p in self.points]
        densities = [len(members) + 1 for members in neighbour_sets]  # +1: the point itself
        point_types = ['Core' if d >= self.min_pts else 'Noise' for d in densities]

        self.neighbourhood_matrix = pd.DataFrame()
        self.neighbourhood_matrix['Point'] = self.points
        self.neighbourhood_matrix['Neighbours'] = neighbour_sets
        self.neighbourhood_matrix['Density'] = densities
        self.neighbourhood_matrix['Point Type'] = point_types
        print('\nComputing neighbourhood and density of each point:')

        # Built from the original point objects so the lists hold plain Python
        # values rather than NumPy scalars.
        self.core_points = [p for p, kind in zip(self.points, point_types) if kind == 'Core']
        self.noise_points = [p for p, kind in zip(self.points, point_types) if kind == 'Noise']

        self.neighbourhood_matrix = self.neighbourhood_matrix.set_index('Point')
        self._show(self.neighbourhood_matrix)
        print(f'\nCore Points: {self.core_points}\nNoise Points: {self.noise_points}')

        self.visited = []
        self.clusters = []
        self.border_points = []

        print('\nIterating over each core point')
        self.cluster_count = 0
        for point in self.core_points:
            print(f'\nPoint: {point}')
            if point not in self.visited:
                # An unvisited core point seeds a new cluster.
                members = []
                self.cluster_count += 1
                self.visit_point(point, members)
                self.clusters.append(members)
            else:
                print(f'\nPoint {point} already visited')
            print('=======================================================================')

        print('\nFinal Clusters:')
        for i, members in enumerate(self.clusters):
            print(f'   C{i+1}: {members}')

        print(f'\nCore Points: {self.core_points}')
        print(f'\nNoise Points: {self.noise_points}')
        print(f'\nBorder Points: {self.border_points}')

        # Points first labelled 'Noise' that turned out to neighbour a core
        # point are relabelled 'Border'. Rows are in the order of self.points.
        self.neighbourhood_matrix['Point Type'] = [
            'Border' if p in self.border_points else kind
            for p, kind in zip(self.points, self.neighbourhood_matrix['Point Type'])
        ]

        print('\nNeighbourhood matrix:')
        self._show(self.neighbourhood_matrix)

    def visit_point(self, point, cluster):
        """Add core ``point`` to ``cluster`` and recursively expand through its neighbours.

        Core neighbours are visited recursively; non-core neighbours become
        border points of ``cluster``. A border point that was already claimed
        (by this or an earlier cluster) is left where it is, so it is never
        duplicated and never removed from ``noise_points`` twice.

        Note: expansion is recursive, so a very long chain of core points could
        hit Python's recursion limit.
        """
        if point not in self.visited:
            print(f'\nVisiting Point: {point}')
            self.visited.append(point)
            cluster.append(point)
            print(f'\nVisited: {self.visited}')
            print(f'Cluster, C{self.cluster_count}: {cluster}')
            neighbours = self.neighbourhood_dict[point]
            print(f'\nNeighbours N({point}): {neighbours}')
            print(f'\nVisiting neighbours of point {point}')
            for neighbour in neighbours:
                print(f'\nNeighbour: {neighbour}')
                if neighbour in self.core_points:
                    print(f'\n{neighbour} is a core point')
                    self.visit_point(neighbour, cluster)
                elif neighbour in self.border_points:
                    # Reached again via another core point: keep the first
                    # assignment instead of re-adding (the unconditional
                    # remove() used to raise ValueError here).
                    print(f'\n{neighbour} is a border point (already assigned to a cluster)')
                else:
                    print(f'\n{neighbour} is a border point')
                    self.border_points.append(neighbour)
                    if neighbour in self.noise_points:
                        self.noise_points.remove(neighbour)
                    cluster.append(neighbour)
                    print(f'Cluster, C{self.cluster_count}: {cluster}')
                print('---------------------------------------')
        else:
            print(f'\nPoint {point} already visited')
        print('-----------------------------------------------------------------')
        

In [151]:
# Worked example: six 2-D points clustered with the Euclidean metric.
DBSCAN(
    [
        (0.40, 0.53),
        (0.22, 0.38),
        (0.35, 0.32),
        (0.26, 0.19),
        (0.08, 0.41),
        (0.45, 0.30),
    ],
    eps=0.21,
    min_pts=3,
    distance='Euclidean',
).cluster()

Points are:

P1     (0.4, 0.53)
P2    (0.22, 0.38)
P3    (0.35, 0.32)
P4    (0.26, 0.19)
P5    (0.08, 0.41)
P6     (0.45, 0.3)

eps = 0.21, MinPts = 3

Proximity matrix based on Euclidean distance


Unnamed: 0,"(0.4, 0.53)","(0.22, 0.38)","(0.35, 0.32)","(0.26, 0.19)","(0.08, 0.41)","(0.45, 0.3)"
"(0.4, 0.53)",0.0,,,,,
"(0.22, 0.38)",0.23,0.0,,,,
"(0.35, 0.32)",0.22,0.14,0.0,,,
"(0.26, 0.19)",0.37,0.19,0.16,0.0,,
"(0.08, 0.41)",0.34,0.14,0.28,0.28,0.0,
"(0.45, 0.3)",0.24,0.24,0.1,0.22,0.39,0.0



Computing neighbourhood and density of each point:


Unnamed: 0_level_0,Neighbours,Density,Point Type
Point,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"(0.4, 0.53)",{},1,Noise
"(0.22, 0.38)","{(0.26, 0.19), (0.35, 0.32), (0.08, 0.41)}",4,Core
"(0.35, 0.32)","{(0.26, 0.19), (0.22, 0.38), (0.45, 0.3)}",4,Core
"(0.26, 0.19)","{(0.35, 0.32), (0.22, 0.38)}",3,Core
"(0.08, 0.41)","{(0.22, 0.38)}",2,Noise
"(0.45, 0.3)","{(0.35, 0.32)}",2,Noise



Core Points: [(0.22, 0.38), (0.35, 0.32), (0.26, 0.19)]
Noise Points: [(0.4, 0.53), (0.08, 0.41), (0.45, 0.3)]

Iterating over each core point

Point: (0.22, 0.38)

Visiting Point: (0.22, 0.38)

Visited: [(0.22, 0.38)]
Cluster, C1: [(0.22, 0.38)]

Neighbours N((0.22, 0.38)): {(0.26, 0.19), (0.35, 0.32), (0.08, 0.41)}

Visiting neighbours of point (0.22, 0.38)

Neighbour: (0.26, 0.19)

(0.26, 0.19) is a core point

Visiting Point: (0.26, 0.19)

Visited: [(0.22, 0.38), (0.26, 0.19)]
Cluster, C1: [(0.22, 0.38), (0.26, 0.19)]

Neighbours N((0.26, 0.19)): {(0.35, 0.32), (0.22, 0.38)}

Visiting neighbours of point (0.26, 0.19)

Neighbour: (0.35, 0.32)

(0.35, 0.32) is a core point

Visiting Point: (0.35, 0.32)

Visited: [(0.22, 0.38), (0.26, 0.19), (0.35, 0.32)]
Cluster, C1: [(0.22, 0.38), (0.26, 0.19), (0.35, 0.32)]

Neighbours N((0.35, 0.32)): {(0.26, 0.19), (0.22, 0.38), (0.45, 0.3)}

Visiting neighbours of point (0.35, 0.32)

Neighbour: (0.26, 0.19)

(0.26, 0.19) is a core point

Point 

Unnamed: 0_level_0,Neighbours,Density,Point Type
Point,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"(0.4, 0.53)",{},1,Noise
"(0.22, 0.38)","{(0.26, 0.19), (0.35, 0.32), (0.08, 0.41)}",4,Core
"(0.35, 0.32)","{(0.26, 0.19), (0.22, 0.38), (0.45, 0.3)}",4,Core
"(0.26, 0.19)","{(0.35, 0.32), (0.22, 0.38)}",3,Core
"(0.08, 0.41)","{(0.22, 0.38)}",2,Border
"(0.45, 0.3)","{(0.35, 0.32)}",2,Border


In [145]:
# Worked example: nine 1-D points clustered with the Euclidean metric.
DBSCAN(
    [2, 10, 12, 4, 25, 3, 30, 20, 11],
    eps=2,
    min_pts=3,
    distance='Euclidean',
).cluster()

Points are:

P1     2
P2    10
P3    12
P4     4
P5    25
P6     3
P7    30
P8    20
P9    11
dtype: int64

eps = 2, MinPts = 3

Proximity matrix based on Euclidean distance


Unnamed: 0,2,10,12,4,25,3,30,20,11
2,0.0,,,,,,,,
10,8.0,0.0,,,,,,,
12,10.0,2.0,0.0,,,,,,
4,2.0,6.0,8.0,0.0,,,,,
25,23.0,15.0,13.0,21.0,0.0,,,,
3,1.0,7.0,9.0,1.0,22.0,0.0,,,
30,28.0,20.0,18.0,26.0,5.0,27.0,0.0,,
20,18.0,10.0,8.0,16.0,5.0,17.0,10.0,0.0,
11,9.0,1.0,1.0,7.0,14.0,8.0,19.0,9.0,0.0



Computing neighbourhood and density of each point:


Unnamed: 0_level_0,Neighbours,Density,Point Type
Point,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,"{3, 4}",3,Core
10,"{11, 12}",3,Core
12,"{10, 11}",3,Core
4,"{2, 3}",3,Core
25,{},1,Noise
3,"{2, 4}",3,Core
30,{},1,Noise
20,{},1,Noise
11,"{10, 12}",3,Core



Core Points: [2, 10, 12, 4, 3, 11]
Noise Points: [25, 30, 20]

Iterating over each core point

Point: 2

Visiting Point: 2

Visited: [2]
Cluster, C1: [2]

Neighbours N(2): {3, 4}

Visiting neighbours of point 2

Neighbour: 3

3 is a core point

Visiting Point: 3

Visited: [2, 3]
Cluster, C1: [2, 3]

Neighbours N(3): {2, 4}

Visiting neighbours of point 3

Neighbour: 2

2 is a core point

Point 2 already visited
-----------------------------------------------------------------
---------------------------------------

Neighbour: 4

4 is a core point

Visiting Point: 4

Visited: [2, 3, 4]
Cluster, C1: [2, 3, 4]

Neighbours N(4): {2, 3}

Visiting neighbours of point 4

Neighbour: 2

2 is a core point

Point 2 already visited
-----------------------------------------------------------------
---------------------------------------

Neighbour: 3

3 is a core point

Point 3 already visited
-----------------------------------------------------------------
--------------------------------------

Unnamed: 0_level_0,Neighbours,Density,Point Type
Point,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,"{3, 4}",3,Core
10,"{11, 12}",3,Core
12,"{10, 11}",3,Core
4,"{2, 3}",3,Core
25,{},1,Noise
3,"{2, 4}",3,Core
30,{},1,Noise
20,{},1,Noise
11,"{10, 12}",3,Core
