## Задание 4
Реализация алгоритма кластеризации `DBSCAN`

In [39]:
import numpy as np

In [40]:
class DB_SCAN():
    """Density-based clustering (DBSCAN) over a sequence of points.

    Points are stored as given in ``dataset``; internally they are identified
    by ``tuple(point)``, so points with identical coordinates are treated as
    one point and always share a label.

    Attributes:
        dataset: the sequence of points to cluster.
        eps: neighbourhood threshold.  NOTE: it is compared against the
            *squared* Euclidean distance (see ``get_region``), i.e. the
            effective radius is ``sqrt(eps)``.
        min_samples: minimal neighbourhood size (the point itself included)
            for a point to be a core point.
        verbose: when true, ``fit`` prints the number of clusters and
            ``expand_cluster`` prints every finished cluster.
        n_clusters: number of clusters found so far (noise not counted).
        clusters: mapping ``label -> list of points``; label ``0`` holds noise,
            real clusters are numbered from 1.
        visited: tuples of points whose neighbourhood was already examined.
        clustered: tuples of points already assigned to a real cluster.
        fitted: whether ``fit`` has completed at least once.
    """

    def __init__(self, dataset, eps=30, min_samples=2, *, verbose=True):
        self.dataset = dataset
        self.eps = eps
        self.min_samples = min_samples
        self.verbose = verbose
        self.n_clusters = 0
        self.clusters = {0: []}
        self.visited = set()
        self.clustered = set()
        self.fitted = False

    def get_dist2(self, list1, list2):
        """Return the squared Euclidean distance between two points."""
        return sum((i - j) ** 2 for i, j in zip(list1, list2))

    def get_region(self, data):
        """Return all dataset points (as lists) in the neighbourhood of ``data``.

        A point belongs to the neighbourhood when its squared distance to
        ``data`` is strictly less than ``self.eps``.
        """
        # Must read the instance's own data, not a module-level variable.
        return [
            list(q) for q in self.dataset
            if self.get_dist2(data, q) < self.eps
        ]

    def fit(self):
        """Cluster ``self.dataset``, filling ``self.clusters``.

        Points whose neighbourhood is smaller than ``min_samples`` are
        provisionally stored as noise (label 0); they may later be moved into
        a cluster by ``expand_cluster`` if they turn out to be border points.
        """
        for p in self.dataset:
            key = tuple(p)
            if key in self.visited:
                continue
            self.visited.add(key)
            neighbours = self.get_region(p)
            if len(neighbours) < self.min_samples:
                self.clusters[0].append(list(p))
            else:
                self.n_clusters += 1
                self.expand_cluster(p, neighbours)
        self.fitted = True
        if self.verbose:
            print(self.n_clusters)

    def expand_cluster(self, p, neighbours):
        """Grow cluster number ``self.n_clusters`` starting from core point ``p``.

        ``neighbours`` is used as a work stack and is consumed (emptied) by
        this call.
        """
        members = self.clusters.setdefault(self.n_clusters, [])
        noise = self.clusters[0]
        self.clustered.add(tuple(p))
        members.append(list(p))
        while neighbours:
            q = neighbours.pop()
            key = tuple(q)
            if key not in self.visited:
                self.visited.add(key)
                q_neighbours = self.get_region(q)
                # Same core-point criterion as in fit(): at least
                # min_samples points in the neighbourhood.
                if len(q_neighbours) >= self.min_samples:
                    neighbours.extend(q_neighbours)
            if key not in self.clustered:
                self.clustered.add(key)
                members.append(q)
                # A point earlier marked as noise is a border point of
                # this cluster, so it must leave the noise bucket.
                if q in noise:
                    noise.remove(q)
        if self.verbose:
            print(members)

    def get_labels(self):
        """Return a NumPy integer array with one cluster label per dataset point.

        Labels follow the order of ``self.dataset``; ``0`` means noise.
        Runs ``fit`` first if the model has not been fitted yet.
        """
        if not self.fitted:
            self.fit()
        label_of = {}
        for label, members in self.clusters.items():
            for point in members:
                label_of[tuple(point)] = label
        return np.array(
            [label_of[tuple(point)] for point in self.dataset], dtype=int
        )


In [41]:
n = 100
# Build n random 2-D points; each coordinate is an integer drawn from [1, 100).
dataset = [
    [np.random.randint(1, 100), np.random.randint(1, 100)]
    for _ in range(n)
]

In [42]:
# eps is compared against the squared distance inside DB_SCAN.get_region.
scan = DB_SCAN(dataset, eps=200, min_samples=2)
# Neighbourhood query for the first point; the result is not used here.
scan.get_region(dataset[0])
scan.fit()



[[56, 84], [68, 90], [66, 78], [77, 72], [80, 62], [82, 56], [78, 43], [88, 50], [94, 60], [98, 63], [96, 71], [88, 81], [89, 83], [95, 94], [99, 95], [90, 68], [95, 61], [97, 48], [93, 59], [80, 59], [70, 57], [70, 53], [73, 43], [60, 46], [49, 45], [38, 52], [37, 53], [34, 63], [29, 67], [17, 64], [17, 54], [17, 49], [16, 38], [26, 36], [39, 33], [42, 43], [53, 48], [50, 35], [50, 30], [36, 29], [29, 17], [19, 19], [13, 14], [7, 8], [19, 17], [30, 13], [5, 23], [32, 38], [36, 53], [30, 55], [26, 66], [23, 69], [26, 81], [35, 86], [45, 95], [45, 99], [47, 89], [47, 96], [47, 92], [44, 81], [54, 72], [65, 76], [69, 83], [59, 80], [66, 86], [76, 69], [17, 89], [25, 99], [21, 93], [7, 93], [3, 80], [16, 75], [26, 74], [23, 68], [30, 56], [39, 65], [12, 99], [16, 95], [14, 41], [2, 44], [2, 52], [3, 61], [5, 64], [67, 42], [66, 56], [67, 52], [78, 45], [83, 40], [94, 47], [93, 53], [76, 35], [68, 52]]
[[87, 25], [95, 24]]
[[69, 12], [61, 15], [54, 5], [63, 23], [76, 12]]
3
