# Worksheet 07

Name:  Michelle Sun
UID: U14824452

### Topics

- Density-Based Clustering

### Density-Based Clustering

Follow along with the live coding of the DBSCAN algorithm.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import sklearn.datasets as datasets

# Sample 750 two-dimensional points around three blob centers (fixed seed so
# the data is reproducible); the labels returned by make_blobs are discarded.
centers = [[1, 1], [-1, -1], [1, -1]]
X, _ = datasets.make_blobs(
    n_samples=750,
    centers=centers,
    cluster_std=0.4,
    random_state=0,
)

# Show the raw, unclustered points.
plt.scatter(X[:, 0], X[:, 1], s=10, alpha=0.8)
plt.show()

class DBC():
    """Minimal DBSCAN (density-based clustering) over an indexable dataset.

    Points are referred to by their index in ``dataset``. Cluster labels
    live in ``self.assignment``: ``0`` means "not assigned" (i.e. noise once
    ``dbscan`` has finished) and clusters are numbered starting from ``1``.
    """

    def __init__(self, dataset, min_pts, epsilon):
        """Store the data and the clustering parameters.

        Args:
            dataset: sequence of equal-length numeric points (NumPy array or
                nested lists).
            min_pts: minimum number of neighbors a point needs to be a core
                point. The point itself is NOT counted.
            epsilon: neighborhood radius; the comparison is inclusive
                (``distance <= epsilon``).
        """
        # Coerce to an array so plain nested lists work too: distance()
        # subtracts two points element-wise, which Python lists do not support.
        self.dataset = np.asarray(dataset)
        self.min_pts = min_pts
        self.epsilon = epsilon
        self.assignment = [0] * len(self.dataset)

    def get_neighboorhood(self, i):
        """Return the indices (ascending) of all points within ``epsilon``
        of point ``i``, excluding ``i`` itself."""
        return [
            j for j in range(len(self.dataset))
            if j != i and self.distance(i, j) <= self.epsilon
        ]

    def distance(self, i, j):
        """Return the Euclidean distance between points ``i`` and ``j``."""
        return np.linalg.norm(self.dataset[i] - self.dataset[j])

    def is_core(self, i):
        """Return True when point ``i`` has at least ``min_pts`` neighbors."""
        return len(self.get_neighboorhood(i)) >= self.min_pts

    def assign(self, i, clus_num):
        """Label point ``i`` and everything density-reachable from it.

        ``i`` is labelled ``clus_num`` unconditionally (this method does not
        check that ``i`` is a core point; ``dbscan`` does that before calling).
        The cluster then grows through every newly labelled core point.
        Points that already carry a non-zero label are left untouched.
        """
        self.assignment[i] = clus_num
        # Indices still to visit. Visiting order does not change which points
        # end up in the cluster, so a plain list used as a stack is enough.
        frontier = self.get_neighboorhood(i)

        while frontier:
            point = frontier.pop()
            if self.assignment[point] != 0:
                # Duplicates land on the frontier when several core points
                # share a not-yet-labelled neighbor; skip repeats.
                continue
            self.assignment[point] = clus_num

            # Scan the neighborhood once and reuse it for both the core test
            # (same rule as is_core) and the expansion step.
            neighbors = self.get_neighboorhood(point)
            if len(neighbors) >= self.min_pts:
                frontier.extend(
                    n for n in neighbors if self.assignment[n] == 0
                )

        return

    def dbscan(self):
        """
        returns a list of assignments. The index of the
        assignment should match the index of the data point
        in the dataset.

        A label of 0 marks a point that belongs to no cluster (noise);
        clusters are numbered 1, 2, ... in the order their first core point
        appears in the dataset. The returned list is ``self.assignment``
        itself, not a copy.
        """
        clus_num = 1
        for i in range(len(self.dataset)):
            # Cheap label lookup first: is_core() scans the whole dataset, so
            # only run it for points that are still unassigned.
            if self.assignment[i] == 0 and self.is_core(i):
                self.assign(i, clus_num)
                clus_num += 1

        return self.assignment
    

# Cluster the points with min_pts=3 and epsilon=0.2.
clustering = DBC(X, 3, .2).dbscan()

# Build a long repeating palette of single-letter color codes and index it
# by label, so each point gets the color at its label's position (label 0
# maps to the first entry, 'b').
colors = np.tile(np.array(list('bgrcmykbgrcmykbgrcmykbgrcmyk')), 40)
plt.scatter(
    X[:, 0],
    X[:, 1],
    color=colors[clustering].tolist(),
    s=10,
    alpha=0.8,
)
plt.show()