In [None]:
import numpy as np
from PIL import Image as im
import matplotlib.pyplot as plt
import sklearn.datasets as datasets

# Scratch PNG used by DBC.snapshot(): every rendered frame is saved to this
# path and read straight back in as a PIL image. The path is relative, so the
# file is created in the current working directory and is overwritten by each
# snapshot.
TEMPFILE = 'temp.png'


class DBC():
    """DBSCAN-style density clustering that can record its progress as frames.

    Labels live in ``self.assignments`` (a plain list, one entry per point):
    ``0`` means "not assigned to any cluster" (not visited yet, or left as
    noise), and clusters are numbered consecutively from ``1`` in the order
    they are discovered.

    A point is a *core* point when at least ``min_pts`` OTHER points lie
    within ``epsilon`` of it; the point itself is not counted.
    """

    # Colour per label when drawing a frame. Index 0 is reserved for
    # unassigned points; cluster labels cycle through the remaining entries.
    _PALETTE = ['red', 'green', 'orange', 'yellow',
                'blue', 'purple', 'pink', 'brown']

    def __init__(self, dataset, min_pts, epsilon, record_snapshots=True):
        """Set up an unclustered model.

        Args:
            dataset: sequence of points (array or nested lists); it is
                converted with ``np.asarray`` so distances can be computed.
            min_pts: minimum number of other points within ``epsilon`` for a
                point to count as a core point.
            epsilon: neighbourhood radius (Euclidean distance).
            record_snapshots: when true (the default), a rendered frame is
                appended to ``self.snaps`` each time a point joins a cluster.
                Pass ``False`` to cluster without any plotting or file I/O.
        """
        self.dataset = np.asarray(dataset)
        self.min_pts = min_pts
        self.epsilon = epsilon
        self.record_snapshots = record_snapshots
        self.snaps = []
        # Index of the point most recently added to a cluster; None until the
        # first expansion step. snapshot() draws the epsilon circle around it.
        self.new_point = None
        self.assignments = [0] * len(self.dataset)

    def snapshot(self):
        """Render the current labelling and return it as a PIL image.

        The frame is written to ``TEMPFILE`` and read back. The axes are fixed
        to x in [-2, 2] and y in [-1, 3], so points outside that window are
        not visible. Only the first two coordinates of each point are drawn.
        """
        fig, ax = plt.subplots()
        try:
            palette = self._PALETTE
            cluster_slots = len(palette) - 1
            # Labels 1..7 map to palette[1..7] exactly as before; larger
            # labels wrap around instead of running off the end of the list.
            point_colors = [
                palette[0] if label == 0
                else palette[1 + (label - 1) % cluster_slots]
                for label in self.assignments
            ]
            # One scatter call for the whole dataset rather than one per point.
            ax.scatter(self.dataset[:, 0], self.dataset[:, 1], c=point_colors)

            if self.new_point is not None:
                cir = plt.Circle(self.dataset[self.new_point],
                                 self.epsilon, fill=False, color='black')
                ax.add_patch(cir)

            ax.set_xlim(-2, 2)
            ax.set_ylim(-1, 3)
            # necessary or else the circles appear to be oval shaped
            ax.set_aspect('equal')

            fig.savefig(TEMPFILE)
        finally:
            # Close this specific figure even if drawing or saving failed, so
            # figures do not pile up over hundreds of frames.
            plt.close(fig)

        # Materialise the pixels before the file handle is released.
        with im.open(TEMPFILE) as frame:
            return im.fromarray(np.asarray(frame))

    def _within_epsilon(self, i):
        """Return a boolean mask of the points within ``epsilon`` of point ``i``.

        Point ``i`` itself is always excluded from the mask.
        """
        # Flatten each point so the row-wise norm works for any point shape.
        offsets = (self.dataset - self.dataset[i]).reshape(
            len(self.dataset), -1)
        mask = np.linalg.norm(offsets, axis=1) <= self.epsilon
        mask[i] = False
        return mask

    def is_core(self, i):
        """Return True when point ``i`` has at least ``min_pts`` neighbours."""
        neighbor_count = int(np.count_nonzero(self._within_epsilon(i)))
        return bool(neighbor_count >= self.min_pts)

    def get_unlabeled_neighbors(self, i):
        """Return the indexes (ascending) of label-0 points near point ``i``."""
        unlabeled = np.asarray(self.assignments) == 0
        return np.flatnonzero(self._within_epsilon(i) & unlabeled).tolist()

    def dfs_assign(self, i, cluster_num):
        """Grow cluster ``cluster_num`` outwards from point ``i``.

        Uses an explicit stack (depth-first). Every reached point gets the
        label; only core points push their own unlabeled neighbours, so
        border points join the cluster without extending it. The caller is
        expected to have checked that ``i`` is a core point.
        """
        self.assignments[i] = cluster_num
        stack = self.get_unlabeled_neighbors(i)

        while stack:
            next_candidate = stack.pop()

            if self.assignments[next_candidate] != 0:
                # Expected, not an error: the same point can be pushed by
                # several core points before it is finally popped.
                continue

            self.assignments[next_candidate] = cluster_num
            self.new_point = next_candidate
            if self.record_snapshots:
                self.snaps.append(self.snapshot())
            if self.is_core(next_candidate):
                stack.extend(self.get_unlabeled_neighbors(next_candidate))

    def dbscan(self):
        """Cluster the dataset and return ``self.assignments``.

        Points that are never reached from a core point keep label ``0``.
        """
        cluster_num = 1
        for i in range(len(self.dataset)):
            if self.assignments[i] != 0:
                # already assigned to a cluster - no need to re-evaluate
                continue
            if not self.is_core(i):
                # Stays 0 for now; a later cluster may still absorb it as a
                # border point.
                continue
            self.dfs_assign(i, cluster_num)
            # Advance the label only after a cluster was actually created, so
            # cluster ids stay consecutive.
            cluster_num += 1
        return self.assignments


# --- Build a "smiley face" dataset -----------------------------------------
# Eyes: two Gaussian blobs of 200 samples in total, seeded for repeatability.
centers = [[-0.75, 2], [0.75, 2]]
eyes, _ = datasets.make_blobs(
    n_samples=200,
    centers=centers,
    cluster_std=0.2,
    random_state=0,
)

# Mouth: a parabola with Gaussian noise added. The noise comes from NumPy's
# global RNG and no seed is set in the visible code, so it varies per run.
mouth_x = np.arange(-1.5, 1.5, 0.01)
mouth_y = 0.25 * (mouth_x * mouth_x) + 0.2 * np.random.randn(mouth_x.size)

# Stack the eye points on top of the (x, y) mouth points.
face = np.concatenate((eyes, np.column_stack((mouth_x, mouth_y))), axis=0)

# --- Cluster it, recording one frame per newly assigned point ---------------
dbc = DBC(dataset=face, min_pts=3, epsilon=0.2)
clustering = dbc.dbscan()

# Plain scatter of the raw points; add plt.show() here to display it.
plt.scatter(face[:, 0], face[:, 1], s=10, alpha=0.8)

# --- Write the recorded frames out as a looping GIF -------------------------
dbc.snaps[0].save(
    'dbscan-animation.gif',
    save_all=True,
    append_images=dbc.snaps[1:],
    optimize=False,
    duration=25,
    loop=0,
)
