In [1]:
import multiprocessing as mp
from functools import partial
import numpy as np
import pandas as pd
import skimage.io
import random
import copy
import math

In [2]:
def picture_to_pixelmap(picture_path: str) -> np.array:
    """
    Convert a image to a np.array of pixels with RGB components.

    Parameters
    ----------
    picture_path : str
        Image file path.

    Returns
    -------
    np.array
    """
    pixel_map = skimage.io.imread(picture_path).copy()

    # Remove alpha component if it's a PNG file.
    if picture_path[-3::] == "png":
        width, height, _ = pixel_map.shape
        pixel_map.resize((width, height, 3))
    return pixel_map


def color_in_array(array, subarray) -> bool:
    """
    Check if a color is in the given array of colors.

    Parameters
    ----------
    array : list[list[int]]
        Array of colors.
    subarray : list[int]
        Color tp search.

    Returns
    -------
    bool
    """
    for i in range(len(array)):
        if (
            subarray[0] == array[i][0]
            and subarray[1] == array[i][1]
            and subarray[2] == array[i][2]
        ):
            return True
    return False


def color_distance(r1: int, g1: int, b1: int, r2: int, g2: int, b2: int) -> float:
    """
    Compute the euclidean distance between two RGB colors.

    Parameters
    ----------
    r1 : int
        RED component of the first color.
    g1 : int
        GREEN component of the first color.
    b1 : int
        BLUE component of the first color.
    r2 : int
        RED component of the second color.
    g2 : int
        GREEN component of the second color.
    b2 : int
        BLUE component of the second color.

    Returns
    -------
    float
    """
    return math.sqrt(
        pow(float(r1) - r2, 2) + pow(float(g1) - g2, 2) + pow(float(b1) - b2, 2)
    )

jpg_file_path = "./images/003.jpg"
png_file_path = "./images/004.png"

In [21]:
def neighbour_points(data, point_id):
    neighbours = []
    for i in range(len(data)):
        # Euclidian distance
        if np.linalg.norm([c1 - c2 for c1, c2 in zip(data[point_id][2], data[i][2])]) <= self.epsilon:
            neighbours.append(i)
    return neighbours

class DBScan:

    def __init__(self, picture_path: str, minimum_points: int = 3, epsilon: float = 1):
        super().__init__()

        if picture_path:
            self.pixels_map = picture_to_pixelmap(picture_path)
        else:
            self.pixels_map = None

        self.cluster_mapping = np.array([])
        self.data = []
        self.minimum_points = minimum_points
        self.epsilon = epsilon
        self.to_cluster = []
        self.trained = False

    def fit(self, picture_path: str = None):
        if picture_path:
            self.pixels_map = picture_to_pixelmap(picture_path)
        if not self.pixels_map.any():
            raise RuntimeError("You must provide the path of the picture.")
            
        data = self.flatten_map()
        
        print(data)
        
#         self.cluster_mapping = np.zeros([width, height, 2])

#         for x in range(width):
#             for y in range(height):
#                 self.to_cluster.append((x, y, self.pixels_map[x][y]))

#         while self.to_cluster:
#             cluster_points = [self.create_random_point()]
#             to_expand = [cluster_points[0]]

#             while to_expand:
#                 expanding_point = to_expand.pop()
#                 for point in self.to_cluster:
                    
#                     if color_distance(*expanding_point[2], *point[2]) <= self.epsilon:
#                         cluster_points.append(point)
#                         self.cluster_mapping[point[0], point[1]] = point[:2]
#                         to_expand.append(point)
#                         self.to_cluster.remove(point)

#                 if len(cluster_points) < self.minimum_points:
#                     break
#             print(len(self.to_cluster))
                    
    def save(self):
        pass

    def export(self):
        pass

    def create_random_point(self):
        i = random.randrange(0, len(self.to_cluster))
        return self.to_cluster[i]
    
    def flatten_map(self):
        width, height, _ = self.pixels_map.shape
        print(self.pixels_map)
        print(self.pixels_map[0, 0])
        print(self.pixels_map - self.pixels_map[0, 0])
        print(np.linalg.norm(self.pixels_map - self.pixels_map[0, 0], axis=2))
        new_map = np.empty((width*height, 3))
#         for x in range(width):
#             for y in range(height):
#                 new_map = np.append(new_map, [[x, y, self.pixels_map[x][y]]], axis=0)
        return new_map

In [22]:
dbscan = DBScan(png_file_path)

In [23]:
dbscan.fit()

[[[  3  51  64]
  [255   3  51]
  [ 64 255   3]
  ...
  [255  29  64]
  [ 73 255  27]
  [ 63  72 255]]

 [[ 25  61  70]
  [255  23  60]
  [ 69 255  24]
  ...
  [255   2  41]
  [ 53 255   2]
  [ 41  53 255]]

 [[  1  40  52]
  [255   2  41]
  [ 53 255   3]
  ...
  [255  22  66]
  [ 77 255  21]
  [ 66  77 255]]

 ...

 [[194 150 115]
  [255 192 148]
  [115 255 192]
  ...
  [255 239 229]
  [221 255 239]
  [229 221 255]]

 [[238 228 220]
  [255 238 228]
  [219 255 238]
  ...
  [255 181 122]
  [ 87 255 185]
  [126  90 255]]

 [[181 126  91]
  [255 176 122]
  [ 88 255 170]
  ...
  [255 131 105]
  [ 82 255 126]
  [100  77 255]]]
[ 3 51 64]
[[[  0   0   0]
  [252 208 243]
  [ 61 204 195]
  ...
  [252 234   0]
  [ 70 204 219]
  [ 60  21 191]]

 [[ 22  10   6]
  [252 228 252]
  [ 66 204 216]
  ...
  [252 207 233]
  [ 50 204 194]
  [ 38   2 191]]

 [[254 245 244]
  [252 207 233]
  [ 50 204 195]
  ...
  [252 227   2]
  [ 74 204 213]
  [ 63  26 191]]

 ...

 [[191  99  51]
  [252 141  84]
  [112 20