<a href="https://colab.research.google.com/github/k3m2s12/Building_Image/blob/main/Clustering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import cv2
from sklearn.cluster import DBSCAN
from sklearn import metrics
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from collections import Counter

In [None]:
def plot_clusters(labels):
    """Draw the clustered points with one colour per distinct label.

    Relies on the notebook-level variables ``X`` (point coordinates),
    ``core_samples_mask`` (boolean mask of core samples) and
    ``n_clusters_`` (used only for the plot title).

    Args:
        labels: Labels to plot. Each distinct value is compared
            element-wise against ``labels`` to select the points of
            that group; the label ``-1`` is drawn in black as noise.
    """
    unique_labels = set(labels)
    print(unique_labels)

    # One evenly spaced Spectral colour per distinct label.
    palette = [plt.cm.Spectral(fraction)
               for fraction in np.linspace(0, 1, len(unique_labels))]

    for cluster_id, colour in zip(unique_labels, palette):
        # Black used for noise.
        face = tuple([0, 0, 0, 1]) if cluster_id == -1 else tuple(colour)
        in_cluster = (labels == cluster_id)

        # Core samples get large black-edged markers; the remaining
        # members are plotted with a marker size of zero.
        for selector, edge, size in ((core_samples_mask, 'k', 14),
                                     (~core_samples_mask, 'w', 0)):
            points = X[in_cluster & selector]
            plt.plot(points[:, 0], points[:, 1], 'o',
                     markerfacecolor=face, markeredgecolor=edge,
                     markersize=size)

    plt.title('Estimated number of clusters: %d' % n_clusters_)
    plt.show()

In [None]:
# Read the image with OpenCV and convert it into an array of [row, col]
# locations, one per pixel that is not pure white. These locations are the
# points that get clustered.
image = cv2.imread("binary_image.png")
if image is None:
    # cv2.imread reports a missing or unreadable file by returning None.
    raise FileNotFoundError("Could not read image file: binary_image.png")

# A pixel is kept when any of its colour channels differs from 255.
# Working on the whole array handles images of any size (no fixed
# 2000x2000 scan) and np.argwhere yields the locations in row-major order,
# the same order a nested row/column loop would produce.
X = np.argwhere(np.any(image != 255, axis=2))

# Kept as a plain list of [row, col] pairs for any code that still uses it.
pixel_loc = X.tolist()

print("The number of pixels detected for clustering: ", len(X))

In [None]:
###############################################################################
# Density Based Clustering - Input eps & min_sample
print("----Density Based Clustering----")
# eps is a distance, so fractional values are accepted (whole numbers still
# work); min_samples is a count and stays an integer.
eps_num = float(input("Input # for eps: "))
min_samples_nu = int(input("Input # for min_samples: "))

db = DBSCAN(eps=eps_num, min_samples=min_samples_nu).fit(X)
labels = db.labels_

# Boolean mask with one entry per point: True where the point is a core sample.
core_samples_mask = np.zeros_like(labels, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True

# Number of clusters in labels, ignoring noise (label -1) if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
n_noise_ = int(np.count_nonzero(labels == -1))

print('Estimated number of clusters: %d' % n_clusters_)
print('Estimated number of noise points: %d' % n_noise_)

# Silhouette score ranges from -1 to +1. Uncomment the next line to print it
# (silhouette_score needs at least two distinct labels).
# print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels))

In [None]:
# Tally the points per label and keep the 10 most frequent labels. The noise
# label (-1) competes for one of those slots and is then dropped, so fewer
# than 10 clusters may remain.
count_list = []
count_labels = Counter(labels)
big_labels = dict(count_labels.most_common(10))
# Noise may be absent or outside the top 10; the default avoids a KeyError.
big_labels.pop(-1, None)

for cluster_num in big_labels:
    # Indices of the points assigned to this cluster.
    indice = np.flatnonzero(labels == cluster_num)

    # Collect the coordinates of every point in this cluster.
    count_list.extend(X[indice])

    print("* The cluster %d has %d of points" % (cluster_num, len(indice)))
    

In [None]:
print(big_labels)

# plot_clusters compares its argument element-wise against each cluster id,
# so it needs one label per point rather than the {cluster id: size} dict
# (passing the dict selects no points at all). Points outside the largest
# clusters are relabelled -1, which plot_clusters treats as noise.
big_only_labels = np.where(np.isin(labels, list(big_labels)), labels, -1)
plot_clusters(big_only_labels)

# Full clustering result for comparison.
plot_clusters(labels)

@ eps: sets the radius within which other points count as neighbors of a point.
- eps ⇧ 
∝ number of data points in each cluster (cluster size) ⇧
∝ total number of clusters ⇩ 
- eps very ⇩ 
∝ number of core points ⇩, because min_samples is hard to satisfy (most points become noise)
- eps very ⇧ 
∝ all data points end up in one cluster

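@ min_samples: the number of neighbors (within eps) a point needs in order to be a core point; it decides the minimum size of a cluster (noise control).
- min_samples ⇧ 
∝ number of core points ⇩
∝ number of noise points ⇧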