In [1]:
# Import everything the notebook needs up front. The imports are deliberately
# NOT wrapped in try/except: if a package is missing, the ImportError should
# stop the run here instead of surfacing later as an unrelated NameError.
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Seed NumPy's global RNG so the random centroid initialisation is repeatable.
np.random.seed(42)

In [2]:
img = cv2.imread("hulk2.jpg", cv2.IMREAD_COLOR)
# cv2.imread does not raise when the file is missing or unreadable; it returns
# None. Check explicitly so the failure is reported here, not in a later cell.
if img is None:
    raise FileNotFoundError('could not read image "hulk2.jpg"')
cv2.imshow("image", img)

In [3]:
# Wait for a key press in the OpenCV window, then close all OpenCV windows.
cv2.waitKey(0)
cv2.destroyAllWindows()

In [4]:
# Report the array shape of the loaded image.
print(img.shape)

(581, 552, 3)


In [5]:
# cv2.imread stores channels in B, G, R order, so the matching conversion code
# is COLOR_BGR2GRAY; COLOR_RGB2GRAY would apply the red and blue weights to the
# wrong channels. No try/except here: a swallowed error would leave gray_img
# undefined and break the next cell with a NameError.
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

In [6]:
# Show the grayscale image, wait for a key press, then close the window.
cv2.imshow("gray_img", gray_img)
cv2.waitKey(0)
# The parentheses matter: without them the function is only referenced (and
# echoed as the cell output), never called, so the window stays open.
cv2.destroyAllWindows()

<function destroyAllWindows>

In [7]:
# Scale both image sides by the same percentage.
scale_percent = 50
# img.shape starts with (rows, columns); scale each and truncate to an int.
height, width = (int(side * scale_percent / 100) for side in img.shape[:2])
# The target size is passed in (width, height) order.
dim = (width, height)
resized_img_original = cv2.resize(img, dim)

In [8]:
# Show the resized image, wait for a key press, then close the window.
cv2.imshow("resized_img_original", resized_img_original)
cv2.waitKey(0)
# Call the function (note the parentheses); a bare reference closes nothing.
cv2.destroyAllWindows()

<function destroyAllWindows>

In [9]:
# Convert to a 2-D array: one row per pixel, three values per row.
flattened_img = img.reshape(-1, 3)

In [10]:
# Number of rows in the flattened array, i.e. how many pixels will be clustered.
print(flattened_img.shape[0])

320712


In [11]:
#Khởi tạo K centroids ngẫu nhiên, mỗi centroids đại diện cho màu trung bình của một cụm.
k = 5
random_sample_idxs = np.random.choice(flattened_img.shape[0], k, replace=False)
centroids = [flattened_img[idx] for idx in random_sample_idxs]

In [12]:
# Inspect the randomly chosen starting centroids.
print(centroids)

[array([34, 31, 26], dtype=uint8), array([ 2, 14, 92], dtype=uint8), array([35, 32, 27], dtype=uint8), array([33, 30, 26], dtype=uint8), array([42, 77, 97], dtype=uint8)]


In [13]:
# Number of values stored per row of the flattened array.
print(flattened_img.shape[1])

3


In [None]:
# One empty list of sample indices per cluster; plot() reads this module-level name.
clusters = [[] for _ in range(k)]
# When True, predict() calls plot() around each centroid update.
plot_steps = False

In [14]:
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Both inputs are converted to float64 before subtracting. Image pixels are
    uint8, and uint8 arithmetic wraps around modulo 256 (e.g. 0 - 20 becomes
    236, and 20 ** 2 becomes 144), which silently produces wrong distances.

    Parameters
    ----------
    x1, x2 : array_like
        Points with matching (or broadcastable) shapes.

    Returns
    -------
    numpy.float64
        Square root of the summed squared component differences.
    """
    diff = np.asarray(x1, dtype=np.float64) - np.asarray(x2, dtype=np.float64)
    return np.sqrt(np.sum(diff ** 2))

In [15]:
def closest_centroid(sample, centroids):
    """Return the index of the centroid nearest to ``sample``.

    Distances to all centroids are computed in one vectorised step, in
    float64, so uint8 pixel values cannot wrap around during the subtraction
    or the squaring.

    Parameters
    ----------
    sample : array_like
        A single point.
    centroids : sequence of array_like
        Candidate centroids, each with the same length as ``sample``.

    Returns
    -------
    numpy.intp
        Position of the nearest centroid; ties go to the lowest index.
    """
    point = np.asarray(sample, dtype=np.float64)
    centres = np.asarray(centroids, dtype=np.float64)
    # One Euclidean distance per centroid (reduce over the component axis).
    distances = np.sqrt(np.sum((centres - point) ** 2, axis=-1))
    return np.argmin(distances)

In [16]:
def create_clusters(centroids):
    """Assign every row of ``flattened_img`` to its nearest centroid.

    The assignment is vectorised: one distance column is computed per centroid
    instead of calling a Python function for every (pixel, centroid) pair.
    Arithmetic is done in float64 so uint8 pixel values cannot wrap around.

    Parameters
    ----------
    centroids : sequence of array_like
        Current centroids, one per cluster.

    Returns
    -------
    list of list of int
        ``k`` lists; list ``i`` holds, in ascending order, the row indices of
        ``flattened_img`` whose nearest centroid is ``i``. Ties go to the
        lowest centroid index.
    """
    pixels = flattened_img.astype(np.float64)
    # distances[n, j] is the Euclidean distance from pixel n to centroid j.
    distances = np.stack(
        [
            np.sqrt(np.sum((pixels - np.asarray(centre, dtype=np.float64)) ** 2, axis=1))
            for centre in centroids
        ],
        axis=1,
    )
    nearest = np.argmin(distances, axis=1)
    return [np.flatnonzero(nearest == cluster_idx).tolist() for cluster_idx in range(k)]

In [17]:
def get_cluster_labels(clusters):
    """Turn per-cluster index lists into one label per row of ``flattened_img``.

    Parameters
    ----------
    clusters : sequence of sequence of int
        ``clusters[i]`` lists the row indices belonging to cluster ``i``.

    Returns
    -------
    numpy.ndarray
        Float array with one entry per row of ``flattened_img`` holding that
        row's cluster index. The array starts out uninitialised (``np.empty``),
        so rows that appear in no cluster keep arbitrary values.
    """
    n_samples = flattened_img.shape[0]
    labels = np.empty(n_samples)
    for label, members in enumerate(clusters):
        # Write the cluster index to all member rows in one indexed assignment.
        labels[np.asarray(members, dtype=np.intp)] = label
    return labels
    

In [18]:
def get_centroids(clusters):
    """Compute each cluster's centroid as the mean of its member rows.

    Parameters
    ----------
    clusters : sequence of sequence of int
        ``clusters[i]`` lists the row indices of ``flattened_img`` in cluster ``i``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(k, flattened_img.shape[1])``.

    Notes
    -----
    The mean of an empty selection is NaN, and ``np.argmin`` returns the
    position of a NaN, so one empty cluster would otherwise capture every
    pixel on the next assignment step. An empty cluster is therefore re-seeded
    with a randomly chosen pixel. The RNG is only consulted in that case.
    """
    centroids = np.zeros((k, flattened_img.shape[1]))
    for cluster_idx, cluster in enumerate(clusters):
        if len(cluster) == 0:
            reseed_row = np.random.randint(flattened_img.shape[0])
            centroids[cluster_idx] = flattened_img[reseed_row]
            continue
        centroids[cluster_idx] = np.mean(flattened_img[cluster], axis=0)
    return centroids

In [19]:
def is_converged(centroids_old, centroids):
    """Report whether no centroid moved between two iterations.

    Adds up the distance each of the ``k`` centroids travelled and returns
    whether that total is exactly zero.
    """
    total_shift = 0
    for i in range(k):
        total_shift += euclidean_distance(centroids_old[i], centroids[i])
    return total_shift == 0

In [22]:
def plot():
    """Scatter-plot the current clusters and centroids in 3-D.

    Reads the module-level ``clusters``, ``centroids`` and ``flattened_img``.
    Every point has three components, so a 3-D axes is used: on a 2-D axes the
    third positional argument of ``scatter`` is the marker size ``s``, which
    would turn the third channel into dot sizes instead of a coordinate.
    """
    # Importing Axes3D registers the "3d" projection on older Matplotlib releases.
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

    fig, ax = plt.subplots(figsize=(12, 8), subplot_kw={"projection": "3d"})

    for index in clusters:
        # Transpose so the three channels unpack as separate x, y, z arrays.
        point = flattened_img[index].T
        ax.scatter(*point)

    for point in centroids:
        ax.scatter(*point, marker="x", color='black', linewidths=2)

    ax.set_xlabel("channel 0")
    ax.set_ylabel("channel 1")
    ax.set_zlabel("channel 2")
    ax.set_title("Pixel clusters and centroids")

    plt.show()

In [23]:
def cent():
    """Return the module-level ``centroids`` as they currently stand."""
    return centroids

In [24]:
def predict(max_iters=100):
    """Run k-means on ``flattened_img`` and return one cluster label per pixel.

    Alternates between assigning pixels to their nearest centroid and
    recomputing the centroids, stopping early once the centroids stop moving.

    Parameters
    ----------
    max_iters : int, optional
        Upper bound on the number of assign/update rounds (default 100).

    Returns
    -------
    numpy.ndarray
        Result of ``get_cluster_labels`` for the final clusters.

    Raises
    ------
    ValueError
        If ``max_iters`` is smaller than 1.

    Notes
    -----
    ``centroids`` and ``clusters`` are declared ``global``. Without that, the
    assignment to ``centroids`` below makes the name local to this function
    and the first ``create_clusters(centroids)`` raises UnboundLocalError.
    Updating the globals also lets ``plot()`` and ``cent()`` see the current
    state.
    """
    global centroids, clusters

    if max_iters < 1:
        raise ValueError("max_iters must be at least 1")

    for _ in range(max_iters):
        clusters = create_clusters(centroids)
        if plot_steps:
            plot()

        centroids_old = centroids
        centroids = get_centroids(clusters)

        if is_converged(centroids_old, centroids):
            break
        if plot_steps:
            plot()
    return get_cluster_labels(clusters)

    