In [None]:
#!pip install matplotlib scikit-learn pillow numpy
import matplotlib.pyplot as plt
import os
import numpy as np
from PIL import Image

# Load the image file, display the image and show the image data with its size.
# convert("RGB") guarantees exactly three channels even when the PNG carries an
# alpha channel or a palette, so the 3-column reshape below cannot fail on channel count.
im = Image.open(os.path.join(os.getcwd(), "colors.png")).convert("RGB")
im_matrix = np.array(im)
ax = plt.axes(xticks=[], yticks=[])
ax.imshow(im)
print(im_matrix.shape)
print(im_matrix)

# Scale the values to the range from 0 to 1 and flatten the array to 2D with one
# row per pixel and one column per channel. Passing -1 lets NumPy infer the pixel
# count from the image itself instead of hard-coding its dimensions.
data = im_matrix / 255
data = data.reshape(-1, 3)
print(data.shape)
print(data)

In [None]:
# Initialize a seeded random number generator, permute the row indices of `data`
# and keep the first 100000 (fewer if the image is smaller) to avoid long plotting time
rng = np.random.RandomState(0)
i = rng.permutation(data.shape[0])[:100000]

# Select the sampled rows once: each row is used as that point's color, and the
# transpose of the same selection yields one 1-D array per channel (Red, Green, Blue)
colors = data[i]
R, G, B = colors.T

# Plot the scatter chart of Red vs Green values and Red vs Blue values, colored by the original colors
fig, ax = plt.subplots(1, 2, figsize=(16, 6))
ax[0].scatter(R, G, color=colors, marker='.')
ax[0].set(xlabel='Red', ylabel='Green', xlim=(0, 1), ylim=(0, 1))

ax[1].scatter(R, B, color=colors, marker='.')
ax[1].set(xlabel='Red', ylabel='Blue', xlim=(0, 1), ylim=(0, 1))

# The trailing semicolon keeps the Text(...) repr out of the cell output
fig.suptitle('Input color space: millions of possible colors', size=20);

In [None]:
# Employ the KMeans class from sklearn to fit a model with 7 clusters on all pixels,
# then replace every pixel by the centroid of the cluster it is assigned to.
# random_state pins the centroid initialization so the 7 resulting colors (and every
# figure derived from them) are the same on each run.
from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters=7, random_state=0).fit(data)
new_colors = kmeans.cluster_centers_[kmeans.predict(data)]

# Initialize a random number generator to generate a random permutation of indices and select the first 100000 to avoid long computation time
# (same seed as the input color space plot, so the same pixels are sampled)
rng = np.random.RandomState(0)
i = rng.permutation(data.shape[0])[:100000]

# Color each sampled point with its new (centroid) color, but keep the point at the
# position given by its original Red, Green and Blue values
colors = new_colors[i]
R, G, B = data[i].T

# Plot the scatter chart of Red vs Green values and Red vs Blue values, colored by the reduced colors
fig, ax = plt.subplots(1, 2, figsize=(16, 6))
ax[0].scatter(R, G, color=colors, marker='.')
ax[0].set(xlabel='Red', ylabel='Green', xlim=(0, 1), ylim=(0, 1))

ax[1].scatter(R, B, color=colors, marker='.')
ax[1].set(xlabel='Red', ylabel='Blue', xlim=(0, 1), ylim=(0, 1))

fig.suptitle('Reduced color space: 7 colors', size=20);


In [None]:
# Give the recolored pixel rows the same shape as im_matrix so they can be rendered as an image
seven_color = new_colors.reshape(im_matrix.shape)

# Show the source image and its recolored counterpart next to each other, without axis ticks
fig, ax = plt.subplots(
    nrows=1,
    ncols=2,
    figsize=(16, 6),
    subplot_kw={'xticks': [], 'yticks': []},
)
fig.subplots_adjust(wspace=0.05)
for panel, picture, caption in zip(ax, (im, seven_color), ('Original Image', '7-color Image')):
    panel.imshow(picture)
    panel.set_title(caption, size=16)


In [None]:
# Apply the elbow method to the data: fit one KMeans model for every number of
# clusters from 1 to 10 (inclusive) and record the inertia of each fit
inertia = []
clusters = range(1, 11)  # the upper bound of range is exclusive, so 11 is needed to include k = 10
for k in clusters:
    # A dedicated name keeps the fitted 7-cluster `kmeans` model from being overwritten
    # by this sweep; random_state makes the curve reproducible across runs.
    elbow_model = KMeans(n_clusters=k, random_state=0)
    elbow_model.fit(data)
    inertia.append(elbow_model.inertia_)

# Plot the elbow curve: inertia as a function of the number of clusters
plt.figure(figsize=(8, 4))
plt.plot(clusters, inertia, 'o-')
plt.xlabel('Number of clusters (k)')
plt.ylabel('Inertia')
plt.title('Elbow curve')
plt.show()