Links I referenced:
- https://www.pluralsight.com/guides/importing-image-data-into-numpy-arrays
- https://scikit-learn.org/stable/auto_examples/cluster/plot_color_quantization.html



In [None]:
import numpy as np
from numpy import asarray
import PIL
from PIL import Image
from matplotlib import image
from matplotlib import pyplot
print(__doc__)
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin
from sklearn.datasets import load_sample_image
from sklearn.utils import shuffle
from time import time
from sklearn.cluster import DBSCAN

## Load image using Pillow

In [None]:
# Open the source photo with Pillow and report its basic metadata.
imagePIL = Image.open('IMG2.jpg')

# Print the file format, the size and the pixel mode, one per line.
for image_property in (imagePIL.format, imagePIL.size, imagePIL.mode):
    print(image_property)

# Uncomment to show the image (will open in Preview).
# imagePIL.show()

## Load image using Matplotlib

In [None]:
# Read the same file with Matplotlib and report the dtype and shape of the
# array it returns.
imageMPL = image.imread('IMG2.jpg')

for array_property in (imageMPL.dtype, imageMPL.shape):
    print(array_property)

# Render the loaded pixels as a sanity check.
pyplot.imshow(imageMPL)
pyplot.show()

## Convert image to numpy array

In [None]:
# View the Pillow image as a NumPy array and report its dimensions.
np_array = np.asarray(imagePIL)
print(np_array.shape)

## Do the clustering

In [None]:
# Number of colours in the k-means and random codebooks.
n_colors = 64

# Convert to floats instead of the default 8-bit integer coding. Dividing by
# 255 is important so that plt.imshow works well on float data (which needs
# to be in the range [0, 1]).
# The float array is rebuilt from the Pillow image rather than from np_array
# itself, so re-running this cell cannot divide an already-scaled array by
# 255 a second time.
np_array = np.asarray(imagePIL, dtype=np.float64) / 255

# Flatten the image to a 2D array: one row per pixel, one column per channel.
# NOTE(review): the unpacking requires a 3-D array, so a single-channel image
# would fail here. Also, `w` is simply the first axis of the array and `h` the
# second; NumPy image arrays are usually (rows, columns, channels), so `w` is
# presumably the image height - the names are kept because later cells use them.
w, h, d = original_shape = tuple(np_array.shape)
np_2d = np.reshape(np_array, (w * h, d))

In [None]:
# # Fit models on small sub-sample of the data to find main colors
# np_2d_sample = shuffle(np_2d, random_state=0)[:1000]

# print("Fitting k-means model on a small sub-sample of the data")
# t0 = time()
# kmeans = KMeans(n_clusters=n_colors, random_state=0).fit(np_2d_sample)
# print("done in %0.3fs." % (time() - t0))

# print("Fitting DBSCAN model on a small sub-sample of the data")
# t0 = time()
# dbscan = DBSCAN(eps=0.5, min_samples=5).fit(np_2d_sample)
# print("done in %0.3fs." % (time() - t0))

In [None]:
# # Get labels for all points k-means
# print("Predicting color indices on the full image (k-means)")
# t0 = time()
# labelsK = kmeans.predict(np_2d)
# print("done in %0.3fs." % (time() - t0))

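# # Get labels for all points dbscan
# # NOTE: scikit-learn's DBSCAN has no predict() method, so the call below
# # cannot work as written; DBSCAN labels come from fit_predict() on the data.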
# print("Predicting color indices on the full image (DBSCAN)")
# t0 = time()
# labelsDB = dbscan.predict(np_2d)
# print("done in %0.3fs." % (time() - t0))

In [None]:
# Fit both models on the full image and collect one label per pixel.

print("Fitting and predicting with k-means")
t0 = time()
# Keep the fitted estimator: its cluster_centers_ are the colour codebook
# needed to rebuild the quantized image. fit(...).labels_ is exactly what
# fit_predict(...) returns, so labelsK is unchanged.
kmeans = KMeans(n_clusters=n_colors, random_state=0).fit(np_2d)
labelsK = kmeans.labels_
print("done in %0.3fs." % (time() - t0))

print("Fitting and predicting with DBSCAN")
t0 = time()
# NOTE(review): pixel values were scaled to [0, 1], so eps=0.5 is a very
# large neighbourhood radius in colour space; this may be slow or
# memory-hungry on a full image and may merge most pixels into one cluster.
# Confirm the intended eps. DBSCAN marks noise points with the label -1.
labelsDB = DBSCAN(eps=0.5, min_samples=5).fit_predict(np_2d)
print("done in %0.3fs." % (time() - t0))

In [None]:
# Baseline: use n_colors pixels drawn at random from the image as the palette.
shuffled_pixels = shuffle(np_2d, random_state=0)
codebook_random = shuffled_pixels[:n_colors]

print("Predicting color indices on the full image (random)")
t0 = time()
# With axis=0 the result holds, for every pixel row of np_2d, the index of
# the closest colour in codebook_random.
labels_random = pairwise_distances_argmin(codebook_random, np_2d, axis=0)
elapsed = time() - t0
print("done in %0.3fs." % elapsed)

In [None]:
def recreate_image(codebook, labels, w, h):
    """Recreate the (compressed) image from the code book & labels.

    Args:
        codebook: 2D array-like of shape (n_colors, d); row ``k`` is the
            colour assigned to label ``k``.
        labels: 1D array-like of integer codebook indices, one per pixel,
            ordered row by row. Only the first ``w * h`` entries are used.
            Negative labels follow NumPy indexing (``-1`` selects the last
            codebook row).
        w: Size of the first axis of the output image.
        h: Size of the second axis of the output image.

    Returns:
        A float64 array of shape ``(w, h, d)`` in which pixel ``(i, j)`` is
        ``codebook[labels[i * h + j]]``.

    Raises:
        ValueError: If ``labels`` holds fewer than ``w * h`` entries.
    """
    codebook = np.asarray(codebook)
    labels = np.asarray(labels).ravel()
    n_pixels = w * h
    if labels.size < n_pixels:
        raise ValueError(
            "labels has %d entries but a %d x %d image needs %d"
            % (labels.size, w, h, n_pixels)
        )
    d = codebook.shape[1]
    # One fancy-indexing lookup replaces the per-pixel Python loop; the
    # row-major reshape reproduces the original row-by-row fill order.
    pixels = codebook[labels[:n_pixels]]
    return pixels.reshape(w, h, d).astype(np.float64)

In [None]:
# Display all results, alongside the original image.

# DBSCAN has no cluster centres, so its codebook is built here as the mean
# colour of the pixels sharing each label. np.unique renumbers the labels to
# 0..k-1, which turns the noise label (-1) into an ordinary group of its own
# instead of letting it silently index the last codebook row.
db_label_values, db_index = np.unique(labelsDB, return_inverse=True)
db_index = db_index.ravel()
db_counts = np.bincount(db_index)
codebook_db = np.stack(
    [
        np.bincount(db_index, weights=np_2d[:, channel])
        for channel in range(np_2d.shape[1])
    ],
    axis=1,
) / db_counts[:, np.newaxis]

# (title, image) pairs, shown in figures 1..4 in this order. The original is
# the float image in np_array; titles report the actual number of colours.
panels = [
    ('Original image', np_array),
    (
        'Quantized image (%d colors, K-Means)' % n_colors,
        recreate_image(kmeans.cluster_centers_, labelsK, w, h),
    ),
    (
        'Quantized image (%d colors, DBSCAN)' % len(codebook_db),
        recreate_image(codebook_db, db_index, w, h),
    ),
    (
        'Quantized image (%d colors, Random)' % n_colors,
        recreate_image(codebook_random, labels_random, w, h),
    ),
]

for figure_number, (title, panel_image) in enumerate(panels, start=1):
    plt.figure(figure_number)
    plt.clf()
    plt.axis('off')
    plt.title(title)
    plt.imshow(panel_image)
plt.show()