### DESCRIPTION

There is an image named “tiger.png.” Use k-means clustering with k set to 16 and cluster the image, which means that you want to keep just 16 colors in your compressed image.

**Objective**: Open and display the image “tiger.png.” Convert the image into numpy array, so that it can be used in further processing. Find out the dimensions of the image and convert it into a two- dimensional array (Use k-means clustering for image segmentation, reducing the image to 16 colors).

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
plt.rcParams['figure.figsize'] = (12,6) # handles default matplotlib values
from sklearn.cluster import KMeans

In [2]:
pip install pillow

Note: you may need to restart the kernel to use updated packages.


In [5]:
import PIL
from PIL import Image
tiger = Image.open('tiger.png')

# summarize some details about image
print(tiger.format)
print(tiger.size)
print(tiger.mode)

PNG
(1280, 720)
RGB


In [6]:
# convert image to array
from numpy import asarray
tiger_array = asarray(tiger)

print(type(tiger_array))
print(tiger_array.shape)

<class 'numpy.ndarray'>
(720, 1280, 3)


In [7]:
tiger_array

array([[[164, 160, 159],
        [165, 161, 160],
        [164, 163, 161],
        ...,
        [160, 128,  90],
        [158, 125,  90],
        [161, 128,  93]],

       [[164, 160, 159],
        [164, 160, 159],
        [163, 162, 160],
        ...,
        [164, 132,  94],
        [162, 129,  94],
        [157, 124,  89]],

       [[163, 159, 156],
        [164, 160, 157],
        [163, 162, 160],
        ...,
        [164, 132,  94],
        [162, 129,  94],
        [157, 124,  89]],

       ...,

       [[ 93,  94,  96],
        [ 93,  94,  96],
        [ 92,  93,  95],
        ...,
        [120, 116, 107],
        [121, 117, 108],
        [122, 118, 109]],

       [[ 93,  94,  96],
        [ 93,  94,  96],
        [ 92,  93,  95],
        ...,
        [120, 116, 107],
        [121, 117, 108],
        [122, 118, 109]],

       [[ 93,  94,  96],
        [ 93,  94,  96],
        [ 92,  93,  95],
        ...,
        [119, 115, 106],
        [121, 117, 108],
        [122, 118, 109]]

In [8]:
tiger_array.shape[0]

720

In [9]:
# convert array to 2d
x = tiger_array.shape[0]
y = tiger_array.shape[1] * tiger_array.shape[2]
tiger_array.resize(x,y)
tiger_array.shape

(720, 3840)

In [10]:
kmeans = KMeans(n_clusters=16)

In [11]:
kmeans.fit(tiger_array)

KMeans(n_clusters=16)

In [12]:
kmeans.cluster_centers_

array([[130.67058824, 142.07058824, 128.83529412, ..., 162.76470588,
        135.30588235, 108.4       ],
       [104.36111111, 100.47222222, 105.70833333, ..., 123.38888889,
        117.75      , 113.36111111],
       [104.17948718, 107.41025641, 108.82051282, ..., 140.56410256,
        120.20512821,  95.61538462],
       ...,
       [ 94.625     , 106.97916667,  92.0625    , ..., 161.97916667,
        137.08333333, 108.72916667],
       [ 52.33333333,  88.74074074,  50.88888889, ..., 134.81481481,
        123.18518519, 108.25925926],
       [119.83333333, 126.55555556, 122.5       , ..., 153.66666667,
        133.94444444, 106.11111111]])

In [13]:
print(kmeans.labels_)
print(len(kmeans.labels_))

[11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11
 11 11 11 11 11 11 11 11 11 11 11 11  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3
  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3 13 13 13 13 13 13 13 13 13
 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13
 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 15 15 15 15 15 15 15 15 15
 15 15 15 15 15 15 15 15 15  9  9  9  9  9  9  9  9  9  9  9  9  9  9  9
  9  9  9  9  9  9  9  9  9  9  9  9  9  7  7  7  7  7  7  7  7  7  7  7
  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7
  7  7  7  7  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2
  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2

In [14]:
print(type(kmeans.labels_))
unique, counts = np.unique(kmeans.labels_, return_counts=True)
print(dict(zip(unique, counts)))

<class 'numpy.ndarray'>
{0: 85, 1: 72, 2: 39, 3: 38, 4: 51, 5: 44, 6: 35, 7: 39, 8: 56, 9: 28, 10: 63, 11: 36, 12: 41, 13: 48, 14: 27, 15: 18}


would plot but seaborn lmpolt takes dataframe as data source.