In [11]:
!pip install validclust

Collecting validclust
  Downloading validclust-0.1.1-py2.py3-none-any.whl (8.1 kB)
Installing collected packages: validclust
Successfully installed validclust-0.1.1


In [12]:
import numpy as np
import random
import cv2
import matplotlib.pyplot as plt

import keras
from keras.datasets import cifar10
from keras import layers
from tensorflow.keras.models import load_model
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D, BatchNormalization, Activation
from tensorflow.keras.models import Model,Sequential
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam

from sklearn.metrics import silhouette_score
from sklearn.metrics import pairwise_distances,accuracy_score 
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

from validclust import dunn


# K-Means Clustering
---

In [90]:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

In [91]:
x_train = np.array([cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) for image in x_train])
x_test = np.array([cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) for image in x_test])

In [92]:
x_train = x_train.reshape((-1, 1024))
x_test = x_test.reshape((-1, 1024))

In [93]:
x_train.shape

(50000, 1024)

In [94]:
# # Principal component analysis (PCA).
# pca = PCA(2,whiten=True)

# # Transform the data
# x_test = pca.fit_transform(x_test)

In [95]:
def recalculate_clusters(X, centroids, k):
    clusters = {}
    for i in range(k):
        clusters[i] = []
    for data in X:
        euc_dist = []
        for j in range(k):
            euc_dist.append(np.linalg.norm(data - centroids[j]))
        clusters[euc_dist.index(min(euc_dist))].append(data)
    return clusters    
 
def recalculate_centroids(centroids, clusters, k):
    for i in range(k):
        centroids[i] = np.mean(clusters[i], axis=0)
    return centroids

def k_means_clustering(X, centroids={}, k=10, repeats=500):
    for i in range(k):
        random_img_centroid = random.randint(0, 9)
        centroids[i] =  x_test[random_img_centroid]

    for i in range(repeats):        
        clusters = recalculate_clusters(X, centroids, k)  
        centroids = recalculate_centroids(centroids, clusters, k)

    return clusters,centroids 

In [96]:
clusters,centroids=k_means_clustering(x_test)


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


In [97]:
k=10
image_belongs_to=[]
clusters = {}
dist=[]
for i in range(k):
    clusters[i] = []
for data in x_test:
    euc_dist = []
    for j in range(k):
        euc_dist.append(np.linalg.norm(data - centroids[j]))
    dist.append(euc_dist)
    clusters[euc_dist.index(min(euc_dist))].append(data)
    image_belongs_to.append(euc_dist.index(min(euc_dist)))


In [98]:
print("Silhouette Score: ",silhouette_score(x_test, image_belongs_to))#0.054

Silhouette Score:  0.07549538670355976


In [99]:
dist = pairwise_distances(x_test)
print("Dunns Index: ",dunn(dist, np.array(image_belongs_to)))#0.089

Dunns Index:  0.09066185012304237


# Auto-Encoder

In [13]:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [14]:
# normalize data
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

In [15]:
input_img = Input(shape=(32, 32, 3))
x = Conv2D(64, (3, 3), padding='same')(input_img)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(32, (3, 3), padding='same')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(16, (3, 3), padding='same')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)

x = Conv2D(16, (3, 3), padding='same')(encoded)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(32, (3, 3), padding='same')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(64, (3, 3), padding='same')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(3, (3, 3), padding='same')(x)
x = BatchNormalization()(x)
decoded = Activation('linear')(x)

In [16]:
model = Model(input_img, decoded)
model.compile(optimizer='adam', loss='mean_squared_error',metrics=['accuracy'])

In [17]:
es_cb = EarlyStopping(monitor='val_loss', patience=2, verbose=1, mode='auto')
chkpt = 'AutoEncoder_Cifar10_Deep_weights.{epoch:02d}-{loss:.2f}-{val_loss:.2f}.hdf5'
cp_cb = ModelCheckpoint(filepath = chkpt, monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=True,mode='auto')

In [None]:
history = model.fit(x_train, x_train,
                    batch_size=256,
                    epochs=20,
                    verbose=1,
                    validation_data=(x_test, x_test),
                    callbacks=[es_cb,cp_cb],
                    shuffle=True
                   )

Epoch 1/20
 41/196 [=====>........................] - ETA: 4:10 - loss: 0.8853 - accuracy: 0.4697

In [None]:
history.history['accuracy'][-1]

In [None]:
encoder_model = Model(input_img, encoded)


In [None]:
encoded_imgs = encoder_model.predict(x_train)

In [None]:
encoded_imgs.shape

In [None]:
encoded_imgs=encoded_imgs.reshape((-1,256))

In [None]:
encoded_imgs.shape


In [None]:
# kmeans = KMeans(n_clusters=10, random_state=0).fit(encoded_imgs)
kmeans = KMeans(n_clusters=10, 
                init='random', 
                n_init=30, 
                max_iter=1000).fit(encoded_imgs)

In [None]:
kmeans.labels_

In [None]:
x_train1=x_train.reshape((-1,3072))

In [None]:
print("Silhouette Score: ",silhouette_score(x_train1[:30000], kmeans.labels_[:30000]))#0.054 #0.023

In [None]:
x_val = x_test[:7000]
c10test = model.predict(x_train)
c10val = model.predict(x_val)

In [None]:
def showOrigDec(orig, dec, num=10):
    import matplotlib.pyplot as plt
    n = num
    plt.figure(figsize=(20, 4))

    for i in range(n):
        # display original
        ax = plt.subplot(2, n, i+1)
        plt.imshow(orig[i].reshape(32, 32, 3))
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

        # display reconstruction
        ax = plt.subplot(2, n, i +1 + n)
        plt.imshow(dec[i].reshape(32, 32, 3))
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.show()

In [None]:
showOrigDec(x_train, c10test)
