In [0]:
# Mount Google Drive under /content/drive (this import only exists inside Colab).
# NOTE(review): later cells use relative 'drive/My Drive/...' paths, which
# presumably rely on the working directory being /content -- confirm.
from google.colab import drive
drive.mount('/content/drive')

In [0]:
from time import time
import numpy as np
import cv2
import os
import h5py
import keras.backend as K
from keras.engine.topology import Layer, InputSpec
from keras.layers import Dense, Input, Conv2D, Flatten, Reshape
from keras.layers import LeakyReLU, Conv2DTranspose
from keras.models import Model, load_model
from keras.optimizers import SGD
from keras import callbacks
from keras.initializers import VarianceScaling
from sklearn.cluster import KMeans
from keras.preprocessing.image import ImageDataGenerator
#import metrics



In [0]:
def csv_image_generator(inputPath, bs):
  """Endlessly yield autoencoder batches built from image paths listed in a CSV.

  The first comma-separated field of every line is used as an image path. Each
  image is read with ``cv2.imread``, resized to 64x64 and divided by 255. When
  the end of the file is reached reading restarts from the first line, so a
  batch may span the wrap-around and the generator never ends on its own.

  Args:
    inputPath: Path of the CSV file; only its first column is used.
    bs: Batch size; must be at least 1.

  Yields:
    ``(batch, batch)`` tuples: two arrays built from the same ``bs`` images,
    serving as input and reconstruction target.

  Raises:
    ValueError: If ``bs`` is smaller than 1, or if a complete pass over the
      file contains no image path (empty file or blank lines only).
    IOError: If OpenCV cannot read one of the listed images.
  """
  if bs < 1:
    # A non-positive batch size would otherwise loop forever without yielding.
    raise ValueError("bs must be >= 1, got %r" % (bs,))

  # The context manager closes the file when the generator is closed,
  # garbage-collected, or aborted by an exception.
  with open(inputPath, "r") as f:
    batch = []
    paths_this_pass = 0
    while True:
      line = f.readline()
      if line == "":
        # End of file: rewind, unless the whole pass produced nothing, in
        # which case rewinding would only spin forever.
        if paths_this_pass == 0:
          raise ValueError("no image paths found in %r" % (inputPath,))
        f.seek(0)
        paths_this_pass = 0
        continue

      path = line.strip().split(",")[0]
      if not path:
        # Blank line (e.g. a trailing newline at the end of the CSV).
        continue
      paths_this_pass += 1

      img = cv2.imread(path)
      if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError("could not read image %r" % (path,))
      img = cv2.resize(img, (64, 64))
      batch.append(img / 255)

      if len(batch) == bs:
        yield (np.array(batch), np.array(batch))
        batch = []

In [0]:
def generator(images, dir, bs=6):
    """Endlessly yield autoencoder batches from a list of image file names.

    Every pass walks ``images`` in order in consecutive groups of ``bs``;
    trailing names that do not fill a complete batch are never yielded. Each
    image is read from ``os.path.join(dir, name)``, resized to 64x64 and
    divided by 255.

    Args:
        images: Sequence of image file names.
        dir: Directory the file names are relative to. (The name shadows the
            ``dir`` builtin but is kept because callers may pass it by keyword.)
        bs: Batch size; must be at least 1.

    Yields:
        ``(batch, batch)`` tuples: two arrays built from the same ``bs``
        images, serving as input and reconstruction target.

    Raises:
        ValueError: If ``bs`` is smaller than 1 or ``images`` holds fewer than
            ``bs`` entries (no full batch could ever be produced).
        IOError: If OpenCV cannot read one of the images.
    """
    if bs < 1:
        raise ValueError("bs must be >= 1, got %r" % (bs,))

    while True:
        # Recomputed on every pass, as before, in case `images` changes.
        num_batches = len(images) // bs
        if num_batches == 0:
            # Without this guard the outer loop would spin forever and never
            # yield anything.
            raise ValueError(
                "need at least bs=%d images, got %d" % (bs, len(images)))

        for i in range(num_batches):
            start = i * bs
            batch = []
            for name in images[start:start + bs]:
                path = os.path.join(dir, name)
                img = cv2.imread(path)
                if img is None:
                    # cv2.imread reports failure by returning None.
                    raise IOError("could not read image %r" % (path,))
                img = cv2.resize(img, (64, 64))
                batch.append(img / 255.)

            yield (np.array(batch), np.array(batch))

In [0]:
def autoencoderConv2D(input_shape=(64, 64, 3), filters=(32, 64, 128, 10)):
    """Build a convolutional autoencoder and the encoder that shares its layers.

    Encoder: three stride-2 convolutions (``conv1``..``conv3``), a flatten and
    a dense ``embedding`` layer. Decoder: a dense layer reshaped to the
    bottleneck feature map followed by three stride-2 transposed convolutions
    (``deconv3``..``deconv1``). Every layer uses a ReLU activation.

    Args:
        input_shape: ``(height, width, channels)`` of the input images. When
            both spatial sizes are multiples of 8 the third stage uses 'same'
            padding; otherwise 'valid' is used, which only round-trips for
            sizes where the valid-convolution arithmetic equals ``size // 8``
            (28 is one such size) -- verify before using other shapes.
        filters: At least four integers: the filter counts of the three
            convolution stages followed by the embedding size.

    Returns:
        ``(autoencoder, encoder)``: Keras models named ``'AE'`` and
        ``'encoder'`` that share the same input and encoder layers.

    Raises:
        ValueError: If ``input_shape`` does not have three entries or
            ``filters`` has fewer than four.
    """
    if len(input_shape) != 3:
        raise ValueError(
            "input_shape must be (height, width, channels), got %r" % (input_shape,))
    if len(filters) < 4:
        raise ValueError(
            "filters needs 3 conv sizes plus the embedding size, got %r" % (filters,))

    height, width, channels = input_shape
    # Spatial size of the bottleneck feature map after three stride-2 stages.
    # Height and width are tracked separately so non-square inputs work too.
    bottleneck_h, bottleneck_w = height // 8, width // 8
    pad3 = 'same' if (height % 8 == 0 and width % 8 == 0) else 'valid'

    input_img = Input(shape=input_shape)
    # The Input layer already fixes the shape, so conv1 needs no input_shape kwarg.
    x = Conv2D(filters[0], 5, strides=2, padding='same', activation='relu', name='conv1')(input_img)
    x = Conv2D(filters[1], 5, strides=2, padding='same', activation='relu', name='conv2')(x)
    x = Conv2D(filters[2], 3, strides=2, padding=pad3, activation='relu', name='conv3')(x)

    x = Flatten()(x)
    encoded = Dense(units=filters[3], name='embedding', activation='relu')(x)

    # Expand the embedding back to the flattened bottleneck feature map.
    x = Dense(units=filters[2] * bottleneck_h * bottleneck_w, activation='relu')(encoded)
    x = Reshape((bottleneck_h, bottleneck_w, filters[2]))(x)

    x = Conv2DTranspose(filters[1], 3, strides=2, padding=pad3, activation='relu', name='deconv3')(x)
    x = Conv2DTranspose(filters[0], 5, strides=2, padding='same', activation='relu', name='deconv2')(x)
    decoded = Conv2DTranspose(channels, 5, strides=2, padding='same', activation='relu', name='deconv1')(x)

    return Model(inputs=input_img, outputs=decoded, name='AE'), Model(inputs=input_img, outputs=encoded, name='encoder')

## Hyper-params

In [0]:
# Passed as `filters` to autoencoderConv2D: three conv filter counts, then the
# embedding size.
dims = [32, 64, 128, 10]

# Uniform variance-scaling initializer (fan-in mode, scale 1/3).
# NOTE(review): not referenced by any other cell visible in this notebook.
init = VarianceScaling(distribution='uniform', mode='fan_in', scale=1. / 3.)

# Optimizer handed to autoencoder.compile() in the pretraining cell.
pretrain_optimizer = SGD(momentum=0.9, lr=0.1)

# NOTE(review): the visible fit_generator call hard-codes epochs=20 and does
# not read pretrain_epochs -- confirm which value is intended.
pretrain_epochs, batch_size = 30, 6

# Prefix prepended to the saved model's file name ('' = working directory).
save_dir = ''

In [0]:
# Directory on the mounted Drive and the names of the entries it contains.
dir = r'drive/My Drive/Deep_Clustering/person_rcnn/'
images = os.listdir(path=dir)

# Endless (x, x) batch generator driven by the first column of trainData.csv.
trainGen = csv_image_generator(
    inputPath=r'drive/My Drive/Deep_Clustering/trainData.csv',
    bs=batch_size,
)

In [0]:
# Build the 64x64 RGB autoencoder together with the encoder that shares its
# layers, then print the layer-by-layer summary of the full model.
autoencoder, encoder = autoencoderConv2D(input_shape=(64, 64, 3), filters=dims)
autoencoder.summary()

## Pretrain auto-encoder

In [0]:
# Train the autoencoder to reconstruct its own input under a mean-squared-error
# loss; trainGen yields each batch as both input and target.
autoencoder.compile(loss='mse', optimizer=pretrain_optimizer)

# NOTE(review): epochs is hard-coded to 20 although pretrain_epochs is defined
# in the hyper-parameter cell; steps_per_epoch=1613 is presumably
# (number of CSV rows) / batch_size -- confirm both.
# NOTE(review): Keras documents `shuffle` as only applying to Sequence inputs,
# so it likely has no effect with a plain generator -- confirm.
autoencoder.fit_generator(
    trainGen,
    steps_per_epoch=1613,
    epochs=20,
    initial_epoch=0,
    shuffle=True,
)

# Persist architecture and weights next to the notebook (save_dir prefix).
autoencoder.save(save_dir + 'ae_model.h5')

In [0]:
# Reload the pretrained autoencoder from the same location the training cell
# saved it to (save_dir + 'ae_model.h5').
autoencoder = load_model(save_dir + 'ae_model.h5')

# Take the bottleneck by its layer name rather than by position, so the cell
# keeps working if layers are added to or removed from the encoder.
encoder = Model(autoencoder.input, autoencoder.get_layer('embedding').output)
encoder.summary()

# Compute the embeddings that the following cells store and cluster as `pred`.
# A fresh generator is used so that row i of `pred` corresponds to row i of the
# CSV, independent of how far trainGen was advanced during training.
pred = encoder.predict_generator(
    csv_image_generator(r'drive/My Drive/Deep_Clustering/trainData.csv', bs=batch_size),
    steps=1613,
    verbose=0,
)

In [0]:
# Resolve the embeddings BEFORE opening the file: mode 'w' truncates an
# existing PredData.h5, so a missing or invalid `pred` must fail first instead
# of wiping previously cached predictions.
embeddings = np.asarray(pred, dtype="float32")

# The context manager closes the file even if writing the dataset fails.
with h5py.File('PredData.h5', 'w') as h5f:
    h5f.create_dataset('dataset_1', data=embeddings, dtype="float32")

In [0]:
# Load the cached embeddings fully into memory so the HDF5 file can be closed
# again; `pred` is then a plain NumPy array instead of a lazy dataset that is
# only valid while the file handle stays open.
with h5py.File('PredData.h5', 'r') as h5f:
    for key in h5f.keys():
        print(key)
    pred = h5f['dataset_1'][...]

In [0]:
import scipy.cluster.hierarchy as shc
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.preprocessing import normalize

# One column per embedding dimension, named C1..Cn, so the cell follows the
# embedding size instead of hard-coding ten columns.
embedding = np.asarray(pred)
df = pd.DataFrame(
    embedding,
    columns=['C%d' % (i + 1) for i in range(embedding.shape[1])],
)

# Scale every sample (row) to unit L2 norm; this normalised frame is what the
# clustering cell fits on.
data = pd.DataFrame(normalize(df), columns=df.columns)
print(data.head())

plt.figure(figsize=(10, 7))
plt.title("Dendrograms")
# Build the dendrogram from the normalised data so it describes the same
# feature space that is clustered afterwards (previously the raw `df` was used).
dend = shc.dendrogram(shc.linkage(data, method='ward'))

In [0]:
from sklearn.cluster import AgglomerativeClustering
# Ward linkage only works with Euclidean distances, which is also the
# estimator's default. The explicit `affinity='euclidean'` argument is therefore
# dropped: it was deprecated and later removed in scikit-learn (renamed to
# `metric`), so omitting it keeps the cell working on old and new versions.
cluster = AgglomerativeClustering(n_clusters=500, linkage='ward')
res = cluster.fit_predict(data)

In [0]:
# Scatter of the first two normalised embedding dimensions, coloured by the
# cluster label assigned to each sample.
plt.figure(figsize=(10, 7))
plt.scatter(data['C1'], data['C2'], c=cluster.labels_)
# Label the axes and title the figure so it can be read on its own.
plt.xlabel('C1')
plt.ylabel('C2')
plt.title('Agglomerative clusters (C1 vs C2)')