# Question 3 - ImageNet

In [34]:
# Importing the required packages

import tensorflow as tf
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D
from keras.models import Model
from keras import backend as K
import os
import re
import numpy as np
import pandas as pd
from tensorflow.python.platform import gfile
import glob
from PIL import Image
from skimage.color import gray2rgb
from sklearn.model_selection import train_test_split
from scipy.spatial.distance import correlation
from sklearn.decomposition import TruncatedSVD
from skimage import color
from sklearn.metrics import confusion_matrix
from scipy.stats.stats import pearsonr


In [23]:
# Importing the dataset
image_glob = glob.glob('./Images/*.JPEG')

# Resizing the images to a smaller size for faster computation and removing grayscale
images = np.array([np.array(Image.open(i).resize((64,64))) for i in image_glob if len(np.array(Image.open(i).resize((64,64))).shape) == 3 ])

print("Shape of images array: ", images.shape)

# Scaling the images
images = images.astype('float32') / 255

img_len = [1241,1569,1271,1883,1802,1207]

# Creating the labels for the images
Y_label = labels = np.concatenate((np.repeat(0, img_len[0]), np.repeat(1, img_len[1]), np.repeat(2, img_len[2]), np.repeat(3,img_len[3]), np.repeat(4,img_len[4]), np.repeat(5,img_len[5])))

Shape of images array:  (8973, 64, 64, 3)


## Training the Autoencoder

In [25]:
# We will train the autoencoder on the whole dataset for better performance and make the train, test splits after training

input_img = Input(shape=(64, 64, 3)) #downsized  

#Encoder
x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)

#Decoder
x = Conv2D(8, (3, 3), activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
decoded = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)


autoencoder = Model(input_img, decoded)
encoder = Model(input_img, encoded)
autoencoder.compile(optimizer='adam', loss='binary_crossentropy')

autoencoder.fit(images, images,
                epochs = 10,
                batch_size = 128,
                shuffle = True)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1e500e4fa90>

#### CNN configuration used:
#### Encoder: 3 convolution layers and 2 maxpooling layers. Filter size of convolution layers = 3x3
#### Decoder: 3 convolution layers and 2 Upsampling layers. Filter size of convolution layers = 3x3
#### Epochs: 10 ; batch size: 128 ; activation: relu
#### The number of CNN layers was increased and it gave better performance. Also, the optimizer used was Adam, as it gave a better result in the previous question. We can't increase filter size because it slows the run.

In [36]:
# Defining Functions to use in prediction metrics

def Euclidean_Distance(X_Train, X_test, Y, row, k):
    # Making an empty distance list
    dist = []
    rows = X_Train.shape[0] 
    for i in range(0, rows):
        dist.append([i, np.linalg.norm(X_Train[i,:] - X_test[row,:])])
    
    dist.sort(key = lambda x:x[1])
    opt_k = [dist[i][0] for i in range(0,k)]
    predicted =  np.bincount(Y[opt_k]).argmax()
    
    return predicted


def Pearson_Distance(X_Train, X_test, Y, row, k):
    # Making an empty distance list
    dist = []
    rows = X_Train.shape[0]
    for i in range(0, rows):
        dist.append([i,correlation(X_Train[i,:],X_test[row,:])])
    
    dist.sort(key = lambda x:x[1])
    opt_k = [dist[i][0] for i in range(0,k)]
    predicted =  np.bincount(Y[opt_k]).argmax()
    
    return predicted


### Confusions Matrices, Accuracy for autoencoder

In [None]:
# Making the predictions using the encoder on the whole dataset to get the Compressed Images
X_comp = (encoder.predict(images))
X_comp = X_comp.reshape(len(X_comp), np.prod(X_comp.shape[1:]))


In [38]:
# Autoencoder train & test split
X_train, X_Test, Y_Train, Y_Test = train_test_split(X_comp, Y_label, test_size=0.2, random_state=0)

Y_Pred_E = np.zeros(shape=(len(Y_Test),))
Y_Pred_P = np.zeros(shape=(len(Y_Test),))

for i in range(0,len(Y_Pred_E)):
    Y_Pred_E[i,] = Euclidean_Distance(X_train , X_Test, Y_Train , i, 5)
    Y_Pred_P[i,] = Pearson_Distance(X_train , X_Test, Y_Train , i, 5)

conf1 = confusion_matrix(Y_Test, Y_Pred_E)
conf2 = confusion_matrix(Y_Test, Y_Pred_P)
acc1 = np.trace(conf1) / len(Y_Test)
acc2 = np.trace(conf2) / len(Y_Test)

print("\n\nAccuracy Using Eucledian Distance Autoencoder = ",acc1)
print("Confusion Matrix Using Eucledian Distance Autoencoder")
print(conf1)

print("\n\n Accuracy Using Pearson Distance Autoencoder = ",acc2)
print("Confusion Matrix Using Pearson Distance Autoencoder")
print(conf2)



Accuracy Using Eucledian Distance Autoencoder =  0.394986072423
Confusion Matrix Using Eucledian Distance Autoencoder
[[ 54  60  12  20  96  10]
 [ 14 110  25  13 129  14]
 [  8  44 115  11  54  14]
 [ 22  87  27  65 154   6]
 [ 18  36   9   8 310   2]
 [ 12  36  53  13  79  55]]


 Accuracy Using Pearson Distance Autoencoder =  0.392757660167
Confusion Matrix Using Pearson Distance Autoencoder
[[ 71  49   7  21  93  11]
 [ 22  96  27  23 119  18]
 [  6  46 103  12  60  19]
 [ 30  69  26  68 159   9]
 [ 14  38  16   7 304   4]
 [ 11  40  45  10  79  63]]


### Singular Value Decomposition

In [40]:
X_SVD = images.reshape(len(images),np.prod(images.shape[1:]))
Model_SVD = TruncatedSVD(n_components = 300, n_iter=25, random_state=0) # Tried at intervals 100, 200, 300. It stabilizes after 200. 
X_SVD = Model_SVD.fit_transform(X_SVD)

### Confusions Matrices, Accuracy for SVD

In [41]:
# Splitting data for SVD
X_train, X_Test, Y_Train, Y_Test = train_test_split(X_SVD, Y_label, test_size=0.2, random_state=0)

Y_Pred_E = np.zeros(shape=(len(Y_Test),))
Y_Pred_P = np.zeros(shape=(len(Y_Test),))

for i in range(0,len(Y_Pred_E)):
    Y_Pred_E[i,] = Euclidean_Distance(X_train , X_Test, Y_Train , i, 5)
    Y_Pred_P[i,] = Pearson_Distance(X_train , X_Test, Y_Train , i, 5)

conf1 = confusion_matrix(Y_Test, Y_Pred_E)
conf2 = confusion_matrix(Y_Test, Y_Pred_P)
acc1 = np.trace(conf1) / len(Y_Test)
acc2 = np.trace(conf2) / len(Y_Test)

print("\n Accuracy Using Eucledian Distance SVD = ", acc1)
print("Confusion Matrix Using Eucledian Distance SVD")
print(conf1)

print("\n Accuracy Using Pearson Distance SVD = ", acc2)
print("Confusion Matrix Using Pearson Distance SVD")
print(conf2)


 Accuracy Using Eucledian Distance SVD =  0.380501392758
Confusion Matrix Using Eucledian Distance SVD
[[ 60  59  14  20  86  13]
 [ 15 108  43  26 100  13]
 [ 13  66  89  20  44  14]
 [ 20 101  26  76 135   3]
 [ 15  34  13  17 300   4]
 [ 18  55  45  17  63  50]]

 Accuracy Using Pearson Distance SVD =  0.37938718663
Confusion Matrix Using Pearson Distance SVD
[[ 60  67  14  16  87   8]
 [ 30 109  40  22  96   8]
 [ 17  63  92  14  49  11]
 [ 49  73  26  77 133   3]
 [ 25  28  15  16 296   3]
 [ 13  51  47  18  72  47]]


### RGB Histograms

In [45]:
X_RGB = np.zeros(shape=(len(images),256*3))

for row in range(0,len(images)):
    r_hist, _ = np.histogram(images[row,:,:,0], bins = 256)
    g_hist, _ = np.histogram(images[row,:,:,1], bins = 256)
    b_hist, _ = np.histogram(images[row,:,:,2], bins = 256)
    arr = np.concatenate((r_hist,g_hist,b_hist),axis=0)
    X_RGB[row,:] = arr

### Confusions Matrices, Accuracy for RGB Histograms

In [44]:
## RGB Histogram train & test split
X_train, X_Test, Y_Train, Y_Test = train_test_split(X_RGB, Y_label, test_size=0.2, random_state=0)

Y_Pred_E = np.zeros(shape=(len(Y_Test),))
Y_Pred_P = np.zeros(shape=(len(Y_Test),))

for i in range(0,len(Y_Pred_E)):
    Y_Pred_E[i,] = Euclidean_Distance(X_train , X_Test, Y_Train , i, 5)
    Y_Pred_P[i,] = Pearson_Distance(X_train , X_Test, Y_Train , i, 5)

conf1 = confusion_matrix(Y_Test, Y_Pred_E)
conf2 = confusion_matrix(Y_Test, Y_Pred_P)
acc1 = np.trace(conf1) / len(Y_Test)
acc2 = np.trace(conf2) / len(Y_Test)

print("\n Accuracy Using Eucledian Distance RGB Histogram = ",acc1)
print("Confusion Matrix Using Eucledian Distance RGB Histogram")
print(conf1)

print("\n Accuracy Using Pearson Distance RGB Histogram = ",acc2)
print("Confusion Matrix Using Pearson Distance RGB Histogram")
print(conf2)


 Accuracy Using Eucledian Distance RGB Histogram =  0.377158774373
Confusion Matrix Using Eucledian Distance RGB Histogram
[[ 95  48  16  21  17  55]
 [ 42 102  50  21  36  54]
 [ 17  44 108  12  13  52]
 [ 48  67  29 124  28  65]
 [ 39  86  32  33 139  54]
 [ 25  44  47   5  18 109]]

 Accuracy Using Pearson Distance RGB Histogram =  0.404456824513
Confusion Matrix Using Pearson Distance RGB Histogram
[[118  50  16  26  21  21]
 [ 43  98  45  28  45  46]
 [ 20  44  98  13  32  39]
 [ 49  68  26 150  32  36]
 [ 44  71  22  26 185  35]
 [ 33  43  42  15  38  77]]


### HSV Histogram

In [47]:
X_HSV = np.zeros(shape=(len(images),692))

for row in range(0,len(images)):
    img = images[row,:,:,:]
    img_hsv = color.rgb2hsv(img)
    h_hist, _ = np.histogram(img_hsv[:,:,0], bins = 180 )
    s_hist, _ = np.histogram(img_hsv[:,:,1], bins = 256 )
    v_hist, _ = np.histogram(img_hsv[:,:,2], bins = 256 )
    arr = np.concatenate((h_hist,s_hist,v_hist), axis=0)
    X_HSV[row,:] = arr

### Confusions Matrices, Accuracy for HSV Histograms

In [49]:
# HSV Histogram train & test split
X_train, X_Test, Y_Train, Y_Test = train_test_split(X_HSV, Y_label, test_size=0.2, random_state=0)

Y_Pred_E = np.zeros(shape=(len(Y_Test),))
Y_Pred_P = np.zeros(shape=(len(Y_Test),))

for i in range(0,len(Y_Pred_E)):
    Y_Pred_E[i,] = Euclidean_Distance(X_train , X_Test, Y_Train , i, 5)
    Y_Pred_P[i,] = Pearson_Distance(X_train , X_Test, Y_Train , i, 5)

conf1 = confusion_matrix(Y_Test, Y_Pred_E)
conf2 = confusion_matrix(Y_Test, Y_Pred_P)
acc1 = np.trace(conf1) / len(Y_Test)
acc2 = np.trace(conf2) / len(Y_Test)

print("\n Accuracy Using Eucledian Distance HSV Histogram", acc1)
print("Confusion Matrix Using Eucledian Distance HSV Histogram")
print(conf1)

print("\n Accuracy Using Pearson Distance HSV Histogram = ", acc2)
print("Confusion Matrix Using Pearson Distance HSV Histogram")
print(conf2)


 Accuracy Using Eucledian Distance HSV Histogram 0.413370473538
Confusion Matrix Using Eucledian Distance HSV Histogram
[[ 97  29  15  27  24  60]
 [ 22  78  53  24  56  72]
 [  9  29 150   4   9  45]
 [ 37  44  37 139  24  80]
 [ 27  74  41  48 146  47]
 [ 12  44  37   6  17 132]]

 Accuracy Using Pearson Distance HSV Histogram =  0.421727019499
Confusion Matrix Using Pearson Distance HSV Histogram
[[ 97  32  18  31  28  46]
 [ 31  81  54  19  70  50]
 [ 10  33 145   9  23  26]
 [ 34  64  30 136  45  52]
 [ 39  77  25  29 182  31]
 [ 19  44  33   9  27 116]]


### Comparison of the results between different models

#### Among the 4 models, HSV histogram gives the max accuracy of 42.1%. This is probably because the histogram method doesn't reduce dimensionality that much and utilizes most amount of information.
#### The Singular Value Decomposition method performs the worst.
#### The Pearson Correlation method gives better accuracy compared to using Eucliean Distance for all the classifiers.
#### We could improve the autoencoder by further tuning it. It performs better than the other two methods apart from HSV.