<a href="https://colab.research.google.com/github/linuxsynckliye/ELL784_assignment2/blob/master/EigenUnsupervised.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [47]:
# Assignment 2 || Eigen spaces in Unsupervised Learning
# Abhishek Ranjan 2019EEA2238
# Prashant Panddy 2019EEA2786

from statistics import mean
import cv2
import os
import numpy as np

# =========================================================================================
# store images as vector for each subject
# =========================================================================================

class Subject:
  def __init__(self, data, fileName):
      self.data_subject = data
      self.name = fileName
      self.projection = []
      self.assigned_to = 0

  def appendData(self,data):
      self.data_subject = np.column_stack((data,self.data_subject))

# =========================================================================================
# store obtained result for each cluster
# =========================================================================================
class Clusters:
  def __init__(self, centroid):
    self.members = []
    self.centroid = centroid

# =========================================================================================
# finding W_pca
# =========================================================================================

def pca(X, n_components):

  mU_overall = np.mat(X.mean(axis = 1)).T
  n_components = check_num_components(n_components)

  X = X - mU_overall
  # to find eigen vectors of XX' we first calculate eigen vectors of X'X
  # then pre-multiply it with matrix X
  XTX = np.dot(X.T, X)

  [e_val_PCA, e_vec_PCA] = np.linalg.eig(XTX)
  e_vec_PCA = np.dot(X, e_vec_PCA)

  for i in range(e_vec_PCA.shape[1]):
    e_vec_PCA[:,i] = e_vec_PCA[:,i] / np.linalg.norm(e_vec_PCA[:,i])

  # sort according to decreasing value of eigen_values
  index_sorted = np.argsort(-1*(e_val_PCA))

  e_val_PCA = e_val_PCA[index_sorted]
  e_vec_PCA = e_vec_PCA[:, index_sorted]

  print("size of eigenVector matrix for PCA is :", e_vec_PCA.shape)
  print("size of eigenVector matrix for PCA after trimming to desired components is :", e_vec_PCA[:, 0 : n_components].shape)

  # return first 'n_components' eigen vectors/ values and overall mean
  return([ e_val_PCA[0 : n_components].real, e_vec_PCA[:, 0 : n_components].real, mU_overall ])
  


# =========================================================================================
# check if desired number of components are within the limits. If not set to default value
# =========================================================================================

def check_num_components(n_components):
  if (n_components <= 0) or (n_components > n_classes - 1):
    print("n_components was : ",n_components, " and has been reset to ", n_classes-1)
    return (n_classes - 1)
  else:
    return (n_components)

# =========================================================================================
# read all files in folder
# image class is assumed to be mentioned as file name itself
# =========================================================================================

# trainingSetDirectory = '/content/drive/My Drive/Photos/trainingWithName'
trainingSetDirectory = '/content/drive/My Drive/Photos/TrainThreeClassOnly'

files = os.listdir(trainingSetDirectory)

img = cv2.imread(trainingSetDirectory + '/' + files[0],cv2.IMREAD_GRAYSCALE)

row, col = img.shape
data = np.reshape(img,(row*col,1))

print("###shape###", row,col)


# final value of n_classes serves as number of classes in training set also
n_classes = 3
max_iterations = 1500
subjects = []
clusters = []

for file in files:
    # print(".....................................")
    
    img = cv2.imread(trainingSetDirectory + '/' + file,cv2.IMREAD_GRAYSCALE)

    data = np.reshape(img,(row*col,1))
    subjects.append(Subject(data, file))



print("total number of classes in training set is ", n_classes)
X = np.empty([row*col, 1])
X = np.delete(X,0,1)

# generate matrix of all images as column vectors
for i in range(len(subjects)):
    X = np.column_stack((subjects[i].data_subject,X))


[e_val_pca, e_vec_pca, mU_overall] = pca(X, X.shape[1] - n_classes)

# comment/uncomment the below expression W for EigenFaces
W = e_vec_pca
print("size of transformation matrix W is :", W.shape)

# find projection of all subjects
for i in range(len(subjects)):
    subjects[i].projection = np.dot(W.T, subjects[i].data_subject - mU_overall)

# shuffle them to for initializing cluster randomly
np.random.shuffle(subjects)
for i in range(n_classes):
  clusters.append(subjects[i].projection)


distances = []
projections_list = []

# Kmeans Clustering
for i in range(max_iterations):

  for subs in range(len(subjects)):
    # find distances and get index corresponding to minimum distance
    distances = [np.linalg.norm(subjects[subs].projection - clusters[centroid]) for centroid in range(n_classes)]
    subjects[subs].assigned_to = distances.index(min(distances))

  # update the cluster mean by first locating which all elements belong to that cluster 
  # and then take mean
  for centroid in range(n_classes):
    projections_list = []

    for s in range(len(subjects)):

      if (centroid == subjects[s].assigned_to):
        projections_list.append(subjects[s].projection)

    clusters[centroid] = np.mean(np.asarray(projections_list))

# create a list of Clusters class 
classes = []
for i in range(n_classes):
  classes.append(Clusters(clusters[i]))

# store names of subjects belonging to a particular cluster
for s in range(len(subjects)):
  classes[subjects[s].assigned_to].members.append(subjects[s].name)

# display the cluster obtained after K-means clustering
for i in range(n_classes):
  print(classes[i].members)

# =========================================================================================
# Testing results
# =========================================================================================
print("====================================================================================")
print("")

print("**************testing results using Eigen Faces**************")

# print("**************testing results using Fisher Linear Discriminant**************")

# testFileDirectory = '/content/drive/My Drive/Photos/demo'
testFileDirectory = '/content/drive/My Drive/Photos/TestThreeClassOnly'
testFiles = os.listdir(testFileDirectory)

for testFile in testFiles:
  test = cv2.imread(testFileDirectory + '/'+ testFile,cv2.IMREAD_GRAYSCALE)
  test_v = np.reshape(test,(row*col,1))

  test_v = np.reshape(test,(row*col,1))
  # print("size of test_image vector is :", test_v.shape)

  # take projection 
  testProjection = np.dot(W.T, test_v-mU_overall)
  # print("size of projection of test_image vector is :", testProjection.shape)

  cluster_index = -1
  for centroid in range(n_classes):
    distances = [np.linalg.norm(testProjection - clusters[centroid]) for centroid in range(n_classes)]
    cluster_index = distances.index(min(distances))



  # =========================================================================================
  # Display results
  # =========================================================================================
  print("====================================================================================")
  print("")

  print("test_subject : ",testFile," belongs to class containing subjects: ", classes[cluster_index].members)



###shape### 243 320
total number of classes in training set is  3
n_components was :  25  and has been reset to  2
size of eigenVector matrix for PCA is : (77760, 28)
size of eigenVector matrix for PCA after trimming to desired components is : (77760, 2)
size of transformation matrix W is : (77760, 2)
['subject03_centerlight.png', 'subject01_sleepy.png', 'subject01_happy.png', 'subject01_sad.png', 'subject01_wink.png', 'subject01_surprised.png', 'subject03_glasses.png', 'subject01_normal.png', 'subject01_noglasses.png', 'subject01_gif.png']
['subject03_sleepy.png', 'subject03_sad.png', 'subject03_wink.png', 'subject03_happy.png', 'subject03_noglasses.png', 'subject03_normal.png', 'subject03_surprised.png']
['subject03_leftlight.png', 'subject02_leftlight.png', 'subject02_noglasses.png', 'subject02_sleepy.png', 'subject02_normal.png', 'subject02_sad.png', 'subject02_centerlight.png', 'subject02_rightlight.png', 'subject01_rightlight.png', 'subject02_happy.png', 'subject02_wink.png']

**