In [0]:
from google.colab import files 
import os
import numpy as np
from PIL import Image
from matplotlib import image
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
import cv2 


def readDataSet(filepath):
  labels = np.array([]) 
  data = None
  root = filepath
  for directory in os.listdir(root):
    path = root + "/" +  directory
    if os.path.isdir(path):
      for filename in os.listdir(path):
        label = directory[1:]
        labels = np.append(labels,int(label))
        imagedata = (image.imread(root + "/" + directory + "/" + filename))
        vectorized = np.matrix.flatten(imagedata.transpose())
        vectorized = np.matrix(vectorized)
        if data is None:
          data = vectorized
        else:
          data = np.concatenate((data, vectorized), axis=0)
  return data, labels


def splitDataSet(data, labels):
  train = data[::2]
  test = data[1::2]
  y_train = labels[::2]
  y_test = labels[1::2]
  return train, test, y_train, y_test


def PCA(data,alpha):
  mean_vector = np.mean(data,axis=0)
  expanded_mean_vector = np.outer(np.ones(len(data)),mean_vector)
  centered_data = np.subtract(data,expanded_mean_vector)
  data_cov = np.cov(centered_data,rowvar=False)
  eig_vals,eig_vecs = np.linalg.eigh(data_cov)
  eig_vals = eig_vals[::-1]
  eig_vecs = eig_vecs[: , ::-1]    
  fraction_of_total_var = 0
  counter = 0
  for i in eig_vals :
    fraction_of_total_var += i/sum(eig_vals)
    counter+= 1
    if fraction_of_total_var >= alpha:
      break    
  projection_matrix = eig_vecs[:, :counter]
  return projection_matrix


def LDA(train, y_train, components):
  total_mean = np.mean(train, axis = 0).reshape(1, train.shape[1])
  class_means = np.array([])
  class_scatters = None
  clusterCardinality = []
  S = np.zeros((train.shape[1], train.shape[1]))
  B = np.zeros((train.shape[1], train.shape[1]))
  mean_vecs = np.array([])
  for x in np.unique(y_train):
      # cluster = train[y_train == x]
      n = train[y_train == x].shape[0]
      Si = np.zeros((train.shape[1], train.shape[1]))
      # clusterCardinality.append(len(cluster))
      meanX = np.mean(train[y_train == x], axis = 0)
      meanX = meanX.reshape(1, train.shape[1])
      diff = np.subtract(meanX, total_mean)
      temp = n * np.dot(diff.T, diff)
      B = np.add(B, temp)
      for c in train[y_train == x]:
        t = c - meanX
        Si = np.add(Si,np.dot(t.T, t))
      # Z = cluster - meanX
      S = np.add(S,Si)
      # if class_means is None:
      #   class_means = meanX
      # else:
      #   class_means = np.concatenate((class_means, meanX), axis = 0)
      # if class_scatters is None:
      #   class_scatters = Z
      # else:
      #   class_scatters = np.concatenate((class_scatters, Z), axis=0)
  print("For loop done") 
  # meanDifference = class_means - total_mean
  # clusterCardinality = np.array(clusterCardinality)
  # vectorized = np.multiply(meanDifference, clusterCardinality[:, np.newaxis])
  # B = np.dot(meanDifference.transpose(), vectorized)
  print("B is done")
  # S = np.dot(class_scatters.transpose(), class_scatters)
  Sinv = np.linalg.pinv(S, hermitian=True)
  print("inverse done")
  M = np.dot(Sinv, B)
  print("Mul done")
  eigval, eigvec = np.linalg.eig(M)
  print("eigenvectors done")
  idx = eigval.argsort()[::-1]   
  eigval = eigval[idx]
  eigvec = eigvec[:,idx]
  # eigvec = eigvec[:,::-1]
  projection_matrix = eigvec[:, :components]
  return projection_matrix.real


def projectData(data,projection_matrix):
  projected_data = np.transpose(np.dot(np.transpose(projection_matrix),np.transpose(data)))
  return projected_data


def KNeighbours(n, p_train, y_train, p_test):
  classifier = KNeighborsClassifier(n_neighbors=1)
  classifier.fit(p_train, y_train)
  y_pred = classifier.predict(p_test)
  return y_pred


def printData(y_test, y_pred):
  print(confusion_matrix(y_test, y_pred))
  print(classification_report(y_test, y_pred))


def classifierTuning(p_train,y_train,p_test,y_test):
    n_values = [1, 3, 5, 7]
    for x in n_values:
      y_pred = KNeighbours(x,p_train,y_train,p_test)
      print("Report for number of neighbours = " + str(x))
      printData(y_test, y_pred)


def readNonFace(filepath):
  data = None
  root = filepath
  width = 112
  height = 92
  dim = (width, height)
  for filename in os.listdir(root):
    imagedata  = cv2.imread(root + "/" + filename)
    imagedata  = cv2.cvtColor(imagedata, cv2.COLOR_BGR2GRAY)
    vectorized = cv2.resize(imagedata, dim)
    vectorized = np.matrix.flatten(vectorized.transpose())
    vectorized = np.matrix(vectorized)
    if data is None:
      data = vectorized
    else:
      data = np.concatenate((data, vectorized), axis=0)
  labels = np.zeros(len(data))
  return data, labels


def twoClassLDA(train, y_train):
  cluster1 = train[y_train == 1]
  cluster2 = train[y_train == 0]
  mean1 = np.mean(cluster1, axis = 0)
  mean2 = np.mean(cluster2, axis = 0)
  diff = mean1 - mean2
  B = np.dot(diff.T, diff)
  Z1 = cluster1 - mean1
  Z2 = cluster2 - mean2
  S1 = np.dot(Z1.T,Z1)
  S2 = np.dot(Z2.T,Z2)
  S = S1 + S2
  Sinv = np.linalg.pinv(S, hermitian=True)
  M = np.multiply(Sinv,B)
  eigval, eigvec = np.linalg.eig(M)
  idx = eigval.argsort()[::-1]   
  eigval = eigval[idx]
  eigvec = eigvec[:,idx]
  # eigvecs = eigvecs[:,::-1]
  return eigvec[:,0].real





In [0]:
# !unzip att_faces.zip
# !unzip Misc.zip
data, labels = readDataSet("./att_faces")
train, test, y_train, y_test = splitDataSet(data,labels)

# ***Classification using PCA***

In [0]:
alphas = [0.8,0.85,0.9,0.95]

for alpha in alphas:
  projection_matrix = PCA(train,alpha)
  projected_train = projectData(train,projection_matrix)
  projected_test = projectData(test,projection_matrix)  
  y_pred = KNeighbours(1,projected_train,y_train,projected_test)
  print("Alpha =  ", alpha)
  printData(y_test,y_pred)  
  
projected_train_pca = projected_train
projected_test_pca = projected_test


Alpha =   0.8
[[4 0 0 ... 0 0 0]
 [0 5 0 ... 0 0 0]
 [0 0 5 ... 0 0 0]
 ...
 [0 0 0 ... 4 0 0]
 [0 0 0 ... 0 5 0]
 [0 0 0 ... 0 0 4]]
              precision    recall  f1-score   support

         1.0       1.00      0.80      0.89         5
         2.0       1.00      1.00      1.00         5
         3.0       1.00      1.00      1.00         5
         4.0       1.00      1.00      1.00         5
         5.0       0.71      1.00      0.83         5
         6.0       1.00      1.00      1.00         5
         7.0       1.00      1.00      1.00         5
         8.0       1.00      1.00      1.00         5
         9.0       0.75      0.60      0.67         5
        10.0       1.00      1.00      1.00         5
        11.0       1.00      1.00      1.00         5
        12.0       1.00      1.00      1.00         5
        13.0       1.00      1.00      1.00         5
        14.0       1.00      1.00      1.00         5
        15.0       0.83      1.00      0.91         5
 

  _warn_prf(average, modifier, msg_start, len(result))


Alpha =   0.85
[[4 0 0 ... 0 0 0]
 [0 5 0 ... 0 0 0]
 [0 0 5 ... 0 0 0]
 ...
 [0 0 0 ... 4 0 0]
 [0 0 0 ... 0 5 0]
 [0 0 0 ... 0 0 4]]
              precision    recall  f1-score   support

         1.0       1.00      0.80      0.89         5
         2.0       1.00      1.00      1.00         5
         3.0       1.00      1.00      1.00         5
         4.0       1.00      1.00      1.00         5
         5.0       0.71      1.00      0.83         5
         6.0       1.00      1.00      1.00         5
         7.0       1.00      1.00      1.00         5
         8.0       1.00      1.00      1.00         5
         9.0       0.80      0.80      0.80         5
        10.0       1.00      1.00      1.00         5
        11.0       1.00      1.00      1.00         5
        12.0       1.00      1.00      1.00         5
        13.0       1.00      1.00      1.00         5
        14.0       1.00      1.00      1.00         5
        15.0       1.00      1.00      1.00         5


  _warn_prf(average, modifier, msg_start, len(result))


Alpha =   0.9
[[4 0 0 ... 0 0 0]
 [0 5 0 ... 0 0 0]
 [0 0 5 ... 0 0 0]
 ...
 [0 0 0 ... 4 0 0]
 [0 0 0 ... 0 5 0]
 [0 0 0 ... 0 0 4]]
              precision    recall  f1-score   support

         1.0       1.00      0.80      0.89         5
         2.0       1.00      1.00      1.00         5
         3.0       1.00      1.00      1.00         5
         4.0       1.00      1.00      1.00         5
         5.0       0.71      1.00      0.83         5
         6.0       1.00      1.00      1.00         5
         7.0       1.00      1.00      1.00         5
         8.0       1.00      1.00      1.00         5
         9.0       1.00      0.80      0.89         5
        10.0       1.00      1.00      1.00         5
        11.0       1.00      1.00      1.00         5
        12.0       1.00      1.00      1.00         5
        13.0       1.00      1.00      1.00         5
        14.0       1.00      1.00      1.00         5
        15.0       1.00      1.00      1.00         5
 

  _warn_prf(average, modifier, msg_start, len(result))


Alpha =   0.95
[[4 0 0 ... 0 0 0]
 [0 5 0 ... 0 0 0]
 [0 0 5 ... 0 0 0]
 ...
 [0 0 0 ... 4 0 0]
 [0 0 0 ... 0 5 0]
 [0 0 0 ... 0 0 3]]
              precision    recall  f1-score   support

         1.0       1.00      0.80      0.89         5
         2.0       1.00      1.00      1.00         5
         3.0       1.00      1.00      1.00         5
         4.0       1.00      1.00      1.00         5
         5.0       0.62      1.00      0.77         5
         6.0       1.00      1.00      1.00         5
         7.0       1.00      1.00      1.00         5
         8.0       1.00      1.00      1.00         5
         9.0       1.00      0.80      0.89         5
        10.0       1.00      1.00      1.00         5
        11.0       1.00      1.00      1.00         5
        12.0       1.00      1.00      1.00         5
        13.0       1.00      1.00      1.00         5
        14.0       1.00      1.00      1.00         5
        15.0       1.00      1.00      1.00         5


  _warn_prf(average, modifier, msg_start, len(result))


# **Classification using LDA**

In [0]:
projection_matrix = LDA(train, y_train, 39)
projected_train = projectData(train,projection_matrix)
projected_test = projectData(test,projection_matrix)
y_pred = KNeighbours(1,projected_train,y_train,projected_test)

printData(y_test, y_pred)
projected_train_lda = projected_train
projected_test_lda = projected_test

For loop done
B is done
inverse done
Mul done
eigenvectors done
[[3 0 0 ... 0 0 0]
 [0 5 0 ... 0 0 0]
 [0 0 4 ... 0 0 0]
 ...
 [0 0 0 ... 5 0 0]
 [0 0 0 ... 0 3 0]
 [0 0 0 ... 0 0 5]]
              precision    recall  f1-score   support

         1.0       0.75      0.60      0.67         5
         2.0       1.00      1.00      1.00         5
         3.0       1.00      0.80      0.89         5
         4.0       0.83      1.00      0.91         5
         5.0       1.00      1.00      1.00         5
         6.0       1.00      1.00      1.00         5
         7.0       1.00      1.00      1.00         5
         8.0       1.00      1.00      1.00         5
         9.0       1.00      1.00      1.00         5
        10.0       1.00      1.00      1.00         5
        11.0       1.00      1.00      1.00         5
        12.0       1.00      1.00      1.00         5
        13.0       0.71      1.00      0.83         5
        14.0       1.00      1.00      1.00         5
     

# ***Classifier Tuning***



1.   PCA
 


In [0]:
classifierTuning(projected_train_pca, y_train,projected_test_pca,y_test)

Report for number of neighbours = 1
[[4 0 0 ... 0 0 0]
 [0 5 0 ... 0 0 0]
 [0 0 5 ... 0 0 0]
 ...
 [0 0 0 ... 4 0 0]
 [0 0 0 ... 0 5 0]
 [0 0 0 ... 0 0 3]]
              precision    recall  f1-score   support

         1.0       1.00      0.80      0.89         5
         2.0       1.00      1.00      1.00         5
         3.0       1.00      1.00      1.00         5
         4.0       1.00      1.00      1.00         5
         5.0       0.62      1.00      0.77         5
         6.0       1.00      1.00      1.00         5
         7.0       1.00      1.00      1.00         5
         8.0       1.00      1.00      1.00         5
         9.0       1.00      0.80      0.89         5
        10.0       1.00      1.00      1.00         5
        11.0       1.00      1.00      1.00         5
        12.0       1.00      1.00      1.00         5
        13.0       1.00      1.00      1.00         5
        14.0       1.00      1.00      1.00         5
        15.0       1.00      1.00

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




 2.   LDA



In [0]:
classifierTuning(projected_train_lda, y_train,projected_test_lda,y_test)

Report for number of neighbours = 1
[[4 0 0 ... 0 0 0]
 [0 5 0 ... 0 0 0]
 [0 0 5 ... 0 0 0]
 ...
 [0 0 0 ... 4 0 0]
 [0 0 0 ... 0 5 0]
 [0 0 0 ... 0 0 4]]
              precision    recall  f1-score   support

         1.0       1.00      0.80      0.89         5
         2.0       1.00      1.00      1.00         5
         3.0       1.00      1.00      1.00         5
         4.0       1.00      1.00      1.00         5
         5.0       0.71      1.00      0.83         5
         6.0       1.00      1.00      1.00         5
         7.0       1.00      1.00      1.00         5
         8.0       1.00      1.00      1.00         5
         9.0       1.00      1.00      1.00         5
        10.0       1.00      1.00      1.00         5
        11.0       1.00      1.00      1.00         5
        12.0       1.00      1.00      1.00         5
        13.0       1.00      1.00      1.00         5
        14.0       1.00      1.00      1.00         5
        15.0       0.83      1.00

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# ***Bonus 1***
0.7 - 0.3 split

 


In [0]:
x_train,x_test,y_train,y_test=train_test_split(data,labels,train_size = 0.7, test_size=0.3, stratify = labels)



1.   PCA
 


In [0]:
alphas = [0.8,0.85,0.9,0.95]

for alpha in alphas:
  projection_matrix = PCA(x_train,alpha)
  projected_train = projectData(x_train,projection_matrix)
  projected_test = projectData(x_test,projection_matrix)  
  y_pred = KNeighbours(1,projected_train,y_train,projected_test)

  print("Alpha =  ", alpha)
  printData(y_test,y_pred)
 

Alpha =   0.8
[[3 0 0 ... 0 0 0]
 [0 3 0 ... 0 0 0]
 [0 0 3 ... 0 0 0]
 ...
 [0 0 0 ... 3 0 0]
 [0 0 0 ... 0 3 0]
 [0 0 0 ... 0 0 3]]
              precision    recall  f1-score   support

         1.0       1.00      1.00      1.00         3
         2.0       1.00      1.00      1.00         3
         3.0       1.00      1.00      1.00         3
         4.0       0.60      1.00      0.75         3
         5.0       1.00      1.00      1.00         3
         6.0       1.00      1.00      1.00         3
         7.0       1.00      1.00      1.00         3
         8.0       1.00      1.00      1.00         3
         9.0       1.00      1.00      1.00         3
        10.0       1.00      0.67      0.80         3
        11.0       1.00      1.00      1.00         3
        12.0       1.00      1.00      1.00         3
        13.0       1.00      1.00      1.00         3
        14.0       1.00      1.00      1.00         3
        15.0       1.00      1.00      1.00         3
 

  _warn_prf(average, modifier, msg_start, len(result))


Alpha =   0.85
[[3 0 0 ... 0 0 0]
 [0 3 0 ... 0 0 0]
 [0 0 3 ... 0 0 0]
 ...
 [0 0 0 ... 3 0 0]
 [0 0 0 ... 0 3 0]
 [0 0 0 ... 0 0 3]]
              precision    recall  f1-score   support

         1.0       1.00      1.00      1.00         3
         2.0       0.75      1.00      0.86         3
         3.0       1.00      1.00      1.00         3
         4.0       0.50      1.00      0.67         3
         5.0       1.00      1.00      1.00         3
         6.0       1.00      1.00      1.00         3
         7.0       1.00      1.00      1.00         3
         8.0       1.00      1.00      1.00         3
         9.0       1.00      1.00      1.00         3
        10.0       1.00      0.67      0.80         3
        11.0       1.00      1.00      1.00         3
        12.0       1.00      1.00      1.00         3
        13.0       1.00      1.00      1.00         3
        14.0       1.00      1.00      1.00         3
        15.0       1.00      1.00      1.00         3


  _warn_prf(average, modifier, msg_start, len(result))


Alpha =   0.9
[[3 0 0 ... 0 0 0]
 [0 3 0 ... 0 0 0]
 [0 0 3 ... 0 0 0]
 ...
 [0 0 0 ... 3 0 0]
 [0 0 0 ... 0 3 0]
 [0 0 0 ... 0 0 3]]
              precision    recall  f1-score   support

         1.0       1.00      1.00      1.00         3
         2.0       1.00      1.00      1.00         3
         3.0       1.00      1.00      1.00         3
         4.0       0.50      1.00      0.67         3
         5.0       1.00      1.00      1.00         3
         6.0       1.00      1.00      1.00         3
         7.0       1.00      1.00      1.00         3
         8.0       1.00      1.00      1.00         3
         9.0       1.00      1.00      1.00         3
        10.0       1.00      0.67      0.80         3
        11.0       1.00      1.00      1.00         3
        12.0       1.00      1.00      1.00         3
        13.0       1.00      1.00      1.00         3
        14.0       1.00      1.00      1.00         3
        15.0       1.00      1.00      1.00         3
 

  _warn_prf(average, modifier, msg_start, len(result))


Alpha =   0.95
[[3 0 0 ... 0 0 0]
 [0 3 0 ... 0 0 0]
 [0 0 3 ... 0 0 0]
 ...
 [0 0 0 ... 3 0 0]
 [0 0 0 ... 0 3 0]
 [0 0 0 ... 0 0 3]]
              precision    recall  f1-score   support

         1.0       1.00      1.00      1.00         3
         2.0       0.75      1.00      0.86         3
         3.0       1.00      1.00      1.00         3
         4.0       0.50      1.00      0.67         3
         5.0       1.00      1.00      1.00         3
         6.0       1.00      1.00      1.00         3
         7.0       1.00      1.00      1.00         3
         8.0       1.00      1.00      1.00         3
         9.0       1.00      1.00      1.00         3
        10.0       1.00      0.67      0.80         3
        11.0       1.00      1.00      1.00         3
        12.0       1.00      1.00      1.00         3
        13.0       1.00      1.00      1.00         3
        14.0       1.00      1.00      1.00         3
        15.0       1.00      1.00      1.00         3


  _warn_prf(average, modifier, msg_start, len(result))




 2.   LDA



In [0]:
projection_matrix = LDA(x_train, y_train, 39)
projected_train = projectData(x_train,projection_matrix)
projected_test = projectData(x_test,projection_matrix)
y_pred = KNeighbours(1,projected_train,y_train,projected_test)

printData(y_test, y_pred) 

For loop done
B is done
inverse done
Mul done
eigenvectors done
[[3 0 0 ... 0 0 0]
 [0 3 0 ... 0 0 0]
 [0 0 3 ... 0 0 0]
 ...
 [0 0 0 ... 3 0 0]
 [0 0 0 ... 0 3 0]
 [0 0 0 ... 0 0 3]]
              precision    recall  f1-score   support

         1.0       1.00      1.00      1.00         3
         2.0       1.00      1.00      1.00         3
         3.0       1.00      1.00      1.00         3
         4.0       0.50      1.00      0.67         3
         5.0       1.00      1.00      1.00         3
         6.0       1.00      1.00      1.00         3
         7.0       1.00      1.00      1.00         3
         8.0       1.00      1.00      1.00         3
         9.0       1.00      1.00      1.00         3
        10.0       1.00      1.00      1.00         3
        11.0       1.00      1.00      1.00         3
        12.0       1.00      1.00      1.00         3
        13.0       1.00      1.00      1.00         3
        14.0       1.00      1.00      1.00         3
     

  _warn_prf(average, modifier, msg_start, len(result))


# ***Bonus 2***
Non-face recognition

In [0]:

nonData, nonLabels = readNonFace("./Misc")
labels = np.ones(len(data))
totalData = np.concatenate((data, nonData), axis = 0)
totalLabels = np.concatenate((labels, nonLabels))
x_train,x_test,y_train,y_test=train_test_split(totalData,totalLabels,train_size = 0.5, test_size=0.5, stratify = totalLabels)
projection_matrix = twoClassLDA(x_train, y_train)
projected_train = projectData(x_train,projection_matrix)
projected_test = projectData(x_test,projection_matrix)
y_pred = KNeighbours(1,projected_train,y_train,projected_test)

printData(y_test, y_pred)



[[133  67]
 [ 58 142]]
              precision    recall  f1-score   support

         0.0       0.70      0.67      0.68       200
         1.0       0.68      0.71      0.69       200

    accuracy                           0.69       400
   macro avg       0.69      0.69      0.69       400
weighted avg       0.69      0.69      0.69       400

