<a href="https://colab.research.google.com/github/markonium/AI_Pattern_Recognition/blob/master/Face_Recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from numpy.linalg.linalg import eig
from PIL import Image
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import numpy as np
from google.colab import drive
drive.mount('/content/drive')

#Creating the Data matrix and labels vector
def gen_data_matrix_label():
  """Load the 40 x 10 face images from Drive into a data matrix and a label column.

  Every file ``s<subject>/<shot>.pgm`` under the archive directory is opened,
  flattened row-wise and stored as one row of a 400 x 10304 integer matrix.

  Returns:
    tuple: ``(data_matrix, y)``. ``y`` is a 400 x 1 integer column in which every
    entry is 1, i.e. all rows carry the same label; the subject number is not
    recorded.
  """
  archive_root = '/content/drive/MyDrive/archive/'
  subjects, shots = 40, 10

  data_matrix = np.empty([subjects * shots, 10304], dtype=int)
  y = np.empty([subjects * shots, 1], dtype=int)

  row = 0
  for subject_no in range(1, subjects + 1):
    for shot_no in range(1, shots + 1):
      path = archive_root + 's' + str(subject_no) + '/' + str(shot_no) + '.pgm'
      image = Image.open(path)
      # Flatten the pixel grid row by row into a single 10304-element vector.
      data_matrix[row] = np.reshape(np.asarray(image), -1)
      # Constant label: every image loaded here belongs to the same class.
      y[row] = 1
      row += 1
  return data_matrix, y

# Separating data into training and testing sets, such that odd rows are for testing and even rows are for training
# Separate labels accordingly
def separate_training_testing(data_matrix, y):
  """Split rows alternately into a training and a testing set.

  Rows at positions 0, 2, 4, ... go to the training set and rows at positions
  1, 3, 5, ... go to the testing set. The label rows are split the same way.
  Both arguments must be 2-D arrays.

  Returns:
    tuple: ``(train_data, test_data, train_label, test_label)``.
  """
  even_rows = slice(0, None, 2)
  odd_rows = slice(1, None, 2)
  train_data, train_label = data_matrix[even_rows], y[even_rows]
  test_data, test_label = data_matrix[odd_rows, :], y[odd_rows, :]
  return train_data, test_data, train_label, test_label

# Principal component analysis
# alpha array is an array containing all required values of alpha i.e 0.8, 0.85, 0.9 and 0.95
def PCA(data_matrix, alpha_array, n):
  """Compute PCA projection bases for four retained-variance thresholds.

  The first ``n`` rows of ``data_matrix`` are centred with the column means of
  the whole matrix, the covariance matrix is eigen-decomposed, and for each
  threshold the smallest number of leading eigenvectors whose eigenvalues
  reach that fraction of the total is kept.

  Args:
    data_matrix: 2-D array with one sample per row; the number of features is
      taken from its second dimension.
    alpha_array: 1-D numpy array holding at least four variance fractions,
      e.g. 0.8, 0.85, 0.9 and 0.95.
    n: number of leading rows to use; also the covariance divisor.

  Returns:
    tuple: four matrices ``(u1, u2, u3, u4)`` whose columns are the leading
    eigenvectors selected for the first four entries of ``alpha_array``.

  Raises:
    IndexError: if ``n`` exceeds the number of rows, or ``alpha_array`` has
      fewer than four entries.
  """
  data_matrix = np.asarray(data_matrix)
  if n > data_matrix.shape[0]:
    raise IndexError('n is larger than the number of rows in data_matrix')

  mean_vector = np.mean(data_matrix, axis=0)
  # Broadcasting centres every row at once instead of looping over each pixel.
  z = data_matrix[:n] - mean_vector
  cov = (1 / n) * np.matmul(np.transpose(z), z)

  # The covariance matrix is symmetric, so eigh applies; it returns ascending
  # eigenvalues, which are re-ordered to descending here.
  eigen_values, eigen_vectors = np.linalg.eigh(cov)
  order = eigen_values.argsort()[::-1]
  eigen_values = eigen_values[order]
  eigen_vectors = eigen_vectors[:, order]

  explained = np.cumsum(eigen_values) / np.sum(eigen_values)
  pcs = np.empty([alpha_array.size], dtype=int)
  for k in range(alpha_array.size):
    reached = np.nonzero(explained >= alpha_array[k])[0]
    # If rounding keeps the ratio below the threshold, keep every component
    # rather than leaving the count undefined.
    pcs[k] = reached[0] + 1 if reached.size else eigen_values.size

  return eigen_vectors[:, 0:pcs[0]], eigen_vectors[:, 0:pcs[1]], eigen_vectors[:, 0:pcs[2]], eigen_vectors[:, 0:pcs[3]]

def projection(train_data, test_data, u1, u2, u3, u4):
  """Project the training and testing samples onto four bases.

  Args:
    train_data: 2-D array, one training sample per row.
    test_data: 2-D array, one testing sample per row.
    u1, u2, u3, u4: basis matrices whose columns are the projection directions.

  Returns:
    tuple: eight arrays - the training data projected on u1..u4 followed by
    the testing data projected on u1..u4, each with one sample per row.
  """
  def project(samples, basis):
    # (U^T X^T)^T: one projected sample per row.
    return np.matmul(np.transpose(basis), np.transpose(samples)).T

  bases = (u1, u2, u3, u4)
  projected_train = [project(train_data, basis) for basis in bases]
  projected_test = [project(test_data, basis) for basis in bases]
  return (*projected_train, *projected_test)

def knn(train_data, train_label, test_data, test_label, n):
  """Fit a k-nearest-neighbours classifier and return its accuracy on the test set.

  Args:
    train_data: training samples, one per row.
    train_label: labels of the training samples.
    test_data: testing samples, one per row.
    test_label: true labels of the testing samples.
    n: number of neighbours used by the classifier.

  Returns:
    The accuracy score of the predictions against ``test_label``.
  """
  classifier = KNeighborsClassifier(n_neighbors=n).fit(train_data, train_label)
  return accuracy_score(test_label, classifier.predict(test_data))
# ----------------------------------------------------------------------------------------------------------------
def multi_n(train_data, train_label, test_data, test_label):
  """Print the k-NN accuracy obtained with 1, 3, 5 and 7 neighbours."""
  for neighbours in (1, 3, 5, 7):
    score = knn(train_data, train_label, test_data, test_label, neighbours)
    print('n = ' + str(neighbours) + '     accuracy = ', score, '\n')


def prepare(train, classes, samples):
  """Group consecutive rows of ``train`` into per-class blocks.

  Rows ``0 .. samples-1`` form class 0, the next ``samples`` rows form class 1,
  and so on for ``classes`` classes.

  Returns:
    numpy array built from the nested list indexed as [class][sample].
  """
  per_class = [
    [train[group * samples + member] for member in range(samples)]
    for group in range(classes)
  ]
  return np.array(per_class)
  



def LDA(train, test, classes, samples):
  """Project data onto the leading multi-class LDA (Fisher) directions.

  The first ``classes * samples`` rows of ``train`` are interpreted as
  ``classes`` consecutive groups of ``samples`` rows each. The within-class
  scatter S_w and between-class scatter S_b are built from those groups, and
  the ``classes - 1`` eigenvectors of pinv(S_w) . S_b with the largest
  eigenvalues form the projection basis.

  Args:
    train: 2-D array of training samples, grouped by class as described above.
    test: 2-D array of testing samples with the same number of columns.
    classes: number of classes.
    samples: number of training rows per class.

  Returns:
    tuple: ``(trains, tests)`` - real-valued projections of ``train`` and
    ``test``, each with ``classes - 1`` columns.

  Raises:
    IndexError: if ``train`` has fewer than ``classes * samples`` rows.
  """
  train = np.asarray(train)
  test = np.asarray(test)
  needed = classes * samples
  if train.shape[0] < needed:
    raise IndexError('train has fewer rows than classes * samples')

  dim = train.shape[1]
  grouped = train[:needed].astype(float).reshape(classes, samples, dim)
  class_means = grouped.mean(axis=1)
  overall_mean = class_means.mean(axis=0)

  # Within-class scatter: sum over classes of Z_k^T Z_k, computed in one
  # product on the stacked, per-class centred rows.
  centred = (grouped - class_means[:, np.newaxis, :]).reshape(needed, dim)
  within = centred.T @ centred

  # Between-class scatter: sum over classes of n_k (m_k - m)(m_k - m)^T,
  # i.e. an outer product per class, not an inner product.
  mean_offsets = class_means - overall_mean
  between = samples * (mean_offsets.T @ mean_offsets)

  # S_w is singular whenever there are fewer samples than features, so the
  # pseudo-inverse is used. The product is not symmetric in general, which
  # rules out eigh and requires the general eigen-solver.
  eigen_values, eigen_vectors = np.linalg.eig(np.linalg.pinv(within) @ between)
  order = np.argsort(eigen_values.real)[::-1][:classes - 1]
  # Eigenvectors are the columns of the matrix returned by eig.
  basis = eigen_vectors[:, order].real

  return train @ basis, test @ basis
  
def prepare_non(train, samples1, samples2):
  """Split the leading rows of ``train`` into two consecutive groups.

  Returns:
    tuple: an array of the first ``samples1`` rows and an array of the
    following ``samples2`` rows.
  """
  first_block = [train[row] for row in range(samples1)]
  second_block = [train[row] for row in range(samples1, samples1 + samples2)]
  return np.array(first_block), np.array(second_block)



def LDA_non(train, test, samples1, samples2):
  """Project data onto the two-class LDA (Fisher) direction.

  The first ``samples1`` rows of ``train`` are treated as class 1 and the next
  ``samples2`` rows as class 2. With between-class scatter
  B = (m1 - m2)(m1 - m2)^T, the only eigenvector of pinv(S) . B with a
  non-zero eigenvalue is proportional to pinv(S) . (m1 - m2), so that
  direction is computed directly and scaled to unit length.

  Args:
    train: 2-D array of training samples, class 1 rows first, then class 2.
    test: 2-D array of testing samples with the same number of columns.
    samples1: number of class 1 training rows.
    samples2: number of class 2 training rows.

  Returns:
    tuple: ``(trains, tests)`` - the projections of ``train`` and ``test`` on
    the discriminant direction, each with a single column.

  Raises:
    IndexError: if ``train`` has fewer than ``samples1 + samples2`` rows.
  """
  train = np.asarray(train)
  test = np.asarray(test)
  total = samples1 + samples2
  if train.shape[0] < total:
    raise IndexError('train has fewer rows than samples1 + samples2')

  first = train[:samples1].astype(float)
  second = train[samples1:total].astype(float)
  m1 = first.mean(axis=0)
  m2 = second.mean(axis=0)

  # Within-class scatter S = Z1^T Z1 + Z2^T Z2 on the per-class centred rows.
  z1 = first - m1
  z2 = second - m2
  within = z1.T @ z1 + z2.T @ z2

  # S is singular whenever there are fewer samples than features, so the
  # pseudo-inverse is used instead of a plain inverse.
  direction = np.linalg.pinv(within) @ (m1 - m2)
  length = np.linalg.norm(direction)
  if length > 0:
    direction = direction / length

  basis = direction[:, np.newaxis]
  return train @ basis, test @ basis


  # mean_victor = np.mean(LDA_matrix,axis = 1)
  # Over_mean = np.mean(mean_victor, axis = 0)

  # z = np.empty([classes,samples,10304])
  # for i in range(2):
  #   z[i] = LDA_matrix[i]-mean_victor[i]
  # s = np.empty([10304,10304])
  # for i in range(2):
  #   s +=(np.dot(z[i].T,z[i]))
  
  # sb = np.empty([10304,10304])
  # for i in range(2):
  #   sb += (samples*(np.dot((mean_victor[i]-Over_mean),(mean_victor[i]-Over_mean).T)))
  # s_inv = np.linalg.inv(s)
  # ans = np.matmul(s_inv,sb)
  # eigen_values, eigen_vectors = np.linalg.eigh(ans)
  # idx = eigen_values.argsort()[-(classes-1):][::-1]
  # eigen_vector_sorted = eigen_vectors[idx]
  # trains = np.dot(train,eigen_vector_sorted.T)
  # tests = np.dot(test,eigen_vector_sorted.T)
  # return trains.real,tests.real
    



#data_matrix, y = gen_data_matrix_label()
#train_data, test_data, train_label, test_label = separate_training_testing(data_matrix, y)


# u1, u2, u3, u4 = PCA(data_matrix, np.array([0.8, 0.85, 0.9, 0.95]), 400)
# proj1_train_data, proj2_train_data, proj3_train_data, proj4_train_data, proj1_test_data, proj2_test_data, proj3_test_data, proj4_test_data = projection(train_data, test_data, u1, u2, u3, u4)

# print('For PCA: \n')
# print('For alpha = 0.8 \n')
# multi_n(proj1_train_data, np.ravel(train_label), proj1_test_data, np.ravel(test_label))
# print('For alpha = 0.85 \n ')
# multi_n(proj2_train_data, np.ravel(train_label), proj2_test_data, np.ravel(test_label))
# print('For alpha = 0.9, \n ') 
# multi_n(proj3_train_data, np.ravel(train_label), proj3_test_data, np.ravel(test_label))
# print('For alpha = 0.95, \n')
# multi_n(proj4_train_data, np.ravel(train_label), proj4_test_data, np.ravel(test_label))



#trains, tests = LDA(train_data, test_data, 40 , 5)

#print('For LDA: \n')
#multi_n(trains,train_label,tests,test_label)





Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
(2,)
1.8055555555555558


# **nonfaces dataset, mixed dataset generation**

In [None]:
#Creating the nonfaces Data matrix and labels vector
def gen_nonfaces_data_matrix_label(n):
  """Load ``n`` folders of non-face images into a data matrix and a label column.

  Folders ``s41`` .. ``s<40 + n>`` under the face-nonface directory each supply
  the images ``1.jpg`` .. ``10.jpg``. Every image is converted to greyscale,
  resized with size ``(92, 112)`` and flattened into one 10304-element row.

  Side effect: each processed image is also saved as ``greyscale.jpg`` in the
  current working directory, overwriting the previous one.

  Args:
    n: number of non-face folders to read.

  Returns:
    tuple: ``(nonfaces_data_matrix, nonfaces_y)`` - a ``10*n x 10304`` integer
    matrix and a ``10*n x 1`` integer column whose entries are all -1.
  """
  root = '/content/drive/MyDrive/face-nonface/'
  total = 10 * n
  nonfaces_data_matrix = np.empty([total, 10304], dtype=int)
  nonfaces_y = np.empty([total, 1], dtype=int)

  row = 0
  for folder_no in range(41, 41 + n):
    for shot_no in range(1, 11):
      path = root + 's' + str(folder_no) + '/' + str(shot_no) + '.jpg'
      image = Image.open(path).convert('L').resize((92, 112))
      image.save('greyscale.jpg')
      # Flatten the pixel grid row by row into a single vector.
      nonfaces_data_matrix[row] = np.reshape(np.asarray(image), -1)
      # Constant label marking the non-face class.
      nonfaces_y[row] = -1
      row += 1
  return nonfaces_data_matrix, nonfaces_y


#Creating the mixed Data matrix and labels vector
def gen_mixed_data_matrix_label(n):
  """Build the combined faces + non-faces data matrix and label column.

  Args:
    n: number of non-face folders to include (each contributes ten rows).

  Returns:
    tuple: ``(data_matrix, y)`` with the face rows first and the non-face rows
    stacked underneath, and the labels stacked in the same order.
  """
  faces, face_labels = gen_data_matrix_label()
  nonfaces, nonface_labels = gen_nonfaces_data_matrix_label(n)
  return np.vstack((faces, nonfaces)), np.vstack((face_labels, nonface_labels))


# **PCA,LDA with the mixed datasets(faces/nonfaces)**

In [None]:
# Mixed dataset: the faces plus 40 non-face folders (400 non-face rows),
# split alternately into training and testing rows.
data_matrix, y = gen_mixed_data_matrix_label(n=40)
(train_data, test_data,
 train_label, test_label) = separate_training_testing(data_matrix, y)

In [None]:
# PCA over the 800-row mixed set, then k-NN accuracy for each variance threshold.
# NOTE(review): the basis is computed from data_matrix, i.e. from the training
# and testing rows together.
u1, u2, u3, u4 = PCA(data_matrix, np.array([0.8, 0.85, 0.9, 0.95]), 800)
(proj1_train_data, proj2_train_data, proj3_train_data, proj4_train_data,
 proj1_test_data, proj2_test_data, proj3_test_data, proj4_test_data) = projection(
    train_data, test_data, u1, u2, u3, u4)

print('For PCA: \n')
for heading, projected_train, projected_test in (
    ('For alpha = 0.8 \n', proj1_train_data, proj1_test_data),
    ('For alpha = 0.85 \n ', proj2_train_data, proj2_test_data),
    ('For alpha = 0.9, \n ', proj3_train_data, proj3_test_data),
    ('For alpha = 0.95, \n', proj4_train_data, proj4_test_data),
):
  print(heading)
  multi_n(projected_train, np.ravel(train_label), projected_test, np.ravel(test_label))


In [None]:
# Two-class LDA: train_data holds 200 face rows followed by 200 non-face rows.
trains, tests = LDA_non(train_data, test_data, 200, 200)
print('For LDA: \n')
# Labels are flattened to 1-D, as the PCA cells do; scikit-learn warns when
# fit() receives an (n, 1) label column.
multi_n(trains, np.ravel(train_label), tests, np.ravel(test_label))

# **----------------------------------------------------------------------------------------------**

In [None]:
# Mixed dataset: the faces plus 35 non-face folders (350 non-face rows),
# split alternately into training and testing rows.
data_matrix, y = gen_mixed_data_matrix_label(n=35)
(train_data, test_data,
 train_label, test_label) = separate_training_testing(data_matrix, y)

In [None]:
# PCA over the 750-row mixed set, then k-NN accuracy for each variance threshold.
# NOTE(review): the basis is computed from data_matrix, i.e. from the training
# and testing rows together.
u1, u2, u3, u4 = PCA(data_matrix, np.array([0.8, 0.85, 0.9, 0.95]), 750)
(proj1_train_data, proj2_train_data, proj3_train_data, proj4_train_data,
 proj1_test_data, proj2_test_data, proj3_test_data, proj4_test_data) = projection(
    train_data, test_data, u1, u2, u3, u4)

print('For PCA: \n')
for heading, projected_train, projected_test in (
    ('For alpha = 0.8 \n', proj1_train_data, proj1_test_data),
    ('For alpha = 0.85 \n ', proj2_train_data, proj2_test_data),
    ('For alpha = 0.9, \n ', proj3_train_data, proj3_test_data),
    ('For alpha = 0.95, \n', proj4_train_data, proj4_test_data),
):
  print(heading)
  multi_n(projected_train, np.ravel(train_label), projected_test, np.ravel(test_label))


In [None]:
# Two-class LDA: train_data holds 200 face rows followed by 175 non-face rows.
trains, tests = LDA_non(train_data, test_data, 200, 175)
print('For LDA: \n')
# Labels are flattened to 1-D, as the PCA cells do; scikit-learn warns when
# fit() receives an (n, 1) label column.
multi_n(trains, np.ravel(train_label), tests, np.ravel(test_label))

# **----------------------------------------------------------------------------------------------**

In [None]:
# Mixed dataset: the faces plus 30 non-face folders (300 non-face rows),
# split alternately into training and testing rows.
data_matrix, y = gen_mixed_data_matrix_label(n=30)
(train_data, test_data,
 train_label, test_label) = separate_training_testing(data_matrix, y)

In [None]:
# PCA over the 700-row mixed set, then k-NN accuracy for each variance threshold.
# NOTE(review): the basis is computed from data_matrix, i.e. from the training
# and testing rows together.
u1, u2, u3, u4 = PCA(data_matrix, np.array([0.8, 0.85, 0.9, 0.95]), 700)
(proj1_train_data, proj2_train_data, proj3_train_data, proj4_train_data,
 proj1_test_data, proj2_test_data, proj3_test_data, proj4_test_data) = projection(
    train_data, test_data, u1, u2, u3, u4)

print('For PCA: \n')
for heading, projected_train, projected_test in (
    ('For alpha = 0.8 \n', proj1_train_data, proj1_test_data),
    ('For alpha = 0.85 \n ', proj2_train_data, proj2_test_data),
    ('For alpha = 0.9, \n ', proj3_train_data, proj3_test_data),
    ('For alpha = 0.95, \n', proj4_train_data, proj4_test_data),
):
  print(heading)
  multi_n(projected_train, np.ravel(train_label), projected_test, np.ravel(test_label))


In [None]:
# Two-class LDA: train_data holds 200 face rows followed by 150 non-face rows.
trains, tests = LDA_non(train_data, test_data, 200, 150)
print('For LDA: \n')
# Labels are flattened to 1-D, as the PCA cells do; scikit-learn warns when
# fit() receives an (n, 1) label column.
multi_n(trains, np.ravel(train_label), tests, np.ravel(test_label))

# **----------------------------------------------------------------------------------------------**

In [None]:
# Mixed dataset: the faces plus 25 non-face folders (250 non-face rows),
# split alternately into training and testing rows.
data_matrix, y = gen_mixed_data_matrix_label(n=25)
(train_data, test_data,
 train_label, test_label) = separate_training_testing(data_matrix, y)

In [None]:
# PCA over the 650-row mixed set, then k-NN accuracy for each variance threshold.
# NOTE(review): the basis is computed from data_matrix, i.e. from the training
# and testing rows together.
u1, u2, u3, u4 = PCA(data_matrix, np.array([0.8, 0.85, 0.9, 0.95]), 650)
(proj1_train_data, proj2_train_data, proj3_train_data, proj4_train_data,
 proj1_test_data, proj2_test_data, proj3_test_data, proj4_test_data) = projection(
    train_data, test_data, u1, u2, u3, u4)

print('For PCA: \n')
for heading, projected_train, projected_test in (
    ('For alpha = 0.8 \n', proj1_train_data, proj1_test_data),
    ('For alpha = 0.85 \n ', proj2_train_data, proj2_test_data),
    ('For alpha = 0.9, \n ', proj3_train_data, proj3_test_data),
    ('For alpha = 0.95, \n', proj4_train_data, proj4_test_data),
):
  print(heading)
  multi_n(projected_train, np.ravel(train_label), projected_test, np.ravel(test_label))


In [None]:
# Two-class LDA: train_data holds 200 face rows followed by 125 non-face rows.
trains, tests = LDA_non(train_data, test_data, 200, 125)
print('For LDA: \n')
# Labels are flattened to 1-D, as the PCA cells do; scikit-learn warns when
# fit() receives an (n, 1) label column.
multi_n(trains, np.ravel(train_label), tests, np.ravel(test_label))

# **----------------------------------------------------------------------------------------------**

In [None]:
# Mixed dataset: the faces plus 20 non-face folders (200 non-face rows),
# split alternately into training and testing rows.
data_matrix, y = gen_mixed_data_matrix_label(n=20)
(train_data, test_data,
 train_label, test_label) = separate_training_testing(data_matrix, y)

In [None]:
# PCA over the 600-row mixed set, then k-NN accuracy for each variance threshold.
# NOTE(review): the basis is computed from data_matrix, i.e. from the training
# and testing rows together.
u1, u2, u3, u4 = PCA(data_matrix, np.array([0.8, 0.85, 0.9, 0.95]), 600)
(proj1_train_data, proj2_train_data, proj3_train_data, proj4_train_data,
 proj1_test_data, proj2_test_data, proj3_test_data, proj4_test_data) = projection(
    train_data, test_data, u1, u2, u3, u4)

print('For PCA: \n')
for heading, projected_train, projected_test in (
    ('For alpha = 0.8 \n', proj1_train_data, proj1_test_data),
    ('For alpha = 0.85 \n ', proj2_train_data, proj2_test_data),
    ('For alpha = 0.9, \n ', proj3_train_data, proj3_test_data),
    ('For alpha = 0.95, \n', proj4_train_data, proj4_test_data),
):
  print(heading)
  multi_n(projected_train, np.ravel(train_label), projected_test, np.ravel(test_label))


In [None]:
# Two-class LDA: train_data holds 200 face rows followed by 100 non-face rows.
trains, tests = LDA_non(train_data, test_data, 200, 100)
print('For LDA: \n')
# Labels are flattened to 1-D, as the PCA cells do; scikit-learn warns when
# fit() receives an (n, 1) label column.
multi_n(trains, np.ravel(train_label), tests, np.ravel(test_label))

# **----------------------------------------------------------------------------------------------**

In [None]:
# Mixed dataset: the faces plus 15 non-face folders (150 non-face rows),
# split alternately into training and testing rows.
data_matrix, y = gen_mixed_data_matrix_label(n=15)
(train_data, test_data,
 train_label, test_label) = separate_training_testing(data_matrix, y)

In [None]:
# PCA over the 550-row mixed set, then k-NN accuracy for each variance threshold.
# NOTE(review): the basis is computed from data_matrix, i.e. from the training
# and testing rows together.
u1, u2, u3, u4 = PCA(data_matrix, np.array([0.8, 0.85, 0.9, 0.95]), 550)
(proj1_train_data, proj2_train_data, proj3_train_data, proj4_train_data,
 proj1_test_data, proj2_test_data, proj3_test_data, proj4_test_data) = projection(
    train_data, test_data, u1, u2, u3, u4)

print('For PCA: \n')
for heading, projected_train, projected_test in (
    ('For alpha = 0.8 \n', proj1_train_data, proj1_test_data),
    ('For alpha = 0.85 \n ', proj2_train_data, proj2_test_data),
    ('For alpha = 0.9, \n ', proj3_train_data, proj3_test_data),
    ('For alpha = 0.95, \n', proj4_train_data, proj4_test_data),
):
  print(heading)
  multi_n(projected_train, np.ravel(train_label), projected_test, np.ravel(test_label))


In [None]:
# Two-class LDA: train_data holds 200 face rows followed by 75 non-face rows.
trains, tests = LDA_non(train_data, test_data, 200, 75)
print('For LDA: \n')
# Labels are flattened to 1-D, as the PCA cells do; scikit-learn warns when
# fit() receives an (n, 1) label column.
multi_n(trains, np.ravel(train_label), tests, np.ravel(test_label))

# **----------------------------------------------------------------------------------------------**

In [None]:
# Mixed dataset: the faces plus 10 non-face folders (100 non-face rows),
# split alternately into training and testing rows.
data_matrix, y = gen_mixed_data_matrix_label(n=10)
(train_data, test_data,
 train_label, test_label) = separate_training_testing(data_matrix, y)

In [None]:
# PCA over the 500-row mixed set, then k-NN accuracy for each variance threshold.
# NOTE(review): the basis is computed from data_matrix, i.e. from the training
# and testing rows together.
u1, u2, u3, u4 = PCA(data_matrix, np.array([0.8, 0.85, 0.9, 0.95]), 500)
(proj1_train_data, proj2_train_data, proj3_train_data, proj4_train_data,
 proj1_test_data, proj2_test_data, proj3_test_data, proj4_test_data) = projection(
    train_data, test_data, u1, u2, u3, u4)

print('For PCA: \n')
for heading, projected_train, projected_test in (
    ('For alpha = 0.8 \n', proj1_train_data, proj1_test_data),
    ('For alpha = 0.85 \n ', proj2_train_data, proj2_test_data),
    ('For alpha = 0.9, \n ', proj3_train_data, proj3_test_data),
    ('For alpha = 0.95, \n', proj4_train_data, proj4_test_data),
):
  print(heading)
  multi_n(projected_train, np.ravel(train_label), projected_test, np.ravel(test_label))


In [None]:
# Two-class LDA: train_data holds 200 face rows followed by 50 non-face rows.
trains, tests = LDA_non(train_data, test_data, 200, 50)
print('For LDA: \n')
# Labels are flattened to 1-D, as the PCA cells do; scikit-learn warns when
# fit() receives an (n, 1) label column.
multi_n(trains, np.ravel(train_label), tests, np.ravel(test_label))

# **----------------------------------------------------------------------------------------------**

In [None]:
# Mixed dataset: the faces plus 5 non-face folders (50 non-face rows),
# split alternately into training and testing rows.
data_matrix, y = gen_mixed_data_matrix_label(n=5)
(train_data, test_data,
 train_label, test_label) = separate_training_testing(data_matrix, y)

In [None]:
# PCA over the 450-row mixed set, then k-NN accuracy for each variance threshold.
# NOTE(review): the basis is computed from data_matrix, i.e. from the training
# and testing rows together.
u1, u2, u3, u4 = PCA(data_matrix, np.array([0.8, 0.85, 0.9, 0.95]), 450)
(proj1_train_data, proj2_train_data, proj3_train_data, proj4_train_data,
 proj1_test_data, proj2_test_data, proj3_test_data, proj4_test_data) = projection(
    train_data, test_data, u1, u2, u3, u4)

print('For PCA: \n')
for heading, projected_train, projected_test in (
    ('For alpha = 0.8 \n', proj1_train_data, proj1_test_data),
    ('For alpha = 0.85 \n ', proj2_train_data, proj2_test_data),
    ('For alpha = 0.9, \n ', proj3_train_data, proj3_test_data),
    ('For alpha = 0.95, \n', proj4_train_data, proj4_test_data),
):
  print(heading)
  multi_n(projected_train, np.ravel(train_label), projected_test, np.ravel(test_label))


In [None]:
# Two-class LDA: train_data holds 200 face rows followed by 25 non-face rows.
trains, tests = LDA_non(train_data, test_data, 200, 25)
print('For LDA: \n')
# Labels are flattened to 1-D, as the PCA cells do; scikit-learn warns when
# fit() receives an (n, 1) label column.
multi_n(trains, np.ravel(train_label), tests, np.ravel(test_label))

# **plots**

In [None]:
# Recorded PCA accuracies for k-NN with n = 1.
# x: number of non-face samples in the dataset; y1..y4: accuracy per alpha.
x = [400, 350, 300, 250, 200, 150, 100, 50, 0]
# alpha = 0.8
y1 = [
    0.9425, 0.9386666666666666, 0.9514285714285714, 0.9538461538461539,
    0.9566666666666667, 0.9672727272727273, 0.964, 0.9733333333333334, 1,
]
# alpha = 0.85
y2 = [
    0.935, 0.9253333333333333, 0.9485714285714286, 0.9446153846153846,
    0.9466666666666667, 0.96, 0.944, 0.9644444444444444, 1,
]
# alpha = 0.9
y3 = [
    0.9175, 0.9173333333333333, 0.9314285714285714, 0.9446153846153846,
    0.94, 0.9563636363636364, 0.932, 0.9511111111111111, 1,
]
# alpha = 0.95
y4 = [
    0.9025, 0.9013333333333333, 0.9228571428571428, 0.9353846153846154,
    0.93, 0.9418181818181818, 0.924, 0.9422222222222222, 1,
]

In [None]:
import matplotlib.pyplot as plt

# One series per alpha (y1..y4) against the number of non-face samples.
plt.plot(
    x, y1, 'r--',
    x, y2, 'bs',
    x, y3, 'g^',
    x, y4,
)
plt.xlabel('number of nonfaces samples')
plt.ylabel('PCA accuracy for n=1')
#plt.axis([0, 400, 0, 1])
plt.show()

In [None]:
# Recorded PCA accuracies for k-NN with n = 3.
# x: number of non-face samples in the dataset; y1..y4: accuracy per alpha.
x = [400, 350, 300, 250, 200, 150, 100, 50, 0]
# alpha = 0.8
y1 = [
    0.9175, 0.9093333333333333, 0.9314285714285714, 0.9323076923076923,
    0.9233333333333333, 0.9490909090909091, 0.94, 0.9555555555555556, 1,
]
# alpha = 0.85
y2 = [
    0.91, 0.904, 0.9285714285714286, 0.9261538461538461,
    0.92, 0.9454545454545454, 0.936, 0.9466666666666667, 1,
]
# alpha = 0.9
y3 = [
    0.885, 0.8853333333333333, 0.9057142857142857, 0.9138461538461539,
    0.91, 0.9345454545454546, 0.92, 0.9422222222222222, 1,
]
# alpha = 0.95
y4 = [
    0.86, 0.8613333333333333, 0.8885714285714286, 0.9015384615384615,
    0.8966666666666666, 0.9236363636363636, 0.92, 0.9422222222222222, 1,
]

In [None]:
import matplotlib.pyplot as plt

# One series per alpha (y1..y4) against the number of non-face samples.
plt.plot(
    x, y1, 'r--',
    x, y2, 'bs',
    x, y3, 'g^',
    x, y4,
)
plt.xlabel('number of nonfaces samples')
plt.ylabel('PCA accuracy for n=3')
#plt.axis([0, 400, 0, 1])
plt.show()

In [None]:
# Recorded PCA accuracies for k-NN with n = 5.
# x: number of non-face samples in the dataset; y1..y4: accuracy per alpha.
x = [400, 350, 300, 250, 200, 150, 100, 50, 0]
# alpha = 0.8
y1 = [
    0.91, 0.9066666666666666, 0.9228571428571428, 0.9230769230769231,
    0.91, 0.9381818181818182, 0.932, 0.9422222222222222, 1,
]
# alpha = 0.85
y2 = [
    0.895, 0.8933333333333333, 0.9142857142857143, 0.9138461538461539,
    0.9066666666666666, 0.9345454545454546, 0.916, 0.9466666666666667, 1,
]
# alpha = 0.9
y3 = [
    0.87, 0.872, 0.8971428571428571, 0.9046153846153846,
    0.9, 0.9236363636363636, 0.908, 0.9422222222222222, 1,
]
# alpha = 0.95
y4 = [
    0.845, 0.8506666666666667, 0.88, 0.8923076923076924,
    0.8866666666666667, 0.9163636363636364, 0.904, 0.9422222222222222, 1,
]

In [None]:
import matplotlib.pyplot as plt

# One series per alpha (y1..y4) against the number of non-face samples.
plt.plot(
    x, y1, 'r--',
    x, y2, 'bs',
    x, y3, 'g^',
    x, y4,
)
plt.xlabel('number of nonfaces samples')
plt.ylabel('PCA accuracy for n=5')
#plt.axis([0, 400, 0, 1])
plt.show()

In [None]:
# Recorded PCA accuracies for k-NN with n = 7.
# x: number of non-face samples in the dataset; y1..y4: accuracy per alpha.
x = [400, 350, 300, 250, 200, 150, 100, 50, 0]
# alpha = 0.8
y1 = [
    0.9025, 0.896, 0.92, 0.9138461538461539,
    0.91, 0.9272727272727272, 0.916, 0.9377777777777778, 1,
]
# alpha = 0.85
y2 = [
    0.88, 0.8746666666666667, 0.9028571428571428, 0.9076923076923077,
    0.8966666666666666, 0.9236363636363636, 0.908, 0.9377777777777778, 1,
]
# alpha = 0.9
y3 = [
    0.86, 0.864, 0.8914285714285715, 0.8953846153846153,
    0.8866666666666667, 0.9127272727272727, 0.9, 0.9377777777777778, 1,
]
# alpha = 0.95
y4 = [
    0.835, 0.848, 0.8714285714285714, 0.8861538461538462,
    0.8733333333333333, 0.9090909090909091, 0.896, 0.9377777777777778, 1,
]

In [None]:
import matplotlib.pyplot as plt

# One series per alpha (y1..y4) against the number of non-face samples.
plt.plot(
    x, y1, 'r--',
    x, y2, 'bs',
    x, y3, 'g^',
    x, y4,
)
plt.xlabel('number of nonfaces samples')
plt.ylabel('PCA accuracy for n=7')
#plt.axis([0, 400, 0, 1])
plt.show()

In [None]:
# Recorded LDA accuracies.
# x: number of non-face samples in the dataset; y1..y4: accuracy for k-NN
# with n = 1, 3, 5 and 7 respectively.
x = [400, 350, 300, 250, 200, 150, 100, 50, 0]
# n = 1
y1 = [
    0.8875, 0.888, 0.9171428571428571, 0.9261538461538461,
    0.9233333333333333, 0.9381818181818182, 0.924, 0.9466666666666667, 1,
]
# n = 3
y2 = [
    0.8525, 0.8586666666666667, 0.8828571428571429, 0.8984615384615384,
    0.8933333333333333, 0.92, 0.92, 0.9422222222222222, 1,
]
# n = 5
y3 = [
    0.83, 0.8373333333333334, 0.8657142857142858, 0.8861538461538462,
    0.88, 0.9127272727272727, 0.904, 0.9422222222222222, 1,
]
# n = 7
y4 = [
    0.8225, 0.8346666666666667, 0.8628571428571429, 0.88,
    0.8733333333333333, 0.9090909090909091, 0.896, 0.9377777777777778, 1,
]


In [None]:
import matplotlib.pyplot as plt

# One series per neighbour count (y1..y4) against the number of non-face samples.
plt.plot(
    x, y1, 'r--',
    x, y2, 'bs',
    x, y3, 'g^',
    x, y4,
)
plt.xlabel('number of nonfaces samples')
plt.ylabel('LDA accuracy for n=1,3,5,7')
#plt.title('bbbbbbbbbbb')#plt.axis([0, 400, 0, 1])
plt.show()

# **70:30 separation ratio**

In [None]:
def bonus_separation(data_matrix, y):
  """Split 40 groups of 10 consecutive rows 70/30 into training and testing sets.

  Within every group of ten rows, the first seven go to the training set and
  the last three to the testing set; the labels are split the same way. The
  shapes of the four resulting arrays are printed before returning.

  Args:
    data_matrix: array whose first 400 rows are the samples (10304 columns).
    y: array whose first 400 rows are the matching labels.

  Returns:
    tuple: ``(bonus_train_data, bonus_test_data, bonus_train_label,
    bonus_test_label)`` with 280 training and 120 testing rows.
  """
  train_rows = [10 * group + offset for group in range(40) for offset in range(7)]
  test_rows = [10 * group + offset for group in range(40) for offset in range(7, 10)]

  bonus_train_data = np.empty([280, 10304], dtype=int)
  bonus_train_label = np.empty([280, 1], dtype=int)
  bonus_test_data = np.empty([120, 10304], dtype=int)
  bonus_test_label = np.empty([120, 1], dtype=int)

  for target, source in enumerate(train_rows):
    bonus_train_data[target] = data_matrix[source]
    bonus_train_label[target] = y[source]
  for target, source in enumerate(test_rows):
    bonus_test_data[target] = data_matrix[source]
    bonus_test_label[target] = y[source]

  for part in (bonus_train_data, bonus_test_data, bonus_train_label, bonus_test_label):
    print(part.shape)
  return bonus_train_data, bonus_test_data, bonus_train_label, bonus_test_label

In [None]:
# Faces-only dataset, split 70/30 within each group of ten rows.
data_matrix, y = gen_data_matrix_label()
(bouns_train_data, bouns_test_data,
 bouns_train_label, bouns_test_label) = bonus_separation(data_matrix, y)

(280, 10304)
(120, 10304)
(280, 1)
(120, 1)


In [None]:
# PCA over the 400 face rows, then 1-NN accuracy on the 70/30 split per threshold.
# NOTE(review): the basis is computed from data_matrix, i.e. from the training
# and testing rows together.
u1, u2, u3, u4 = PCA(data_matrix, np.array([0.8, 0.85, 0.9, 0.95]), 400)
(proj1_train_data, proj2_train_data, proj3_train_data, proj4_train_data,
 proj1_test_data, proj2_test_data, proj3_test_data, proj4_test_data) = projection(
    bouns_train_data, bouns_test_data, u1, u2, u3, u4)

print('For PCA with 70/30 separation: \n')
for caption, projected_train, projected_test in (
    ('For alpha = 0.8, accuracy = ', proj1_train_data, proj1_test_data),
    ('For alpha = 0.85, accuracy = ', proj2_train_data, proj2_test_data),
    ('For alpha = 0.9, accuracy = ', proj3_train_data, proj3_test_data),
    ('For alpha = 0.95, accuracy = ', proj4_train_data, proj4_test_data),
):
  print(caption, knn(projected_train, np.ravel(bouns_train_label), projected_test, np.ravel(bouns_test_label), 1))

For PCA with 70/30 separation: 

For alpha = 0.8, accuracy =  0.9583333333333334
For alpha = 0.85, accuracy =  0.9666666666666667
For alpha = 0.9, accuracy =  0.9583333333333334
For alpha = 0.95, accuracy =  0.95


In [None]:
# Multi-class LDA on the 70/30 split: 40 classes with 7 training rows each.
trains, tests = LDA(bouns_train_data, bouns_test_data, 40, 7)
print('For LDA: \n')
# Labels are flattened to 1-D, as the PCA cell does; scikit-learn warns when
# fit() receives an (n, 1) label column.
multi_n(trains, np.ravel(bouns_train_label), tests, np.ravel(bouns_test_label))


For LDA: 

n = 1     accuracy =  0.7583333333333333 

n = 3     accuracy =  0.7083333333333334 

n = 5     accuracy =  0.6416666666666667 

n = 7     accuracy =  0.625 



  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
