<a href="https://colab.research.google.com/github/radwaahmed20112000/Face-Recognition/blob/main/Face_Recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Data Balancing**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy

In [None]:
import zipfile
import io
# Extract the face dataset archive into ./dataSet. The context manager closes
# the archive handle once extraction finishes (it was previously left open).
with zipfile.ZipFile('dataSet.zip') as zf:
    zf.extractall('dataSet')

In [None]:
# Root directory of the extracted face dataset; read_images() appends "s<i>/" to it.
PATH = "/content/dataSet/"

Read the PGM files in a given directory and convert them to matrices

In [None]:
import cv2
import os
def load_images_from_folder(folder):
    """Load the images 1.pgm .. 10.pgm from `folder` with imread flag 0.

    Files that cv2.imread cannot read (it returns None) are skipped, so the
    returned list may hold fewer than ten images.
    """
    candidates = (
        cv2.imread(os.path.join(folder, str(number) + ".pgm"), 0)
        for number in range(1, 11)
    )
    return [image for image in candidates if image is not None]

Read each person's images and store them in the data matrix

In [None]:
def read_images():
    """Collect the images of folders s1 .. s40 under PATH into one flat list."""
    dataset = []
    for subject in range(1, 41):
        subject_dir = PATH + "s" + str(subject) + "/"
        dataset.extend(load_images_from_folder(subject_dir))
    return dataset

Flatten each image in the data matrix into a 1-D vector of 10304 values

In [None]:
def flatten_images(rows, dataset, n_pixels=10304):
    """Stack the first `rows` images of `dataset` as flattened row vectors.

    Args:
        rows: number of images to take from the start of `dataset`.
        dataset: indexable collection of image arrays.
        n_pixels: number of values per flattened image. Defaults to 10304,
            the width this file used as a hard-coded constant.

    Returns:
        A float ndarray of shape (rows, n_pixels).

    Raises:
        IndexError: if `dataset` holds fewer than `rows` images.
        ValueError: if an image does not flatten to `n_pixels` values.
    """
    data = np.zeros((rows, n_pixels))
    for i in range(rows):
        data[i, :] = np.asarray(dataset[i]).reshape(-1)
    return data

Split data into training and testing sets

In [None]:
def split(df, label):
    """Split samples by row position: even positions test, odd positions train.

    Args:
        df: DataFrame with one sample per row.
        label: sequence of labels aligned with the rows of `df`.

    Returns:
        (X_train, Y_train, X_test, Y_test)
    """
    # Even positions (0, 2, 4, ...) form the test set.
    X_test = df.iloc[::2]
    Y_test = label[::2]

    # Odd positions (1, 3, 5, ...) form the training set.
    X_train = df.iloc[1::2]
    Y_train = label[1::2]

    return X_train, Y_train, X_test, Y_test

Data preparation

In [None]:
def data_preparation(split_callback):
    """Load the face images, flatten them and hand them to `split_callback`.

    The callback receives a DataFrame of the 400 flattened images and a list
    of labels (1..40, each repeated ten times); its return value is returned
    unchanged.
    """
    images = read_images()
    flattened = flatten_images(400, images)

    # Label list: subject id repeated once per image of that subject.
    label = [subject for subject in range(1, 41) for _ in range(10)]

    df = pd.DataFrame(data=flattened)
    return split_callback(df, label)

Bonus part : Splitting 70% , 30% 

In [None]:
def bonus_split(df, label, per_class=10, n_train=7):
    """Split each consecutive group of `per_class` rows into train/test parts.

    The first `n_train` rows of every group go to the training set and the
    remaining rows to the test set. With the defaults this is the 70% / 30%
    split. Output sizes are derived from `df` instead of being fixed to a
    400 x 10304 input. Rows beyond the last complete group are ignored.

    Args:
        df: DataFrame with one sample per row, grouped by class.
        label: sequence of labels aligned with the rows of `df`.
        per_class: number of consecutive rows per class.
        n_train: rows per class that go to the training set.

    Returns:
        (X_train, Y_train, X_test, Y_test): X_* are DataFrames, Y_* are
        float ndarrays.
    """
    n_cols = df.shape[1]
    n_classes = df.shape[0] // per_class
    n_test = per_class - n_train
    labels = np.asarray(label)

    X_train = np.zeros((n_classes * n_train, n_cols))
    X_test = np.zeros((n_classes * n_test, n_cols))
    Y_train = np.zeros(n_classes * n_train)
    Y_test = np.zeros(n_classes * n_test)

    for c in range(n_classes):
        start = c * per_class
        cut = start + n_train
        stop = start + per_class
        tr = slice(c * n_train, (c + 1) * n_train)
        te = slice(c * n_test, (c + 1) * n_test)

        # Leading rows of the group are used for training.
        X_train[tr, :] = df.iloc[start:cut, :].to_numpy()
        Y_train[tr] = labels[start:cut]
        # The rest of the group is held out for testing.
        X_test[te, :] = df.iloc[cut:stop, :].to_numpy()
        Y_test[te] = labels[cut:stop]

    X_train = pd.DataFrame(data=X_train)
    X_test = pd.DataFrame(data=X_test)

    return X_train, Y_train, X_test, Y_test

# **LDA**

Parameter Initialization

In [None]:
# Default Values 
rows = 200              # upper bound of the row scan in calculate_means / compute_s
n_of_classes = 40       # number of class means allocated and averaged
n_of_eigenvectors = 39  # number of eigenvectors kept for the projection
n_of_instances = 5      # stride (rows per class) used when slicing the data

In [None]:
def intialize_variables(n_rows, classes, instances):
    """Overwrite the module-level settings rows, n_of_classes and n_of_instances."""
    global rows, n_of_classes, n_of_instances
    rows, n_of_classes, n_of_instances = n_rows, classes, instances

In [None]:
def set_n_of_eigen_vectors(eigen_vectors):
    """Store `eigen_vectors` in the module-level setting n_of_eigenvectors."""
    global n_of_eigenvectors
    n_of_eigenvectors = eigen_vectors

Calculate each class mean

In [None]:
def calculate_means(data):
    """Compute one mean vector per class from consecutive row groups.

    Reads the module-level settings `rows`, `n_of_classes` and
    `n_of_instances`: rows [0, rows) of `data` are walked in groups of
    `n_of_instances`, and group k supplies the mean of class k.

    Args:
        data: DataFrame with one sample per row, grouped by class.

    Returns:
        ndarray of shape (n_of_classes, data.shape[1]). Rows for classes with
        no corresponding group stay zero.
    """
    MUs = np.zeros([n_of_classes, data.shape[1]])
    starts = range(0, rows, n_of_instances)
    # zip stops at n_of_classes, so surplus groups are ignored instead of
    # indexing past the end. Each mean is written to its own row only
    # (the previous slice assignment MUs[k:] rewrote every later row too).
    for k, start in zip(range(n_of_classes), starts):
        MUs[k] = data.iloc[start:start + n_of_instances, :].mean()
    return MUs

Calculate the overall sample mean

In [None]:
def compute_overall_mean(MUs):
    """Average the first `n_of_classes` class means.

    Reads the module-level setting `n_of_classes`. The result is the
    unweighted mean of the class means.
    NOTE(review): that equals the sample mean only when every class has the
    same number of samples — confirm for the unbalanced non-face experiments.

    Args:
        MUs: 2-D array of class means, one row per class.

    Returns:
        ndarray of shape (1, n_features).
    """
    MUs = np.asarray(MUs)
    U = np.zeros([1, MUs.shape[1]])
    for i in range(n_of_classes):
        U += MUs[i]
    U = U / n_of_classes
    return U

*Calculate* Sb (between-class scatter matrix)

In [None]:
def compute_sb(MUs, U, s_callback):
    """Compute the between-class scatter matrix Sb.

    Sb = sum_i s_callback(i) * (mu_i - U)^T (mu_i - U) over the first
    `n_of_classes` rows of `MUs` (`n_of_classes` is a module-level setting).

    Args:
        MUs: 2-D array of class means, one row per class.
        U: overall mean, broadcastable against one row of `MUs`.
        s_callback: callable mapping a class index to that class's weight.

    Returns:
        DataFrame of shape (n_features, n_features).
    """
    MUs = np.asarray(MUs)
    n_features = MUs.shape[1]
    Sb = np.zeros((n_features, n_features))
    for i in range(n_of_classes):
        # Flatten so the outer product is explicit rather than relying on
        # (n, 1) * (1, n) broadcasting.
        b = np.asarray(MUs[i] - U).reshape(-1)
        Sb += s_callback(i) * np.outer(b, b)
    return pd.DataFrame(Sb)

Calculate each S (within-class scatter matrix)

In [None]:
def compute_s(MUs, data):
    """Compute the within-class scatter matrix S.

    Rows [0, rows) of `data` are walked in groups of `n_of_instances` (both
    module-level settings). Each group contributes Z^T Z, where Z is the group
    centered on its own mean. For a full group this equals the previous
    (n_of_instances - 1) * cov up to rounding. Unlike that form it is also
    correctly scaled for a shorter trailing group and yields zeros, not NaN,
    for a single-row group.

    Args:
        MUs: class means; accepted for interface compatibility but not used,
            because each group is centered on its own mean.
        data: DataFrame with one sample per row, grouped by class.

    Returns:
        DataFrame of shape (n_features, n_features).
    """
    n_features = data.shape[1]
    S = np.zeros((n_features, n_features))
    for start in range(0, rows, n_of_instances):
        d = data.iloc[start:start + n_of_instances, :].to_numpy(dtype=float)
        if d.shape[0] == 0:
            # `rows` exceeds the number of rows available; nothing to add.
            continue
        z = d - d.mean(axis=0)
        S += z.T @ z
    return pd.DataFrame(S)


Calculate eigen vectors and eigen values of A

In [None]:
from numpy.linalg import eig
import scipy.linalg
def lda_compute_eigen_values_vectors(A):
    """Return the `n_of_eigenvectors` largest eigenpairs of `A`.

    `n_of_eigenvectors` is a module-level setting. The pairs are selected with
    `subset_by_index` (the older `eigvals=` keyword is no longer accepted by
    current SciPy) and returned largest-first. Exactly `n_of_eigenvectors`
    columns come back, so a caller that keeps the first `n_of_eigenvectors`
    of them keeps the dominant directions.

    NOTE(review): scipy.linalg.eigh assumes a symmetric/Hermitian matrix and
    reads only one triangle; callers in this file pass inv(S) @ Sb, which is
    not symmetric in general — confirm this is intended.

    Args:
        A: square 2-D array-like.

    Returns:
        (eigenValues, eigenVectors): float64 arrays of shape (k,) and (n, k),
        with k = n_of_eigenvectors, ordered by descending eigenvalue.
    """
    print(n_of_eigenvectors)
    A = np.asarray(A)
    n = A.shape[0]
    eigenValues, eigenVectors = scipy.linalg.eigh(
        A, subset_by_index=[n - n_of_eigenvectors, n - 1])

    # eigh returns ascending order; flip so the largest eigenvalue comes first.
    eigenValues = eigenValues[::-1]
    eigenVectors = eigenVectors[:, ::-1]

    # printing eigen values
    print("Eigen values :\n", eigenValues)

    # printing eigen vectors
    print("Eigen vectors of :\n", eigenVectors)

    # Normalise dtype (eigh output is already real-valued).
    eigenValues = eigenValues.astype(np.float64)
    print(type(eigenValues[0]))

    eigenVectors = eigenVectors.astype(np.float64)
    print(type(eigenVectors[0][0]))

    return eigenValues, eigenVectors

Calculate the projection matrix

In [None]:
def compute_projection_matrix(eigenVectors):
    """Build the projection matrix from the leading eigenvector columns.

    Keeps the first `n_of_eigenvectors` (module-level setting) columns of
    `eigenVectors` and returns them as rows of a float64 DataFrame, printing
    its shape.
    """
    leading = eigenVectors[:, :n_of_eigenvectors].T
    P = pd.DataFrame(leading).astype(np.float64)
    print(P.shape)
    return P

In [None]:
def lda_projection(n_of_eigenvectors, A, X_train, X_test, Y_train, Y_test):
    """Project the data with eigenvectors of `A`, then classify and evaluate.

    Prints the evaluation of a 1-nearest-neighbour prediction first, then the
    evaluation produced by the tuned classifier.
    """
    # Publish the requested count to the module-level setting that the
    # eigen-decomposition and projection-matrix helpers read.
    set_n_of_eigen_vectors(n_of_eigenvectors)

    _, vectors = lda_compute_eigen_values_vectors(A)
    P = compute_projection_matrix(vectors)
    train_proj, test_proj = projection(P, X_train, X_test)

    baseline = knn_prediction(train_proj, Y_train, test_proj)
    print("Evaluation Without Tunning: ")
    evaluation(baseline, Y_test)

    print("Evaluation With Tunning: ")
    tunning(train_proj, Y_train, test_proj, Y_test)


In [None]:
def lda(n_of_eigenvectors, split_callback):
    """Run the LDA pipeline on the face data produced by `split_callback`.

    Args:
        n_of_eigenvectors: number of eigenvectors to project onto.
        split_callback: function (df, label) -> (X_train, Y_train, X_test,
            Y_test), e.g. `split` or `bonus_split`.
    """
    X_train, Y_train, X_test, Y_test = data_preparation(split_callback)

    # Align the module-level class layout with the split actually used.
    # Without this, the defaults (200 rows, 5 per class) were applied even to
    # bonus_split's 280 training rows (7 per class), mixing rows of different
    # subjects in each class mean and scatter term.
    n_classes = len(np.unique(Y_train))
    intialize_variables(len(X_train), n_classes, len(X_train) // n_classes)

    MUs = calculate_means(X_train)

    S = compute_s(MUs, X_train)

    U = compute_overall_mean(MUs)

    def s(i):
        # Every class has the same number of training samples here.
        return n_of_instances
    Sb = compute_sb(MUs, U, s)

    # NOTE(review): S is built from far fewer samples than features, so it is
    # presumably singular; np.linalg.inv may raise or be numerically
    # unreliable — consider np.linalg.pinv or a prior PCA step.
    A = np.dot(np.linalg.inv(S), Sb)

    lda_projection(n_of_eigenvectors, A, X_train, X_test, Y_train, Y_test)

In [None]:
# Run the LDA pipeline with 39 eigenvectors on the split produced by bonus_split.
lda(39, bonus_split)

# **Classification using PCA** 

*Center The Data*

In [None]:
def center_data(data):
    """Subtract the column-wise mean from `data`, print the result and return it."""
    column_means = data.mean(axis=0)
    centered = data - column_means
    print(centered)
    return centered

*Compute Covariance matrix*

In [None]:
def compute_covariance(data):
    """Return the population covariance (normalised by N) of the columns of `data`.

    Rows are observations and columns are variables; the matrix is printed
    before being returned.
    """
    cov_matrix = np.cov(data, rowvar=False, ddof=0)
    print(cov_matrix)
    return cov_matrix

*Compute eigenvalues and eigenvectors*

In [None]:
def compute_eigens(cov_matrix):
    """Eigen-decompose a symmetric matrix, ordering the pairs largest-first.

    Returns (eigen_values, eigen_vectors) as float64 arrays; column j of
    eigen_vectors belongs to eigen_values[j]. Intermediate values and their
    scalar types are printed.
    """
    ascending_values, ascending_vectors = np.linalg.eigh(cov_matrix)

    # eigh yields ascending eigenvalues; reverse values and columns together.
    eigen_values = ascending_values[::-1]
    eigen_vectors = ascending_vectors[:, ::-1]
    print(eigen_values)
    print(eigen_vectors)

    # Normalise both results to float64.
    eigen_values = eigen_values.astype(np.float64)
    print(type(eigen_values[0]))

    eigen_vectors = eigen_vectors.astype(np.float64)
    print(type(eigen_vectors[0][0]))
    return eigen_values, eigen_vectors

*Fraction of the total variance*

In [None]:
def choose_dimensionality(alpha, eigen_values=None):
    """Return the smallest r whose leading r eigenvalues explain >= alpha.

    Args:
        alpha: required fraction of the total variance.
        eigen_values: eigenvalues ordered largest-first. When omitted, the
            module-level `eigen_values` is used, as before.

    Returns:
        The number of leading eigenvalues whose cumulative share of the total
        first reaches `alpha`. If the share never reaches `alpha` (e.g.
        alpha > 1), the total number of eigenvalues is returned instead of an
        implicit None.
    """
    if eigen_values is None:
        eigen_values = globals()["eigen_values"]
    values = np.asarray(eigen_values, dtype=float)

    total = values.sum()
    explained = 0.0
    for r, value in enumerate(values, start=1):
        explained += value
        if explained / total >= alpha:
            print(r)
            return r

    # Threshold never reached: keep every component.
    r = len(values)
    print(r)
    return r

*Reduced basis*

In [None]:
def reduced_basis(r, eigen_vectors):
    """Return the first `r` columns of `eigen_vectors`."""
    return eigen_vectors[:, :r]

*Reduced Dimensionality data*

In [None]:
def prepare_for_pca(data):
    """Center `data`, then eigen-decompose the covariance of the centered data.

    The centered matrix is now the one passed to compute_covariance;
    previously it was computed and then discarded.

    Returns:
        (eigen_values, eigen_vectors) as produced by compute_eigens.
    """
    z = center_data(data)
    cov_matrix = compute_covariance(z)
    eigen_values, eigen_vectors = compute_eigens(cov_matrix)
    return eigen_values, eigen_vectors

In [None]:
def PCA(data, alpha, eigen_values, eigen_vectors):
    """Project onto the PCA basis chosen for `alpha`, then classify and evaluate.

    Args:
        data: not used by the body.
        alpha: fraction of total variance the kept components must explain.
        eigen_values: not used by the body; choose_dimensionality is called
            with `alpha` only.
        eigen_vectors: eigenvector matrix, one eigenvector per column.

    NOTE: X_train, Y_train, X_test and Y_test are read from module scope, not
    from the arguments.
    """
    r = choose_dimensionality(alpha)

    # Computed once; the second, identical call was redundant.
    u = reduced_basis(r, eigen_vectors)

    projected_x_train, projected_x_test = projection(u.transpose(), X_train, X_test)

    predicted = knn_prediction(projected_x_train, Y_train, projected_x_test)

    evaluation(predicted, Y_test)

    tunning(projected_x_train, Y_train, projected_x_test, Y_test)

Compute Different Alphas

In [None]:
# Variance fractions to try; PCA() is run once per value.
alphas = [0.8, 0.85, 0.9, 0.95]

# These module-level names are also read as globals inside PCA() and
# choose_dimensionality().
# NOTE(review): PCA() calls projection, knn_prediction, evaluation and tunning,
# which are defined further down in this file — those definitions must have
# been executed before this cell runs.
X_train, Y_train, X_test, Y_test = data_preparation(split)
eigen_values, eigen_vectors = prepare_for_pca(X_train)

for alpha in alphas :
  PCA(X_train, alpha, eigen_values, eigen_vectors)

# **Projection**

Project X_train and X_test

In [None]:
def projection(P, X_train, X_test):
    """Project the training and test samples onto the rows of `P`.

    Each sample matrix is multiplied by the transpose of `P`; the shape of
    each projected matrix is printed (train first, then test).

    Returns:
        (projected_x_train, projected_x_test)
    """
    basis = P.transpose()

    projected = []
    for samples in (X_train, X_test):
        coords = np.dot(samples, basis)
        # Show the resulting dimensions as a sanity check.
        print(coords.shape)
        projected.append(coords)

    projected_x_train, projected_x_test = projected
    return projected_x_train, projected_x_test

Knn Classifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier

def knn_prediction(projected_x_train, Y_train, projected_x_test):
    """Fit a 1-nearest-neighbour classifier and predict the test samples."""
    classifier = KNeighborsClassifier(n_neighbors=1)
    classifier.fit(projected_x_train, Y_train)
    return classifier.predict(projected_x_test)

Evaluation

In [None]:
def evaluation(predicted, y_test):
    """Print accuracy, weighted F1, micro precision/recall and the confusion matrix.

    scikit-learn's metric functions take (y_true, y_pred) in that order. The
    arguments were previously passed the other way round, which weights the
    F1 score by the predicted class frequencies and transposes the confusion
    matrix.

    Args:
        predicted: predicted labels.
        y_test: ground-truth labels.
    """
    from sklearn.metrics import (
        accuracy_score,
        confusion_matrix,
        f1_score,
        precision_score,
        recall_score,
    )

    accuracy = accuracy_score(y_test, predicted)
    f1 = f1_score(y_test, predicted, average='weighted')
    print ("Accuracy: ", accuracy)
    # Round the F1 score to three decimals for display.
    f1 = float("{0:.3f}".format(f1))
    print("F1_score: ",f1)

    # Precision and recall, micro-averaged over all classes.
    precision = precision_score(y_test, predicted, average='micro')
    recall = recall_score(y_test, predicted, average='micro')
    print('Precision: ', precision)
    print('Recall: ', recall)

    # Rows are true labels, columns are predicted labels.
    confusion = confusion_matrix(y_test, predicted)
    print("Confusion Matrix:")
    print(confusion)

Model Parameter Tuning

In [None]:
#function input is the training data , model , params to tunes  return the best model after tunning
# Import 'make_scorer', and 'GridSearchCV'
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ShuffleSplit
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt


def fit_model(X, y, model, params):
    """Grid-search `params` for `model` and return the best estimator.

    Each candidate is scored by accuracy over 10 shuffled splits that hold out
    30% of the data (random_state=42). For every parameter name in `params`,
    its candidate values are scatter-plotted against the mean test scores.
    """
    splitter = ShuffleSplit(n_splits=10, test_size=0.3, train_size=None, random_state=42)

    search = GridSearchCV(
        estimator=model,
        param_grid=params,
        scoring=make_scorer(accuracy_score),
        cv=splitter,
    )
    search = search.fit(X, y)

    mean_scores = search.cv_results_.get('mean_test_score')
    for name in params:
        plt.scatter(x=params[name], y=mean_scores)
        plt.show()

    return search.best_estimator_

Tuning

In [None]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

def tunning(projected_x_train, Y_train, projected_x_test, Y_test):
    """Tune n_neighbors for a KNN classifier, then evaluate on the test set.

    Candidate values are np.arange(1, 9, 2), i.e. 1, 3, 5 and 7. The best
    estimator returned by fit_model predicts the test samples and the
    predictions are passed to evaluation().
    """
    candidate_ks = np.arange(1, 9, 2)
    search_space = {'n_neighbors': candidate_ks}

    # Cross-validated grid search over the candidate neighbour counts.
    best_model = fit_model(projected_x_train, Y_train, KNeighborsClassifier(), search_space)
    print("the best n_neighbors :" , best_model.get_params()['n_neighbors'])

    # Score the tuned model on the held-out samples.
    evaluation(best_model.predict(projected_x_test), Y_test)

# **Compare vs Non-Face Images - Binary Classification**

In [None]:
accuracy = []
ratio = []

# LDA (number of eigen vectors) : 1, 10, 20, 30, 40, 50, 60, 70, 100

# Test 1 : Ratio 1    -    Faces = 200, Non-Faces = 200
# Test 2 : Ratio 0.75 -    Faces = 200, Non-Faces = 150
# Test 3 : Ratio 0.5  -    Faces = 200, Non-Faces = 100
# Test 4 : Ratio 0.25 -    Faces = 200, Non-Faces = 50

non_faces_numbers = [200, 150, 100, 50]
# NOTE(review): this rebinds the module-level `n_of_eigenvectors` (an int in
# the LDA defaults) to a list; it stays a list until set_n_of_eigen_vectors()
# is called again.
n_of_eigenvectors = [1, 10, 20, 30, 40, 50, 60, 70, 100] 

In [None]:
import zipfile
import io
# Extract the non-face image archive into ./pgm. The context manager closes
# the archive handle once extraction finishes (it was previously left open).
with zipfile.ZipFile('pgm.zip') as zf:
    zf.extractall('pgm')

Read non-faces photos

In [None]:
import cv2
import os

def load_non_faces_images(folder):
    """Load every readable image in `folder` with imread flag 0.

    Entries are visited in sorted name order. os.listdir returns names in
    arbitrary order, which made the later even/odd train/test split differ
    between runs and machines. Entries cv2.imread cannot read (it returns
    None) are skipped.
    """
    images = []
    for filename in sorted(os.listdir(folder)):
        img = cv2.imread(os.path.join(folder, filename), 0)
        if img is not None:
            images.append(img)
    return images

In [None]:
def split_non_faces_data(df_non, df):
    """Split both frames by row position: even rows test, odd rows train.

    Returns:
        (X_non_test, X_non_train, X_faces_test, X_faces_train)
    """
    even, odd = slice(None, None, 2), slice(1, None, 2)

    non_test, non_train = df_non.iloc[even], df_non.iloc[odd]
    faces_test, faces_train = df.iloc[even], df.iloc[odd]

    return non_test, non_train, faces_test, faces_train

In [None]:
def non_faces_data_preparation(n_rows):
    """Prepare face / non-face data for the binary LDA experiment.

    Loads the 400 face images and the first 2 * n_rows non-face images, splits
    each set by even/odd row, and computes the class means and within-class
    scatter on the concatenated training rows (non-faces first, then faces).

    Args:
        n_rows: half the number of non-face images to use.

    Returns:
        A flat 6-tuple (MUs, S, X_train, Y_train, X_test, Y_test), which is
        the shape lda_intial unpacks. Previously the last four values were
        nested in one tuple, so that unpacking raised ValueError.
    """
    dataset = read_images()
    images = load_non_faces_images('/content/pgm/pgm/')

    data = flatten_images(400, dataset)
    non_faces_data = flatten_images(n_rows*2, images)
    print(len(images))

    df = pd.DataFrame(data=data)
    df_non = pd.DataFrame(data=non_faces_data)

    X_non_test, X_non_train, X_faces_test, X_faces_train = split_non_faces_data(df_non, df)

    # Statistics are computed before shuffle_data appends the label column.
    df = pd.concat([X_non_train, X_faces_train])

    MUs = calculate_means(df)
    S = compute_s(MUs, df)

    X_train, Y_train, X_test, Y_test = shuffle_data(
        X_non_train, X_non_test, X_faces_train, X_faces_test)
    return MUs, S, X_train, Y_train, X_test, Y_test

In [None]:
def lda_intial(n_rows, data_preparation):
    """Build the LDA matrix inv(S) @ Sb for the face / non-face problem.

    Args:
        n_rows: number of non-face training rows.
        data_preparation: callable taking `n_rows` and returning
            (MUs, S, X_train, Y_train, X_test, Y_test).

    Returns:
        (A, X_train, Y_train, X_test, Y_test)
    """
    # Two classes over n_rows non-face + 200 face training rows. The class
    # stride was previously 0, which makes range(0, rows, 0) in
    # calculate_means / compute_s raise ValueError. n_rows is used instead
    # because the non-face rows come first in the training frame.
    # NOTE(review): a single stride describes both classes exactly only when
    # n_rows == 200; confirm the intended handling of unequal class sizes.
    intialize_variables(n_rows + 200, 2, n_rows)

    MUs, S, X_train, Y_train, X_test, Y_test = data_preparation(n_rows)

    U = compute_overall_mean(MUs)

    # Per-class sample counts used to weight the between-class scatter.
    instances = [n_rows, 200]
    def s_instances(i):
        return instances[i]

    Sb = compute_sb(MUs, U, s_instances)

    A = np.dot(np.linalg.inv(S), Sb)

    return A, X_train, Y_train, X_test, Y_test

In [None]:
# Build A and the train/test sets with n_rows = 200, then project onto
# 70 eigenvectors and evaluate.
A, X_train, Y_train, X_test, Y_test = lda_intial(200, non_faces_data_preparation)
lda_projection(70, A, X_train, X_test, Y_train, Y_test)

In [None]:
# Re-run projection and evaluation on the same A for several eigenvector counts.
for i in [1, 10, 20, 30, 40, 50, 60, 70, 100] :
  print("Number of Eigen vectors : " + str(i))
  lda_projection(i, A, X_train, X_test, Y_train, Y_test)
  

In [None]:
# Same sweep with larger eigenvector counts.
for i in [80, 90, 150, 200, 300]:
  lda_projection(i, A, X_train, X_test, Y_train, Y_test)

In [None]:
# Repeat the experiment for each non-face count, keeping 70 eigenvectors.
# lda_intial takes (n_rows, data_preparation); the earlier call passed three
# arguments, two of which are names not defined in this file.
for i in [200, 150, 100, 50]:
  A, X_train, Y_train, X_test, Y_test = lda_intial(i, non_faces_data_preparation)
  lda_projection(70, A, X_train, X_test, Y_train, Y_test)

In [None]:
import matplotlib.pyplot as plt
 
# x axis values: number of non-face images per experiment
x = [400, 300, 200, 100]
# corresponding y axis values: hard-coded accuracy figures
y = [0.925, 0.9028, 0.923, 0.952]
 
 
# naming the x axis
plt.xlabel('Number of Non-faces images (Faces = 400) ')
# naming the y axis
plt.ylabel('Accuracy')
 
# draw the points as green markers; the legend entry is "stars"
plt.scatter(x, y, label= "stars", color= "green", s=50) 
# show the legend, then display the plot
plt.legend()
plt.show()

Labels Identification 

In [None]:
def label_identification(X_non_train, X_non_test, X_faces_train, X_faces_test):
    """Append a label column to each frame in place: 0 for non-faces, 1 for faces.

    The new column is named with the frame's current column count.

    Returns:
        (X_non_test, X_non_train, X_faces_test, X_faces_train) — note that
        this order differs from the parameter order.
    """
    labelled = (
        (X_non_train, 0),
        (X_non_test, 0),
        (X_faces_train, 1),
        (X_faces_test, 1),
    )
    for frame, target in labelled:
        frame[len(frame.columns)] = target

    return X_non_test, X_non_train, X_faces_test, X_faces_train

In [None]:
# NOTE(review): label_identification is defined with four required parameters,
# so this bare call raises TypeError as written — confirm whether this cell is
# a leftover that should be removed.
label_identification()

In [None]:
def merge_shuffle(df_1, df_2):
    """Concatenate two frames, shuffle all rows randomly and renumber the index."""
    return (
        pd.concat([df_1, df_2])
        .sample(frac=1)
        .reset_index(drop=True)
    )

In [None]:
def shuffle_data(X_non_train, X_non_test, X_faces_train, X_faces_test):
    """Label, merge and shuffle the face / non-face frames.

    Each input frame gets a label column, the two training frames and the two
    test frames are merged and shuffled, and the label column is then split
    off again.

    Returns:
        (X_train, Y_train, X_test, Y_test), where Y_* are single-column
        DataFrames.
    """
    X_non_test, X_non_train, X_faces_test, X_faces_train = label_identification(
        X_non_train, X_non_test, X_faces_train, X_faces_test)

    X_train = merge_shuffle(X_non_train, X_faces_train)
    X_test = merge_shuffle(X_non_test, X_faces_test)

    # The label is the last column: keep it as Y, then drop it from X.
    Y_train = X_train.iloc[:, -1:]
    X_train.drop(columns=X_train.columns[-1], inplace=True)

    Y_test = X_test.iloc[:, -1:]
    X_test.drop(columns=X_test.columns[-1], inplace=True)

    return X_train, Y_train, X_test, Y_test