<a href="https://colab.research.google.com/github/nourwalid70/Face-Recognition/blob/main/PR_lab1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Mount Google Drive in the Colab runtime; the data-loading cells below read
# the .pgm images from paths under the mounted drive's MyDrive folder.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# reading pgm files
def read_pgm(pgmf):
    """Read the pixels of a binary (P5) PGM image from an open binary stream.

    The header is expected in the plain three-line layout
    ``P5`` / ``<width> <height>`` / ``<max value>``; comment lines inside the
    header are not handled.

    Parameters
    ----------
    pgmf : binary file-like object
        Stream positioned at the first byte of the PGM data.

    Returns
    -------
    list of int
        ``width * height`` pixel values, in the order they are stored.

    Raises
    ------
    ValueError
        If the magic number is not ``P5``, the header cannot be parsed, the
        maximum value exceeds 255 (pixels would not be one byte each), or the
        stream ends before ``width * height`` bytes were read.
    """
    magic = pgmf.readline()
    if magic != b'P5\n':
        raise ValueError(f'not a binary PGM (P5) stream: magic line is {magic!r}')

    width, height = (int(token) for token in pgmf.readline().split())
    depth = int(pgmf.readline())
    if depth > 255:
        raise ValueError(f'unsupported PGM max value {depth}; only 8-bit images are handled')

    # One bulk read instead of one read() call per pixel.
    pixel_count = width * height
    payload = pgmf.read(pixel_count)
    if len(payload) != pixel_count:
        raise ValueError(
            f'truncated PGM data: expected {pixel_count} bytes, got {len(payload)}')

    # Iterating over a bytes object yields ints, i.e. the pixel values.
    return [*payload]

In [None]:
# read faces data
# Absolute path (Drive is mounted at /content/drive) so the cell does not depend
# on the current working directory.
path = '/content/drive/MyDrive/Colab Notebooks/PR lab1/data/s'
label = []
images = []  # one flat pixel list per image, turned into a DataFrame once at the end

cols = range(1, 10305)  # one column per pixel; every image must have 10304 pixels

# loop on files: 40 subject folders (s1..s40), 10 images (1.pgm..10.pgm) each
for i in range(1, 41):
  p1 = path + str(i)
  for j in range(1, 11):
    p2 = p1 + '/' + str(j) + '.pgm'
    label.append(i)
    # `with` closes the file even if read_pgm raises
    with open(p2, 'rb') as f:
      im = read_pgm(f)
    images.append(im)

# Building the frame in one go avoids the slow row-by-row `.loc` enlargement and
# yields integer columns instead of object columns.
df = pd.DataFrame(images, columns=cols)
df['label'] = label
df

In [None]:
# read non-faces data
# Absolute path (Drive is mounted at /content/drive) so the cell does not depend
# on the current working directory.
path = '/content/drive/MyDrive/Colab Notebooks/PR lab1/non-face/'

cols = range(1, 10305)  # one column per pixel; every image must have 10304 pixels
non_face_images = []  # one flat pixel list per image

# loop on files: 1.pgm .. 400.pgm
for i in range(1, 401):
  p1 = path + str(i) + '.pgm'
  # `with` closes the file even if read_pgm raises
  with open(p1, 'rb') as f:
    im = read_pgm(f)
  non_face_images.append(im)

# Build the frame once instead of enlarging it row by row through `.loc`.
non_face_df = pd.DataFrame(non_face_images, columns=cols)

# Binary labels for the face / non-face task: 0 = non-face, 1 = face.
non_face_df['label'] = 0
face_df = df.copy()  # copy so the per-subject labels stored in `df` stay intact
face_df['label'] = 1
non_face_df, face_df

In [None]:
# faces dataset
# 50% training , 50% testing: rows with an even index label go to training,
# rows with an odd index label go to testing.
is_even_row = (df.index % 2 == 0)
train_set = df.loc[is_even_row]
test_set = df.loc[~is_even_row]

# Re-number both halves so that each of them is indexed 0, 1, 2, ...
train_set.index = train_set.index // 2
test_set.index = (test_set.index - 1) // 2

train_set.head(10), test_set.head(10)

In [None]:
# 70% training , 30% testing (stratified, so every label keeps the same proportion)
# A fixed seed makes the split reproducible after a kernel restart.
train_set, test_set = train_test_split(df, test_size=0.3, stratify=df['label'], random_state=42)
# Keep the training rows grouped by label. sort_values returns a new frame, so
# the object handed back by train_test_split is not mutated in place.
train_set = train_set.sort_values(by=['label'])
train_set, test_set

In [None]:
#faces vs non-faces dataset
non_faces_frac = 0.75  # fraction of the non-face images used for training

# Faces: a seeded random half for training, every remaining row for testing.
face_train_set = face_df.sample(frac=0.5, random_state=1)
face_test_set = face_df.loc[~face_df.index.isin(face_train_set.index)]

# Non-faces: same scheme, with the configurable training fraction.
non_face_train_set = non_face_df.sample(frac=non_faces_frac, random_state=1)
non_face_test_set = non_face_df.loc[~non_face_df.index.isin(non_face_train_set.index)]

# Face rows first, non-face rows second, in both splits.
train_set = pd.concat([face_train_set, non_face_train_set])
test_set = pd.concat([face_test_set, non_face_test_set])

train_set

In [None]:
def PCA(eigen_values,eigen_vectors,alpha):
  """Select the leading eigenvectors that explain at least `alpha` of the variance.

  Parameters
  ----------
  eigen_values : numpy.ndarray
    1-D array of eigenvalues, in any order.
  eigen_vectors : numpy.ndarray
    2-D array whose column ``i`` is the eigenvector belonging to
    ``eigen_values[i]``. The array is not modified.
  alpha : float
    Target explained-variance ratio.

  Returns
  -------
  numpy.ndarray
    Matrix whose columns are the selected eigenvectors, ordered by decreasing
    eigenvalue. The smallest number of columns whose cumulative eigenvalue
    share is >= `alpha` is kept; if that share is never reached, all
    eigenvectors are returned.

  Side effects
  ------------
  Prints the number of retained components.
  """
  eigen_values = np.asarray(eigen_values)
  order = eigen_values.argsort()[::-1]  # positions of the eigenvalues, largest first

  # Fancy indexing builds a new array, so the caller's eigenvectors stay
  # untouched and no column is overwritten before it has been read.
  sorted_eigen_vectors = eigen_vectors[:, order]

  # Share of the total variance explained by the first 1, 2, 3, ... components.
  explained = np.cumsum(eigen_values[order]) / eigen_values.sum()
  reached = np.flatnonzero(explained >= alpha)
  # Fall back to every component when alpha is never reached (e.g. alpha > 1).
  idx = int(reached[0]) + 1 if reached.size else len(order)

  p = sorted_eigen_vectors[:, 0:idx]
  print(idx)
  return p


In [None]:
def eigen(df):
  """Eigen-decompose the covariance matrix of the columns of `df`.

  Parameters
  ----------
  df : pandas.DataFrame
    One row per sample, one numeric column per feature (no label column).
    Values are converted to float64, so object-dtype frames holding numbers
    are accepted as well.

  Returns
  -------
  (numpy.ndarray, numpy.ndarray)
    Eigenvalues and eigenvectors as returned by ``np.linalg.eigh``; column
    ``i`` of the second array belongs to eigenvalue ``i``.

  Raises
  ------
  ValueError
    If `df` has no rows.
  """
  data_matrix = df.to_numpy(dtype='float64')
  n = len(data_matrix)
  if n == 0:
    raise ValueError('cannot compute a covariance matrix from an empty data set')

  # Centre every feature on its mean.
  mean_vector = np.mean(data_matrix, 0)
  z = data_matrix - mean_vector

  # Covariance normalised by n (not n - 1).
  cov_matrix = (1/n) * (z.T @ z)

  # The covariance matrix is symmetric, so eigh is the appropriate solver.
  eigen_values, eigen_vectors = np.linalg.eigh(cov_matrix)
  return eigen_values, eigen_vectors

In [None]:
def LDA(df):
  """Compute a linear discriminant analysis projection matrix.

  Parameters
  ----------
  df : pandas.DataFrame
    One row per sample. Every column except ``'label'`` is a numeric feature;
    ``'label'`` holds the class of the row. Rows may be in any order.

  Returns
  -------
  numpy.ndarray
    The last ``c - 1`` columns (``c`` = number of distinct labels) of the
    eigenvector matrix of ``inv(S_W) @ S_B`` after sorting by increasing
    eigenvalue, i.e. the eigenvectors of the ``c - 1`` largest eigenvalues.
    One row per feature.
  """
  labels = df['label'].unique()  # distinct class labels
  y = df['label'].to_numpy()
  X = df.loc[:, df.columns != 'label'].to_numpy(dtype='float64')  # all data samples
  d = X.shape[1]  # number of features
  overall_mean = X.mean(axis=0)

  counts = np.empty(len(labels))        # n_k : number of samples of each class
  deltas = np.empty([len(labels), d])   # (mu_k - mu) for each class
  centered = np.empty_like(X)           # every sample minus the mean of its own class

  for k, label in enumerate(labels):
    in_class = (y == label)
    class_mean = X[in_class].mean(axis=0)  # mu_k
    counts[k] = in_class.sum()
    deltas[k] = class_mean - overall_mean
    # Centre through the class mask so the result does not depend on the rows
    # being grouped by class.
    centered[in_class] = X[in_class] - class_mean

  sb = (counts[:, None] * deltas).T @ deltas  # between-class scatter: sum_k n_k (mu_k - mu)(mu_k - mu)^T
  s = centered.T @ centered                   # within-class scatter

  try:
    s_inv = np.linalg.inv(s)
  except np.linalg.LinAlgError:
    # Singular within-class scatter: fall back to the pseudo-inverse.
    s_inv = np.linalg.pinv(s)

  # inv(S_W) @ S_B is not symmetric in general, so the general solver `eig` is
  # required; `eigh` reads only one triangle of its argument and would return
  # the eigenvectors of a different matrix. (`eig` is slower than `eigh`.)
  eig_vals, eig_vecs = np.linalg.eig(s_inv @ sb)

  # sort eigenvalues (ascending) and sort eigenvectors accordingly; keep the
  # real parts in case rounding produced complex output
  order = np.argsort(eig_vals.real)
  w = eig_vecs[:, order].real

  return w[:, 1-len(labels):]  # projection matrix

In [None]:
def project(train_set, test_set, P):
  """Project the features of both splits with `P` and separate the labels.

  Parameters
  ----------
  train_set, test_set : pandas.DataFrame
    Frames with feature columns plus a ``'label'`` column.
  P : numpy.ndarray
    Projection matrix; the features are multiplied by ``P.T``.

  Returns
  -------
  X_train, Y_train, X_test, Y_test
    The projected features as DataFrames and the ``'label'`` columns of the
    corresponding input frames.
  """
  def _features_and_labels(frame):
    # Everything except the label column, as a float64 matrix.
    feature_mask = frame.columns != 'label'
    samples = frame.loc[:, feature_mask].to_numpy(dtype='float64')
    return pd.DataFrame(samples @ P.T), frame['label']

  X_train, Y_train = _features_and_labels(train_set)
  X_test, Y_test = _features_and_labels(test_set)
  return X_train, Y_train, X_test, Y_test

In [None]:
def knn_tuning(X_train, Y_train, k_values=(1, 3, 5, 7), n_splits=5, random_state=42):
  """Pick the number of neighbours for KNN by stratified k-fold cross-validation.

  Parameters
  ----------
  X_train : array-like
    Training features.
  Y_train : array-like
    Training labels.
  k_values : iterable of int, optional
    Candidate values for ``n_neighbors``.
  n_splits : int, optional
    Number of cross-validation folds.
  random_state : int, optional
    Seed for the fold shuffling, so that results are reproducible.

  Returns
  -------
  (best_k, best_score)
    The candidate with the highest mean validation accuracy and that accuracy.
    Ties go to the candidate that comes first in `k_values`. ``(-1, 0)`` is
    returned if no candidate scores above 0.

  Side effects
  ------------
  Prints the mean validation accuracy of every candidate.
  """
  # A single seeded splitter: every candidate k is scored on the same folds, so
  # the comparison is not blurred by fold-to-fold randomness.
  cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)

  best_k, best_score = -1, 0
  for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    n_scores = cross_val_score(knn, X_train, Y_train, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')
    mean_score = np.mean(n_scores)
    if mean_score > best_score:
      best_k = k
      best_score = mean_score
    print(f'accuracy at k = {k} : {mean_score}')

  return best_k, best_score

In [None]:
def evaluate(knn, X_test, Y_test):
  """Report how a fitted classifier performs on the test split.

  Prints the accuracy (in percent), the confusion matrix and the
  classification report for the predictions of `knn` on `X_test`, and returns
  the accuracy in percent.
  """
  predictions = knn.predict(X_test)
  accuracy_percent = 100 * accuracy_score(Y_test, predictions)

  print("Accuracy : \n", accuracy_percent)
  print("Confusion Matrix: \n", confusion_matrix(Y_test, predictions))
  print("Report : \n", classification_report(Y_test, predictions))

  return accuracy_percent

In [None]:
# LDA
# Fit the projection on the training split, then map both splits with it.
# project() multiplies the features by P.T, hence the transposed matrix is passed.
proj_matrix = LDA(train_set)
X_train, Y_train, X_test, Y_test = project(train_set, test_set, proj_matrix.T)

# Choose the number of neighbours by cross-validation on the training split.
best_k, best_score = knn_tuning(X_train, Y_train)
print(f'best validation accuracy = {best_score} at k = {best_k}')

# Fit the final KNN model with the selected k (fit returns the estimator itself).
knn = KNeighborsClassifier(n_neighbors=best_k).fit(X_train, Y_train)

# Score the model on the held-out test split.
evaluate(knn, X_test, Y_test)


In [None]:
# PCA
# Cast every column (the label included) of both splits to float.
train_set, test_set = train_set.astype(float), test_set.astype(float)
test_set.dtypes

1        float64
2        float64
3        float64
4        float64
5        float64
          ...   
10301    float64
10302    float64
10303    float64
10304    float64
label    float64
Length: 10305, dtype: object

In [None]:
# PCA
# Separate the labels from the pixel features for both splits.
Y_train = train_set['label']
data_train_set = train_set.drop(columns='label')

Y_test = test_set['label']
data_test_set = test_set.drop(columns='label')


In [None]:
# Eigen-decompose the covariance matrix of the training features (label column already dropped).
eigen_values,eigen_vectors=eigen(data_train_set)

In [None]:
# Show the eigenvalues and the shape of the eigenvector matrix.
eigen_values,eigen_vectors.shape

In [None]:
# projection
# Keep the leading eigenvectors whose cumulative eigenvalue share reaches alpha = 0.8.
proj_matrix = PCA(eigen_values,eigen_vectors,0.8)

proj_matrix.shape

In [None]:
# Project both splits onto the retained principal components.
X_train = data_train_set.to_numpy() @ proj_matrix
X_test = data_test_set.to_numpy() @ proj_matrix
X_train.shape, X_test.shape

In [None]:
# Choose the number of neighbours by cross-validation on the projected training split.
best_k, best_score = knn_tuning(X_train, Y_train)
print(f'best validation accuracy = {best_score} at k = {best_k}')

# Fit the final KNN model with the selected k (fit returns the estimator itself).
knn = KNeighborsClassifier(n_neighbors=best_k).fit(X_train, Y_train)

# Score the model on the held-out test split.
evaluate(knn, X_test, Y_test)

In [None]:
import matplotlib.pyplot as plt

# Sweep the explained-variance threshold alpha and record the test accuracy of
# the KNN model tuned for each resulting PCA projection.
alphas = np.arange(0.40, 0.95, 0.05)  # evaluated once, shared by the loop and the plot
test_accuracies = []  # deliberately not named `list`, which would shadow the builtin
for alpha in alphas:
  proj_matrix = PCA(eigen_values, eigen_vectors, alpha)
  X_train = np.dot(data_train_set, proj_matrix)
  X_test = np.dot(data_test_set, proj_matrix)

  # cross validation
  best_k, best_score = knn_tuning(X_train, Y_train)
  print(f'best validation accuracy = {best_score} at k = {best_k}')

  # train the KNN model
  knn = KNeighborsClassifier(n_neighbors=best_k)
  knn.fit(X_train, Y_train)

  # test
  test_accuracies.append(evaluate(knn, X_test, Y_test))

plt.plot(alphas, test_accuracies, 'ro')
plt.title('PCA + KNN: testing accuracy vs. alpha')
plt.ylabel('Testing Accuracy')
plt.xlabel('Alpha')
plt.show()