In [1]:
# Reinstall the base OpenCV wheel: uninstall whatever is present, then install into the user site.
# NOTE(review): no version is pinned, so the installed release can change between runs.
!python -m pip uninstall opencv-python --yes
!pip install opencv-python --user

# Same sequence for the contrib build.
!python -m pip uninstall opencv_contrib_python --yes
!pip install opencv-contrib-python --user

In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from numpy.linalg import eigh
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
#%matplotlib inline

import matplotlib.image as mpimg

In [3]:
def load_non_faces(folder):
    """Load up to 50 images from ``folder`` as resized grayscale arrays.

    Parameters
    ----------
    folder : str
        Directory whose entries are passed to ``cv2.imread``.

    Returns
    -------
    list of numpy.ndarray
        One array per readable image, converted with ``cv2.COLOR_BGR2GRAY``
        and resized with ``cv2.resize(img, (92, 112))``.

    Notes
    -----
    * Entries are taken in sorted name order because ``os.listdir`` returns
      them in arbitrary order; sorting makes the selected subset reproducible.
    * Folders with fewer than 50 entries are accepted (the slice simply
      yields fewer names).
    * Files that ``cv2.imread`` cannot decode are skipped.
    """
    images = []
    for name in sorted(os.listdir(folder))[:50]:
        img = cv2.imread(os.path.join(folder, name))
        if img is None:
            # imread signals an unreadable file with None; this has to be
            # checked before cvtColor/resize, which fail on an empty image.
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # transform to grayscale
        images.append(cv2.resize(img, (92, 112)))
    return images


def load_non_faces_images(folder):
    """Collect non-face images from every category except ``"person"``.

    The first entry returned by ``os.listdir(folder)`` is taken as the
    directory that holds one sub-directory per category. ``"person"`` is
    removed from that list (``list.remove`` raises ``ValueError`` if it is
    missing) and ``load_non_faces`` is called on each remaining category.

    Returns
    -------
    numpy.ndarray
        2-D array with one flattened image per row.
    """
    root = os.path.join(folder, os.listdir(folder)[0])
    categories = os.listdir(root)
    categories.remove("person")

    collected = []
    for category in categories:
        collected.extend(load_non_faces(os.path.join(root, category)))

    stacked = np.array(collected)
    n_images, height, width = stacked.shape[0], stacked.shape[1], stacked.shape[2]
    return stacked.reshape(n_images, height * width)

In [4]:
def loadImages(folder, y):
    """Read every image in ``folder`` as grayscale and tag it with label ``y``.

    Parameters
    ----------
    folder : str
        Directory whose entries are passed to ``cv2.imread``.
    y : object
        Label appended once for every image that was loaded.

    Returns
    -------
    (list, list)
        ``images`` (grayscale arrays) and ``labels`` (``y`` repeated), of
        equal length.

    Notes
    -----
    Files are visited in sorted name order because ``os.listdir`` gives no
    ordering guarantee; callers split rows by position, so a stable order
    keeps the train/test assignment reproducible. Files that ``cv2.imread``
    cannot decode are skipped.
    """
    images = []
    labels = []
    for pic in sorted(os.listdir(folder)):
        img = cv2.imread(os.path.join(folder, pic))
        if img is None:
            # Check before cvtColor: converting an unreadable (None) image raises.
            continue
        images.append(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))  # transform to grayscale
        labels.append(y)
    return images, labels


def load_images_from_folder(folder):
    """Load all subject directories under ``folder`` into one image matrix.

    Each sub-directory is one class; its label is the directory name without
    its first character (e.g. ``"s12"`` -> ``"12"``).

    Parameters
    ----------
    folder : str
        Dataset root containing one directory per subject.

    Returns
    -------
    images : numpy.ndarray
        2-D array, one flattened image per row.
    labels : numpy.ndarray
        Label for each row of ``images``.

    Notes
    -----
    Directories are processed in ascending numeric label order.
    ``os.listdir`` returns names in arbitrary order, while the LDA helpers in
    this notebook store class means by row-block position and look them up by
    ``int(label) - 1``; the two only agree when the blocks are in label order.
    Non-directory entries (such as the dataset's ``README``) are ignored
    instead of being removed by name, so a missing ``README`` no longer raises.
    """
    def subject_order(name):
        # "s10" must sort after "s2": compare the numeric suffix as an int.
        suffix = name[1:]
        if suffix.isdigit():
            return (0, int(suffix), name)
        return (1, 0, name)

    subject_dirs = sorted(
        (
            name for name in os.listdir(folder)
            if name != "README" and os.path.isdir(os.path.join(folder, name))
        ),
        key=subject_order,
    )

    images = []
    labels = []
    for directory in subject_dirs:
        y = directory[1:]
        found_images, found_labels = loadImages(os.path.join(folder, directory), y)
        images.extend(found_images)
        labels.extend(found_labels)

    images = np.array(images)
    # Flatten each (height, width) image into a single row.
    images = images.reshape(images.shape[0], images.shape[1] * images.shape[2])
    labels = np.array(labels)
    return images, labels

In [5]:
def split(X,y):
    """Split rows alternately: odd indices to training, even indices to test.

    Parameters
    ----------
    X : array-like, shape (n_samples, ...)
        Sample matrix; it is not modified.
    y : array-like, shape (n_samples,) or (n_samples, 1)
        Labels aligned with the rows of ``X``.

    Returns
    -------
    X_train, X_test : numpy.ndarray
        Copies of the odd-indexed and even-indexed rows of ``X``.
    y_train, y_test : numpy.ndarray
        Matching labels as column vectors of shape (k, 1).

    Notes
    -----
    The previous implementation wrote every row back into ``X`` (a no-op that
    failed on read-only arrays) and returned a shapeless ``(0,)`` array for an
    empty partition; slicing keeps the feature dimension in that case.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    # np.array(...) copies, so the results never alias the caller's arrays.
    X_train = np.array(X[1::2])
    X_test = np.array(X[0::2])
    y_train = np.array(y[1::2]).reshape(-1, 1)
    y_test = np.array(y[0::2]).reshape(-1, 1)
    return X_train,X_test,y_train,y_test

In [6]:
def plot(x_points, y_points, title, x_label, y_label):
    """Draw one line chart with a title and axis labels, then show it.

    ``x_points`` and ``y_points`` are converted to numpy arrays before being
    passed to ``plt.plot``.
    """
    xs, ys = np.array(x_points), np.array(y_points)
    plt.plot(xs, ys)
    decorations = ((plt.title, title), (plt.xlabel, x_label), (plt.ylabel, y_label))
    for apply_text, text in decorations:
        apply_text(text)
    plt.show()

# Read Data

In [7]:
# Root of the face dataset, relative to the notebook's working directory.
folder = '../input/att-database-of-faces'
# X: one flattened grayscale image per row; y: the label taken from each image's directory name.
X, y = load_images_from_folder(folder)

In [8]:
# Odd-indexed rows of X become the training set, even-indexed rows the test set.
X_train, X_test, y_train, y_test = split(X,y)
alpha = [0.8, 0.85, 0.9, 0.95]  # Fractions of total eigenvalue mass to retain in PCA
no_of_neigbours = [1, 3, 5, 7]  # Values of k tried for KNN

# PCA

In [9]:
# Shapes noted by the author: x_train is 200 x 10304, so the mean has 10304 entries.
def center_data(D_train, D_test):
    """Subtract the training set's per-column mean from both splits.

    The mean is computed from ``D_train`` only and applied to ``D_train`` and
    ``D_test`` alike.

    NOTE: a later cell in this notebook defines another ``center_data`` that
    takes four parameters; once that cell has run, it replaces this function.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Centred training data and centred test data.
    """
    column_means = np.mean(D_train, axis = 0)
    centred = [np.array(part - column_means) for part in (D_train, D_test)]
    return centred[0], centred[1]


def get_cov_matrix(z):
    """Return ``(1 / len(z)) * z^T z``.

    This is the covariance matrix of the columns of ``z`` when ``z`` has
    already been mean-centred (the function itself does no centring) and the
    normalisation is by the number of rows.
    """
    scale = 1 / len(z)
    gram = np.dot(np.transpose(z), z)
    return scale * gram


def get_eign_sorted(COV):
    """Return eigenvalues and eigenvectors ordered by descending eigenvalue.

    Parameters
    ----------
    COV : array-like, shape (d, d)
        Square matrix to decompose.

    Returns
    -------
    eigenValues : numpy.ndarray, shape (d,)
        Eigenvalues, largest first.
    eigenVectors : numpy.ndarray, shape (d, d)
        Column ``k`` is the eigenvector belonging to ``eigenValues[k]``.

    Notes
    -----
    ``eigh`` is only valid for symmetric (Hermitian) matrices: it reads one
    triangle of its argument and ignores the other. This notebook also passes
    products such as ``S^-1 B`` here, which are not symmetric in general, so
    non-symmetric input is now decomposed with ``np.linalg.eig`` instead.
    For real input the real parts of that result are returned, discarding the
    imaginary components a general eigen-solver may produce. The general
    solver is considerably slower than ``eigh`` on large matrices.
    """
    matrix = np.asarray(COV)
    if _is_symmetric(matrix):
        eigenValues, eigenVectors = eigh(matrix)
    else:
        eigenValues, eigenVectors = np.linalg.eig(matrix)
        if not np.iscomplexobj(matrix):
            eigenValues, eigenVectors = eigenValues.real, eigenVectors.real
    idx = np.argsort(eigenValues)[::-1]
    return eigenValues[idx], eigenVectors[:, idx]


def _is_symmetric(matrix, block=1024):
    """Return True if ``matrix`` is square and equals its conjugate transpose (within tolerance)."""
    if matrix.ndim != 2 or matrix.shape[0] != matrix.shape[1]:
        return False
    # Compare in row blocks so no full-size temporary copies are created.
    for start in range(0, matrix.shape[0], block):
        stop = start + block
        if not np.allclose(matrix[start:stop], matrix[:, start:stop].conj().T):
            return False
    return True

In [None]:
# Centre both splits on the training mean, then build the training covariance matrix.
Z_train, Z_test = center_data(X_train, X_test)
print(len(Z_train))
COV = get_cov_matrix(Z_train)
print(COV.shape)

# Eigenpairs ordered from largest to smallest eigenvalue; getEnd() and PCA() read these globals.
eigenValues, eigenVectors = get_eign_sorted(COV)

In [None]:
def getEnd(a):
    """Return how many leading eigenvalues are needed to reach fraction ``a``.

    Walks the global ``eigenValues`` in order, accumulating them until the
    running total is no longer below ``a * sum(eigenValues)`` or the values
    are exhausted, and returns the number of values consumed.
    """
    threshold = a * np.sum(eigenValues)
    running_total = 0
    count = 0
    for value in eigenValues:
        if not running_total < threshold:
            break
        running_total += value
        count += 1
    return count

def PCA(a):
    """Project the centred train/test data onto the leading eigenvectors.

    Parameters
    ----------
    a : float
        Fraction passed to ``getEnd`` to decide how many eigenvectors to keep.

    Returns
    -------
    NEW_TRAIN, NEW_TEST : numpy.ndarray
        ``Z_train`` and ``Z_test`` multiplied by the first ``getEnd(a)``
        columns of ``eigenVectors``.

    Notes
    -----
    Reads the globals ``eigenVectors``, ``Z_train`` and ``Z_test``. The
    earlier version copied the eigenvector columns one row at a time in a
    Python loop, kept an unused slice of the eigenvalues, and wrapped each
    product in two transposes; column slicing and a direct product give the
    same projection.
    """
    n_components = getEnd(a)
    vectors = np.asarray(eigenVectors)[:, :n_components]
    NEW_TRAIN = np.dot(Z_train, vectors)
    NEW_TEST = np.dot(Z_test, vectors)
    return NEW_TRAIN, NEW_TEST

In [None]:
# KNN accuracy on the PCA projection for every (alpha, k) pair; one plot per alpha.
y_train = np.ravel(y_train)  # fit() is given a flat label vector
for a in alpha:
    # The projection depends only on alpha, so compute it once per alpha
    # rather than once per neighbour count.
    NEW_TRAIN, NEW_TEST = PCA(a)
    xp = []
    yp = []
    for n in no_of_neigbours:
        # Create the KNN classifier and train it on the projected training set
        knn = KNeighborsClassifier(n_neighbors = n)
        knn.fit(NEW_TRAIN, y_train)
        # Predict the response for the test dataset
        y_pred = knn.predict(NEW_TEST)
        accuary = metrics.accuracy_score(y_test, y_pred)
        xp.append(n)
        yp.append(accuary)
    plot(xp, yp, "alpha = " +  str(a), "no_of_neigbours", "accuracy")

# LDA

In [None]:
def compute_classes_mean_matrix(data, n, n_classes=None):
    """Return the mean of each consecutive block of ``n`` rows.

    Parameters
    ----------
    data : array-like, shape (n_samples, n_features)
        Samples grouped so that each class occupies ``n`` consecutive rows.
    n : int
        Number of rows per class.
    n_classes : int, optional
        Number of blocks to average. Defaults to ``n_samples // n``; the
        value used to be fixed at 40 and the feature count at 10304.

    Returns
    -------
    numpy.ndarray, shape (n_classes, n_features)
        Row ``i`` is the mean of ``data[i*n:(i+1)*n]``.
    """
    data = np.asarray(data)
    if n_classes is None:
        n_classes = data.shape[0] // n
    means = np.zeros((n_classes, data.shape[1]))
    for i in range(n_classes):
        means[i, :] = np.mean(data[i * n:(i + 1) * n], axis=0)
    return means
        
def compute_between_class_scatter_matrix(means,total_mean, n):
    """Return the between-class scatter ``n * sum_i (m_i - m)(m_i - m)^T``.

    Parameters
    ----------
    means : array-like, shape (n_classes, n_features)
        One class mean per row.
    total_mean : array-like, shape (n_features,)
        Mean of all samples.
    n : int
        Number of samples per class (the same weight is used for every class).

    Returns
    -------
    numpy.ndarray, shape (n_features, n_features)

    Notes
    -----
    The previous version used ``np.dot`` on two 1-D vectors, which yields a
    scalar; that scalar was broadcast into every entry of the result, giving a
    constant matrix rather than a sum of outer products. The sum of outer
    products of the rows of ``deltas`` equals ``deltas.T @ deltas``. The class
    count now comes from ``means`` instead of being fixed at 40.
    """
    deltas = np.asarray(means, dtype=float) - np.asarray(total_mean, dtype=float)
    return n * np.dot(deltas.T, deltas)

def centralize_data(data,means,labels):
    """Subtract from each row of ``data`` the mean of that row's class.

    The class mean for row ``i`` is ``means[int(labels[i]) - 1]``, i.e. labels
    are treated as 1-based row numbers of ``means``. The result is a new
    float array with the shape of ``data``.
    """
    centred = np.zeros(data.shape)
    mean_rows = [int(labels[i]) - 1 for i in range(data.shape[0])]
    centred[:] = data - means[mean_rows]
    return centred

def compute_within_class_scatter_matrix(Z):
    """Return the within-class scatter matrix ``Z^T Z``.

    ``Z`` holds the samples after their class means have been subtracted, one
    sample per row. The earlier version first allocated a 10304 x 10304 zero
    matrix (about 850 MB of float64) that was immediately overwritten; that
    allocation has been removed.
    """
    return np.dot(Z.T, Z)

In [None]:
def LDA(X_train,y_train,X_test,y_test,x):
    """Project the data with multi-class LDA and print KNN accuracy per k.

    Parameters
    ----------
    X_train, X_test : numpy.ndarray, shape (n_samples, n_features)
        Training rows must be grouped in consecutive blocks of ``x`` rows per
        class, because the class means are computed block by block.
    y_train, y_test : array-like
        Labels. Training labels are used as 1-based indices into the class
        mean matrix by ``centralize_data``.
    x : int
        Number of training rows per class.

    Returns
    -------
    None
        One line per value in the global ``no_of_neigbours`` is printed.

    Notes
    -----
    The within-class scatter ``S`` is built from ``n_samples`` rows with the
    class means removed, so its rank is at most ``n_samples - n_classes``.
    When that is below ``n_features`` the matrix is singular and has no
    inverse; the Moore-Penrose pseudo-inverse is used instead (it equals the
    inverse whenever ``S`` is invertible).

    NOTE(review): ``pinv(S) @ B`` is generally not symmetric - confirm that
    ``get_eign_sorted`` handles non-symmetric input.
    """
    # One mean row per class.
    matrix_mean = compute_classes_mean_matrix(X_train, x)

    # Mean of all training samples.
    total_mean = np.mean(X_train, axis = 0)

    # Between-class scatter, n_features x n_features.
    B = compute_between_class_scatter_matrix(matrix_mean, total_mean, x)

    # Training rows with their own class mean subtracted.
    Z = centralize_data(X_train, matrix_mean, y_train)

    # Within-class scatter, n_features x n_features.
    S = compute_within_class_scatter_matrix(Z)

    # Eigenvalues below this relative cutoff are treated as zero rather than inverted.
    rcond = max(S.shape) * np.finfo(float).eps
    S_inverse = np.linalg.pinv(S, rcond=rcond, hermitian=True)

    eigen_values, eigen_vectors = get_eign_sorted(np.dot(S_inverse, B))

    # LDA yields at most (number of classes - 1) useful directions.
    P = eigen_vectors[:, :matrix_mean.shape[0] - 1]

    NEW_TRAIN = np.dot(X_train, P)
    NEW_TEST = np.dot(X_test, P)

    y_train = np.ravel(y_train)  # flat label vector for fit()
    for n in no_of_neigbours:
        # Create the KNN classifier and train it on the projected training set
        knn = KNeighborsClassifier(n_neighbors = n)
        knn.fit(NEW_TRAIN, y_train)
        # Predict the response for the test dataset
        y_pred = knn.predict(NEW_TEST)
        accuary = metrics.accuracy_score(y_test, y_pred)
        print(f"{n}                       {accuary}\n")



In [None]:
# Run LDA on the odd/even split with x = 5 training rows per class; accuracies are printed per k.
LDA(X_train,y_train,X_test,y_test,5)

In [None]:
# Build the LDA projection in this cell. LDA() keeps its projections local, so
# the global NEW_TRAIN / NEW_TEST at this point are still the ones left behind
# by the PCA cell above; fitting on them would plot PCA results under the
# "LDA" title.
matrix_mean = compute_classes_mean_matrix(X_train, 5)
total_mean = np.mean(X_train, axis = 0)
B = compute_between_class_scatter_matrix(matrix_mean, total_mean, 5)
Z = centralize_data(X_train, matrix_mean, y_train)
S = compute_within_class_scatter_matrix(Z)
# S is rank-deficient when there are fewer samples than features, so use the
# pseudo-inverse; eigenvalues below the relative cutoff are treated as zero.
S_inverse = np.linalg.pinv(S, rcond=max(S.shape) * np.finfo(float).eps, hermitian=True)
eigen_values, eigen_vectors = get_eign_sorted(np.dot(S_inverse, B))
P = eigen_vectors[:, :39]
NEW_TRAIN = np.dot(X_train, P)
NEW_TEST = np.dot(X_test, P)

xp = []
yp = []
y_train = np.ravel(y_train)  # flat label vector for fit()
for n in no_of_neigbours:
    # Create the KNN classifier and train it on the projected training set
    knn = KNeighborsClassifier(n_neighbors=n)
    knn.fit(NEW_TRAIN, y_train)
    # Predict the response for the test dataset
    y_pred = knn.predict(NEW_TEST)
    accuary = metrics.accuracy_score(y_test, y_pred)
    xp.append(n)
    yp.append(accuary)

In [None]:
# Accuracy against number of neighbours, from the xp / yp lists filled by the preceding cell.
plot(xp, yp, "LDA", "no_of_neigbours", "accuracy")

# **PCA FOR FACES VS NON FACES PROBLEM**

In [None]:
# Root of the non-face image dataset (this rebinds the global `folder`).
folder = '../input/natural-images'
X_nonFaces = load_non_faces_images(folder)
# Every non-face sample gets label 0.
Y_nonFaces = np.array([0] * X_nonFaces.shape[0])

In [None]:
# Alternate the non-face rows between a training part and a test part.
X_train_f_nf,X_test_f_nf,y_train_f_nf,y_test_f_nf = split(X_nonFaces,Y_nonFaces)
# Label every face sample 1; sizes are taken from the existing face label arrays.
y_train_unique = np.array([1] * y_train.shape[0]).reshape(-1,1)
y_test_unique = np.array([1] * y_test.shape[0]).reshape(-1,1)
# Stack faces first, then non-faces, for data and labels alike.
X_train_f_nf = np.concatenate((X_train,X_train_f_nf))
X_test_f_nf = np.concatenate((X_test,X_test_f_nf))
y_train_f_nf = np.concatenate((y_train_unique,y_train_f_nf))
y_test_f_nf = np.concatenate((y_test_unique,y_test_f_nf))
print(X_train_f_nf.shape,X_test_f_nf.shape,y_train_f_nf.shape,y_test_f_nf.shape)

In [None]:
# Refit the PCA inputs on the combined face / non-face data. This overwrites the
# globals Z_train, Z_test, eigenValues and eigenVectors that PCA() and getEnd() read.
# This call needs the two-argument center_data; the cell further down that
# redefines center_data with four parameters must not have been run yet.
Z_train, Z_test = center_data(X_train_f_nf, X_test_f_nf)
COV = get_cov_matrix(Z_train)
eigenValues, eigenVectors = get_eign_sorted(COV)

In [None]:
print(X_train_f_nf.shape,X_test_f_nf.shape)
# Keep a second reference to the current eigen-decomposition.
# NOTE(review): EV2 / EVEC2 are not read anywhere else in the visible notebook.
EV2 , EVEC2 = eigenValues, eigenVectors

In [None]:
# KNN accuracy on the PCA projection of the face / non-face data, one plot per alpha.
# A separate name is used for the flattened binary labels so the global y_train
# (the subject labels of the face data) is not overwritten.
y_train_f_nf_flat = np.ravel(y_train_f_nf)
for a in alpha:
    # The projection depends only on alpha, so compute it once per alpha
    # rather than once per neighbour count.
    NEW_TRAIN, NEW_TEST = PCA(a)
    xp = []
    yp = []
    for n in no_of_neigbours:
        # Create the KNN classifier and train it on the projected training set
        knn = KNeighborsClassifier(n_neighbors = n)
        knn.fit(NEW_TRAIN, y_train_f_nf_flat)
        # Predict the response for the test dataset
        y_pred = knn.predict(NEW_TEST)
        accuary = metrics.accuracy_score(y_test_f_nf, y_pred)
        xp.append(n)
        yp.append(accuary)
    plot(xp, yp, "alpha = " +  str(a), "no_of_neigbours", "accuracy")

# LDA FOR FACES VS NON FACES PROBLEM

In [None]:

def split_f_nf(n):
    """Build face / non-face training and test sets using ``n`` non-faces each.

    Reads the globals ``X_nonFaces``, ``X_train`` and ``X_test``. Rows
    ``[0, n)`` of ``X_nonFaces`` join the training faces and rows ``[n, 2n)``
    join the test faces; faces come first in both stacks and are labelled 1,
    non-faces 0.

    NOTE(review): the slices do not fail when ``X_nonFaces`` has fewer than
    ``2 * n`` rows - the non-face test part is then shorter, possibly empty.

    Returns
    -------
    tuple
        ``(training_data, testing_data, training_labels, testing_labels,
        train_nonFaces, test_nonFaces)``.
    """
    def faces_first_labels(n_faces, n_total):
        # 1 for the leading face rows, 0 for everything after them.
        flags = np.array([0] * n_total)
        flags[:n_faces] = 1
        return flags

    train_nonFaces = X_nonFaces[:n, :]
    test_nonFaces = X_nonFaces[n:2 * n, :]

    training_data = np.concatenate((X_train, train_nonFaces), axis=0)
    testing_data = np.concatenate((X_test, test_nonFaces), axis=0)

    labels1 = faces_first_labels(X_train.shape[0], train_nonFaces.shape[0] + X_train.shape[0])
    labels2 = faces_first_labels(X_test.shape[0], X_test.shape[0] + test_nonFaces.shape[0])

    return training_data, testing_data, labels1, labels2, train_nonFaces, test_nonFaces

def get_m1_m2(X_train, train_nonFaces):
    """Return the per-column mean of each of the two sample matrices.

    Returns
    -------
    (m1, m2)
        ``m1`` is the mean of ``X_train`` along axis 0, ``m2`` the mean of
        ``train_nonFaces`` along axis 0.
    """
    class_means = tuple(np.mean(samples, axis = 0) for samples in (X_train, train_nonFaces))
    return class_means

def center_data(X_train,m1,train_nonFaces,m2):
    """Subtract ``m1`` from every row of ``X_train`` and ``m2`` from every row of ``train_nonFaces``.

    NOTE: this definition reuses the name of the two-argument ``center_data``
    defined earlier in the notebook; after this cell runs, the earlier
    function is no longer reachable under that name.

    Returns
    -------
    (Z1, Z2)
        New float arrays with the shapes of ``X_train`` and ``train_nonFaces``.
    """
    def shifted(samples, mean):
        out = np.zeros(samples.shape)
        out[...] = samples - mean
        return out

    return shifted(X_train, m1), shifted(train_nonFaces, m2)

def within_class_scatter_matrix(Z1,Z2):
    """Return ``Z1^T Z1 + Z2^T Z2``, the summed scatter of the two centred classes."""
    per_class = [np.dot(Z.T, Z) for Z in (Z1, Z2)]
    return per_class[0] + per_class[1]
    
    
def LDA_non_faces(number_of_eignVectors,number_of_nonFaces_images):
    """Two-class (face vs non-face) LDA followed by KNN for each k.

    Parameters
    ----------
    number_of_eignVectors : int
        Number of leading eigenvectors kept for the projection.
    number_of_nonFaces_images : int
        Passed to ``split_f_nf`` as the number of non-face rows per split.

    Returns
    -------
    list of float
        Accuracy for each value in the global ``no_of_neigbours``, in order.

    Notes
    -----
    * The between-class scatter is the outer product of the mean difference
      with itself. The earlier ``np.dot`` of two 1-D vectors produced a
      scalar, so the eigenvectors were those of the inverse scatter alone.
      As an outer product of one vector, ``B`` has rank 1, so at most one
      eigenvalue of the product is non-zero.
    * ``S`` is rank-deficient whenever there are fewer training rows than
      features, so the pseudo-inverse replaces ``np.linalg.inv`` (the two
      agree when ``S`` is invertible).
    * The per-k progress line now prints the accuracy for that k instead of
      the whole list accumulated so far.

    NOTE(review): ``pinv(S) @ B`` is generally not symmetric - confirm that
    ``get_eign_sorted`` handles non-symmetric input.
    """
    training_data, testing_data, training_labels, testing_labels, train_nonFaces, test_nonFaces = split_f_nf(number_of_nonFaces_images)

    # Reads the global X_train (face training rows), as split_f_nf does.
    m1, m2 = get_m1_m2(X_train, train_nonFaces)

    mean_difference = np.ravel(m1 - m2)
    B = np.outer(mean_difference, mean_difference)

    Z1, Z2 = center_data(X_train, m1, train_nonFaces, m2)

    S = within_class_scatter_matrix(Z1, Z2)

    # Eigenvalues below this relative cutoff are treated as zero rather than inverted.
    rcond = max(S.shape) * np.finfo(float).eps
    S_inv = np.linalg.pinv(S, rcond=rcond, hermitian=True)

    eigen_values, w = get_eign_sorted(np.dot(S_inv, B))

    P = w[:, :number_of_eignVectors]

    print(P.shape)

    NEW_TRAIN = np.dot(training_data, P)
    NEW_TEST = np.dot(testing_data, P)

    training_labels = np.ravel(training_labels)  # flat label vector for fit()
    y_accuracy = []
    for n in no_of_neigbours:
        # Create the KNN classifier and train it on the projected training set
        knn = KNeighborsClassifier(n_neighbors = n)
        knn.fit(NEW_TRAIN, training_labels)
        # Predict the response for the test dataset
        y_pred = knn.predict(NEW_TEST)
        accuracy = metrics.accuracy_score(testing_labels, y_pred)
        y_accuracy.append(accuracy)
        print(f"{n}                       {accuracy}\n")
    return y_accuracy

In [None]:
# Sweep the number of non-face images per split; yp[j][i] is the accuracy for
# no_of_neigbours[j] at no_non_faces[i].
# NOTE(review): load_non_faces keeps at most 50 images per category, so
# X_nonFaces may hold fewer than 2 * n rows for the larger values here;
# split_f_nf then returns a shorter (possibly empty) non-face test part - confirm.
no_non_faces = [100, 150, 200, 250, 300, 350, 400]
# One row per k; the row count follows no_of_neigbours instead of a literal 4.
yp = np.zeros((len(no_of_neigbours), len(no_non_faces)))
for i in range(0, len(no_non_faces)):
    accuracy = LDA_non_faces(16, no_non_faces[i])
    print(accuracy)
    yp[:, i] = accuracy

In [None]:
print(yp)
# One accuracy curve per k; the loop bound follows no_of_neigbours instead of a literal 4.
for j in range(len(no_of_neigbours)):
    plot(no_non_faces, yp[j], "N = " + str(no_of_neigbours[j]), "no_non_faces", "accuracy")

# Different Split of the Data

In [None]:
def bonus_split(X, y, per_class=10, n_train=7):
    """Split each consecutive group of rows into a leading train part and a trailing test part.

    Parameters
    ----------
    X : array-like, shape (n_samples, ...)
        Samples grouped in consecutive blocks of ``per_class`` rows; it is
        not modified.
    y : array-like
        Labels aligned with the rows of ``X``.
    per_class : int, default 10
        Rows per group.
    n_train : int, default 7
        Leading rows of each group that go to the training set; the remaining
        rows of the group go to the test set.

    Returns
    -------
    X_train, X_test : numpy.ndarray
    y_train, y_test : numpy.ndarray
        Labels as column vectors of shape (k, 1).

    Notes
    -----
    With the defaults this is the previous fixed 7/3 split of groups of 10.
    The earlier loop wrote each row back into ``X`` (a no-op that failed on
    read-only arrays) and raised ``IndexError`` when ``len(X)`` was not a
    multiple of 10; a trailing partial group is now split by the same rule.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    # Position of every row inside its own group: 0 .. per_class - 1.
    position_in_group = np.arange(X.shape[0]) % per_class
    in_train = position_in_group < n_train

    # Boolean indexing returns copies, so the results never alias X or y.
    X_train = X[in_train]
    X_test = X[~in_train]
    y_aligned = y[:X.shape[0]]
    y_train = y_aligned[in_train].reshape(-1, 1)
    y_test = y_aligned[~in_train].reshape(-1, 1)
    return X_train,X_test,y_train,y_test

In [None]:
# Re-split the face data: in every group of 10 rows the first 7 train, the last 3 test.
# This rebinds the global X_train / X_test / y_train / y_test used by the cells below.
X_train,X_test,y_train,y_test = bonus_split(X, y)
print(X_train.shape,X_test.shape,y_train.shape,y_test.shape)

In [None]:
# Run LDA on the new split with x = 7 training rows per class; accuracies are printed per k.
LDA(X_train,y_train,X_test,y_test,7)

In [None]:
# Centre both splits on the training mean. This is done inline because
# center_data_2 is not defined anywhere in the notebook, and center_data
# refers to the four-argument face / non-face version by this point.
train_mean = np.mean(X_train, axis = 0)
Z_train = np.array(X_train - train_mean)
Z_test = np.array(X_test - train_mean)

print(len(Z_train))
COV = get_cov_matrix(Z_train)
print(COV.shape)

# Eigenpairs ordered from largest to smallest eigenvalue; getEnd() and PCA() read these globals.
eigenValues, eigenVectors = get_eign_sorted(COV)

In [None]:
# KNN accuracy on the PCA projection of the 7/3 split for every (alpha, k) pair.
y_train = np.ravel(y_train)  # fit() is given a flat label vector
for a in alpha:
    # The projection depends only on alpha, so compute it once per alpha
    # rather than once per neighbour count.
    NEW_TRAIN, NEW_TEST = PCA(a)
    xp = []
    yp = []
    for n in no_of_neigbours:
        # Create the KNN classifier and train it on the projected training set
        knn = KNeighborsClassifier(n_neighbors = n)
        knn.fit(NEW_TRAIN, y_train)
        # Predict the response for the test dataset
        y_pred = knn.predict(NEW_TEST)
        accuary = metrics.accuracy_score(y_test, y_pred)
        xp.append(n)
        yp.append(accuary)
    plot(xp, yp, "alpha = " +  str(a), "no_of_neigbours", "accuracy")

In [None]:
# LDA projection for the 7/3 split (7 training rows per class).
# One mean row per class.
matrix_mean = compute_classes_mean_matrix(X_train, 7)

# Mean of all training samples.
total_mean = np.mean(X_train, axis = 0)

# Between-class scatter, n_features x n_features.
B = compute_between_class_scatter_matrix(matrix_mean, total_mean, 7)

# Training rows with their own class mean subtracted (same shape as X_train).
Z = centralize_data(X_train, matrix_mean, y_train)

# Within-class scatter, n_features x n_features.
S = compute_within_class_scatter_matrix(Z)

# S is rank-deficient when there are fewer training rows than features, so it
# has no inverse; use the pseudo-inverse and treat eigenvalues below the
# relative cutoff as zero.
S_inverse = np.linalg.pinv(S, rcond=max(S.shape) * np.finfo(float).eps, hermitian=True)


eigen_values ,eigen_vectors = get_eign_sorted(np.dot(S_inverse, B))

P = eigen_vectors[:,:39]

NEW_TRAIN =  np.dot( X_train , P)
NEW_TEST = np.dot( X_test , P)


In [None]:
# KNN accuracy per k on the global NEW_TRAIN / NEW_TEST projections, then one plot.
xp = []
yp = []
for n in no_of_neigbours:
    # Create the KNN classifier
    knn = KNeighborsClassifier(n_neighbors = n)
    # Train the model on the training set (labels flattened to 1-D first)
    y_train = np.ravel(y_train)
    knn.fit(NEW_TRAIN, y_train)
    # Predict the response for the test dataset
    y_pred = knn.predict(NEW_TEST)
    accuary=metrics.accuracy_score(y_test, y_pred)
    xp.append(n)
    yp.append(accuary)
plot(xp, yp, "LDA", "no_of_neigbours", "accuracy")