## Importing Libraries

In [43]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import sympy as sp

from sklearn.metrics import confusion_matrix, classification_report

## Defining Classes and Functions

In [44]:
class PCA:
    def __init__(self, n_components=0):
        self.d1 = n_components

    def fit(self, X):
        self.mean_vector = np.mean(X, axis = 0)
        self.covariance_matrix = np.cov(X, rowvar=False)
        self.eigen_values, self.eigen_vectors = np.linalg.eig(self.covariance_matrix)
        ind = np.argsort(self.eigen_values)[::-1]
        self.sorted_eigen_values = self.eigen_values[ind]
        if self.d1 > 0 and self.d1 < 1:
            self.total_variance = np.sum(self.sorted_eigen_values)
            self.selected_eigen_values = []
            cum_variance = 0
            i = 0
            while cum_variance < 0.95 * self.total_variance:
                cum_variance += self.sorted_eigen_values[i]
                self.selected_eigen_values.append(self.sorted_eigen_values[i])
                i += 1
            self.selected_eigen_values = np.array(self.selected_eigen_values)
            self.d1 = len(self.selected_eigen_values)
        self.sorted_eigen_vectors = self.eigen_vectors[ind]
        self.final_eigen_vectors = self.sorted_eigen_vectors[:, :self.d1]

    def transform(self, X):
        X1 = np.dot(X, self.final_eigen_vectors)
        return X1

In [45]:
class BayesClassifier:
    def __init__(self):
        pass

    def fit(self, X_train, y_train):
        self.classes, self.class_counts = np.unique(y_train, return_counts=True)
        self.no_of_classes = len(self.classes)
        self.total_data_points = len(y_train)
        self.apriori_probabilities = self.class_counts/self.total_data_points
        self.class_split_training_data = {}
        for c in self.classes:
            self.class_split_training_data[c] = []
            for j in range(len(y_train)):
                if c == y_train[j]:
                    self.class_split_training_data[c].append(X_train[j])
            self.class_split_training_data[c] = np.array(self.class_split_training_data[c])
        self.mean_vectors = []
        for c in self.classes:
            self.mean_vectors.append(np.mean(self.class_split_training_data[c], axis = 0))
        self.covariance_matrices = []
        for c in self.classes:
            self.covariance_matrices.append(np.cov(self.class_split_training_data[c], rowvar=False))
        self.inverse_covariance_matrices = []
        for c in range(len(self.classes)):
            self.inverse_covariance_matrices.append(np.linalg.inv(self.covariance_matrices[c]))
        self.dimensions = len(X_train[0])
        self.det_covariance_matrices = []
        for c in range(len(self.classes)):
            self.det_covariance_matrices.append(sp.Matrix(self.covariance_matrices[c]).det())

    def predict(self, X_test):
        y_pred = []
        for X in X_test:
            probability_values = {}
            class_count = 0
            for c in self.classes:
                p_xw = np.exp(-0.5 * np.dot(np.dot((X - self.mean_vectors[class_count]).T, self.inverse_covariance_matrices[class_count]), (X - self.mean_vectors[class_count]))) / (((2 * np.pi) ** (self.dimensions / 2)) * np.power(self.det_covariance_matrices[c], 0.5))
                p_wx = p_xw * self.apriori_probabilities[class_count]
                probability_values[c] = p_wx
                class_count += 1
            y_pred.append(max(probability_values, key=probability_values.get))
        y_pred = np.array(y_pred)
        return y_pred

## Data Extraction

In [46]:
dataset = pd.read_csv('face.csv')
dataset.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4087,4088,4089,4090,4091,4092,4093,4094,4095,target
0,0.309917,0.367769,0.417355,0.442149,0.528926,0.607438,0.657025,0.677686,0.690083,0.68595,...,0.669422,0.652893,0.661157,0.475207,0.132231,0.14876,0.152893,0.161157,0.157025,0
1,0.454545,0.471074,0.512397,0.557851,0.595041,0.640496,0.681818,0.702479,0.710744,0.702479,...,0.157025,0.136364,0.14876,0.152893,0.152893,0.152893,0.152893,0.152893,0.152893,0
2,0.318182,0.400826,0.491736,0.528926,0.586777,0.657025,0.681818,0.68595,0.702479,0.698347,...,0.132231,0.181818,0.136364,0.128099,0.14876,0.144628,0.140496,0.14876,0.152893,0
3,0.198347,0.194215,0.194215,0.194215,0.190083,0.190083,0.243802,0.404959,0.483471,0.516529,...,0.636364,0.657025,0.68595,0.727273,0.743802,0.764463,0.752066,0.752066,0.739669,0
4,0.5,0.545455,0.582645,0.623967,0.64876,0.690083,0.694215,0.714876,0.72314,0.731405,...,0.161157,0.177686,0.173554,0.177686,0.177686,0.177686,0.177686,0.173554,0.173554,0


In [47]:
dataset.shape

(400, 4097)

In [48]:
dataset['target'].nunique()

40

In [None]:
dataset = dataset.fillna(dataset.mean())

## Test-Train Split

In [49]:
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [50]:
print(X)

[[0.30991736 0.3677686  0.41735536 ... 0.15289256 0.16115703 0.1570248 ]
 [0.45454547 0.47107437 0.5123967  ... 0.15289256 0.15289256 0.15289256]
 [0.3181818  0.40082645 0.49173555 ... 0.14049587 0.14876033 0.15289256]
 ...
 [0.5        0.53305787 0.607438   ... 0.17768595 0.14876033 0.19008264]
 [0.21487603 0.21900827 0.21900827 ... 0.57438016 0.59090906 0.60330576]
 [0.5165289  0.46280992 0.28099173 ... 0.35950413 0.3553719  0.38429752]]


In [51]:
print(y)

[ 0  0  0  0  0  0  0  0  0  0  1  1  1  1  1  1  1  1  1  1  2  2  2  2
  2  2  2  2  2  2  3  3  3  3  3  3  3  3  3  3  4  4  4  4  4  4  4  4
  4  4  5  5  5  5  5  5  5  5  5  5  6  6  6  6  6  6  6  6  6  6  7  7
  7  7  7  7  7  7  7  7  8  8  8  8  8  8  8  8  8  8  9  9  9  9  9  9
  9  9  9  9 10 10 10 10 10 10 10 10 10 10 11 11 11 11 11 11 11 11 11 11
 12 12 12 12 12 12 12 12 12 12 13 13 13 13 13 13 13 13 13 13 14 14 14 14
 14 14 14 14 14 14 15 15 15 15 15 15 15 15 15 15 16 16 16 16 16 16 16 16
 16 16 17 17 17 17 17 17 17 17 17 17 18 18 18 18 18 18 18 18 18 18 19 19
 19 19 19 19 19 19 19 19 20 20 20 20 20 20 20 20 20 20 21 21 21 21 21 21
 21 21 21 21 22 22 22 22 22 22 22 22 22 22 23 23 23 23 23 23 23 23 23 23
 24 24 24 24 24 24 24 24 24 24 25 25 25 25 25 25 25 25 25 25 26 26 26 26
 26 26 26 26 26 26 27 27 27 27 27 27 27 27 27 27 28 28 28 28 28 28 28 28
 28 28 29 29 29 29 29 29 29 29 29 29 30 30 30 30 30 30 30 30 30 30 31 31
 31 31 31 31 31 31 31 31 32 32 32 32 32 32 32 32 32

In [52]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

In [53]:
print(X_train)

[[0.5        0.5371901  0.5289256  ... 0.5206612  0.59917355 0.38429752]
 [0.37603307 0.40082645 0.41322315 ... 0.607438   0.7107438  0.6487603 ]
 [0.1570248  0.1983471  0.24380165 ... 0.20661157 0.19421488 0.19008264]
 ...
 [0.7107438  0.74793386 0.76859504 ... 0.39256197 0.47933885 0.7107438 ]
 [0.4876033  0.3305785  0.2520661  ... 0.17768595 0.17768595 0.18181819]
 [0.11157025 0.11157025 0.1446281  ... 0.6859504  0.7066116  0.71487606]]


In [54]:
print(X_test)

[[0.3677686  0.3677686  0.35123968 ... 0.4876033  0.5123967  0.5495868 ]
 [0.14876033 0.20661157 0.19421488 ... 0.35123968 0.35950413 0.3677686 ]
 [0.15289256 0.1446281  0.19421488 ... 0.8057851  0.7892562  0.7768595 ]
 ...
 [0.338843   0.40495867 0.5082645  ... 0.58264464 0.57024795 0.5495868 ]
 [0.3264463  0.48347107 0.5247934  ... 0.08264463 0.0661157  0.0785124 ]
 [0.23553719 0.2603306  0.39256197 ... 0.22727273 0.23966943 0.23966943]]


In [55]:
print(y_train)

[12 34 19 19 23 21  5 17 20 11 36  7 17 28 38  2 35  6 17 37 28 37 28  8
  9  2 31  4 13 34 23 34 22 10 12 29  6 35 37 19 23 23 38 16 15 24 34 37
 25 11 13 38  6 20 27 31 27 33 25 14  7 30  0 13 33 33 21  5 38 31 12 31
 39 23  8 26 20  6  1 37 28 24  4  3 31 36  3 33  3 36 28 26  0 31 25 22
  1 20  6 15  2  7 22 36 28 16 24  0 14 25 22 21  7  1 36 19 20  5 33 27
 24 12 32 26 30  5 26 26 27 14 22 21  1 12  9 26  6 17 28 19 12 15 16 39
 34 23 30 29 36 11 31 19 10 12  2 20 29 23 27 26 10 28 35 39 16  2 15 33
  7 27  5  7 17 32 11  5  8 24 30  0  2 18 34 12 10  8 26 18 36 22  6 29
  4 13 18 11 21 29 19 15 22 17 15 32 32 30 25 12 24 26 39 32 25 34 34  7
  1 21 37 13 38 39 10 25  8 27 34 16  4 14 30  0 19 21 17 39 35  6 35 18
 29  0  9  9  3 20  3  3 20 28 11 10  3  5  6 35 23 14  9 27  0 15 31 37
 16 22 15 34  1  5  5 20 11 10  6 21 13 24  2 26 32 30 38 17 35 33 33 13
 15 16 18 39 35 16 15  8 38 30 36 23]


In [56]:
print(y_test)

[39 38 16 23 22  4 38 36 25  0 13  4 12 14  2  8 21 39  1 19 13 14  9 33
  7 17  3 29 30  0 31 11  0 14 25  9 35 24  9 38 10 39 36 32  1 22 14 28
 11  3  4 33 24  9  8  8 29 27  3 17  4 37 37 10 18 21 13 10 29 24  1  2
  4  2  7 29 25 18 11 31 30 14 19  8 16  5 27 20 32 18 32 18 18 32 37  1
  7  4  9 35]


## Principal Component Analysis

In [57]:
pca = PCA(0.95)
pca.fit(X_train)

In [58]:
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)

## Model Training & Evaluation

In [60]:
bc = BayesClassifier()
bc.fit(X_train, y_train)

KeyboardInterrupt: 

In [None]:
y_pred = bc.predict(X_test_pca)

In [None]:
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_pred)
print(f"The Accuracy of this model is {accuracy*100}%")