## Importing Libraries

In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sympy as sp

## Defined Functions and Classes

In [35]:
class BayesClassifier:
    """Bayes classifier that models every class as a multivariate Gaussian.

    ``fit`` estimates, for each class, the prior probability, the mean vector
    and the full covariance matrix of its training samples. ``predict``
    assigns each sample to the class with the largest posterior. The posterior
    is compared in the log domain, which yields the same arg-max as the plain
    density but does not underflow to zero for high-dimensional or distant
    samples.
    """

    def __init__(self):
        pass

    def fit(self, X_train, y_train):
        """Estimate per-class priors, means and covariance matrices.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Training feature vectors, one row per sample.
        y_train : array-like of shape (n_samples,)
            Class label of every row of ``X_train``. Any label type that
            ``np.unique`` can sort (integers, strings, ...) is accepted.

        Raises
        ------
        ValueError
            If ``X_train`` is not two-dimensional or its number of rows does
            not match the number of labels.
        numpy.linalg.LinAlgError
            Raised by ``np.linalg.inv`` when a class covariance matrix is
            singular.
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train)
        if X_train.ndim != 2:
            raise ValueError("X_train must be a 2-D array of shape (n_samples, n_features)")
        if len(X_train) != len(y_train):
            raise ValueError("X_train and y_train must contain the same number of samples")

        self.classes, self.class_counts = np.unique(y_train, return_counts=True)
        self.no_of_classes = len(self.classes)
        self.total_data_points = len(y_train)
        self.apriori_probabilities = self.class_counts / self.total_data_points
        self.dimensions = X_train.shape[1]

        # Boolean masking replaces the nested Python loops over every sample.
        self.class_split_training_data = {
            c: X_train[y_train == c] for c in self.classes
        }

        # All per-class lists below are ordered like ``self.classes`` and are
        # therefore indexed by class *position*, never by the label itself.
        self.mean_vectors = []
        self.covariance_matrices = []
        self.inverse_covariance_matrices = []
        self.det_covariance_matrices = []
        self.log_det_covariance_matrices = []
        for c in self.classes:
            samples = self.class_split_training_data[c]
            # atleast_2d keeps the single-feature case (0-d covariance) usable.
            covariance = np.atleast_2d(np.cov(samples, rowvar=False))
            # slogdet is numerically stable where a plain determinant of a
            # large matrix of small variances would underflow.
            sign, log_det = np.linalg.slogdet(covariance)

            self.mean_vectors.append(np.mean(samples, axis=0))
            self.covariance_matrices.append(covariance)
            self.inverse_covariance_matrices.append(np.linalg.inv(covariance))
            self.log_det_covariance_matrices.append(log_det)
            # Kept for backward compatibility; may underflow to 0.0, which is
            # why predict() relies on the log-determinant instead.
            self.det_covariance_matrices.append(sign * np.exp(log_det))

    def predict(self, X_test):
        """Return the most probable class label for every row of ``X_test``.

        Parameters
        ----------
        X_test : array-like of shape (n_samples, n_features)
            Samples to classify; ``n_features`` must match the training data.

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            Predicted labels, taken from ``self.classes``. When several
            classes tie, the first one in ``self.classes`` order is returned.

        Raises
        ------
        ValueError
            If ``X_test`` is non-empty and is not a 2-D array with
            ``self.dimensions`` columns.
        """
        X_test = np.asarray(X_test, dtype=float)
        if X_test.size == 0:
            return np.array([], dtype=self.classes.dtype)
        if X_test.ndim != 2 or X_test.shape[1] != self.dimensions:
            raise ValueError(
                f"X_test must have shape (n_samples, {self.dimensions})"
            )

        # log N(x | mu, Sigma) + log P(class), evaluated for all rows at once.
        log_normaliser = -0.5 * self.dimensions * np.log(2 * np.pi)
        log_posteriors = np.empty((len(X_test), self.no_of_classes))
        for position in range(self.no_of_classes):
            centered = X_test - self.mean_vectors[position]
            inverse = self.inverse_covariance_matrices[position]
            # Row-wise quadratic form (x - mu)^T Sigma^-1 (x - mu).
            mahalanobis = np.sum((centered @ inverse) * centered, axis=1)
            log_posteriors[:, position] = (
                log_normaliser
                - 0.5 * self.log_det_covariance_matrices[position]
                - 0.5 * mahalanobis
                + np.log(self.apriori_probabilities[position])
            )
        return self.classes[np.argmax(log_posteriors, axis=1)]

## Preprocessing

### Importing Data

In [19]:
# Read the face feature vectors from the CSV file (path is relative to the
# notebook's working directory) into a DataFrame.
dataset = pd.read_csv('face feature vectors.csv')
# Bare last expression so the notebook renders the loaded frame.
dataset

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,0,1,2,3,4,5,6,7,...,118,119,120,121,122,123,124,125,126,127
0,1,male,-0.066420,0.151611,0.027740,0.052771,-0.066105,-0.041232,-0.002637,-0.158467,...,0.025989,-0.001087,0.027260,-0.046754,-0.118619,-0.163774,-0.000590,-0.076400,0.107497,0.001567
1,2,male,-0.030614,0.049667,0.008084,-0.050324,0.007649,-0.063818,-0.019530,-0.119905,...,0.044229,-0.023900,-0.028108,0.040618,-0.146579,-0.141244,0.016162,0.017638,0.080610,-0.015930
2,3,male,-0.096178,0.061127,0.035326,-0.035388,-0.090728,-0.018634,-0.024315,-0.139786,...,0.111141,0.059436,-0.029222,0.042115,-0.222173,-0.116908,0.093428,0.017391,0.057652,0.086116
3,4,male,-0.103057,0.085044,0.078333,-0.035873,-0.028163,0.004924,0.007829,-0.017016,...,0.100793,-0.002644,-0.023388,0.029497,-0.139830,-0.119243,0.005306,-0.015100,0.161575,0.062462
4,5,male,-0.125815,0.120046,0.023131,-0.042901,0.038215,-0.049677,-0.054258,-0.130758,...,0.090197,0.067527,0.039926,0.047469,-0.056852,-0.076700,0.004966,0.028171,0.026041,0.084135
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,796,female,-0.164731,0.064301,0.058630,-0.017420,-0.157600,-0.022536,0.002864,-0.072739,...,0.095115,0.007198,-0.004655,0.023957,-0.170753,-0.136630,0.041614,0.031600,0.019064,0.004384
796,797,female,-0.095308,0.051095,0.092913,-0.101745,-0.083153,-0.028159,0.009090,-0.114513,...,0.056078,0.119846,0.087470,0.017481,-0.096594,-0.084553,0.037709,0.030732,-0.083713,0.064970
797,798,female,-0.202852,0.037039,0.079731,-0.047156,-0.140062,-0.080246,0.057668,-0.122083,...,0.066954,0.035684,-0.023112,-0.030452,-0.154243,-0.188270,0.071086,0.037384,-0.006257,0.039977
798,799,female,-0.088300,0.063530,0.049627,-0.026011,-0.172773,0.086218,0.042710,-0.161852,...,0.039460,0.067547,0.040426,0.028007,-0.154515,-0.127736,0.046967,0.009701,-0.016942,0.048071


### Test Train Split of Dataset

In [20]:
# Hold out the first TEST_ROWS_PER_CLASS rows of every class for testing and
# train on the remaining rows, so every class appears in both subsets.
TEST_ROWS_PER_CLASS = 5

label_column = dataset.iloc[:, 1]  # column 1 holds the class label
types = label_column.unique()

# Collect the per-class slices and concatenate once; growing a DataFrame with
# pd.concat inside the loop is quadratic and starts from an empty frame.
train_parts = []
test_parts = []
for t in types:
    type_df = dataset[label_column == t]
    test_parts.append(type_df.iloc[:TEST_ROWS_PER_CLASS, :])
    train_parts.append(type_df.iloc[TEST_ROWS_PER_CLASS:, :])

# pd.concat rejects an empty list, so fall back to an empty frame when the
# dataset contains no classes at all.
train_df = pd.concat(train_parts) if train_parts else pd.DataFrame()
test_df = pd.concat(test_parts) if test_parts else pd.DataFrame()

In [21]:
# Display the held-out test rows (the first five rows of each class).
test_df

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,0,1,2,3,4,5,6,7,...,118,119,120,121,122,123,124,125,126,127
0,1,male,-0.06642,0.151611,0.02774,0.052771,-0.066105,-0.041232,-0.002637,-0.158467,...,0.025989,-0.001087,0.02726,-0.046754,-0.118619,-0.163774,-0.00059,-0.0764,0.107497,0.001567
1,2,male,-0.030614,0.049667,0.008084,-0.050324,0.007649,-0.063818,-0.01953,-0.119905,...,0.044229,-0.0239,-0.028108,0.040618,-0.146579,-0.141244,0.016162,0.017638,0.08061,-0.01593
2,3,male,-0.096178,0.061127,0.035326,-0.035388,-0.090728,-0.018634,-0.024315,-0.139786,...,0.111141,0.059436,-0.029222,0.042115,-0.222173,-0.116908,0.093428,0.017391,0.057652,0.086116
3,4,male,-0.103057,0.085044,0.078333,-0.035873,-0.028163,0.004924,0.007829,-0.017016,...,0.100793,-0.002644,-0.023388,0.029497,-0.13983,-0.119243,0.005306,-0.0151,0.161575,0.062462
4,5,male,-0.125815,0.120046,0.023131,-0.042901,0.038215,-0.049677,-0.054258,-0.130758,...,0.090197,0.067527,0.039926,0.047469,-0.056852,-0.0767,0.004966,0.028171,0.026041,0.084135
400,401,female,0.001747,0.185678,0.07326,0.042142,-0.088674,0.028186,-0.02783,-0.064211,...,0.123615,0.030036,0.041442,-0.012818,-0.119177,-0.165786,-0.075368,-0.01769,0.067028,0.036452
401,402,female,-0.091598,0.09534,0.072125,-0.092276,-0.079953,0.047782,-0.004701,-0.092005,...,0.01137,0.144719,0.089139,-0.059767,-0.111235,-0.05542,0.006283,0.0169,-0.081676,0.022809
402,403,female,-0.018751,0.088572,0.068894,-0.0657,-0.115126,0.024339,-0.02842,-0.15932,...,0.010345,0.095309,0.012255,-0.03301,-0.09772,-0.218889,0.077764,0.045943,0.010856,0.100522
403,404,female,-0.130889,0.093262,0.122244,-0.110014,-0.157625,-0.036781,0.073908,-0.098571,...,0.003229,0.04933,0.059733,-0.02382,-0.098432,-0.034316,0.075131,-0.029204,-0.020707,0.031028
404,405,female,-0.037433,0.078158,0.118061,-0.117658,-0.194807,-0.045464,-0.014104,-0.158824,...,0.043848,-0.00976,0.043486,0.020251,-0.11542,-0.040023,0.099409,-0.03224,0.037601,-0.020016


In [22]:
# Display the training rows (everything after the first five rows of each class).
train_df

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,0,1,2,3,4,5,6,7,...,118,119,120,121,122,123,124,125,126,127
5,6,male,-0.149119,0.125288,0.142323,-0.009087,-0.031394,-0.123533,0.043598,-0.063999,...,0.060833,0.089529,-0.034872,0.057080,-0.137162,-0.072522,0.052731,-0.141460,0.019018,0.085765
6,7,male,-0.139035,0.073513,-0.001770,-0.034225,-0.101610,0.065105,-0.014420,-0.054993,...,0.081007,-0.002164,0.060377,0.080294,-0.139369,-0.150245,0.078657,0.024194,0.062180,0.036039
7,8,male,-0.074126,-0.000669,0.004166,-0.082413,-0.096091,-0.021992,0.009714,-0.056961,...,0.050497,0.038932,0.023520,-0.090260,-0.147692,-0.008296,0.007609,-0.026687,-0.017523,-0.038310
8,9,male,-0.166220,0.042769,-0.031647,-0.036892,-0.143837,-0.040566,0.042541,-0.122923,...,0.014732,-0.049135,0.081770,-0.027199,-0.096941,-0.094661,0.057797,-0.101063,0.061373,0.062176
9,10,male,-0.185770,0.154008,0.073184,-0.070829,-0.144617,-0.019732,-0.019418,-0.004675,...,0.093317,0.035101,-0.147997,-0.046010,-0.087777,-0.100660,0.036190,0.012158,0.032304,0.085996
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,796,female,-0.164731,0.064301,0.058630,-0.017420,-0.157600,-0.022536,0.002864,-0.072739,...,0.095115,0.007198,-0.004655,0.023957,-0.170753,-0.136630,0.041614,0.031600,0.019064,0.004384
796,797,female,-0.095308,0.051095,0.092913,-0.101745,-0.083153,-0.028159,0.009090,-0.114513,...,0.056078,0.119846,0.087470,0.017481,-0.096594,-0.084553,0.037709,0.030732,-0.083713,0.064970
797,798,female,-0.202852,0.037039,0.079731,-0.047156,-0.140062,-0.080246,0.057668,-0.122083,...,0.066954,0.035684,-0.023112,-0.030452,-0.154243,-0.188270,0.071086,0.037384,-0.006257,0.039977
798,799,female,-0.088300,0.063530,0.049627,-0.026011,-0.172773,0.086218,0.042710,-0.161852,...,0.039460,0.067547,0.040426,0.028007,-0.154515,-0.127736,0.046967,0.009701,-0.016942,0.048071


### Creation of Variables

In [23]:
# Column 1 carries the class label; every column from index 2 onward is a feature.
X_train, y_train = train_df.iloc[:, 2:].values, train_df.iloc[:, 1].values
X_test, y_test = test_df.iloc[:, 2:].values, test_df.iloc[:, 1].values

In [24]:
# Inspect the training feature matrix.
print(X_train)

[[-0.14911914  0.1252878   0.14232311 ... -0.1414603   0.01901806
   0.08576487]
 [-0.13903469  0.07351305 -0.00176958 ...  0.02419422  0.06218007
   0.03603866]
 [-0.07412638 -0.00066875  0.00416601 ... -0.02668655 -0.01752269
  -0.0383101 ]
 ...
 [-0.20285167  0.0370395   0.07973114 ...  0.03738441 -0.00625749
   0.03997689]
 [-0.08829999  0.06353012  0.04962703 ...  0.00970074 -0.01694169
   0.04807128]
 [-0.15620135  0.05516458  0.14271647 ... -0.0102984  -0.02885648
   0.0753232 ]]


In [25]:
# Inspect the test feature matrix.
print(X_test)

[[-0.06641996  0.15161145  0.02773961 ... -0.07640016  0.10749723
   0.00156654]
 [-0.03061386  0.04966652  0.00808374 ...  0.0176384   0.08060966
  -0.01592966]
 [-0.09617768  0.06112669  0.03532604 ...  0.01739147  0.057652
   0.08611634]
 ...
 [-0.0187513   0.08857222  0.06889394 ...  0.04594309  0.01085567
   0.10052187]
 [-0.13088937  0.09326187  0.12224357 ... -0.02920397 -0.02070727
   0.03102766]
 [-0.03743254  0.07815813  0.11806121 ... -0.03223962  0.03760109
  -0.02001636]]


In [26]:
# Inspect the raw (string) training labels before encoding.
print(y_train)

['male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male' 'male'
 'male

In [27]:
# Inspect the raw (string) test labels before encoding.
print(y_test)

['male' 'male' 'male' 'male' 'male' 'female' 'female' 'female' 'female'
 'female']


### Encoding the Dependent Variable

In [28]:
from sklearn.preprocessing import LabelEncoder

# Learn the label -> integer mapping from the training labels only and reuse
# that same mapping for the test labels. Refitting on the test labels could
# assign different integers whenever the test set does not contain exactly the
# same set of classes as the training set.
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

In [29]:
# Inspect the integer-encoded training labels.
print(y_train)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 

In [30]:
# Inspect the integer-encoded test labels.
print(y_test)

[1 1 1 1 1 0 0 0 0 0]


## Model Creation

In [36]:
# Instantiate the classifier and estimate its per-class statistics from the
# training features and encoded labels.
bc = BayesClassifier()
bc.fit(X_train, y_train)

## Prediction and Analysis

In [37]:
# Predict a class label for every held-out test sample.
y_pred = bc.predict(X_test)

In [38]:
# Put the true and predicted labels side by side for a row-by-row comparison.
final_df = pd.DataFrame({"Actual": y_test, "Predicted": y_pred})
final_df

Unnamed: 0,Actual,Predicted
0,1,1
1,1,1
2,1,1
3,1,1
4,1,1
5,0,1
6,0,0
7,0,0
8,0,0
9,0,0


In [39]:
from sklearn.metrics import accuracy_score
# accuracy_score is called with the true test labels and the predictions;
# the result is scaled by 100 to print it as a percentage.
accuracy = accuracy_score(y_test, y_pred)
print(f"The Accuracy of this model is {accuracy*100}%")

The Accuracy of this model is 90.0%
