In [None]:

import numpy as np
import pandas as pd
import sympy as sp
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# PCA Function
def pca(X, n_components=0):
    if n_components == 0:
        n_components = round(0.95 * len(X))
    mean_vector = np.mean(X, axis=0)
    covariance_matrix = np.cov(X, rowvar=False)
    eigen_values, eigen_vectors = np.linalg.eig(covariance_matrix)
    ind = np.argsort(eigen_values)[::-1]
    sorted_eigen_values = eigen_values[ind]
    sorted_eigen_vectors = eigen_vectors[ind]
    final_eigen_vectors = sorted_eigen_vectors[:, :n_components]
    X_transformed = np.dot(X, final_eigen_vectors)
    return X_transformed

# Importing Dataset
dataset = pd.read_csv("gender.csv")
dataset.rename( columns={'Unnamed: 0':'a'}, inplace=True )
dataset.rename( columns={'Unnamed: 1':'target'}, inplace=True )
le = LabelEncoder()
dataset.iloc[:,1]=le.fit_transform(dataset.iloc[:,1])
# Test-Train Split
types = dataset.iloc[:, 1].unique()
test_df = pd.DataFrame()
train_df = pd.DataFrame()
for t in types:
    type_df = dataset[dataset.iloc[:, 1] == t]
    train_df = pd.concat([train_df, type_df.iloc[10:]])
    test_df = pd.concat([test_df, type_df.iloc[:10]])

X_train = train_df.iloc[:, 2:].values
X_test = test_df.iloc[:, 2:].values
y_train = train_df.iloc[:, 1].values
y_test = test_df.iloc[:, 1].values



# Before PCA
mean_vectors = []
covariance_matrices = []
inverse_covariance_matrices = []
det_covariance_matrices = []

for c in np.unique(y_train):
    class_data = X_train[y_train == c]
    mean_vectors.append(np.mean(class_data, axis=0))
    covariance_matrices.append(np.cov(class_data, rowvar=False))
    inverse_covariance_matrices.append(np.linalg.inv(covariance_matrices[-1]))
    det_covariance_matrices.append(sp.Matrix(covariance_matrices[-1]).det())

y_pred_before = []
print(X_train[0])
dimensions = len(X_train[0])

for X in X_test:
    probability_values = {}
    class_count = 0
    for c in np.unique(y_train):
        p_xw = np.exp(-0.5 * np.dot(np.dot((X - mean_vectors[class_count]).T, inverse_covariance_matrices[class_count]), (X - mean_vectors[class_count]))) / (((2 * np.pi) ** (dimensions / 2)) * np.power(det_covariance_matrices[class_count], 0.5))
        p_wx = p_xw * np.sum(y_train == c) / len(y_train)
        probability_values[c] = p_wx
        class_count += 1
    y_pred_before.append(max(probability_values, key=probability_values.get))

final_df_before = pd.DataFrame({"Actual": y_test, "Predicted": y_pred_before})
accuracy = accuracy_score(y_test, y_pred_before)
print(f"The Accuracy of this model before PCA is {accuracy * 100}%")


dataset=pca(dataset)
dataset=pd.DataFrame(dataset)
# Test-Train Split
types = dataset.iloc[:, 1].unique()
test_df = pd.DataFrame()
train_df = pd.DataFrame()
for t in types:
    type_df = dataset[dataset.iloc[:, 1] == t]
    train_df = pd.concat([train_df, type_df.iloc[10:]])
    test_df = pd.concat([test_df, type_df.iloc[:10]])

X_train = train_df.iloc[:, 2:].values
X_test = test_df.iloc[:, 2:].values
y_train = train_df.iloc[:, 1].values
y_test = test_df.iloc[:, 1].values




mean_vectors = []
covariance_matrices = []
inverse_covariance_matrices = []
det_covariance_matrices = []  # Initialize as an empty list
dimensions = len(X_train[0])

for c in np.unique(y_train):
    class_data = X_train[y_train == c]
    mean_vectors.append(np.mean(class_data, axis=0))
    covariance_matrices.append(np.cov(class_data, rowvar=False))
    inverse_covariance_matrices.append(np.linalg.inv(covariance_matrices[-1]))
    det_covariance_matrices.append(sp.Matrix(covariance_matrices[-1]).det())

y_pred_after = []
for X in X_test:
    probability_values = {}
    class_count = 0
    for c in np.unique(y_train):
        p_xw = np.exp(-0.5 * np.dot(np.dot((X - mean_vectors[class_count]).T, inverse_covariance_matrices[class_count]), (X - mean_vectors[class_count]))) / (((2 * np.pi) ** (dimensions / 2)) * np.power(det_covariance_matrices[class_count], 0.5))
        p_wx = p_xw * np.sum(y_train == c) / len(y_train)
        probability_values[c] = p_wx
        class_count += 1
    y_pred_after.append(max(probability_values, key=probability_values.get))

    
    
print(y_pred_after)
final_df_after = pd.DataFrame({"Actual": y_test, "Predicted": y_pred_after})
accuracy = accuracy_score(y_test, y_pred_after)
print(f"The Accuracy of this model after PCA is {accuracy * 100}%")

In [49]:
dataset = pd.read_csv("gender.csv")
dataset.rename( columns={'Unnamed: 0':'a'}, inplace=True )
dataset.drop("a",axis=1)
dataset.reset_index(drop=True, inplace=True)
dataset

Unnamed: 0,a,Unnamed: 1,0,1,2,3,4,5,6,7,...,118,119,120,121,122,123,124,125,126,127
0,1,male,-0.066420,0.151611,0.027740,0.052771,-0.066105,-0.041232,-0.002637,-0.158467,...,0.025989,-0.001087,0.027260,-0.046754,-0.118619,-0.163774,-0.000590,-0.076400,0.107497,0.001567
1,2,male,-0.030614,0.049667,0.008084,-0.050324,0.007649,-0.063818,-0.019530,-0.119905,...,0.044229,-0.023900,-0.028108,0.040618,-0.146579,-0.141244,0.016162,0.017638,0.080610,-0.015930
2,3,male,-0.096178,0.061127,0.035326,-0.035388,-0.090728,-0.018634,-0.024315,-0.139786,...,0.111141,0.059436,-0.029222,0.042115,-0.222173,-0.116908,0.093428,0.017391,0.057652,0.086116
3,4,male,-0.103057,0.085044,0.078333,-0.035873,-0.028163,0.004924,0.007829,-0.017016,...,0.100793,-0.002644,-0.023388,0.029497,-0.139830,-0.119243,0.005306,-0.015100,0.161575,0.062462
4,5,male,-0.125815,0.120046,0.023131,-0.042901,0.038215,-0.049677,-0.054258,-0.130758,...,0.090197,0.067527,0.039926,0.047469,-0.056852,-0.076700,0.004966,0.028171,0.026041,0.084135
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,796,female,-0.164731,0.064301,0.058630,-0.017420,-0.157600,-0.022536,0.002864,-0.072739,...,0.095115,0.007198,-0.004655,0.023957,-0.170753,-0.136630,0.041614,0.031600,0.019064,0.004384
796,797,female,-0.095308,0.051095,0.092913,-0.101745,-0.083153,-0.028159,0.009090,-0.114513,...,0.056078,0.119846,0.087470,0.017481,-0.096594,-0.084553,0.037709,0.030732,-0.083713,0.064970
797,798,female,-0.202852,0.037039,0.079731,-0.047156,-0.140062,-0.080246,0.057668,-0.122083,...,0.066954,0.035684,-0.023112,-0.030452,-0.154243,-0.188270,0.071086,0.037384,-0.006257,0.039977
798,799,female,-0.088300,0.063530,0.049627,-0.026011,-0.172773,0.086218,0.042710,-0.161852,...,0.039460,0.067547,0.040426,0.028007,-0.154515,-0.127736,0.046967,0.009701,-0.016942,0.048071


Unnamed: 0,target,0,1,2,3,4,5,6,7,8,...,118,119,120,121,122,123,124,125,126,127
0,male,-0.066420,0.151611,0.027740,0.052771,-0.066105,-0.041232,-0.002637,-0.158467,0.130467,...,0.025989,-0.001087,0.027260,-0.046754,-0.118619,-0.163774,-0.000590,-0.076400,0.107497,0.001567
1,male,-0.030614,0.049667,0.008084,-0.050324,0.007649,-0.063818,-0.019530,-0.119905,0.186553,...,0.044229,-0.023900,-0.028108,0.040618,-0.146579,-0.141244,0.016162,0.017638,0.080610,-0.015930
2,male,-0.096178,0.061127,0.035326,-0.035388,-0.090728,-0.018634,-0.024315,-0.139786,0.052211,...,0.111141,0.059436,-0.029222,0.042115,-0.222173,-0.116908,0.093428,0.017391,0.057652,0.086116
3,male,-0.103057,0.085044,0.078333,-0.035873,-0.028163,0.004924,0.007829,-0.017016,0.114907,...,0.100793,-0.002644,-0.023388,0.029497,-0.139830,-0.119243,0.005306,-0.015100,0.161575,0.062462
4,male,-0.125815,0.120046,0.023131,-0.042901,0.038215,-0.049677,-0.054258,-0.130758,0.173457,...,0.090197,0.067527,0.039926,0.047469,-0.056852,-0.076700,0.004966,0.028171,0.026041,0.084135
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,female,-0.164731,0.064301,0.058630,-0.017420,-0.157600,-0.022536,0.002864,-0.072739,0.030554,...,0.095115,0.007198,-0.004655,0.023957,-0.170753,-0.136630,0.041614,0.031600,0.019064,0.004384
796,female,-0.095308,0.051095,0.092913,-0.101745,-0.083153,-0.028159,0.009090,-0.114513,0.157421,...,0.056078,0.119846,0.087470,0.017481,-0.096594,-0.084553,0.037709,0.030732,-0.083713,0.064970
797,female,-0.202852,0.037039,0.079731,-0.047156,-0.140062,-0.080246,0.057668,-0.122083,0.165443,...,0.066954,0.035684,-0.023112,-0.030452,-0.154243,-0.188270,0.071086,0.037384,-0.006257,0.039977
798,female,-0.088300,0.063530,0.049627,-0.026011,-0.172773,0.086218,0.042710,-0.161852,0.185083,...,0.039460,0.067547,0.040426,0.028007,-0.154515,-0.127736,0.046967,0.009701,-0.016942,0.048071


In [25]:
dataset=pd.DataFrame(dataset)

In [27]:
dataset

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,120,121,122,123,124,125,126,127,128,129
0,0.998172,0.890546,0.223747,-0.071050,-0.138517,0.134040,-0.124984,0.254958,-0.108636,0.053236,...,-0.072756,0.104029,0.036584,0.039078,-0.222778,-0.148006,-0.068748,-0.082201,-0.031613,-0.102967
1,1.998202,0.889296,0.132328,-0.232906,-0.156522,0.164608,-0.128925,0.126367,-0.073777,0.108880,...,-0.081243,0.069002,0.034198,-0.013571,-0.181246,-0.081752,-0.100546,-0.118350,-0.029352,-0.083704
2,2.998235,0.861548,0.132510,-0.183648,-0.128215,0.033146,-0.323645,0.114504,0.027655,-0.024123,...,-0.116013,0.058775,0.020336,0.071478,-0.158942,-0.028477,-0.102812,-0.073259,-0.028956,0.056129
3,3.998145,0.911621,0.288676,-0.101627,-0.215554,0.086198,-0.060832,0.285304,0.018240,0.084380,...,-0.079087,0.062516,0.072578,0.050592,-0.153164,-0.063272,-0.101974,-0.117423,-0.008599,-0.032305
4,4.998140,0.913434,0.010956,-0.110316,-0.191538,0.066674,-0.036182,0.146460,-0.041239,0.069098,...,-0.034279,0.028281,0.014159,0.024356,-0.153345,-0.071569,-0.066146,-0.052803,-0.061366,-0.037596
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,795.998586,1.484719,0.433064,-0.176306,-0.076159,0.073975,-0.149667,0.132612,-0.057989,-0.041403,...,-0.116304,0.015676,0.063921,-0.001151,-0.079259,-0.062028,-0.091801,-0.090257,0.018059,0.076938
796,796.998704,1.419265,0.183412,-0.232005,-0.163499,0.128282,-0.177558,0.163624,0.131282,0.120889,...,-0.117662,0.064044,0.053097,-0.022408,-0.126137,-0.045763,-0.081437,-0.093692,0.020490,0.061892
797,797.998702,1.431059,0.346479,-0.217069,-0.049557,0.104853,-0.103037,0.242186,0.048959,-0.009526,...,-0.096960,0.034040,0.047425,-0.001801,-0.032316,-0.075472,-0.051196,-0.126643,-0.005328,0.040147
798,798.998678,1.443258,0.430054,-0.059469,-0.159491,0.047672,-0.193310,0.126656,-0.129891,-0.024259,...,-0.083806,0.054918,0.032777,0.018923,-0.079716,-0.066676,-0.083959,-0.127937,-0.042245,0.032562


In [6]:
#!/usr/bin/env python
# coding: utf-8

# Importing Libraries
import numpy as np
import pandas as pd
import sympy as sp
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# PCA Function
def pca(X, n_components=0):
    if n_components == 0:
        n_components = round(0.95 * len(X))
    mean_vector = np.mean(X, axis=0)
    covariance_matrix = np.cov(X, rowvar=False)
    eigen_values, eigen_vectors = np.linalg.eig(covariance_matrix)
    ind = np.argsort(eigen_values)[::-1]
    sorted_eigen_values = eigen_values[ind]
    sorted_eigen_vectors = eigen_vectors[ind]
    final_eigen_vectors = sorted_eigen_vectors[:, :n_components]
    X_transformed = np.dot(X, final_eigen_vectors)
    return X_transformed

# Bayes Classifier
class BayesClassifier:
    def __init__(self):
        pass

    def fit(self, X_train, y_train):
        self.classes, self.class_counts = np.unique(y_train, return_counts=True)
        self.no_of_classes = len(self.classes)
        self.total_data_points = len(y_train)
        self.apriori_probabilities = self.class_counts / self.total_data_points
        self.class_split_training_data = {}
        for c in self.classes:
            self.class_split_training_data[c] = []
            for j in range(len(y_train)):
                if c == y_train[j]:
                    self.class_split_training_data[c].append(X_train[j])
            self.class_split_training_data[c] = np.array(self.class_split_training_data[c])
        self.mean_vectors = []
        for c in self.classes:
            self.mean_vectors.append(np.mean(self.class_split_training_data[c], axis=0))
        self.covariance_matrices = []
        for c in self.classes:
            self.covariance_matrices.append(np.cov(self.class_split_training_data[c], rowvar=False))
        self.inverse_covariance_matrices = []
        for c in range(len(self.classes)):
            self.inverse_covariance_matrices.append(np.linalg.inv(self.covariance_matrices[c]))
        self.dimensions = len(X_train[0])
        self.det_covariance_matrices = []
        for c in range(len(self.classes)):
            self.det_covariance_matrices.append(sp.Matrix(self.covariance_matrices[c]).det())

    def predict(self, X_test):
        y_pred = []
        for X in X_test:
            probability_values = {}
            class_count = 0
            for c in self.classes:
                p_xw = np.exp(-0.5 * np.dot(np.dot((X - self.mean_vectors[class_count]).T, self.inverse_covariance_matrices[class_count]), (X - self.mean_vectors[class_count]))) / (((2 * np.pi) ** (self.dimensions / 2)) * np.power(self.det_covariance_matrices[c], 0.5))
                p_wx = p_xw * self.apriori_probabilities[class_count]
                probability_values[c] = p_wx
                class_count += 1
            y_pred.append(max(probability_values, key=probability_values.get))
        y_pred = np.array(y_pred)
        return y_pred

# Importing Dataset
dataset = pd.read_csv("gender.csv")

# Test-Train Split
types = dataset.iloc[:, 1].unique()
test_df = pd.DataFrame()
train_df = pd.DataFrame()
for t in types:
    type_df = dataset[dataset.iloc[:, 1] == t]
    train_df = pd.concat([train_df, type_df.iloc[10:]])
    test_df = pd.concat([test_df, type_df.iloc[:10]])

X_train = train_df.iloc[:, 2:].values
X_test = test_df.iloc[:, 2:].values
y_train = train_df.iloc[:, 1].values
y_test = test_df.iloc[:, 1].values

# Encoding Dependent Variable
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.fit_transform(y_test)

# Before PCA
bc = BayesClassifier()
bc.fit(X_train, y_train)
y_pred_before = bc.predict(X_test)

final_df_before = pd.DataFrame({"Actual": y_test, "Predicted": y_pred_before})
accuracy = accuracy_score(y_test, y_pred_before)
print(f"The Accuracy of this model before PCA is {accuracy * 100}%")

# Applying PCA
X_train = pca(X_train)

# After PCA
bc = BayesClassifier()
bc.fit(X_train, y_train)
X_test = pca(X_test)  
y_pred_after = bc.predict(X_test)

final_df_after = pd.DataFrame({"Actual": y_test, "Predicted": y_pred_after})
accuracy = accuracy_score(y_test, y_pred_after)
print(f"The Accuracy of this model after PCA is {accuracy * 100}%")

The Accuracy of this model before PCA is 85.0%


ValueError: operands could not be broadcast together with shapes (19,) (128,) 

In [57]:
#!/usr/bin/env python
# coding: utf-8

# Importing Libraries
import numpy as np
import pandas as pd
import sympy as sp
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Principal Component Analysis (PCA) Class
class PrincipalComponentAnalysis:
    def __init__(self, n_components=0):
        self.num_components = n_components

    def fit(self, X):
        if self.num_components == 0:
            self.num_components = round(0.95 * len(X))
        self.mean_vector = np.mean(X, axis=0)
        self.covariance_matrix = np.cov(X, rowvar=False)
        self.eigen_values, self.eigen_vectors = np.linalg.eig(self.covariance_matrix)
        ind = np.argsort(self.eigen_values)[::-1]
        self.sorted_eigen_values = self.eigen_values[ind]
        self.sorted_eigen_vectors = self.eigen_vectors[ind]
        self.final_eigen_vectors = self.sorted_eigen_vectors[:, :self.num_components]

    def transform(self, X):
        X_transformed = np.dot(X, self.final_eigen_vectors)
        return X_transformed

# Bayes Classifier Class
class BayesianClassifier:
    def __init__(self):
        pass

    def fit(self, X_train, y_train):
        self.classes, self.class_counts = np.unique(y_train, return_counts=True)
        self.num_classes = len(self.classes)
        self.total_data_points = len(y_train)
        self.apriori_probabilities = self.class_counts / self.total_data_points
        self.class_split_training_data = {}
        
        for c in self.classes:
            self.class_split_training_data[c] = []
            for j in range(len(y_train)):
                if c == y_train[j]:
                    self.class_split_training_data[c].append(X_train[j])
            self.class_split_training_data[c] = np.array(self.class_split_training_data[c])
        
        self.mean_vectors = [np.mean(self.class_split_training_data[c], axis=0) for c in self.classes]
        self.covariance_matrices = [np.cov(self.class_split_training_data[c], rowvar=False) for c in self.classes]
        self.inverse_covariance_matrices = [np.linalg.inv(self.covariance_matrices[c]) for c in range(len(self.classes))]
        self.dimensions = len(X_train[0])
        self.det_covariance_matrices = [sp.Matrix(self.covariance_matrices[c]).det() for c in range(len(self.classes))]

    def predict(self, X_test):
        y_pred = []
        for X in X_test:
            probability_values = {}
            class_count = 0
            for c in self.classes:
                p_xw = np.exp(-0.5 * np.dot(np.dot((X - self.mean_vectors[class_count]).T, self.inverse_covariance_matrices[class_count]), (X - self.mean_vectors[class_count]))) / (((2 * np.pi) ** (self.dimensions / 2)) * np.power(self.det_covariance_matrices[class_count], 0.5))
                p_wx = p_xw * self.apriori_probabilities[class_count]
                probability_values[c] = p_wx
                class_count += 1
            y_pred.append(max(probability_values, key=probability_values.get))
        y_pred = np.array(y_pred)
        return y_pred

# Data Import and Preprocessing
dataset = pd.read_csv("gender.csv")
types = dataset.iloc[:, 1].unique()
test_df = pd.DataFrame()
train_df = pd.DataFrame()

# Split the dataset into test and train
for t in types:
    type_df = dataset[dataset.iloc[:, 1] == t]
    train_df = pd.concat([train_df, type_df.iloc[10:]])
    test_df = pd.concat([test_df, type_df.iloc[:10]])

X_train = train_df.iloc[:, 2:].values
X_test = test_df.iloc[:, 2:].values
y_train = train_df.iloc[:, 1].values
y_test = test_df.iloc[:, 1].values

# Encoding Dependent Variable
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.fit_transform(y_test)

# Initialize and fit the PCA model
pca = PrincipalComponentAnalysis()
pca.fit(X_train)

# Transform the data using PCA
X_train = pca.transform(X_train)
X_test = pca.transform(X_test)

# Initialize and fit the Bayesian Classifier
bc = BayesianClassifier()
bc.fit(X_train, y_train)

# Make predictions
y_pred = bc.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"The Accuracy of this model is {accuracy * 100}%")


The Accuracy of this model is 85.0%


In [59]:
#!/usr/bin/env python
# coding: utf-8

# Importing Libraries
import numpy as np
import pandas as pd
import sympy as sp
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Principal Component Analysis (PCA) Class
class PrincipalComponentAnalysis:
    def __init__(self, n_components=0.95):
        self.n_components = n_components

    def fit(self, X):
        self.mean_vector = np.mean(X, axis=0)
        self.covariance_matrix = np.cov(X, rowvar=False)
        self.eigen_values, self.eigen_vectors = np.linalg.eig(self.covariance_matrix)
        ind = np.argsort(self.eigen_values)[::-1]
        self.sorted_eigen_values = self.eigen_values[ind]
        
        if self.n_components >= 1:
            self.num_components = int(self.n_components)
        else:
            total_variance = np.sum(self.sorted_eigen_values)
            self.selected_eigen_values = []
            cum_variance = 0
            i = 0
            while cum_variance < 0.95 * total_variance:
                cum_variance += self.sorted_eigen_values[i]
                self.selected_eigen_values.append(self.sorted_eigen_values[i])
                i += 1
            self.num_components = len(self.selected_eigen_values)
        
        self.sorted_eigen_vectors = self.eigen_vectors[ind]
        self.final_eigen_vectors = self.sorted_eigen_vectors[:, :self.num_components]

    def transform(self, X):
        X1 = np.dot(X, self.final_eigen_vectors)
        return X1

# Bayesian Classifier Class
class BayesianClassifier:
    def __init__(self):
        pass

    def fit(self, X_train, y_train):
        self.classes, self.class_counts = np.unique(y_train, return_counts=True)
        self.num_classes = len(self.classes)
        self.total_data_points = len(y_train)
        self.apriori_probabilities = self.class_counts / self.total_data_points
        self.class_split_training_data = {}
        
        for c in self.classes:
            self.class_split_training_data[c] = []
            for j in range(len(y_train)):
                if c == y_train[j]:
                    self.class_split_training_data[c].append(X_train[j])
            self.class_split_training_data[c] = np.array(self.class_split_training_data[c])
        
        self.mean_vectors = [np.mean(self.class_split_training_data[c], axis=0) for c in self.classes]
        self.covariance_matrices = [np.cov(self.class_split_training_data[c], rowvar=False) for c in self.classes]
        self.inverse_covariance_matrices = [np.linalg.inv(self.covariance_matrices[c]) for c in range(len(self.classes))]
        self.dimensions = len(X_train[0])
        self.det_covariance_matrices = [sp.Matrix(self.covariance_matrices[c]).det() for c in range(len(self.classes))]

    def predict(self, X_test):
        y_pred = []
        for X in X_test:
            probability_values = {}
            class_count = 0
            for c in self.classes:
                p_xw = np.exp(-0.5 * np.dot(np.dot((X - self.mean_vectors[class_count]).T, self.inverse_covariance_matrices[class_count]), (X - self.mean_vectors[class_count]))) / (((2 * np.pi) ** (self.dimensions / 2)) * np.power(self.det_covariance_matrices[class_count], 0.5))
                p_wx = p_xw * self.apriori_probabilities[class_count]
                probability_values[c] = p_wx
                class_count += 1
            y_pred.append(max(probability_values, key=probability_values.get))
        y_pred = np.array(y_pred)
        return y_pred

# Data Import and Preprocessing
dataset = pd.read_csv('face.csv')
dataset = dataset.fillna(dataset.mean())

# Test-Train Split
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

# Initialize and fit the PCA model
pca = PrincipalComponentAnalysis()
pca.fit(X_train)

# Transform the data using PCA
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)

# Initialize and fit the Bayesian Classifier
bc = BayesianClassifier()
bc.fit(X_train_pca, y_train)

# Make predictions
y_pred = bc.predict(X_test_pca)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"The Accuracy of this model is {accuracy * 100}%")


KeyboardInterrupt: 