In [None]:
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

# Define the Gaussian Naive Bayes Classifier
class GaussianNaiveBayes:
    def fit(self, X, y):
        self.classes = np.unique(y)
        self.mean = {}
        self.var = {}
        self.prior = {}

        for c in self.classes:
            X_c = X[y == c]
            self.mean[c] = np.mean(X_c, axis=0)
            self.var[c] = np.var(X_c, axis=0)
            self.prior[c] = len(X_c) / len(X)

    def predict(self, X):
        predictions = []

        for x in X:
            posteriors = []

            for c in self.classes:
                prior = np.log(self.prior[c])
                likelihood = np.sum(np.log(self.pdf(c, x)))
                posterior = prior + likelihood
                posteriors.append(posterior)

            predictions.append(self.classes[np.argmax(posteriors)])

        return np.array(predictions)

    def pdf(self, class_label, x):
        mean = self.mean[class_label]
        var = self.var[class_label]
        num = np.exp(-(x - mean) ** 2 / (2 * var))
        den = np.sqrt(2 * np.pi * var)
        return num / den

# Create and fit the Gaussian Naive Bayes classifier
gnb = GaussianNaiveBayes()
gnb.fit(X_train, y_train)

# Make predictions on the test set
y_pred = gnb.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")


In [1]:
#!/usr/bin/env python
# coding: utf-8

# Importing Libraries
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import sympy as sp
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# PCA Function
def pca(X, n_components=0.95):
    mean_vector = np.mean(X, axis=0)
    covariance_matrix = np.cov(X, rowvar=False)
    eigen_values, eigen_vectors = np.linalg.eig(covariance_matrix)
    ind = np.argsort(eigen_values)[::-1]
    sorted_eigen_values = eigen_values[ind]
    
    if n_components >= 1:
        d1 = int(n_components)
    else:
        total_variance = np.sum(sorted_eigen_values)
        selected_eigen_values = []
        cum_variance = 0
        i = 0
        while cum_variance < n_components * total_variance:
            cum_variance += sorted_eigen_values[i]
            selected_eigen_values.append(sorted_eigen_values[i])
            i += 1
        d1 = len(selected_eigen_values)
        
    sorted_eigen_vectors = eigen_vectors[ind]
    final_eigen_vectors = sorted_eigen_vectors[:, :d1]
    
    X1 = np.dot(X, final_eigen_vectors)
    return X1

# Bayes Classifier
def bayes_classifier(X_train, y_train, X_test):
    classes, class_counts = np.unique(y_train, return_counts=True)
    no_of_classes = len(classes)
    total_data_points = len(y_train)
    apriori_probabilities = class_counts / total_data_points
    class_split_training_data = {}
    
    for c in classes:
        class_split_training_data[c] = []
        for j in range(len(y_train)):
            if c == y_train[j]:
                class_split_training_data[c].append(X_train[j])
        class_split_training_data[c] = np.array(class_split_training_data[c])
    
    mean_vectors = [np.mean(class_split_training_data[c], axis=0) for c in classes]
    covariance_matrices = [np.cov(class_split_training_data[c], rowvar=False) for c in classes]
    inverse_covariance_matrices = [np.linalg.inv(covariance_matrices[c]) for c in range(len(classes))]
    dimensions = len(X_train[0])
    det_covariance_matrices = [sp.Matrix(covariance_matrices[c]).det() for c in range(len(classes))]

    y_pred = []
    
    for X in X_test:
        probability_values = {}
        class_count = 0
        for c in classes:
            
            p_xw = np.exp(-0.5 * np.dot(np.dot((X - mean_vectors[class_count]).T, inverse_covariance_matrices[class_count]), (X - mean_vectors[class_count]))) / (((2 * np.pi) ** (dimensions / 2)) * np.power(det_covariance_matrices[c], 0.5))
            p_wx = p_xw * apriori_probabilities[class_count]
            probability_values[c] = p_wx
            class_count += 1
        y_pred.append(max(probability_values, key=probability_values.get))
    
    return y_pred

# Importing Dataset
dataset = pd.read_csv('face.csv')
dataset = dataset.fillna(dataset.mean())
print(dataset)
# Test-Train Split
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

# Principal Component Analysis
X_train_pca = pca(X_train, 0.95)
X_test_pca = pca(X_test, 0.95)

# Model Training & Evaluation
y_pred = bayes_classifier(X_train_pca, y_train, X_test_pca)
accuracy = accuracy_score(y_test, y_pred)
print(f"The Accuracy of this model is {accuracy*100}%")


            0         1         2         3         4         5         6  \
0    0.309917  0.367769  0.417355  0.442149  0.528926  0.607438  0.657025   
1    0.454545  0.471074  0.512397  0.557851  0.595041  0.640496  0.681818   
2    0.318182  0.400826  0.491736  0.528926  0.586777  0.657025  0.681818   
3    0.198347  0.194215  0.194215  0.194215  0.190083  0.190083  0.243802   
4    0.500000  0.545455  0.582645  0.623967  0.648760  0.690083  0.694215   
..        ...       ...       ...       ...       ...       ...       ...   
395  0.400826  0.495868  0.570248  0.632231  0.648760  0.640496  0.661157   
396  0.367769  0.367769  0.351240  0.301653  0.247934  0.247934  0.367769   
397  0.500000  0.533058  0.607438  0.628099  0.657025  0.632231  0.657025   
398  0.214876  0.219008  0.219008  0.223140  0.210744  0.202479  0.276859   
399  0.516529  0.462810  0.280992  0.252066  0.247934  0.367769  0.574380   

            7         8         9  ...      4087      4088      4089  \
0  

KeyboardInterrupt: 