In [None]:
import numpy as np
import numpy as np
import torch
from torch.utils.data import DataLoader
from collections import defaultdict
import torchvision.models as models
from sklearn.decomposition import PCA
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import pickle
import joblib

In [None]:
# Load the pre-extracted training features and labels and convert them to NumPy.
train_data = torch.load('../Data/ProcessedData/train_data.pth')
train_features_loaded = train_data['features'].cpu().numpy()
train_labels_loaded = train_data['labels'].cpu().numpy()

# Load the held-out test features and labels.
# The original cell re-loaded 'train_data.pth' here, so the "test" metrics were
# actually computed on the training set.
# NOTE(review): 'test_data.pth' is inferred from the naming of the training file —
# confirm it matches the file written by the feature-extraction step.
test_data = torch.load('../Data/ProcessedData/test_data.pth')
test_features_loaded = test_data['features'].cpu().numpy()
test_labels_loaded = test_data['labels'].cpu().numpy()

# Fail early on inconsistent inputs instead of deep inside fit/predict.
assert train_features_loaded.shape[0] == train_labels_loaded.shape[0], \
    "training features and labels have different sample counts"
assert test_features_loaded.shape[0] == test_labels_loaded.shape[0], \
    "test features and labels have different sample counts"
assert train_features_loaded.shape[1:] == test_features_loaded.shape[1:], \
    "training and test features have different dimensionality"

# Check if the loaded data is correct
print(f'Loaded Training Features Shape: {train_features_loaded.shape}')
print(f'Loaded Training Labels Shape: {train_labels_loaded.shape}')
print(f'Loaded Test Features Shape: {test_features_loaded.shape}')
print(f'Loaded Test Labels Shape: {test_labels_loaded.shape}')

# Check the first few items to verify the contents
print(f'First training feature vector: {train_features_loaded[0]}')
print(f'First training label: {train_labels_loaded[0]}')
print(f'First test feature vector: {test_features_loaded[0]}')
print(f'First test label: {test_labels_loaded[0]}')

# Accumulates one [model, accuracy, precision, recall, f1] row per evaluated model;
# the prediction cells append to it and the final cell turns it into a table.
results = []

Implement Gaussian Naive Bayes using basic Python and NumPy

In [None]:
class NaiveBayesFromScratch:
    """Gaussian Naive Bayes classifier built on plain NumPy.

    Every feature is modelled as an independent Gaussian per class. Prediction
    picks the class with the largest log-posterior
    ``log P(c) + sum_j log N(x_j | mean[c, j], var[c, j])``.

    Attributes set by ``fit`` / ``load_weights``:
        classes: sorted unique class labels, shape (n_classes,).
        mean:    per-class feature means, shape (n_classes, n_features).
        var:     per-class feature variances (including the smoothing term
                 when produced by ``fit``), shape (n_classes, n_features).
        prior:   per-class relative frequencies, shape (n_classes,).
    """

    def __init__(self, var_smoothing=1e-9):
        """Create an unfitted classifier.

        Args:
            var_smoothing: fraction of the largest overall feature variance that
                ``fit`` adds to every per-class variance. Keeps the variance of
                constant features strictly positive so the Gaussian is defined.
        """
        self.var_smoothing = var_smoothing
        self.mean = None
        self.var = None
        self.prior = None
        self.classes = None

    def fit(self, X, y):
        """Estimate per-class means, variances and priors.

        Args:
            X: array-like of shape (n_samples, n_features).
            y: array-like of shape (n_samples,) with the class label of each row.

        Returns:
            self, so calls can be chained.
        """
        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y)
        n_samples, n_features = X.shape
        self.classes = np.unique(y)
        n_classes = len(self.classes)

        # Initialize mean, variance, and prior
        self.mean = np.zeros((n_classes, n_features), dtype=np.float64)
        self.var = np.zeros((n_classes, n_features), dtype=np.float64)
        self.prior = np.zeros(n_classes, dtype=np.float64)

        # A feature that is constant within a class has zero variance, which
        # makes the Gaussian degenerate (0/0 -> NaN posteriors). Add a small
        # data-scaled floor to every variance to keep it strictly positive.
        epsilon = self.var_smoothing * X.var(axis=0).max()

        # Calculate mean, variance, and prior for each class
        for idx, c in enumerate(self.classes):
            X_c = X[y == c]
            self.mean[idx, :] = X_c.mean(axis=0)
            self.var[idx, :] = X_c.var(axis=0) + epsilon
            self.prior[idx] = X_c.shape[0] / n_samples
        return self

    def _pdf(self, class_idx, x):
        """Return the per-feature Gaussian density of ``x`` under class ``class_idx``.

        Kept for inspection and compatibility; prediction works in log space
        (see ``_joint_log_likelihood``) because these densities underflow to 0
        for points far from the class mean.
        """
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        numerator = np.exp(-((x - mean) ** 2) / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator

    def _joint_log_likelihood(self, X):
        """Return log P(c) + log P(x | c) for every sample and class.

        Args:
            X: float array of shape (n_samples, n_features).

        Returns:
            Array of shape (n_samples, n_classes).
        """
        scores = np.empty((X.shape[0], len(self.classes)), dtype=np.float64)
        for idx in range(len(self.classes)):
            var = self.var[idx]
            # log of the Gaussian written out directly: no exp(), so no underflow.
            log_norm = -0.5 * np.sum(np.log(2.0 * np.pi * var))
            log_kernel = -0.5 * np.sum((X - self.mean[idx]) ** 2 / var, axis=1)
            scores[:, idx] = np.log(self.prior[idx]) + log_norm + log_kernel
        return scores

    def _predict(self, x):
        """Return the most probable class label for a single feature vector ``x``."""
        x = np.asarray(x, dtype=np.float64)
        scores = self._joint_log_likelihood(x[np.newaxis, :])[0]
        return self.classes[np.argmax(scores)]

    def predict(self, X):
        """Return the predicted class label for every row of ``X``.

        Args:
            X: array-like of shape (n_samples, n_features).

        Returns:
            NumPy array of shape (n_samples,) holding labels from ``self.classes``.
        """
        X = np.asarray(X, dtype=np.float64)
        if X.size == 0:
            return np.array([], dtype=self.classes.dtype)
        return self.classes[np.argmax(self._joint_log_likelihood(X), axis=1)]

    def save_weights(self, filename="Weights/Custom_Bayesian.pth"):
        """Pickle the fitted parameters (mean, var, prior, classes) to ``filename``.

        The parent directory is created if it does not exist yet.
        """
        import os  # local import: only needed to create the target directory

        directory = os.path.dirname(filename)
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(filename, "wb") as f:
            pickle.dump({
                "mean": self.mean,
                "var": self.var,
                "prior": self.prior,
                "classes": self.classes
            }, f)
        print(f"Model weights saved to {filename}")

    def load_weights(self, filename="Weights/Custom_Bayesian.pth"):
        """Restore parameters previously written by ``save_weights``.

        SECURITY: ``pickle.load`` can execute arbitrary code; only load files
        that you created yourself or otherwise trust.
        """
        with open(filename, "rb") as f:
            weights = pickle.load(f)
        self.mean = weights["mean"]
        self.var = weights["var"]
        self.prior = weights["prior"]
        self.classes = weights["classes"]
        print(f"Model weights loaded from {filename}")

# Prepare testing labels for Naive Bayes
# Alias (not a copy) of the loaded test labels; the evaluation cells read `test_labels_np`.
test_labels_np = test_labels_loaded


Train the from-scratch Naive Bayes model and save the weights

In [None]:
# Training labels under the name shared by both training cells
train_labels_np = train_labels_loaded

# Train the from-scratch classifier, then persist its parameters to disk
nb_custom = NaiveBayesFromScratch()
nb_custom.fit(train_features_loaded, train_labels_np)
nb_custom.save_weights(filename="Weights/Custom_Bayesian.pth")

Implement and train the scikit-learn Gaussian Naive Bayes model

In [None]:
# Train scikit-learn's Gaussian Naive Bayes (fit returns the fitted estimator)
nb_sklearn = GaussianNB().fit(train_features_loaded, train_labels_np)
# Persist the fitted estimator so the prediction cell can reload it
joblib.dump(nb_sklearn, 'Weights/Sklearn_Bayesian.pth')
print("Scikit-learn model weights saved.")


Predict using the saved weights of the custom Naive Bayes model

P.S. In the testing flow, after the imports, please run the data-loading cell (2nd cell) and then the `NaiveBayesFromScratch` class cell (3rd cell) before prediction. After the predictions, please run the last cell to display the results.

In [None]:
def plot_confusion_matrix(y_true, y_pred, model_name):
    """Draw the confusion matrix of a model's predictions as an annotated heatmap.

    Args:
        y_true: array-like of ground-truth class labels.
        y_pred: array-like of predicted class labels, same length as ``y_true``.
        model_name: name shown in the figure title.
    """
    # Derive the tick labels from the data instead of assuming classes 0..9, and
    # pass the same label list to confusion_matrix so rows/columns and ticks
    # always line up.
    labels = np.unique(np.concatenate([np.ravel(y_true), np.ravel(y_pred)]))
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=labels, yticklabels=labels)
    plt.xlabel('Predicted Class')
    plt.ylabel('Actual Class')
    plt.title(f'Confusion Matrix - {model_name}')
    plt.show()
def summarize_metrics(y_true, y_pred, model_name):
    """Build one results-table row for a model.

    Args:
        y_true: ground-truth class labels.
        y_pred: predicted class labels.
        model_name: label placed in the first column of the row.

    Returns:
        ``[model_name, accuracy, precision, recall, f1]`` where precision,
        recall and F1 are macro-averaged over the classes.
    """
    row = [model_name, accuracy_score(y_true, y_pred)]
    # Precision, recall and F1 share the same macro-averaged call signature.
    for macro_metric in (precision_score, recall_score, f1_score):
        row.append(macro_metric(y_true, y_pred, average='macro'))
    return row
# Rebuild the custom classifier from the weights written by the training cell.
nb_custom = NaiveBayesFromScratch()
nb_custom.load_weights('Weights/Custom_Bayesian.pth')
y_pred_custom = nb_custom.predict(test_features_loaded)

# Plot the confusion matrix
plot_confusion_matrix(test_labels_np, y_pred_custom, "Custom Naive Bayes")

# Drop the row left by an earlier run of this cell so that re-running it does
# not add a duplicate "Custom Naive Bayes" entry to the results table.
results = [row for row in results if row[0] != "Custom Naive Bayes"]
results.append(summarize_metrics(test_labels_np, y_pred_custom, "Custom Naive Bayes"))

Predict using the saved weights of the scikit-learn Naive Bayes model

P.S. In the testing flow, after the imports, please run the data-loading cell (2nd cell) and then the `NaiveBayesFromScratch` class cell (3rd cell) before prediction. After the predictions, please run the last cell to display the results.

In [None]:
# Reload the fitted scikit-learn estimator written by the training cell.
# (No need to construct a fresh GaussianNB first: joblib.load returns the
# complete fitted object.)
nb_sklearn = joblib.load('Weights/Sklearn_Bayesian.pth')
y_pred_sklearn = nb_sklearn.predict(test_features_loaded)

plot_confusion_matrix(test_labels_np, y_pred_sklearn, "Scikit-learn Naive Bayes")

# Drop the row left by an earlier run of this cell so that re-running it does
# not add a duplicate "Scikit-learn Naive Bayes" entry to the results table.
results = [row for row in results if row[0] != "Scikit-learn Naive Bayes"]
results.append(summarize_metrics(test_labels_np, y_pred_sklearn, "Scikit-learn Naive Bayes"))

Display results of both models in a table

In [None]:
# One column per entry of the rows collected in `results`
metric_columns = ["Model", "Accuracy", "Precision", "Recall", "F1-Measure"]
results_df = pd.DataFrame(data=results, columns=metric_columns)
print(results_df)