In [2]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as sts
import pandas as pd
import seaborn as sns


In [3]:
# Open both Fashion splits with mmap_mode='r' (read-only memory maps), so
# the arrays are paged in from disk on access instead of being read eagerly.
data_train, data_test = (
    np.load(npy_path, mmap_mode='r')
    for npy_path in ('data/fashion_train.npy', 'data/fashion_test.npy')
)

In [4]:
# Split each array column-wise: everything except the last column goes to X,
# the last column goes to y (presumably the class label -- confirm against
# the dataset description).
X_train, y_train = data_train[:, :-1], data_train[:, -1]
X_test, y_test = data_test[:, :-1], data_test[:, -1]


In [10]:
class NaiveBayes:
    """Gaussian Naive Bayes classifier.

    Every feature is modelled, per class, as an independent normal
    distribution. A sample ``x`` is assigned to the class ``c`` maximising

        log P(c) + sum_j log N(x_j | mean[c, j], var[c, j])

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Fraction of the largest per-feature variance of the training data
        that is added to every class variance. This keeps the Gaussian
        well-defined for features that are constant within a class
        (zero variance would otherwise cause a division by zero).

    Attributes
    ----------
    class_labels : ndarray of shape (n_classes,)
        Sorted unique labels seen in ``fit``.
    prior : list of float, length n_classes
        Empirical class frequencies.
    mean, var : ndarray of shape (n_classes, n_features)
        Per-class feature means and (smoothed) variances.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing
        # Fitted state; all stay None until fit() has been called.
        self.class_labels = None
        self.prior = None
        self.mean = None
        self.var = None

    def fit(self, X, y):
        """Estimate class priors and per-class feature means/variances.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or ``y`` does not hold exactly one
            label per row of ``X``.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be 2-D (n_samples, n_features)")
        if y.ndim != 1 or y.shape[0] != X.shape[0]:
            raise ValueError("y must be 1-D with one label per row of X")
        n = X.shape[0]
        if n == 0:
            raise ValueError("cannot fit on an empty dataset")

        self.class_labels = np.unique(y)

        prior, mean, var = [], [], []
        for c in self.class_labels:
            mask = y == c
            X_c = X[mask]  # select the rows of this class once, reuse below
            prior.append(np.sum(mask) / n)
            mean.append(np.mean(X_c, axis=0))
            var.append(np.var(X_c, axis=0))

        # Variance floor: scale by the largest feature variance so the amount
        # of smoothing follows the magnitude of the data; fall back to the
        # raw constant when the whole dataset is constant.
        epsilon = self.var_smoothing * np.var(X, axis=0).max()
        if not epsilon > 0:
            epsilon = self.var_smoothing

        self.prior = prior
        self.mean = np.array(mean)
        self.var = np.array(var) + epsilon

    def predict(self, X):
        """Return the predicted class label for every row of ``X``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.mean is None:
            raise RuntimeError("NaiveBayes.predict() called before fit()")
        y_pred = [self._predict(x) for x in X]
        return np.array(y_pred)

    def _predict(self, x):
        """Return the label with the highest log-posterior for one sample."""
        posteriors = [
            # log-prior + log-likelihood (sum of per-feature log-densities)
            np.log(self.prior[i]) + np.sum(self._log_pdf(i, x))
            for i in range(len(self.class_labels))
        ]
        return self.class_labels[np.argmax(posteriors)]

    def _log_pdf(self, class_idx, x):
        """Per-feature Gaussian log-density of ``x`` under class ``class_idx``.

        Densities are kept in log space: multiplying many raw densities
        underflows to zero, whereas their logs can simply be summed.
        """
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        # log N(x | mean, var) = -0.5 * (log(2*pi*var) + (x - mean)^2 / var)
        return -0.5 * (np.log(2.0 * np.pi * var) + (x - mean) ** 2 / var)


In [None]:
# Testing
if __name__ == "__main__":

    def accuracy(y_true, y_pred):
        """Return the number of positions where y_true == y_pred, divided by len(y_true)."""
        n_correct = np.sum(y_true == y_pred)
        return n_correct / len(y_true)

    # Fit on the training split, predict the test split, and print the
    # accuracy of those predictions against y_test.
    nb = NaiveBayes()
    nb.fit(X_train, y_train)
    predictions = nb.predict(X_test)

    print("Naive Bayes classification accuracy", accuracy(y_test, predictions))

In [None]:
# class NaiveBayes:
#     def __init__(self):
#         # Initialize the NaiveBayes object with None for prior, mean, and var
#         self.prior = None
#         self.mean = None
#         self.var = None
        
    

#     def fit(self, X, y):
#         # Get unique class labels and dimensions of the feature matrix
#         self.class_labels = np.unique(y)
#         n, n_features = X.shape[0], X.shape[1]
        
#         # Prior class probabilities
#         prior = [np.sum(y == c) / n for c in self.class_labels]
        
#         # Calculate means and variances for each class and feature using list comprehensions
#         mean = np.array([np.mean(X[y == c], axis=0) for c in self.class_labels])
#         var = np.array([np.var(X[y == c], axis=0) for c in self.class_labels])

#         # Store prior, mean, and var as class variables
#         self.prior = prior
#         self.mean = mean
#         self.var = var 
            

#     def predict(self, X):
#         # Loop over each sample in X and make predictions using _predict method
#         y_pred = [self._predict(x) for x in X]
#         return np.array(y_pred)



#     def _predict(self, c):
#         posteriors = []
        

        


        

