In [None]:
# Imported here as well so this cell runs on a fresh kernel; the only other
# numpy import in the notebook lives in a later cell.
import numpy as np

# NOTE(review): absolute, machine-specific path — this cell only runs where
# this exact file location exists.
data = np.load("/Users/bogdancristianmihaila/Desktop/3rd Semester/ML-project/data/fashion_train.npy")
X = data[:, :-1]  # All rows, every column except the last (used as features)
y = data[:, -1]   # All rows, last column (used as labels)
print(X.shape)
print(y.shape)

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from scipy.stats import gaussian_kde

class NaiveBayesClassifier:
    """Naive Bayes classifier with a per-class, per-feature density estimate.

    Every feature is modelled independently for every class, either with a
    fixed-width histogram (default) or with ``scipy.stats.gaussian_kde``.
    ``predict`` picks the class with the largest log-posterior score: the log
    of the class frequency seen in ``fit`` plus the sum of the per-feature
    log-densities.

    Args:
        bin_width: Width of the histogram bins. Must be a positive number
            when ``use_kde`` is False.
        use_kde: If True, estimate densities with a Gaussian KDE instead of
            a histogram.
    """

    # Densities are floored at this value before taking logs, so a value the
    # estimator has never seen costs a large finite penalty instead of -inf.
    _DENSITY_FLOOR = np.finfo(float).tiny

    def __init__(self, bin_width=None, use_kde=False):
        self.bin_width = bin_width
        self.use_kde = use_kde
        self.feature_pdfs = {}       # (class label, feature index) -> pdf callable
        self.class_log_priors = {}   # class label -> log(class frequency)
        self.classes = None          # sorted unique labels, set by fit()
        self.n_features = None       # number of columns seen by fit()

    def histogram_pdf(self, data):
        """Build a piecewise-constant density from a fixed-width histogram.

        Args:
            data: 1-D collection of finite real samples.

        Returns:
            A callable mapping a scalar or array ``x`` to the histogram
            density of the bin containing ``x`` (0 outside the histogram).

        Raises:
            ValueError: If ``bin_width`` is unset or not positive, or if
                ``data`` is empty or contains non-finite values.
        """
        if self.bin_width is None or self.bin_width <= 0:
            raise ValueError("bin_width must be a positive number when use_kde is False.")

        values = np.asarray(data, dtype=float).ravel()
        if values.size == 0:
            raise ValueError("Cannot estimate a histogram PDF from empty data.")
        if not np.all(np.isfinite(values)):
            raise ValueError("Histogram PDF requires finite data.")

        width = float(self.bin_width)
        lo = values.min()
        hi = values.max()

        # Always create at least one bin (constant features have hi == lo)
        # and make sure the maximum falls inside the last bin.
        n_bins = int(np.floor((hi - lo) / width)) + 1
        edges = lo + width * np.arange(n_bins + 1)
        if edges[-1] < hi:
            # Guard against floating-point rounding leaving hi just outside.
            edges[-1] = hi

        counts, edges = np.histogram(values, bins=edges)
        density = counts / (values.size * width)

        def pdf(x):
            x = np.asarray(x, dtype=float)
            idx = np.searchsorted(edges, x, side="right") - 1
            # np.histogram treats the last bin as closed on the right.
            idx = np.where(x == edges[-1], n_bins - 1, idx)
            inside = (idx >= 0) & (idx < n_bins)
            return np.where(inside, density[np.clip(idx, 0, n_bins - 1)], 0.0)

        return pdf

    def gaussian_kde_pdf(self, data):
        """Return the ``evaluate`` method of a Gaussian KDE fitted to ``data``."""
        # NOTE(review): gaussian_kde is expected to reject data that is
        # constant (degenerate covariance) — confirm, and handle here if the
        # training data contains features that are constant within a class.
        kde = gaussian_kde(data)
        return kde.evaluate

    def fit(self, X, y):
        """Estimate class frequencies and one density per (class, feature).

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like of n_samples class labels.

        Raises:
            ValueError: If ``X`` is not 2-D or ``X`` and ``y`` disagree on
                the number of samples.
        """
        X = np.asarray(X)
        y = np.asarray(y).ravel()
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features).")
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must contain the same number of samples.")

        # Start from a clean state so refitting never keeps stale estimates.
        self.classes = np.unique(y)
        self.feature_pdfs = {}
        self.class_log_priors = {}
        self.n_features = X.shape[1]

        estimate = self.gaussian_kde_pdf if self.use_kde else self.histogram_pdf
        for cls in self.classes:
            class_data = X[y == cls]
            self.class_log_priors[cls] = np.log(class_data.shape[0] / X.shape[0])
            for i in range(X.shape[1]):
                self.feature_pdfs[(cls, i)] = estimate(class_data[:, i])

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Scores are accumulated in log space; multiplying many raw densities
        underflows to 0 and makes all classes indistinguishable.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).

        Returns:
            A list with one predicted label per row of ``X``.

        Raises:
            RuntimeError: If called before ``fit``.
            ValueError: If ``X`` is not 2-D or its feature count differs
                from the one seen in ``fit``.
        """
        if self.classes is None:
            raise RuntimeError("fit must be called before predict.")

        X = np.asarray(X)
        if X.ndim == 1 and X.size == 0:
            return []
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features).")
        if self.n_features is not None and X.shape[1] != self.n_features:
            raise ValueError("X has a different number of features than the data passed to fit.")

        floor = self._DENSITY_FLOOR
        log_scores = np.empty((X.shape[0], len(self.classes)))
        for col, cls in enumerate(self.classes):
            total = np.full(X.shape[0], self.class_log_priors[cls], dtype=float)
            for i in range(X.shape[1]):
                # Evaluate the whole column at once instead of value by value.
                dens = np.asarray(self.feature_pdfs[(cls, i)](X[:, i]), dtype=float)
                total += np.log(np.maximum(dens, floor))
            log_scores[:, col] = total

        best = np.argmax(log_scores, axis=1)
        return list(self.classes[best])

# Example usage:
# Uses the feature matrix `X` and label vector `y` built in the data-loading
# cell. Passing the raw `data` array would leave the label column among the
# features, and `labels` was never defined.

# Split the data into training and held-out test sets (80% / 20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Create an instance of the classifier
classifier = NaiveBayesClassifier(bin_width=0.5, use_kde=False)

# Train the classifier
classifier.fit(X_train, y_train)

# Predict using the classifier
y_pred = classifier.predict(X_test)

# Evaluate the classifier
# ...


In [None]:
## BACKUP
import numpy as np
from scipy.stats import gaussian_kde

class NaiveBayes:
    """Backup Naive Bayes classifier with per-class, per-feature densities.

    Every feature is modelled independently for every class, either with a
    fixed-width histogram (default) or with ``scipy.stats.gaussian_kde``.
    ``predict`` picks the class with the largest log-posterior score: the log
    of the class frequency seen in ``fit`` plus the sum of the per-feature
    log-densities.

    Args:
        bin_width: Width of the histogram bins; must be positive when the
            histogram estimator is used.
        use_kde: If True, estimate densities with a Gaussian KDE instead of
            a histogram.
    """

    # Densities are floored at this value before taking logs, so a value the
    # estimator has never seen costs a large finite penalty instead of -inf.
    _DENSITY_FLOOR = np.finfo(float).tiny

    def __init__(self, bin_width=1.0, use_kde=False):
        self.bin_width = bin_width
        self.use_kde = use_kde
        self.feature_pdfs = {}       # (class label, feature index) -> pdf callable
        self.class_log_priors = {}   # class label -> log(class frequency)
        self.classes = None          # sorted unique labels, set by fit()
        self.n_features = None       # number of columns seen by fit()

    def histogram_pdf(self, data):
        """Build a piecewise-constant density from a fixed-width histogram.

        Args:
            data: 1-D collection of finite real samples.

        Returns:
            A callable mapping a scalar or array ``x`` to the histogram
            density of the bin containing ``x`` (0 outside the histogram).

        Raises:
            ValueError: If ``data`` is complex, empty or non-finite, or if
                ``bin_width`` is unset or not positive.
        """
        if np.iscomplexobj(data):
            raise ValueError("Data contains complex numbers, which are not supported for histogram PDF.")

        if self.bin_width is None or self.bin_width <= 0:
            raise ValueError("Bin width is not properly set.")

        samples = np.asarray(data, dtype=float).ravel()
        if samples.size == 0:
            raise ValueError("Cannot estimate a histogram PDF from empty data.")
        if not np.all(np.isfinite(samples)):
            raise ValueError("Histogram PDF requires finite data.")

        width = float(self.bin_width)
        min_data, max_data = samples.min(), samples.max()

        # Always create at least one bin (constant features have
        # max_data == min_data) and keep the maximum inside the last bin.
        n_bins = int(np.floor((max_data - min_data) / width)) + 1
        bins = min_data + width * np.arange(n_bins + 1)
        if bins[-1] < max_data:
            # Guard against floating-point rounding leaving the max outside.
            bins[-1] = max_data

        counts, bins = np.histogram(samples, bins=bins)
        density = counts / (samples.size * width)

        def pdf(x):
            x = np.asarray(x, dtype=float)
            idx = np.searchsorted(bins, x, side="right") - 1
            # np.histogram treats the last bin as closed on the right.
            idx = np.where(x == bins[-1], n_bins - 1, idx)
            inside = (idx >= 0) & (idx < n_bins)
            return np.where(inside, density[np.clip(idx, 0, n_bins - 1)], 0.0)

        return pdf

    def gaussian_kde_pdf(self, data):
        """Return the ``evaluate`` method of a Gaussian KDE fitted to ``data``."""
        # NOTE(review): gaussian_kde is expected to reject data that is
        # constant (degenerate covariance) — confirm, and handle here if the
        # training data contains features that are constant within a class.
        kde = gaussian_kde(data)
        return kde.evaluate

    def fit(self, X, y):
        """Estimate class frequencies and one density per (class, feature).

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like of n_samples class labels.

        Raises:
            ValueError: If ``X`` is not 2-D or ``X`` and ``y`` disagree on
                the number of samples.
        """
        X = np.asarray(X)
        y = np.asarray(y).ravel()
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features).")
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must contain the same number of samples.")

        # Start from a clean state so refitting never keeps stale estimates.
        self.classes = np.unique(y)
        self.feature_pdfs = {}
        self.class_log_priors = {}
        self.n_features = X.shape[1]

        build_pdf = self.gaussian_kde_pdf if self.use_kde else self.histogram_pdf
        for cls in self.classes:
            class_data = X[y == cls]
            self.class_log_priors[cls] = np.log(class_data.shape[0] / X.shape[0])
            for i in range(X.shape[1]):
                self.feature_pdfs[(cls, i)] = build_pdf(class_data[:, i])

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Scores are accumulated in log space; multiplying many raw densities
        underflows to 0 and makes all classes indistinguishable.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).

        Returns:
            A list with one predicted label per row of ``X``.

        Raises:
            RuntimeError: If called before ``fit``.
            ValueError: If ``X`` is not 2-D or its feature count differs
                from the one seen in ``fit``.
        """
        if self.classes is None:
            raise RuntimeError("fit must be called before predict.")

        X = np.asarray(X)
        if X.ndim == 1 and X.size == 0:
            return []
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features).")
        if self.n_features is not None and X.shape[1] != self.n_features:
            raise ValueError("X has a different number of features than the data passed to fit.")

        floor = self._DENSITY_FLOOR
        log_scores = np.empty((X.shape[0], len(self.classes)))
        for col, cls in enumerate(self.classes):
            total = np.full(X.shape[0], self.class_log_priors[cls], dtype=float)
            for i in range(X.shape[1]):
                # Evaluate the whole column at once instead of value by value.
                dens = np.asarray(self.feature_pdfs[(cls, i)](X[:, i]), dtype=float)
                total += np.log(np.maximum(dens, floor))
            log_scores[:, col] = total

        best = np.argmax(log_scores, axis=1)
        return list(self.classes[best])