In [1]:
import pandas as pd
import math


In [3]:
class NaiveBayes:
    """Holds the probability tables used by a naive Bayes classifier.

    Attributes:
        class_probs: dict, empty on construction; intended to map a class
            label to its probability.
        feature_probs: dict, empty on construction; intended to map a class
            label to its per-feature probability parameters.
    """

    def __init__(self):
        # Both lookup tables start out empty; each instance gets its own dicts.
        self.class_probs = dict()
        self.feature_probs = dict()
    

In [5]:
    def fit(self, X, y, var_floor=1e-9):
        """Estimate class priors and per-class feature distributions.

        Args:
            X: DataFrame of features. Columns whose dtype equals 'object' are
                treated as discrete; every other column is treated as
                continuous and summarised by a mean and a variance.
            y: Series of target labels (must support ``.unique()`` and
                element-wise ``==``); used as a boolean mask over ``X``.
            var_floor: Smallest variance stored for a continuous feature.
                ``Series.var()`` is NaN for a class with a single row and 0.0
                for a constant feature; both are replaced by this floor so
                the stored variance is always a finite positive number.

        Side effects:
            Replaces ``self.class_probs`` (label -> prior) and
            ``self.feature_probs`` (label -> column -> parameters).
        """
        # Start from a clean slate: without this, classes seen in an earlier
        # fit but absent from this one would linger in the model.
        self.class_probs = {}
        self.feature_probs = {}

        n_samples = X.shape[0]
        for class_value in y.unique():
            X_class = X[y == class_value]
            self.class_probs[class_value] = len(X_class) / n_samples

            col_params = {}
            for col in X.columns:
                if X[col].dtype == 'object':
                    # Discrete feature: relative frequency of each value
                    # within this class.
                    col_params[col] = X_class[col].value_counts(normalize=True).to_dict()
                else:
                    # Continuous feature: Gaussian parameters.
                    var = X_class[col].var()
                    # `not var > floor` is also True for NaN.
                    if not var > var_floor:
                        var = var_floor
                    col_params[col] = {
                        'mean': X_class[col].mean(),
                        'var': float(var)
                    }
            self.feature_probs[class_value] = col_params


In [7]:
def predict_probability(self, X, unseen_prob=1e-9, var_floor=1e-9):
        """Predict the most likely class label for every row of X.

        Despite the name, the returned list holds class labels (the key with
        the highest log-posterior score per row), not probabilities.

        Args:
            X: DataFrame of features with the same columns used to populate
                ``self.feature_probs``. Columns whose dtype equals 'object'
                are scored from the stored frequency tables; all other
                columns are scored with a Gaussian log-density.
            unseen_prob: Probability assigned to a discrete value that has no
                (or a zero) entry for a class. Skipping such a value would
                score it as probability 1 and favour that class.
            var_floor: Lower bound applied to a stored variance that is zero,
                negative or NaN, so the density stays finite.

        Returns:
            list with one predicted class label per row of X, in row order.

        Raises:
            ValueError: from ``math.log`` if a stored class prior is 0.
        """
        unseen_log_prob = math.log(unseen_prob)
        # The dtype of a column does not change per row: look it up once.
        is_discrete = {col: X[col].dtype == 'object' for col in X.columns}

        predictions = []
        for _, row in X.iterrows():
            scores = {}
            for class_value, prior in self.class_probs.items():
                score = math.log(prior)
                params = self.feature_probs[class_value]
                for col in X.columns:
                    if is_discrete[col]:
                        prob = params[col].get(row[col], 0.0)
                        score += math.log(prob) if prob > 0 else unseen_log_prob
                    else:
                        mean = params[col]['mean']
                        var = params[col]['var']
                        # `not var > floor` also catches NaN.
                        if not var > var_floor:
                            var = var_floor
                        # Work in log space directly: exp() of a large
                        # negative exponent underflows to 0 and log(0) raises.
                        score += (-0.5 * math.log(2 * math.pi * var)
                                  - (row[col] - mean) ** 2 / (2 * var))
                scores[class_value] = score
            predictions.append(max(scores, key=scores.get))
        return predictions

In [9]:
def evaluate_on_data(self, X, y_true):
        """Return the accuracy of the predictions for X against y_true.

        Args:
            X: Features, passed unchanged to ``self.predict_probability``.
            y_true: Iterable of expected labels (list, tuple, Series, ...),
                in the same order as the rows of X.

        Returns:
            Fraction of positions where the predicted label equals the
            expected label.

        Raises:
            ValueError: if the number of predictions differs from the number
                of expected labels.
            ZeroDivisionError: if ``y_true`` is empty.
        """
        y_pred = self.predict_probability(X)
        if len(y_pred) != len(y_true):
            raise ValueError(
                f"Lengths must match: got {len(y_pred)} predictions "
                f"for {len(y_true)} labels"
            )
        # Compare pairwise: `list == list` is a single bool, which sum()
        # cannot iterate over.
        correct = sum(pred == actual for pred, actual in zip(y_pred, y_true))
        return correct / len(y_true)
