# In [24]:
import numpy as np
import pandas as pd

class NaiveBayes:
    """Gaussian naive Bayes classifier.

    Every feature is modelled, independently per class, by a normal
    distribution whose mean and variance are estimated from the training
    data. Prediction picks the class with the largest posterior probability.

    Parameters
    ----------
    distr : str, default 'Gaussian'
        Name of the per-feature distribution. Only the Gaussian model is
        implemented; the value is stored but does not alter behaviour.
    var_smoothing : float, default 1e-9
        Fraction of the largest overall feature variance that is added to
        every per-class variance, so that a feature which is constant
        inside one class does not cause a division by zero.

    Attributes (set by ``fit``)
    ---------------------------
    classes : ndarray of shape (n,)
        Sorted unique class labels.
    n : int
        Number of classes.
    features : int
        Number of features seen during ``fit``.
    start_prob : ndarray of shape (n,)
        Prior probability of each class (relative frequency in ``y``).
    mean, variance : ndarray of shape (n, features)
        Per-class, per-feature mean and (smoothed) variance.
    """

    def __init__(self, distr='Gaussian', var_smoothing=1e-9):
        self.distr = distr
        self.var_smoothing = var_smoothing

    def get_stats(self, X, y):
        """Estimate the per-class, per-feature mean and variance.

        Parameters
        ----------
        X : array-like of shape (samples, features)
        y : array-like of shape (samples,)

        Stores the results in ``self.mean`` and ``self.variance``; rows
        follow the order of ``np.unique(y)``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        groups = [X[y == label] for label in np.unique(y)]
        # axis=0 keeps one statistic per feature; without it numpy collapses
        # the whole class to a single scalar.
        self.mean = np.array([group.mean(axis=0) for group in groups])
        raw_variance = np.array([group.var(axis=0) for group in groups])
        # Guard against zero variance (feature constant within a class).
        epsilon = self.var_smoothing * X.var(axis=0).max()
        self.variance = raw_variance + epsilon

    def pdf(self, X, mean, variance):
        """Return the Gaussian density N(X; mean, variance), element-wise."""
        # implemented only Gaussian
        diff = X - mean
        return (np.exp(-(diff * diff) / (2 * variance))
                / np.sqrt(2 * np.pi * variance))

    def fit(self, X_train, y_train):
        """Learn class priors and per-class feature statistics.

        Parameters
        ----------
        X_train : array-like of shape (samples, features)
        y_train : array-like of shape (samples,)

        Raises
        ------
        ValueError
            If ``X_train`` is not 2-D, is empty, or its number of rows does
            not match the number of labels.
        """
        X = np.asarray(X_train, dtype=float)
        y = np.asarray(y_train).ravel()
        if X.ndim != 2:
            raise ValueError(
                "X_train must be 2-D (samples, features), got %d-D" % X.ndim)
        if X.shape[0] == 0:
            raise ValueError("cannot fit on an empty training set")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                "X_train has %d rows but y_train has %d labels"
                % (X.shape[0], y.shape[0]))
        self.features = X.shape[1]
        self.classes, classes_count = np.unique(y, return_counts=True)
        self.n = len(self.classes)
        self.start_prob = classes_count / y.shape[0]
        self.get_stats(X, y)

    def _joint_log_likelihood(self, X):
        """Return log(prior * likelihood) as an array of shape (rows, n)."""
        # Summing log-densities instead of multiplying densities avoids
        # floating-point underflow when there are many features.
        columns = [
            np.log(prior)
            - 0.5 * np.sum(np.log(2 * np.pi * var) + (X - mean) ** 2 / var,
                           axis=1)
            for prior, mean, var in zip(self.start_prob, self.mean,
                                        self.variance)
        ]
        return np.column_stack(columns)

    def predict_proba(self, X):
        """Return posterior class probabilities.

        Parameters
        ----------
        X : array-like of shape (rows, features)

        Returns
        -------
        ndarray of shape (rows, n)
            Column ``k`` holds the probability of ``self.classes[k]``;
            every row sums to 1.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or its feature count differs from the one
            seen in ``fit``.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2 or X.shape[1] != self.features:
            raise ValueError(
                "X must have shape (rows, %d), got %s"
                % (self.features, X.shape))
        log_joint = self._joint_log_likelihood(X)
        # Shift by the row maximum before exponentiating (log-sum-exp trick)
        # so the largest term is exp(0) == 1 and nothing overflows.
        log_joint = log_joint - log_joint.max(axis=1, keepdims=True)
        proba = np.exp(log_joint)
        return proba / proba.sum(axis=1, keepdims=True)

    def predict(self, X_test):
        """Return the most probable class label for every row of ``X_test``."""
        best = np.argmax(self.predict_proba(X_test), axis=1)
        return self.classes[best]

    def score(self, X_test, y_test):
        """Return the accuracy (fraction of correct predictions) as a float."""
        y_true = np.asarray(y_test).ravel()
        return float(np.mean(self.predict(X_test) == y_true))