In [16]:
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.metrics import accuracy_score

from mlcore.gaussian_nb import CustomGaussianNB

In [2]:
# Synthetic classification dataset: 100,000 samples with 4 features.
# random_state is fixed so the generated data is reproducible.
X, y = datasets.make_classification(
    n_samples=100000, n_features=4, random_state=4
)

# Hold out 20% of the samples for evaluation; the split seed is fixed as well.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1234
)

In [3]:
def mean_squared_error(y_true, y_pred):
    """Return the mean of the squared element-wise differences.

    Parameters
    ----------
    y_true : array-like
        Reference (ground-truth) values.
    y_pred : array-like
        Predicted values; combined with ``y_true`` under NumPy broadcasting.

    Returns
    -------
    numpy.float64
        Mean of ``(y_true - y_pred) ** 2`` taken over all elements.
    """
    # Cast to float64 before subtracting: plain lists/tuples do not support
    # ``-``, and unsigned or narrow integer dtypes wrap around on subtraction
    # or squaring, which would silently corrupt the result.
    y_true = np.asarray(y_true, dtype=np.float64)
    y_pred = np.asarray(y_pred, dtype=np.float64)
    return np.mean((y_true - y_pred) ** 2)

In [4]:
# Distinct labels and the number of samples carrying each one (class-balance check).
np.unique(y, return_counts=True)

(array([0, 1]), array([49968, 50032]))

In [5]:
# Per-feature mean of the rows labelled 0, computed over the full dataset
# (train and test together).
X[y==0].mean(axis=0)

array([ 0.03215466, -0.37110632, -0.99300135,  0.00248402])

In [6]:
# Fit the project's own Gaussian naive Bayes implementation on the training split.
# NOTE(review): the line printed as this cell's output presumably comes from
# inside CustomGaussianNB.fit() — confirm, and consider removing that debug print.
model = CustomGaussianNB()
model.fit(X_train, y_train)

[0 1] 4 2 [0.4992 0.5008]


In [7]:
# Fitted `mean` entries at index 0 and 1 — presumably the per-class feature
# means for labels 0 and 1; confirm against CustomGaussianNB.
model.mean[0], model.mean[1]

(array([ 0.02931777, -0.36867544, -0.99425243,  0.00673802]),
 array([-0.02655309,  0.36620397,  0.99517125, -0.01086861]))

In [8]:
# Fitted `var` entries at index 0 and 1 — presumably the per-class feature
# variances for labels 0 and 1; confirm against CustomGaussianNB.
model.var[0], model.var[1]

(array([0.53011342, 0.61963542, 0.52141149, 1.15373541]),
 array([0.92640922, 0.76339383, 0.68922281, 2.07922228]))

In [9]:
# Predicted labels for the held-out split (displayed only, not stored).
model.predict(X_test)

array([1, 1, 0, ..., 0, 0, 1], shape=(20000,))

In [10]:
# Ground-truth labels of the held-out split, for a visual comparison with the predictions.
y_test

array([1, 1, 0, ..., 0, 0, 1], shape=(20000,))

In [11]:
# Store the custom model's predictions on the held-out split for later scoring.
predictions = model.predict(X_test)

In [12]:
# Score the custom model on the held-out split.
# With 0/1 labels each squared error is either 0 or 1, so the MSE equals the
# fraction of misclassified samples (i.e. 1 - accuracy).
mse = mean_squared_error(y_test, predictions)
print("MSE:", mse)

# Fraction of held-out samples whose predicted label matches the true label.
accu = accuracy_score(y_test, predictions)
print("Accuracy:", accu)

MSE: 0.08495
Accuracy: 0.91505


In [13]:
# Baseline: scikit-learn's GaussianNB trained and evaluated on the same
# train/test split, so its scores are directly comparable to the custom model's.
gnb_model = GaussianNB()
gnb_model.fit(X_train, y_train)
gnb_predictions = gnb_model.predict(X_test)

# Same two metrics as computed for the custom model.
gnb_mse = mean_squared_error(y_test, gnb_predictions)  
print("Sklearn MSE:", gnb_mse)

gnb_accu = accuracy_score(y_test, gnb_predictions)
print("Sklearn Accuracy:", gnb_accu)

Sklearn MSE: 0.08495
Sklearn Accuracy: 0.91505
