## Bernoulli Naive Bayes classifier example

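Create an artificial dataset for the Bernoulli Naive Bayes classifier: 200 samples with 4 binary features and a binary label. The labels are drawn at random, independently of the features, so no classifier can be expected to beat chance (about 50% accuracy) on this data; the example demonstrates the API rather than predictive power.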

In [1]:
import numpy as np
import pandas as pd

# Dimensions of the artificial dataset
num_samples, num_features, num_classes = 200, 4, 2

# Seed NumPy's global RNG so the two draws below are repeatable
np.random.seed(42)

# Binary feature matrix first, class labels second. The order of these two
# calls determines which random numbers each array receives. The labels are
# sampled independently of the features.
features = np.random.randint(0, 2, size=(num_samples, num_features))
labels = np.random.randint(0, num_classes, size=num_samples)

# Assemble one DataFrame: Feature_1 .. Feature_N followed by the Label column
feature_columns = [f'Feature_{idx}' for idx in range(1, num_features + 1)]
df = pd.DataFrame(features, columns=feature_columns).assign(Label=labels)

# Positional 80/20 split: the first 80% of rows train, the remainder test
train_size = int(0.8 * num_samples)
training_data = df.iloc[:train_size]
testing_data = df.iloc[train_size:]

# Separate the feature columns from the label column for each split.
# No dtype conversion is applied here; values keep the integer dtype
# produced by np.random.randint.
training_features, training_labels = training_data.drop(columns='Label'), training_data['Label']
testing_features, testing_labels = testing_data.drop(columns='Label'), testing_data['Label']

In [2]:
# Preview the first five rows (features plus the Label column)
df.head(n=5)

Unnamed: 0,Feature_1,Feature_2,Feature_3,Feature_4,Label
0,0,1,0,0,0
1,0,1,0,0,1
2,0,1,0,0,0
3,0,0,1,0,0
4,1,1,1,0,1


Load and test the model

In [5]:
from tensorflow_ml.classification.naive_bayes.bernoulli import BernoulliNaiveBayes

# Train the project's Bernoulli Naive Bayes implementation on the training split
bnb = BernoulliNaiveBayes()
bnb.fit(training_features, training_labels)
# Score on the held-out split. The value is multiplied by 100 when printed,
# so evaluate() presumably returns a fraction in [0, 1] — TODO confirm
# against the BernoulliNaiveBayes documentation.
accuracy = bnb.evaluate(testing_features, testing_labels)
print(f"Accuracy: {accuracy*100:.2f}%")

Accuracy: 35.00%


Let's test the scikit-learn equivalent and compare the results.

In [4]:
# Baseline: scikit-learn's Bernoulli Naive Bayes on the same train/test split
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import BernoulliNB

# fit() returns the estimator itself, so construction and training are chained
bnb2 = BernoulliNB().fit(training_features, training_labels)
y_pred = bnb2.predict(testing_features)

# Fraction of correctly classified test samples, reported as a percentage
sk_accuracy = accuracy_score(testing_labels, y_pred)
print("Accuracy: {:.2f}%".format(sk_accuracy * 100))

Accuracy: 40.00%


-----

## Gaussian Naive Bayes classifier example

Create the dataset for the Gaussian Naive Bayes classifier

In [4]:
import numpy as np
import tensorflow as tf
import pandas as pd

# Create an artificial two-class dataset: 1000 samples with 10 continuous
# features (5 informative, 5 redundant), generated with a fixed random_state
# so the data is reproducible.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=1000, n_features=10, n_informative=5, n_redundant=5, n_classes=2, random_state=1)

# Give every column an explicit, unique name. Without names, the feature
# columns are labelled 0..9 and pd.concat labels the unnamed label Series 0
# as well, so the frame ends up with two columns called 0 and df[0] no longer
# selects a single column.
X = pd.DataFrame(X, columns=[f'Feature_{i+1}' for i in range(X.shape[1])])
y = pd.Series(y, name='Label')
df = pd.concat([X, y], axis=1)

# Hold out 20% of the rows for testing; random_state fixes the shuffle.
# Features and label are selected by name rather than by column position.
X_train, X_test, y_train, y_test = train_test_split(df.drop(columns='Label'), df['Label'], test_size=0.2, random_state=1)

ModuleNotFoundError: No module named 'sklearn'

In [None]:
# Preview the first five rows of the Gaussian-section dataset
df.head(n=5)

Load and test the model

In [None]:
from tensorflow_ml.classification.naive_bayes.gaussian import GaussianNaiveBayes

# Train the project's Gaussian Naive Bayes implementation on the training split
gnb = GaussianNaiveBayes()
gnb.fit(X_train, y_train)

# Bind the test-set predictions to a name so they can be inspected in later
# cells; a bare call in the middle of a cell is neither displayed nor stored.
gnb_predictions = gnb.predict(X_test)

# Score on the held-out split. The value is printed as-is with four decimals;
# presumably a fraction in [0, 1] — TODO confirm against evaluate()'s docs.
accuracy = gnb.evaluate(X_test, y_test)
print(f"\nTest Accuracy: {accuracy:.4f}")