## Bernoulli Naive Bayes classifier example

Create an artificial binary dataset for the Bernoulli Naive Bayes classifier.

In [2]:
import numpy as np
import pandas as pd

# Seed NumPy's global generator so the synthetic data is identical on every run
np.random.seed(42)

# Dimensions of the synthetic dataset
num_samples = 200
num_features = 4
num_classes = 2

# Draw the binary feature matrix first and the class labels second;
# the order of the two draws determines the values produced by the seeded stream.
features = np.random.randint(0, 2, size=(num_samples, num_features))
labels = np.random.randint(0, num_classes, size=num_samples)

# Assemble a single DataFrame: Feature_1..Feature_N followed by the Label column
df = pd.DataFrame(
    features,
    columns=[f'Feature_{idx}' for idx in range(1, num_features + 1)],
).assign(Label=labels)

# Positional 80/20 split: the first rows are used for training, the rest for testing
train_size = int(0.8 * num_samples)
training_data = df.iloc[:train_size]
testing_data = df.iloc[train_size:]

# Separate predictors from the target column.
# Values keep the integer dtype produced by randint; no cast is applied here.
training_features = training_data.drop(columns='Label')
training_labels = training_data['Label']
testing_features = testing_data.drop(columns='Label')
testing_labels = testing_data['Label']

In [3]:
# Preview the first five rows of the synthetic dataset
df.head(n=5)

Unnamed: 0,Feature_1,Feature_2,Feature_3,Feature_4,Label
0,0,1,0,0,0
1,0,1,0,0,1
2,0,1,0,0,0
3,0,0,1,0,0
4,1,1,1,0,1


Import the model, train it on the training split, and evaluate it on the test split.

In [6]:
from tensorflow_ml.classification.naive_bayes.bernoulli import BernoulliNaiveBayes

# NOTE(review): the output saved with this cell is a ValueError raised from the
# library's train_step (operand shapes [32,2] vs [160,2]). It looks like a batch of
# 32 rows is being added to class priors tiled for all 160 training rows -- confirm
# in tensorflow_ml/classification/naive_bayes/bernoulli.py before relying on this cell.
bnb = BernoulliNaiveBayes()

# The held-out split is used both as validation data during fit and for the final score
held_out = (testing_features, testing_labels)
bnb.fit(training_features, training_labels, validation_data=held_out)

accuracy = bnb.evaluate(*held_out)
print(f"\nTest Accuracy: {accuracy:.4f}")



ValueError: in user code:

    File "/Users/siddhantpathak/Desktop/Projects/tensorflow-ml/tensorflow_ml/classification/naive_bayes/bernoulli.py", line 101, in train_step  *
        log_likelihoods += class_priors_broadcasted

    ValueError: Dimensions must be equal, but are 32 and 160 for '{{node add}} = AddV2[T=DT_DOUBLE](Cast, Tile_1)' with input shapes: [32,2], [160,2].


-----

## Gaussian Naive Bayes classifier example

Create an artificial dataset with continuous features for the Gaussian Naive Bayes classifier.

In [None]:
import numpy as np
import tensorflow as tf
import pandas as pd

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Create an artificial binary classification dataset: 1000 samples with 10 numeric
# features (5 informative, 5 redundant), reproducible through random_state.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=5,
    n_redundant=5,
    n_classes=2,
    random_state=1,
)
X = pd.DataFrame(X)
# Name the target explicitly: an unnamed Series would be concatenated under the
# column label 0, duplicating the label of the first feature column.
y = pd.Series(y, name='label')
df = pd.concat([X, y], axis=1)

# Split into train and test sets (80/20). Columns are selected by position:
# everything but the last column is a feature, the last column is the target.
X_train, X_test, y_train, y_test = train_test_split(
    df.iloc[:, :-1],
    df.iloc[:, -1],
    test_size=0.2,
    random_state=1,
)

In [None]:
# Preview the first five rows of the generated dataset
df.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,0.1
0,2.569995,-0.1302,3.160751,-4.359364,-1.61272,-1.393521,-2.489249,-1.930941,3.261304,2.056921,1
1,0.341293,2.513214,-0.804166,1.291966,2.057731,-3.110983,1.46583,6.247344,-1.927694,2.950315,0
2,2.2754,3.365615,0.171644,1.24862,0.302498,-1.137814,-1.608199,2.746938,0.134924,2.003395,0
3,0.357846,0.905176,-0.259241,0.930414,0.112336,0.143484,-0.367149,0.658955,-0.269128,0.155807,1
4,-2.722477,0.324236,2.906472,-0.021218,-2.867399,1.591744,0.620849,3.383528,0.945621,3.498071,0


Import the model, train it on the training split, and evaluate it on the test split.

In [None]:
from tensorflow_ml.classification.naive_bayes.gaussian import GaussianNaiveBayes

# Fit the Gaussian Naive Bayes model on the training split
gnb = GaussianNaiveBayes()
gnb.fit(X_train, y_train)

# Keep the test-set predictions: a bare `gnb.predict(X_test)` in the middle of a
# cell is neither displayed nor stored, so its result would simply be thrown away.
y_pred = gnb.predict(X_test)

# Report the value returned by evaluate() on the held-out split as the test accuracy
accuracy = gnb.evaluate(X_test, y_test)
print(f"\nTest Accuracy: {accuracy:.4f}")