### Naive Bayes (Gaussian NB)

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets

In [2]:
# Synthetic binary classification problem; the fixed seed keeps the notebook reproducible.
X, y = datasets.make_classification(
    n_samples=1000,
    n_features=10,
    n_classes=2,
    random_state=123,
)

# Hold out 20% of the samples for evaluation, using the same seed for the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
)

In [3]:
# Function to train the Naive Bayes (calculating the mean, variance, and priors for each class)
def fit_naive_bayes(X, y, var_smoothing=1e-9):
    """Estimate the Gaussian Naive Bayes parameters from labelled data.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training feature matrix.
    y : array-like of shape (n_samples,)
        Class label of each training sample.
    var_smoothing : float, default=1e-9
        Fraction of the largest per-feature variance of ``X`` that is added to
        every per-class variance. Without it, a feature that is constant
        within a class gets variance 0 and the Gaussian density divides by
        zero at prediction time. Pass 0 to disable the smoothing.

    Returns
    -------
    mean : ndarray of shape (n_classes, n_features)
        Per-class feature means.
    var : ndarray of shape (n_classes, n_features)
        Per-class (population) feature variances, plus the smoothing term.
    priors : ndarray of shape (n_classes,)
        Fraction of the training samples belonging to each class.
    classes : ndarray of shape (n_classes,)
        Sorted unique labels; row ``i`` of the other arrays describes
        ``classes[i]``.
    """
    X = np.asarray(X, dtype=np.float64)
    y = np.asarray(y)
    n_samples, n_features = X.shape
    classes = np.unique(y)
    n_classes = len(classes)

    # Smoothing term scaled to the data so it stays negligible relative to
    # real variances; skipped for empty input, where no variance exists.
    epsilon = var_smoothing * X.var(axis=0).max() if X.size else 0.0

    # Initialize arrays to store the mean, variance, and prior for each class
    mean = np.zeros((n_classes, n_features), dtype=np.float64)
    var = np.zeros((n_classes, n_features), dtype=np.float64)
    priors = np.zeros(n_classes, dtype=np.float64)

    # Calculate mean, variance, and prior for each class
    for idx, c in enumerate(classes):
        X_c = X[y == c]
        mean[idx, :] = X_c.mean(axis=0)
        var[idx, :] = X_c.var(axis=0) + epsilon
        # Prior probability = relative frequency of the class in the training set
        priors[idx] = X_c.shape[0] / float(n_samples)

    return mean, var, priors, classes

In [4]:
# Gaussian probability density of each feature value under one class's parameters
def pdf(class_idx, x, mean, var):
    """Evaluate the per-feature Gaussian density of ``x`` for a single class.

    ``mean[class_idx]`` and ``var[class_idx]`` supply the mean and variance
    used for each feature; the result has one density value per feature.
    """
    mu = mean[class_idx]
    sigma_sq = var[class_idx]

    # Normalising constant sqrt(2 * pi * sigma^2) of the Gaussian.
    scale = np.sqrt(2 * np.pi * sigma_sq)
    squared_deviation = (x - mu) ** 2

    return np.exp(-squared_deviation / (2 * sigma_sq)) / scale

In [5]:
# Function to predict the class label for a single data point
def predict_single(x, mean, var, priors, classes):
    """Return the most probable class label for one sample.

    Parameters
    ----------
    x : array-like of shape (n_features,)
        Feature vector to classify.
    mean, var : ndarray of shape (n_classes, n_features)
        Per-class Gaussian means and variances.
    priors : ndarray of shape (n_classes,)
        Per-class prior probabilities.
    classes : ndarray of shape (n_classes,)
        Class labels, aligned with the rows of ``mean`` / ``var``.

    Returns
    -------
    The element of ``classes`` with the highest log-posterior.

    Notes
    -----
    The Gaussian log-density is evaluated analytically rather than as
    ``log(pdf(x))``. For a sample far from every class mean the density
    underflows to 0, its log becomes ``-inf`` for all classes, and ``argmax``
    would then silently return the first class regardless of the data.
    """
    x = np.asarray(x, dtype=np.float64)
    log_posteriors = []

    for idx in range(len(classes)):
        # Log of the prior probability for the current class
        log_prior = np.log(priors[idx])
        # Sum over features of log N(x | mean, var)
        #   = -0.5 * sum(log(2 * pi * var) + (x - mean)^2 / var)
        log_likelihood = -0.5 * np.sum(
            np.log(2.0 * np.pi * var[idx]) + (x - mean[idx]) ** 2 / var[idx]
        )
        log_posteriors.append(log_prior + log_likelihood)

    return classes[np.argmax(log_posteriors)]

In [6]:
# Predict a class label for every row of X with the fitted Naive Bayes parameters
def predict_naive_bayes(X, mean, var, priors, classes):
    """Classify each sample in ``X`` and return the labels as a NumPy array.

    Every row of ``X`` is passed to ``predict_single`` together with the
    fitted ``mean``, ``var``, ``priors`` and ``classes``; the resulting labels
    are collected in row order.
    """
    labels = []
    for sample in X:
        labels.append(predict_single(sample, mean, var, priors, classes))
    return np.array(labels)

In [7]:
# Function to calculate the accuracy
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that match the true labels.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels; must have the same shape as ``y_true``.

    Returns
    -------
    numpy.float64
        Number of matching entries divided by ``len(y_true)``; ``nan`` when
        the inputs are empty, since accuracy is undefined there.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` have different shapes.
    """
    # Coerce to arrays so that plain Python lists are compared element-wise;
    # ``list == list`` yields a single bool and would give a wrong score.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {y_true.shape} and {y_pred.shape}"
        )

    n_total = len(y_true)
    if n_total == 0:
        # Avoid a 0/0 division warning; the score is undefined for empty input.
        return np.float64(np.nan)

    return np.sum(y_true == y_pred) / n_total


In [8]:
# Fit the per-class Gaussian parameters and priors on the training split.
mean, var, priors, classes = fit_naive_bayes(X=X_train, y=y_train)

# Label the held-out samples with the fitted parameters.
predictions = predict_naive_bayes(
    X=X_test,
    mean=mean,
    var=var,
    priors=priors,
    classes=classes,
)

# Report the share of held-out samples that were classified correctly.
print(
    "Naive Bayes classification accuracy:",
    accuracy(y_true=y_test, y_pred=predictions),
)

Naive Bayes classification accuracy: 0.965
