In [15]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from collections import defaultdict
from sklearn.metrics import precision_score, recall_score, f1_score
import math

Naive Bayes on Categorical Data

In [None]:
# Build a synthetic categorical dataset: four categorical features and a
# binary label, every column drawn independently and uniformly at random.
np.random.seed(42)  # fixed seed so the generated frame is reproducible
data_size = 1000

# Column name -> the levels that column is sampled from.
# The dict order is also the order in which the random draws are made.
column_levels = {
    'FeatureA': ['A', 'B', 'C'],
    'FeatureB': ['X', 'Y', 'Z'],
    'FeatureC': ['M', 'N'],
    'FeatureD': ['P', 'Q'],
    'Label': [0, 1],
}

feature_a, feature_b, feature_c, feature_d, labels = (
    np.random.choice(levels, data_size) for levels in column_levels.values()
)

df = pd.DataFrame(
    dict(zip(column_levels, (feature_a, feature_b, feature_c, feature_d, labels)))
)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# partition identical across runs.
train_data, test_data = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Fit a categorical Naive Bayes model from the training split.
#
# Produces:
#   class_probs[label]             -> prior P(label), the share of training rows with that label
#   feature_probs[col][val][label] -> P(col == val | label), the relative frequency of `val`
#                                     among the training rows of that label
#
# The DataFrame built earlier names its target column 'Label'; indexing with
# 'Class' raised a KeyError on a fresh run.
label_col = 'Label'
feature_cols = [col for col in train_data.columns if col != label_col]

class_probs = train_data[label_col].value_counts(normalize=True).to_dict()

feature_probs = defaultdict(lambda: defaultdict(lambda: defaultdict(float)))

# Iterate over the labels actually present in the training data rather than a
# hard-coded [0, 1].
for label in class_probs:
    subset = train_data[train_data[label_col] == label]
    for col in feature_cols:
        # No smoothing is applied: a value never observed together with a
        # label simply gets no entry for that label.
        value_counts = subset[col].value_counts(normalize=True).to_dict()
        for val, prob in value_counts.items():
            feature_probs[col][val][label] = prob

In [7]:
# Classify every test row with the fitted categorical Naive Bayes model.
#
# For each row and each label the score is
#   P(label) * prod over features of P(feature value | label)
# and the label with the highest score is predicted.
#
# The DataFrame built earlier names its target column 'Label'; indexing with
# 'Class' raised a KeyError on a fresh run.
label_col = 'Label'
feature_cols = [col for col in test_data.columns if col != label_col]

# Likelihood used when a (feature value, label) pair has no entry in
# feature_probs, so one missing pair does not zero out the whole product.
UNSEEN_PROB = 1e-6

y_true = test_data[label_col].values
y_pred = []

for _, row in test_data.iterrows():
    scores = {}
    # Score every label that has a prior instead of a hard-coded [0, 1].
    for label, prior in class_probs.items():
        score = prior
        for col in feature_cols:
            # .get() is used on purpose: it does not trigger the defaultdict
            # factory, so lookups never insert new (zero-valued) entries.
            score *= feature_probs[col].get(row[col], {}).get(label, UNSEEN_PROB)
        scores[label] = score
    y_pred.append(max(scores, key=scores.get))

y_pred = np.array(y_pred)

In [8]:
# Compare predictions with the ground truth using scikit-learn's precision,
# recall and F1 scorers, each called with its default settings.
precision, recall, f1 = (
    metric(y_true, y_pred)
    for metric in (precision_score, recall_score, f1_score)
)

# Report each metric rounded to four decimal places.
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

Precision: 0.4420
Recall: 0.6854
F1-Score: 0.5374


Naive Bayes on Continuous Data

In [None]:
# Build a synthetic numeric dataset: four integer-valued features and a
# binary label, every column drawn independently and uniformly at random.
np.random.seed(42)  # fixed seed so the generated frame is reproducible
data_size = 1000

# Column name -> (low, high) bounds passed to np.random.randint; `high` is exclusive.
# The dict order is also the order in which the random draws are made.
feature_ranges = {
    'FeatureA': (1, 11),
    'FeatureB': (5, 16),
    'FeatureC': (0, 6),
    'FeatureD': (1, 21),
}

feature_a, feature_b, feature_c, feature_d = (
    np.random.randint(low, high, data_size) for low, high in feature_ranges.values()
)
labels = np.random.choice([0, 1], data_size)

columns = dict(zip(feature_ranges, (feature_a, feature_b, feature_c, feature_d)))
columns['Label'] = labels
df = pd.DataFrame(columns)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# partition identical across runs.
train_data, test_data = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
)

In [14]:
# Fit a Gaussian Naive Bayes model from the training split.
#
# Produces:
#   class_probs[label]   -> prior P(label), the share of training rows with that label
#   mean_std[col][label] -> (mean, std) of feature `col` over the training rows of that
#                           label, as returned by pandas' Series.mean() / Series.std()
#
# The DataFrame built earlier names its target column 'Label'; indexing with
# 'Class' raised a KeyError on a fresh run.
label_col = 'Label'
feature_cols = [col for col in train_data.columns if col != label_col]

class_counts = train_data[label_col].value_counts()
total_samples = len(train_data)

class_probs = {
    label: class_counts[label] / total_samples
    for label in class_counts.index
}

mean_std = {}
for col in feature_cols:
    mean_std[col] = {}
    for label in class_counts.index:
        # Values of this feature restricted to the rows of the current label.
        subset = train_data.loc[train_data[label_col] == label, col]
        mean_std[col][label] = (subset.mean(), subset.std())

In [16]:
# Classify every test row with the fitted Gaussian Naive Bayes model.
#
# For each row and each label the score is the log-posterior up to a constant:
#   log P(label) + sum over features of log N(x | mean, std)
# and the label with the highest score is predicted. Working in log space
# picks the same label as multiplying the raw densities, but cannot underflow
# to 0 for every class when a value lies far from all class means.
#
# The DataFrame built earlier names its target column 'Label'; indexing with
# 'Class' raised a KeyError on a fresh run.
label_col = 'Label'
feature_cols = [col for col in test_data.columns if col != label_col]

# Floor for the per-class standard deviation: a feature that is constant
# within a class has std == 0, which would otherwise divide by zero.
MIN_STD = 1e-9
LOG_SQRT_2PI = 0.5 * math.log(2 * math.pi)

y_true = test_data[label_col].values
y_pred = []

for _, row in test_data.iterrows():
    scores = {}
    for label, prior in class_probs.items():
        log_score = math.log(prior)
        for col in feature_cols:
            mean, std = mean_std[col][label]
            std = max(std, MIN_STD)
            z = (row[col] - mean) / std
            # log of the normal density: -z^2/2 - log(std) - log(sqrt(2*pi))
            log_score += -0.5 * z * z - math.log(std) - LOG_SQRT_2PI
        scores[label] = log_score
    y_pred.append(max(scores, key=scores.get))

y_pred = np.array(y_pred)

In [17]:
# Compare predictions with the ground truth using scikit-learn's precision,
# recall and F1 scorers, each called with its default settings.
precision, recall, f1 = (
    metric(y_true, y_pred)
    for metric in (precision_score, recall_score, f1_score)
)

# Report each metric rounded to four decimal places.
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

Precision: 0.4444
Recall: 0.3404
F1-Score: 0.3855
