In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

df = pd.read_csv('../dataset/play_tennis.csv')

def train_naive_bayes(data, target):
    class_probabilities = {}
    feature_probabilities = {}

    total_samples = len(data)

    for class_label, class_count in data[target].value_counts().items():
        class_probabilities[class_label] = class_count / total_samples
        class_data = data[data[target] == class_label]

        for feature in data.columns[:-1]: 
            feature_counts = class_data[feature].value_counts()

            for value, count in feature_counts.items():
                feature_probabilities.setdefault(class_label, {}).setdefault(feature, {})[value] = count / class_count

    return class_probabilities, feature_probabilities

def predict_naive_bayes(sample, class_probabilities, feature_probabilities):
    predictions = {}

    for class_label, class_probability in class_probabilities.items():
        likelihood = 1.0

        for feature, value in sample.items():
            if feature != target:
                likelihood *= feature_probabilities.get(class_label, {}).get(feature, {}).get(value, 0)

        predictions[class_label] = class_probability * likelihood

    return max(predictions, key=predictions.get)

target = 'play'
X = df.drop(target, axis=1)
y = df[target]

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
n_splits = 3
indices = np.arange(len(X))
np.random.shuffle(indices)

fold_indices = np.array_split(indices, n_splits)


for i in range(n_splits):
    test_indices = fold_indices[i]
    train_indices = np.concatenate(fold_indices[:i] + fold_indices[i+1:])

    X_train, X_test = X.iloc[train_indices], X.iloc[test_indices]
    y_train, y_test = y.iloc[train_indices], y.iloc[test_indices]

class_probabilities, feature_probabilities = train_naive_bayes(pd.concat([X_train, y_train], axis=1), target)

y_pred = X_test.apply(lambda sample: predict_naive_bayes(sample, class_probabilities, feature_probabilities), axis=1)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2%}")

Accuracy: 50.00%


In [2]:
cm = confusion_matrix(y_test, y_pred)
tp = cm[0,0]
fp = cm[0,1]
fn = cm[1,0]
tn = cm[1,1]
accuracy = (tp+tn)/(tp+fp+fn+tn)
print(accuracy)

0.5
