In [1]:
import pandas as pd 
import numpy as np 
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score, confusion_matrix 
from sklearn.preprocessing import LabelEncoder 

class NaiveBayesClassifier: 
    def __init__(self): 
        self.class_probs = {} 
        self.feature_probs = {} 
    
    def fit(self, X, y): 
        num_samples, num_features = X.shape 
        unique_classes = np.unique(y) 
        
        for c in unique_classes: 
            # Calculate class probabilities 
            self.class_probs[c] = np.sum(y == c) / num_samples 
            
            # Calculate feature probabilities for each class 
            features_given_class = X[y == c] 
            self.feature_probs[c] = (np.sum(features_given_class, axis=0) + 1) / (np.sum(y == c) + 2)  # Laplace smoothing
    
    def predict(self, X): 
        predictions = [] 
        for sample in X: 
            class_scores = {} 
            for c, class_prob in self.class_probs.items(): 
                feature_probs_given_class = self.feature_probs[c] 
                log_prob = np.sum(np.log(sample * feature_probs_given_class + (1 - sample) * (1 - feature_probs_given_class) + 1e-9))  # Avoid log(0) 
                class_scores[c] = np.log(class_prob) + log_prob 
            
            # Predict the class with the highest probability 
            predicted_class = max(class_scores, key=class_scores.get) 
            predictions.append(predicted_class) 
        return np.array(predictions) 

# Load dataset
data = pd.read_csv('C:\\Users\\Musakalim Khan\\Downloads\\csv-dataset\\play_tennis.csv')

# Convert categorical features to numerical values using label encoding 
label_encoder = LabelEncoder() 
for column in data.select_dtypes(include=['object']).columns: 
    data[column] = label_encoder.fit_transform(data[column]) 

X = data.drop('play', axis=1).values 
y = data['play'].values 

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) 

# Train Naive Bayes classifier
nb_classifier = NaiveBayesClassifier() 
nb_classifier.fit(X_train, y_train) 

# Make predictions
predictions = nb_classifier.predict(X_test) 

# Calculate accuracy
accuracy = accuracy_score(y_test, predictions) 
print("Predictions:", predictions) 
print("Accuracy:", accuracy) 

# Compute confusion matrix
cm = confusion_matrix(y_test, predictions) 
tp, fp, fn, tn = cm.ravel() 

print("True Positives:", tp) 
print("False Positives:", fp) 
print("False Negatives:", fn) 
print("True Negatives:", tn) 

# Accuracy calculation
accuracy = (tp + tn) / (tp + fp + tn + fn) 
print("Final Accuracy:", accuracy)


Predictions: [1 0 0]
Accuracy: 0.6666666666666666
True Positives: 1
False Positives: 0
False Negatives: 1
True Negatives: 1
Final Accuracy: 0.6666666666666666


  log_prob = np.sum(np.log(sample * feature_probs_given_class + (1 - sample) * (1 - feature_probs_given_class) + 1e-9))  # Avoid log(0)
