In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict

class NaiveBayes:
    """Categorical Naive Bayes classifier for pandas DataFrames.

    Each feature is treated as a discrete variable.  ``fit`` estimates the
    class priors P(class) and the conditional tables P(feature=value | class);
    ``predict`` picks the class with the highest summed log-probability.

    Parameters
    ----------
    alpha : float, default 0.0
        Additive (Laplace/Lidstone) smoothing strength.  With ``alpha == 0``
        the likelihoods are plain relative frequencies and any value that
        was not observed together with a class falls back to a tiny fixed
        probability (``1e-6``).  With ``alpha > 0`` every value seen anywhere
        in the training data gets the smoothed estimate
        ``(count + alpha) / (n_class + alpha * n_distinct_values)``.

    Attributes
    ----------
    prior : dict
        Maps class label -> P(class).
    likelihood : dict
        ``likelihood[cls][feature]`` is a plain dict mapping a feature value
        to P(feature=value | cls).
    classes : numpy.ndarray or None
        Sorted unique class labels (``None`` until ``fit`` is called).
    features : pandas.Index or None
        Column labels of the training frame (``None`` until ``fit``).
    """

    # Probability used for a value that has no entry in a likelihood table;
    # keeps log() finite instead of producing -inf.
    _UNSEEN_PROB = 1e-6

    def __init__(self, alpha=0.0):
        if alpha < 0:
            raise ValueError("alpha must be non-negative")
        self.alpha = alpha
        self.prior = {}
        self.likelihood = {}
        self.classes = None
        self.features = None

    def fit(self, X, y):
        """Estimate priors and per-class likelihood tables.

        Parameters
        ----------
        X : pandas.DataFrame
            Training features, one categorical column per feature.
        y : pandas.Series, numpy.ndarray or sequence
            Class label for each row of ``X``.
        """
        # One pass gives both the labels and their frequencies.
        classes, class_counts = np.unique(y, return_counts=True)
        self.classes = classes
        self.features = X.columns
        total_count = len(y)

        # Prior probabilities P(class)
        self.prior = {
            cls: count / total_count
            for cls, count in zip(classes, class_counts)
        }

        # A Series keeps pandas' own comparison semantics; anything else
        # (e.g. a plain list) is converted so ``== cls`` is element-wise.
        labels = y if isinstance(y, pd.Series) else np.asarray(y)

        # For smoothing we need every distinct value of each feature,
        # including those never seen together with a given class.
        vocabulary = {}
        if self.alpha > 0:
            vocabulary = {
                feature: X[feature].dropna().unique()
                for feature in self.features
            }

        # Likelihoods P(feature=value | class)
        self.likelihood = {}
        for cls in classes:
            X_cls = X[labels == cls]
            n_cls = len(X_cls)
            tables = {}
            for feature in self.features:
                observed = X_cls[feature].value_counts().to_dict()
                if self.alpha > 0:
                    values = vocabulary[feature]
                    denom = n_cls + self.alpha * len(values)
                    tables[feature] = {
                        value: (observed.get(value, 0) + self.alpha) / denom
                        for value in values
                    }
                else:
                    # Unsmoothed relative frequencies; missing values are
                    # handled by the fallback in ``predict``.
                    tables[feature] = {
                        value: count / n_cls
                        for value, count in observed.items()
                    }
            self.likelihood[cls] = tables

    def predict(self, X):
        """Return the most probable class for every row of ``X``.

        Parameters
        ----------
        X : pandas.DataFrame
            Must contain the columns seen during ``fit``.

        Returns
        -------
        list
            One predicted class label per row, in row order.
        """
        predictions = []
        for _, row in X.iterrows():
            class_probs = {}
            for cls in self.classes:
                tables = self.likelihood[cls]
                # Work in log space so the product of many small
                # probabilities does not underflow.
                score = np.log(self.prior[cls])
                for feature in self.features:
                    score += np.log(
                        tables[feature].get(row[feature], self._UNSEEN_PROB)
                    )
                class_probs[cls] = score
            # Select the class with the highest log-probability
            predictions.append(max(class_probs, key=class_probs.get))
        return predictions

# Toy "play tennis" dataset: 14 observations, four categorical
# features and the PlayTennis target. Each list holds one column.
data = {
    'Outlook': [
        'Sunny', 'Sunny', 'Overcast', 'Rain', 'Rain', 'Rain', 'Overcast',
        'Sunny', 'Sunny', 'Rain', 'Sunny', 'Overcast', 'Overcast', 'Rain',
    ],
    'Temperature': [
        'Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool',
        'Mild', 'Cool', 'Mild', 'Mild', 'Mild', 'Hot', 'Mild',
    ],
    'Humidity': [
        'High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal',
        'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'High',
    ],
    'Wind': [
        'Weak', 'Strong', 'Weak', 'Weak', 'Weak', 'Strong', 'Strong',
        'Weak', 'Weak', 'Weak', 'Strong', 'Strong', 'Weak', 'Strong',
    ],
    'PlayTennis': [
        'No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes',
        'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No',
    ],
}

df = pd.DataFrame(data)

# Split the frame into the feature matrix and the target column.
y = df['PlayTennis']
X = df[['Outlook', 'Temperature', 'Humidity', 'Wind']]

# Train the classifier on the full dataset.
nb = NaiveBayes()
nb.fit(X, y)

# Score the same rows the model was trained on, so the output shows
# how well it reproduces the training labels.
predictions = nb.predict(X)

# Show the predictions next to the true labels.
df['Predicted'] = predictions
print(df)


     Outlook Temperature Humidity    Wind PlayTennis Predicted
0      Sunny         Hot     High    Weak         No        No
1      Sunny         Hot     High  Strong         No        No
2   Overcast         Hot     High    Weak        Yes       Yes
3       Rain        Mild     High    Weak        Yes       Yes
4       Rain        Cool   Normal    Weak        Yes       Yes
5       Rain        Cool   Normal  Strong         No       Yes
6   Overcast        Cool   Normal  Strong        Yes       Yes
7      Sunny        Mild     High    Weak         No        No
8      Sunny        Cool   Normal    Weak        Yes       Yes
9       Rain        Mild   Normal    Weak        Yes       Yes
10     Sunny        Mild   Normal  Strong        Yes       Yes
11  Overcast        Mild     High  Strong        Yes       Yes
12  Overcast         Hot   Normal    Weak        Yes       Yes
13      Rain        Mild     High  Strong         No        No
