In [1]:
import pandas as pd

In [2]:
# Sample dataset: 14 categorical weather records, written one observation per
# row so each record can be read (and checked) at a glance.
WEATHER_COLUMNS = ['Outlook', 'Temperature', 'Humidity', 'Wind', 'Play']
WEATHER_ROWS = [
    ('Sunny', 'Hot', 'High', 'Weak', 'No'),
    ('Sunny', 'Hot', 'High', 'Strong', 'No'),
    ('Overcast', 'Mild', 'High', 'Weak', 'Yes'),
    ('Rainy', 'Cool', 'High', 'Weak', 'Yes'),
    ('Rainy', 'Cool', 'Normal', 'Weak', 'Yes'),
    ('Rainy', 'Cool', 'Normal', 'Strong', 'No'),
    ('Overcast', 'Mild', 'Normal', 'Strong', 'Yes'),
    ('Sunny', 'Mild', 'High', 'Weak', 'No'),
    ('Sunny', 'Cool', 'Normal', 'Weak', 'Yes'),
    ('Rainy', 'Mild', 'Normal', 'Weak', 'Yes'),
    ('Sunny', 'Mild', 'Normal', 'Strong', 'Yes'),
    ('Overcast', 'Mild', 'High', 'Strong', 'Yes'),
    ('Overcast', 'Hot', 'Normal', 'Weak', 'Yes'),
    ('Rainy', 'Mild', 'High', 'Strong', 'No'),
]

# Transpose the row records into a column-name -> list-of-values mapping.
data = {
    column: list(values)
    for column, values in zip(WEATHER_COLUMNS, zip(*WEATHER_ROWS))
}

df = pd.DataFrame(data)

In [3]:

def calculate_prior_probabilities(data, target_column):
    """Return the relative frequency of each value in ``data[target_column]``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding the labelled observations.
    target_column : str
        Name of the column whose value distribution is computed.

    Returns
    -------
    pandas.Series
        Indexed by the distinct target values; each entry is that value's
        share of the rows, as produced by ``value_counts(normalize=True)``.
    """
    labels = data[target_column]
    return labels.value_counts(normalize=True)

In [4]:
def calculate_likelihoods(data, feature, target_column):
    """Estimate P(feature value | target value) from row counts in ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding the labelled observations.
    feature : str
        Name of the feature column.
    target_column : str
        Name of the class-label column.

    Returns
    -------
    pandas.Series
        Keyed by ``(target value, feature value)``. Each entry is the number
        of rows with that pair divided by the number of rows with that target
        value. NOTE(review): for ordinary string columns, pairs that never
        occur appear to be absent from the result rather than present as 0.
    """
    # Rows per (class, feature value) pair.
    pair_counts = data.groupby([target_column, feature]).size()
    # Rows per class; the division lines this up with the class level above.
    class_totals = data.groupby(target_column).size()
    return pair_counts / class_totals

In [5]:
def naive_bayes_predictor(data, new_instance, target_column, alpha=1.0):
    """Predict the class of ``new_instance`` with a categorical Naive Bayes model.

    For every class the score is the class prior multiplied by one smoothed
    conditional probability per feature present in ``new_instance``::

        (count(class, value) + alpha) / (count(class) + alpha * n_values)

    where ``n_values`` is the number of distinct values the feature takes in
    ``data``. With ``alpha > 0`` a (class, value) pair that never occurs in
    the training data no longer forces that class's score to exactly zero.

    Parameters
    ----------
    data : pandas.DataFrame
        Training table of categorical features plus the target column. The
        target may sit at any column position.
    new_instance : pandas.Series
        Feature name -> observed value. It may cover only a subset of the
        feature columns; only the listed features contribute to the score.
    target_column : str
        Name of the class-label column in ``data``.
    alpha : float, optional
        Additive (Laplace) smoothing strength. Defaults to 1.0; pass 0 for
        unsmoothed relative-frequency estimates.

    Returns
    -------
    The class label with the highest score. On a tie, the label that occurs
    first in the class-count ordering is returned.

    Raises
    ------
    ValueError
        If ``alpha`` is negative or ``data`` has no labelled rows.
    KeyError
        If ``new_instance`` names a feature that is not a feature column of
        ``data``.
    """
    if alpha < 0:
        raise ValueError("alpha must be non-negative")

    # Every column except the target is a feature, wherever the target sits.
    feature_columns = [column for column in data.columns if column != target_column]
    unknown_features = [name for name in new_instance.index if name not in feature_columns]
    if unknown_features:
        raise KeyError(f"new_instance has features not present in data: {unknown_features}")

    class_counts = data[target_column].value_counts()
    # Drop classes with no rows (possible for categorical targets) so they
    # cannot cause a division by zero below.
    class_counts = class_counts[class_counts > 0]
    if class_counts.empty:
        raise ValueError("data has no labelled rows to learn from")
    prior_probabilities = class_counts / class_counts.sum()

    # Per-feature statistics, computed once and reused for every class.
    joint_counts = {}
    distinct_values = {}
    for feature in new_instance.index:
        joint_counts[feature] = data.groupby([target_column, feature]).size()
        distinct_values[feature] = data[feature].nunique()

    posteriors = {}
    for label, class_count in class_counts.items():
        posterior = prior_probabilities[label]
        for feature in new_instance.index:
            # Pairs never seen in training have no entry; count them as 0 and
            # let the smoothing term supply the probability mass.
            pair_count = joint_counts[feature].get((label, new_instance[feature]), 0)
            posterior *= (pair_count + alpha) / (class_count + alpha * distinct_values[feature])
        posteriors[label] = posterior

    # Predict the class with the highest posterior score.
    return max(posteriors, key=posteriors.get)

In [6]:
# Try the Naive Bayes predictor on a single observation that is not one of
# the training rows.
new_instance = pd.Series(
    ['Sunny', 'Mild', 'High', 'Weak'],
    index=['Outlook', 'Temperature', 'Humidity', 'Wind'],
)
predicted_class = naive_bayes_predictor(df, new_instance, target_column='Play')

In [7]:
# Report the label the predictor chose for `new_instance`.
print(f"Predicted class for new instance: {predicted_class}")

Predicted class for new instance: No
