In [None]:
import math

# Toy weather training set. Each row is [Outlook, Temp, label], which is the
# order in which train_naive_bayes unpacks a row.
dataset = [
    ['Sunny', 'Hot', 'No'],
    ['Sunny', 'Hot', 'No'],
    ['Overcast', 'Hot', 'Yes'],
    ['Rain', 'Mild', 'Yes'],
    ['Rain', 'Cool', 'Yes'],
    ['Rain', 'Cool', 'No'],
    ['Overcast', 'Cool', 'Yes'],
    ['Sunny', 'Mild', 'No'],
    ['Sunny', 'Cool', 'Yes'],
    ['Rain', 'Mild', 'Yes'],
    ['Sunny', 'Mild', 'Yes'],
    ['Overcast', 'Mild', 'Yes'],
    ['Overcast', 'Hot', 'Yes'],
    ['Rain', 'Mild', 'No']
]

# Order matters: predict_naive_bayes pairs features[i] with the i-th value of
# the sample it is given, and the names must match the keys that
# train_naive_bayes stores in feature_counts.
features = ["Outlook", "Temp"]  # Feature names

def train_naive_bayes(data):
    """Tally the counts a categorical Naive Bayes classifier needs.

    Args:
        data: Iterable of rows, each exactly ``[outlook, temp, label]``.

    Returns:
        A ``(label_counts, feature_counts)`` tuple where ``label_counts`` maps
        label -> number of rows, and ``feature_counts`` maps
        label -> feature name ("Outlook" / "Temp") -> value -> number of rows.
    """
    class_totals = {}
    value_tallies = {}

    for outlook, temp, label in data:
        # One more row seen for this class.
        class_totals[label] = class_totals.get(label, 0) + 1

        # First time a class appears, give it an empty table per feature.
        per_class = value_tallies.setdefault(label, {"Outlook": {}, "Temp": {}})

        # Bump the (feature, value) tally for each column of the row.
        for name, observed in (("Outlook", outlook), ("Temp", temp)):
            tally = per_class[name]
            tally[observed] = tally.get(observed, 0) + 1

    return class_totals, value_tallies


# Prediction function

def predict_naive_bayes(x, label_counts, feature_counts):
    """Return the most probable class label for a single sample.

    Each label is scored with ``log P(label) + sum_i log P(x[i] | label)``,
    where the conditional terms use add-one (Laplace) smoothing:
    ``(count + 1) / (rows_with_label + V)``. ``V`` is the number of distinct
    values the feature takes across *all* labels, so that the smoothed
    probabilities of a feature sum to 1 for every label.

    Args:
        x: Feature values, in the same order as the module-level ``features``
            list (e.g. ``[outlook, temp]``).
        label_counts: Mapping of label -> number of training rows.
        feature_counts: Mapping of label -> feature name -> value -> count,
            as returned by ``train_naive_bayes``.

    Returns:
        The label with the highest log-score. On a tie, the label that comes
        first in ``label_counts`` wins.

    Raises:
        ValueError: If ``label_counts`` is empty.
    """
    if not label_counts:
        raise ValueError("label_counts is empty; cannot predict without training data")

    total_samples = sum(label_counts.values())

    # Size of each feature's vocabulary, pooled over every label. Using only
    # the values seen with the current label would shrink the denominator for
    # labels that never co-occurred with some value and bias the result
    # towards those labels.
    vocab_sizes = {}
    for feature in features:
        seen_values = set()
        for label in label_counts:
            seen_values.update(feature_counts[label][feature])
        vocab_sizes[feature] = len(seen_values)

    probs = {}
    for label, label_total in label_counts.items():
        # Start with the log prior: log(P(label)).
        log_score = math.log(label_total / total_samples)

        for i, feature in enumerate(features):
            # How often this feature value was seen together with the label
            # (0 when the pair never occurred in training).
            count = feature_counts[label][feature].get(x[i], 0)

            # Laplace-smoothed likelihood P(value | label).
            likelihood = (count + 1) / (label_total + vocab_sizes[feature])

            # Sum logs instead of multiplying probabilities to avoid underflow.
            log_score += math.log(likelihood)

        probs[label] = log_score

    # Return the label with the highest score.
    return max(probs, key=probs.get)

# Train the model: tally label and per-label feature-value counts from dataset.
label_counts, feature_counts = train_naive_bayes(dataset)


# Unlabelled samples to classify; each is [Outlook, Temp], matching the order
# of the `features` list.
test_samples = [
    ['Sunny', 'Cool'],
    ['Overcast', 'Mild'],
    ['Rain', 'Hot'],
    ['Sunny', 'Hot']
]

# Classify every test sample and print the predicted label next to it.
for sample in test_samples:
    prediction = predict_naive_bayes(sample, label_counts, feature_counts)
    print(f"Test Sample: {sample} -> Predicted Class: {prediction}")

# Dump the raw counts so the predictions above can be checked by hand.
print("\n--- Model Learned Parameters ---")
print("Label Counts:", label_counts)
print("Feature Counts:", feature_counts)


Test Sample: ['Sunny', 'Cool'] -> Predicted Class: Yes
Test Sample: ['Overcast', 'Mild'] -> Predicted Class: Yes
Test Sample: ['Rain', 'Hot'] -> Predicted Class: No
Test Sample: ['Sunny', 'Hot'] -> Predicted Class: No

--- Model Learned Parameters ---
Label Counts: {'No': 5, 'Yes': 9}
Feature Counts: {'No': {'Outlook': {'Sunny': 3, 'Rain': 2}, 'Temp': {'Hot': 2, 'Cool': 1, 'Mild': 2}}, 'Yes': {'Outlook': {'Overcast': 4, 'Rain': 3, 'Sunny': 2}, 'Temp': {'Hot': 2, 'Mild': 4, 'Cool': 3}}}
