In [25]:
import pandas as pd
import numpy as np
from collections import defaultdict


In [24]:
data = [['Sunny', 'Hot', 'High', 'Weak', 0], ['Sunny', 'Hot', 'High', 'Strong', 0],
        ['Overcast', 'Hot', 'High', 'Weak', 1], ['Rain', 'Mild', 'High', 'Weak', 1],
        ['Rain', 'Cool', 'Normal', 'Weak', 1], ['Rain', 'Cool', 'Normal', 'Strong', 0],
        ['Overcast', 'Cool', 'Normal', 'Strong', 1], ['Sunny', 'Mild', 'High', 'Weak', 0],
        ['Sunny', 'Cool', 'Normal', 'Weak', 1], ['Rain', 'Mild', 'Normal', 'Weak', 'Yes'],
        ['Sunny', 'Mild', 'Normal', 'Strong', 1], ['Overcast', 'Mild', 'High', 'Strong', 0],
        ['Overcast', 'Hot', 'Normal', 'Weak', 1], ['Rain', 'Mild', 'High', 'Strong', 0]
        ]


In [28]:
def c_prior_prob(data):
    class_counts = defaultdict(int)
    total_samples = len(data)
    for sample in data:
        class_counts[sample[-1]] += 1
    
    prior_prob = {}
    for class_label, count in class_counts.items():
        prior_prob[class_label] = count / total_samples
    
    return prior_prob


In [30]:
def c_conditional_prob(data):
    conditional_prob= defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
    feature_value_counts = defaultdict(lambda: defaultdict(int))
    
    for sample in data:
        class_label = sample[-1]
        for i, feature_value in enumerate(sample[:-1]):
            feature_value_counts[i][class_label, feature_value] += 1
    
    for feature, class_values in feature_value_counts.items():
        for (class_label, feature_value), count in class_values.items():
            conditional_prob[feature][class_label][feature_value] = count
    
    # Normalize probabilities
    for feature, class_values in conditional_prob.items():
        for class_label, feature_values in class_values.items():
            total_count = sum(feature_values.values())
            for feature_value in feature_values:
                conditional_prob[feature][class_label][feature_value] /= total_count
    
    return conditional_prob

In [16]:
def predict(test_data, prior_prob, conditional_prob):
    classes = list(prior_prob.keys())
    prob = {}
    
    for class_label in classes:
        prob1 = prior_prob[class_label]
        for i, feature_value in enumerate(test_data):
            prob1 *= conditional_prob[i][class_label][feature_value]
        prob[class_label] = prob1
    
    # Normalize probabilities
    total_prob= sum(prob.values())
    for class_label in classes:
        prob[class_label] /= total_prob
    
    return prob

In [32]:
prior = c_prior_prob(data)

# 计算条件概率
conditional_prob = c_conditional_prob(data)

# 测试数据
test_data = ['Sunny', 'Cool', 'High', 'Strong']

# 预测
predicted_prob = predict(test_data, prior, conditional_prob)
print("Predicted Probabilities:", predicted_prob)

Predicted Probabilities: {0: 0.7987890079180252, 1: 0.20121099208197482, 'Yes': 0.0}
