In [1]:
import pandas as pd
# Play-tennis training set: 14 observations, one whitespace-separated string
# per column (position i in every string belongs to observation i).
data = {
    "Outlook": (
        "sunny sunny overcast rain rain rain overcast "
        "sunny sunny rain sunny overcast overcast rain"
    ).split(),
    "Temperature": (
        "hot hot hot mild cool cool cool "
        "mild cool mild mild mild hot mild"
    ).split(),
    "Humidity": (
        "high high high high normal normal normal "
        "high normal normal normal high normal high"
    ).split(),
    "Windy": (
        "weak strong weak weak weak strong strong "
        "weak weak weak strong strong weak strong"
    ).split(),
    "Class": (
        "No No Yes Yes Yes No Yes "
        "No Yes Yes Yes Yes Yes No"
    ).split(),
}

# Tabular view of the same data; every later cell reads from this frame.
df = pd.DataFrame(data)



In [2]:
# Number of observations overall and per class label.
total_instances = len(df)
count_yes = len(df.loc[df['Class'].eq('Yes')])
count_no = len(df.loc[df['Class'].eq('No')])

# Prior probability of each class = class size / data set size.
prob_yes, prob_no = count_yes / total_instances, count_no / total_instances

print(f"Total Probability of Playing Tennis (Yes): {prob_yes:.4f}")
print(f"Total Probability of Not Playing Tennis (No): {prob_no:.4f}")

Total Probability of Playing Tennis (Yes): 0.6429
Total Probability of Not Playing Tennis (No): 0.3571


In [3]:
# Tally the rows overall and within each class.
total_count = len(df)
yes_count = len(df.loc[df['Class'].eq('Yes')])
no_count = len(df.loc[df['Class'].eq('No')])

# Report the two class sizes, followed by a blank separator line.
print(f"Total Yes (Play Tennis): {yes_count}")
print(f"Total No (Do Not Play Tennis): {no_count}")
print()

Total Yes (Play Tennis): 9
Total No (Do Not Play Tennis): 5



In [4]:
def calculate_probabilities(feature, data=None):
    """Estimate the class-conditional likelihood of every value of ``feature``.

    For each distinct value ``v`` of the column, two fractions are computed:

    * ``P(v | Yes)`` = rows with ``feature == v`` and ``Class == 'Yes'``,
      divided by the number of ``'Yes'`` rows;
    * ``P(v | No)``  = the same for ``'No'`` rows.

    These are likelihoods (value given class), not posteriors (class given
    value), and the dictionary keys are labelled accordingly.

    Parameters
    ----------
    feature : str
        Name of the feature column to tabulate.
    data : pandas.DataFrame, optional
        Frame holding ``feature`` and a ``'Class'`` column with ``'Yes'`` /
        ``'No'`` labels. Defaults to the notebook-level ``df``.

    Returns
    -------
    dict
        ``{value: {'P(value | Yes)': float, 'P(value | No)': float}}`` with
        values in order of first appearance in the column. If a class has no
        rows at all, its likelihood is reported as ``0.0``.
    """
    if data is None:
        data = df  # fall back to the notebook's global frame

    # Derive the class subsets from the frame itself so the result never
    # depends on counters computed (and possibly reassigned) in other cells.
    yes_rows = data[data['Class'] == 'Yes']
    no_rows = data[data['Class'] == 'No']

    def _likelihood(class_rows, value):
        # Fraction of the class's rows that carry this feature value.
        class_size = len(class_rows)
        if class_size == 0:
            return 0.0  # empty class: avoid ZeroDivisionError
        return len(class_rows[class_rows[feature] == value]) / class_size

    probabilities = {}
    for value in data[feature].unique():
        probabilities[value] = {
            'P({} | Yes)'.format(value): _likelihood(yes_rows, value),
            'P({} | No)'.format(value): _likelihood(no_rows, value),
        }

    return probabilities
# Build the per-value probability table for each of the four feature columns.
outlook_probs, temperature_probs, humidity_probs, windy_probs = map(
    calculate_probabilities,
    ('Outlook', 'Temperature', 'Humidity', 'Windy'),
)

In [5]:
# Print the Outlook table, one feature value per line, then a blank line.
print("Outlook probabilities:")
for value in outlook_probs:
    probs = outlook_probs[value]
    print(f"{value}: {probs}")
print()


Outlook probabilities:
sunny: {'P(Yes | sunny)': 0.2222222222222222, 'P(No | sunny)': 0.6}
overcast: {'P(Yes | overcast)': 0.4444444444444444, 'P(No | overcast)': 0.0}
rain: {'P(Yes | rain)': 0.3333333333333333, 'P(No | rain)': 0.4}



In [6]:
# Print the Temperature table, one feature value per line, then a blank line.
print("Temperature probabilities:")
for value in temperature_probs:
    probs = temperature_probs[value]
    print(f"{value}: {probs}")
print()


Temperature probabilities:
hot: {'P(Yes | hot)': 0.2222222222222222, 'P(No | hot)': 0.4}
mild: {'P(Yes | mild)': 0.4444444444444444, 'P(No | mild)': 0.4}
cool: {'P(Yes | cool)': 0.3333333333333333, 'P(No | cool)': 0.2}



In [7]:
# Print the Humidity table, one feature value per line, then a blank line.
print("Humidity probabilities:")
for value in humidity_probs:
    probs = humidity_probs[value]
    print(f"{value}: {probs}")
print()


Humidity probabilities:
high: {'P(Yes | high)': 0.3333333333333333, 'P(No | high)': 0.8}
normal: {'P(Yes | normal)': 0.6666666666666666, 'P(No | normal)': 0.2}



In [8]:
# Print the Windy table, one feature value per line, then a blank line.
print("Windy probabilities:")
for value in windy_probs:
    probs = windy_probs[value]
    print(f"{value}: {probs}")
print()

Windy probabilities:
weak: {'P(Yes | weak)': 0.6666666666666666, 'P(No | weak)': 0.4}
strong: {'P(Yes | strong)': 0.3333333333333333, 'P(No | strong)': 0.6}



In [9]:
# Class sizes, then the prior probability of each class (class size / total).
total = len(df)
yes_count, no_count = (len(df[df['Class'] == label]) for label in ('Yes', 'No'))

p_yes, p_no = yes_count / total, no_count / total


In [10]:
def conditional_probability(feature, value, target_class, data=None, alpha=0):
    """Return the likelihood ``P(feature == value | Class == target_class)``.

    The estimate is the fraction of ``target_class`` rows whose ``feature``
    column equals ``value``, optionally with additive (Laplace) smoothing::

        (count + alpha) / (class_size + alpha * n_distinct_feature_values)

    Parameters
    ----------
    feature : str
        Name of the feature column.
    value : object
        Feature value whose likelihood is wanted.
    target_class : object
        Label in the ``'Class'`` column to condition on.
    data : pandas.DataFrame, optional
        Frame to estimate from. Defaults to the notebook-level ``df``.
    alpha : float, optional
        Non-negative smoothing strength. The default ``0`` gives the plain
        relative frequency; ``1`` gives classic Laplace smoothing, which keeps
        a single unseen value from zeroing out a naive Bayes product.

    Returns
    -------
    float
        The (smoothed) relative frequency. ``0.0`` is returned when the
        denominator is zero, i.e. the class has no rows and ``alpha`` is 0.

    Raises
    ------
    ValueError
        If ``alpha`` is negative.
    """
    if alpha < 0:
        raise ValueError("alpha must be non-negative")
    if data is None:
        data = df  # fall back to the notebook's global frame

    class_rows = data[data['Class'] == target_class]
    total_class = len(class_rows)
    feature_class_count = len(class_rows[class_rows[feature] == value])

    # The number of distinct feature values only matters when smoothing is on.
    n_values = data[feature].nunique() if alpha else 0
    denominator = total_class + alpha * n_values
    if denominator == 0:
        return 0.0  # unseen class without smoothing: avoid ZeroDivisionError
    return (feature_class_count + alpha) / denominator
# Feature values of the day to classify.
outlook, temperature, humidity, windy = 'sunny', 'cool', 'high', 'strong'

# Likelihood of each observed feature value within the "Yes" class.
p_sunny_given_yes, p_cool_given_yes, p_high_given_yes, p_strong_given_yes = (
    conditional_probability(column, observed, 'Yes')
    for column, observed in (
        ('Outlook', outlook),
        ('Temperature', temperature),
        ('Humidity', humidity),
        ('Windy', windy),
    )
)

# Likelihood of each observed feature value within the "No" class.
p_sunny_given_no, p_cool_given_no, p_high_given_no, p_strong_given_no = (
    conditional_probability(column, observed, 'No')
    for column, observed in (
        ('Outlook', outlook),
        ('Temperature', temperature),
        ('Humidity', humidity),
        ('Windy', windy),
    )
)

# Unnormalised score per class: class prior times the four likelihoods
# (the naive Bayes numerator).
p_yes_given_conditions = (
    p_yes
    * p_sunny_given_yes
    * p_cool_given_yes
    * p_high_given_yes
    * p_strong_given_yes
)
p_no_given_conditions = (
    p_no
    * p_sunny_given_no
    * p_cool_given_no
    * p_high_given_no
    * p_strong_given_no
)

# Normalise the two scores so they sum to 1.
total_prob = p_yes_given_conditions + p_no_given_conditions
p_yes_final, p_no_final = (
    p_yes_given_conditions / total_prob,
    p_no_given_conditions / total_prob,
)



In [11]:
# Report the normalised probability of each class, one per line.
print(
    f"Probability of playing tennis (Yes) given the conditions: {p_yes_final:.4f}",
    f"Probability of not playing tennis (No) given the conditions: {p_no_final:.4f}",
    sep="\n",
)

Probability of playing tennis (Yes) given the conditions: 0.2046
Probability of not playing tennis (No) given the conditions: 0.7954


In [12]:
# Predict the class with the larger normalised probability (a tie goes to "No").
print(
    "Prediction: Play Tennis"
    if p_yes_final > p_no_final
    else "Prediction: Do Not Play Tennis"
)

Prediction: Do Not Play Tennis
