In [17]:
import numpy as np

In [18]:
def create_train_data():
    """Build the play-tennis training table as a 2-D NumPy array of strings.

    Every row is one observation: four categorical feature values followed
    by the class label ('yes' / 'no') in the last column. The first column
    is the outlook; the remaining feature columns are presumably
    temperature, humidity and wind.

    Returns
    -------
    numpy.ndarray
        Array with 10 rows and 5 columns.
    """
    # One observation per line, fields separated by a single space.
    observations = (
        'Sunny Hot High Weak no',
        'Sunny Hot High Strong no',
        'Overcast Hot High Weak yes',
        'Rain Mild High Weak yes',
        'Rain Cool Normal Weak yes',
        'Rain Cool Normal Strong no',
        'Overcast Cool Normal Strong yes',
        'Overcast Mild High Weak no',
        'Sunny Cool Normal Weak yes',
        'Rain Mild Normal Weak yes',
    )
    return np.array([observation.split() for observation in observations])

# Build the training table and print it to check the rows.
train_data = create_train_data()
print(train_data)

[['Sunny' 'Hot' 'High' 'Weak' 'no']
 ['Sunny' 'Hot' 'High' 'Strong' 'no']
 ['Overcast' 'Hot' 'High' 'Weak' 'yes']
 ['Rain' 'Mild' 'High' 'Weak' 'yes']
 ['Rain' 'Cool' 'Normal' 'Weak' 'yes']
 ['Rain' 'Cool' 'Normal' 'Strong' 'no']
 ['Overcast' 'Cool' 'Normal' 'Strong' 'yes']
 ['Overcast' 'Mild' 'High' 'Weak' 'no']
 ['Sunny' 'Cool' 'Normal' 'Weak' 'yes']
 ['Rain' 'Mild' 'Normal' 'Weak' 'yes']]


In [19]:
def compute_prior_probability(train_data):
    """Estimate the prior probability of each class from its relative frequency.

    Parameters
    ----------
    train_data : sequence of rows
        Each row's last element is the class label, 'no' or 'yes'.

    Returns
    -------
    numpy.ndarray
        Two floats: index 0 is P('no'), index 1 is P('yes').

    Raises
    ------
    KeyError
        If a row carries a label other than 'no' or 'yes'.
    ZeroDivisionError
        If `train_data` has no rows.
    """
    class_labels = ['no', 'yes']

    # Tally how many rows belong to each class.
    tally = dict.fromkeys(class_labels, 0)
    for sample in train_data:
        tally[sample[-1]] += 1

    n_samples = len(train_data)
    return np.array([tally[name] / n_samples for name in class_labels], dtype=float)

# Index 0 of the result is the 'no' class and index 1 is the 'yes' class.
prior_probability = compute_prior_probability(train_data)
print('P(play tennis = No)', prior_probability[0])
print('P(play tennis = Yes)', prior_probability[1])


P(play tennis = No) 0.4
P(play tennis = Yes) 0.6


In [20]:
def compute_conditional_probability(train_data, alpha=0.0):
    """Estimate P(feature value | class) for every feature column.

    Parameters
    ----------
    train_data : numpy.ndarray
        2-D array whose last column is the class label ('no' / 'yes') and
        whose other columns are categorical features.
    alpha : float, optional
        Additive (Laplace) smoothing constant added to every count. The
        default 0.0 gives the plain relative frequencies; a positive value
        keeps feature values never seen with a class from getting
        probability zero.

    Returns
    -------
    conditional_probability : list of numpy.ndarray
        One table per feature with shape (2, number of distinct values).
        Row 0 is the 'no' class and row 1 the 'yes' class; columns follow
        the order of the matching entry in `list_x_name`.
    list_x_name : list of numpy.ndarray
        The sorted distinct values of each feature, as given by `np.unique`.
    """
    y_unique = ['no', 'yes']
    labels = train_data[:, -1]
    conditional_probability = []
    list_x_name = []

    for i in range(train_data.shape[1] - 1):
        column = train_data[:, i]
        x_unique = np.unique(column)
        list_x_name.append(x_unique)
        x_conditional_probability = np.zeros((len(y_unique), len(x_unique)))

        for j, label in enumerate(y_unique):
            class_values = column[labels == label]
            denominator = len(class_values) + alpha * len(x_unique)
            if denominator == 0:
                # The class has no samples and no smoothing is applied:
                # keep the row at 0 rather than computing 0/0 (NaN).
                continue
            for k, value in enumerate(x_unique):
                count = np.sum(class_values == value)
                x_conditional_probability[j, k] = (count + alpha) / denominator

        conditional_probability.append(x_conditional_probability)

    return conditional_probability, list_x_name

# Fit the conditional tables, then list the distinct values of the four features.
conditional_probability, list_x_name = compute_conditional_probability(train_data)
for position in range(1, 5):
    print('x{} ='.format(position), list_x_name[position - 1])

x1 = ['Overcast' 'Rain' 'Sunny']
x2 = ['Cool' 'Hot' 'Mild']
x3 = ['High' 'Normal']
x4 = ['Strong' 'Weak']


In [21]:
def get_index_from_value(feature_name, list_features):
    """Return the position of the first entry equal to `feature_name`.

    Parameters
    ----------
    feature_name : str
        The value to look for.
    list_features : array-like
        The candidate values; a NumPy array or a plain list/tuple.

    Returns
    -------
    numpy integer
        Index of the first match along the first axis.

    Raises
    ------
    IndexError
        If `feature_name` does not occur in `list_features`.
    """
    # Coerce to an array so the comparison is element-wise for lists too.
    candidates = np.asarray(list_features)
    matches = np.where(candidates == feature_name)[0]
    if matches.size == 0:
        raise IndexError(
            '{!r} is not one of the known values {}'.format(feature_name, candidates.tolist())
        )
    return matches[0]

# Show where each outlook value sits in the sorted list of distinct values.
train_data = create_train_data()
_, list_x_name = compute_conditional_probability(train_data)
outlook = list_x_name[0]

i1, i2, i3 = [get_index_from_value(value, outlook) for value in ('Overcast', 'Rain', 'Sunny')]

print(i1, i2, i3)

0 1 2


In [22]:
# Row 1 of each conditional table is the 'yes' class; the column is the
# position of 'Sunny' among the distinct outlook values.
train_data = create_train_data()
conditional_probability, list_x_name = compute_conditional_probability(train_data)
x1 = get_index_from_value('Sunny', list_x_name[0])
print ("P('Outlook'='Sunny'|Play Tennis'='Yes') = ", np.round(conditional_probability[0][1, x1], 2))

P('Outlook'='Sunny'|Play Tennis'='Yes') =  0.17


In [23]:
def train_naive_bayes(train_data):
    """Fit the Naive Bayes model by estimating priors and conditional tables.

    Parameters
    ----------
    train_data : numpy.ndarray
        Training table passed unchanged to `compute_prior_probability` and
        `compute_conditional_probability`.

    Returns
    -------
    tuple
        (prior_probability, conditional_probability, list_x_name).
    """
    # Priors are computed first, then the conditional tables are unpacked
    # straight into the returned tuple.
    return (
        compute_prior_probability(train_data),
        *compute_conditional_probability(train_data),
    )

def prediction_play_tennis(X, list_x_name, prior_probability, conditional_probability):
    """Predict the class of one sample with the fitted Naive Bayes model.

    Parameters
    ----------
    X : sequence of str
        One value per feature, in the same order as `list_x_name`. It must
        supply a value for every feature of the model.
    list_x_name : list of array-like
        Distinct values of each feature, used to turn a value into a column
        index of the matching conditional table.
    prior_probability : sequence of float
        Class priors; index 0 is the first class and index 1 the second.
    conditional_probability : list of numpy.ndarray
        One table per feature, indexed as [class, value index].

    Returns
    -------
    int
        0 when the score of class 0 is strictly larger, otherwise 1.

    Raises
    ------
    IndexError
        If `X` has fewer values than the model has features, or (as raised
        by `get_index_from_value`) a value was not seen during training.
    """
    # Start from the priors and multiply in one likelihood per feature.
    # Works for any number of features, not only four.
    p0 = prior_probability[0]
    p1 = prior_probability[1]

    for i, (x_unique, table) in enumerate(zip(list_x_name, conditional_probability)):
        value_index = get_index_from_value(X[i], x_unique)
        p0 = p0 * table[0, value_index]
        p1 = p1 * table[1, value_index]

    # Ties go to class 1.
    return 0 if p0 > p1 else 1

# Classify one sample with a model trained on the full table.
X = ['Sunny', 'Cool', 'High', 'Strong']
data = create_train_data()
prior_probability, conditional_probability, list_x_name = train_naive_bayes(data)
pred = prediction_play_tennis(X, list_x_name, prior_probability, conditional_probability)

# pred is 1 for the 'yes' class and 0 for the 'no' class.
# NOTE(review): the messages mention an "Ad" although the model predicts
# play-tennis — confirm the intended wording.
if pred:
    print('Ad should go!')
else:
    print('Ad should not go!')


Ad should not go!
