### Implement Naive Bayes classification

In [6]:
# Create data

import numpy as np


def create_train_data():
    data = [
        ["Sunny", "Hot", "High", "Weak", "no"],
        ["Sunny", "Hot", "High", "Strong", "no"],
        ["Overcast", "Hot", "High", "Weak", "yes"],
        ["Rain", "Mild", "High", "Weak", "yes"],
        ["Rain", "Cool", "Normal", "Weak", "yes"],
        ["Rain", "Cool", "Normal", "Strong", "no"],
        ["Overcast", "Cool", "Normal", "Strong", "yes"],
        ["Overcast", "Mild", "High", "Weak", "no"],
        ["Sunny", "Cool", "Normal", "Weak", "yes"],
        ["Rain", "Mild", "Normal", "Weak", "yes"],
    ]
    return np.array(data)

train_data = create_train_data()
print(train_data)

[['Sunny' 'Hot' 'High' 'Weak' 'no']
 ['Sunny' 'Hot' 'High' 'Strong' 'no']
 ['Overcast' 'Hot' 'High' 'Weak' 'yes']
 ['Rain' 'Mild' 'High' 'Weak' 'yes']
 ['Rain' 'Cool' 'Normal' 'Weak' 'yes']
 ['Rain' 'Cool' 'Normal' 'Strong' 'no']
 ['Overcast' 'Cool' 'Normal' 'Strong' 'yes']
 ['Overcast' 'Mild' 'High' 'Weak' 'no']
 ['Sunny' 'Cool' 'Normal' 'Weak' 'yes']
 ['Rain' 'Mild' 'Normal' 'Weak' 'yes']]


In [7]:
def compute_prior_probablity(train_data):
    y_unique = ['no', 'yes']
    prior_probablity = np.zeros(len(y_unique))
    for i in range(len(y_unique)):
        prior_probablity[i] = np.sum(train_data[:, -1] == y_unique[i]) / train_data.shape[0]
    return prior_probablity

prior_probablity = compute_prior_probablity(train_data)
print("P(play tennis = No", prior_probablity[0])
print("P(play tennis = Yes", prior_probablity[1])

P(play tennis = No 0.4
P(play tennis = Yes 0.6


In [22]:
def compute_conditional_probablity(train_data):
    y_unique = ["no", "yes"]
    conditional_probability = []
    list_x_name = []
    for i in range(0, train_data.shape[1] - 1):
        x_unique = np.unique(train_data[:, i])
        list_x_name.append(x_unique)
        x_conditional_probability = np.zeros((len(y_unique), len(x_unique)))
        for j in range(len(y_unique)):
           for k in range(0, len(x_unique)):
              x_conditional_probability[j, k] = len(np.where((train_data[:, i] == x_unique[k]) & (train_data[:, -1] == y_unique[j]))[0])/len(np.where(train_data[:, 4] == y_unique[j])[0])
        conditional_probability.append(x_conditional_probability)
    return conditional_probability, list_x_name

train_data = create_train_data()
z, list_x_name = compute_conditional_probablity(train_data)
print("x1 = ", list_x_name[0])
print("x2 = ", list_x_name[1])
print("x3 = ", list_x_name[2])
print("x4 = ", list_x_name[3])
print("z", z)


x1 =  ['Overcast' 'Rain' 'Sunny']
x2 =  ['Cool' 'Hot' 'Mild']
x3 =  ['High' 'Normal']
x4 =  ['Strong' 'Weak']
z [array([[0.25      , 0.25      , 0.5       ],
       [0.33333333, 0.5       , 0.16666667]]), array([[0.25      , 0.5       , 0.25      ],
       [0.5       , 0.16666667, 0.33333333]]), array([[0.75      , 0.25      ],
       [0.33333333, 0.66666667]]), array([[0.5       , 0.5       ],
       [0.16666667, 0.83333333]])]


In [31]:
def get_index_from_value(feature_name, list_features):
    return np.where(list_features == feature_name)[0][0]

train_data = create_train_data()
_, list_x_name = compute_conditional_probablity(train_data)
outlook = list_x_name[0]

i1 = get_index_from_value("Overcast", outlook)
i2 = get_index_from_value("Rain", outlook)
i3 = get_index_from_value("Sunny", outlook)

print(i1, i2, i3)

0 1 2


In [23]:
train_data = create_train_data()
conditional_probability, list_x_name = compute_conditional_probablity(train_data)
# Compute P("Outlook" = "Sunny" | "Play Tennis" = "Yes")
x1 = get_index_from_value("Sunny", list_x_name[0])
print("P('Outlook' = 'Sunny' | 'Play Tennis' = 'Yes') = ", np.round(conditional_probability[0][1, x1], 2))

['Overcast' 'Rain' 'Sunny']
(array([2]),)
P('Outlook' = 'Sunny' | 'Play Tennis' = 'Yes') =  0.17


In [27]:
train_data = create_train_data()
conditional_probability, list_x_name = compute_conditional_probablity(train_data)
# Compute P("Outlook" = "Sunny" | "Play Tennis" = "No")
x1 = get_index_from_value("Sunny", list_x_name[0])
print("P('Outlook' = 'Sunny' | 'Play Tennis' = 'No') = ", np.round(conditional_probability[0][0, x1], 2))

['Overcast' 'Rain' 'Sunny']
(array([2]),)
P('Outlook' = 'Sunny' | 'Play Tennis' = 'No') =  0.5


In [28]:
def train_naive_bayes(train_data):
    y_unique = ['no', 'yes']
    prior_probability = compute_prior_probablity(train_data)
    conditional_probability, list_x_name = compute_conditional_probablity(train_data)
    return prior_probablity, conditional_probability, list_x_name

In [32]:
def prediction_play_tennis(X, list_x_name, prior_probability, conditional_probability):
    x1 = get_index_from_value(X[0], list_x_name[0])
    x2 = get_index_from_value(X[1], list_x_name[1])
    x3 = get_index_from_value(X[2], list_x_name[2])
    x4 = get_index_from_value(X[3], list_x_name[3])

    p0 = 0
    p1 = 0

    p0 = conditional_probability[0][0, x1] * conditional_probability[1][0, x2] * conditional_probability[2][0, x3] * conditional_probability[3][0, x4] * prior_probability[0]
    p1 = conditional_probability[0][1, x1] * conditional_probability[1][1, x2] * conditional_probability[2][1, x3] * conditional_probability[3][1, x4] * prior_probability[0]

    if p0>p1:
        y_pred = 0
    else:
        y_pred = 1
    return y_pred

In [30]:
X = ['Sunny', 'Cool', 'High', 'Strong']
data = create_train_data()
prior_probability, conditional_probability, list_x_name = train_naive_bayes(data)
pred = prediction_play_tennis(X, list_x_name, prior_probability, conditional_probability)
if (pred):
    print('Ad should not go!')
else:
    print('Ad should go!')
    

['Overcast' 'Rain' 'Sunny']
(array([2]),)
['Cool' 'Hot' 'Mild']
(array([0]),)
['High' 'Normal']
(array([0]),)
['Strong' 'Weak']
(array([0]),)
Ad should go!
