### Tennis Classifier based on Naive Bayes

In [2]:
# Load the training data from a text file
import numpy as np

# Path of the training file, relative to the current working directory.
filepath = './data.txt'

def create_train_data(filepath):
    """Load the whitespace-delimited training set into an array of strings.

    Every non-blank line of the file becomes one row and its
    whitespace-separated tokens become the columns.

    Blank or whitespace-only lines (for example a trailing empty line at the
    end of the file) are skipped: an empty row would make the rows ragged and
    NumPy could no longer build a rectangular 2-D array from them.

    Args:
        filepath: Path of the text file to read.

    Returns:
        np.ndarray holding the tokens as strings, one row per non-blank line.
    """
    with open(filepath, 'r') as file:
        # str.split() with no argument already ignores leading/trailing
        # whitespace, so no separate strip() is needed for the tokens.
        rows = [line.split() for line in file if line.strip()]
    return np.array(rows)

# Parse the file once and print the resulting array to check that it loaded as expected.
train_data = create_train_data(filepath=filepath)
print(train_data)

[['Sunny' 'Hot' 'High' 'Weak' 'No']
 ['Sunny' 'Hot' 'High' 'Strong' 'No']
 ['Overcast' 'Hot' 'High' 'Weak' 'Yes']
 ['Rain' 'Mild' 'High' 'Weak' 'Yes']
 ['Rain' 'Cool' 'Normal' 'Weak' 'Yes']
 ['Rain' 'Cool' 'Normal' 'Strong' 'No']
 ['Overcast' 'Cool' 'Normal' 'Strong' 'Yes']
 ['Overcast' 'Mild' 'High' 'Weak' 'No']
 ['Sunny' 'Cool' 'Normal' 'Weak' 'Yes']
 ['Rain' 'Mild' 'Normal' 'Weak' 'Yes']]


In [3]:
def compute_prior_probability(train_data):
    """Estimate the class priors from the label column.

    The label of each sample is read from the last column of ``train_data``;
    the prior of a class is the fraction of rows carrying that label.

    Args:
        train_data: 2-D array whose last column holds the labels 'No' / 'Yes'.

    Returns:
        np.ndarray of two floats: index 0 is the prior of 'No',
        index 1 is the prior of 'Yes'.
    """
    class_labels = ('No', 'Yes')
    targets = train_data[:, -1]
    n_samples = len(targets)

    priors = np.zeros(len(class_labels))
    for position, label in enumerate(class_labels):
        # Boolean mask selects the rows of this class; its length is the count.
        n_matching = len(targets[targets == label])
        priors[position] = n_matching / n_samples
    return priors

# Index 0 of the result is the prior of 'No', index 1 the prior of 'Yes'.
prior_probability = compute_prior_probability(train_data=train_data)
print("P(play tennis = No) = ", prior_probability[0])
print("P(play tennis = Yes) = ", prior_probability[1])

P(play tennis = No) =  0.4
P(play tennis = Yes) =  0.6


In [4]:
def compute_conditional_probability(train_data, alpha=0.0):
    """Estimate P(feature = value | class) for every feature column.

    All columns except the last are treated as categorical features; the last
    column holds the class label ('No' / 'Yes').

    Args:
        train_data: 2-D array of strings, features first and label last.
        alpha: Additive (Laplace) smoothing strength. With the default ``0.0``
            the plain relative frequency ``count(value, class) / count(class)``
            is used. With ``alpha > 0`` the estimate becomes
            ``(count(value, class) + alpha) / (count(class) + alpha * K)``,
            where ``K`` is the number of distinct values of the feature, so a
            value never seen together with a class no longer gets probability
            zero (which would wipe out the whole product at prediction time).

    Returns:
        A pair ``(conditional_probability, list_x_name)``:

        * ``conditional_probability[i][v][c]`` is the probability, rounded to
          4 decimals, of the v-th value of feature ``i`` given class ``c``
          (0 = 'No', 1 = 'Yes').
        * ``list_x_name[i]`` is the sorted array of unique values of feature
          ``i``; its order defines the ``v`` index above.

    Raises:
        ValueError: If ``alpha`` is negative.
    """
    if alpha < 0:
        raise ValueError("alpha must be non-negative, got {!r}".format(alpha))

    y_unique = ['No', 'Yes']
    train_class = train_data[:, -1]
    # Class counts do not depend on the feature or its value: compute them once
    # instead of recounting inside the innermost loop.
    class_counts = [np.sum(train_class == y) for y in y_unique]

    conditional_probability = []
    list_x_name = []
    for i in range(train_data.shape[1] - 1):
        sample = train_data[:, i]
        x_unique = np.unique(sample)
        list_x_name.append(x_unique)
        n_values = len(x_unique)

        x_conditional_probability = []
        for x in x_unique:
            each_prob = []
            for y, count_y in zip(y_unique, class_counts):
                count_x_if_y = np.sum((sample == x) & (train_class == y))
                denominator = count_y + alpha * n_values
                if denominator == 0:
                    # Class absent from the data and no smoothing requested.
                    prob = 0
                else:
                    prob = round((count_x_if_y + alpha) / denominator, 4)
                each_prob.append(prob)
            x_conditional_probability.append(each_prob)
        conditional_probability.append(x_conditional_probability)

    return conditional_probability, list_x_name

# likelihood[i][v][c]: feature column i, v-th unique value of that feature, class c (0 = 'No', 1 = 'Yes').
# list_x_name[i] holds the unique values of feature column i, in the order used for the v index.
likelihood, list_x_name = compute_conditional_probability(train_data=train_data)
print(likelihood)

[[[0.25, 0.3333], [0.25, 0.5], [0.5, 0.1667]], [[0.25, 0.5], [0.5, 0.1667], [0.25, 0.3333]], [[0.75, 0.3333], [0.25, 0.6667]], [[0.5, 0.1667], [0.5, 0.8333]]]


In [14]:
def get_index_from_value(feature_name, list_features):
    """Return the position of ``feature_name`` within ``list_features``.

    Args:
        feature_name: Value to look up (e.g. 'Sunny').
        list_features: 1-D array or sequence of the known values of a feature.

    Returns:
        Index of the first element equal to ``feature_name``.

    Raises:
        IndexError: If ``feature_name`` does not occur in ``list_features``.
    """
    # Coerce to an array so that a plain Python list is compared element-wise
    # instead of collapsing to a single boolean.
    candidates = np.asarray(list_features)
    matches = np.nonzero(candidates == feature_name)[0]
    if matches.size == 0:
        # Same exception type as the bare [0][0] lookup would raise, but with a
        # message that names the offending value.
        raise IndexError(
            "value {!r} not found among known feature values {}".format(
                feature_name, candidates.tolist()
            )
        )
    return matches[0]

# Values of the first feature column, in the order used by the likelihood table.
outlook = list_x_name[0]

# Look up the table position of each value.
i1, i2, i3 = (
    get_index_from_value(value, outlook)
    for value in ('Overcast', 'Rain', 'Sunny')
)

print(i1, i2, i3)

0 1 2


In [9]:
# P(Outlook = Sunny | Play): within a likelihood row, column 1 is 'Yes' and column 0 is 'No'.
x1 = get_index_from_value('Sunny', outlook)
sunny_row = likelihood[0][x1]

print('P(Outlook = "Sunny" | Play = "Yes") = ', sunny_row[1])
print('P(Outlook = "Sunny" | Play = "No") = ', sunny_row[0])

P(Outlook = "Sunny" | Play = "Yes") =  0.1667
P(Outlook = "Sunny" | Play = "No") =  0.5


In [31]:
def train_naive_bayes(train_data):
    """Fit the Naive Bayes model: class priors plus per-feature likelihoods.

    Args:
        train_data: 2-D array of samples, feature columns first, label last.

    Returns:
        A 3-tuple ``(prior_probability, likelihood, list_x_name)`` made of the
        result of ``compute_prior_probability`` followed by the two results of
        ``compute_conditional_probability``.
    """
    # Priors first, then the conditional tables and their value lookups.
    priors = compute_prior_probability(train_data)
    conditionals, feature_values = compute_conditional_probability(train_data=train_data)
    return priors, conditionals, feature_values

In [32]:
def prediction_play_tennis(x, list_x_name, prior_probability, likelihood):
    """Classify one sample with the Maximum A Posteriori (MAP) rule.

    For each class, the score is the class prior multiplied by the likelihood
    of every feature value in ``x``; the class with the larger score wins.

    Args:
        x: Feature values of the sample, one per feature, in column order.
        list_x_name: Per-feature collections of known values, used to translate
            each value of ``x`` into its row of the likelihood table.
        prior_probability: Class priors; index 0 and 1 are class 0 and class 1.
        likelihood: ``likelihood[i][v][c]`` is the probability of the v-th
            value of feature ``i`` given class ``c``.

    Returns:
        0 when the class-0 score is strictly larger, otherwise 1
        (a tie therefore yields 1).
    """
    score_0 = 1
    score_1 = 1

    # Multiply in the likelihood of each observed feature value.
    for feature_idx, table in enumerate(likelihood):
        value_idx = get_index_from_value(x[feature_idx], list_x_name[feature_idx])
        row = table[value_idx]
        score_0 *= row[0]
        score_1 *= row[1]

    # Weight by the class priors last.
    score_0 *= prior_probability[0]
    score_1 *= prior_probability[1]

    return 0 if score_0 > score_1 else 1

In [34]:
# End-to-end check: retrain from the file and classify a single sample.
x = ['Sunny', 'Cool', 'High', 'Strong']
data = create_train_data(filepath=filepath)
prior_probability, likelihood, list_x_name = train_naive_bayes(data)
pred = prediction_play_tennis(
    x,
    list_x_name,
    prior_probability=prior_probability,
    likelihood=likelihood,
)
# pred is 1 for class 'Yes' and 0 for class 'No'.
print('Ad should go!' if pred else 'Ad should not go!')

Ad should not go!
