In [1]:
import numpy as np
import pandas as pd

In [16]:

def create_train_data():

  data=[['Sunny','Hot', 'High', 'Weak', 'no'],
        ['Sunny','Hot', 'High', 'Strong', 'no'],
        ['Overcast','Hot', 'High', 'Weak', 'yes'],
        ['Rain','Mild', 'High', 'Weak', 'yes'],
        ['Rain','Cool', 'Normal', 'Weak', 'yes'],
        ['Rain','Cool', 'Normal', 'Strong', 'no'],
        ['Overcast','Cool', 'Normal', 'Strong', 'yes'],
        ['Overcast','Mild', 'High', 'Weak', 'no'],
        ['Sunny','Cool', 'Normal', 'Weak', 'yes'],
        ['Rain','Mild', 'Normal', 'Weak', 'yes']]

  return pd.DataFrame(data, columns=['Outlook', 'Temperature', 'Humidity', 'Wind', 'Play'])

train_data = create_train_data()
print(train_data)

    Outlook Temperature Humidity    Wind Play
0     Sunny         Hot     High    Weak   no
1     Sunny         Hot     High  Strong   no
2  Overcast         Hot     High    Weak  yes
3      Rain        Mild     High    Weak  yes
4      Rain        Cool   Normal    Weak  yes
5      Rain        Cool   Normal  Strong   no
6  Overcast        Cool   Normal  Strong  yes
7  Overcast        Mild     High    Weak   no
8     Sunny        Cool   Normal    Weak  yes
9      Rain        Mild   Normal    Weak  yes


In [17]:
def compute_prior_probablity(train_data):
  total_length = len(train_data.Play)
  # The count() method does not need any arguments. It counts non-NA cells.
  temp = train_data.Play.value_counts()["no"]/total_length

  return [temp, 1-temp]

prior_probablity = compute_prior_probablity(train_data)
print("P(“Play Tennis” = Yes)", prior_probablity[0]) # The output was reversed.
print("P(“Play Tennis” = No)", prior_probablity[1])

P(“Play Tennis” = Yes) 0.4
P(“Play Tennis” = No) 0.6


In [20]:
train_data.iloc[1]

Outlook         Sunny
Temperature       Hot
Humidity         High
Wind           Strong
Play               no
Name: 1, dtype: object

In [45]:
def compute_conditional_probability(train_data: pd.DataFrame):
    y_unique = train_data.iloc[:, -1].unique()
    conditional_probability = []
    list_x_name = []

    for i in range(0, train_data.shape[1] - 1):
        x_unique = train_data.iloc[:, i].unique()
        print("x_unique", x_unique)

        list_x_name.append(x_unique)

        x_conditional_probability = np.zeros((len(y_unique), len(x_unique)))
        for j, y_val in enumerate(y_unique):
            for k, x_val in enumerate(x_unique):
                y_condition = train_data.iloc[:, -1] == y_val
                x_condition = train_data.iloc[:, i] == x_val
                x_conditional_probability[j, k] = len(train_data[y_condition & x_condition]) / len(train_data[y_condition])

        conditional_probability.append(x_conditional_probability)

    return conditional_probability, list_x_name

In [46]:
a,b =compute_conditional_probability(train_data)

x_unique ['Sunny' 'Overcast' 'Rain']
x_unique ['Hot' 'Mild' 'Cool']
x_unique ['High' 'Normal']
x_unique ['Weak' 'Strong']


In [48]:
def train_naive_bayes(train_data):
    # Step 1: Calculate Prior Probability
    prior_probability = compute_prior_probablity(train_data)

    # Step 2: Calculate Conditional Probability
    conditional_probability, list_x_name  = compute_conditional_probability(train_data)

    return prior_probability,conditional_probability, list_x_name

data = create_train_data()
prior_probability,conditional_probability, list_x_name = train_naive_bayes(data)

x_unique ['Sunny' 'Overcast' 'Rain']
x_unique ['Hot' 'Mild' 'Cool']
x_unique ['High' 'Normal']
x_unique ['Weak' 'Strong']


In [49]:
def get_index_from_value(feature_name, list_features):
  return np.where(list_features == feature_name)[0][0]

In [50]:
train_data = create_train_data()
_, list_x_name  = compute_conditional_probability(train_data)
outlook = list_x_name[0]
i1 = get_index_from_value("Overcast", outlook)
i2 = get_index_from_value("Rain", outlook)
i3 = get_index_from_value("Sunny", outlook)

print(i1, i2, i3)

x_unique ['Sunny' 'Overcast' 'Rain']
x_unique ['Hot' 'Mild' 'Cool']
x_unique ['High' 'Normal']
x_unique ['Weak' 'Strong']
1 2 0


In [51]:
train_data = create_train_data()
conditional_probability, list_x_name  = compute_conditional_probability(train_data)
# Compute P("Outlook"="Sunny"|Play Tennis"="Yes")
x1=get_index_from_value("Sunny",list_x_name[0])
print("P('Outlook'='Sunny'|Play Tennis'='Yes') = ", np.round(conditional_probability[0][1, x1],2))

x_unique ['Sunny' 'Overcast' 'Rain']
x_unique ['Hot' 'Mild' 'Cool']
x_unique ['High' 'Normal']
x_unique ['Weak' 'Strong']
P('Outlook'='Sunny'|Play Tennis'='Yes') =  0.17


In [52]:
train_data = create_train_data()
conditional_probability, list_x_name  = compute_conditional_probability(train_data)
# Compute P("Outlook"="Sunny"|Play Tennis"="No")
x1=get_index_from_value("Sunny",list_x_name[0])
print("P('Outlook'='Sunny'|Play Tennis'='No') = ", np.round(conditional_probability[0][0, x1],2))

x_unique ['Sunny' 'Overcast' 'Rain']
x_unique ['Hot' 'Mild' 'Cool']
x_unique ['High' 'Normal']
x_unique ['Weak' 'Strong']
P('Outlook'='Sunny'|Play Tennis'='No') =  0.5


In [53]:
def prediction_play_tennis(X, list_x_name, prior_probability, conditional_probability):

    x1=get_index_from_value(X[0],list_x_name[0])
    x2=get_index_from_value(X[1],list_x_name[1])
    x3=get_index_from_value(X[2],list_x_name[2])
    x4=get_index_from_value(X[3],list_x_name[3])

    p0=prior_probability[0] \
    *conditional_probability[0][0,x1] \
    *conditional_probability[1][0,x2] \
    *conditional_probability[2][0,x3] \
    *conditional_probability[3][0,x4]

    p1=prior_probability[1]\
    *conditional_probability[0][1,x1]\
    *conditional_probability[1][1,x2]\
    *conditional_probability[2][1,x3]\
    *conditional_probability[3][1,x4]

    # print(p0, p1)

    if p0>p1:
        y_pred=0
    else:
        y_pred=1

    return y_pred

In [54]:
X = ['Sunny','Cool', 'High', 'Strong']
data = create_train_data()
prior_probability,conditional_probability, list_x_name = train_naive_bayes(data)
pred =  prediction_play_tennis(X, list_x_name, prior_probability, conditional_probability)

if(pred):
  print("Ad should go!")
else:
  print("Ad should not go!")

x_unique ['Sunny' 'Overcast' 'Rain']
x_unique ['Hot' 'Mild' 'Cool']
x_unique ['High' 'Normal']
x_unique ['Weak' 'Strong']
Ad should not go!
