# A5 : naive bayes classifier

---

In [1]:
import numpy as np


data = np.array(
    [
        ["Sunny", 30, "High", 1, 3, "No"],
        ["Sunny", 31, "High", 4, 2, "No"],
        ["Overcast", 28, "High", 2, 5, "Yes"],
        ["Rain", 23, "High", 1, 3, "Yes"],
        ["Rain", 10, "Normal", 2, 2, "Yes"],
        ["Rain", 13, "Normal", 3, 2, "No"],
        ["Overcast", 14, "Normal", 5, 1, "Yes"],
        ["Sunny", 25, "High", 2, 3, "No"],
        ["Sunny", 15, "Normal", 1, 2, "Yes"],
        ["Rain", 22, "Normal", 0, 4, "Yes"],
        ["Sunny", 19, "Normal", 3, 1, "Yes"],
        ["Overcast", 20, "High", 3, 2, "Yes"],
        ["Overcast", 33, "Normal", 1, 2, "Yes"],
        ["Rain", 18, "High", 3, 3, "No"],
    ]
)

data_yes = data[data[:, 5] == "Yes"]
data_no = data[data[:, 5] == "No"]

print(data_yes)
print(data_no)

[['Overcast' '28' 'High' '2' '5' 'Yes']
 ['Rain' '23' 'High' '1' '3' 'Yes']
 ['Rain' '10' 'Normal' '2' '2' 'Yes']
 ['Overcast' '14' 'Normal' '5' '1' 'Yes']
 ['Sunny' '15' 'Normal' '1' '2' 'Yes']
 ['Rain' '22' 'Normal' '0' '4' 'Yes']
 ['Sunny' '19' 'Normal' '3' '1' 'Yes']
 ['Overcast' '20' 'High' '3' '2' 'Yes']
 ['Overcast' '33' 'Normal' '1' '2' 'Yes']]
[['Sunny' '30' 'High' '1' '3' 'No']
 ['Sunny' '31' 'High' '4' '2' 'No']
 ['Rain' '13' 'Normal' '3' '2' 'No']
 ['Sunny' '25' 'High' '2' '3' 'No']
 ['Rain' '18' 'High' '3' '3' 'No']]


In [2]:
# naive bayes classifier

# 1. outlook = overcast, temp = 25, humidity = normal, s-wind = 3, w-wind = 2

# P(Yes|overcast, 25, normal, 3, 2)
# = P(overcast|Yes)
# * P(25|Yes)
# * P(normal|Yes)
# * P(s-wind = 3, w-wind = 2|Yes)
# * P(Yes)
# / P(overcast, 25, normal, 3, 2)

p_yes = data_yes.shape[0] / data.shape[0]
p_overcast_yes = data_yes[data_yes[:, 0] == "Overcast"].shape[0] / data_yes.shape[0]
p_normal_yes = data_yes[data_yes[:, 2] == "Normal"].shape[0] / data_yes.shape[0]

# P(temp = 25|Yes) in gaussian distribution
temp_yes = data_yes[:, 1]
temp_yes_mean = np.mean(temp_yes.astype(int))
temp_yes_std = np.std(temp_yes.astype(int))

p_temp_yes = (
    1
    / (temp_yes_std * np.sqrt(2 * np.pi))
    * np.exp(-((25 - temp_yes_mean) ** 2) / (2 * temp_yes_std**2))
)

# P(s-wind = 3, w-wind = 2|Yes) in multinomial distribution
# sum of all s-wind / sum of all w-wind + s-wind
s_wind_yes = data_yes[:, 3]
w_wind_yes = data_yes[:, 4]
s_wind_yes_sum = np.sum(s_wind_yes.astype(int))
w_wind_yes_sum = np.sum(w_wind_yes.astype(int))

p_s_wind_yes = s_wind_yes_sum / (s_wind_yes_sum + w_wind_yes_sum)
p_w_wind_yes = w_wind_yes_sum / (s_wind_yes_sum + w_wind_yes_sum)

p_sw3_ww2_yes = p_s_wind_yes**3 * p_w_wind_yes**2 * 10

# all probability
print(
    f"""
P(Yes) = {p_yes}
P(overcast|Yes) = {p_overcast_yes}
P(temp=25|Yes) = {p_temp_yes}
P(normal|Yes) = {p_normal_yes}
P(s-wind = 3, w-wind = 2|Yes) = {p_sw3_ww2_yes}
"""
)

p_yes_overcast_25_normal_3_2 = (
    p_overcast_yes * p_temp_yes * p_normal_yes * p_sw3_ww2_yes * p_yes
)
print(
    f"P(Yes|overcast, 25, normal, 3, 2) = {p_yes_overcast_25_normal_3_2} / P(overcast, 25, normal, 3, 2)"
)


# ===============================

# P(No|overcast, 25, normal, 3, 2)

p_no = data_no.shape[0] / data.shape[0]
p_overcast_no = (data_no[data_no[:, 0] == "Overcast"].shape[0] + 1) / (
    data_no.shape[0] + 2
)  #
p_normal_no = data_no[data_no[:, 2] == "Normal"].shape[0] / data_no.shape[0]

# P(temp = 25|No) in gaussian distribution
temp_no = data_no[:, 1]
temp_no_mean = np.mean(temp_no.astype(int))
temp_no_std = np.std(temp_no.astype(int))

p_temp_no = (
    1
    / (temp_no_std * np.sqrt(2 * np.pi))
    * np.exp(-((25 - temp_no_mean) ** 2) / (2 * temp_no_std**2))
)

# P(s-wind = 3, w-wind = 2|No) in multinomial distribution
# sum of all s-wind / sum of all w-wind + s-wind
s_wind_no = data_no[:, 3]
w_wind_no = data_no[:, 4]
s_wind_no_sum = np.sum(s_wind_no.astype(int))
w_wind_no_sum = np.sum(w_wind_no.astype(int))

p_s_wind_no = s_wind_no_sum / (s_wind_no_sum + w_wind_no_sum)
p_w_wind_no = w_wind_no_sum / (s_wind_no_sum + w_wind_no_sum)

p_sw3_ww2_no = p_s_wind_no**3 * p_w_wind_no**2 * 10

# all probability
print(
    f"""
P(No) = {p_no}
P(overcast|No) = {p_overcast_no} -- with laplace smoothing alpha = 1, n_i = 2
P(temp=25|No) = {p_temp_no}
P(normal|No) = {p_normal_no}
P(s-wind = 3, w-wind = 2|No) = {p_sw3_ww2_no}
"""
)
p_no_overcast_25_normal_3_2 = (
    p_overcast_no * p_temp_no * p_normal_no * p_sw3_ww2_no * p_no
)
print(
    f"P(No|overcast, 25, normal, 3, 2) = {p_no_overcast_25_normal_3_2} / P(overcast, 25, normal, 3, 2)"
)

# print the prediction
print("====================================")
if p_yes_overcast_25_normal_3_2 > p_no_overcast_25_normal_3_2:
    print("predicted: Yes")
else:
    print("predicted: No")


P(Yes) = 0.6428571428571429
P(overcast|Yes) = 0.4444444444444444
P(temp=25|Yes) = 0.04718531850379891
P(normal|Yes) = 0.6666666666666666
P(s-wind = 3, w-wind = 2|Yes) = 0.27565312500000005

P(Yes|overcast, 25, normal, 3, 2) = 0.0024774819999414275 / P(overcast, 25, normal, 3, 2)

P(No) = 0.35714285714285715
P(overcast|No) = 0.14285714285714285 -- with laplace smoothing alpha = 1, n_i = 2
P(temp=25|No) = 0.0559348976718134
P(normal|No) = 0.2
P(s-wind = 3, w-wind = 2|No) = 0.3125

P(No|overcast, 25, normal, 3, 2) = 0.00017836383186164986 / P(overcast, 25, normal, 3, 2)
predicted: Yes


In [3]:
# Calculate the probability for class "Yes"

# P(Yes|Rain, 20, High, 0, 3)
# = P(Rain|Yes)
# * P(20|Yes)
# * P(High|Yes)
# * P(S-Wind = 0, W-Wind = 3|Yes)
# * P(Yes)
# / P(Rain, 20, High, 0, 3)

p_yes = data_yes.shape[0] / data.shape[0]
p_rain_yes = data_yes[data_yes[:, 0] == "Rain"].shape[0] / data_yes.shape[0]
p_temp20_yes = (
    1
    / (temp_yes_std * np.sqrt(2 * np.pi))
    * np.exp(-((20 - temp_yes_mean) ** 2) / (2 * temp_yes_std**2))
)
p_high_yes = data_yes[data_yes[:, 2] == "High"].shape[0] / data_yes.shape[0]
p_sw0_ww3_yes = p_s_wind_yes**0 * p_w_wind_yes**3 * 1

# all probability
print(
    f"""
P(Yes) = {p_yes}
P(Rain|Yes) = {p_rain_yes}
P(Temp=20|Yes) = {p_temp20_yes}
P(High|Yes) = {p_high_yes}
P(S-Wind = 0, W-Wind = 3|Yes) = {p_sw0_ww3_yes}
"""
)

p_yes_rain_20_high_0_3 = p_rain_yes * p_temp20_yes * p_high_yes * p_sw0_ww3_yes * p_yes
print(
    f"P(Yes|Rain, 20, High, 0, 3) = {p_yes_rain_20_high_0_3} / P(Rain, 20, High, 0, 3)"
)


# Calculate the probability for class "No"

# P(No|Rain, 20, High, 0, 3)

p_no = data_no.shape[0] / data.shape[0]
p_rain_no = (data_no[data_no[:, 0] == "Rain"].shape[0]) / (data_no.shape[0])
p_temp20_no = (
    1
    / (temp_no_std * np.sqrt(2 * np.pi))
    * np.exp(-((20 - temp_no_mean) ** 2) / (2 * temp_no_std**2))
)
p_high_no = data_no[data_no[:, 2] == "High"].shape[0] / data_no.shape[0]
p_sw0_ww3_no = p_s_wind_no**0 * p_w_wind_no**3 * 1

# all probability
print(
    f"""
P(No) = {p_no}
P(Rain|No) = {p_rain_no}
P(Temp=20|No) = {p_temp20_no}
P(High|No) = {p_high_no}
P(S-Wind = 0, W-Wind = 3|No) = {p_sw0_ww3_no}
"""
)
p_no_rain_20_high_0_3 = p_rain_no * p_temp20_no * p_high_no * p_sw0_ww3_no * p_no
print(f"P(No|Rain, 20, High, 0, 3) = {p_no_rain_20_high_0_3} / P(Rain, 20, High, 0, 3)")

# Print the prediction
print("====================================")
if p_yes_rain_20_high_0_3 > p_no_rain_20_high_0_3:
    print("predicted: Yes")
else:
    print("predicted: No")


P(Yes) = 0.6428571428571429
P(Rain|Yes) = 0.3333333333333333
P(Temp=20|Yes) = 0.05925147492884364
P(High|Yes) = 0.3333333333333333
P(S-Wind = 0, W-Wind = 3|Yes) = 0.16637500000000005

P(Yes|Rain, 20, High, 0, 3) = 0.0007041402958061687 / P(Rain, 20, High, 0, 3)

P(No) = 0.35714285714285715
P(Rain|No) = 0.4
P(Temp=20|No) = 0.05095306377300642
P(High|No) = 0.8
P(S-Wind = 0, W-Wind = 3|No) = 0.125

P(No|Rain, 20, High, 0, 3) = 0.0007279009110429491 / P(Rain, 20, High, 0, 3)
predicted: No


In [4]:
# Calculate the probability for class "Yes"

# P(Yes|Sunny, 24, Normal, 4, 4)
# = P(Sunny|Yes)
# * P(24|Yes)
# * P(Normal|Yes)
# * P(S-Wind = 4, W-Wind = 4|Yes)
# * P(Yes)
# / P(Sunny, 24, Normal, 4, 4)

p_yes = data_yes.shape[0] / data.shape[0]
p_sunny_yes = data_yes[data_yes[:, 0] == "Sunny"].shape[0] / data_yes.shape[0]
p_temp24_yes = (
    1
    / (temp_yes_std * np.sqrt(2 * np.pi))
    * np.exp(-((24 - temp_yes_mean) ** 2) / (2 * temp_yes_std**2))
)
p_normal_yes = data_yes[data_yes[:, 2] == "Normal"].shape[0] / data_yes.shape[0]
p_sw4_ww4_yes = p_s_wind_yes**4 * p_w_wind_yes**4 * 70

# all probability
print(
    f"""
P(Yes) = {p_yes}
P(Sunny|Yes) = {p_sunny_yes}
P(Temp=24|Yes) = {p_temp24_yes}
P(Normal|Yes) = {p_normal_yes}
P(S-Wind = 4, W-Wind = 4|Yes) = {p_sw4_ww4_yes}
"""
)

p_yes_sunny_24_normal_4_4 = (
    p_sunny_yes * p_temp24_yes * p_normal_yes * p_sw4_ww4_yes * p_yes
)
print(
    f"P(Yes|Sunny, 24, Normal, 4, 4) = {p_yes_sunny_24_normal_4_4} / P(Sunny, 24, Normal, 4, 4)"
)


# Calculate the probability for class "No"

p_no = data_no.shape[0] / data.shape[0]
p_sunny_no = (data_no[data_no[:, 0] == "Sunny"].shape[0]) / (data_no.shape[0])
p_temp24_no = (
    1
    / (temp_no_std * np.sqrt(2 * np.pi))
    * np.exp(-((24 - temp_no_mean) ** 2) / (2 * temp_no_std**2))
)
p_normal_no = data_no[data_no[:, 2] == "Normal"].shape[0] / data_no.shape[0]
p_sw4_ww4_no = p_s_wind_no**4 * p_w_wind_no**4 * 70

# all probability
print(
    f"""
P(No) = {p_no}
P(Sunny|No) = {p_sunny_no}
P(Temp=24|No) = {p_temp24_no}
P(Normal|No) = {p_normal_no}
P(S-Wind = 4, W-Wind = 4|No) = {p_sw4_ww4_no}
"""
)
p_no_sunny_24_normal_4_4 = p_sunny_no * p_temp24_no * p_normal_no * p_sw4_ww4_no * p_no
print(
    f"P(No|Sunny, 24, Normal, 4, 4) = {p_no_sunny_24_normal_4_4} / P(Sunny, 24, Normal, 4, 4)"
)

# Print the prediction
print("====================================")
if p_yes_sunny_24_normal_4_4 > p_no_sunny_24_normal_4_4:
    print("predicted: Yes")
else:
    print("predicted: No")


P(Yes) = 0.6428571428571429
P(Sunny|Yes) = 0.2222222222222222
P(Temp=24|Yes) = 0.05162133691646785
P(Normal|Yes) = 0.6666666666666666
P(S-Wind = 4, W-Wind = 4|Yes) = 0.26266297148437506

P(Yes|Sunny, 24, Normal, 4, 4) = 0.0012913346425214773 / P(Sunny, 24, Normal, 4, 4)

P(No) = 0.35714285714285715
P(Sunny|No) = 0.6
P(Temp=24|No) = 0.05722501490401209
P(Normal|No) = 0.2
P(S-Wind = 4, W-Wind = 4|No) = 0.2734375

P(No|Sunny, 24, Normal, 4, 4) = 0.0006706056434063918 / P(Sunny, 24, Normal, 4, 4)
predicted: Yes
