In [9]:
import numpy as np
import pandas as pd

In [10]:
data=[["Sunny","Hot","High","Weak","No"],["Sunny","Hot","High","Strong","No"],
      ["Overcast","Hot","High","Weak","Yes"],["Rain","Mild","High","Weak","Yes"],
      ["Rain","Cool","Normal","Weak","Yes"],["Rain","Cool","Normal","Strong","No"],
      ["Overcast","Cool","Normal","Strong","Yes"],["Sunny","Mild","High","Weak","No"],
      ["Sunny","Cool","Normal ","Weak","Yes"],["Rain","Mild","Normal","Weak","Yes"],
      ["Sunny","Mild","Normal","Strong","Yes"],["Overcast","Mild","High","Strong","Yes"],
      ["Overcast","Hot","Normal","Weak","Yes"],["Rain","Mild","High","Strong","No"]]

In [11]:
Data=pd.DataFrame(data,columns=['Outlook','Temperature','Humidity','Wind','PlayTennis'])
Data.head()

Unnamed: 0,Outlook,Temperature,Humidity,Wind,PlayTennis
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes


In [12]:
cols=Data.shape[1]
X_data=Data.iloc[:,:cols-1]
Y_data=Data.iloc[:,cols-1:]
featureNames=X_data.columns

In [13]:
def Naive_Bayes(X_data, Y_data):
    y = Y_data.values.flatten()
    X = X_data.values
    y_unique = np.unique(y)
    prior_prob = np.zeros(len(y_unique))

    for i in range(len(y_unique)):
        prior_prob[i] = sum(y == y_unique[i]) / len(y)

    condition_prob = {}

    for feat in featureNames:
        x_unique = list(set(X_data[feat]))
        x_condition_prob = np.zeros((len(y_unique), len(x_unique)))

        for j in range(len(y_unique)):
            for k in range(len(x_unique)):
                x_condition_prob[j, k] = sum(
                    (X_data[feat] == x_unique[k]) & (Y_data.values.flatten() == y_unique[j])) / sum(y == y_unique[j])

        x_condition_prob = pd.DataFrame(x_condition_prob, columns=x_unique, index=y_unique)
        condition_prob[feat] = x_condition_prob

    return prior_prob, condition_prob


In [14]:
prior_prob, condition_prob = Naive_Bayes(X_data, Y_data)
print(prior_prob)
print(condition_prob['Outlook'])
print(condition_prob['Temperature'])
print(condition_prob['Humidity'])
print(condition_prob['Wind'])

[0.35714286 0.64285714]
     Overcast     Sunny      Rain
No   0.000000  0.600000  0.400000
Yes  0.444444  0.222222  0.333333
         Mild       Hot      Cool
No   0.400000  0.400000  0.200000
Yes  0.444444  0.222222  0.333333
         High   Normal     Normal
No   0.800000  0.000000  0.200000
Yes  0.333333  0.111111  0.555556
       Strong      Weak
No   0.600000  0.400000
Yes  0.333333  0.666667


In [15]:
def Prediction(testData, prior, condition_prob):
    numclass = prior.shape[0]
    numsample = testData.shape[0]
    featureNames = testData.columns
    post_prob = np.zeros((numsample, numclass))
    for k in range(numsample):
        prob_k = np.zeros((numclass,))
        for i in range(numclass):
            pri = prior[i]
            for feat in featureNames:
                feat_val = testData[feat][k]
                cp = condition_prob[feat]
                if feat_val in cp.columns:
                    cp_val = cp.iloc[i, cp.columns.get_loc(feat_val)]
                else:  # If the value is not present in training data, assume a small non-zero probability
                    cp_val = 0.01
                pri *= cp_val
            prob_k[i] = pri
        prob = prob_k / np.sum(prob_k, axis=0)
        post_prob[k, :] = prob
    return post_prob

In [16]:
test_data=[['Sunny','Cool','High','Strong']]
testData=pd.DataFrame(test_data, columns=['Outlook', 'Temperature', 'Humidity', 'Wind'])
testData.head()

postPrior=Prediction(testData, prior_prob, condition_prob)
postPrior

array([[0.79541735, 0.20458265]])