In [None]:
### CUSTOM NAIVE BAYES CLASSIFIER ###

import pandas as pd
import numpy as np

# Read the play-tennis table into a dataframe (comma is read_csv's default separator).
df = pd.read_csv('playTennis.csv')

In [None]:
# Every column label except the first; the skipped column is presumably the
# 'day' row identifier (TODO confirm against playTennis.csv).
# The last entry, col[-1], is used by the following cells as the class-label column.
col = df.columns[1:] # extract column heads

In [None]:
# vals[n] holds the distinct values found in column col[n], i.e. every
# category that feature (or, for the last column, the class label) can take.
vals = [df[name].unique() for name in col]

In [None]:
# Conditional-probability tables, one per feature column (every column in
# `col` except the last, which holds the class label):
#   prob_tables[i][a][b] = P(col[i] == vals[i][a] | col[-1] == vals[-1][b])
prob_tables = [] # stores probability tables (list of lists)

target = df[col[-1]]
# Rows per class; loop-invariant, so computed once instead of per table cell.
class_counts = [(target == k).sum() for k in vals[-1]]

# Loop over the feature columns only. The bound must be the number of features,
# not len(col[-1]): that is the length of the label column's *name* and matches
# the feature count only by coincidence.
for i in range(len(col) - 1):
  feature = df[col[i]]
  likelihoods = []
  for j in vals[i]:
    row = []
    for k, n_k in zip(vals[-1], class_counts):
      # example: P(outlook = Sunny | Output = Yes)
      # = count(outlook = Sunny & Output = Yes) / count(Output = Yes)
      row.append(((feature == j) & (target == k)).sum() / n_k)

    likelihoods.append(row)
  prob_tables.append(likelihoods)

In [None]:
test1 = ["Sunny","Cool","High","Weak"] # test vector 1
test2 = ["Rain","Hot","High","Weak"] # test vector 2

prob_yes = sum(df['play'] == "Yes")/len(df) # P(Yes)
prob_no = sum(df['play'] == "No")/len(df)   # P(No)

# Position of each class inside a prob_tables row. vals[-1] lists the labels in
# the order they were collected from the data, so the positions are looked up
# rather than assumed to be 0 = No, 1 = Yes.
_class_labels = list(vals[-1])
_no_idx, _yes_idx = _class_labels.index("No"), _class_labels.index("Yes")

def _nb_scores(sample):
  """Return the unnormalised naive Bayes scores (score_no, score_yes) for `sample`.

  `sample` holds one category per feature column, in the same order as
  col[:-1]. Each score is the product of the per-feature conditional
  probabilities taken from prob_tables, multiplied by the class prior.

  Raises ValueError if a category was never seen in the corresponding column.
  """
  score_no, score_yes = 1, 1
  # One factor per feature column; the last entry of `col` is the class label.
  for i in range(len(col) - 1):
    seen = list(vals[i])
    if sample[i] not in seen:
      raise ValueError("value %r was never observed in column %r" % (sample[i], col[i]))
    row = prob_tables[i][seen.index(sample[i])]
    score_no *= row[_no_idx]
    score_yes *= row[_yes_idx]
  # finally multiply the probability of output values
  return score_no * prob_no, score_yes * prob_yes

play_no1, play_yes1 = _nb_scores(test1) # chance of each output case (vector 1)
play_no2, play_yes2 = _nb_scores(test2) # chance of each output case (vector 2)

In [None]:
# Print both class scores and the predicted label for each test vector.
# The label is "Yes" only when its score is strictly larger, so ties go to "No".
reports = ((1, test1, play_yes1, play_no1), (2, test2, play_yes2, play_no2))

for n, sample, p_yes, p_no in reports:
  print("Chance of play = Yes, given test%d: " % n, p_yes)
  print("Chance of play = No, given test%d: " % n, p_no)

  verdict = "Yes" if p_yes > p_no else "No"
  # Blank lines separate consecutive reports regardless of the verdict
  # (nothing extra is printed after the last report).
  gap = "\n\n" if n < len(reports) else ""
  print("Output label for ", sample, "is " + verdict + gap)

Chance of play = Yes, given test1:  0.010582010582010581
Chance of play = No, given test1:  0.013714285714285715
Output label for  ['Sunny', 'Cool', 'High', 'Weak'] is No


Chance of play = Yes, given test2:  0.010582010582010581
Chance of play = No, given test2:  0.01828571428571429
Output label for  ['Rain', 'Hot', 'High', 'Weak'] is No


In [None]:
### IN-BUILT NAIVE BAYES CLASSIFIER ###

import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import CategoricalNB, GaussianNB

# load csv data into dataframe
df = pd.read_csv('playTennis.csv', sep = ',')

# Encode every column after the first as integer codes. One fitted encoder is
# kept per column so that test vectors and predictions can be translated by
# name instead of through hand-written integer codes.
encoders = {}
for i in df.columns[1:]:
  encoder = LabelEncoder()
  df[i] = encoder.fit_transform(df[i])
  encoders[i] = encoder

# remove unwanted columns and create the input and the output sets
del df['day']
Y = df['play'].to_numpy()
del df['play']
X = df.to_numpy()

# Train the model. The features are categorical codes, so CategoricalNB is the
# matching estimator; GaussianNB would fit a normal distribution to the integer
# codes, making the result depend on the arbitrary ordering of category names.
model = CategoricalNB()
model.fit(X,Y)

# Same two test vectors as the custom classifier, encoded column by column
# with the encoder that was fitted on that column.
tests = [["Sunny","Cool","High","Weak"], ["Rain","Hot","High","Weak"]]
encoded_tests = [
  [encoders[name].transform([value])[0] for name, value in zip(df.columns, sample)]
  for sample in tests
]
result = model.predict(encoded_tests)

# dictionary to map integer to output label, taken from the fitted 'play' encoder
label = {code: str(name) for code, name in enumerate(encoders['play'].classes_)}

for i in range(len(result)):
  print("Output label for test case ",i+1," is", label[result[i]] )

Output label for test case  1  is No
Output label for test case  2  is Yes
