In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import accuracy_score as acs

dataset = pd.read_csv('id3.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

X_train, X_test, y_train, y_test = tts(X, y, random_state=0, test_size=0.4)

print('Training set length = {}\nTest set length - {}'.format(X_train.shape[0], X_test.shape[0]))

pYes = list(y_train).count('yes') / y_train.shape[0]
pNo = list(y_train).count('no') / y_train.shape[0]

print(f'\nProbability for yes = {pYes}\nProbability for no = {pNo}')

pYesValues = {}
pNoValues = {}

countYesValues = {}
countNoValues = {}

for i, row in enumerate(X_train):
    for j, val in enumerate(row):
        if y_train[i] == 'yes':
            countYesValues[val] = countYesValues.get(val, 0) + 1
        else:
            countNoValues[val] = countNoValues.get(val, 0) + 1
            
uniqueValues = np.unique(X)

for val in uniqueValues:
    pYesValues[val] = countYesValues.get(val, 0) / list(y_train).count('yes')
    pNoValues[val] = countNoValues.get(val, 0) / list(y_train).count('no')
    

print('\nPreditions on test set:\n')
print('Test Instance', '\t\t\t\t\tPredicted Output', '\tActual Output')
y_pred = []
for i, row in enumerate(X_test):
    pYesInstance = pYes
    pNoInstance = pNo
    for j, val in enumerate(row):
        pYesInstance *= pYesValues.get(val, 0)
        pNoInstance *= pNoValues.get(val, 0)
    y_pred.append('yes' if pYesInstance >= pNoInstance else 'no')
    print(row, "\t\t",y_pred[i], "\t\t\t", y_test[i])

print('\nAccuracy = {}%'.format(acs(y_test, y_pred)*100))

Training set length = 8
Test set length - 6

Probability for yes = 0.5
Probability for no = 0.5

Preditions on test set:

Test Instance 					Predicted Output 	Actual Output
['sunny' 'cool' 'normal' 'weak'] 		 no 			 yes
['overcast' 'cool' 'normal' 'strong'] 		 yes 			 yes
['rain' 'cool' 'normal' 'weak'] 		 no 			 yes
['overcast' 'mild' 'high' 'strong'] 		 yes 			 yes
['overcast' 'hot' 'high' 'weak'] 		 yes 			 yes
['rain' 'mild' 'high' 'strong'] 		 yes 			 no

Accuracy = 50.0%
