### Importing the Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

### Defining the Model Class

In [2]:
class MyNaiveBayes:
    """Naive Bayes classifier for categorical features held in pandas objects.

    `fit` estimates the prior P(outcome) and, for every feature, the
    conditional P(value | outcome) from relative frequencies. `predict`
    returns, for each row, the outcome that maximises the product of the prior
    and the per-feature conditionals.

    Parameters
    ----------
    alpha : float, default 0.0
        Additive (Laplace) smoothing strength. The default of 0.0 gives plain
        relative frequencies, so a value never observed together with an
        outcome gets probability 0 for that outcome.

    Attributes (set by `fit`)
    -------------------------
    outcome_probs : dict
        Maps outcome -> prior probability.
    feature_given_outcome : dict
        Maps outcome -> feature name -> feature value -> conditional probability.
    """

    def __init__(self, alpha=0.0):
        if alpha < 0:
            raise ValueError("alpha must be non-negative")
        self.alpha = alpha

    def fit(self, X, Y):
        """Estimate priors and per-feature conditional probabilities.

        Parameters
        ----------
        X : pandas.DataFrame
            One categorical feature per column.
        Y : pandas.Series
            Outcome label for every row of X; must share X's index.

        Returns
        -------
        None
        """
        self.outcome_probs = {}
        self.feature_given_outcome = {}

        # Distinct values of each feature over the whole training set, so every
        # outcome gets an entry for every value, even ones it never co-occurs with.
        unique_features = {feature: X[feature].unique() for feature in X.columns}

        outcome_counts = Y.value_counts()
        n_samples = outcome_counts.sum()

        for outcome, outcome_count in outcome_counts.items():
            self.outcome_probs[outcome] = outcome_count / n_samples
            rows_with_outcome = Y == outcome

            feature_given_outcome = {}
            for feature, uniques in unique_features.items():
                # A plain dict avoids pandas' label/positional lookup rules and
                # lets a missing value default to a count of 0.
                counts = X[feature][rows_with_outcome].value_counts().to_dict()
                denominator = sum(counts.values()) + self.alpha * len(uniques)

                feature_given_outcome[feature] = {
                    # The denominator is 0 only for an outcome with no rows
                    # when alpha is 0; report probability 0 in that case.
                    value: (counts.get(value, 0) + self.alpha) / denominator
                    if denominator > 0
                    else 0.0
                    for value in uniques
                }

            self.feature_given_outcome[outcome] = feature_given_outcome

    def predict(self, X):
        """Predict the most probable outcome for every row of X.

        A feature value that was never seen during `fit` carries no evidence
        for any outcome, so that feature is skipped for the row instead of
        raising `KeyError`. When several outcomes tie, the one that comes first
        in `outcome_probs` wins.

        Parameters
        ----------
        X : pandas.DataFrame
            Rows to classify; its columns must all have been present in `fit`.

        Returns
        -------
        pandas.Series
            Predicted outcomes, indexed like X.

        Raises
        ------
        KeyError
            If X contains a column that was not present during `fit`.
        """
        predictions = []

        for _, feature_vec in X.iterrows():
            max_prob = None
            max_outcome = None

            for outcome, prior in self.outcome_probs.items():
                prob = prior
                conditionals = self.feature_given_outcome[outcome]

                for feature, value in feature_vec.items():
                    value_probs = conditionals[feature]
                    if value in value_probs:
                        prob *= value_probs[value]

                if max_prob is None or prob > max_prob:
                    max_prob = prob
                    max_outcome = outcome

            predictions.append(max_outcome)

        # An explicit dtype for the empty case avoids pandas' warning about the
        # default dtype of an empty Series; otherwise the dtype is inferred.
        return pd.Series(
            predictions,
            index=X.index,
            dtype=None if predictions else object,
        )

In [3]:
def accuracy(Y_true, Y_pred):
    """Return the fraction of positions where Y_pred equals Y_true.

    Both arguments are compared element-wise with `==`, so they must support
    that (for example two identically indexed pandas Series or two NumPy
    arrays of the same length).
    """
    matches = Y_true == Y_pred
    n_correct = sum(matches)
    return n_correct / len(Y_true)

### Data Importing and Splitting

In [4]:
# Load the tennis dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('tennis.csv')

# Preview the first rows to check the column names and the kind of values they hold.
df.head()

Unnamed: 0,outlook,temp,humidity,windy,play
0,sunny,hot,high,False,no
1,sunny,hot,high,True,no
2,overcast,hot,high,False,yes
3,rainy,mild,high,False,yes
4,rainy,cool,normal,False,yes


In [5]:
# Hold out 20% of the rows for testing. Stratifying on 'play' keeps the label
# proportions similar in both splits; the fixed random_state makes the split reproducible.
df_train, df_test = train_test_split(df, test_size=0.2, stratify=df['play'], random_state=7)

In [6]:
# Separate the feature columns from the 'play' label for each split.
X_train = df_train.drop(columns='play')
Y_train = df_train['play']
X_test = df_test.drop(columns='play')
Y_test = df_test['play']

### Running the Model and Computing Accuracy

In [7]:
# Fit the hand-written Naive Bayes classifier on the training split.
model = MyNaiveBayes()
model.fit(X_train, Y_train)

In [8]:
# Predict on the same rows the model was fitted on: this measures how well the
# model fits the training data, not how well it generalizes.
Y_pred = model.predict(X_train)

print("Training Accuracy: {:.2f}%".format(accuracy(Y_train, Y_pred) * 100))

Training Accuracy: 81.82%


In [9]:
# Predict on the held-out rows; Y_pred is rebound and no longer holds the
# training-set predictions.
Y_pred = model.predict(X_test)

print("Testing Accuracy: {:.2f}%".format(accuracy(Y_test, Y_pred) * 100))

Testing Accuracy: 100.00%


### Data Preparation for the scikit-learn Model

In [10]:
# For every column, pair its distinct values (in the order `unique()` returns
# them) with consecutive integer codes 0, 1, 2, ... for later encoding.
replacement_values = {}
for feature in df.columns:
    categories = list(df[feature].unique())
    numeric = list(range(len(categories)))
    replacement_values[feature] = (categories, numeric)

In [11]:
def encode_column(column):
    """Return `column` with each value mapped to its integer code.

    The (categories, codes) pair is looked up in `replacement_values` under the
    column's name.
    """
    categories, codes = replacement_values[column.name]
    return column.map(dict(zip(categories, codes)))


# Build encoded copies from the untouched split frames instead of calling
# `replace(..., inplace=True)` on a selected column: that chained in-place call
# acts on a temporary object under pandas Copy-on-Write and would leave X_train
# unchanged. Deriving from df_train/df_test also makes this cell safe to re-run.
# The names X_train/X_test/Y_train/Y_test are rebound because later cells use them.
feature = Y_train.name
X_train = df_train.drop(columns=feature).apply(encode_column)
X_test = df_test.drop(columns=feature).apply(encode_column)
Y_train = encode_column(df_train[feature])
Y_test = encode_column(df_test[feature])

### scikit-learn Model Testing

In [12]:
from sklearn.naive_bayes import CategoricalNB
from sklearn.metrics import accuracy_score

In [13]:
# Fit scikit-learn's CategoricalNB on the training split for comparison.
# Note: `model` is rebound here and no longer refers to the MyNaiveBayes instance.
model = CategoricalNB()
model.fit(X_train, Y_train)

In [14]:
# Training-set predictions from the scikit-learn model, scored with
# sklearn's accuracy_score.
Y_pred = model.predict(X_train)

print("Training Accuracy: {:.2f}%".format(accuracy_score(Y_train, Y_pred) * 100))

Training Accuracy: 81.82%


In [15]:
# Held-out predictions from the scikit-learn model, scored with
# sklearn's accuracy_score.
Y_pred = model.predict(X_test)

print("Testing Accuracy: {:.2f}%".format(accuracy_score(Y_test, Y_pred) * 100))

Testing Accuracy: 100.00%
