In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Load the dataset from a local text file; pd.read_table splits columns on
# tabs unless a different `sep` is given.
df = pd.read_table('./weather.txt')
# Bare last expression: renders the whole frame as the cell output.
df

Unnamed: 0,Outlook,Temp,Humidity,Windy,Play
0,Rainy,Hot,High,f,no
1,Rainy,Hot,High,t,no
2,Overcast,Hot,High,f,yes
3,Sunny,Mild,High,f,yes
4,Sunny,Cool,Normal,f,yes
5,Sunny,Cool,Normal,t,no
6,Overcast,Cool,Normal,t,yes
7,Rainy,Mild,High,f,no
8,Rainy,Cool,Normal,f,yes
9,Sunny,Mild,Normal,f,yes


In [11]:
def pre_process(df):
    """Separate a frame into predictors and target.

    The last column of ``df`` is treated as the target; every other column
    is kept as a predictor. ``df`` itself is left untouched.

    Returns
    -------
    tuple
        ``(x, y)`` where ``x`` is ``df`` without its last column and ``y`` is
        that last column.
    """
    target_col = df.columns[-1]
    features = df.drop(columns=target_col)
    labels = df[target_col]
    return features, labels

In [44]:
class NB:
    """Categorical Naive Bayes classifier built on plain frequency tables.

    After ``fit`` the instance holds three lookup tables:

    * ``class_prob[outcome]``                       -- prior P(outcome)
    * ``likelihood[feature]["<value>_<outcome>"]``  -- P(value | outcome)
    * ``pred_prob[feature][value]``                 -- marginal P(value)

    No smoothing is applied: a (value, outcome) pair that never occurs in the
    training data keeps likelihood 0, which zeroes that outcome's score.
    """

    def __init__(self):
        self.likelihood = {}
        self.class_prob = {}
        self.pred_prob = {}

        # Real "empty" values rather than type objects, so an unfitted
        # instance can be inspected without surprises.
        self.features = []
        self.num_rec = 0
        self.num_feat = 0

        self.x = None
        self.y = None

    @staticmethod
    def _key(feat_val, outcome):
        """Return the ``likelihood`` dictionary key for a (value, outcome) pair."""
        return str(feat_val) + '_' + str(outcome)

    def fit(self, x, y):
        """Estimate priors, likelihoods and feature marginals from training data.

        Parameters
        ----------
        x : pandas.DataFrame
            One categorical column per feature.
        y : pandas.Series
            Class labels; its index labels are used to select rows of ``x``.
        """
        self.x = x
        self.y = y

        self.features = list(x.columns)
        self.num_rec, self.num_feat = x.shape

        # Drop anything left over from a previous fit.
        self.likelihood.clear()
        self.class_prob.clear()
        self.pred_prob.clear()

        outcomes = np.unique(self.y)
        for outcome in outcomes:
            self.class_prob[outcome] = 0

        for feature in self.features:
            self.likelihood[feature] = {}
            self.pred_prob[feature] = {}
            # Only the values observed in *this* column belong in its tables.
            for feat_val in np.unique(x[feature]):
                self.pred_prob[feature][feat_val] = 0
                for outcome in outcomes:
                    self.likelihood[feature][self._key(feat_val, outcome)] = 0

        self.calc_likelihood()
        self.calc_class_prob()
        self.calc_pred_prob()

    def calc_likelihood(self):
        """Fill ``likelihood`` with P(feature value | outcome) from counts."""
        for outcome in np.unique(self.y):
            # Index labels of the training rows that belong to this outcome.
            rows = self.y.index[self.y == outcome]
            outcome_count = len(rows)
            for feature in self.features:
                feat_vals = self.x.loc[rows, feature].value_counts().to_dict()
                for feat_val, count in feat_vals.items():
                    self.likelihood[feature][self._key(feat_val, outcome)] = count / outcome_count

    def calc_class_prob(self):
        """Fill ``class_prob`` with the relative frequency of each outcome."""
        for outcome in np.unique(self.y):
            count = int((self.y == outcome).sum())
            self.class_prob[outcome] = count / self.num_rec

    def calc_pred_prob(self):
        """Fill ``pred_prob`` with the relative frequency of each feature value."""
        for feature in self.features:
            feat_vals = self.x[feature].value_counts().to_dict()
            for feat_val, count in feat_vals.items():
                self.pred_prob[feature][feat_val] = count / self.num_rec

    def predict(self, X):
        """Return the most probable outcome for a single sample.

        Parameters
        ----------
        X : iterable
            One value per feature, in the same order as the training columns.

        Returns
        -------
        The outcome with the highest posterior score. On a tie the outcome
        that sorts first wins.

        Raises
        ------
        ValueError
            If the model has not been fitted or ``X`` has the wrong length.
        KeyError
            If a value was never observed for its feature during ``fit``.
        """
        X = list(X)
        if not self.class_prob:
            raise ValueError("fit must be called before predict")
        if len(X) != len(self.features):
            raise ValueError(
                "expected %d feature values, got %d" % (len(self.features), len(X))
            )

        res = {}
        for outcome in np.unique(self.y):
            num = 1.0
            den = 1.0
            for feat, feat_val in zip(self.features, X):
                if feat_val not in self.pred_prob[feat]:
                    raise KeyError(
                        "value %r was not seen for feature %r during fit" % (feat_val, feat)
                    )
                num *= self.likelihood[feat][self._key(feat_val, outcome)]
                den *= self.pred_prob[feat][feat_val]
            res[outcome] = num * self.class_prob[outcome] / den

        # Rank by each outcome's own score.
        return max(res, key=res.get)

                

        

In [45]:
# Split the frame: X keeps every column except the last, y is the last column.
X,y = pre_process(df)

In [46]:
# Build the classifier and fit it; fit() fills the likelihood, class-prior and
# feature-marginal tables from X and y.
nb = NB()
nb.fit(X,y)

In [47]:
# Single sample to classify; predict() pairs these values with the training
# columns by position, so they must follow the column order of X.
query = np.array(['Rainy','Mild', 'Normal', 't'])
nb.predict(query)

'no'