In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Parse the tab-delimited weather table into a DataFrame.
df = pd.read_csv('./weather.txt', sep='\t')
# Bare expression so the notebook renders the frame as this cell's output.
df

Unnamed: 0,Outlook,Temp,Humidity,Windy,Play
0,Rainy,Hot,High,f,no
1,Rainy,Hot,High,t,no
2,Overcast,Hot,High,f,yes
3,Sunny,Mild,High,f,yes
4,Sunny,Cool,Normal,f,yes
5,Sunny,Cool,Normal,t,no
6,Overcast,Cool,Normal,t,yes
7,Rainy,Mild,High,f,no
8,Rainy,Cool,Normal,f,yes
9,Sunny,Mild,Normal,f,yes


In [4]:
def process(df, target=None):
    """Split a DataFrame into a feature frame and a target column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the feature columns and the target column.
    target : column label, optional
        Column to use as the target. When omitted, the last column of
        ``df`` is used.

    Returns
    -------
    x : pandas.DataFrame
        ``df`` without the target column (a new frame; ``df`` is not modified).
    y : pandas.Series
        The target column.

    Raises
    ------
    KeyError
        If ``target`` is given but is not a column of ``df``.
    """
    if target is None:
        # Default convention: the label lives in the right-most column.
        target = df.columns[-1]
    y = df[target]
    x = df.drop(columns=target)
    return x, y

# Split the loaded frame: x holds every column except the last, y holds the last column.
x,y = process(df)

In [11]:
class NB:
    """Naive Bayes classifier for categorical features held in a pandas DataFrame.

    ``fit`` tabulates three probability tables from the training data and
    ``predict`` combines them with Bayes' rule to pick the most probable
    class for a single sample.

    Parameters
    ----------
    alpha : float, default 0.0
        Additive (Laplace) smoothing strength applied to the conditional
        likelihoods. The default of 0 keeps the plain relative frequencies.

    Attributes
    ----------
    likelihood : dict
        ``likelihood[feature]["<value>_<class>"]`` is P(value | class).
    class_prob : dict
        ``class_prob[class]`` is P(class).
    pred_prob : dict
        ``pred_prob[feature][value]`` is P(value).
    """

    def __init__(self, alpha=0.0):
        if alpha < 0:
            raise ValueError("alpha must be non-negative")
        self.alpha = alpha

        # Training data; populated by fit().
        self.x = None
        self.y = None

        self.numrec = 0
        self.numfeat = 0
        self.features = []

        self.likelihood = {}
        self.class_prob = {}
        self.pred_prob = {}

    @staticmethod
    def _key(feat_val, outcome):
        """Build the ``"<value>_<class>"`` key used in the likelihood tables."""
        # Formatting (rather than ``+``) also works for non-string values.
        return f"{feat_val}_{outcome}"

    def fit(self, x, y):
        """Estimate all probability tables from the training data.

        Parameters
        ----------
        x : pandas.DataFrame
            One row per record, one categorical column per feature.
        y : pandas.Series
            Class label of each record, aligned with ``x``.

        Raises
        ------
        ValueError
            If ``x`` and ``y`` differ in length or contain no records.
        """
        if len(x) != len(y):
            raise ValueError(
                f"x and y must have the same length, got {len(x)} and {len(y)}"
            )
        if len(x) == 0:
            raise ValueError("cannot fit on an empty dataset")

        self.x = x
        self.y = y
        self.numrec = x.shape[0]
        self.numfeat = x.shape[1]
        self.features = list(x.columns)

        # Start from empty tables so a refit never keeps entries that came
        # from previously seen features or classes.
        self.likelihood = {}
        self.class_prob = {}
        self.pred_prob = {}

        self.calc_likelihood()
        self.calc_pred_prob()
        self.calc_class_prob()

    def calc_class_prob(self):
        """Fill ``class_prob`` with the relative frequency of each class."""
        for outcome in np.unique(self.y):
            count = int((self.y == outcome).sum())
            self.class_prob[outcome] = count / self.numrec

    def calc_pred_prob(self):
        """Fill ``pred_prob`` with the relative frequency of each feature value."""
        for feat in self.features:
            counts = self.x[feat].value_counts().to_dict()
            self.pred_prob[feat] = {
                feat_val: count / self.numrec for feat_val, count in counts.items()
            }

    def calc_likelihood(self):
        """Fill ``likelihood`` with P(feature value | class) for every combination.

        Combinations never observed in the data get probability
        ``alpha / (class_count + alpha * n_values)``, i.e. 0 without smoothing.
        """
        outcomes = np.unique(self.y)
        for feat in self.features:
            feat_vals = np.unique(self.x[feat])
            table = self.likelihood.setdefault(feat, {})
            for outcome in outcomes:
                # A boolean mask selects each matching row exactly once;
                # selecting by a list of index labels would repeat rows
                # whenever the index contains duplicate labels.
                mask = self.y == outcome
                total = int(mask.sum())
                counts = self.x[feat][mask].value_counts().to_dict()
                denom = total + self.alpha * len(feat_vals)
                for feat_val in feat_vals:
                    table[self._key(feat_val, outcome)] = (
                        counts.get(feat_val, 0) + self.alpha
                    ) / denom

    def predict(self, x):
        """Return the most probable class for one sample.

        Parameters
        ----------
        x : iterable
            Feature values of the sample, in the same order as the columns
            of the frame passed to ``fit``.

        Returns
        -------
        The class label with the highest score
        ``P(class) * prod P(value | class) / prod P(value)``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        ValueError
            If the number of values differs from the number of features.
        KeyError
            If a value was never seen for its feature during ``fit``.
        """
        if not self.class_prob:
            raise RuntimeError("NB.predict() called before fit()")

        values = list(x)
        if len(values) != len(self.features):
            # zip() would silently drop the extra/missing entries otherwise.
            raise ValueError(
                f"expected {len(self.features)} feature values, got {len(values)}"
            )
        for feat, feat_val in zip(self.features, values):
            if feat_val not in self.pred_prob[feat]:
                raise KeyError(
                    f"value {feat_val!r} was not seen for feature {feat!r} during fit"
                )

        res = {}
        for outcome, prior in self.class_prob.items():
            num = 1.0
            den = 1.0
            for feat, feat_val in zip(self.features, values):
                num *= self.likelihood[feat][self._key(feat_val, outcome)]
                den *= self.pred_prob[feat][feat_val]
            res[outcome] = num * prior / den

        return max(res, key=res.get)

In [12]:
# Create the classifier and fit it on the feature frame x and the label column y.
nb = NB()
nb.fit(x,y)

In [13]:
# One sample to classify; predict() pairs these values positionally with the
# feature columns the model was fit on, so they must follow the column order of x.
query = np.array(['Rainy','Mild', 'Normal', 't'])
nb.predict(query)

'yes'