In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Load the weather table from a text file that sits next to the notebook
# (pd.read_table treats the file as tab-separated unless told otherwise).
df = pd.read_table('./weather.txt')
# A bare name as the last line of a cell renders the frame as the cell output.
df

Unnamed: 0,Outlook,Temp,Humidity,Windy,Play
0,Rainy,Hot,High,f,no
1,Rainy,Hot,High,t,no
2,Overcast,Hot,High,f,yes
3,Sunny,Mild,High,f,yes
4,Sunny,Cool,Normal,f,yes
5,Sunny,Cool,Normal,t,no
6,Overcast,Cool,Normal,t,yes
7,Rainy,Mild,High,f,no
8,Rainy,Cool,Normal,f,yes
9,Sunny,Mild,Normal,f,yes


In [3]:
def pre_processing(df):
    """Split a table into its feature columns and its target column.

    The last column of ``df`` is taken as the target; every other column is
    kept as a feature. ``df`` itself is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose final column holds the value to predict.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` is ``df`` without its last column and ``y`` is
        that last column.
    """
    target_column = df.columns[-1]
    targets = df[target_column]
    predictors = df.drop(columns=[target_column])
    return predictors, targets
# X: every column of df except the last; y: the last column (the value to predict).
X,y = pre_processing(df)

In [79]:
# Display the feature table produced by pre_processing.
X

Unnamed: 0,Outlook,Temp,Humidity,Windy
0,Rainy,Hot,High,f
1,Rainy,Hot,High,t
2,Overcast,Hot,High,f
3,Sunny,Mild,High,f
4,Sunny,Cool,Normal,f
5,Sunny,Cool,Normal,t
6,Overcast,Cool,Normal,t
7,Rainy,Mild,High,f
8,Rainy,Cool,Normal,f
9,Sunny,Mild,Normal,f


In [5]:
class NB:
    """Naive Bayes classifier for categorical (string-valued) features.

    All probabilities are plain relative frequencies taken from the training
    data; no smoothing is applied, so a feature value that never occurs
    together with an outcome contributes a likelihood of 0 for that outcome.

    Attributes
    ----------
    likelihood : dict
        ``feature -> {feature_val + '_' + outcome -> P(feature_val | outcome)}``.
    class_prob : dict
        ``outcome -> P(outcome)``.
    pred_prob : dict
        ``feature -> {feature_val -> P(feature_val)}``.
    X, y
        Training features and targets as passed to ``fit`` (``None`` before).
    features : list
        Feature column names, in column order.
    num_feat, num_records : int
        Number of feature columns / training rows.
    """

    def __init__(self):
        self.likelihood = {}  # feature : {feature_val+outcome : probab}
        self.class_prob = {}  # class : probab
        self.pred_prob = {}   # feature : {feature_val : probab}

        # Training data and its dimensions; filled in by fit().
        self.X = None
        self.y = None

        self.features = []
        self.num_feat = 0

        self.num_records = 0

    def fit(self, X, y):
        """Estimate every probability table from the training data.

        Parameters
        ----------
        X : pandas.DataFrame
            One row per record, one column per categorical feature. Feature
            values and outcomes are joined with ``+`` into dictionary keys, so
            both must be strings.
        y : pandas.Series
            Outcome for each row of ``X``; expected to share ``X``'s index.
        """
        self.X = X
        self.y = y

        self.features = list(X.columns)
        self.num_feat = X.shape[1]

        self.num_records = X.shape[0]

        # Start from empty tables so fitting again never keeps classes,
        # features or values left over from an earlier data set.
        self.likelihood = {}
        self.class_prob = {}
        self.pred_prob = {}

        outcomes = np.unique(self.y)
        for outcome in outcomes:
            self.class_prob[outcome] = 0

        # Pre-fill every (value, outcome) pair with 0 so that combinations
        # absent from the data still have an entry when predict() looks them up.
        for feature in self.features:
            self.likelihood[feature] = {}
            self.pred_prob[feature] = {}
            for feat_val in np.unique(self.X[feature]):
                self.pred_prob[feature][feat_val] = 0
                for outcome in outcomes:
                    self.likelihood[feature][feat_val + '_' + outcome] = 0

        self.calc_class_prob()
        self.calc_likelihood()
        self.calc_pred_prob()

    def calc_class_prob(self):
        """Set ``class_prob[outcome]`` to the fraction of records with that outcome."""
        for outcome in np.unique(self.y):
            count = (self.y == outcome).sum()
            self.class_prob[outcome] = count / self.num_records

    def calc_likelihood(self):
        """Set ``likelihood[feature][value_outcome]`` to P(value | outcome)."""
        for outcome in np.unique(self.y):
            # Rows whose target equals this outcome. The mask is applied to the
            # data given to fit() (self.X), not to any notebook-level variable.
            has_outcome = self.y == outcome
            outcome_count = has_outcome.sum()
            for feature in self.features:
                feat_val_count = self.X.loc[has_outcome, feature].value_counts().to_dict()
                for feat_val, count in feat_val_count.items():
                    self.likelihood[feature][feat_val + '_' + outcome] = count / outcome_count

    def calc_pred_prob(self):
        """Set ``pred_prob[feature][value]`` to the fraction of records with that value."""
        for feature in self.features:
            feat_vals = self.X[feature].value_counts().to_dict()
            for feat_val, count in feat_vals.items():
                self.pred_prob[feature][feat_val] = count / self.num_records

    def predict(self, X):
        """Return the most probable outcome for a single record.

        Parameters
        ----------
        X : iterable of str
            One value per feature, in the same order as the training columns.
            Pairing stops at the shorter of the two sequences.

        Returns
        -------
        The outcome with the largest posterior score; on a tie the outcome
        that comes first in sorted order wins.

        Raises
        ------
        KeyError
            If a value was never seen for its feature during ``fit``.
        """
        X = list(X)
        print(X)  # echo of the query, kept so existing cell output is unchanged

        # Fail with a readable message instead of an opaque composite-key
        # KeyError raised halfway through the computation.
        for feat, feat_val in zip(self.features, X):
            if feat_val not in self.pred_prob[feat]:
                raise KeyError(
                    f"value {feat_val!r} was not seen for feature {feat!r} during fit"
                )

        probs_outcome = {}
        for outcome in np.unique(self.y):
            class_probab = self.class_prob[outcome]
            num = 1.0
            den = 1.0
            for feat, feat_val in zip(self.features, X):
                num *= self.likelihood[feat][feat_val + '_' + outcome]
                den *= self.pred_prob[feat][feat_val]

            # Bayes' rule under the naive independence assumption:
            # P(outcome | x) = prod P(x_i | outcome) * P(outcome) / prod P(x_i)
            probs_outcome[outcome] = num * class_probab / den

        return max(probs_outcome, key=lambda outcome: probs_outcome[outcome])
        
                    

In [6]:
# Create the classifier and estimate its probability tables from the
# feature table X and target column y prepared above.
nb = NB()
nb.fit(X,y)

In [7]:
# Classify a single record; values are listed in the same order as the
# columns of X, because predict pairs them positionally with the features.
query = np.array(['Rainy','Mild', 'Normal', 't'])
nb.predict(query)

['Rainy', 'Mild', 'Normal', 't']


'yes'

In [98]:
# Classify a second record, again with values in the column order of X.
query = np.array(['Sunny','Hot', 'High', 't'])
nb.predict(query)

['Sunny', 'Hot', 'High', 't']


'no'