In [4]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [5]:
# Load the iris dataset, keep the integer class targets, and standardise
# every feature column to zero mean / unit variance.
# NOTE(review): the scaler is fit on the full dataset before the train/test
# split below, so test-set statistics leak into the training features.
# Consider splitting first and fitting the scaler on the training part only.
iris = load_iris()
labels = iris["target"]
data = StandardScaler().fit_transform(iris["data"])

In [6]:
# Hold out 25% of the samples for evaluation; the fixed random_state keeps
# the split reproducible across kernel restarts.
trainX, testX, trainY, testY = train_test_split(
    data,
    labels,
    train_size=0.75,
    test_size=0.25,
    random_state=33,
)

In [13]:
class NBClassifier(object):
    """Gaussian Naive Bayes classifier for continuous attributes.

    Each attribute is modelled, per class, as an independent normal
    distribution whose mean and standard deviation are estimated from the
    training rows of that class. Prediction picks the class that maximises
    P(t | C) * P(C); the evidence P(t) is the same for every class and is
    therefore left out of the comparison.
    """

    # Lower bound applied to standard deviations so that a constant
    # attribute does not cause a division by zero in the density.
    _MIN_SIGMA = 1e-9

    def fit(self, data, labels):
        """Store the training set and its summary statistics.

        Parameters
        ----------
        data : array-like of shape (N, M)
            Training samples, one row per sample.
        labels : array-like of shape (N,)
            Class label of every training sample.

        Raises
        ------
        ValueError
            If `data` is not 2-D or `labels` has a different number of rows.
        """
        self.X = np.asarray(data, dtype=float)
        self.Y = np.asarray(labels)
        if self.X.ndim != 2:
            raise ValueError("data must be a 2-D array of shape (N, M)")
        if self.Y.shape[0] != self.X.shape[0]:
            raise ValueError("data and labels must have the same number of rows")
        self.N = self.X.shape[0]
        self.M = self.X.shape[1]
        # np.unique returns the distinct labels in sorted order.
        self.class_values = np.unique(self.Y)
        self.attr_means = np.mean(self.X, axis=0)
        self.attr_stdevs = np.std(self.X, axis=0)

    def get_gaussian_probability(self, mu, sigma, x):
        """Return the normal density N(x; mu, sigma**2).

        `sigma` is a standard deviation. It is clamped to a tiny positive
        value so a zero-variance attribute yields a finite/zero result
        instead of a division by zero.
        """
        sigma = np.maximum(sigma, self._MIN_SIGMA)
        variance = sigma * sigma
        exponent = -((x - mu) ** 2) / (2.0 * variance)
        # The normalising constant is sqrt(2*pi*sigma^2), not sqrt(2*pi*sigma).
        return np.exp(exponent) / np.sqrt(2.0 * np.pi * variance)

    def get_class_probablity(self, class_value):
        """Return the prior P(C): the fraction of training rows labelled `class_value`."""
        return np.count_nonzero(self.Y == class_value) / float(self.N)

    # Correctly spelled alias; the original (misspelled) name is kept so
    # existing callers continue to work.
    get_class_probability = get_class_probablity

    def get_all_class_probabilities(self):
        """Return the priors of all classes as a list ordered like `class_values`."""
        return [self.get_class_probablity(c) for c in self.class_values]

    def get_prob_of_value_in_attr(self, data, attr_idx, value):
        """Return the Gaussian density of `value` for one attribute.

        Parameters
        ----------
        data : ndarray of shape (n, M)
            Rows used to estimate the attribute's mean and standard deviation.
        attr_idx : int
            Column index of the attribute.
        value : float
            Value whose density is evaluated.
        """
        column = data[:, attr_idx]
        return self.get_gaussian_probability(np.mean(column), np.std(column), value)

    def get_prob_of_tuple(self, t):
        """Return the naive (product-of-attributes) density of `t` over the whole train set."""
        return np.prod([self.get_prob_of_value_in_attr(self.X, i, t[i])
                        for i in range(self.M)])

    def get_prob_of_tuple_in_class(self, t, class_value):
        """Return the class-conditional density P(t | C = class_value)."""
        # Restrict the statistics to the training rows of the requested class.
        subset = self.X[self.Y == class_value]
        return np.prod([self.get_prob_of_value_in_attr(subset, i, t[i])
                        for i in range(self.M)])

    def predict(self, t):
        """Return the most likely class label for the tuple `t`.

        Uses P(C | t) proportional to P(t | C) * P(C). The returned value is
        an element of `class_values` (the actual label), which coincides with
        its positional index when labels are 0..K-1.
        """
        priors = self.get_all_class_probabilities()
        posteriors = [self.get_prob_of_tuple_in_class(t, c) * prior
                      for c, prior in zip(self.class_values, priors)]
        return self.class_values[int(np.argmax(posteriors))]

In [14]:
# Instantiate the Naive Bayes classifier and fit it on the training split.
clf = NBClassifier()
clf.fit(trainX, trainY)

In [15]:
# Classify every held-out sample, then compare against the true labels.
predictions = np.array([clf.predict(sample) for sample in testX]).astype("int")
print(testY, "\n", predictions)
score = accuracy_score(testY, predictions)
print("Score = ", score)

[1 1 0 1 2 2 0 0 2 2 2 0 2 1 2 1 2 0 1 2 0 0 2 0 2 2 1 1 2 2 1 1 2 2 2 2 2
 1] 
 [1 1 0 1 1 2 0 0 2 2 2 0 2 1 2 1 2 0 1 2 0 0 2 0 1 2 1 1 2 2 1 1 2 2 2 2 2
 1]
Score =  0.947368421053
