In [3]:
import numpy as np
import pandas as pd
import random
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import train_test_split
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics import accuracy_score
from matplotlib import pyplot as plt
import warnings
warnings.filterwarnings('ignore')
iris = datasets.load_iris()

In [4]:
scaler = StandardScaler()
iris_scaled = scaler.fit_transform(iris.data)
X = iris_scaled
y = iris["target"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 50)
%matplotlib inline

In [5]:
class Naive_Bayes(object):
    
    def __init__(self, X, y):
        self.X = X
        self.y = y
        self.class_prob = []
        self.class_count = len(np.unique(y))
        self.classes = np.unique(y)
        for i in range(self.class_count):
            self.class_prob.append(len(np.where(self.y == i)[0]) / len(y))
    
    def classify(self, t):
        total_tuple_prob = 0
        final_class_prob = []
        for i in self.classes:
            locs = np.where(self.y == i)[0]
            attr_prob = 1
            for j in range(len(t)):
                mean = np.mean(self.X[locs, j], axis = 0)
                std = np.std(self.X[locs, j], axis = 0)
                p = 1.0 / (np.sqrt(2 * np.pi * std)) * np.exp(-(t[j] - mean) ** 2 / (2 * std ** 2))
                attr_prob *= p
            total_tuple_prob += attr_prob * self.class_prob[i]
            final_class_prob.append(attr_prob * self.class_prob[i])
        final_class_prob /= total_tuple_prob
        best_class = 0
        
        for i in range(len(final_class_prob)):
            if final_class_prob[i] > final_class_prob[best_class]:
                best_class = i
        return best_class

In [6]:
obj = Naive_Bayes(X_train, y_train)
predictions = []
for tuples in X_test:
    predictions.append(obj.classify(tuples))
print(accuracy_score(predictions, y_test))
print(obj.class_prob)
print(obj.classes)
print(obj.X)

0.947368421053
[0.3482142857142857, 0.3125, 0.3392857142857143]
[0 1 2]
[[ -5.37177559e-01  -1.24957601e-01   4.21564419e-01   3.96171883e-01]
 [ -1.02184904e+00   8.00654259e-01  -1.28440670e+00  -1.31297673e+00]
 [ -1.62768839e+00  -1.74477836e+00  -1.39813811e+00  -1.18150376e+00]
 [ -1.14301691e+00  -1.51337539e+00  -2.60824029e-01  -2.61192967e-01]
 [  7.95669016e-01  -1.24957601e-01   1.16081857e+00   1.31648267e+00]
 [ -1.74885626e+00  -3.56360566e-01  -1.34127240e+00  -1.31297673e+00]
 [  1.52267624e+00  -1.24957601e-01   1.21768427e+00   1.18500970e+00]
 [ -9.00681170e-01   1.03205722e+00  -1.34127240e+00  -1.18150376e+00]
 [ -1.74885626e+00   3.37848329e-01  -1.39813811e+00  -1.31297673e+00]
 [ -1.73673948e-01   1.72626612e+00  -1.17067529e+00  -1.18150376e+00]
 [  5.53333275e-01  -1.28197243e+00   7.05892939e-01   9.22063763e-01]
 [ -5.25060772e-02  -5.87763531e-01   7.62758643e-01   1.57942861e+00]
 [  9.16836886e-01  -1.24957601e-01   3.64698715e-01   2.64698913e-01]
 [ -5

In [45]:
locs = np.where(y == 0)
print(locs[0])

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49]


In [41]:
x = np.array([1,2])
print(x)
for i in x:
    print(i)

[1 2]
1
2
