In [5]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [6]:
data = load_iris()

In [7]:
X = data.data
y = data.target

In [16]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

In [27]:
model1 = LogisticRegression()
model2 = GaussianNB()

In [8]:
X_train , X_test , y_train , y_test = train_test_split(X  ,y , test_size = 0.2)

In [9]:
from sklearn.metrics import accuracy_score

In [36]:
model1.fit(X_train  ,y_train)

In [37]:
accuracy_score(y_test , model1.predict(X_test))

0.9333333333333333

In [38]:
model2.fit(X_train , y_train)

In [39]:
accuracy_score(y_test , model2.predict(X_test))

1.0

In [50]:
df = pd.DataFrame(data['data'])

In [52]:
df['target'] = data['target']

In [64]:
df.columns = data['feature_names'] + [df.columns[-1]]

In [65]:
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [1]:
# Lets implement Gaussian Naibe Bayes from scratch

In [3]:
class GaussianNB:
    def __init__(self):
        self.prior_prob = {}
        self.mean = {}
        self.var = {}
        self.classes = []

    def fit(self,X,y):
        self.classes = np.unique(y)
        for cls in self.classes:
            class_data = X[y == cls]
            self.mean[cls] = np.mean(class_data,axis=0)
            self.var[cls] = np.var(class_data,axis=0)
            self.prior_prob[cls] = class_data.shape[0] / X.shape[0]

    def predict(self,X_test):
        result = [self.predict_single(x) for x in X_test]
        return np.array(result)
    
    def predict_single(self,x):
        posteriors = []
        for cls in self.classes:
            log_prior = np.log(self.prior_prob[cls])
            log_likelihood = np.sum(np.log(self.pdf(cls,x)))
            posterior = log_likelihood + log_prior
            posteriors.append(posterior)
        return self.classes[np.argmax(posteriors)]
    
    def pdf(self,cls,x):
        mean = self.mean[cls]
        var = self.var[cls]
        numerator = np.exp(-(x - mean)**2 / (2 * var))
        denominator = (2 * np.pi * var) ** 0.5
        return numerator * denominator   

        

In [4]:
model = GaussianNB()

In [12]:
model.fit(X_train,y_train)

In [13]:
pred = model.predict(X_test)

In [15]:
accuracy_score(y_test , pred)

0.9666666666666667