In [112]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split

In [113]:
irys = pd.read_csv('iris.csv')
X_train, X_test = train_test_split(irys, test_size=0.3)

In [114]:
class NaiveBayes:
    def __init__(self, df: pd.DataFrame) -> None:
        self.flowerNames = df.iloc[:, -1].unique()
        self.irysSplitted = {k : df[df["variety"] == k].iloc[:, :-1] for k in self.flowerNames}
        self.columnNames = df.columns[:-1]
        self.probDict = {k : len(self.irysSplitted[k])/len(df) for k in self.flowerNames}
        self.meanDict = {k : self.irysSplitted[k].mean() for k in self.flowerNames}
        self.stdDict = {k : self.irysSplitted[k].std() for k in self.flowerNames}
    
    @staticmethod
    def gauss(a1, classMean, classStd) -> float:
        return np.exp(-((a1 - classMean)**2)/(2*classStd**2))

    def predict(self, sample: pd.core.series):
        result = {}
        for flowerClass in self.flowerNames:
            prob = self.probDict[flowerClass]
            for column in range(len(self.columnNames)):
                prob *= self.gauss(sample[column], self.meanDict[flowerClass][column], self.stdDict[flowerClass][column])
            result[flowerClass] = prob
        return max(result, key=result.get)
    
    def score(self, X_test: pd.DataFrame):
        good, bad = 0, 0
        for sample in X_test.values:
            if (x:=self.predict(sample)) == sample[-1]:
                good += 1
            else:
                bad +=1
        return good/(bad+good)*100

In [115]:
nb = NaiveBayes(X_train)
#print(type(irys.iloc[0, :-1]))
#print(nb.predict(irys.iloc[0, :-1]))
print(nb.score(X_test))

95.55555555555556
