In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

class NaiveBayesClassifier:
    """Gaussian naive Bayes classifier.

    Each feature is modelled, per class, as an independent normal
    distribution whose mean and variance are estimated in ``fit``.
    Prediction picks the class with the largest log-posterior, i.e. the
    log-prior plus the sum of per-feature log-densities.

    Attributes set by ``fit``:
        X_train: training features as a float ndarray.
        y_train: training labels as an ndarray.
        classes: sorted unique labels found in ``y``.
        parameters: ``{label: {"mean", "var", "log_prior"}}``.
    """

    # Added to the variance terms so zero-variance features do not divide
    # by zero or take log(0).
    _EPS = 1e-6

    def fit(self, X, y):
        """Estimate per-class feature means, variances and log-priors.

        Args:
            X: array-like of shape (n_samples, n_features), numeric.
            y: array-like of shape (n_samples,) holding class labels.
        """
        # Coerce to ndarrays so boolean masking and row iteration behave
        # the same for lists, ndarrays, DataFrames and Series (a Series
        # with a shuffled index would otherwise be index-sensitive).
        # Variance is therefore the population variance (ddof=0).
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        self.X_train = X
        self.y_train = y
        self.classes = np.unique(y)
        self.parameters = {}

        n_samples = len(X)
        for c in self.classes:
            X_c = X[y == c]
            self.parameters[c] = {
                "mean": X_c.mean(axis=0),
                "var": X_c.var(axis=0),
                # Computed once here instead of once per predicted sample.
                "log_prior": np.log(len(X_c) / n_samples),
            }

    def _pdf(self, X, mean, var):
        """Return the per-feature Gaussian density of ``X``."""
        eps = self._EPS
        numerator = np.exp(-((X - mean) ** 2) / (2 * var + eps))
        denominator = np.sqrt(2 * np.pi * var + eps)
        return numerator / denominator

    def _log_pdf(self, X, mean, var):
        """Return the per-feature Gaussian log-density of ``X``.

        Algebraically equal to ``np.log(self._pdf(X, mean, var))`` but
        evaluated in log space, so it stays finite when the density
        itself would underflow to zero.
        """
        eps = self._EPS
        exponent = -((X - mean) ** 2) / (2 * var + eps)
        log_norm = 0.5 * np.log(2 * np.pi * var + eps)
        return exponent - log_norm

    def _predict_single(self, x):
        """Return the most probable class label for one sample ``x``."""
        posteriors = []

        for c in self.classes:
            params = self.parameters[c]
            log_likelihood = np.sum(
                self._log_pdf(x, params["mean"], params["var"])
            )
            posteriors.append(params["log_prior"] + log_likelihood)

        return self.classes[np.argmax(posteriors)]

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Args:
            X: array-like of shape (n_samples, n_features), numeric.

        Returns:
            ndarray of shape (n_samples,) with the predicted labels.
        """
        # Iterating a DataFrame yields column labels, not rows; convert
        # so every supported input is iterated row by row.
        X = np.asarray(X, dtype=float)
        y_pred = [self._predict_single(x) for x in X]
        return np.array(y_pred)

# Load the data and discard every column that contains a missing value.
df = pd.read_csv("atlanta_9-24-2016_9-30-2017.csv").dropna(axis=1)

# Replace each categorical column with integer codes. One encoder is refit
# per column, so only the mapping of the last column remains on `encoder`.
encoder = LabelEncoder()
for column in (
    'Origin',
    'Commodity Name',
    'City Name',
    'Package',
    'Variety',
    'Date',
    'Repack',
):
    df[column] = encoder.fit_transform(df[column])

# The first ten columns are the features; the eleventh is the target.
x, y = df.iloc[:, :10], df.iloc[:, 10]

# Hold out 20% of the rows for evaluation, with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=21,
)

# Fit the scaler on the training split only, then apply it to both splits.
st_x = StandardScaler()
x_train = st_x.fit_transform(x_train)
x_test = st_x.transform(x_test)

# Train the classifier and predict the held-out rows.
nb_classifier = NaiveBayesClassifier()
nb_classifier.fit(x_train, y_train)
y_pred = nb_classifier.predict(x_test)

# Report accuracy as a percentage.
score = accuracy_score(y_test, y_pred)
print("Accuracy:", score * 100)

Accuracy: 100.0
