In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from assignment1_Finn import *

In [None]:
class NaiveBayes:
    """Gaussian Naive Bayes classifier.

    Each feature is modelled as an independent normal distribution per class.
    ``fit`` estimates per-class feature means, variances and class priors;
    ``predict`` returns, for every sample, the class with the highest
    log-posterior.

    Class labels may be any values accepted by ``np.unique`` (integers,
    floats, strings); they do not need to be ``0..k-1``.
    """

    # Relative variance floor: the largest per-feature variance of the
    # training data is scaled by this factor and added to every per-class
    # variance, so a feature that is constant within a class does not cause a
    # division by zero.
    _VAR_SMOOTHING = 1e-9

    def fit(self, X, y):
        """Estimate per-class means, variances and priors.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Numeric training features.
        y : array-like of shape (n_samples,)
            Class label of each training sample.

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``y`` disagree on the number of
            samples.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("cannot fit on an empty training set")
        if y.shape[0] != n_samples:
            raise ValueError(
                "X and y have inconsistent lengths: %d vs %d"
                % (n_samples, y.shape[0])
            )

        self._classes = np.unique(y)
        n_classes = len(self._classes)

        # One row per class, addressed by the class's position in
        # self._classes (never by the label value itself).
        self._mean = np.zeros((n_classes, n_features))
        self._var = np.zeros((n_classes, n_features))
        self._priors = np.zeros(n_classes)

        epsilon = self._VAR_SMOOTHING * X.var(axis=0).max()
        if not epsilon > 0:
            # All training rows identical (or non-finite variance): fall back
            # to an absolute floor so the variances stay strictly positive.
            epsilon = self._VAR_SMOOTHING

        for idx, c in enumerate(self._classes):
            X_c = X[y == c]
            self._mean[idx, :] = X_c.mean(axis=0)
            self._var[idx, :] = X_c.var(axis=0) + epsilon
            self._priors[idx] = X_c.shape[0] / float(n_samples)

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        list
            Predicted label for each row, in order.
        """
        X = np.asarray(X, dtype=float)
        return [self._predict(x) for x in X]

    def _predict(self, x):
        """Return the most probable class label for a single sample ``x``."""
        # Work with the log-density directly instead of log(pdf): the density
        # underflows to 0 for points far from a class mean, which would turn
        # every score into -inf and make argmax meaningless.
        log_likelihood = -0.5 * np.sum(
            np.log(2.0 * np.pi * self._var) + (x - self._mean) ** 2 / self._var,
            axis=1,
        )
        log_posteriors = np.log(self._priors) + log_likelihood
        return self._classes[np.argmax(log_posteriors)]

    def _pdf(self, class_idx, x):
        """Per-feature Gaussian density of ``x`` under class ``class_idx``.

        ``class_idx`` is the position of the class in ``self._classes``.
        """
        mean = self._mean[class_idx]
        var = self._var[class_idx]
        numerator = np.exp(-((x - mean) ** 2) / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator

    def evaluate_acc(self, true_y, target_y):
        """Return the fraction of entries where the two label sequences agree.

        Parameters
        ----------
        true_y : array-like of shape (n_samples,)
            Reference labels.
        target_y : array-like of shape (n_samples,)
            Labels to compare against ``true_y``.

        Raises
        ------
        ValueError
            If the two inputs do not have the same shape.
        """
        # Convert first: comparing two plain lists with == yields a single
        # bool, not an element-wise comparison.
        true_y = np.asarray(true_y)
        target_y = np.asarray(target_y)
        if true_y.shape != target_y.shape:
            raise ValueError(
                "label arrays have different shapes: %s vs %s"
                % (true_y.shape, target_y.shape)
            )
        return np.mean(true_y == target_y)


In [None]:
# Separate the predictor columns from the 'mpg' target.
# NOTE(review): `DataFrame` is not defined in this notebook; presumably it is
# provided by the star import from assignment1_Finn — confirm.
features = DataFrame.drop(['mpg', 'car-name'], axis=1)
X = features.values
output = DataFrame['mpg']
Y = output.values

# Sequential hold-out split: the first N_TRAIN rows train the model and every
# remaining row is used for testing. The test slice starts at N_TRAIN (not
# N_TRAIN + 1) so that row N_TRAIN is not silently dropped from both sets.
N_TRAIN = 350
train_x = X[:N_TRAIN]
train_y = Y[:N_TRAIN]
test_x = X[N_TRAIN:]
test_y = Y[N_TRAIN:]

model = NaiveBayes()

model.fit(train_x, train_y)
predictions = model.predict(test_x)
# Last expression of the cell: the accuracy on the held-out rows is displayed.
model.evaluate_acc(test_y, predictions)