## Gaussian Discriminant Analysis

### Naive Bayes

In [6]:
import numpy as np
from sklearn import datasets
import pandas as pd
import scipy.stats
from sklearn.model_selection import train_test_split 

In [2]:
class Gaussian_NB():
    """Gaussian Naive Bayes classifier.

    Each feature is modelled, per class, as an independent univariate normal
    distribution. A sample is assigned to the class that maximises
    ``log P(class) + sum_j log N(x_j | mean_cj, std_cj)``.
    """

    # Lower bound applied to the fitted standard deviations at prediction
    # time, so a feature that is constant within a class does not produce a
    # zero-scale normal (which evaluates to NaN).
    _MIN_STD = 1e-9

    def __init__(self):
        pass

    def train(self, data):
        """Fit per-class feature means, standard deviations and priors.

        Parameters
        ----------
        data : 2-D array
            One row per sample; the last column holds the class label and
            every other column is a feature.

        Sets ``n_samples``, ``n_features``, ``classes``, ``n_classes``,
        ``_means``, ``_stds`` (both of shape ``(n_classes, n_features)``) and
        ``class_priors`` (relative class frequencies, summing to 1). The
        fitted means and standard deviations are printed.
        """
        data = np.asarray(data)
        self.n_samples, self.n_features = data.shape[0], data.shape[1] - 1
        labels = data[:, -1]
        self.classes, counts = np.unique(labels, return_counts=True)
        self.n_classes = len(self.classes)
        self._means = np.zeros((self.n_classes, self.n_features), dtype=np.float32)
        self._stds = np.zeros((self.n_classes, self.n_features), dtype=np.float32)

        for i, c in enumerate(self.classes):
            data_c = data[labels == c, :-1]
            self._means[i, :] = data_c.mean(axis=0)
            self._stds[i, :] = data_c.std(axis=0)

        # Normalise the counts so the attribute really holds probabilities.
        self.class_priors = counts / self.n_samples
        print(self._means, self._stds)

    def predict(self, input_value):
        """Return the predicted class label for every row of ``input_value``.

        Parameters
        ----------
        input_value : 2-D array-like
            One row per sample. Only the first ``n_features`` columns are
            read; any extra columns are ignored.

        Returns
        -------
        list
            Predicted labels, taken from ``self.classes`` (not positional
            indices), one per input row. Empty input yields an empty list.
        """
        samples = np.asarray(input_value, dtype=np.float64)
        if samples.size == 0:
            return []
        samples = samples[:, :self.n_features]

        stds = np.maximum(self._stds.astype(np.float64), self._MIN_STD)
        # Work in log space directly: log(pdf(x)) underflows to -inf for
        # samples far from every class mean, which makes all classes tie.
        # Broadcasting (n, 1, f) against (c, f) gives an (n, c, f) table.
        log_likelihood = scipy.stats.norm.logpdf(
            samples[:, None, :], loc=self._means, scale=stds
        )
        log_posterior = log_likelihood.sum(axis=2) + np.log(self.class_priors)
        best = np.argmax(log_posterior, axis=1)
        return self.classes[best].tolist()

#### Load Iris Dataset

In [68]:
# Pull the Iris bunch from scikit-learn and build a DataFrame whose columns
# are the feature names plus a trailing 'class' column holding the targets.
iris = datasets.load_iris()
iris_features = pd.DataFrame(data=iris.data, columns=iris.feature_names)
iris_df = iris_features.assign(**{'class': pd.Series(iris.target)})

# Preview the first rows of the assembled table.
iris_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


#### model, train and test

In [69]:
# Glue the label column onto the feature matrix so a single split keeps
# features and labels aligned, then hold out 20% of the rows for testing.
labelled_rows = np.concatenate((iris.data, iris.target[:, None]), axis=1)
train, test = train_test_split(labelled_rows, test_size=0.2)

model = Gaussian_NB()
model.train(train)

# Fraction of held-out rows whose prediction equals the stored label.
n_hits = np.sum(model.predict(test[:, :-1]) == test[:, -1])
print("model Accuracy: {0}".format(n_hits / test.shape[0]))

[[5.0292683  3.4487805  1.4634147  0.24634147]
 [5.9864864  2.7837837  4.3297296  1.3486487 ]
 [6.633333   2.959524   5.5833335  2.0142858 ]] [[0.3542384  0.39082274 0.16785026 0.09651882]
 [0.5115753  0.3316735  0.4209801  0.19675973]
 [0.64611167 0.3170245  0.56100106 0.27130324]]
model Accuracy: 1.0


In [173]:
class Multivariate_Guassian():
    """Gaussian discriminant analysis with a shared (LDA) or per-class (QDA) covariance.

    The class name keeps its original spelling so existing callers still work.

    Parameters
    ----------
    X : 2-D array-like, shape (n_samples, n_features)
        Training features, one row per sample.
    y : 1-D array-like, length n_samples
        Class label of each row of ``X``.
    method : str, default 'LDA'
        ``'QDA'`` fits one covariance matrix per class and scores with the
        quadratic discriminant; any other value uses the linear discriminant
        with a single shared covariance.

    Attributes set by the constructor: ``n_samples``, ``n_features``,
    ``classes`` (sorted unique labels), ``method``, ``overall_mean``,
    ``priors`` (list of class frequencies), ``means`` (one row per class) and
    ``sigma`` (``sigma[0]`` is the pooled within-class covariance; for QDA,
    ``sigma[1 + i]`` is the covariance of ``classes[i]``).
    """

    def __init__(self, X, y, method = 'LDA'):
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        self.n_samples, self.n_features = X.shape
        self.classes = np.unique(y)
        self.method = method

        # Overall mean
        self.overall_mean = X.mean(axis = 0)

        n_classes = len(self.classes)
        self.priors = []
        self.means = np.zeros((n_classes, self.n_features))
        within_scatter = np.zeros((self.n_features, self.n_features))
        class_covariances = []
        for i, c in enumerate(self.classes):
            X_c = X[y == c]
            self.priors.append(X_c.shape[0] / X.shape[0])
            self.means[i, :] = X_c.mean(axis = 0)
            centered = X_c - self.means[i, :]
            within_scatter += centered.T @ centered
            if self.method == 'QDA':
                # rowvar=False: rows are observations, columns are features.
                # atleast_2d keeps a (1, 1) matrix when there is one feature.
                class_covariances.append(np.atleast_2d(np.cov(X_c, rowvar=False)))

        # LDA assumes every class shares one covariance, so it is estimated
        # from deviations around each class's own mean (pooled estimate),
        # not from the spread of the whole data set around the overall mean.
        dof = max(self.n_samples - n_classes, 1)
        self.sigma = [within_scatter / dof] + class_covariances

    def _lda_scores(self, X):
        """Return the (n_samples, n_classes) linear discriminant scores."""
        # Inverted once per call instead of once per class per sample.
        sigma_inv = np.linalg.inv(self.sigma[0])
        projected_means = self.means @ sigma_inv
        offsets = np.log(self.priors) - 0.5 * np.sum(projected_means * self.means, axis=1)
        return X @ projected_means.T + offsets

    def _qda_scores(self, X):
        """Return the (n_samples, n_classes) quadratic discriminant scores."""
        if len(self.sigma) != len(self.classes) + 1:
            raise ValueError("per-class covariances are only fitted when method='QDA'")
        scores = np.empty((X.shape[0], len(self.classes)))
        for k, cov in enumerate(self.sigma[1:]):
            _, log_det = np.linalg.slogdet(cov)
            diff = X - self.means[k]
            mahalanobis = np.sum((diff @ np.linalg.inv(cov)) * diff, axis=1)
            scores[:, k] = np.log(self.priors[k]) - 0.5 * log_det - 0.5 * mahalanobis
        return scores

    def LDA_score(self, input_x):
        """Return the class label with the highest linear discriminant for one sample.

        ``input_x`` is a 1-D array-like of ``n_features`` values. The result
        is an element of ``self.classes``, not a positional index.
        """
        sample = np.asarray(input_x, dtype=float).reshape(1, -1)
        return self.classes[np.argmax(self._lda_scores(sample)[0])]

    def QDA_score(self, input_x):
        """Return the class label with the highest quadratic discriminant for one sample.

        Raises ``ValueError`` if the model was not built with ``method='QDA'``.
        """
        sample = np.asarray(input_x, dtype=float).reshape(1, -1)
        return self.classes[np.argmax(self._qda_scores(sample)[0])]

    def predict(self, x):
        """Return a list with the predicted class label of every row of ``x``.

        Uses the quadratic discriminant when ``method == 'QDA'`` and the
        linear one otherwise. Empty input yields an empty list.
        """
        X = np.asarray(x, dtype=float)
        if X.size == 0:
            return []
        if self.method == 'QDA':
            scores = self._qda_scores(X)
        else:
            scores = self._lda_scores(X)
        return self.classes[np.argmax(scores, axis=1)].tolist()

In [174]:
# Fit the discriminant model on the training split: every column except the
# last is a feature, the last column is the label.
train_features, train_labels = train[:, :-1], train[:, -1]
mgda = Multivariate_Guassian(train_features, train_labels)

# Fraction of held-out rows whose prediction equals the stored label.
n_matches = np.sum(mgda.predict(test[:, :-1]) == test[:, -1])
print("model Accuracy: {0}".format(n_matches / test.shape[0]))

Classes: [0. 1. 2.]
X_c [[5.  3.3 1.4 0.2]
 [4.9 3.6 1.4 0.1]
 [4.8 3.1 1.6 0.2]
 [5.7 4.4 1.5 0.4]
 [4.4 3.2 1.3 0.2]
 [5.8 4.  1.2 0.2]
 [5.1 3.8 1.5 0.3]
 [4.9 3.  1.4 0.2]
 [4.8 3.4 1.6 0.2]
 [5.2 4.1 1.5 0.1]
 [5.2 3.4 1.4 0.2]
 [5.1 3.3 1.7 0.5]
 [5.1 3.8 1.9 0.4]
 [5.7 3.8 1.7 0.3]
 [5.4 3.4 1.5 0.4]
 [5.2 3.5 1.5 0.2]
 [5.1 3.4 1.5 0.2]
 [5.1 3.5 1.4 0.2]
 [4.5 2.3 1.3 0.3]
 [4.9 3.1 1.5 0.2]
 [4.6 3.4 1.4 0.3]
 [5.5 3.5 1.3 0.2]
 [5.4 3.9 1.3 0.4]
 [5.  3.2 1.2 0.2]
 [5.5 4.2 1.4 0.2]
 [5.1 3.8 1.6 0.2]
 [4.8 3.  1.4 0.1]
 [4.7 3.2 1.3 0.2]
 [5.4 3.9 1.7 0.4]
 [5.1 3.5 1.4 0.3]
 [4.6 3.2 1.4 0.2]
 [4.3 3.  1.1 0.1]
 [5.  3.5 1.3 0.3]
 [5.  3.4 1.5 0.2]
 [5.4 3.7 1.5 0.2]
 [5.  3.4 1.6 0.4]
 [4.4 3.  1.3 0.2]
 [5.1 3.7 1.5 0.4]
 [4.8 3.4 1.9 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.  1.6 0.2]]
X_c [[7.  3.2 4.7 1.4]
 [6.4 2.9 4.3 1.3]
 [5.7 3.  4.2 1.2]
 [6.5 2.8 4.6 1.5]
 [5.  2.  3.5 1. ]
 [6.7 3.1 4.7 1.5]
 [5.9 3.2 4.8 1.8]
 [6.  2.9 4.5 1.5]
 [6.8 2.8 4.8 1.4]
 [6.4 3.2 4.5 1.5]
 [

In [177]:
# Score one hand-written sample; these four values match the first row of
# the means printed by the Naive Bayes training cell above.
probe_point = np.array([5.02926829, 3.44878049, 1.46341463, 0.24634146])
mgda.LDA_score(probe_point)

0