### Gaussian Discrminant Analysis

Gaussian Discriminant Analysis is a simple Bayes Classifier wherein the Class Conditional Densities are Assumed to be Gaussian with some parameters. The parameters are learnt by Maximum Likelihood Estimate

In [1]:
import numpy as np

In [2]:
class GDA():
    
    def __init__(self, x_dim, y_dim):
        self.x_dim = x_dim
        self.y_dim = y_dim
        self.classes = np.array(list(range(y_dim)))
        self.initialize_parameters(x_dim, y_dim)
        
    def initialize_parameters(self, x_dim, y_dim):
        meu = {}
        sigma = {}
        priors = {}
        
        for i in range(y_dim):
            meu[i] = np.zeros((x_dim))
            sigma[i] = np.zeros((x_dim, x_dim))
        self.meu = meu
        self.sigma = sigma
        self.priors = priors
    
    def train(self, data, labels):
        classes = set(list(labels))
        
        for c in classes:
            class_data = data[labels == c]
            self.learn_parameters(class_data, c)
            self.priors[c] = float(len(class_data)) / len(data)
            
    def learn_parameters(self, class_data, c):
        self.meu[c] = np.mean(class_data, axis = 0)
        self.sigma[c] = np.dot((class_data - self.meu[c]).T, (class_data - self.meu[c]))
        
    def test(self, data, labels):
        preds = np.zeros((data.shape[0]))
        for i in range(data.shape[0]):
            preds[i] = self.predict(data[i])
        return preds, np.mean(preds == labels)
    
    def predict(self, data):
        probs = np.zeros((len(self.classes)))
        for c in self.classes:
            probs[c] = self.priors[c] * (1. / (np.power(2 * np.pi, self.x_dim / 2.) * np.power(np.linalg.det(self.sigma[c]), 0.5))) * np.exp(- 0.5 * np.dot((data - self.meu[c]).T, np.dot(np.linalg.inv(self.sigma[c]), (data - self.meu[c]))))
        return np.argmax(probs / probs.sum())
    

### Importing Toy Dataset IRIS

In [3]:
import pandas as pd

df = pd.read_csv("iris.csv", header = None)

X = df.loc[:, [0, 1, 2, 3]].values
y = df.loc[:, 4].values

cy = list(set(y))
new_y = [0] * len(y)
for i in range(len(y)):
    new_y[i] = cy.index(y[i])
new_y = np.array(new_y)

#### Training Test Data Splitting

In [7]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, new_y, test_size=0.10)

##### Initialize model

In [8]:
model = GDA(X.shape[1], len(cy))

##### Training Model

In [9]:
model.train(X_train, y_train)

##### Prediction and Test Accuracy

In [10]:
preds, acc = model.test(X_test, y_test)

In [11]:
print("Accuracy of the model: {}".format(acc))

Accuracy of the model: 0.733333333333


### Gaussian Discriminant Analysis on MNIST Data, using PCA to reduce feature dimensions

We would need to reduce data dimensions to a small number so that it becomes practical. We use PCA for dimensionality reduction

In [12]:
class PCA:
    def __init__(self, data):
        self.num_data = data.shape[0]
        self.dimension = data.shape[1]
        self.data = data
        self.covariance = self.compute_covariance(data)
        self.principal_components, self.principal_axes = self.compute_pca()
    
    def compute_covariance(self, X):
        return np.cov(X.T)
    
    def compute_pca(self):
        eigen_values, eigen_vectors = np.linalg.eig(self.covariance)
        eigen_values, eigen_vectors = eigen_values.real, eigen_vectors.real
        eigen = []
        for i in range(len(eigen_values)):
            eigen.append((eigen_values[i], eigen_vectors[:, i]))
        eigen.sort(key=lambda x: x[0], reverse = True)
        principal_axes = np.array([x[1] for x in eigen])
        principal_comps = np.array([x[0] for x in eigen])
        return principal_comps, principal_axes
    
    def get_principal_axes(self, num):
        return self.principal_axes[:num]
    
    def get_projection(self, num, X):
        n_axes = self.get_principal_axes(num)
        projections = np.dot(n_axes, X.T)
        return projections.T
    
    def inverse_PCA(self, X):
        num = X.shape[1]
        n_axes = self.get_principal_axes(num)
        original_projections = np.dot(X, n_axes)
        return original_projections

#### Reading in the MNIST Dataset
##### We only work with 1000 training and 200 test data to keep the model fast and simple

In [13]:
df = pd.read_csv("mnist.csv", header=0, index_col=0)
data_flat = {}
data_image = {}
for i in range(9):
    x = df[df.index == i]
    data_flat[i] = np.array(x)
    x = np.array(x).reshape(len(x), 28, 28)
    data_image[i] = x
    
X_train = np.array(df)[:1000]
X_test = np.array(df)[1000:1200]
y_train = np.array(df.index)[:1000].astype(np.int8).reshape(-1)
y_test = np.array(df.index)[1000:1200].astype(np.int8).reshape(-1)

#### PCA is trained/learnt using the training data

In [14]:
pca = PCA(X_train)

#### The first 50 principal components are extracted from both training and test data

In [15]:
X2_train = pca.get_projection(50, X_train)
X2_test = pca.get_projection(50, X_test)

##### Model Learnt

In [16]:
model = GDA(X2_train.shape[1], 10)

In [17]:
model.train(X2_train, y_train)

##### Model Tested

In [18]:
preds, acc = model.test(X2_test, y_test)

  r = _umath_linalg.det(a, signature=signature)


In [19]:
print("Accuracy of the model: {}".format(acc))

Accuracy of the model: 0.29


#### Accuracy of the GDA model on the MNIST Dataset is only 29%. This is better than random chance (10%), but much lower than other more powerful bayes classifier models which model the class conditional densities as more powerful distributions like GMMs

### Naiive Bayes Model

In this part we will use a Naiive Bayes Model for the tasks.
Each of the constituent probability distributions are modelled using a Gaussian distribution

In [20]:
class NB():
    
    def __init__(self, x_dim, y_dim):
        self.x_dim = x_dim
        self.y_dim = y_dim
        self.classes = np.array(list(range(y_dim)))
        self.initialize_parameters(x_dim, y_dim)
        
    def initialize_parameters(self, x_dim, y_dim):
        mu = np.zeros((y_dim, x_dim))
        sigma = np.zeros((y_dim, x_dim))
        priors = np.zeros((y_dim))
            
        self.mu = mu
        self.sigma = sigma
        self.priors = priors
        
    def train(self, data, labels):
        classes = set(list(labels))
        
        for c in classes:
            class_data = data[labels == c]
            self.learn_parameters(class_data, c)
            self.priors[c] = float(len(class_data)) / len(data)
            
    def learn_parameters(self, class_data, c):
        self.mu[c] = np.mean(class_data, axis = 0)
        self.sigma[c] = np.var(class_data, axis = 0)

    def test(self, data, labels):
        preds = np.zeros((data.shape[0]))
        for i in range(data.shape[0]):
            preds[i] = self.predict(data[i])
        return preds, np.mean(preds == labels)
    
    def predict(self, data):
        logprobs = np.log(self.priors)
        for c in self.classes:
            for i in range(self.x_dim):
                logprobs[c] += np.log(self.gaussian_prob(data[i], i, c))
        return np.argmax(logprobs)
    
    def gaussian_prob(self, x, i, c):
        val = 1. / (np.power(2 * np.pi * self.sigma[c, i], 0.5)) * np.exp(- 0.5 * np.power(x - self.mu[c, i], 2) / self.sigma[c, i])
        return val

#### NB for iris type prediction

In [21]:
df = pd.read_csv("iris.csv", header = None)

X = df.loc[:, [0, 1, 2, 3]].values
y = df.loc[:, 4].values

cy = list(set(y))
new_y = [0] * len(y)
for i in range(len(y)):
    new_y[i] = cy.index(y[i])
new_y = np.array(new_y)

X_train, X_test, y_train, y_test = train_test_split(X, new_y, test_size=0.10)
model = NB(X.shape[1], len(cy))

model.train(X_train, y_train)

preds, acc = model.test(X_test, y_test)

print("Accuracy of the model: {}".format(acc))

Accuracy of the model: 0.866666666667


#### The Naiive bayes Model showed good accuracy. Now lets try this on the MNIST Dataset

#### NB for MNIST Class Prediction

In [22]:
df = pd.read_csv("mnist.csv", header=0, index_col=0)
data_flat = {}
data_image = {}
for i in range(9):
    x = df[df.index == i]
    data_flat[i] = np.array(x)
    x = np.array(x).reshape(len(x), 28, 28)
    data_image[i] = x
    
X_train = np.array(df)[:1000]
X_test = np.array(df)[1000:1200]
y_train = np.array(df.index)[:1000].astype(np.int8).reshape(-1)
y_test = np.array(df.index)[1000:1200].astype(np.int8).reshape(-1)

pca = PCA(X_train)
X2_train = pca.get_projection(50, X_train)
X2_test = pca.get_projection(50, X_test)

model = NB(50, 10)

model.train(X2_train, y_train)

preds, acc = model.test(X2_test, y_test)

print("Accuracy of the model: {}".format(acc))

Accuracy of the model: 0.815


#### The accuracy is quite high considering that they are very simple models, and that the amount of data used is very small