## Gaussian Discriminant Analysis

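This notebook implements Gaussian Discriminant Analysis for a binary output $y \in \{0, 1\}$: $y \sim \mathrm{Bernoulli}(\phi)$ and $x \mid y = k \sim \mathcal{N}(\mu_k, \Sigma)$, with a single covariance matrix $\Sigma$ shared by both classes.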

In [1]:
import pandas as pd
import numpy as np

In [22]:
from sklearn.datasets import load_iris

# Load the iris data: a feature matrix plus integer class labels.
dataset = load_iris()
X, y = dataset.data, dataset.target

# Collapse the labels to two classes: label 0 stays 0, any positive label becomes 1.
y = np.greater(y, 0).astype(int)

In [23]:
# Hold out part of the data for evaluation.
# NOTE(review): no random_state is passed, so the split (and every output
# below that depends on it) presumably changes from run to run -- confirm
# whether a fixed seed is wanted for reproducibility.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [24]:
# Sanity check: show the distinct label values present in y.
np.unique(y)

array([0, 1])

In [25]:
class GaussianDiscAnalysis:
    """Gaussian Discriminant Analysis for binary labels ``y`` in ``{0, 1}``.

    The model assumes ``y ~ Bernoulli(phi)`` and ``x | y = k ~ N(mu[k], sigma)``
    where the covariance matrix ``sigma`` is shared by both classes.

    Attributes set by :meth:`fit`:
        mu:    list ``[mu0, mu1]`` of per-class feature means, each of shape (d,).
        sigma: shared (d, d) covariance matrix.
        phi:   fraction of training labels equal to 1.
    """

    def compute_phi(self, y):
        """Return the fraction of labels in ``y`` that equal 1 (class-1 prior)."""
        return float(np.mean(np.asarray(y) == 1))

    def compute_mu(self, X, y, idx):
        """Return the per-feature mean of the rows of ``X`` whose label is ``idx``.

        The sum is divided by the number of rows in that class, not by the
        total number of samples.
        """
        X = np.asarray(X, dtype=float)
        in_class = np.asarray(y) == idx
        return X[in_class].mean(axis=0)

    def compute_sigma(self, X, y):
        """Return the shared covariance: mean outer product of class-centred rows.

        Reads ``self.mu``, so the class means must be computed first.
        Rows whose label is neither 0 nor 1 are left uncentred.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        # Row i of class_means is mu[1] when y[i] == 1 and mu[0] when y[i] == 0.
        class_means = np.outer(y == 1, self.mu[1]) + np.outer(y == 0, self.mu[0])
        centered = X - class_means
        return centered.T @ centered / len(X)

    def _log_Pxyi(self, X, idx):
        """Return ``log p(x | y = idx)`` for every row of ``X``.

        Works in log space with ``slogdet``/``solve`` instead of an explicit
        determinant and inverse, so far-away points do not underflow to 0.

        Raises:
            numpy.linalg.LinAlgError: if ``self.sigma`` is singular or not
                positive definite.
        """
        X = np.asarray(X, dtype=float)
        d = X.shape[1]
        sign, log_det = np.linalg.slogdet(self.sigma)
        if sign <= 0:
            raise np.linalg.LinAlgError(
                "covariance matrix is singular or not positive definite"
            )
        diff = X - self.mu[idx]
        # Squared Mahalanobis distance of each row: diff_i^T sigma^{-1} diff_i.
        maha = np.sum(np.linalg.solve(self.sigma, diff.T).T * diff, axis=1)
        return -0.5 * (d * np.log(2 * np.pi) + log_det + maha)

    def compute_Pxyi(self, X, idx):
        """Return the Gaussian density ``p(x | y = idx)`` for every row of ``X``."""
        return np.exp(self._log_Pxyi(X, idx))

    def fit(self, X, y):
        """Estimate the class means, shared covariance and class-1 prior (phi).

        Args:
            X: (n, d) array-like of features.
            y: length-n array-like of labels; both 0 and 1 must occur and no
               other value may be present.

        Raises:
            ValueError: if the labels are not exactly the two classes 0 and 1.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if not np.array_equal(np.unique(y), [0, 1]):
            raise ValueError(
                "y must contain exactly the two classes 0 and 1, got "
                f"{np.unique(y).tolist()}"
            )
        # Index k of the list holds the mean of class k.
        self.mu = [self.compute_mu(X, y, k) for k in (0, 1)]
        self.sigma = self.compute_sigma(X, y)
        self.phi = self.compute_phi(y)

    def predict(self, X):
        """Return the most probable label (0 or 1) for every row of ``X``.

        Compares ``log p(x | y = k) + log p(y = k)`` for both classes; ties
        resolve to class 0.
        """
        # A prior of exactly 0 becomes -inf, which simply rules that class out.
        with np.errstate(divide="ignore"):
            log_prior = np.log([1.0 - self.phi, self.phi])
        scores = np.column_stack(
            [self._log_Pxyi(X, k) + log_prior[k] for k in (0, 1)]
        )
        return np.argmax(scores, axis=1)
        

In [26]:
# Create the classifier; it takes no constructor arguments.
GDA = GaussianDiscAnalysis()

In [27]:
# Estimate the class means, shared covariance and phi from the training split.
GDA.fit(X_train,y_train)

In [28]:
# Predict a 0/1 label for every held-out sample.
predictions = GDA.predict(X_test)

In [29]:
# Display the predicted labels.
predictions

array([1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1])

In [30]:
# Accuracy: fraction of test samples whose predicted label equals the true label.
np.mean(predictions == y_test)

1.0