## Multivariate Gaussian (MVG) Classifier

In [17]:
import matplotlib.pyplot as plt
import numpy as np
import scipy
import scipy.special
import sklearn.datasets
def load_iris():
    """Load the Iris dataset shipped with scikit-learn.

    Returns
    -------
    D : numpy.ndarray
        The dataset's 'data' array, transposed so that each column is one sample.
    L : numpy.ndarray
        The dataset's 'target' array (one class label per sample).
    """
    # Load the dataset once and reuse it for both the data and the labels.
    iris = sklearn.datasets.load_iris()
    D, L = iris['data'].T, iris['target']
    return D, L

In [18]:
# D is the transposed Iris 'data' array (one sample per column); L holds the 'target' labels.
D, L = load_iris()

In [19]:
def split_db_2to1(D, L, seed=0):
    """Randomly split a dataset into 2/3 training and 1/3 validation samples.

    Parameters
    ----------
    D : numpy.ndarray
        Data matrix with one sample per column.
    L : numpy.ndarray
        Labels, one per column of D.
    seed : int
        Seed passed to ``np.random.seed`` before shuffling (this reseeds
        NumPy's global random generator).

    Returns
    -------
    ((DTR, LTR), (DTE, LTE))
        Training data/labels followed by validation data/labels.
    """
    n_samples = D.shape[1]
    n_train = int(n_samples * 2.0 / 3.0)  # size of the training part, rounded down

    np.random.seed(seed)
    shuffled = np.random.permutation(n_samples)  # random ordering of 0 .. n_samples-1

    # The first n_train shuffled indices form the training set, the rest the validation set.
    train_idx, test_idx = shuffled[:n_train], shuffled[n_train:]

    training = (D[:, train_idx], L[train_idx])
    validation = (D[:, test_idx], L[test_idx])
    return training, validation

In [20]:
# Split into training (DTR, LTR) and validation (DTE, LTE) sets using the default seed (0).
(DTR, LTR), (DTE, LTE) = split_db_2to1(D, L)

Now we have to compute the ML (maximum likelihood) solution. First we compute the empirical mean and covariance matrix for each class label.

The training phase consists of computing the empirical class mean and the empirical class covariance matrix from the training samples DTR. In other words, we fit one multivariate normal distribution per class to the training data.

In [21]:
mu_classes = []   # empirical mean of each class, stored as (n_features, 1) column vectors
cov_classes = []  # empirical (maximum-likelihood, 1/N) covariance matrix of each class
# np.unique returns the labels in sorted order, so list position k holds the parameters
# of the k-th smallest label; later cells index these lists by label. Iterating a set
# gives no such ordering guarantee.
for i in np.unique(LTR):
    DTR_class_i = DTR[:, LTR == i]  # training samples of class i, one per column
    N_class_i = DTR_class_i.shape[1]
    mu_class_i = DTR_class_i.mean(axis=1).reshape(-1, 1)
    DTR_centered_i = DTR_class_i - mu_class_i  # centre once, reuse for both factors
    cov_class_i = 1/N_class_i * np.dot(DTR_centered_i, DTR_centered_i.T)
    mu_classes.append(mu_class_i)
    cov_classes.append(cov_class_i)

The test phase consists in evaluating, for each test sample, the Gaussian density of every class (0, 1 and 2). These class-conditional densities are then combined with the class priors to obtain the probability that the sample belongs to each class.

In [22]:
def logpdf_GAU_ND_1sample(x,mu,C):
    """Log-density of the multivariate Gaussian N(mu, C) evaluated at one sample.

    Parameters
    ----------
    x : numpy.ndarray
        The sample, with M features; shape (M, 1) or (M,).
    mu : numpy.ndarray
        Mean vector with M elements (any shape that reshapes to (M, 1)).
    C : numpy.ndarray
        (M, M) covariance matrix; it must be invertible.

    Returns
    -------
    numpy.ndarray
        A (1, 1) array holding the log-density at x.

    Raises
    ------
    numpy.linalg.LinAlgError
        If C is singular.
    """
    M = x.shape[0]  # number of features of sample x
    # Force a column layout: a 1-D sample would otherwise broadcast against the
    # (M, 1) mean and silently produce an (M, M) "centred" matrix.
    x = x.reshape(M, -1)
    mu = mu.reshape(M, 1)
    xc = x - mu  # centred sample
    # Only the log of |det C| is used; the sign returned by slogdet is ignored.
    _, log_abs_detC = np.linalg.slogdet(C)
    # Solve C z = xc instead of forming inv(C) explicitly.
    mahalanobis_sq = np.dot(xc.T, np.linalg.solve(C, xc))
    return -M/2 * np.log(2*np.pi) - 1/2 * log_abs_detC - 1/2 * mahalanobis_sq

In [23]:
# Class-conditional densities: one row per trained class, one column per test sample.
S = np.zeros(shape=(len(mu_classes), DTE.shape[1]))
for i in range(DTE.shape[1]):
    xt = DTE[:, i:i+1]  # test sample xt, kept as a column
    # Score the sample against every trained class. The classes come from the fitted
    # parameters, not from the test labels, which must play no role when scoring.
    for j, (mu, C) in enumerate(zip(mu_classes, cov_classes)):
        S[j, i] = np.exp(logpdf_GAU_ND_1sample(xt, mu, C)).item()

In [24]:
# Joint density of sample and class: class-conditional density times the class prior.
prior = 1/3  # the prior probability is assumed to be 1/3 for each class
SJoint = prior * S
# Compare with the reference solution; the printed value is the largest absolute deviation.
SJoint_sol = np.load('Solution/SJoint_MVG.npy')
print(np.abs(SJoint_sol - SJoint).max())

6.661338147750939e-15


In [25]:
# Marginal density of each test sample: sum of the joint densities over the classes,
# stored as a column vector with one row per test sample.
SMarginal = SJoint.sum(axis=0)[:, np.newaxis]

In [26]:
# Class posterior probabilities: joint density divided by the marginal of each sample.
# The marginal is laid out as a row so it broadcasts across the class rows of SJoint;
# this works for any number of classes and test samples.
SPost = SJoint / SMarginal.reshape(1, -1)

In [27]:
# Assign every test sample to the class with the highest posterior probability.
predicted_labels = SPost.argmax(axis=0)
corrected_assigned_labels = predicted_labels == LTE  # True where the prediction is right
n_correct = sum(corrected_assigned_labels)
acc = n_correct / len(LTE)
err = 1 - acc
print('Error rate %f%%' % (100 * err))

Error rate 4.000000%


Working in the log domain to avoid numerical issues:

In [28]:
# Class-conditional log-densities, computed directly in the log domain. Taking the log
# of already-exponentiated densities would not help: anything that underflowed to 0
# would come back as -inf.
logS = np.zeros(shape=(len(mu_classes), DTE.shape[1]))
for i in range(DTE.shape[1]):
    for j in range(len(mu_classes)):
        logS[j, i] = logpdf_GAU_ND_1sample(DTE[:, i:i+1], mu_classes[j], cov_classes[j]).item()
# The log-prior is added exactly once. SJoint already contains the prior, so it must
# not be used as the starting point here.
logSJoint = logS + np.log(1/len(mu_classes))
# log of the marginal density of each sample, computed with the log-sum-exp trick.
logSMarginal = scipy.special.logsumexp(logSJoint, axis=0).reshape(1,-1)
log_SPost = logSJoint - logSMarginal
SPost_ = np.exp(log_SPost)

## Naive Bayes

In [87]:
# NOTE(review): compute_post_probabilities is defined in a later cell of this notebook;
# that cell has to be executed first (or moved above this one) for a fresh
# Restart & Run All to succeed.
# Naive Bayes keeps only the per-feature variances of each class: multiplying
# element-wise by the identity zeroes every off-diagonal covariance entry.
cov_classes_nbayes = [C * np.identity(C.shape[0]) for C in cov_classes]
SPost_nbayes = compute_post_probabilities(DTE,LTE,mu_classes,cov_classes_nbayes)
predicted_labels = np.argmax(SPost_nbayes,axis=0)
corrected_assigned_labels = LTE==predicted_labels
acc = sum(corrected_assigned_labels) / len(LTE)
err = 1-acc
print('Error rate %f%%' % (err * 100))

Error rate 4.000000%


In [88]:
def compute_post_probabilities(DTE, LTE, mu_classes, cov_classes):
    """Class posterior probabilities of every test sample under Gaussian class models.

    A uniform prior over the classes is assumed. The computation is carried out in the
    log domain and exponentiated only at the end.

    Parameters
    ----------
    DTE : numpy.ndarray
        Test data, one sample per column.
    LTE : numpy.ndarray
        Test labels. Kept for backward compatibility only: the set of classes is taken
        from ``mu_classes``, so the true test labels never influence the scores.
    mu_classes : sequence of numpy.ndarray
        Mean vector of each class; position j holds the parameters of class j.
    cov_classes : sequence of numpy.ndarray
        Covariance matrix of each class, aligned with ``mu_classes``.

    Returns
    -------
    numpy.ndarray
        Array of shape (num_classes, num_test_samples); entry (j, i) is the posterior
        probability of class j for test sample i.
    """
    num_classes = len(mu_classes)
    num_test_samples = DTE.shape[1]

    # Class-conditional log-density of every test sample under every class model.
    logS = np.zeros(shape=(num_classes, num_test_samples))
    for i in range(num_test_samples):
        xt = DTE[:, i:i+1]  # test sample xt, kept as a column
        for j in range(num_classes):
            logS[j, i] = logpdf_GAU_ND_1sample(xt, mu_classes[j], cov_classes[j]).item()

    # Uniform prior over the classes (not over the test samples).
    log_prior = np.log(1 / num_classes)
    logSJoint = logS + log_prior
    # log of the marginal density of each sample, via the log-sum-exp trick.
    logSMarginal = scipy.special.logsumexp(logSJoint, axis=0).reshape(1, -1)
    # Posterior = joint / marginal, i.e. a subtraction in the log domain.
    return np.exp(logSJoint - logSMarginal)

In [89]:
def predict_labels(SPost ,LTE):
    """Turn class posteriors into label predictions and score them against LTE.

    Parameters
    ----------
    SPost : numpy.ndarray
        Posterior probabilities, one row per class and one column per sample.
    LTE : numpy.ndarray
        True labels, one per sample.

    Returns
    -------
    (predicted_labels, acc, err)
        The row index of the largest posterior for each sample, the fraction of
        samples whose prediction equals the true label, and ``1 - acc``.
    """
    best_class = np.argmax(SPost, axis=0)
    n_hits = sum(best_class == LTE)  # number of correctly classified samples
    accuracy = n_hits / len(LTE)
    return (best_class, accuracy, 1 - accuracy)

## Tied Covariance

In [110]:
# Tied covariance: average of the per-class covariance matrices, each weighted by the
# number of training samples of its class. The number of classes is taken from
# cov_classes, whose position i holds the covariance of label i.
num_samples_per_class = [int(np.sum(LTR == i)) for i in range(len(cov_classes))]
tied_cov = sum(n * C for n, C in zip(num_samples_per_class, cov_classes))
tied_cov = tied_cov * (1/sum(num_samples_per_class))

In [112]:
# Display the tied covariance matrix computed in the previous cell.
tied_cov

array([[0.23637589, 0.09525344, 0.1364944 , 0.03614529],
       [0.09525344, 0.11618517, 0.05768855, 0.0357726 ],
       [0.1364944 , 0.05768855, 0.14992811, 0.03746458],
       [0.03614529, 0.0357726 , 0.03746458, 0.04291763]])

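To compute the posterior probabilities for the tied model we can reuse the `compute_post_probabilities` function unchanged. It reads one covariance matrix per class through `C = cov_classes[j]`, so that line does not need to be removed: it is enough to pass a list that repeats the shared matrix once per class, e.g. `compute_post_probabilities(DTE, LTE, mu_classes, [tied_cov] * len(mu_classes))`.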