# FSL Project Part 1
Feature Extraction, Density Estimation and Bayesian Classification

In [31]:
''' Imports '''
import scipy.io
from scipy.spatial import distance
import numpy as np
from numpy.linalg import inv, det
import math

In [53]:
def getNormalizedFeatures(data_arr):
    """Map every sample to a standardised (mean, std-dev) feature pair.

    For each entry along the first axis of ``data_arr`` the mean and the
    standard deviation of all of its values are computed.  Each of the two
    per-sample statistics is then z-scored across the dataset: its
    dataset-wide mean is subtracted and the result is divided by its
    dataset-wide standard deviation.

    Args:
        data_arr: NumPy array whose first axis indexes the samples.

    Returns:
        Float array of shape ``(n_samples, 2)``.  Column 0 holds the
        standardised per-sample means, column 1 the standardised per-sample
        standard deviations.
    """
    n_samples = data_arr.shape[0]

    # Per-sample statistics, always accumulated as float64 regardless of the
    # dtype of the input samples.
    sample_means = np.fromiter(
        (np.mean(sample) for sample in data_arr), dtype=float, count=n_samples
    )
    sample_stds = np.fromiter(
        (np.std(sample) for sample in data_arr), dtype=float, count=n_samples
    )

    # Dataset-wide location and spread of each statistic.
    mean_of_means, spread_of_means = np.mean(sample_means), np.std(sample_means)
    mean_of_stds, spread_of_stds = np.mean(sample_stds), np.std(sample_stds)

    # Z-score both statistics and pair them up row by row.
    return np.column_stack((
        (sample_means - mean_of_means) / spread_of_means,
        (sample_stds - mean_of_stds) / spread_of_stds,
    ))

In [32]:
def discrim(x, mean, cov, prior):
    """Evaluate the Gaussian log-discriminant of one class at ``x``.

    Computes ``g(x) = -0.5 * d^2 - 0.5 * ln|cov| + ln(prior)`` where ``d^2``
    is the squared Mahalanobis distance ``(x - mean)^T cov^{-1} (x - mean)``.
    The ``-(k/2) * ln(2*pi)`` term of the Gaussian log-density is left out.

    Args:
        x: 1-D feature vector to score.
        mean: 1-D class mean, same length as ``x``.
        cov: Square class covariance matrix.
        prior: Class prior probability; must be positive.

    Returns:
        The discriminant value as a float.

    Raises:
        numpy.linalg.LinAlgError: If ``cov`` is singular.
        ValueError: If the determinant of ``cov`` is not positive, or if
            ``prior`` is not positive.
    """
    diff = np.asarray(x, dtype=float) - np.asarray(mean, dtype=float)
    cov = np.asarray(cov, dtype=float)

    # Solve cov @ z = diff rather than forming inv(cov); the quadratic form
    # is then obtained directly, without a sqrt-then-square round trip.
    maha_sq = diff @ np.linalg.solve(cov, diff)

    # slogdet returns ln|det| without computing det itself, so very small or
    # very large determinants do not underflow/overflow before the log.
    sign, log_det = np.linalg.slogdet(cov)
    if sign <= 0:
        raise ValueError("covariance matrix must have a positive determinant")

    return -0.5 * maha_sq - 0.5 * log_det + math.log(prior)

In [61]:
# Load the training set from its MATLAB file.
train_data = scipy.io.loadmat('train_data.mat')

# 'data' holds the training samples, one per entry along the first axis.
train_data_arr = np.array(train_data['data'])
# Row 0 of 'label' is taken as the label vector (presumably the labels are
# stored as a single 1 x N row — verify against the .mat file).
label_arr = np.array(train_data['label'][0])

# Number of training samples.
n_train = train_data_arr.shape[0]

In [51]:
# Load the test set from its MATLAB file.
test_data = scipy.io.loadmat('test_data.mat')

# Samples first, together with how many of them there are ...
test_data_arr = np.array(test_data['data'])
n_test = test_data_arr.shape[0]

# ... then the labels, taken from row 0 of the 'label' entry.
test_label_arr = np.array(test_data['label'])[0]

2886


In [58]:
# Normalised (mean, std-dev) feature pair for every training sample.
train_y = getNormalizedFeatures(train_data_arr)
# Show the first three feature vectors as a quick sanity check.
print(train_y[:3])

[[ 0.15069489  0.12996069]
 [-0.97386658 -0.93473171]
 [-0.60346935 -0.6292426 ]]


In [60]:
# Partition the normalised training features by label: samples labelled 3 go
# to feature_3, every other sample goes to feature_7.
is_digit_3 = [label_arr[i] == 3 for i in range(n_train)]

feature_3 = np.array([train_y[i] for i in range(n_train) if is_digit_3[i]])
feature_7 = np.array([train_y[i] for i in range(n_train) if not is_digit_3[i]])
# print(feature_3[0])
# print(len(feature_7))

In [28]:
# Maximum-likelihood estimate of each class mean (average over the samples).
mu_3 = np.mean(feature_3, 0)
mu_7 = np.mean(feature_7, 0)
print(mu_3)
print(mu_7)

# Maximum-likelihood covariance (normalised by N, not N - 1):
#     Sigma = (1/N) * sum_i (x_i - mu)(x_i - mu)^T
# With the centred samples stacked as rows of C, the sum of outer products is
# C^T @ C, which works for any feature dimension (no hard-coded 2x1 reshape).
len_3 = len(feature_3)
centered_3 = feature_3 - mu_3
sum_3 = centered_3.T @ centered_3
sigma_3 = sum_3 / len_3

len_7 = len(feature_7)
centered_7 = feature_7 - mu_7
sum_7 = centered_7.T @ centered_7
sigma_7 = sum_7 / len_7

# TODO: Task 2 - Confirm value of mean and variance
print(sigma_3)
print(sigma_7)

[0.37687996 0.31851855]
[-0.36900004 -0.31185886]
[[1.0491056  0.98717364]
 [0.98717364 0.96037982]]
[[0.67669136 0.74435619]
 [0.74435619 0.842203  ]]


In [49]:
# Decision rule: assign the class with the larger discriminant
# g(x) = ln(likelihood) + ln(prior); ties go to class 7.

# Training case 1: equal priors, P(3) = P(7) = 0.5.
train_case1_output = np.zeros(len(train_y))
for i, x in enumerate(train_y):
    g3 = discrim(x, mu_3, sigma_3, 0.5)
    # Class 7 is scored with its own covariance estimate.
    g7 = discrim(x, mu_7, sigma_7, 0.5)
    train_case1_output[i] = 3 if g3 > g7 else 7

# Training case 2: unequal priors, P(3) = 0.3 and P(7) = 0.7.
# Every training sample is classified, not just the first ten.
train_case2_output = np.zeros(len(train_y))
for i, x in enumerate(train_y):
    g3 = discrim(x, mu_3, sigma_3, 0.3)
    g7 = discrim(x, mu_7, sigma_7, 0.7)
    train_case2_output[i] = 3 if g3 > g7 else 7

In [54]:
# Normalised (mean, std-dev) feature pair for every test sample.
# NOTE(review): getNormalizedFeatures standardises with statistics computed
# from the array it is handed, so the test set is scaled by its own M1/S1/M2/S2
# rather than by the values computed from the training data — confirm that
# this is intended.
test_y = getNormalizedFeatures(test_data_arr)
# print(test_y[0:10])

[[ 1.27642966  1.17904022]
 [ 0.32451918  0.49973571]
 [ 1.26264329  1.10318912]
 [ 2.60889625  2.12969339]
 [-0.07164414 -0.10773717]
 [-1.15218415 -1.31728734]
 [ 0.89860483  0.87134993]
 [-0.94968045 -1.07152469]
 [ 0.01224468  0.14448575]
 [-0.54896502 -0.5212292 ]]


In [56]:
# Test case 1: equal priors, P(3) = P(7) = 0.5.
# The class with the larger discriminant wins; ties go to class 7.
test_case1_output = np.zeros(n_test)
for i in range(n_test):
    g3 = discrim(test_y[i], mu_3, sigma_3, 0.5)
    # Class 7 is scored with its own covariance estimate.
    g7 = discrim(test_y[i], mu_7, sigma_7, 0.5)
    test_case1_output[i] = 3 if g3 > g7 else 7

# Test case 2: unequal priors, P(3) = 0.3 and P(7) = 0.7.
test_case2_output = np.zeros(n_test)
for i in range(n_test):
    g3 = discrim(test_y[i], mu_3, sigma_3, 0.3)
    g7 = discrim(test_y[i], mu_7, sigma_7, 0.7)
    test_case2_output[i] = 3 if g3 > g7 else 7