# Assignment 2
by K. Sai Somanath, 18MCMT28

## Question 1
Implement PCA and LDA

In [1]:
import numpy as np
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler

In [8]:
def pca(data, components, std=True):
    if std:
        data_std = StandardScaler().fit_transform(data)
    else:
        data_std = data
    # print(data_std)
    cov = np.cov(data_std.T)
    eigen_values, eigen_vectors = np.linalg.eig(cov)
    print(eigen_values, eigen_vectors, sep='\n')
    
    # Make a list of (eigenvalue, eigenvector) tuples
    eigen_pairs = [(np.abs(eigen_values[i]), eigen_vectors[:,i]) for i in range(len(eigen_values))]

    # Sort the (eigenvalue, eigenvector) tuples from high to low
    eigen_pairs.sort()
    eigen_pairs.reverse()

    matrix_w = np.hstack((eigen_pairs[0][1]))
    for k in range(1, components):
        matrix_w = np.hstack((matrix_w.reshape(data_std.shape[1], 1), eigen_pairs[k][1].reshape(data_std.shape[1],1)))

    Y = data_std.dot(matrix_w)
    return Y

In [9]:
def lda(data, components):
    '''
    data: shape: classes x samples_size x no_features
    '''    
    # np.set_printoptions(precision=4)
    # Find the means
    means = np.mean(data, axis=1)
    # print(means)
    # Find the within class scatter matrix
    scatter_within = np.zeros((data.shape[-1], data.shape[-1]))
    for index, cls in enumerate(data):
        sc_mat = np.zeros((4,4))
        for row in cls:
            r, m = row.reshape(data.shape[-1], 1), means[index].reshape(data.shape[-1], 1)
            sc_mat += (r-m).dot((r-m).T)
        scatter_within += sc_mat
        
    # print(scatter_within)
    
    # Find the between class matrix
    mu = np.mean(data.reshape(data.shape[0] * data.shape[1], data.shape[2]), axis=0).reshape(data.shape[-1], 1)
    # print(mu)
    scatter_between = np.zeros((data.shape[-1], data.shape[-1]))
    for m in means:
        m = m.reshape(data.shape[-1], 1)
        # print(m, mu, sep='\n\n')
        scatter_between += data.shape[1] * (m - mu).dot((m - mu).T)
    
    # print(scatter_between)
    
    eigen_values, eigen_vectors = np.linalg.eig(np.linalg.inv(scatter_within).dot(scatter_between))
    # print(eigen_values)
    
    # Make a list of (eigenvalue, eigenvector) tuples
    eigen_pairs = [(np.abs(eigen_values[i]), eigen_vectors[:,i]) for i in range(len(eigen_values))]
    # Sort the (eigenvalue, eigenvector) tuples from high to low
    eigen_pairs.sort(key=lambda k: k[0], reverse=True)
    
    matrix_w = np.hstack((eigen_pairs[0][1]))
    for k in range(1, components):
        matrix_w = np.hstack((matrix_w.reshape(data.shape[-1], 1), eigen_pairs[k][1].reshape(data.shape[-1],1)))
    # print(matrix_w)
    Y = data.reshape(data.shape[0] * data.shape[1], data.shape[2]).dot(matrix_w)
    return Y

In [10]:
# Laod data
iris = np.genfromtxt('../iris.csv', delimiter=',', skip_header=1, usecols=(0,1,2,3))

In [12]:
A = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.float64)
pca(A, 2)


[3. 0.]
[[ 0.70710678 -0.70710678]
 [ 0.70710678  0.70710678]]


array([[-1.73205081e+00, -7.75139526e-18],
       [ 0.00000000e+00,  0.00000000e+00],
       [ 1.73205081e+00,  0.00000000e+00]])

In [6]:
lda(iris.reshape(3, 50, 4), 2)

array([[ 1.49220928+0.j,  1.9047102 +0.j],
       [ 1.25765567+0.j,  1.60841445+0.j],
       [ 1.3487506 +0.j,  1.74984635+0.j],
       [ 1.18024885+0.j,  1.63919095+0.j],
       [ 1.51043263+0.j,  1.96271183+0.j],
       [ 1.40183784+0.j,  2.22012481+0.j],
       [ 1.27966155+0.j,  1.91802239+0.j],
       [ 1.37835575+0.j,  1.81948346+0.j],
       [ 1.11648646+0.j,  1.54502342+0.j],
       [ 1.3131003 +0.j,  1.56518244+0.j],
       [ 1.57646265+0.j,  1.99977597+0.j],
       [ 1.28272558+0.j,  1.79225834+0.j],
       [ 1.30854321+0.j,  1.530813  +0.j],
       [ 1.37003298+0.j,  1.60260779+0.j],
       [ 1.9385142 +0.j,  2.25635444+0.j],
       [ 1.76617886+0.j,  2.5681761 +0.j],
       [ 1.62043071+0.j,  2.32183942+0.j],
       [ 1.42083076+0.j,  1.98141342+0.j],
       [ 1.49597495+0.j,  2.08721643+0.j],
       [ 1.48232553+0.j,  2.13268434+0.j],
       [ 1.35102322+0.j,  1.77221909+0.j],
       [ 1.37223268+0.j,  2.1504877 +0.j],
       [ 1.6470616 +0.j,  2.06083351+0.j],
       [ 1.