# Linear Discriminant Analysis

Import the required libraries

In [1]:
import numpy as np
import scipy as sp
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
import DatasetSplitter as ds

# Load the four data/label collections from the project-local DatasetSplitter
# module. NOTE(review): shapes and label encoding are defined by
# ds.splitData(), which is not visible here — confirm against that module.
training_data, training_labels, testing_data, testing_labels = ds.splitData()

Split the training data into 40 classes

In [2]:
def split_data(training_data, num_classes=40):
    """Split the training samples into equally sized, contiguous class groups.

    The rows of ``training_data`` are cut into ``num_classes`` consecutive
    chunks of equal length, so the samples must already be ordered by class.

    Args:
        training_data: Array-like of shape (n_samples, n_features).
        num_classes: Number of equally sized groups to produce. Defaults to
            40, the value previously hard-coded here.

    Returns:
        A list of ``num_classes`` arrays, each holding the rows of one class.

    Raises:
        ValueError: If the rows cannot be divided evenly into ``num_classes``
            groups (raised by ``np.split``).
    """
    mat = np.array(training_data)
    return np.split(mat, indices_or_sections=num_classes)

Calculate the class means

In [3]:
def get_class_means(split_data):
    """Return the mean sample of every class.

    Args:
        split_data: Sequence of equally sized per-class sample matrices; it is
            stacked into one array with the class index on axis 0.

    Returns:
        Array with one row per class, obtained by averaging over axis 1
        (the samples within each class).
    """
    stacked = np.asanyarray(split_data)
    return stacked.mean(axis=1)

Calculate the overall mean

In [4]:
def get_overall_mean(training_data):
    """Return the mean of all training samples.

    Args:
        training_data: Array-like with one sample per row.

    Returns:
        The average taken over axis 0, i.e. one value per column.
    """
    samples = np.asanyarray(training_data)
    return samples.mean(axis=0)

Calculate the centered classes matrix

In [6]:
def get_centered_classes(split_data, class_means):
    """Subtract each class mean from every sample of that class.

    Args:
        split_data: Sequence of per-class sample matrices.
        class_means: Sequence of class means, aligned with ``split_data``.

    Returns:
        A list with one entry per class; each entry is a list containing the
        centered rows (``row - class_mean``) of that class.
    """
    return [
        [sample - mean for sample in samples]
        for samples, mean in zip(split_data, class_means)
    ]

Calculate the between-class scatter matrix (Sb)

In [7]:
def get_between_class(class_means, overall_mean, samples_per_class=5):
    """Compute the between-class scatter matrix Sb.

    Sb = samples_per_class * sum_k (mu_k - mu)(mu_k - mu)^T, where mu_k are
    the class means and mu is the overall mean.

    Args:
        class_means: Array-like of shape (n_classes, n_features).
        overall_mean: Array-like of length n_features.
        samples_per_class: Number of samples in every class, used as the
            weight of each class term. Defaults to 5, the value previously
            hard-coded here.

    Returns:
        Float array of shape (n_features, n_features). An empty
        ``class_means`` yields an all-zero matrix.
    """
    overall_mean = np.asarray(overall_mean, dtype=float).ravel()
    # The feature dimension is taken from the data instead of a fixed 10304,
    # so the function works for any input size.
    n_features = overall_mean.shape[0]
    deviations = (
        np.asarray(class_means, dtype=float).reshape(-1, n_features)
        - overall_mean
    )
    # D^T D equals the sum of the per-class outer products, without building
    # a full (n_features, n_features) temporary for every class.
    return samples_per_class * (deviations.T @ deviations)

Calculate the within-class scatter matrix (Sw) and return its pseudo-inverse (Sw⁻¹)

In [8]:
def get_within_class(centered_classes):
    """Compute the pseudo-inverse of the within-class scatter matrix Sw.

    Sw is the sum over all classes of Z_k^T Z_k, where Z_k is the matrix of
    centered samples of class k. Note that the pseudo-inverse of Sw is
    returned, not Sw itself.

    Args:
        centered_classes: Non-empty sequence of per-class centered sample
            matrices, each array-like of shape (n_samples_k, n_features).

    Returns:
        Array of shape (n_features, n_features): the Moore-Penrose
        pseudo-inverse of Sw.

    Raises:
        ValueError: If ``centered_classes`` is empty, because the feature
            dimension cannot be determined.
    """
    # Imported explicitly so the call does not depend on the scipy.linalg
    # submodule having been loaded as a side effect of another import.
    from scipy.linalg import pinv

    Sw = None
    for centered_class in centered_classes:
        mat = np.asarray(centered_class, dtype=float)
        if Sw is None:
            # Size the accumulator from the data rather than a fixed 10304.
            Sw = np.zeros((mat.shape[1], mat.shape[1]))
        Sw += mat.T @ mat
    if Sw is None:
        raise ValueError("centered_classes must contain at least one class")
    return pinv(Sw)

Calculate the eigenvalues and eigenvectors for the inverse of Sw multiplied by Sb

In [9]:
def get_eigens(Sw_inverse, Sb):
    """Eigen-decompose the product of ``Sw_inverse`` and ``Sb``.

    Args:
        Sw_inverse: Square matrix, used as the left factor of the product.
        Sb: Square matrix, used as the right factor of the product.

    Returns:
        A ``(values, vectors)`` tuple as produced by ``np.linalg.eig``; the
        eigenvectors are the columns of ``vectors``.
    """
    product = np.dot(Sw_inverse, Sb)
    values, vectors = np.linalg.eig(product)
    return values, vectors

Calculate the 39 most significant eigenvectors (Corresponding to the most significant eigenvalues)

In [10]:
def get_U(eigenvalues, eigenvectors, num_components=39):
    """Build the projection matrix from the dominant eigenvectors.

    Args:
        eigenvalues: 1-D array of eigenvalues.
        eigenvectors: 2-D array whose column ``i`` is the eigenvector that
            belongs to ``eigenvalues[i]``.
        num_components: Number of leading eigenvectors to keep. Defaults to
            39, the value previously hard-coded here. If fewer columns are
            available, all of them are returned.

    Returns:
        Real array whose columns are the eigenvectors ordered by descending
        eigenvalue, truncated to ``num_components`` columns.
    """
    # argsort is ascending, so reverse it to put the largest eigenvalues first.
    sorted_indices = np.argsort(eigenvalues)[::-1]
    sorted_eigenvectors = np.asarray(eigenvectors)[:, sorted_indices]
    # The decomposition may return complex values; only the real part is kept.
    return np.real(sorted_eigenvectors[:, :num_components])

# Driver Code

In [11]:
# Split the training samples into per-class groups.
# NOTE: this rebinds the name `split_data` from the function to its result, so
# running this statement a second time fails ("object is not callable") until
# the cell defining the function is executed again.
split_data = split_data(training_data)

In [12]:
# Mean sample of each class group.
class_means = get_class_means(split_data)

In [13]:
# Mean of all training samples, taken over axis 0.
overall_mean = get_overall_mean(training_data)

In [14]:
# Subtract each class mean from the samples of that class.
centered_classes = get_centered_classes(split_data, class_means)

In [15]:
# Between-class scatter matrix.
Sb = get_between_class(class_means, overall_mean)

In [17]:
# Pseudo-inverse of the within-class scatter matrix (the function returns the
# inverse only, not Sw itself).
Sw_inverse = get_within_class(centered_classes)

In [18]:
# Eigen-decomposition of Sw_inverse · Sb.
eigenvalues, eigenvectors = get_eigens(Sw_inverse, Sb)

In [19]:
# Keep the eigenvectors belonging to the largest eigenvalues as the columns of
# the projection matrix.
projection_matrix = get_U(eigenvalues, eigenvectors)

# Projection and Classification

In [20]:
# Project both data sets onto the discriminant directions by right-multiplying
# with the projection matrix.
projected_training_data = np.dot(training_data , projection_matrix)
projected_testing_data = np.dot(testing_data , projection_matrix)

In [21]:
# Classify the projected test samples with a nearest-neighbour classifier
# (first positional argument 1 = number of neighbours) fitted on the projected
# training samples.
classifier = KNeighborsClassifier(1)
classifier.fit(projected_training_data, training_labels)
prediction = classifier.predict(projected_testing_data)
# Fraction of test labels predicted correctly.
accuracy = accuracy_score(testing_labels, prediction)

# Report the accuracy as a percentage.
print ("Accuracy = ", accuracy * 100)

Accuracy =  95.5


# Bonus

### Use Regularized LDA (RLDA)

In [22]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.metrics import accuracy_score
import DatasetSplitter as ds

# Reload the data from the project-local DatasetSplitter module.
# NOTE(review): whether this reproduces the same split as the first cell
# depends on ds.splitData(), which is not visible here — confirm.
training_data, training_labels, testing_data, testing_labels = ds.splitData()

# Create a regularized LDA classifier using scikit-learn's implementation.
# Per the scikit-learn documentation, shrinkage is only supported by the
# 'lsqr' and 'eigen' solvers, and 'auto' selects the shrinkage amount
# automatically.
clf = LDA(solver='lsqr', shrinkage='auto')  # 'lsqr' solver, automatic shrinkage

# Fit the model on the raw (unprojected) training data.
clf.fit(training_data, training_labels)

# Predict labels for the testing data.
predected_labels = clf.predict(testing_data)

# Compute the accuracy on the testing data and print it as a percentage.
accuracy = accuracy_score(testing_labels, predected_labels)
print("Accuracy:", accuracy * 100)

Accuracy: 95.0
