In [108]:
import numpy as np
import pandas as pd
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
import pickle
import matplotlib.pyplot as plt
import gzip
import warnings
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)
from sklearn.preprocessing import StandardScaler

In [109]:
train_data = pd.read_csv("./data/fmnist/fashion-mnist_train.csv")
test_data = pd.read_csv("./data/fmnist/fashion-mnist_test.csv")

In [110]:
train_x = np.array(train_data.iloc[:,1:])
train_y = np.array(train_data.iloc[:, 0])
test_x = np.array(test_data.iloc[:, 1:])
test_y = np.array(test_data.iloc[:,0])
labels_to_name = { 0:"T-shirt/top", 1 :"Trouser", 2 :"Pullover", 3 :"Dress", 4 :"Coat", 5 :"Sandal", 6 :"Shirt", 7 :"Sneaker", 8 :"Bag", 9 :"Ankle boot"}

In [111]:
# train_x = StandardScaler().fit_transform(train_x)
# test_x = StandardScaler().fit_transform(test_x)

In [112]:
def get_class_wise_data(train_x, train_y):
    class_wise_data = []
    
    for label in range(10):
        # print(label)
        label_idx = list(np.where(train_y == label)[0])
        # print(f"label_idx = {len(label_idx)}")
        class_x_data = train_x[label_idx].T
        # print(f"class_x = {class_x_data.shape}")
        class_wise_data.append(class_x_data)
    total_X_matrix = np.array(class_wise_data[0])
    # print(f"INITIAL : {total_X_matrix.shape}")
    for label in range(1, 10):
        # print(class_wise_data[label].shape)
        total_X_matrix = np.concatenate((total_X_matrix, class_wise_data[label]), axis=1)
        # print(f"AFTER : {total_X_matrix.shape}" )
        
    return class_wise_data, total_X_matrix

In [113]:
def get_class_wise_means(class_wise_x_train):
    class_wise_means = []
    for i in range(len(class_wise_x_train)):
        mean_x_i = np.mean(class_wise_x_train[i], axis=1)
        mean_x_i = mean_x_i.reshape(mean_x_i.shape[0], 1)
        class_wise_means.append(mean_x_i)
    return class_wise_means

In [114]:
def get_S_w(class_wise_x_train, class_wise_means):
    num_classes = len(class_wise_x_train)
    num_features = class_wise_means[0].shape[0]
    # print(f"Num classes = {num_classes} | Num_feats = {num_features}")
    S_mats = []
    S_w = np.zeros((num_features, num_features))
    for i in range(num_classes):
        S_i = np.dot((class_wise_x_train[i] - class_wise_means[i]), (class_wise_x_train[i] - class_wise_means[i]).T)
        # print(f"shape of S_i = {S_i.shape}")
        S_mats.append(S_i)
        S_w += S_i
    # print(f"Final shape S_w = {S_w.shape}")
    return S_w

In [115]:
def get_S_b(total_x_train, S_w):
    mean_X_total = np.mean(total_x_train, axis=1)
    mean_X_total = mean_X_total.reshape((mean_X_total.shape[0], 1))
    S_t = np.dot((total_x_train - mean_X_total), (total_x_train - mean_X_total).T)
    S_b = S_t - S_w
    return S_b

In [116]:
def sort_eigenvectors(eigenvectors, eigenvalues):
    eigenvectors_sorted_inc = eigenvectors[:, eigenvalues.argsort()]
    eigenvectors_sorted_dec = np.flip(eigenvectors_sorted_inc, axis=1)
    return eigenvectors_sorted_dec

In [117]:
def perform_FDA(train_x, train_y):
    class_wise_x_train, total_x_train = get_class_wise_data(train_x, train_y)
    class_wise_means = get_class_wise_means(class_wise_x_train)
    S_w = get_S_w(class_wise_x_train, class_wise_means)
    S_b = get_S_b(total_x_train, S_w)
    scat_prod = np.dot(np.linalg.inv(S_w), S_b)
    un, eigenvalues, eigenvectors = np.linalg.svd(scat_prod)
    W = sort_eigenvectors(eigenvectors, eigenvalues)[:, :9]
    return W
def project_data(X, W):
    return np.dot(W.T, X)


In [118]:
W = perform_FDA(train_x, train_y)

(784, 784)


In [120]:
def compute_accuracy(y_true, y_pred):
    correct_classif = 0
    total = len(y_true)
    for i in range(total):
        if(y_true[i] == y_pred[i]):
            correct_classif += 1
    return correct_classif / total

In [121]:
projected_X_train = project_data(train_x.T, W)

In [122]:
clf = LDA()
clf.fit(projected_X_train.T, train_y)
pickle.dump(clf, open(f'./models/q3_lda.sav', 'wb'))
projected_X_test = project_data(test_x.T, W)
y_preds = clf.predict(projected_X_test.T)

projected X test shape = (9, 10000)


In [123]:
overall_accuracy = compute_accuracy(test_y, y_preds)
print(f"\nOverall accuracy = {overall_accuracy}\n\n-------------\n")
# #Class-wise-accuracy
class_wise_accuracy = {}
for lab in labels_to_name.keys():
    class_idxs = np.where(test_y == lab)[0]
    true_labs = test_y[class_idxs]
    pred_labs = y_preds[class_idxs]
    class_wise_accuracy[lab] = compute_accuracy(true_labs, pred_labs)
for lab in class_wise_accuracy.keys():
    print(f"Class-wise accuracy for class-{lab} [{labels_to_name[lab]}] : {class_wise_accuracy[lab]}")


Overall accuracy = 0.6088

-------------

Class-wise accuracy for class-0 [T-shirt/top] : 0.647
Class-wise accuracy for class-1 [Trouser] : 0.85
Class-wise accuracy for class-2 [Pullover] : 0.402
Class-wise accuracy for class-3 [Dress] : 0.647
Class-wise accuracy for class-4 [Coat] : 0.545
Class-wise accuracy for class-5 [Sandal] : 0.609
Class-wise accuracy for class-6 [Shirt] : 0.197
Class-wise accuracy for class-7 [Sneaker] : 0.749
Class-wise accuracy for class-8 [Bag] : 0.609
Class-wise accuracy for class-9 [Ankle boot] : 0.833
