In [9]:
import numpy as np
import imageio as iio
import DatasetSplitter
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA as pca_test


In [10]:
def covariance(D):
    global mean_vector 
    mean_vector = np.mean(D, axis=0)
    Z = D - mean_vector
    cov = (1/(len(D) - 1)) * (Z.T @ Z) 
    return cov

In [11]:
trainingData, trainingLabels, testingData, testingLabels = DatasetSplitter.differentSplitData()
trainingData = np.array(trainingData)
print(np.cov(trainingData,  rowvar=False).T)

[[1283.63333333 1269.42831541 1261.390681   ... -186.80860215
  -140.96594982  -83.68530466]
 [1269.42831541 1273.30133129 1258.84587814 ... -177.35637481
  -136.70481311  -80.32667691]
 [1261.390681   1258.84587814 1261.02508961 ... -188.94623656
  -142.06451613  -88.08602151]
 ...
 [-186.80860215 -177.35637481 -188.94623656 ... 2204.85796211
  1921.37224782 1784.1593446 ]
 [-140.96594982 -136.70481311 -142.06451613 ... 1921.37224782
  1969.2859703  1811.91858679]
 [ -83.68530466  -80.32667691  -88.08602151 ... 1784.1593446
  1811.91858679 1870.55012801]]


In [12]:
print(covariance(trainingData))

[[1283.63333333 1269.42831541 1261.390681   ... -186.80860215
  -140.96594982  -83.68530466]
 [1269.42831541 1273.30133129 1258.84587814 ... -177.35637481
  -136.70481311  -80.32667691]
 [1261.390681   1258.84587814 1261.02508961 ... -188.94623656
  -142.06451613  -88.08602151]
 ...
 [-186.80860215 -177.35637481 -188.94623656 ... 2204.85796211
  1921.37224782 1784.1593446 ]
 [-140.96594982 -136.70481311 -142.06451613 ... 1921.37224782
  1969.2859703  1811.91858679]
 [ -83.68530466  -80.32667691  -88.08602151 ... 1784.1593446
  1811.91858679 1870.55012801]]


# Dual PCA

In [13]:
def covariance(D):
    global mean_vector 
    mean_vector = np.mean(D, axis=0)
    Z = D - mean_vector 
    cov = (Z @ Z.T) 
    return cov

In [14]:
def DualPCA(D, testingData, alpha): # D is n x d
    cov = covariance(D)
    global eigenvectors, eigenvalues, UT                # U -> n x rank
    eigenvectors, eigenvalues, UT = np.linalg.svd(cov) # With resc pect to Z @ Z.T
    eigenvalues = np.sqrt(eigenvalues) # Sigma
    
    sorted_idx = eigenvalues.argsort()[::-1]  # Sort in descending order
    eigenvalues = eigenvalues[sorted_idx]
    eigenvectors = eigenvectors[:, sorted_idx]
    # Project training Data Z . V = U . Sigma
    trace = sum(eigenvalues)
    c = 0 # Accumulator for sum of eigenvaleus
    lastIndex = 0
    for index in range(len(eigenvalues)):
        c += eigenvalues[index]
        if c / trace >= alpha :
            lastIndex = index
            break
    Up = eigenvectors[:, :lastIndex + 1] # U`
    sigmap = np.diag(eigenvalues[:lastIndex + 1])
    projectedData = Up @ sigmap
    
    # Project testing
    Z = D - mean_vector 
    testingData = testingData - mean_vector
    
    epsilon = 1e-10  # Small constant
    # Check if an element is zero consider it eps o.w leave it as it is.
    inv = np.diag(np.where(eigenvalues[:lastIndex + 1] == 0, 1 / epsilon, 
                           np.reciprocal(eigenvalues[:lastIndex + 1]))) 
    
    # inverse_matrix = np.linalg.inv(sigmap) # Sigma inverse
    
    projectionMatrix  =  Z.T @ Up @ inv
    projectedTestingData = testingData @ projectionMatrix
    
    return projectedData, projectedTestingData

In [15]:
def classifyKNN(projectedTrainData, trainLabel, projectedTestData, testLabel, k):
    knn = KNeighborsClassifier(n_neighbors=k, weights='distance')
    knn.fit(projectedTrainData, trainLabel)
    # Calc accuracy of clssification
    predictedClasses = knn.predict(projectedTestData)
    accuracy = accuracy_score(testLabel, predictedClasses)
    print(f"Accuracy {100 * (accuracy)}")
    return accuracy

In [16]:
alpha_list = np.array([0.95])
accuracy_results = np.array([])
k = 1
for alpha in alpha_list:
    # Train the model
    projectedTrainingData, projectedTestingData = DualPCA(trainingData, testingData, 0.8)
    # Classify and test
    accuracy_results = np.append(accuracy_results, classifyKNN(
        projectedTrainingData, trainingLabels, 
        projectedTestingData, testingLabels,
        k
    ))

Accuracy 94.16666666666667
