In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.decomposition import PCA

In [None]:
def wiPCA(samples, components=1):
    """Project ``samples`` onto their leading principal components.

    The data is centred on the per-feature mean, the covariance matrix of the
    features is eigendecomposed, and the eigenvectors belonging to the largest
    eigenvalues are used as the projection basis.

    Parameters
    ----------
    samples : array-like, shape (n_samples, n_features)
        One observation per row, one feature per column.
    components : int, default 1
        Number of principal components to keep.

    Returns
    -------
    pca_projection : ndarray, shape (n_samples, components)
        Centred data expressed in the basis of the kept components.
    reconstruction : ndarray, shape (n_samples, n_features)
        Projection mapped back to feature space with the mean added again.
    cumulative_explained_variance : ndarray, shape (n_features,)
        Running sum of each eigenvalue's share of the total, largest first.

    Notes
    -----
    When ``components`` is negative or larger than the number of features the
    function returns ``False`` instead of the tuple, so callers that unpack
    the result must pass a valid component count.
    """
    samples = np.asarray(samples)
    n_features = samples.shape[1]
    # A negative count would silently slice the eigenvector matrix from the
    # wrong end, so it is rejected the same way as a count that is too large.
    if components < 0 or components > n_features:
        return False

    mean = np.mean(samples, axis=0)
    mean_data = samples - mean

    # np.cov squeezes its result to a 0-d array when there is a single
    # feature; eigh requires a 2-D matrix, so restore the (1, 1) shape.
    cov = np.atleast_2d(np.cov(mean_data, rowvar=False))
    eig_val, eig_vec = np.linalg.eigh(cov)

    # Order eigenpairs from largest to smallest eigenvalue.
    sorted_indices = np.argsort(eig_val)[::-1]
    eig_val = eig_val[sorted_indices]
    eig_vec = eig_vec[:, sorted_indices]

    principal_components = eig_vec[:, :components]
    pca_projection = np.dot(mean_data, principal_components)
    reconstruction = np.dot(pca_projection, principal_components.T) + mean

    explained_variance = eig_val / np.sum(eig_val)
    cumulative_explained_variance = np.cumsum(explained_variance)
    return pca_projection, reconstruction, cumulative_explained_variance

# Toy example: 200 correlated 2-D points, reduced to one principal component
# and mapped back to the plane.  A seeded generator keeps the figure identical
# across kernel restarts.
rng = np.random.default_rng(0)
s = np.dot(rng.standard_normal((2, 2)), rng.standard_normal((2, 200)))
s = s.T  # rows are samples, columns are features, as wiPCA expects
_, reconstruction, _ = wiPCA(s)

plt.scatter(s[:, 0], s[:, 1], label='samples')
plt.scatter(reconstruction[:, 0], reconstruction[:, 1], label='1-component reconstruction')
plt.legend()
plt.title('PCA reconstruction of 2-D toy data')
plt.show()

s.shape, reconstruction.shape

In [None]:
# Load the iris measurements and project them onto the first two principal
# components.
iris = datasets.load_iris()
pca_projection, _, _ = wiPCA(iris.data, 2)

# One scatter call per class so every class gets its own colour and legend entry.
colors = ['red', 'green', 'blue']
for class_id, (class_color, class_name) in enumerate(zip(colors, iris.target_names)):
    members = iris.target == class_id
    plt.scatter(
        pca_projection[members, 0],
        pca_projection[members, 1],
        c=class_color,
        label=class_name,
    )
plt.legend()

plt.show()
iris.data[0].shape

In [None]:
digits = datasets.load_digits()
# Keep the reconstruction computed for this dataset: the shape comparison at
# the end of the cell must not read a stale `reconstruction` left behind by
# an earlier cell.
pca_projection, digits_reconstruction, cumulative_explained_variance = wiPCA(digits.data, 2)

scatter = plt.scatter(pca_projection[:, 0], pca_projection[:, 1], c=digits.target, edgecolor='k')

# Create a colorbar with one band per digit (0-9) and a tick centred on each
colorbar = plt.colorbar(scatter, boundaries=np.arange(-0.5, 10, 1), spacing='proportional', ticks=np.arange(0, 10))

# Set the label of the colorbar
colorbar.set_label('Digit Label', rotation=270, labelpad=20)

plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('PCA of Digits Dataset')

plt.show()

digits.data.shape, digits_reconstruction.shape

In [None]:
# Entry k of the cumulative curve is the share of variance explained by the
# first k + 1 components, so plot against 1..n rather than the 0-based index.
n_components_axis = np.arange(1, len(cumulative_explained_variance) + 1)
plt.plot(n_components_axis, cumulative_explained_variance)
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance')
plt.title('Cumulative Explained Variance of Digits Dataset')
plt.show()

In [None]:
# Mean squared reconstruction error on the digits data for every possible
# number of retained components.
n_features = digits.data.shape[1]  # derived from the data instead of a hard-coded range end
component_counts = range(1, n_features + 1)

errors = []
for num_components in component_counts:
    _, reconstruction, _ = wiPCA(digits.data, components=num_components)
    error = np.mean((digits.data - reconstruction)**2)
    errors.append(error)

plt.plot(component_counts, errors)
plt.xlabel('Number of Components')
plt.ylabel('Mean Squared Reconstruction Error')
plt.title('Reconstruction Error of Digits Dataset')
plt.show()