### Please compute the covariance matrix C of Figure 3.

In [59]:
# Compute the covarience matrix of Figure 3's data points.
import numpy as np
data = np.array([[20, 5], [8, -2], [-6, -3], [6, 4]]).T;
data
matrix_C = np.cov(data, rowvar=True, bias=True)
matrix_C

array([[0.10846523, 0.20203051],
       [0.20203051, 0.28284271]])

### Please compute the vectors {z1, z2, z3, z4} after applying Zscore transformation to the dataset in Figure 3. Denote by µ the 2-dimensional mean vector of the four instances and by σd the standard deviation of the d-th dimension, the formula of Z-score is

In [4]:
# Perform ZCA Whitening on the given matrix.
# Borrowed from https://gryslik.github.io/osu_cse3521_spring2022/course_materials/lecture_examples/Standardization_Normalization_Whitening_PCA.ipynb
def zca_whitening_matrix(X):
    """
    Function to compute ZCA whitening matrix (aka Mahalanobis whitening).
    INPUT:  X: [M x N] matrix.
        Rows: Variables
        Columns: Observations
    OUTPUT: ZCAMatrix: [M x M] matrix
    """
    # Covariance matrix [column-wise variables]: Sigma = (X-mu)' * (X-mu) / N
    sigma = np.cov(X, rowvar=True, bias = True) # [M x M]
    # Singular Value Decomposition. X = U * np.diag(S) * V
    U,S,V = np.linalg.svd(sigma)
        # U: [M x M] eigenvectors of sigma.
        # S: [M x 1] eigenvalues of sigma.
        # V: [M x M] transpose of U
    # Whitening constant: prevents division by zero
    epsilon = 1e-15
    # ZCA Whitening matrix: U * Lambda * U'
    ZCAMatrix = np.dot(U, np.dot(np.diag(1.0/np.sqrt(S + epsilon)), U.T)) # [M x M]
    return ZCAMatrix, sigma, U, S, V
data_mean_vals = data.mean(axis=1).reshape(-1,1)
#print(data_mean_vals)
zca_converter, _, _, _, _ = zca_whitening_matrix(data)
data_zca = np.dot(zca_converter, data-data_mean_vals)
#print(data_zca)

# Get the Z-scores.
from scipy import stats
z_scores = stats.zscore(data_zca, axis=None)
print("Z1:", z_scores[:,0])
print("Z2:", z_scores[:,1])
print("Z3:", z_scores[:,2])
print("Z4:", z_scores[:,3])


Z1: [1.34983853 0.42182454]


### What is the 2-dimensional mean vector and what is the standard deviation of each dimension of the resulting {z1, z2, z3, z4}?

In [40]:
vect = np.mean(z_scores[:,1], axis=0)
print("Z1 Mean Vector:", np.around(vect,3))
vect = np.mean(z_scores[:,1], axis=0)
print("Z2 Mean Vector:", np.around(vect,3))
vect = np.mean(z_scores[:,2], axis=0)
print("Z3 Mean Vector:", np.around(vect,3))
vect = np.mean(z_scores[:,3], axis=0)
print("Z4 Mean Vector:", np.around(vect,3))
print("--------------------------")
std = np.std(z_scores[:,0], axis=0)
print("Z1 Std. Deviation:", np.around(std,3))
std = np.std(z_scores[:,1], axis=0)
print("Z2 Std. Deviation:", np.around(std,3))
std = np.std(z_scores[:,2], axis=0)
print("Z3 Std. Deviation:", np.around(std,3))
std = np.std(z_scores[:,3], axis=0)
print("Z4 Std. Deviation:", np.around(std,3))

Z1 Mean Vector: -0.464
Z2 Mean Vector: -0.464
Z3 Mean Vector: -0.886
Z4 Mean Vector: 0.464
--------------------------
Z1 Std. Deviation: 0.464
Z2 Std. Deviation: 0.886
Z3 Std. Deviation: 0.464
Z4 Std. Deviation: 0.886


### Part 4: Please applying whitening to the dataset in Figure 4.

In [74]:
data2 = np.array([[20, -5], [8, 2], [-6, 3], [6, -4]]).T;
cov_2 = np.array([[.133, .096], [.096, .418]]).T
mean_vect = np.array([[7, -1]]).T
whitened = np.dot(cov_2, data2-mean_vect)
print("Whitened Matrix:\n", whitened)

Whitened Matrix:
 [[ 1.345  0.421 -1.345 -0.421]
 [-0.424  1.35   0.424 -1.35 ]]


### Part 5: Please apply PCA to the dataset in Figure 4 without reducing the dimensionality. That is, you are to construct the 2-by-2 matrix W = [w1, w2], where w1 and w2 are two 2-dimensional eigenvectors

In [93]:
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
data2_pca = pca.fit_transform(data2.T)
data2_pca

array([[ 13.60145851,  -0.01806411],
       [  0.07772008,   3.16132244],
       [-13.60145851,   0.01806411],
       [ -0.07772008,  -3.16132244]])

In [98]:
normed_mat= np.linalg.norm(data2_pca, axis=1, ord = 2)
normed_mat

array([13.60147051,  3.16227766, 13.60147051,  3.16227766])