# PCA - Example 2
## Consider the following dataset with two features (x1 and x2) for 5 observations. Apply PCA to reduce the dimension from two to one.

In [1]:
import numpy as np

# Step 1: Build the dataset -- one row per observation, columns are (x1, x2)
data = np.array([[2, 4], [0, 0], [1, 1], [3, 3], [5, 5]])

In [2]:
# Step 2: Center the data by subtracting each feature's mean
# (centering only -- the features are not rescaled to unit variance)
mean = data.mean(axis=0)
centered_data = data - mean

In [3]:
# Inspect the per-feature means that were subtracted from the data
mean

array([2.2, 2.6])

In [4]:
# Step 3: Compute the 2x2 sample covariance matrix of the features
# (normalised by n - 1, which is np.cov's default).
# Transposing puts each feature in its own row, the layout np.cov expects
# when `rowvar` is left at its default.
cov_matrix = np.cov(centered_data.T)

In [5]:
# Inspect the covariance matrix (variances on the diagonal, covariance off it)
cov_matrix

array([[3.7, 3.6],
       [3.6, 4.3]])

In [6]:
# Step 4: Compute eigenvalues and eigenvectors
# A covariance matrix is real and symmetric, so use the symmetric solver
# `eigh` rather than the general-purpose `eig`: it always returns real
# eigenvalues (in ascending order) and orthonormal eigenvectors, stored as
# the columns of `eigenvectors` (column i pairs with eigenvalues[i]).
# Note: the sign of each eigenvector is arbitrary, so a component (and the
# projection onto it) may come out negated compared with another solver.
eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

In [7]:
# Inspect the eigenvalues as returned by the solver (before sorting)
eigenvalues

array([0.38752163, 7.61247837])

In [8]:
# Inspect the eigenvectors as returned by the solver (one per column, before sorting)
eigenvectors

array([[-0.73588229, -0.67710949],
       [ 0.67710949, -0.73588229]])

In [9]:
# Step 5: Order the eigenpairs from largest to smallest eigenvalue
# argsort gives ascending positions; flipping them yields descending order.
sorted_indices = np.flip(np.argsort(eigenvalues))
# Reorder the eigenvalues and the matching eigenvector columns together so
# that column i of `eigenvectors` still belongs to eigenvalues[i].
eigenvalues, eigenvectors = eigenvalues[sorted_indices], eigenvectors[:, sorted_indices]

In [10]:
# Inspect the eigenvalues after sorting in descending order
eigenvalues

array([7.61247837, 0.38752163])

In [11]:
# Inspect the eigenvectors after their columns were reordered to match the sorted eigenvalues
eigenvectors

array([[-0.67710949, -0.73588229],
       [-0.73588229,  0.67710949]])

In [12]:
# Step 6: Keep only the first principal component (2D -> 1D)
# Eigenvectors are stored as columns, so the first row of the transpose is
# the eigenvector paired with the largest eigenvalue.
pc1 = eigenvectors.T[0]

In [13]:
# Inspect the selected principal component (first column of the sorted eigenvectors)
pc1

array([-0.67710949, -0.73588229])

In [14]:
# Step 7: Project the centered observations onto the principal component
# Each row of centered_data is dotted with pc1, giving one coordinate per observation.
projected_data = np.matmul(centered_data, pc1)

In [15]:
# Inspect the 1D coordinates of the observations along PC1
projected_data

array([-0.8948133 ,  3.40293482,  1.98994305, -0.83604051, -3.66202406])

In [16]:
# Print a summary of every intermediate result, in pipeline order.
# Each entry pairs the heading text with the value printed beneath it.
report = [
    ("Original Data:\n", data),
    ("\nMean:\n", mean),
    ("\nCentered Data:\n", centered_data),
    ("\nCovariance Matrix:\n", cov_matrix),
    ("\nEigenvalues:\n", eigenvalues),
    ("\nEigenvectors:\n", eigenvectors),
    ("\nPrincipal Component (PC1):\n", pc1),
    ("\nProjected Data:\n", projected_data),
]
for heading, value in report:
    print(heading, value)

Original Data:
 [[2 4]
 [0 0]
 [1 1]
 [3 3]
 [5 5]]

Mean:
 [2.2 2.6]

Centered Data:
 [[-0.2  1.4]
 [-2.2 -2.6]
 [-1.2 -1.6]
 [ 0.8  0.4]
 [ 2.8  2.4]]

Covariance Matrix:
 [[3.7 3.6]
 [3.6 4.3]]

Eigenvalues:
 [7.61247837 0.38752163]

Eigenvectors:
 [[-0.67710949 -0.73588229]
 [-0.73588229  0.67710949]]

Principal Component (PC1):
 [-0.67710949 -0.73588229]

Projected Data:
 [-0.8948133   3.40293482  1.98994305 -0.83604051 -3.66202406]
