# PCA - Example 1
## Consider the following dataset with two features (x1 and x2) for 4 observations. Apply PCA to reduce the dimension from two to one.

In [None]:
import numpy as np

# Step 1: Define the dataset
# Each row is one observation; the two columns are the features (x1, x2).
data = np.array([[4, 11], [8, 4], [13, 5], [7, 14]])

In [None]:
# Step 2: Center the data by subtracting each feature's mean
# (mean-centering only: the features are NOT rescaled to unit variance).
mean = data.mean(axis=0)  # axis=0 -> one mean per column/feature
centered_data = data - mean  # the mean row is broadcast across every observation

In [None]:
mean

array([8. , 8.5])

In [None]:
# Step 3: Compute the covariance matrix
# rowvar=False: columns are the variables (features), rows are observations.
# ddof=1 is np.cov's default, written out here: the sums are divided by N - 1
# (sample covariance), which is what yields the 14 / -11 / 23 entries.
cov_matrix = np.cov(centered_data, rowvar=False, ddof=1)

In [None]:
cov_matrix

array([[ 14., -11.],
       [-11.,  23.]])

In [None]:
# Step 4: Compute eigenvalues and eigenvectors
# np.linalg.eig returns the eigenvalues together with a matrix whose COLUMNS
# are the matching eigenvectors (column i pairs with eigenvalues[i]).
# The eigenvalues are not returned in any guaranteed order, which is why
# Step 5 sorts them explicitly.
# NOTE(review): cov_matrix is symmetric, so np.linalg.eigh would also apply;
# it may return eigenvectors with different signs than the outputs recorded
# in this notebook -- confirm before switching.
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

In [None]:
eigenvalues

array([ 6.61513568, 30.38486432])

In [None]:
eigenvectors

array([[-0.83025082,  0.55738997],
       [-0.55738997, -0.83025082]])

In [None]:
# Step 5: Sort eigenvalues and eigenvectors in descending order
# argsort gives ascending positions; reversing puts the largest eigenvalue first.
sorted_indices = eigenvalues.argsort()[::-1]
# Apply the same permutation to the eigenvalues and to the eigenvector COLUMNS
# so that each eigenvalue stays paired with its eigenvector.
eigenvalues, eigenvectors = (
    eigenvalues[sorted_indices],
    eigenvectors[:, sorted_indices],
)

In [None]:
eigenvalues

array([30.38486432,  6.61513568])

In [None]:
eigenvectors

array([[ 0.55738997, -0.83025082],
       [-0.83025082, -0.55738997]])

In [None]:
# Step 6: Select the principal component (1D reduction)
# Eigenvectors are stored as columns, so row 0 of the transpose is column 0:
# the eigenvector paired with the largest eigenvalue after the Step 5 sort.
pc1 = eigenvectors.T[0]

In [None]:
pc1

array([ 0.55738997, -0.83025082])

In [None]:
# Step 7: Project the data onto the principal component
# Matrix-vector product: each centered observation (row) is dotted with pc1,
# giving one coordinate per observation along the principal axis.
projected_data = np.dot(centered_data, pc1)

In [None]:
projected_data

array([-4.30518692,  3.73612869,  5.69282771, -5.12376947])

In [None]:
# Print results
# Each entry pairs the exact heading text with the value printed after it;
# every heading except the first starts with a blank line to separate sections.
for section_title, section_value in (
    ("Original Data:\n", data),
    ("\nMean:\n", mean),
    ("\nCentered Data:\n", centered_data),
    ("\nCovariance Matrix:\n", cov_matrix),
    ("\nEigenvalues:\n", eigenvalues),
    ("\nEigenvectors:\n", eigenvectors),
    ("\nPrincipal Component (PC1):\n", pc1),
    ("\nProjected Data:\n", projected_data),
):
    print(section_title, section_value)

Original Data:
 [[ 4 11]
 [ 8  4]
 [13  5]
 [ 7 14]]

Mean:
 [8.  8.5]

Centered Data:
 [[-4.   2.5]
 [ 0.  -4.5]
 [ 5.  -3.5]
 [-1.   5.5]]

Covariance Matrix:
 [[ 14. -11.]
 [-11.  23.]]

Eigenvalues:
 [30.38486432  6.61513568]

Eigenvectors:
 [[ 0.55738997 -0.83025082]
 [-0.83025082 -0.55738997]]

Principal Component (PC1):
 [ 0.55738997 -0.83025082]

Projected Data:
 [-4.30518692  3.73612869  5.69282771 -5.12376947]
