In [None]:
import numpy as np
from sklearn.decomposition import PCA

# Toy dataset: 6 samples (rows) x 2 features (columns)
X = np.array([[126, 78],
     [128, 80],
     [128, 82],
     [130, 82],
     [130, 84],
     [132, 86]])


# Default scikit-learn PCA: n_components is not set, so no component is
# dropped here (the projection printed by this cell still has both columns)
pca = PCA()

# Fit on the raw X (no scaling is applied in this cell) and project it onto
# the principal components
X_projected = pca.fit_transform(X)

print(X_projected)

[[-4.99953747 -0.06800833]
 [-2.20523715  0.37003933]
 [-0.58906316 -0.80808699]
 [ 0.58906316  0.80808699]
 [ 2.20523715 -0.37003933]
 [ 4.99953747  0.06800833]]


**Input:** Dataset `X` with `n_samples` and `n_features`

**Output:** Transformed dataset `X_projected` with reduced dimensions

---

**Class:** `PCA`
1. **Initialize:**
   - Set `components` to `None`
   - Set `mean` to `None`

---

**Method:** `fit_transform(X)`
1. **Mean Centering:**
   - Calculate the mean of each feature:
     \[
     \text{mean} = \text{mean of } X \text{ along axis 0}
     \]
   - Subtract the mean from each sample in `X` to center the data.

2. **Compute Covariance Matrix:**
   - Calculate the covariance matrix of the mean-centered `X`:
     \[
     \text{covariance matrix} = \text{np.cov}(X^T, \text{ddof}=1)
     \]

3. **Compute Eigenvalues and Eigenvectors:**
   - Use linear algebra to compute eigenvalues and eigenvectors of the covariance matrix:
     \[
     \text{eigenvalues}, \text{eigenvectors} = \text{np.linalg.eig}(\text{covariance matrix})
     \]

4. **Sort Eigenvalues and Eigenvectors:**
   - Sort the eigenvalues in descending order, along with their corresponding eigenvectors:
     - Get sorted indices of eigenvalues:
       \[
       \text{sorted indices} = \text{np.argsort(eigenvalues)}[::-1]
       \]
     - Reorder the eigenvalues and the *columns* of the eigenvector matrix using these indices (each column is one eigenvector).

5. **Transform Data:**
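   - Project the mean-centered data onto the sorted eigenvectors. `np.linalg.eig` returns the eigenvectors as the *columns* of its result, so the matrix is used as-is (no transpose):
     \[
     X_{\text{projected}} = X_{\text{centered}} \cdot \text{eigenvectors}
     \]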

6. **Return Transformed Data:**
   - Return `X_projected`

---

**Main Execution:**
1. Define input dataset `X` with `n_samples` and `n_features`.
2. Create an instance of the `PCA` class.
3. Call the `fit_transform` method on the dataset to get the transformed dataset `X_projected`.
4. Print or visualize the transformed dataset.

---

**Notes:**
- The covariance matrix captures the relationships between features.
- Eigenvalues represent the amount of variance captured by each principal component.
- Eigenvectors define the directions of the principal components.


In [None]:
import numpy as np

class PCA:
  """Principal component analysis via eigendecomposition of the covariance matrix.

  Attributes:
    components: Number of leading principal components kept by
      ``fit_transform``. ``None`` keeps all of them. (The attribute name is
      kept for backward compatibility; it holds a count, not the vectors.)
    mean: Per-feature mean of the data passed to the most recent
      ``fit_transform`` call, or ``None`` before the first call.
  """

  def __init__(self, n_components=1):
    """Create the transformer.

    Args:
      n_components: How many leading principal components to keep.
        Defaults to 1 (the previous hard-coded value); ``None`` keeps all.
    """
    self.components = n_components
    self.mean = None

  def fit_transform(self, X):
    """Center ``X``, find its principal directions and project onto them.

    Args:
      X: Array-like of shape (n_samples, n_features).

    Returns:
      ndarray of shape (n_samples, k) where k is ``self.components`` (or
      n_features when that is ``None``). Columns are ordered by decreasing
      explained variance. The sign of each column is arbitrary, as it is for
      any eigenvector-based PCA.
    """
    X = np.asarray(X, dtype=float)

    # Mean centering; the mean is stored so callers can inspect it afterwards.
    self.mean = np.mean(X, axis=0)
    X_centered = X - self.mean

    # np.cov expects one variable per row, hence the transpose. With a single
    # feature it returns a 0-d array, so force a (1, 1) matrix.
    cov = np.atleast_2d(np.cov(X_centered.T, ddof=1))

    # The covariance matrix is symmetric, so use eigh: it guarantees real
    # eigenvalues and orthonormal eigenvectors (np.linalg.eig does not).
    # Eigenvector i is the COLUMN eigenvectors[:, i].
    eigenvalues, eigenvectors = np.linalg.eigh(cov)

    # Largest variance first; reorder the columns accordingly.
    order = np.argsort(eigenvalues)[::-1]
    eigenvectors = eigenvectors[:, order]

    # Because the eigenvectors are columns, the projection is X @ V. Using
    # V.T here is only correct when V happens to be symmetric.
    return np.dot(X_centered, eigenvectors[:, :self.components])

import matplotlib.pyplot as plt
from sklearn import datasets

# Same 6-sample x 2-feature toy dataset used in the scikit-learn cell
X = np.array([[126, 78],
     [128, 80],
     [128, 82],
     [130, 82],
     [130, 84],
     [132, 86]])

# PCA here is the hand-written class from this cell; called with no
# arguments it keeps a single principal component
pca = PCA()
X_projected = pca.fit_transform(X)
# Fitting and projecting happen in the one fit_transform call; the class
# defines no separate transform() method

print(X_projected)

# Compare the input and output shapes to confirm the dimensionality reduction
print("shape of X:", X.shape)
print("shape of transformed X:", X_projected.shape)

[[ 4.99953747]
 [ 2.20523715]
 [ 0.58906316]
 [-0.58906316]
 [-2.20523715]
 [-4.99953747]]
shape of X: (6, 2)
shape of transformed X: (6, 1)
