In [None]:
import numpy as np
import scipy
import scipy.stats
import matplotlib.pyplot as plt
from ipywidgets import interact

In [None]:
# Load the MNIST dataset through the course helper and pull out the two
# arrays the rest of the notebook works with.
from mfml.resources.data import load_mnist
MNIST = load_mnist()
# Cast the 'data' entry to double-precision floats so later arithmetic
# (mean, std, covariance) is done in floating point.
# NOTE(review): presumably one flattened image per row -- confirm against load_mnist.
images = MNIST['data'].astype(np.double)
# Cast the 'target' entry to 64-bit integers.
labels = MNIST['target'].astype(np.int64)

In [None]:
# Quick visual sanity check: display the first sample of `images`.
plt.figure(figsize=(4,4))
# The sample is stored as a flat vector; reshape it to 28x28 for display.
plt.imshow(images[0].reshape(28,28), cmap='gray');
# Turn off grid lines so they are not drawn over the image.
plt.grid(False)

## PCA

Now we will implement PCA. Before we do that, let's pause for a moment and
think about the steps for performing PCA. Assume that we are performing PCA on
some dataset $\boldsymbol X$ for $M$ principal components. 
We then need to perform the following steps, which we break into parts:

1. Data normalization (`normalize`).
2. Find the eigenvalues and corresponding eigenvectors of the data covariance matrix $\boldsymbol S$.
   Sort the eigenvalues in descending order and reorder the eigenvectors to match (`eig`).
3. Compute the orthogonal projection matrix and use it to project the data onto the subspace spanned by the eigenvectors belonging to the $M$ largest eigenvalues.

### Data normalization `normalize`


In [None]:
def normalize(X):
    """Standardize every feature (column) of X to zero mean and unit standard deviation.

    Features whose standard deviation is exactly zero (constant columns) are
    divided by 1 instead of 0, so they come out as all zeros rather than NaN.

    Args:
        X: ndarray, dataset of shape (N,D)

    Returns:
        (Xbar, mean, std): tuple of ndarray.
            Xbar is the normalized dataset of shape (N,D).
            mean is the per-feature mean of X, shape (D,).
            std is the per-feature standard deviation of X, shape (D,),
            returned unmodified (zeros for constant features are kept).
    """
    mu = X.mean(axis=0)
    sd = X.std(axis=0)
    # A constant feature has sd == 0 and (X - mu) == 0, so dividing by sd
    # would give 0/0 = NaN. Divide by 1 there instead; work on a copy so the
    # returned sd still reports the true standard deviation.
    safe_sd = np.array(sd)
    safe_sd[safe_sd == 0] = 1
    x_norm = (X - mu) / safe_sd
    return x_norm, mu, sd

### Compute eigenvalues and eigenvectors `eig`