# Intro to 2D PCA.
stough 202-

- [Khan academy](https://www.youtube.com/playlist?list=PLbPhAbAhvjUzeLkPVnv0kc3_9rAfXpGtS) videos on Linear Algebra background.
- [Neat walkthrough](https://towardsdatascience.com/the-mathematics-behind-principal-component-analysis-fff2d7f4b643) for a simple example.

In [None]:
%matplotlib widget
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA

### Generate Some Random Data.

In [None]:
# Quick isotropic sample: unit-variance 2D Gaussian noise centred on (4, 6),
# stored one point per column (shape 2 x 500).
X = np.random.randn(2, 500) + np.array([[4], [6]])

def gen_pca_data(mu=(10, 20), var=(6, 1), evs=((1, 1), (1, -1)), num=500):
    """Sample Gaussian points whose principal axes are the rows of ``evs``.

    Parameters
    ----------
    mu : array-like, shape (d,)
        Mean of the generated point cloud.
    var : array-like, shape (d,)
        Variance along each principal direction; ``var[k]`` belongs to
        ``evs[k]``.
    evs : array-like, shape (d, d)
        Principal directions, one per row. Each row is rescaled to unit
        length here. The rows are expected to be mutually orthogonal so that
        the matrix acts as a rotation/reflection.
    num : int
        Number of points to draw.

    Returns
    -------
    numpy.ndarray, shape (num, d)
        One generated point per row.

    Raises
    ------
    ValueError
        If a variance is negative or a direction has zero length (either
        would otherwise silently fill the output with NaNs).
    """
    mu = np.asarray(mu, dtype=float)
    var = np.asarray(var, dtype=float)
    evs = np.asarray(evs, dtype=float)

    if np.any(var < 0):
        raise ValueError("var must be non-negative")

    # Norm of every row vector, kept 2D so it broadcasts over the row.
    norms = np.linalg.norm(evs, ord=2, axis=1, keepdims=True)
    if np.any(norms == 0):
        raise ValueError("every row of evs must be a non-zero vector")

    # Gaussian Normal axis-oriented data scaled by the standard deviation.
    X = np.sqrt(var)[:, None] * np.random.randn(len(mu), num)

    # Unit-length row vectors.
    evs = evs / norms

    # Reorient the data so that axis k ends up along row k of evs. The rows
    # are the directions, so they must be the *columns* of the matrix applied
    # from the left, hence the transpose.
    X = np.matmul(evs.T, X)

    # Add back in the mean.
    X = mu[:, None] + X

    return X.T

In [None]:
# Rebind X to a sample drawn with gen_pca_data's default parameters
# (one point per row).
X = gen_pca_data()

In [None]:
# Scatter the raw sample; the red '+' marks the origin for reference.
plt.figure(figsize=(4, 4))
plt.scatter(X.T[0], X.T[1], alpha=.25, edgecolors='k')
plt.scatter([0], [0], marker='+', color='r')
plt.axis('equal');

&nbsp;

### PCA will find directions in the data space that account for the most variance. 

In [None]:
# Fit a two-component PCA model to the sample (one point per row of X) and
# keep each point's transform; Xt[i] is the transform of X[i].
pca = PCA(n_components=2)
Xt = pca.fit_transform(X)

In [None]:
# Report the fitted mean, per-component variance and component directions.
print(
    f'PCA found MEAN: {pca.mean_}',
    f'          VAR : {pca.explained_variance_}',
    f'components    : \n{pca.components_}',
    sep='\n',
)

&nbsp;

### Plot with the PCA directions.
Each dashed line spans $\pm 3\sigma$ from the mean along one principal component.

In [None]:
plt.figure(figsize=(4, 4))

# Sample cloud, with the origin marked by a red '+'.
plt.scatter(X.T[0], X.T[1], alpha=.25, edgecolors='k', color='gray')
plt.scatter([0], [0], marker='+', color='r')

# Fitted mean.
plt.scatter(pca.mean_[0], pca.mean_[1], color='orange')

# One segment per component, reaching 3 standard deviations either side of
# the mean. Row k of half_axes is 3 * sigma_k * component_k.
sigmas = np.sqrt(pca.explained_variance_)
half_axes = 3 * sigmas[:, None] * pca.components_
pca0, pca1 = (np.array([pca.mean_ - h, pca.mean_ + h]) for h in half_axes)

# Each segment is a (2, 2) array of end points; transpose to get xs and ys.
plt.plot(*pca0.T, 'c--', linewidth=3)
plt.plot(*pca1.T, 'r--', linewidth=3)
plt.axis('equal');

&nbsp;

### Let's reconstruct 

Very similar to the reconstruction we've done in the past with respect to
DCT and Haar. Just more general. 

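Each point is just the mean plus the sum of coefficient × component over
every transform coefficient: $x = \mu + \sum_k t_k \, c_k$, where $t_k$ is the
point's $k$-th transform coefficient and $c_k$ is the $k$-th component.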

In [None]:
# Show the first sample next to its PCA transform.
print(
    f"We'll here attempt to reconstruct \n"
    f"the point {X[0]} from its \n"
    f"transform {Xt[0]}."
)

In [None]:
# Repeat the fitted PCA parameters so they are at hand for the reconstruction.
print(
    'Recall',
    f'PCA is MEAN: {pca.mean_}',
    f'       VAR : {pca.explained_variance_}',
    f'components : \n{pca.components_}',
    sep='\n',
)

In [None]:
# Transform of the first sample: one coefficient per component.
tp = Xt[0]

# Spell out the reconstruction as mean + coefficient * component terms.
print(
    'The reconstruction is \n'
    f'     {pca.mean_} \n'
    f'plus {tp[0]}*{pca.components_[0]}\n'
    f'plus {tp[1]}*{pca.components_[1]}'
)

In [None]:
# Start at the mean and step along the first component by its coefficient.
rp = tp[0]*pca.components_[0] + pca.mean_
print(f'rp is {rp} after the first component.')

# Then step along the second component by its coefficient.
rp = rp + tp[1]*pca.components_[1]
print(f'rp is {rp} after the second component.\n')

# Compare against the original sample.
print(f'So, pretty close to {X[0]}')