# Exercise PCA

Take the digits dataset and apply a PCA model.

* With how many components can we explain more than 80% of the variance?
* Try to visualize the reduced digits by accessing the `components_` attribute of the fitted PCA model and using a heatmap to represent each component, for instance: sns.heatmap(pca.components_[i].reshape(8,8))

## SOLUTION

In [None]:
# Packages
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_digits
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

In [None]:
# Load the digits dataset and split it into the feature matrix and the labels
digits = load_digits()
X, y = digits.data, digits.target

In [None]:
# Number of input features in the dataset
len(digits.feature_names)

In [None]:
# Create a PCA model
pca = PCA()
pca

In [None]:
# Fit the PCA model on the digit features
pca.fit(X)

In [None]:
# Share of the total variance explained by each fitted component
pca.explained_variance_ratio_

In [None]:
# Running total of the explained variance ratio, component by component
pca.explained_variance_ratio_.cumsum()

In [None]:
# The dataset has one feature per pixel, so the full PCA fitted above
# has as many components as there are pixels (64)
len(digits.feature_names)

In [None]:
# Plot the explained variance of each component (orange line) together with
# the cumulative explained variance (bars).
explained = pca.explained_variance_ratio_
# Number the components from 1 so that the x position where the bars cross
# the threshold line reads directly as "how many components are needed".
component_numbers = np.arange(1, len(explained) + 1)
plt.plot(component_numbers, explained, c="orange", label="Per component")
plt.bar(component_numbers, np.cumsum(explained), label="Cumulative")
plt.xlabel("Number of principal components")
plt.ylabel("Explained variance ratio")
plt.ylim(0, 1)
plt.axhline(0.80, c="black")  # Threshold on 0.8
plt.legend()
plt.show()

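We can reduce each image from 64 pixels to roughly 12 principal components; the next cell checks the cumulative explained variance against the 0.8 threshold.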

In [None]:
# Components whose cumulative explained variance is still at or below 0.8 are
# not enough on their own; the first component that pushes the total above
# 0.8 is the next one, hence the "+ 1".
cumulative_variance = np.cumsum(pca.explained_variance_ratio_)
int(np.sum(cumulative_variance <= 0.8)) + 1

Fit the PCA again, this time keeping only the reduced set of components (about 12).

In [None]:
# Keep the smallest number of components whose cumulative explained variance
# exceeds 80%. A float n_components between 0 and 1 lets scikit-learn pick
# that count from the data instead of relying on a hard-coded number;
# svd_solver="full" is set explicitly because this selection mode is
# documented for the full solver.
pca = PCA(n_components=0.80, svd_solver="full")
pca.fit(X)

In [None]:
# Project the data onto the fitted principal components
pr_comp = pca.transform(X)
# Preview the first three projected samples
pr_comp[:3]

In [None]:
# Scatter the samples on the first two principal components, coloured by
# digit label. Columns 0 and 1 of the projection are components 1 and 2,
# which is what the title and axis labels announce.
plt.scatter(pr_comp[:, 0], pr_comp[:, 1], c=y)
plt.title('Two first components')
plt.xlabel("Component 1")
plt.ylabel("Component 2")
plt.show()

In [None]:
# Draw the first four principal components as 8x8 heatmaps on a 2x2 grid.
fig = plt.figure(figsize=(10, 8))
axes = [fig.add_subplot(2, 2, position) for position in range(1, 5)]

# Every panel uses the same colour scale so the components can be compared.
heatmap_options = dict(square=True, vmax=0.30, vmin=-0.30, cmap="Blues")
for i, ax in enumerate(axes):
    component_image = pca.components_[i].reshape(8, 8)
    sns.heatmap(component_image, ax=ax, **heatmap_options)
    ax.set_title(f'Principal Component {i}')

plt.show()