# Task 2: Generate PCA and t-SNE projections of the MNIST handwritten digits dataset

***

In [None]:
import numpy as np
import time
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from matplotlib import cm
from sklearn import manifold
from sklearn import datasets
from sklearn.datasets import fetch_openml

%matplotlib inline

In [None]:
# Fixed seed value. It is not referenced in the cells shown here; presumably
# it is meant to be passed as `random_state` to the projection methods so the
# results are reproducible (TODO confirm).
random_seed = 100

In [None]:
def plot_projection(projection, labels):
    """Scatter-plot a 2-D projection with one color per distinct label.

    Parameters
    ----------
    projection : array-like
        Two-dimensional data with at least two columns. Column 0 is plotted
        on the x axis and column 1 on the y axis.
    labels : array-like
        One label per row of ``projection``. Every distinct value gets its
        own color and its own legend entry (``str(value)``).

    Returns
    -------
    module
        The ``matplotlib.pyplot`` module, so the caller can keep working on
        the figure that was just drawn.
    """
    # Coerce to plain arrays so pandas objects are indexed positionally.
    projection = np.asarray(projection)
    labels = np.asarray(labels)

    x = projection[:, 0]
    y = projection[:, 1]

    # Use the label values that are actually present instead of assuming
    # they are exactly 0..n_clusters-1; otherwise non-contiguous or
    # non-integer labels would silently be left out of the plot.
    unique_labels = np.unique(labels)
    n_clusters = len(unique_labels)

    # plt.get_cmap is used because matplotlib.cm.get_cmap was deprecated in
    # Matplotlib 3.7 and removed in 3.9; both accept (name, lut).
    color_map_colors = plt.get_cmap('tab10', n_clusters)
    cluster_colors = color_map_colors(np.linspace(0, 1, n_clusters))

    plt.figure(figsize=(10, 10))

    for color, value in zip(cluster_colors, unique_labels):
        mask = labels == value
        plt.scatter(x[mask], y[mask], color=color, marker='o', s=40, alpha=1, label=str(value))

    legend = plt.legend(ncol=1, handletextpad=0, frameon=False, fontsize=22, markerscale=4, bbox_to_anchor=(1, 1))
    # Color each legend text to match its cluster's markers.
    for text, color in zip(legend.get_texts(), cluster_colors):
        text.set_color(color)

    # Strip ticks and the surrounding frame: the axes of a projection carry
    # no directly interpretable units.
    plt.xticks([])
    plt.yticks([])
    axes = plt.gca()
    for side in ("top", "right", "bottom", "left"):
        axes.spines[side].set_visible(False)

    return plt

Load the MNIST handwritten digits dataset:

In [None]:
# Fetch version 1 of the "mnist_784" dataset from OpenML.
mnist = fetch_openml(name="mnist_784", version=1)

# Feature matrix: one row per image.
X = mnist["data"]

# Targets, cast to integers.
labels = mnist["target"].astype(int)

# Number of samples and number of features per sample.
n_observations, n_dimensions = X.shape

In [None]:
# Display the shape of the label array.
labels.shape

In [None]:
# Display the shape of the data matrix.
X.shape

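Once you have computed your PCA or t-SNE projection, store it as a two-column array of shape `(n_observations, 2)` in a variable named `projection`, then use the `plot_projection` function to visualize it: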

In [None]:
# NOTE: `projection` is not defined in the cells shown above; it must be
# computed before this cell is run.
# The trailing semicolon suppresses the notebook's echo of the return value.
plot_projection(projection, labels);

***