In [1]:
import numpy as np 
import pandas as pd 

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

from scipy.io import loadmat
from sklearn.utils import shuffle

import matplotlib.pyplot as plt
import seaborn as sns

from mpl_toolkits.mplot3d import Axes3D

# From PyPI
# !pip install tsne

# From Conda
# ! conda install -c conda-forge tsne 

from tsne import bh_sne
from sklearn.manifold import TSNE

# Load MNIST data

In [2]:
# Read both MATLAB files; each call returns a dict-like mapping of variable names to arrays.
mnist_train, mnist_test = (
    loadmat(mat_path)
    for mat_path in ('mnist/mnist_train.mat', 'mnist/mnist_test.mat')
)

# Training features, and the training labels flattened to one dimension.
X_train, y_train = mnist_train['train_X'], mnist_train['train_labels'].ravel()

# The label array contains the value 10; rewrite it to 0 in place
# (presumably so that labels match the digits 0-9 -- confirm against the data source).
y_train[y_train == 10] = 0

# Draw a fixed random subset of 1000 (sample, label) pairs; the seed keeps it reproducible.
Xs, ys = shuffle(X_train, y_train, random_state=0, n_samples=1000)

In [3]:
# Feature columns keep their default integer names; the labels go into a trailing 'label' column.
df_mnist_train = pd.DataFrame(Xs).assign(label=ys)

# Show which distinct labels ended up in the sampled subset.
distinct_labels = df_mnist_train['label'].unique()
print(distinct_labels)

[1 4 0 2 7 6 5 9 8 3]


# Understanding the data

In [4]:
df_mnist_train.head(5)  # preview the first five rows of the sampled frame

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,label
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7


### Uncomment the relevant lines below and run them to print more details about the data

In [5]:
# Information about the data

# df_mnist_train.info()

In [6]:
# Get the data shape

# df_mnist_train.shape

In [7]:
# List columns

# df_mnist_train.columns

In [8]:
# List counts

# df_mnist_train.count()

In [9]:
# Print basic statistics of the data
# df_mnist_train.describe()

In [10]:
# MNIST dataset has ten digits. 

# print(df_mnist_train['label'].unique())

# Plotting Functions

In [11]:
def scatter2d(X, y):
    """Draw a 2-D scatter plot with every point annotated by its label.

    Parameters
    ----------
    X : array-like, shape (n_samples, >= 2)
        Point coordinates; only the first two columns are plotted.
    y : array-like, shape (n_samples,)
        Label of each row of ``X``. Any sortable label values are accepted.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not contain the same number of samples.

    Notes
    -----
    Each distinct label gets one colour from a seaborn "husl" palette, and
    that colour is used for both the marker and the text annotation.
    """
    X = np.asarray(X)
    y = np.asarray(y).ravel()
    if X.shape[0] != y.shape[0]:
        raise ValueError(
            "X and y must have the same number of samples, got %d and %d"
            % (X.shape[0], y.shape[0])
        )

    # Map every label to its rank among the distinct labels instead of
    # computing ``label - 1``. That arithmetic only worked for label 0 via
    # negative-index wraparound, and with unsigned integer labels it can wrap
    # to a large positive value and index past the end of the palette.
    classes, class_idx = np.unique(y, return_inverse=True)
    class_idx = class_idx.reshape(-1)
    palette = sns.color_palette("husl", len(classes))
    colors = [palette[k] for k in class_idx]

    fig, ax = plt.subplots(figsize=(15,10))
    # An empty colour list is replaced by None so an empty input still plots.
    ax.scatter(X[:,0], X[:,1], c=colors if colors else None)

    for (x0, x1), label, color in zip(X[:, :2], y, colors):
        ax.text(x0, x1, str(label), color=color, fontsize='small')
        
def scatter3d(X, y):
    """Draw a 3-D scatter plot with every point annotated by its label.

    Parameters
    ----------
    X : array-like, shape (n_samples, >= 3)
        Point coordinates; only the first three columns are plotted.
    y : array-like, shape (n_samples,)
        Label of each row of ``X``. Any sortable label values are accepted.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not contain the same number of samples.

    Notes
    -----
    Each distinct label gets one colour from a seaborn "husl" palette, and
    that colour is used for both the marker and the text annotation.
    """
    X = np.asarray(X)
    y = np.asarray(y).ravel()
    if X.shape[0] != y.shape[0]:
        raise ValueError(
            "X and y must have the same number of samples, got %d and %d"
            % (X.shape[0], y.shape[0])
        )

    # Map every label to its rank among the distinct labels instead of
    # computing ``label - 1``. That arithmetic only worked for label 0 via
    # negative-index wraparound, and with unsigned integer labels it can wrap
    # to a large positive value and index past the end of the palette.
    classes, class_idx = np.unique(y, return_inverse=True)
    class_idx = class_idx.reshape(-1)
    palette = sns.color_palette("husl", len(classes))
    colors = [palette[k] for k in class_idx]

    fig = plt.figure(figsize=(15,10))
    # Draw on the axes that add_subplot returns. Building a second axes with
    # Axes3D(fig) left the real subplot empty, because recent Matplotlib
    # releases no longer attach such an axes to the figure automatically.
    ax = fig.add_subplot(111, projection='3d')

    # An empty colour list is replaced by None so an empty input still plots.
    ax.scatter(X[:,0], X[:,1], X[:,2], c=colors if colors else None)
    for (x0, x1, x2), label, color in zip(X[:, :3], y, colors):
        ax.text(x0, x1, x2, str(label), color=color, fontsize='small')

# Principal Component Analysis

PCA is a statistical procedure that uses an orthogonal transformation to convert a set of observations of possibly correlated variables into a set of values of linearly uncorrelated variables called principal components.

In [12]:
###
#Your code here. Try PCA with two components
###


### To get a better understanding of how the dimensions interact, plot the first three PCA dimensions

In [13]:
###
#Your code here. Try PCA with three components
###



# t-SNE on the MNIST dataset

In [14]:
###
#Your code here. Try t-SNE with two components
###


In [15]:
# Plot 2d
# scatter2d(P, ys)

In [16]:
###
#Your code here. Try t-SNE with three components
###


In [17]:
# Plot 3d
# scatter3d(P, ys)

## Exploration Tasks

t-SNE offers other parameters, including perplexity. The perplexity is related to the number of nearest neighbors used in other manifold learning algorithms. Try varying the perplexity and observe the changes in the output.

https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html?highlight=tsne#
    

In [18]:
###
#Your code here
###

### Run Barnes-Hut t-SNE (see https://github.com/danielfrg/tsne)

In [19]:
###
#Your code here
###

# %time B = bh_sne(Xs, verbose=True)

In [20]:
###
#Your code here to plot 2d
###

# plt.rcParams["figure.figsize"] = 20, 20
# scatter2d(B, ys)

# UMAP

In [21]:
###
#Your code here
###

# Random Projections

In [22]:
###
#Your code here
###