In [1]:
# Data manipulation
import pandas as pd

# Visualization
import plotly.express as px

# Sklearn
from sklearn.datasets import make_swiss_roll # for creating a swiss roll
from sklearn.decomposition import PCA # for PCA dimensionality reduction (comparison)
from sklearn.manifold import MDS # for MDS dimensionality reduction

We create some data using Sklearn’s make_swiss_roll and display it on a 3D plot.

In [2]:
# Make a swiss roll. The fixed seed keeps the generated points (and every
# number/plot derived from them) identical across Restart & Run All.
X, y = make_swiss_roll(n_samples=2000, noise=0.05, random_state=42)
# Make it thinner by halving the second coordinate
X[:, 1] *= .5


# Create a 3D scatter plot, one marker per sample, coloured by y
fig = px.scatter_3d(None, x=X[:,0], y=X[:,1], z=X[:,2], color=y,)

# Styling shared by all three axes; only the background colour differs per axis
axis_style = dict(color='black',
                  gridcolor='#f0f0f0',
                  title_font=dict(size=10),
                  tickfont=dict(size=10))

# Update chart looks
fig.update_layout(showlegend=False,
                  # Camera orientation for the 3D scene
                  scene_camera=dict(up=dict(x=0, y=0, z=1),
                                    center=dict(x=0, y=0, z=-0.1),
                                    eye=dict(x=1.25, y=1.5, z=1)),
                  # Figure-level margins (a layout option, not a camera setting)
                  margin=dict(l=0, r=0, b=0, t=0),
                  scene=dict(xaxis=dict(backgroundcolor='white', **axis_style),
                             yaxis=dict(backgroundcolor='white', **axis_style),
                             zaxis=dict(backgroundcolor='lightgrey', **axis_style)))

# Update marker size and give each marker a thin black outline
fig.update_traces(marker=dict(size=3,
                              line=dict(color='black', width=0.1)))

# Hide the colour scale
fig.update(layout_coloraxis_showscale=False)
fig.show()

In [3]:
### Step 1 - Describe the MDS estimator. Per the original note these values
### mirror the library defaults - confirm against the installed version.
mds_settings = dict(
    n_components=2,
    metric=True,
    n_init=4,
    max_iter=300,
    verbose=0,
    eps=0.001,
    n_jobs=None,
    random_state=42,
    dissimilarity='euclidean',
)
model2d = MDS(**mds_settings)

### Step 2 - Fit on the 3D points and get back one 2D coordinate pair per sample
X_trans = model2d.fit_transform(X)

### Step 3 - Report a few stats about the fitted model
for label, value in (
    ('The new shape of X: ', X_trans.shape),
    ('No. of Iterations: ', model2d.n_iter_),
    ('Stress: ', model2d.stress_),
):
    print(label, value)

# Further attributes worth inspecting (not printed here because they are large):
#   model2d.dissimilarity_matrix_ - distances between data points in the original high-dimensional space
#   model2d.embedding_            - coordinates of the data points in the new lower-dimensional space

The new shape of X:  (2000, 2)
No. of Iterations:  61
Stress:  3320406.116566159


We can see that the shape of the new array is 2000 by 2, which means that we have successfully reduced it to 2 dimensions. Also, the best of the 4 initializations (`n_init=4`) needed 61 iterations to converge to the stress value reported above.

In [4]:
# Plot the 2D MDS embedding, one marker per sample, coloured by y
fig = px.scatter(None, x=X_trans[:, 0], y=X_trans[:, 1], opacity=1, color=y)

# White plot background and the figure title in a single layout update
fig.update_layout(plot_bgcolor='white', title_text="MDS Transformation")

# Both axes get the same grid, zero-line and border styling
axis_style = dict(
    showgrid=True, gridwidth=1, gridcolor='lightgrey',
    zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey',
    showline=True, linewidth=1, linecolor='black',
)
fig.update_xaxes(**axis_style)
fig.update_yaxes(**axis_style)

# Marker size plus a thin black outline around each marker
marker_style = dict(size=5, line=dict(color='black', width=0.2))
fig.update_traces(marker=marker_style)

fig.show()

The results are pretty good since we could preserve the global structure while at the same time not losing the separation observed between points in the original depth dimension.

In [8]:
## Comparison with PCA

### Make an instance of the PCA class that keeps 2 components
pca = PCA(n_components=2)

## Fit the data and transform it, so we have 2 dimensions instead of 3
X_trans_PCA = pca.fit_transform(X)

In [10]:
# Grid, zero-line and border styling applied identically to the x and y axes
axis_kwargs = dict(
    showgrid=True, gridwidth=1, gridcolor='lightgrey',
    zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey',
    showline=True, linewidth=1, linecolor='black',
)

# Build the PCA scatter plot as one chained expression: create the figure,
# set the white background, style both axes, add the title, then size and
# outline the markers.
fig = (
    px.scatter(None, x=X_trans_PCA[:, 0], y=X_trans_PCA[:, 1], opacity=1, color=y)
    .update_layout(plot_bgcolor='white')
    .update_xaxes(**axis_kwargs)
    .update_yaxes(**axis_kwargs)
    .update_layout(title_text="PCA Transformation")
    .update_traces(marker=dict(size=5, line=dict(color='black', width=0.2)))
)

fig.show()

While it depends on the exact problem we want to solve, MDS seems to perform better in this scenario than PCA (Principal Component Analysis). For comparison, the graph above shows a 2D representation of the same 3D swiss roll after applying the PCA transformation.

As you can see, PCA gives us a result that looks like a picture from one side of the swiss roll, failing to preserve depth information from the third dimension.