## The umap-learn Python library

In [None]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
from sklearn.datasets import fetch_openml

# Plotting libraries (the default figure size is configured after the imports)
import matplotlib.pyplot as plt
from matplotlib import rcParams
import seaborn as sns

import umap

# Activate seaborn's default theme first, then set the default figure size
# (width, height in inches) so that the size assignment is applied last.
sns.set_theme()
rcParams['figure.figsize'] = [10, 8]

## UMAP embedding of iris data

In [None]:
# Load the iris measurements and map the integer targets to species names
iris = load_iris()

iris_data = iris['data']
iris_label = iris['target_names'][iris['target']]

# Standardize the features before computing the embedding so that all
# measurements contribute on a comparable scale
scaled_iris_data = StandardScaler().fit_transform(iris_data)

# Fit UMAP and obtain the embedding of the training data in one step.
# fit_transform() returns the embedding learned during fitting (the array
# stored as umap_obj.embedding_), instead of running a separate transform()
# pass over the very same samples.
umap_obj = umap.UMAP(n_neighbors=20)
embedding = umap_obj.fit_transform(scaled_iris_data)

# Plot the embedded data, colored by species
embedding_df = pd.DataFrame(embedding, columns=['UMAP1', 'UMAP2'])
embedding_df['label'] = iris_label

# The frame is passed through the ``data`` keyword: a positional DataFrame is
# only accepted by seaborn >= 0.12, while the keyword works on older releases
# as well.
sns.scatterplot(data=embedding_df, x='UMAP1', y='UMAP2', hue='label')
plt.title('UMAP - iris dataset')
plt.show()

In [None]:
# Display the low-dimensional coordinates stored on the fitted UMAP object
# (one row per sample that was passed to fit)
umap_obj.embedding_

In [None]:
# Display the graph attribute of the fitted UMAP object.
# NOTE(review): per the umap-learn docs this is presumably the sparse
# weighted neighbor graph (fuzzy simplicial set) built during fit — confirm.
umap_obj.graph_

## UMAP embedding of digits data

In [None]:
# Download the MNIST digits from OpenML
mnist = fetch_openml('mnist_784', parser='auto')

# Keep the labels and rescale the pixel intensities by dividing by 255
mnist_label = mnist.target
mnist_data = mnist.data / 255

# Plain NumPy copies are easier to index by position than the pandas objects
mnist_data_np = mnist_data.to_numpy()
mnist_labels_np = mnist_label.to_numpy()

# One grid row per digit class, n_col example images per row
n_row = 10
n_col = 10
fig, axes = plt.subplots(n_row, n_col, figsize=(10, 10))

for digit, row_axes in enumerate(axes):
    # Positions of all samples whose label is the current digit; labels are
    # compared as strings ('0' for the first row, '1' for the second, ...)
    matches = np.flatnonzero(mnist_labels_np == str(digit))

    for col, ax in enumerate(row_axes):
        # Each sample is a flat vector of 784 values; restore the 28x28 image
        image = mnist_data_np[matches[col]].reshape(28, 28)
        ax.imshow(image, cmap='gray')

        # Remove ticks and axis labels so only the digit is visible
        ax.axis('off')

# Render the grid of MNIST digits
plt.show()

In [None]:
# Perform UMAP dimensionality reduction on the normalized digits, using the
# library's default parameters
umap_obj = umap.UMAP()
embedding = umap_obj.fit_transform(mnist_data)

# Collect the two embedding coordinates together with the digit labels
embedding_df = pd.DataFrame(embedding, columns=['UMAP1', 'UMAP2'])
embedding_df['label'] = mnist_label

# Small, semi-transparent markers keep the dense point cloud readable.
# The frame is passed through the ``data`` keyword: a positional DataFrame is
# only accepted by seaborn >= 0.12, while the keyword works on older releases
# as well.
sns.scatterplot(
    data=embedding_df, x='UMAP1', y='UMAP2', hue='label', alpha=0.6, s=3
)
plt.title('UMAP - digits dataset')
plt.show()

### UMAP plotting capabilities

In [None]:
import umap.plot as uplot

In [None]:
# Plot the embedding held by the fitted UMAP object with umap.plot, coloring
# each point by its digit label
uplot.points(umap_obj, labels=mnist_label)

In [None]:
# Draw the connectivity of the fitted UMAP model on top of the embedded
# points, colored by digit label.
# NOTE(review): rendering the edges for the full dataset is presumably slow
# and memory-hungry — confirm before running on all samples.
uplot.connectivity(umap_obj, show_points=True, labels=mnist_label)