In [None]:
# Use t-SNE (t-Distributed Stochastic Neighbor Embedding) to visualize high-dimensional data in 2D or 3D.

from sklearn.datasets import load_digits
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

# Load the digits dataset and pull out the feature matrix and the labels
digits = load_digits()
X, y = digits.data, digits.target

# Project the features onto two dimensions with t-SNE (seeded for repeatable runs)
tsne = TSNE(n_components=2, random_state=42)
X_tsne = tsne.fit_transform(X)

# Scatter the 2D embedding, colouring each point by its digit label
fig, ax = plt.subplots(figsize=(10, 6))
scatter = ax.scatter(
    X_tsne[:, 0],
    X_tsne[:, 1],
    c=y,
    cmap='viridis',
    s=10,
    alpha=0.7,
)
fig.colorbar(scatter, ax=ax, label="Digit Label")
ax.set_title("t-SNE Visualization of Digits Dataset")
ax.set_xlabel("t-SNE 1")
ax.set_ylabel("t-SNE 2")
plt.show()

In [None]:
# Use UMAP (Uniform Manifold Approximation and Projection) for dimensionality reduction and visualization.

from umap import UMAP

# Project the features onto two dimensions with UMAP (seeded for repeatable runs).
# The estimator keeps the name `umap` because the timing cell further down reuses it.
umap = UMAP(n_components=2, random_state=42)
X_umap = umap.fit_transform(X)

# Scatter the 2D embedding, colouring each point by its digit label
fig, ax = plt.subplots(figsize=(10, 6))
scatter = ax.scatter(
    X_umap[:, 0],
    X_umap[:, 1],
    c=y,
    cmap='plasma',
    s=10,
    alpha=0.7,
)
fig.colorbar(scatter, ax=ax, label="Digit Label")
ax.set_title("UMAP Visualization of Digits Dataset")
ax.set_xlabel("UMAP 1")
ax.set_ylabel("UMAP 2")
plt.show()

In [None]:
# Compare t-SNE and UMAP results to understand their strengths and use cases.

# Side-by-side comparison of the two embeddings.
# Both panels use the same qualitative colormap and colour scale, so a given
# digit has the same colour on the left and on the right, and each panel gets a
# colorbar so the colours can be read without going back to earlier figures.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

panels = [("t-SNE", X_tsne), ("UMAP", X_umap)]
for ax, (method, embedding) in zip(axes, panels):
    points = ax.scatter(
        embedding[:, 0],
        embedding[:, 1],
        c=y,
        cmap='tab10',
        # Assumes the labels are the integers 0-9: these limits split the ten
        # colours of 'tab10' into bands centred on each label value.
        vmin=-0.5,
        vmax=9.5,
        s=10,
        alpha=0.7,
    )
    fig.colorbar(points, ax=ax, ticks=list(range(10)), label="Digit Label")
    ax.set_title(f"{method} Visualization")
    ax.set_xlabel(f"{method} 1")
    ax.set_ylabel(f"{method} 2")

plt.tight_layout()
plt.show()

# t-SNE preserves local neighborhoods well, but distances between clusters (global structure) are less reliable.
# UMAP tends to retain more of the global structure and is usually faster, so it scales better to larger datasets.

In [None]:
# Use UMAP and t-SNE to uncover clusters or patterns in complex datasets.

from sklearn.cluster import KMeans

# Apply K-Means clustering to UMAP-reduced data.
# n_init is passed explicitly because scikit-learn changed its default
# (10 -> 'auto'); pinning it avoids the FutureWarning on intermediate releases
# and keeps the clustering identical across library versions.
kmeans_umap = KMeans(n_clusters=10, n_init=10, random_state=42)
labels_umap = kmeans_umap.fit_predict(X_umap)

# Plot clusters identified on UMAP-reduced data.
# Cluster ids are arbitrary indices assigned by K-Means; they are not digit values.
plt.figure(figsize=(10, 6))
scatter = plt.scatter(X_umap[:, 0], X_umap[:, 1], c=labels_umap, cmap='tab10', s=10, alpha=0.7)
plt.colorbar(scatter, label="Cluster Label")
plt.title("Clusters Identified on UMAP-Reduced Data")
plt.xlabel("UMAP 1")
plt.ylabel("UMAP 2")
plt.show()

In [None]:
# Analyze the computational efficiency and interpretability of t-SNE and UMAP.

import time

# Time a fresh fit of each estimator on the full feature matrix.
# time.perf_counter() is used instead of time.time(): it is a monotonic,
# high-resolution clock meant for measuring intervals, so the result cannot be
# skewed by system clock adjustments.
# Both estimators were already fitted once in earlier cells, so these are
# re-fits and any one-time warm-up cost of the first run is not included.

# Measure execution time for t-SNE
start_tsne = time.perf_counter()
tsne.fit_transform(X)  # only the duration matters; the embedding is discarded
end_tsne = time.perf_counter()

# Measure execution time for UMAP
start_umap = time.perf_counter()
umap.fit_transform(X)  # only the duration matters; the embedding is discarded
end_umap = time.perf_counter()

print(f"t-SNE Execution Time: {end_tsne - start_tsne:.2f} seconds")
print(f"UMAP Execution Time: {end_umap - start_umap:.2f} seconds")

# t-SNE may take longer for larger datasets, while UMAP is faster and scales better.