# 04 – Dimensionality Reduction Explainer (PCA / t-SNE / UMAP)
Projects the same TF-IDF feature matrix into 2-D with PCA, t-SNE, and UMAP, and plots each projection with the same k-means cluster labels so the three views can be compared visually.

In [None]:
import numpy as np
from smclust.data_loader import load_messages
from smclust.preprocessing import preprocess_df
from smclust.vectorize import tfidf_features
from smclust.reduce import pca_reduce, tsne_embed, umap_embed
from smclust.cluster import kmeans_cluster
from smclust.viz import scatter_2d

In [None]:
# TF-IDF baseline that the projection cells below operate on.
# Load and preprocess in one step; only the processed frame is kept.
df = preprocess_df(load_messages())

# Vectorize the "text_clean" column (max_features=3000). The second return
# value is not used in this notebook, so it is discarded.
X_sparse, _ = tfidf_features(df["text_clean"], max_features=3000)

# Convert to an array via .toarray() for the reduction steps
# (presumably X_sparse is a SciPy sparse matrix; confirm in smclust.vectorize).
X = X_sparse.toarray()

In [None]:
# One seed shared by every stochastic step in this cell.
SEED = 42

# Reduce to 50 components first; clustering and both embeddings below all
# take this reduced matrix as input.
Xp, pca = pca_reduce(X, n_components=50, random_state=SEED)

# Cluster once and pass the same labels to every plot so the three
# projections can be compared against each other.
labels, _ = kmeans_cluster(Xp, n_clusters=8, random_state=SEED)

# PCA view: the first two columns of the reduced matrix.
pca2d = Xp[:, 0:2]
scatter_2d(pca2d, labels, title="PCA (first two components)")

# t-SNE view of the reduced matrix.
tsne_params = {"n_components": 2, "perplexity": 30, "random_state": SEED}
tsne2d = tsne_embed(Xp, **tsne_params)
scatter_2d(tsne2d, labels, title="t-SNE (perplexity=30)")

# UMAP view of the reduced matrix.
umap_params = {
    "n_neighbors": 15,
    "min_dist": 0.1,
    "n_components": 2,
    "random_state": SEED,
}
umap2d = umap_embed(Xp, **umap_params)
scatter_2d(umap2d, labels, title="UMAP (n_neighbors=15, min_dist=0.1)")