# Uniform Manifold Approximation and Projection (UMAP) - Simplified From-Scratch Implementation

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the customer table and turn the textual 'Genre' column into 0/1 codes.
# Any value other than 'Male'/'Female' becomes NaN (that is how Series.map
# treats keys missing from the mapping).
df = pd.read_csv('../../Mall_Customers.csv')
gender_codes = {'Male': 0, 'Female': 1}
df['Genre'] = df['Genre'].map(gender_codes)

# Every column except the row identifier is used as a feature.
X = df.drop(columns='CustomerID').values

# Standardise the features; the statistics are fitted on the full table,
# before the subset below is taken.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

# Work on the first 100 rows only (UMAPScratch below iterates over all
# point pairs in pure Python).
X_subset = X_scaled[:100]

In [None]:
class UMAPScratch:
    """Simplified, from-scratch UMAP-style embedding.

    The model builds a dense, symmetric fuzzy-membership matrix from the
    pairwise Euclidean distances of the input and then moves randomly
    initialised low-dimensional points along a weighted gradient.

    Limitations of this simplified version:

    * ``min_dist`` is stored but not used; the curve parameters ``a`` and
      ``b`` are fixed to 1.0.
    * Only an attractive term is applied (there is no repulsive/negative
      sampling step), so points are only ever pulled towards each other.
    * Memberships are dense (every pair gets a weight), not restricted to
      the k nearest neighbours; ``n_neighbors`` only sets the bandwidth.

    Parameters
    ----------
    n_components : int, default 2
        Number of dimensions of the embedding.
    n_neighbors : int, default 15
        Rank of the neighbour whose distance is used as the per-point
        bandwidth ``sigma``. Clamped to ``n_samples - 1`` when the data
        set is smaller than that.
    min_dist : float, default 0.1
        Kept for API familiarity; currently unused.
    n_epochs : int, default 200
        Number of full passes over all weighted pairs.
    random_state : int or None, default None
        Seed for the initial embedding. ``None`` draws from NumPy's global
        random state (so ``np.random.seed`` still controls the result).
    """

    def __init__(self, n_components=2, n_neighbors=15, min_dist=0.1,
                 n_epochs=200, random_state=None):
        self.n_components = n_components
        self.n_neighbors = n_neighbors
        self.min_dist = min_dist
        self.n_epochs = n_epochs
        self.random_state = random_state

    def _compute_membership_strengths(self, X):
        """Return the symmetric ``(n, n)`` fuzzy-membership matrix of ``X``.

        For every row ``i`` the directed strength is
        ``exp(-max(0, d_ij - rho_i) / sigma_i)`` where ``rho_i`` is the
        distance to the nearest other point and ``sigma_i`` the distance
        to the ``n_neighbors``-th nearest point. The directed strengths
        are merged with the probabilistic union ``P + P.T - P * P.T``.
        The diagonal is 1 (a point's distance to itself is 0).
        """
        X = np.asarray(X, dtype=float)
        n = X.shape[0]
        if n < 2:
            # No neighbours exist; keep the "diagonal is 1" convention.
            return np.eye(n)

        dists = np.sqrt(
            np.sum((X[:, np.newaxis] - X[np.newaxis, :]) ** 2, axis=2)
        )

        # Column 0 of each sorted row is the point itself (distance 0), so
        # column 1 is the nearest neighbour. Clamp the bandwidth rank so a
        # data set smaller than n_neighbors does not index out of bounds.
        k = min(self.n_neighbors, n - 1)
        sorted_dists = np.sort(dists, axis=1)
        rho = sorted_dists[:, 1][:, np.newaxis]
        sigma = sorted_dists[:, k][:, np.newaxis]

        # 1e-10 guards against a zero bandwidth (duplicate points).
        P = np.exp(-np.maximum(0, dists - rho) / (sigma + 1e-10))

        return (P + P.T) - (P * P.T)

    def fit_transform(self, X):
        """Embed ``X`` and return an ``(n_samples, n_components)`` array.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray
            The optimised low-dimensional coordinates.
        """
        X = np.asarray(X, dtype=float)
        n = X.shape[0]
        P = self._compute_membership_strengths(X)

        # Fall back to the global NumPy RNG when no seed is given so that
        # existing callers relying on np.random.seed keep their results.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)
        Y = rng.normal(0, 1, (n, self.n_components))

        a, b = 1.0, 1.0
        learning_rate = 0.1

        # The set of weighted pairs never changes between epochs, so build
        # it once. np.nonzero yields row-major order, i.e. the same visiting
        # order as nested ``for i ... for j`` loops. Self-pairs are dropped
        # because their distance is always zero.
        rows, cols = np.nonzero(P > 0)
        edges = [
            (i, j, P[i, j])
            for i, j in zip(rows.tolist(), cols.tolist())
            if i != j
        ]

        for _ in range(self.n_epochs):
            for i, j, weight in edges:
                diff = Y[i] - Y[j]
                dist_sq = np.sum(diff ** 2)
                if dist_sq == 0:
                    # Coincident points: direction undefined, skip.
                    continue

                # Negative coefficient => i moves towards j and j towards i.
                grad_coeff = (
                    -2 * a * b * (dist_sq ** (b - 1)) / (1 + a * (dist_sq ** b))
                )
                grad = grad_coeff * diff * weight
                Y[i] += learning_rate * grad
                Y[j] -= learning_rate * grad

        return Y

In [None]:
# Embed the subset into two dimensions (the scatter plot below reads exactly
# two columns). 50 epochs are used instead of the class default of 200.
# The starting layout is drawn from NumPy's global random state, so the
# picture differs between runs unless that state is seeded beforehand.
umap = UMAPScratch(n_components=2, n_neighbors=15, n_epochs=50)
Y_embedded = umap.fit_transform(X_subset)

In [None]:
# Colour every embedded point by the spending score of the matching row.
# The embedding was built from the leading rows of `df`, so take as many
# scores as there are embedded points instead of repeating the subset size
# as a magic number.
spending_score = df['Spending Score (1-100)'].values[:len(Y_embedded)]

sns.set_style("white")
plt.figure(figsize=(10, 6))
scatter = plt.scatter(
    Y_embedded[:, 0],
    Y_embedded[:, 1],
    c=spending_score,
    cmap='magma',
    s=80,
    alpha=0.9,
    edgecolor='white',
)
plt.title('UMAP Embedding: Preserving Local Neighborhoods', fontsize=15)
plt.xlabel('Embedding Dimension 1', fontsize=12)
plt.ylabel('Embedding Dimension 2', fontsize=12)
plt.colorbar(scatter, label='Spending Score (1-100)')
plt.show()