# t-Distributed Stochastic Neighbor Embedding (t-SNE) - From Scratch

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the mall-customer table and encode the categorical 'Genre' column as 0/1.
df = pd.read_csv('../../Mall_Customers.csv')
df['Genre'] = df['Genre'].map({'Male': 0, 'Female': 1})

# Every column except the identifier is used as a feature.
X = df.drop(columns='CustomerID').to_numpy()

# Standardise the features, then keep only the first 150 rows so the
# O(n^2) from-scratch t-SNE below stays cheap.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
X_subset = X_scaled[:150, :]

In [None]:
class TSNEScratch:
    """Minimal NumPy implementation of t-SNE using exact gradients.

    High-dimensional similarities are Gaussian conditionals whose per-point
    bandwidth is tuned to a target perplexity; low-dimensional similarities
    use a Student-t kernel with one degree of freedom. The embedding is
    optimised with plain gradient descent (no momentum, no early
    exaggeration).

    Parameters
    ----------
    n_components : int
        Dimensionality of the embedding.
    perplexity : float
        Target perplexity of every point's conditional neighbour
        distribution. Values above ``n_samples - 1`` are capped, because a
        distribution over ``n_samples - 1`` neighbours cannot exceed that.
    learning_rate : float
        Step size of the gradient-descent update.
    n_iter : int
        Number of gradient-descent iterations.
    random_state : int or None
        Seed for the initial embedding. ``None`` draws from NumPy's global
        random state, so ``np.random.seed`` keeps working as before.
    """

    def __init__(self, n_components=2, perplexity=30.0, learning_rate=200.0, n_iter=300,
                 random_state=None):
        self.n_components = n_components
        self.perplexity = perplexity
        self.learning_rate = learning_rate
        self.n_iter = n_iter
        self.random_state = random_state

    @staticmethod
    def _pairwise_sq_dists(points):
        """Return the (n, n) matrix of squared Euclidean distances between rows."""
        deltas = points[:, np.newaxis] - points[np.newaxis, :]
        return np.sum(deltas ** 2, axis=2)

    @staticmethod
    def _conditional_row(sq_dists, target_entropy, tol=1e-5, max_steps=50):
        """Return one point's neighbour distribution with the requested entropy.

        Bisects on the Gaussian precision ``beta = 1 / (2 * sigma**2)`` until
        the Shannon entropy (in nats) of ``softmax(-beta * sq_dists)`` is
        within ``tol`` of ``target_entropy`` or ``max_steps`` is exhausted.
        """
        # Shifting by the minimum leaves the distribution unchanged but
        # guarantees at least one weight equals 1, so the sum never underflows.
        shifted = sq_dists - sq_dists.min()
        beta, beta_lo, beta_hi = 1.0, 0.0, np.inf
        for _ in range(max_steps):
            weights = np.exp(-beta * shifted)
            total = weights.sum()
            # H = log Z + beta * E[d]; avoids evaluating log(0) on tiny weights.
            entropy = np.log(total) + beta * np.dot(shifted, weights) / total
            gap = entropy - target_entropy
            if abs(gap) < tol:
                break
            if gap > 0:
                # Too flat: sharpen the kernel.
                beta_lo = beta
                beta = beta * 2.0 if np.isinf(beta_hi) else 0.5 * (beta + beta_hi)
            else:
                # Too peaked: widen the kernel.
                beta_hi = beta
                beta = 0.5 * (beta + beta_lo)
        return weights / total

    def _compute_affinities(self, X):
        """Return the symmetric joint probability matrix P for the rows of ``X``.

        Each row's conditional distribution is calibrated to
        ``self.perplexity`` and the result is symmetrised as
        ``(P_cond + P_cond.T) / (2 * n)``, so P sums to 1 and has a zero
        diagonal. Fewer than two samples yield an all-zero matrix.

        Raises
        ------
        ValueError
            If ``self.perplexity`` is not positive.
        """
        if self.perplexity <= 0:
            raise ValueError("perplexity must be positive")
        X = np.asarray(X, dtype=float)
        n = X.shape[0]
        if n < 2:
            # No neighbours exist, so there is nothing to normalise.
            return np.zeros((n, n))

        dists = self._pairwise_sq_dists(X)
        target_entropy = np.log(min(self.perplexity, n - 1))

        P = np.zeros((n, n))
        for i in range(n):
            # Exclude the point itself so p(i|i) stays exactly zero.
            row = self._conditional_row(np.delete(dists[i], i), target_entropy)
            P[i, :i] = row[:i]
            P[i, i + 1:] = row[i:]
        return (P + P.T) / (2 * n)

    def fit_transform(self, X):
        """Embed the rows of ``X`` and return an ``(n_samples, n_components)`` array."""
        X = np.asarray(X, dtype=float)
        n = X.shape[0]
        # Floor P so that no pair has an exactly-zero target probability.
        P = np.maximum(self._compute_affinities(X), 1e-12)

        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)
        Y = rng.normal(0, 1e-4, (n, self.n_components))

        for _ in range(self.n_iter):
            # Student-t kernel; the diagonal is zeroed so self-pairs carry no mass.
            kernel = 1.0 / (1.0 + self._pairwise_sq_dists(Y))
            np.fill_diagonal(kernel, 0.0)
            total = kernel.sum()
            # total is 0 only when there are no pairs (n < 2); skip the 0/0.
            Q = kernel / total if total > 0 else kernel
            Q = np.maximum(Q, 1e-12)

            # grad_j = 4 * sum_k W[j, k] * (Y[j] - Y[k]), computed for all j at once.
            W = (P - Q) * kernel
            grad = 4.0 * (W.sum(axis=1)[:, np.newaxis] * Y - np.dot(W, Y))

            Y -= self.learning_rate * grad

        return Y

In [None]:
# Fit the from-scratch model on the reduced sample, overriding only the
# iteration budget; all other hyper-parameters keep their defaults.
tsne = TSNEScratch(n_iter=100)
Y_embedded = tsne.fit_transform(X=X_subset)

In [None]:
# Scatter the 2-D embedding, colouring each customer by spending score.
sns.set_style("darkgrid")
fig, ax = plt.subplots(figsize=(10, 7))

# Colour values are sliced to the first 150 rows, matching the embedded subset.
spending_scores = df['Spending Score (1-100)'].to_numpy()[:150]
points = ax.scatter(
    Y_embedded[:, 0],
    Y_embedded[:, 1],
    c=spending_scores,
    cmap='inferno',
    s=100,
    alpha=0.8,
    edgecolor='black',
    marker='o',
)

ax.set_title('t-SNE Embedding: Unfolding Mall Customer Clusters', fontsize=15)
ax.set_xlabel('Low-Dim Dimension 1', fontsize=12)
ax.set_ylabel('Low-Dim Dimension 2', fontsize=12)
fig.colorbar(points, ax=ax, label='Spending Score (1-100)')
plt.show()