This notebook demonstrates how to load GloVe embeddings, reduce their dimensionality, and visualize them in 3D with color encoding.

## Steps
1. Load GloVe vectors from `glove.6B.100d.txt`
2. Select sample words (the first `vocab_size` lines of the file)
3. Normalize the vectors to unit length, then reduce them to 3 dimensions using PCA
4. Generate RGB colors based on vector components
5. Create a 3D scatter plot with Plotly

In [None]:
import numpy as np
from sklearn.decomposition import PCA
import plotly.graph_objects as go

# Load GloVe vectors

In [None]:
def load_glove(file_path, vocab_size=100):
    """Load the first ``vocab_size`` word vectors from a GloVe text file.

    Each non-blank line is expected to hold a token followed by its
    whitespace-separated float components.

    Args:
        file_path: Path to the GloVe text file (read as UTF-8).
        vocab_size: Maximum number of word vectors to load.

    Returns:
        A tuple ``(embeddings, labels)``: a float32 array with one row per
        loaded word, and the list of words in file order.
    """
    embeddings = []
    labels = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            # Count loaded words, not raw lines, so skipped blank lines do
            # not eat into the requested vocabulary size.
            if len(labels) >= vocab_size:
                break
            values = line.split()
            if not values:
                # Blank line (e.g. stray newline at end of file): nothing to
                # parse, and indexing values[0] would raise IndexError.
                continue
            labels.append(values[0])
            embeddings.append(np.asarray(values[1:], dtype='float32'))
    return np.array(embeddings), labels


# Normalize to unit vectors

In [None]:
def normalize(vectors):
    """Scale each row of ``vectors`` to unit Euclidean (L2) length.

    Args:
        vectors: 2-D array with one vector per row.

    Returns:
        An array of the same shape whose rows have norm 1. Rows that are
        entirely zero are returned unchanged (all zeros) instead of NaN.
    """
    norms = np.linalg.norm(vectors, axis=1, keepdims=True)
    # A zero vector has norm 0; dividing by it would yield NaN and poison
    # downstream steps. Divide those rows by 1 so they stay zero.
    safe_norms = norms.copy()
    safe_norms[safe_norms == 0] = 1
    return vectors / safe_norms


# RGB Color Mapping

In [None]:
def generate_colors(vectors):
    """Turn rows of three values into ``'rgb(r, g, b)'`` color strings.

    Every column is min-max scaled independently (a small epsilon in the
    denominator avoids division by zero for constant columns), then each
    scaled value is multiplied by 255 and truncated to an integer.

    Args:
        vectors: 2-D array with exactly three columns.

    Returns:
        A list with one ``'rgb(r, g, b)'`` string per input row.
    """
    lowest = vectors.min(axis=0)
    spread = vectors.max(axis=0) - lowest + 1e-8
    scaled = (vectors - lowest) / spread

    colors = []
    for red, green, blue in scaled:
        colors.append(
            f'rgb({int(red*255)}, {int(green*255)}, {int(blue*255)})'
        )
    return colors

# Main

In [None]:
# Load the first 50 lines of the GloVe file as (vectors, words).
embeddings, labels = load_glove('../data/glove.6B.100d.txt', vocab_size=50)
# Scale every word vector to unit L2 norm before projecting.
embeddings = normalize(embeddings)
# Project the normalized vectors onto three principal components.
pca = PCA(n_components=3)
reduced = pca.fit_transform(embeddings)
# Derive one RGB color string per word from its three PCA coordinates.
colors = generate_colors(reduced)

# Plot

In [None]:
# One labelled marker per word: position comes from the three PCA
# coordinates, color from the matching RGB string.
fig = go.Figure()
fig.add_trace(
    go.Scatter3d(
        x=reduced[:, 0],
        y=reduced[:, 1],
        z=reduced[:, 2],
        mode='markers+text',
        text=labels,
        marker={'size': 6, 'color': colors},
    )
)
fig.update_layout(title="3D Word Embeddings Visualization")
fig.show()