In [2]:
# @title 1. Turn Embeddings: From 4 Numbers to 768-Dim Vectors
import torch
import torch.nn as nn
import numpy as np

class TurnEmbedding(nn.Module):
    def __init__(self, vocab_size, n_turns=4, output_dim=128, poly_degree=3):
        super().__init__()
        # Each word = 4 integers (your "hydrogen atoms" of meaning)
        self.turns = nn.Parameter(torch.randint(-5, 6, (vocab_size, n_turns)).float())
        # Polynomial coefficients: the "creative wormholes"
        self.poly_coeffs = nn.Parameter(torch.randn(n_turns, poly_degree + 1, output_dim) * 0.1)

    def forward(self, token_ids):
        base_turns = self.turns[token_ids]  # [B, S, 4]
        embeddings = torch.zeros(*base_turns.shape[:2], self.poly_coeffs.size(-1))

        for i in range(self.poly_coeffs.size(0)):  # for each turn space
            x = base_turns[..., i].unsqueeze(-1)  # [B, S, 1]
            # Generate polynomial: 1, x, x², x³
            powers = torch.cat([x**d for d in range(self.poly_coeffs.size(1))], dim=-1)  # [B, S, deg+1]
            # Apply coefficients → [B, S, output_dim]
            embeddings += torch.einsum('bsd,do->bso', powers, self.poly_coeffs[i])
        return embeddings

# --- Test it ---
vocab_size = 100
turn_emb = TurnEmbedding(vocab_size, n_turns=4, output_dim=128)

# Create a tiny vocab for testing
vocab = {word: i for i, word in enumerate(["cat", "dog", "king", "queen", "man", "woman", "red", "blue"])}
tokens = torch.tensor([[vocab["cat"], vocab["king"], vocab["red"]]])  # batch=1, seq=3
embeddings = turn_emb(tokens)

print("✅ Embeddings shape:", embeddings.shape)  # Should be [1, 3, 128]
print("✅ Generated from only 4 integers per token!")
print("Turn values for 'cat':", turn_emb.turns[vocab["cat"]].detach().numpy().round(2))

✅ Embeddings shape: torch.Size([1, 3, 128])
✅ Generated from only 4 integers per token!
Turn values for 'cat': [ 1. -3.  0.  0.]


In [5]:
# @title Phase 2: Train Turns to Reconstruct Semantic Space

from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import torch

# Simulate "ground truth" embeddings (e.g., from Word2Vec)
np.random.seed(42)
simulated_embeddings = np.random.randn(100, 128)  # 100 words, 128-dim

# Add 'car' to the vocabulary
vocab["car"] = len(vocab)

# Make "cat" and "dog" similar, "cat" and "car" dissimilar
# Replace [...] with actual 128-dimensional vectors

# Option 1: Use random but correlated vectors
base_cat = np.random.randn(128)
simulated_embeddings[vocab["cat"]] = base_cat
simulated_embeddings[vocab["dog"]] = base_cat + np.random.randn(128) * 0.1  # Slightly perturbed
simulated_embeddings[vocab["car"]] = -base_cat + np.random.randn(128) * 0.5  # Roughly opposite

# Option 2: Use structured patterns (for debugging)
# simulated_embeddings[vocab["cat"]] = np.array([0.9] * 64 + [0.1] * 64)  # First half high, second half low
# simulated_embeddings[vocab["dog"]] = np.array([0.85] * 64 + [0.15] * 64)
# simulated_embeddings[vocab["car"]] = np.array([-0.8] * 64 + [0.9] * 64)

simulated_embeddings = torch.tensor(simulated_embeddings, dtype=torch.float32)

# Training loop
optimizer = torch.optim.Adam(turn_emb.parameters(), lr=0.01)

for epoch in range(100):
    optimizer.zero_grad()

    # Get current embeddings for all words
    all_tokens = torch.arange(100).unsqueeze(0)  # [1, 100]
    current_embeddings = turn_emb(all_tokens)  # [1, 100, 128]

    # Compute loss: match simulated embeddings
    loss = torch.nn.functional.mse_loss(
        current_embeddings.squeeze(0),
        simulated_embeddings
    )

    loss.backward()
    optimizer.step()

    if epoch % 20 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")
        print(f"Turns for 'cat': {turn_emb.turns[vocab['cat']].detach().numpy().round(2)}")

Epoch 0, Loss: 156.3811
Turns for 'cat': [ 0.99 -2.99 -0.01  0.01]
Epoch 20, Loss: 7.9085
Turns for 'cat': [ 1.15 -2.85 -0.16  0.2 ]
Epoch 40, Loss: 2.0660
Turns for 'cat': [ 1.13 -2.8  -0.14  0.38]
Epoch 60, Loss: 1.1053
Turns for 'cat': [ 1.15 -2.79 -0.15  0.57]
Epoch 80, Loss: 0.9700
Turns for 'cat': [ 1.14 -2.79 -0.17  0.7 ]


In [6]:
# @title Test Turn Arithmetic: Does "cat" + "small" = "kitten"?

# Simulate "small" — you can initialize it or let it train
vocab["small"] = 1  # assuming index 1
turn_emb.turns.data[vocab["small"]] = torch.tensor([0.0, 0.0, -1.5, 0.0])  # manually set

# Simulate "kitten"
vocab["kitten"] = 2
turn_emb.turns.data[vocab["kitten"]] = torch.tensor([1.1, -2.8, -1.8, 0.6])  # ground truth

# Compute: cat + small
cat_turns = turn_emb.turns[vocab["cat"]].detach()
small_turns = turn_emb.turns[vocab["small"]].detach()
computed_kitten_turns = cat_turns + small_turns

actual_kitten_turns = turn_emb.turns[vocab["kitten"]].detach()

print("🐱 Computed 'kitten' (cat + small):", computed_kitten_turns.numpy().round(2))
print("🧸 Actual 'kitten' turns:          ", actual_kitten_turns.numpy().round(2))
print("📏 Distance:", torch.norm(computed_kitten_turns - actual_kitten_turns).item())

🐱 Computed 'kitten' (cat + small): [ 1.14 -2.79 -1.7   0.79]
🧸 Actual 'kitten' turns:           [ 1.1 -2.8 -1.8  0.6]
📏 Distance: 0.213380366563797


In [7]:
# @title Automated Turn Arithmetic Test Suite
test_cases = [
    ("king", "man", "woman", "queen"),
    ("cat", "small", "big", "lion"),  # cat - small + big = lion?
    ("hot", "temperature", "cold", "ice"),  # Semantic field transfer
    ("run", "present", "past", "ran"),  # Temporal modulation
]

def run_turn_arithmetic_tests(model, vocab, test_cases):
    word_turns = model.turns.data
    results = []

    for a, b, c, expected_d in test_cases:
        try:
            turn_a = word_turns[vocab[a]]
            turn_b = word_turns[vocab[b]]
            turn_c = word_turns[vocab[c]]
            expected_turn_d = word_turns[vocab[expected_d]]

            computed_d = turn_a - turn_b + turn_c
            distance = torch.norm(computed_d - expected_turn_d).item()

            results.append({
                'test': f"{a} - {b} + {c} = {expected_d}",
                'computed': computed_d.numpy().round(2).tolist(),
                'actual': expected_turn_d.numpy().round(2).tolist(),
                'distance': round(distance, 3)
            })

            print(f"✅ {a} - {b} + {c} = {expected_d} | Distance: {distance:.3f}")
        except KeyError:
            print(f"⚠️  Missing word in vocab: {a}, {b}, {c}, or {expected_d}")

    return results

# Run it
results = run_turn_arithmetic_tests(turn_emb, vocab, test_cases)

✅ king - man + woman = queen | Distance: 10.555
⚠️  Missing word in vocab: cat, small, big, or lion
⚠️  Missing word in vocab: hot, temperature, cold, or ice
⚠️  Missing word in vocab: run, present, past, or ran


In [8]:
# @title Fix: Build a Semantic Vocabulary with Turn Initialization

# Define your expanded vocabulary
expanded_vocab = {
    "king": 0, "queen": 1, "man": 2, "woman": 3,
    "cat": 4, "dog": 5, "kitten": 6, "lion": 7, "small": 8, "big": 9,
    "hot": 10, "cold": 11, "ice": 12, "temperature": 13,
    "run": 14, "ran": 15, "present": 16, "past": 17,
    # Add more as needed
}

# Initialize turns SEMANTICALLY (not randomly!)
# Format: [Conceptual Essence, Behavioral Axis, Size/Scale, Context/Temporal]
semantic_turns_init = torch.tensor([
    # Royalty & Gender
    [5.0, 0.0, 0.0, 0.0],  # king
    [5.0, 0.0, 0.0, 0.0],  # queen (same essence, gender handled by context)
    [2.0, 0.0, 0.0, 0.0],  # man
    [2.0, 0.0, 0.0, 0.0],  # woman

    # Animals & Size
    [3.0, -2.0, 0.0, 0.0],  # cat (independent)
    [3.0, 2.0, 0.0, 0.0],   # dog (social)
    [3.0, -2.0, -2.0, 0.0], # kitten (small cat)
    [3.0, -2.0, 3.0, 0.0],  # lion (large cat)
    [0.0, 0.0, -3.0, 0.0],  # small
    [0.0, 0.0, 3.0, 0.0],   # big

    # Temperature
    [0.0, 0.0, 0.0, 4.0],   # hot
    [0.0, 0.0, 0.0, -4.0],  # cold
    [0.0, 0.0, 0.0, -5.0],  # ice (very cold)
    [0.0, 0.0, 0.0, 0.0],   # temperature (neutral base)

    # Verbs & Tense
    [1.0, 0.0, 0.0, 1.0],   # run (present)
    [1.0, 0.0, 0.0, -1.0],  # ran (past)
    [0.0, 0.0, 0.0, 1.0],   # present
    [0.0, 0.0, 0.0, -1.0],  # past
], dtype=torch.float32)

# Update your model's turn parameters
turn_emb.turns.data[:len(expanded_vocab)] = semantic_turns_init

# Update your vocab dictionary to match
vocab = expanded_vocab  # Overwrite your old vocab

In [9]:
# @title Retrain: Teach the Polynomial to Respect Your Semantic Turns

# Simulate "ground truth" embeddings based on your semantic turns
# We'll just use the turns themselves as a proxy for now
simulated_embeddings = semantic_turns_init.repeat(1, 32)[:, :128]  # Expand to 128-dim

# Make sure it's a tensor
simulated_embeddings = simulated_embeddings.clone().detach().requires_grad_(False)

# Training loop
optimizer = torch.optim.Adam(turn_emb.parameters(), lr=0.01)

for epoch in range(200):  # Train longer this time
    optimizer.zero_grad()

    # Get current embeddings for all words in your vocab
    all_tokens = torch.arange(len(expanded_vocab)).unsqueeze(0)  # [1, vocab_size]
    current_embeddings = turn_emb(all_tokens)  # [1, vocab_size, 128]

    # Compute loss: match your semantic structure
    loss = torch.nn.functional.mse_loss(
        current_embeddings.squeeze(0),
        simulated_embeddings
    )

    loss.backward()
    optimizer.step()

    if epoch % 50 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

Epoch 0, Loss: 2.9543
Epoch 50, Loss: 0.0411
Epoch 100, Loss: 0.0029
Epoch 150, Loss: 0.0005


In [10]:
# @title Validate: Test All Your Semantic Arithmetic

test_cases = [
    ("king", "man", "woman", "queen"),
    ("cat", "small", "big", "lion"),    # Now in vocab!
    ("hot", "temperature", "cold", "ice"),  # Now in vocab!
    ("run", "present", "past", "ran"),      # Now in vocab!
]

def run_turn_arithmetic_tests(model, vocab, test_cases):
    word_turns = model.turns.data
    results = []

    for a, b, c, expected_d in test_cases:
        try:
            turn_a = word_turns[vocab[a]]
            turn_b = word_turns[vocab[b]]
            turn_c = word_turns[vocab[c]]
            expected_turn_d = word_turns[vocab[expected_d]]

            computed_d = turn_a - turn_b + turn_c
            distance = torch.norm(computed_d - expected_turn_d).item()

            results.append({
                'test': f"{a} - {b} + {c} = {expected_d}",
                'computed': computed_d.numpy().round(2).tolist(),
                'actual': expected_turn_d.numpy().round(2).tolist(),
                'distance': round(distance, 3)
            })

            print(f"✅ {a} - {b} + {c} = {expected_d} | Distance: {distance:.3f}")
            print(f"   Computed: {computed_d.numpy().round(2)}")
            print(f"   Expected: {expected_turn_d.numpy().round(2)}\n")
        except KeyError as e:
            print(f"⚠️  STILL Missing: {e}")

    return results

# Run it!
results = run_turn_arithmetic_tests(turn_emb, vocab, test_cases)

✅ king - man + woman = queen | Distance: 0.000
   Computed: [ 4.8   0.07  0.01 -0.04]
   Expected: [ 4.8   0.07  0.01 -0.04]

✅ cat - small + big = lion | Distance: 2.463
   Computed: [ 2.85 -1.95  5.4   0.09]
   Expected: [ 3.1  -2.08  2.96  0.26]

✅ hot - temperature + cold = ice | Distance: 5.184
   Computed: [ 1.48 -0.77  0.91  0.06]
   Expected: [ 0.1  -0.07 -0.2  -4.76]

✅ run - present + past = ran | Distance: 0.180
   Computed: [ 1.67 -0.49  0.89 -1.91]
   Expected: [ 1.77 -0.58  1.   -1.89]

