# Word Embedding Demo

In [36]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define a small vocabulary
vocab = ["john", "abel", "loves", "learning"]
vocab_size = len(vocab)

# Map each word to an index
word_to_idx = {word: i for i, word in enumerate(vocab)}
idx_to_word = {i: word for word, i in word_to_idx.items()}

# Create a small set of training phrases
training_phrases = [
    ["john", "loves", "learning"],
    ["abel", "loves", "learning"],
]
print(word_to_idx)
print(idx_to_word)

{'john': 0, 'abel': 1, 'loves': 2, 'learning': 3}
{0: 'john', 1: 'abel', 2: 'loves', 3: 'learning'}


In [37]:
def make_one_hot_vector(word_index, vocab_size):
    """Create a one-hot vector for a given word index."""
    vec = torch.zeros(vocab_size)
    vec[word_index] = 1.0
    return vec

training_data = []
for phrase in training_phrases:
    for i in range(len(phrase)-1):
        current_word = phrase[i]
        next_word = phrase[i + 1]
        
        current_word_idx = word_to_idx[current_word]
        next_word_idx = word_to_idx[next_word]
        
        # One-hot for input, index for output label
        input_vec = make_one_hot_vector(current_word_idx, vocab_size)
        training_data.append((input_vec, next_word_idx))

print("Sample training data (input vector, target index):")
for i in range(len(training_data)):
    print(training_data[i])

print("input_vec:",input_vec)

Sample training data (input vector, target index):
(tensor([1., 0., 0., 0.]), 2)
(tensor([0., 0., 1., 0.]), 3)
(tensor([0., 1., 0., 0.]), 2)
(tensor([0., 0., 1., 0.]), 3)
input_vec: tensor([0., 0., 1., 0.])


In [38]:
class WordEmbeddingModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim):
        super(WordEmbeddingModel, self).__init__()
        self.embedding_dim = embedding_dim
        
        # First linear layer: from one-hot to embedding
        self.fc1 = nn.Linear(vocab_size, embedding_dim, bias=False)
        
        # Second linear layer: from embedding to vocab-size
        self.fc2 = nn.Linear(embedding_dim, vocab_size, bias=False)
        
    def forward(self, x):
        """
        Forward pass:
          x: one-hot vector of shape (batch_size, vocab_size)
        Returns:
          logits: unnormalized scores of shape (batch_size, vocab_size)
        """
        # x -> embedding
        emb = self.fc1(x)           # shape: (batch_size, embedding_dim)
        emb = torch.relu(emb)       # Non-linearity
        
        # embedding -> distribution over vocab
        logits = self.fc2(emb)      # shape: (batch_size, vocab_size)
        return logits

In [39]:
# Hyperparameters
embedding_dim = 3
learning_rate = 0.02
num_epochs = 10000

# Initialize the model, loss, and optimizer
model = WordEmbeddingModel(vocab_size, embedding_dim)
criterion = nn.CrossEntropyLoss()  # suitable for classification
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    total_loss = 0.0
    
    for input_vec, target_idx in training_data:
        # Reshape input to (batch_size=1, vocab_size)
        input_vec = input_vec.unsqueeze(0)
        target_idx = torch.tensor([target_idx])  # shape: (1,)
        
        # Forward pass
        logits = model(input_vec)
        
        # Compute loss
        loss = criterion(logits, target_idx)
        
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    if (epoch+1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss:.4f}")


Epoch [10/10000], Loss: 4.9669
Epoch [20/10000], Loss: 4.5427
Epoch [30/10000], Loss: 3.9484
Epoch [40/10000], Loss: 3.2387
Epoch [50/10000], Loss: 2.5703
Epoch [60/10000], Loss: 2.0282
Epoch [70/10000], Loss: 1.6056
Epoch [80/10000], Loss: 1.2711
Epoch [90/10000], Loss: 1.0063
Epoch [100/10000], Loss: 0.8002
Epoch [110/10000], Loss: 0.6425
Epoch [120/10000], Loss: 0.5231
Epoch [130/10000], Loss: 0.4326
Epoch [140/10000], Loss: 0.3634
Epoch [150/10000], Loss: 0.3097
Epoch [160/10000], Loss: 0.2676
Epoch [170/10000], Loss: 0.2340
Epoch [180/10000], Loss: 0.2068
Epoch [190/10000], Loss: 0.1844
Epoch [200/10000], Loss: 0.1659
Epoch [210/10000], Loss: 0.1503
Epoch [220/10000], Loss: 0.1371
Epoch [230/10000], Loss: 0.1257
Epoch [240/10000], Loss: 0.1159
Epoch [250/10000], Loss: 0.1073
Epoch [260/10000], Loss: 0.0998
Epoch [270/10000], Loss: 0.0932
Epoch [280/10000], Loss: 0.0873
Epoch [290/10000], Loss: 0.0820
Epoch [300/10000], Loss: 0.0773
Epoch [310/10000], Loss: 0.0730
Epoch [320/10000]

In [40]:
print("Learned Embeddings (fc1.weight):")
print(model.fc1.weight.data)  # shape: (embedding_dim, vocab_size)

# Example: embedding for the word "john"
i_embedding = model.fc1.weight.data[:, word_to_idx["john"]]
print("\nEmbedding for the word 'john':", i_embedding)

Learned Embeddings (fc1.weight):
tensor([[-1.5663e-04, -4.7701e-01,  2.9642e+00,  3.9673e-02],
        [-9.0292e-02,  2.8513e+00, -1.5829e-04, -3.3620e-01],
        [ 2.8334e+00, -4.2791e-01, -4.6171e-01,  1.3145e-01]])

Embedding for the word 'john': tensor([-1.5663e-04, -9.0292e-02,  2.8334e+00])


In [41]:
def predict_next_word(model, word):
    # Convert word to one-hot
    input_index = word_to_idx[word]
    input_vec = make_one_hot_vector(input_index, vocab_size)
    input_vec = input_vec.unsqueeze(0)  # shape: (1, vocab_size)
    
    # Forward pass
    with torch.no_grad():
        logits = model(input_vec)  # shape: (1, vocab_size)
        predicted_idx = torch.argmax(logits, dim=1).item()
    return idx_to_word[predicted_idx]

test_words = ["john", "abel", "loves", "learning"]
for w in test_words:
    next_w = predict_next_word(model, w)
    print(f"Word '{w}' -> Predicted next word: '{next_w}'")

Word 'john' -> Predicted next word: 'loves'
Word 'abel' -> Predicted next word: 'loves'
Word 'loves' -> Predicted next word: 'learning'
Word 'learning' -> Predicted next word: 'loves'
