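# Word Embedding Demo

Trains a tiny two-layer network to predict the next word from a one-hot encoding of the current word; the weights of the first layer are then read out as the learned word embeddings.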

In [48]:
import torch
import torch.nn as nn
import torch.optim as optim

# Small fixed vocabulary for the demo
vocab = ["john", "abel", "loves", "learning"]
vocab_size = len(vocab)

# Lookup tables in both directions (word -> index, index -> word)
word_to_idx = dict(zip(vocab, range(vocab_size)))
idx_to_word = {index: token for token, index in word_to_idx.items()}

# Training corpus: each phrase is an ordered list of vocabulary words
training_phrases = [
    ["john", "loves", "learning"],
    ["abel", "loves", "learning"],
]
print(word_to_idx)
print(idx_to_word)

{'john': 0, 'abel': 1, 'loves': 2, 'learning': 3}
{0: 'john', 1: 'abel', 2: 'loves', 3: 'learning'}


In [49]:
def make_one_hot_vector(word_index, vocab_size):
    """Create a one-hot vector for a given word index.

    Args:
        word_index: Position to set to 1.0; must satisfy
            0 <= word_index < vocab_size.
        vocab_size: Length of the returned vector.

    Returns:
        A 1-D tensor of length ``vocab_size`` (torch's default dtype) that is
        0.0 everywhere except for a 1.0 at ``word_index``.

    Raises:
        IndexError: If ``word_index`` is outside ``[0, vocab_size)``.
    """
    # Reject negative indices explicitly: plain tensor indexing would wrap
    # around (e.g. -1 -> last word) and silently encode the wrong word.
    if not 0 <= word_index < vocab_size:
        raise IndexError(
            f"word_index {word_index} is out of range for vocab_size {vocab_size}"
        )
    vec = torch.zeros(vocab_size)
    vec[word_index] = 1.0
    return vec

# Each sample pairs a one-hot encoded word with the vocabulary index of the
# word that follows it in a phrase.
training_data = []
for phrase in training_phrases:
    # Walk the consecutive (current, next) word pairs of the phrase
    for current_word, next_word in zip(phrase, phrase[1:]):
        src_idx, nxt_idx = word_to_idx[current_word], word_to_idx[next_word]

        # One-hot for input, index for output label
        input_vec = make_one_hot_vector(src_idx, vocab_size)
        training_data.append((input_vec, nxt_idx))

print("Sample training data (input vector, target index):")
for sample in training_data:
    print(sample)

# Shows the last input vector produced by the loop above
print("input_vec:",input_vec)

Sample training data (input vector, target index):
(tensor([1., 0., 0., 0.]), 2)
(tensor([0., 0., 1., 0.]), 3)
(tensor([0., 1., 0., 0.]), 2)
(tensor([0., 0., 1., 0.]), 3)
input_vec: tensor([0., 0., 1., 0.])


In [50]:
class WordEmbeddingModel(nn.Module):
    """Two-layer, bias-free network: one-hot word -> embedding -> vocabulary logits."""

    def __init__(self, vocab_size, embedding_dim):
        super().__init__()
        self.embedding_dim = embedding_dim

        # Projects a one-hot vocabulary vector down to the embedding space
        self.fc1 = nn.Linear(vocab_size, embedding_dim, bias=False)

        # Projects the embedding back up to one score per vocabulary word
        self.fc2 = nn.Linear(embedding_dim, vocab_size, bias=False)

    def forward(self, x):
        """
        Compute next-word scores.

        Args:
          x: one-hot vector(s) of shape (batch_size, vocab_size)
        Returns:
          unnormalized scores (logits) of shape (batch_size, vocab_size)
        """
        # one-hot -> embedding, followed by the ReLU non-linearity
        hidden = torch.relu(self.fc1(x))   # shape: (batch_size, embedding_dim)

        # embedding -> one score per vocabulary word
        return self.fc2(hidden)            # shape: (batch_size, vocab_size)

In [58]:
# Hyperparameters
embedding_dim = 3
learning_rate = 0.01
num_epochs = 10000
log_every = 10  # print the summed epoch loss every this many epochs

# Initialize the model, loss, and optimizer.
# NOTE: the layer weights are randomly initialized, so the losses printed below
# vary from run to run unless the RNG is seeded first (e.g. torch.manual_seed).
model = WordEmbeddingModel(vocab_size, embedding_dim)
criterion = nn.CrossEntropyLoss()  # suitable for classification
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Build the per-sample batches once instead of on every iteration of every
# epoch: input reshaped to (batch_size=1, vocab_size), target to shape (1,).
training_batches = [
    (sample_vec.unsqueeze(0), torch.tensor([sample_target]))
    for sample_vec, sample_target in training_data
]

# Training loop: one optimizer step per training sample
for epoch in range(num_epochs):
    total_loss = 0.0  # sum of the per-sample losses within this epoch

    for batch_input, batch_target in training_batches:
        # Forward pass
        logits = model(batch_input)

        # Compute loss
        loss = criterion(logits, batch_target)

        # Backward pass (clear stale gradients before accumulating new ones)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    if (epoch + 1) % log_every == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss:.4f}")


Epoch [10/10000], Loss: 5.2520
Epoch [20/10000], Loss: 5.1637
Epoch [30/10000], Loss: 5.0653
Epoch [40/10000], Loss: 4.9553
Epoch [50/10000], Loss: 4.8328
Epoch [60/10000], Loss: 4.6975
Epoch [70/10000], Loss: 4.5501
Epoch [80/10000], Loss: 4.3920
Epoch [90/10000], Loss: 4.2253
Epoch [100/10000], Loss: 4.0526
Epoch [110/10000], Loss: 3.8766
Epoch [120/10000], Loss: 3.6995
Epoch [130/10000], Loss: 3.5227
Epoch [140/10000], Loss: 3.3469
Epoch [150/10000], Loss: 3.1773
Epoch [160/10000], Loss: 3.0129
Epoch [170/10000], Loss: 2.8535
Epoch [180/10000], Loss: 2.6988
Epoch [190/10000], Loss: 2.5486
Epoch [200/10000], Loss: 2.4025
Epoch [210/10000], Loss: 2.2601
Epoch [220/10000], Loss: 2.1212
Epoch [230/10000], Loss: 1.9853
Epoch [240/10000], Loss: 1.8524
Epoch [250/10000], Loss: 1.7224
Epoch [260/10000], Loss: 1.5957
Epoch [270/10000], Loss: 1.4728
Epoch [280/10000], Loss: 1.3544
Epoch [290/10000], Loss: 1.2414
Epoch [300/10000], Loss: 1.1345
Epoch [310/10000], Loss: 1.0346
Epoch [320/10000]

In [59]:
# Column j of fc1's weight matrix holds the learned vector for word j
# (this is the value before the ReLU applied in the forward pass).
embedding_matrix = model.fc1.weight.detach()  # shape: (embedding_dim, vocab_size)
print("Learned Embeddings (fc1.weight):")
print(embedding_matrix)

# Example: embedding for the word "john"
john_idx = word_to_idx["john"]
i_embedding = embedding_matrix[:, john_idx]
print("\nEmbedding for the word 'john':", i_embedding)

Learned Embeddings (fc1.weight):
tensor([[-3.1789e-01,  2.7381e+00, -1.6495e-04, -2.0745e-01],
        [ 7.6829e-01,  5.8568e-01,  3.0529e+00, -3.3495e-01],
        [ 2.7693e+00, -2.2044e-01, -3.7240e-01, -4.6437e-01]])

Embedding for the word 'john': tensor([-0.3179,  0.7683,  2.7693])


In [60]:
def predict_next_word(model, word):
    """Return the vocabulary word that `model` scores highest as the successor of `word`.

    `word` must be a key of the module-level `word_to_idx` table; an unknown
    word raises KeyError from that lookup.
    """
    # Encode the query word as a one-hot batch of shape (1, vocab_size)
    one_hot = make_one_hot_vector(word_to_idx[word], vocab_size).unsqueeze(0)

    # Inference only, so skip gradient tracking
    with torch.no_grad():
        scores = model(one_hot)  # shape: (1, vocab_size)
        best_idx = scores.argmax(dim=1).item()
    return idx_to_word[best_idx]

# Query the trained model once for every word in the vocabulary
test_words = ["john", "abel", "loves", "learning"]
for word in test_words:
    print(f"Word '{word}' -> Predicted next word: '{predict_next_word(model, word)}'")

Word 'john' -> Predicted next word: 'loves'
Word 'abel' -> Predicted next word: 'loves'
Word 'loves' -> Predicted next word: 'learning'
Word 'learning' -> Predicted next word: 'john'
