# Word Embedding Demo

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# Toy vocabulary; a word's position in this list serves as its integer id.
vocab = ["i", "love", "deep", "learning", "pytorch"]
vocab_size = len(vocab)

# Lookup tables in both directions: id -> word first, then its inverse.
idx_to_word = dict(enumerate(vocab))
word_to_idx = {word: idx for idx, word in idx_to_word.items()}

# Tiny corpus of tokenized phrases; every token appears in `vocab`.
training_phrases = [
    ["i", "love", "deep", "learning"],
    ["i", "love", "pytorch"],
    ["deep", "learning", "i", "love"],
]

In [None]:
def make_one_hot_vector(word_index: int, vocab_size: int) -> torch.Tensor:
    """Create a one-hot vector for a given word index.

    Args:
        word_index: Position to set to 1.0; must satisfy
            ``0 <= word_index < vocab_size``.
        vocab_size: Length of the returned vector.

    Returns:
        A 1-D tensor of length ``vocab_size`` (created with ``torch.zeros``)
        holding 0.0 everywhere except a single 1.0 at ``word_index``.

    Raises:
        IndexError: If ``word_index`` is outside ``[0, vocab_size)``.
    """
    # Tensor indexing accepts negative positions and counts them from the
    # end, so an invalid id such as -1 would otherwise silently encode the
    # last vocabulary word instead of failing.
    if not 0 <= word_index < vocab_size:
        raise IndexError(
            f"word_index {word_index} is out of range for vocab_size {vocab_size}"
        )
    vec = torch.zeros(vocab_size)
    vec[word_index] = 1.0
    return vec

# Build (input, target) pairs: each word is paired with the word that
# immediately follows it inside the same phrase.
training_data = []
for phrase in training_phrases:
    # Zipping the phrase with its one-step shift yields every adjacent pair
    # and naturally yields nothing for phrases shorter than two words.
    for current_word, next_word in zip(phrase, phrase[1:]):
        # One-hot for input, index for output label
        input_vec = make_one_hot_vector(word_to_idx[current_word], vocab_size)
        training_data.append((input_vec, word_to_idx[next_word]))

print("Sample training data (input vector, target index):")
# A slice shows at most three samples and cannot raise IndexError when the
# corpus produced fewer than three pairs.
for sample in training_data[:3]:
    print(sample)

In [None]:
class WordEmbeddingModel(nn.Module):
    """Two bias-free linear layers mapping one-hot words to vocabulary logits.

    Attributes:
        embedding_dim: Width of the hidden (embedding) layer.
        fc1: Linear map from ``vocab_size`` inputs to ``embedding_dim``.
        fc2: Linear map from ``embedding_dim`` back to ``vocab_size``.
    """

    def __init__(self, vocab_size, embedding_dim):
        super().__init__()
        self.embedding_dim = embedding_dim

        # Projection of the one-hot input into the embedding space.
        self.fc1 = nn.Linear(
            in_features=vocab_size, out_features=embedding_dim, bias=False
        )
        # Projection of the embedding onto one score per vocabulary word.
        self.fc2 = nn.Linear(
            in_features=embedding_dim, out_features=vocab_size, bias=False
        )

    def forward(self, x):
        """Compute unnormalized next-word scores.

        Args:
            x: one-hot input of shape (batch_size, vocab_size).

        Returns:
            Logits of shape (batch_size, vocab_size).
        """
        # one-hot -> embedding -> ReLU, shape (batch_size, embedding_dim)
        hidden = torch.relu(self.fc1(x))
        # embedding -> scores over the vocabulary
        return self.fc2(hidden)

In [None]:
# Training configuration
embedding_dim = 4      # width of the hidden (embedding) layer
learning_rate = 0.01   # SGD step size
num_epochs = 100       # number of full passes over training_data

# Model, objective and optimizer
model = WordEmbeddingModel(vocab_size, embedding_dim)
criterion = nn.CrossEntropyLoss()  # compares raw logits with class indices
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Per-sample SGD: one parameter update for every (input, target) pair.
for epoch in range(num_epochs):
    total_loss = 0.0

    for sample_vec, sample_target in training_data:
        # Give both the input and the label a leading batch axis of size 1.
        batch_input = sample_vec.unsqueeze(0)        # (1, vocab_size)
        batch_target = torch.tensor([sample_target])  # (1,)

        # Clear gradients left over from the previous step before computing
        # new ones.
        optimizer.zero_grad()

        # Forward pass and loss in one expression.
        loss = criterion(model(batch_input), batch_target)

        # Backward pass followed by the parameter update.
        loss.backward()
        optimizer.step()

        # Accumulate the summed (not averaged) loss for this epoch.
        total_loss += loss.item()

    # Report progress every tenth epoch, counting epochs from 1.
    epoch_number = epoch + 1
    if epoch_number % 10 == 0:
        print(f"Epoch [{epoch_number}/{num_epochs}], Loss: {total_loss:.4f}")


In [None]:
# fc1's weight matrix has shape (embedding_dim, vocab_size): one column per
# vocabulary word.
embedding_matrix = model.fc1.weight.data
print("Learned Embeddings (fc1.weight):")
print(embedding_matrix)

# Example: the column belonging to the word "i" is that word's embedding.
i_column = word_to_idx["i"]
i_embedding = embedding_matrix[:, i_column]
print("\nEmbedding for the word 'i':", i_embedding)

In [None]:
def predict_next_word(model, word):
    """Return the vocabulary word the model scores highest after `word`.

    Args:
        model: callable taking a (1, vocab_size) one-hot tensor and returning
            logits of shape (1, vocab_size).
        word: a key of `word_to_idx`; an unknown word raises KeyError.

    Returns:
        The word from `idx_to_word` at the position of the largest logit.
    """
    # Encode the word as a single-row one-hot batch: shape (1, vocab_size).
    one_hot_batch = make_one_hot_vector(word_to_idx[word], vocab_size).unsqueeze(0)

    # Inference only, so gradient tracking is switched off for the forward pass.
    with torch.no_grad():
        scores = model(one_hot_batch)

    best_idx = scores.argmax(dim=1).item()
    return idx_to_word[best_idx]

# Print the model's most likely successor for every vocabulary word.
test_words = ["i", "love", "deep", "learning", "pytorch"]
for w in test_words:
    print(f"Word '{w}' -> Predicted next word: '{predict_next_word(model, w)}'")