In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from collections import defaultdict
import random

# Toy corpus: every sentence is already tokenised into a list of words.
corpus = [
    ["I", "love", "deep", "learning"],
    ["deep", "learning", "is", "fun"],
    ["I", "enjoy", "machine", "learning"],
    ["machine", "learning", "is", "powerful"]
]

# Vocabulary: ids 0 and 1 are reserved for the padding and unknown-word
# tokens; every other word receives the next free id in first-seen order.
word2idx = {"<PAD>": 0, "<UNK>": 1}
for tokens in corpus:
    for token in tokens:
        # setdefault only inserts when the token is new, so len(word2idx)
        # is always the next unused id at the moment of insertion.
        word2idx.setdefault(token, len(word2idx))

# Reverse lookup (id -> word), derived from the forward mapping.
idx2word = {index: token for token, index in word2idx.items()}

# Hyper-parameters
window_size = 2  # number of preceding words used as context
embedding_dim = 10
hidden_dim = 32
vocab_size = len(word2idx)

# Training pairs: (ids of the `window_size` previous words, id of the next word).
# Each sentence is left-padded with <PAD> so that its first real word also
# gets a full-length context.
unk_id = word2idx["<UNK>"]
data = []
for tokens in corpus:
    padded_ids = [
        word2idx.get(token, unk_id)
        for token in ["<PAD>"] * window_size + tokens
    ]
    for pos in range(window_size, len(padded_ids)):
        data.append((padded_ids[pos - window_size:pos], padded_ids[pos]))

# Model
class WindowLanguageModel(nn.Module):
    """Feed-forward next-word model over a fixed window of previous word ids.

    The ids in each context window are embedded, the embeddings are
    concatenated into one flat vector, and that vector is passed through a
    single ReLU hidden layer followed by a linear layer that produces one
    score (logit) per vocabulary entry.

    Args:
        vocab_size: number of rows in the embedding table and number of
            output scores.
        embedding_dim: size of each word embedding.
        context_size: number of word ids per input window.
        hidden_dim: width of the hidden layer.
    """

    def __init__(self, vocab_size, embedding_dim, context_size, hidden_dim):
        super().__init__()
        # Width of the concatenated embeddings of one full context window.
        flat_dim = context_size * embedding_dim
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.fc1 = nn.Linear(flat_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x):
        """Return unnormalised scores over the vocabulary for each row of ``x``.

        ``x`` holds word ids; its first dimension is treated as the batch
        dimension and everything after it is flattened after embedding.
        """
        batch_size = x.size(0)
        # Concatenate the per-word embeddings of each window into one row.
        flat_embeds = self.embeddings(x).view(batch_size, -1)
        activated = self.relu(self.fc1(flat_embeds))
        return self.fc2(activated)

# Training setup
# Seed PyTorch's RNG before the model is constructed: the embedding and linear
# layers are randomly initialised, so without a fixed seed every
# Restart & Run All yields a different loss curve and possibly a different
# prediction.
SEED = 42
torch.manual_seed(SEED)

model = WindowLanguageModel(vocab_size, embedding_dim, window_size, hidden_dim)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# The (context, target) pairs do not change between epochs, so convert them to
# batch-of-one tensors once instead of rebuilding them on every pass.
train_examples = [
    (
        torch.tensor([context_ids], dtype=torch.long),
        torch.tensor([target_id], dtype=torch.long),
    )
    for context_ids, target_id in data
]

# Training loop: one optimizer step per example, 20 passes over the data.
for epoch in range(20):
    total_loss = 0.0  # sum (not mean) of the per-example losses this epoch
    for context_tensor, target_tensor in train_examples:
        output = model(context_tensor)
        loss = loss_fn(output, target_tensor)

        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    print(f"Epoch {epoch + 1}, Loss: {total_loss:.4f}")

# Sample prediction
# Put the model in evaluation mode and disable gradient tracking: only a
# forward pass is needed to pick the highest-scoring next word.
model.eval()
with torch.no_grad():
    test_context = ["deep", "learning"]
    unk_id = word2idx["<UNK>"]  # fallback id for words missing from the vocabulary
    context_ids = [word2idx.get(w, unk_id) for w in test_context]
    # unsqueeze(0) adds the leading batch dimension (a batch of one window).
    input_tensor = torch.tensor(context_ids, dtype=torch.long).unsqueeze(0)
    output = model(input_tensor)
    # Index of the largest score along the vocabulary dimension.
    predicted_idx = output.argmax(dim=1).item()
    print(f"Given context: {test_context} → Predicted next word: {idx2word[predicted_idx]}")

Epoch 1, Loss: 38.6635
Epoch 2, Loss: 25.1368
Epoch 3, Loss: 17.0136
Epoch 4, Loss: 12.2504
Epoch 5, Loss: 10.0686
Epoch 6, Loss: 9.3174
Epoch 7, Loss: 8.7085
Epoch 8, Loss: 8.4475
Epoch 9, Loss: 8.3011
Epoch 10, Loss: 8.1975
Epoch 11, Loss: 8.1214
Epoch 12, Loss: 8.0475
Epoch 13, Loss: 7.9919
Epoch 14, Loss: 7.9454
Epoch 15, Loss: 7.9038
Epoch 16, Loss: 7.8672
Epoch 17, Loss: 7.8342
Epoch 18, Loss: 7.8042
Epoch 19, Loss: 7.7773
Epoch 20, Loss: 7.7526
Given context: ['deep', 'learning'] → Predicted next word: is


In [6]:
# Predict the next word for a second context window.
model.eval()

test_context = ["machine", "learning"]
# Map each word to its id, falling back to the <UNK> id for unseen words.
context_ids = [
    word2idx[w] if w in word2idx else word2idx["<UNK>"]
    for w in test_context
]

# Only the forward pass needs to run with gradient tracking disabled.
with torch.no_grad():
    input_tensor = torch.tensor([context_ids], dtype=torch.long)
    output = model(input_tensor)

# Index of the highest-scoring vocabulary entry for the single input row.
predicted_idx = output.argmax(dim=1).item()
print(f"Given context: {test_context} → Predicted next word: {idx2word[predicted_idx]}")

Given context: ['machine', 'learning'] → Predicted next word: is
