In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from collections import Counter
import numpy as np

# Sample dataset (sentences)
corpus = ["the cat sat on the mat", "the dog barked at the cat", "the cat meowed at the dog"]

# Tokenization: split every sentence on whitespace into a list of words.
tokens = [sentence.split() for sentence in corpus]

# Sort the unique words so each word gets the same index on every run.
# Iterating a bare set of strings follows hash order, which Python randomizes
# per process, so an unsorted vocabulary would change between runs.
vocab = sorted({word for sentence in tokens for word in sentence})
vocab_size = len(vocab)
word_to_idx = {word: i for i, word in enumerate(vocab)}
idx_to_word = {i: word for word, i in word_to_idx.items()}

# Hyperparameters
window_size = 2  # Number of previous words to consider
embedding_dim = 10
hidden_dim = 20
batch_size = 2
epochs = 100
learning_rate = 0.01

# Prepare dataset: slide a window of `window_size` words over each sentence;
# the words inside the window are the context and the word right after it is
# the target. Sentences with no word after a full window yield no examples.
train_data = []
for sentence in tokens:
    for i in range(len(sentence) - window_size):
        context = [word_to_idx[word] for word in sentence[i:i + window_size]]
        target = word_to_idx[sentence[i + window_size]]
        train_data.append((context, target))

def collate_fn(batch):
    """Turn a list of (context, target) pairs into two long tensors.

    Args:
        batch: Sequence of ``(context, target)`` pairs, where ``context`` is
            a list of word indices and ``target`` is a single word index.

    Returns:
        A ``(contexts, targets)`` tuple of ``torch.long`` tensors: one row of
        context indices per example, and one target index per example.
    """
    # Transpose the list of pairs into a tuple of contexts and a tuple of targets.
    context_rows, target_ids = zip(*batch)
    context_tensor = torch.tensor(context_rows, dtype=torch.long)
    target_tensor = torch.tensor(target_ids, dtype=torch.long)
    return context_tensor, target_tensor

# Mini-batch iterator over the (context, target) pairs. With shuffle=True the
# example order is reshuffled every time the loader is iterated.
dataloader = data.DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn,
)

# Neural Network Model
class FixedWindowNNLM(nn.Module):
    """Feed-forward language model over a fixed-size window of word indices.

    Each context word is embedded, the embeddings of one example are laid out
    side by side in a single vector, and that vector goes through one ReLU
    hidden layer followed by a linear layer with one output per vocabulary
    entry.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, window_size):
        """Create the embedding table and the two linear layers.

        Args:
            vocab_size: Number of distinct words; sizes the embedding table
                and the output layer.
            embedding_dim: Length of each word embedding.
            hidden_dim: Width of the hidden layer.
            window_size: Number of context words per example; the first
                linear layer expects ``window_size * embedding_dim`` inputs.
        """
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.fc1 = nn.Linear(window_size * embedding_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x):
        """Return one score per vocabulary word for every example in ``x``.

        Args:
            x: Tensor of word indices whose first dimension indexes examples.
                Callers in this file pass ``(batch, window_size)`` long
                tensors.

        Returns:
            Tensor with ``x.shape[0]`` rows and ``vocab_size`` columns of
            unnormalised scores.
        """
        embedded = self.embedding(x)
        # Collapse everything after the first dimension so each example
        # becomes one flat vector of concatenated embeddings.
        flattened = embedded.view(x.shape[0], -1)
        hidden = self.relu(self.fc1(flattened))
        return self.fc2(hidden)

# Build the network together with its loss function and optimizer.
model = FixedWindowNNLM(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    hidden_dim=hidden_dim,
    window_size=window_size,
)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training: one optimizer step per mini-batch, `epochs` passes over the data.
for epoch in range(epochs):
    # Running sum of the per-batch losses for this epoch (a sum, not a mean).
    total_loss = 0
    for contexts, targets in dataloader:
        optimizer.zero_grad()  # clear gradients left over from the last step
        output = model(contexts)
        loss = criterion(output, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Report only on every tenth epoch.
    if (epoch + 1) % 10 != 0:
        continue
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss:.4f}")

# Quick check: predict the word that follows the context "the dog".
with torch.no_grad():  # inference only, so skip gradient tracking
    test_context = torch.tensor(
        [[word_to_idx[word] for word in ("the", "dog")]],
        dtype=torch.long,
    )
    output = model(test_context)
    # Index of the highest-scoring vocabulary entry for the single example.
    predicted_idx = output.argmax(dim=1).item()
    print("Predicted word:", idx_to_word[predicted_idx])


Epoch [10/100], Loss: 2.2376
Epoch [20/100], Loss: 1.5320
Epoch [30/100], Loss: 1.4709
Epoch [40/100], Loss: 1.4730
Epoch [50/100], Loss: 1.4506
Epoch [60/100], Loss: 1.4780
Epoch [70/100], Loss: 1.4506
Epoch [80/100], Loss: 1.4683
Epoch [90/100], Loss: 1.4326
Epoch [100/100], Loss: 1.4242
Predicted word: barked
