In [14]:
import torch 
import torch.nn as nn
import torch.optim as optim 
import re 
from collections import defaultdict

In [15]:
# (question, capital) pairs used to fit the classifier.
training_data = [
    ("What is the capital of Nepal?", "Kathmandu"),
    ("What is the capital of India?", "New Delhi"),
    ("What's the capital of France?", "Paris"),
    ("Tell me the capital city of Japan.", "Tokyo"),
    ("Which city is the capital of India?", "New Delhi"),
    ("France's capital city is what?", "Paris"),
    ("Name the capital of Japan.", "Tokyo"),
    ("What is Japan's capital?", "Tokyo"),
    ("Capital of India is?", "New Delhi"),
    ("Can you tell me the capital of France?", "Paris"),
]

In [16]:
def tokenize_text(text):
    """Lowercase ``text`` and return its runs of word characters as a list.

    Punctuation is dropped, so an apostrophe splits a word in two
    (for example "what's" becomes "what" and "s").
    """
    lowered = text.lower()
    return [match.group(0) for match in re.finditer(r'\b\w+\b', lowered)]



In [17]:
# Build Vocabulary: count how often each token occurs across the training questions.
word_freq = defaultdict(int)
for question, _answer in training_data:
    for token in tokenize_text(question):
        # Reading a missing key yields 0, so first-seen tokens start at 1.
        word_freq[token] = word_freq[token] + 1

In [18]:
# Create word-to-index mapping: ids follow the first-seen order of word_freq.
word_to_ix = dict(zip(word_freq, range(len(word_freq))))
# Add unknown token; it takes the next free id, one past the last known word.
word_to_ix["<UNK>"] = len(word_to_ix)
# Reverse lookup from id back to token.
ix_to_word = {index: token for token, index in word_to_ix.items()}

In [19]:
# Output label mappings: capital name <-> class index.
capital_to_ix = {
    name: index
    for index, name in enumerate(("New Delhi", "Paris", "Tokyo", "Kathmandu"))
}
# Indices are 0..n-1 in insertion order, so enumerating the keys inverts the map.
ix_to_capital = dict(enumerate(capital_to_ix))

In [20]:
# Tokenizer function
def tokenize(sentence):
    """Encode ``sentence`` as a list of vocabulary ids.

    The sentence is split with ``tokenize_text``; any token missing from
    ``word_to_ix`` is encoded with the id stored under ``"<UNK>"``.
    """
    ids = []
    for token in tokenize_text(sentence):
        unk_id = word_to_ix["<UNK>"]
        ids.append(word_to_ix.get(token, unk_id))
    return ids

In [21]:
# Define the model
class CapitalQA(nn.Module):
    """Bag-of-embeddings classifier: mean token embedding fed to a linear layer.

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each token embedding.
        output_dim: number of logits produced per question.
    """

    def __init__(self, vocab_size, embedding_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.fc = nn.Linear(embedding_dim, output_dim)

    def forward(self, x):
        """Return class logits for one question or a batch of questions.

        Args:
            x: integer tensor of token ids, shaped ``(seq_len,)`` for a single
                question or ``(batch, seq_len)`` for a batch of equal-length
                questions.

        Returns:
            Logits shaped ``(output_dim,)`` or ``(batch, output_dim)``.
        """
        embeds = self.embedding(x)
        # The token axis is always second-to-last once the embedding axis is
        # appended. For 1-D input that is axis 0; for batched input it keeps
        # the questions separate instead of averaging across the batch.
        pooled = embeds.mean(dim=-2)
        return self.fc(pooled)

In [22]:
# Model setup
SEED = 42  # fixed so weight initialisation repeats across Restart & Run All
VOCAB_SIZE = len(word_to_ix)
EMBED_DIM = 32
OUTPUT_DIM = len(capital_to_ix)
LEARNING_RATE = 0.01

# Seed before constructing the model: the embedding and linear weights are
# drawn from torch's global RNG at construction time.
torch.manual_seed(SEED)
model = CapitalQA(VOCAB_SIZE, EMBED_DIM, OUTPUT_DIM)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [23]:
# Training loop
NUM_EPOCHS = 300
LOG_EVERY = 50

# The encoded questions and targets never change between epochs, so build the
# tensors once instead of re-tokenizing every sentence on every epoch.
encoded_examples = [
    (
        torch.tensor(tokenize(sentence), dtype=torch.long),
        torch.tensor([capital_to_ix[capital]], dtype=torch.long),
    )
    for sentence, capital in training_data
]

for epoch in range(NUM_EPOCHS):
    total_loss = 0
    # One optimizer step per example, in the order given by training_data.
    for inputs, target in encoded_examples:
        optimizer.zero_grad()
        logits = model(inputs)
        # The loss is given a leading batch axis of size 1 to pair with the
        # one-element target tensor.
        loss = loss_fn(logits.unsqueeze(0), target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    if epoch % LOG_EVERY == 0:
        print(f"Epoch {epoch} | Loss: {total_loss:.4f}")

Epoch 0 | Loss: 14.8908
Epoch 50 | Loss: 0.0632
Epoch 100 | Loss: 0.0155
Epoch 150 | Loss: 0.0067
Epoch 200 | Loss: 0.0036
Epoch 250 | Loss: 0.0022


In [24]:
def predict(sentence):
    """Return the capital the trained model predicts for ``sentence``.

    Args:
        sentence: question text; it is encoded with ``tokenize``.

    Returns:
        The entry of ``ix_to_capital`` whose logit is largest. The result is
        always one of the known capitals, even when the question mentions a
        country that is absent from the training data.

    Raises:
        ValueError: if ``sentence`` contains no word tokens (empty or
            punctuation-only), since there is nothing to embed and average.
    """
    token_ids = tokenize(sentence)
    if not token_ids:
        # torch.tensor([]) would be a float tensor, which nn.Embedding rejects
        # with an opaque dtype error; fail early with a clear message instead.
        raise ValueError("sentence contains no word tokens to classify")
    with torch.no_grad():
        inputs = torch.tensor(token_ids, dtype=torch.long)
        logits = model(inputs)
        pred_ix = torch.argmax(logits).item()
    return ix_to_capital[pred_ix]

In [25]:
print("\n🧠 Ask Your Questions!")

# Training phrasings, terser variants, and one country the model never saw.
test_sentences = [
    "What is the capital of India?",
    "What's the capital of France?",
    "Tell me the capital city of Japan.",
    "Can you tell me the capital of India?",
    "India capital?",
    "India capital",
    "France's capital city?",
    "What is the capital of Germany?",  # Unseen
]

for question in test_sentences:
    answer = predict(question)
    print(f"Q: {question} → A: {answer}")


🧠 Ask Your Questions!
Q: What is the capital of India? → A: New Delhi
Q: What's the capital of France? → A: Paris
Q: Tell me the capital city of Japan. → A: Tokyo
Q: Can you tell me the capital of India? → A: New Delhi
Q: India capital? → A: New Delhi
Q: India capital → A: New Delhi
Q: France's capital city? → A: Paris
Q: What is the capital of Germany? → A: Kathmandu


In [28]:
# "bhutan" does not occur in training_data, so tokenize encodes it as <UNK>;
# predict can still only answer with one of the capitals in ix_to_capital.
predict("Capital of Bhutan")

'New Delhi'

In [29]:
# Same out-of-vocabulary country with the extra known word "city": "bhutan"
# is still encoded as <UNK> and the answer comes from the known capitals.
predict("Capital city of Bhutan")

'New Delhi'