In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn import Transformer
import math
# Toy parallel corpus: each entry is a (source sentence, target sentence) pair,
# here Vietnamese phrases with their English translations. Sentences are
# lowercase and words are separated by single spaces.
data = [
    ("xin chào", "hello"),
    ("cảm ơn", "thank you"),
    ("tạm biệt", "goodbye"),
    ("tôi yêu bạn", "i love you"),
    ("bạn khỏe không", "how are you"),
]
def build_vocab(sentences):
    """Map every whitespace-separated word in ``sentences`` to an integer id.

    Ids 0, 1 and 2 are reserved for the ``<pad>``, ``<sos>`` and ``<eos>``
    special tokens. Every other word receives the next consecutive id
    (starting at 3) in order of first appearance.

    Args:
        sentences: iterable of strings.

    Returns:
        dict mapping token -> id.
    """
    token_to_id = {"<pad>": 0, "<sos>": 1, "<eos>": 2}
    for sentence in sentences:
        for word in sentence.split():
            # Ids are dense, so the current size is always the next free id.
            token_to_id.setdefault(word, len(token_to_id))
    return token_to_id

# Separate vocabularies for the source (first element) and target (second
# element) side of every sentence pair.
src_vocab = build_vocab(pair[0] for pair in data)
tgt_vocab = build_vocab(pair[1] for pair in data)
# Reverse lookup (id -> word) used to turn predicted ids back into text.
inv_tgt_vocab = {token_id: word for word, token_id in tgt_vocab.items()}

def encode(sentence, vocab):
    """Convert ``sentence`` into a list of ids framed by ``<sos>`` and ``<eos>``.

    Words are obtained with ``str.split()`` and looked up directly in
    ``vocab``; a word that is missing from ``vocab`` raises ``KeyError``.
    """
    ids = [vocab["<sos>"]]
    ids.extend(vocab[word] for word in sentence.split())
    ids.append(vocab["<eos>"])
    return ids

def pad(seq, max_len, pad_value=0):
    """Return a new list holding ``seq`` right-padded to ``max_len`` items.

    Args:
        seq: sequence of token ids (list, tuple, ...).
        max_len: desired length of the result.
        pad_value: filler appended after the real tokens. Defaults to 0.

    Returns:
        A new list; ``seq`` itself is never modified. A ``seq`` that already
        has ``max_len`` or more items is returned with its content unchanged
        (it is never truncated).
    """
    # A negative repeat count yields an empty list, so over-long input
    # simply passes through.
    return list(seq) + [pad_value] * (max_len - len(seq))

# Encode both sides of every pair as <sos> ... <eos> id lists.
src_data, tgt_data = [], []
for src_sentence, tgt_sentence in data:
    src_data.append(encode(src_sentence, src_vocab))
    tgt_data.append(encode(tgt_sentence, tgt_vocab))

# The longest encoded sequence on each side determines the padded width.
src_max_len = len(max(src_data, key=len))
tgt_max_len = len(max(tgt_data, key=len))

# Right-pad every sequence and stack them into 2-D integer tensors with one
# row per sentence pair.
src_tensor = torch.tensor([pad(ids, src_max_len) for ids in src_data])
tgt_tensor = torch.tensor([pad(ids, tgt_max_len) for ids in tgt_data])

class TransformerModel(nn.Module):
    """Small encoder-decoder Transformer over integer token ids.

    Both inputs to ``forward`` are batch-first id tensors. The decoder is
    restricted by a causal mask so that a target position can only attend to
    itself and earlier positions, and positions equal to ``pad_idx`` are
    excluded from attention on both the source and the target side.
    """

    def __init__(self, src_vocab_size, tgt_vocab_size, d_model=64, nhead=4, num_layers=2,
                 max_len=100, pad_idx=0):
        """Build the embeddings, positional table, Transformer and output layer.

        Args:
            src_vocab_size: number of distinct source token ids.
            tgt_vocab_size: number of distinct target token ids; also the
                size of the last dimension of the output logits.
            d_model: embedding / model width.
            nhead: number of attention heads.
            num_layers: number of encoder layers and of decoder layers.
            max_len: longest source or target sequence the learned
                positional table can cover.
            pad_idx: token id treated as padding and hidden from attention.
                Pass ``None`` to disable the padding masks.
        """
        super().__init__()
        self.pad_idx = pad_idx
        self.src_embed = nn.Embedding(src_vocab_size, d_model)
        self.tgt_embed = nn.Embedding(tgt_vocab_size, d_model)
        # Learned positional table (zero-initialised), shared by the encoder
        # and decoder inputs.
        self.pos_enc = nn.Parameter(torch.zeros(1, max_len, d_model))
        self.transformer = Transformer(d_model=d_model, nhead=nhead, num_encoder_layers=num_layers, num_decoder_layers=num_layers)
        self.fc_out = nn.Linear(d_model, tgt_vocab_size)

    def forward(self, src, tgt):
        """Return next-token logits for every target position.

        Args:
            src: ``(batch, src_len)`` tensor of source token ids.
            tgt: ``(batch, tgt_len)`` tensor of target token ids fed to the
                decoder.

        Returns:
            ``(batch, tgt_len, tgt_vocab_size)`` tensor of logits.

        Raises:
            ValueError: if either sequence is longer than the positional
                table.
        """
        src_len, tgt_len = src.size(1), tgt.size(1)
        max_len = self.pos_enc.size(1)
        if src_len > max_len or tgt_len > max_len:
            raise ValueError(
                f"sequence length ({src_len} source, {tgt_len} target) exceeds max_len={max_len}"
            )

        # True above the diagonal = "may not attend": without this mask the
        # decoder could look at the very token it is being trained to predict.
        causal_mask = torch.ones(tgt_len, tgt_len, device=tgt.device).triu(diagonal=1).bool()

        # True = padding position that attention must ignore.
        src_pad_mask = tgt_pad_mask = None
        if self.pad_idx is not None:
            src_pad_mask = src == self.pad_idx
            tgt_pad_mask = tgt == self.pad_idx

        src_emb = self.src_embed(src) + self.pos_enc[:, :src_len]
        tgt_emb = self.tgt_embed(tgt) + self.pos_enc[:, :tgt_len]

        # The Transformer was built with its default sequence-first layout,
        # so swap the batch and sequence axes on the way in and out.
        out = self.transformer(
            src_emb.permute(1, 0, 2),
            tgt_emb.permute(1, 0, 2),
            tgt_mask=causal_mask,
            src_key_padding_mask=src_pad_mask,
            tgt_key_padding_mask=tgt_pad_mask,
            memory_key_padding_mask=src_pad_mask,
        )
        return self.fc_out(out.permute(1, 0, 2))
# Model, loss and optimiser. Targets equal to 0 (the <pad> id) are ignored by
# the loss.
tgt_vocab_size = len(tgt_vocab)
model = TransformerModel(len(src_vocab), tgt_vocab_size)
criterion = nn.CrossEntropyLoss(ignore_index=0)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Teacher forcing: the decoder is fed every target token except the last and
# is scored against the same sequence shifted one step to the left.
decoder_input = tgt_tensor[:, :-1]
expected_next = tgt_tensor[:, 1:].reshape(-1)

# Full-batch training: the whole corpus is one batch per epoch.
for epoch in range(500):
    optimizer.zero_grad()
    output = model(src_tensor, decoder_input)
    loss = criterion(output.reshape(-1, tgt_vocab_size), expected_next)
    loss.backward()
    optimizer.step()

    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")
# Translate one sentence with greedy decoding: start from <sos> and repeatedly
# append the most likely next token until <eos> or the length limit.
test = "bạn khỏe không"
test_enc = torch.tensor([pad(encode(test, src_vocab), src_max_len)])
tgt_start = torch.tensor([[tgt_vocab["<sos>"]]])
eos_id = tgt_vocab["<eos>"]
max_new_tokens = 5

# Switch off dropout so decoding is deterministic, and skip gradient tracking
# since nothing is being optimised here.
model.eval()
with torch.no_grad():
    for _ in range(max_new_tokens):
        out = model(test_enc, tgt_start)
        next_token = out.argmax(-1)[:, -1]
        tgt_start = torch.cat([tgt_start, next_token.unsqueeze(1)], dim=1)
        if next_token.item() == eos_id:
            break

# Drop the leading <sos>. Only strip the last token when it really is <eos>;
# if the length limit was hit first, the last token is a real word.
generated = tgt_start[0, 1:].tolist()
if generated and generated[-1] == eos_id:
    generated = generated[:-1]

translated = " ".join(inv_tgt_vocab[idx] for idx in generated)
print("Dịch:", test, "→", translated)


Epoch 0, Loss: 2.3435
Epoch 100, Loss: 0.0137
Epoch 200, Loss: 0.0052
Epoch 300, Loss: 0.0029
Epoch 400, Loss: 0.0021
Dịch: bạn khỏe không → how are you
