In [38]:
import os
os.system("pip install sentencepiece evaluate sacrebleu rouge_score")
print("Dependencies installed.")

Collecting sacrebleu
  Downloading sacrebleu-2.5.1-py3-none-any.whl.metadata (51 kB)
     ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 51.8/51.8 kB 1.1 MB/s eta 0:00:00
Collecting portalocker (from sacrebleu)
  Downloading portalocker-3.1.1-py3-none-any.whl.metadata (8.6 kB)
Downloading sacrebleu-2.5.1-py3-none-any.whl (104 kB)
   ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 104.1/104.1 kB 2.9 MB/s eta 0:00:00
Downloading portalocker-3.1.1-py3-none-any.whl (19 kB)
Installing collected packages: portalocker, sacrebleu
Successfully installed portalocker-3.1.1 sacrebleu-2.5.1
Dependencies installed.


In [40]:
import unicodedata
import re
import html
from sklearn.model_selection import train_test_split
import pickle

def preprocess(text):
    text = unicodedata.normalize("NFC", html.unescape(text))
    text = text.strip()
    text = re.sub(r"\s+", " ", text)
    return text

def prepare_and_split_data(en_file, vi_file, output_dir="data", train_ratio=0.8, val_ratio=0.1):
    os.makedirs(output_dir, exist_ok=True)
    with open(en_file, 'r', encoding='utf-8') as f_en, open(vi_file, 'r', encoding='utf-8') as f_vi:
        en_lines = f_en.readlines()
        vi_lines = f_vi.readlines()
    
    assert len(en_lines) == len(vi_lines), "Number of lines don't match!"
    
    data = [f"[EN] {preprocess(en)} [VI] {preprocess(vi)}" for en, vi in zip(en_lines, vi_lines)]
    
    train_data, temp_data = train_test_split(data, train_size=train_ratio, random_state=42)
    val_data, test_data = train_test_split(temp_data, train_size=val_ratio/(val_ratio + 0.1), random_state=42)
    
    with open(os.path.join(output_dir, "train_data.pkl"), 'wb') as f:
        pickle.dump(train_data, f)
    with open(os.path.join(output_dir, "val_data.pkl"), 'wb') as f:
        pickle.dump(val_data, f)
    with open(os.path.join(output_dir, "test_data.pkl"), 'wb') as f:
        pickle.dump(test_data, f)
    
    print(f"Data saved: {len(train_data)} train, {len(val_data)} val, {len(test_data)} test")
    return train_data, val_data, test_data

In [42]:
# Run Step 2
en_path = "/kaggle/input/machinetranslation/train.en.txt"
vi_path = "/kaggle/input/machinetranslation/train.vi.txt"
train_data, val_data, test_data = prepare_and_split_data(en_path, vi_path, output_dir="data")

Data saved: 106653 train, 13332 val, 13332 test


In [44]:
# Step 3: Train SentencePiece Tokenizer
import sentencepiece as spm

def load_data_splits(output_dir="data"):
    with open(os.path.join(output_dir, "train_data.pkl"), 'rb') as f:
        train_data = pickle.load(f)
    with open(os.path.join(output_dir, "val_data.pkl"), 'rb') as f:
        val_data = pickle.load(f)
    with open(os.path.join(output_dir, "test_data.pkl"), 'rb') as f:
        test_data = pickle.load(f)
    return train_data, val_data, test_data

def train_tokenizer(corpus_lines, model_prefix="gpt_bpe", vocab_size=8000, output_dir="tokenizer"):
    os.makedirs(output_dir, exist_ok=True)
    corpus_file = os.path.join(output_dir, "corpus.txt")
    with open(corpus_file, "w", encoding="utf-8") as f:
        for line in corpus_lines:
            f.write(line + "\n")
    
    spm.SentencePieceTrainer.train(
        input=corpus_file,
        model_prefix=os.path.join(output_dir, model_prefix),
        vocab_size=vocab_size,
        model_type="bpe",
        character_coverage=1.0,
        bos_id=1,
        eos_id=2,
        pad_id=0,
        unk_id=3
    )
    
    sp = spm.SentencePieceProcessor()
    sp.load(os.path.join(output_dir, f"{model_prefix}.model"))
    print(f"Tokenizer trained and saved to {output_dir}")
    return sp

In [45]:
# Run Step 3
train_data, val_data, test_data = load_data_splits(output_dir="data")
sp = train_tokenizer(train_data + val_data + test_data, model_prefix="gpt_bpe", output_dir="tokenizer")

Tokenizer trained and saved to tokenizer


In [46]:
# Step 4: Create Encoded Datasets
import torch
from torch.utils.data import Dataset

def encode_lines(lines, sp, max_len=128):
    encoded = [[1] + sp.encode(line, out_type=int) + [2] for line in lines]
    padded = [seq[:max_len] + [0] * (max_len - len(seq[:max_len])) for seq in encoded]
    return torch.tensor(padded, dtype=torch.long), max_len

class TranslationDataset(Dataset):
    def __init__(self, data):
        self.data = data
    def __getitem__(self, idx):
        x = self.data[idx][:-1]
        y = self.data[idx][1:]
        return x, y
    def __len__(self):
        return len(self.data)

def create_datasets(train_data, val_data, test_data, sp, output_dir="data", max_len=128):
    os.makedirs(output_dir, exist_ok=True)
    
    train_enc, max_len = encode_lines(train_data, sp, max_len)
    val_enc, _ = encode_lines(val_data, sp, max_len)
    test_enc, _ = encode_lines(test_data, sp, max_len)
    
    train_dataset = TranslationDataset(train_enc)
    val_dataset = TranslationDataset(val_enc)
    test_dataset = TranslationDataset(test_enc)
    
    torch.save(train_enc, os.path.join(output_dir, "train_enc.pt"))
    torch.save(val_enc, os.path.join(output_dir, "val_enc.pt"))
    torch.save(test_enc, os.path.join(output_dir, "test_enc.pt"))
    
    with open(os.path.join(output_dir, "max_len.pkl"), 'wb') as f:
        pickle.dump(max_len, f)
    
    print(f"Datasets saved: {len(train_dataset)} train, {len(val_dataset)} val, {len(test_dataset)} test")
    return train_dataset, val_dataset, test_dataset, max_len

In [47]:
# Run Step 4
train_dataset, val_dataset, test_dataset, max_len = create_datasets(
    train_data, val_data, test_data, sp, output_dir="data", max_len=128
)

Datasets saved: 106653 train, 13332 val, 13332 test


In [54]:
# Step 5: Define GPT Model
import torch.nn as nn
import math

import torch
import torch.nn as nn
import math

class MultiHeadAttention(nn.Module):
    def __init__(self, d_model, nhead, dropout=0.1):
        super().__init__()
        assert d_model % nhead == 0
        self.d_model = d_model
        self.nhead = nhead
        self.d_k = d_model // nhead
        self.query = nn.Linear(d_model, d_model)
        self.key = nn.Linear(d_model, d_model)
        self.value = nn.Linear(d_model, d_model)
        self.out = nn.Linear(d_model, d_model)
        self.dropout = nn.Dropout(dropout)
        self.scale = math.sqrt(self.d_k)

    def forward(self, x, mask=None):
        batch_size = x.size(0)
        q = self.query(x).view(batch_size, -1, self.nhead, self.d_k).transpose(1, 2)
        k = self.key(x).view(batch_size, -1, self.nhead, self.d_k).transpose(1, 2)
        v = self.value(x).view(batch_size, -1, self.nhead, self.d_k).transpose(1, 2)
        scores = torch.matmul(q, k.transpose(-2, -1)) / self.scale
        if mask is not None:
            scores = scores.masked_fill(mask == float('-inf'), float('-inf'))
        attn = torch.softmax(scores, dim=-1)
        attn = self.dropout(attn)
        context = torch.matmul(attn, v)
        context = context.transpose(1, 2).contiguous().view(batch_size, -1, self.d_model)
        return self.out(context)

class FeedForward(nn.Module):
    def __init__(self, d_model, dim_ff, dropout=0.1):
        super().__init__()
        self.linear1 = nn.Linear(d_model, dim_ff)
        self.linear2 = nn.Linear(dim_ff, d_model)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        return self.linear2(self.dropout(self.relu(self.linear1(x))))

class DecoderLayer(nn.Module):
    def __init__(self, d_model, nhead, dim_ff, dropout=0.1):
        super().__init__()
        self.self_attn = MultiHeadAttention(d_model, nhead, dropout)
        self.feed_forward = FeedForward(d_model, dim_ff, dropout)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, mask=None):
        x = self.norm1(x + self.dropout(self.self_attn(x, mask)))
        x = self.norm2(x + self.dropout(self.feed_forward(x)))
        return x

class GPTModel(nn.Module):
    def __init__(self, vocab_size, d_model=256, nhead=8, num_layers=6, dim_ff=1024, max_len=128, dropout=0.1):
        super().__init__()
        self.token_embed = nn.Embedding(vocab_size, d_model)
        self.pos_embed = nn.Parameter(self.create_pe(max_len, d_model), requires_grad=False)
        self.layers = nn.ModuleList([
            DecoderLayer(d_model, nhead, dim_ff, dropout) for _ in range(num_layers)
        ])
        self.fc = nn.Linear(d_model, vocab_size)
        self.d_model = d_model
        self.max_len = max_len
        self.dropout = nn.Dropout(dropout)

    def create_pe(self, max_len, d_model):
        pe = torch.zeros(max_len, d_model)
        pos = torch.arange(0, max_len).unsqueeze(1)
        div = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(pos * div)
        pe[:, 1::2] = torch.cos(pos * div)
        return pe.unsqueeze(0)

    def forward(self, x):
        batch_size, seq_len = x.size()
        x = self.token_embed(x) * math.sqrt(self.d_model) + self.pos_embed[:, :seq_len, :].to(x.device)
        x = self.dropout(x)
        mask = torch.triu(torch.ones(seq_len, seq_len, device=x.device) * float('-inf'), diagonal=1)
        for layer in self.layers:
            x = layer(x, mask)
        return self.fc(x)

In [56]:
model = GPTModel(vocab_size=len(sp), max_len=max_len)

In [61]:
from torch.utils.data import DataLoader
import torch.optim as optim
from torch.cuda.amp import GradScaler, autocast
from tqdm import tqdm
import matplotlib.pyplot as plt

def train(model, train_dataset, val_dataset, sp, max_len, test_data, output_dir="model", epochs=15, lr=0.0001, patience=3):
    os.makedirs(output_dir, exist_ok=True)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    optimizer = optim.AdamW(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss(ignore_index=0)
    scaler = GradScaler()
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=1, verbose=True)

    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, pin_memory=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, pin_memory=True, num_workers=2)

    best_val_loss = float("inf")
    counter = 0
    train_losses, val_losses = [], []

    def translate_sample(input_text):
        model.eval()
        input_sentence = f"[EN] {input_text.strip()} [VI]"
        input_ids = [1] + sp.encode(input_sentence, out_type=int)
        input_ids = input_ids[:max_len-1] + [2]
        input_tensor = torch.tensor(input_ids, dtype=torch.long).unsqueeze(0).to(device)

        with torch.no_grad():
            for _ in range(max_len - len(input_ids)):
                out = model(input_tensor)
                next_token = torch.argmax(out[:, -1, :], dim=-1).item()
                if next_token == 2:
                    break
                input_tensor = torch.cat([input_tensor, torch.tensor([[next_token]], device=device)], dim=1)

        decoded = sp.decode(input_tensor[0].tolist())
        return decoded.split("[VI]")[-1].strip() if "[VI]" in decoded else decoded.strip()

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for x, y in tqdm(train_loader, desc=f"Epoch {epoch+1} Training"):
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            with autocast():
                out = model(x)
                loss = criterion(out.view(-1, out.size(-1)), y.view(-1))
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)
        train_losses.append(avg_loss)

        model.eval()
        total_val_loss = 0
        with torch.no_grad():
            for x, y in val_loader:
                x, y = x.to(device), y.to(device)
                with autocast():
                    out = model(x)
                    loss = criterion(out.view(-1, out.size(-1)), y.view(-1))
                total_val_loss += loss.item()
        avg_val_loss = total_val_loss / len(val_loader)
        val_losses.append(avg_val_loss)

        print(f"Epoch {epoch+1}, Train Loss: {avg_loss:.4f}, Val Loss: {avg_val_loss:.4f}")
        scheduler.step(avg_val_loss)

        print("üß™ Sample Translations (5 sentences):")
        for idx in range(min(5, len(test_data))):
            try:
                item = test_data[idx]
                input_text = item.split("[EN]")[1].split("[VI]")[0].strip()
                ref_text = item.split("[VI]")[-1].strip()
                pred_text = translate_sample(input_text)
                print(f"[{idx+1}]")
                print(f"EN   : {input_text}")
                print(f"REF  : {ref_text}")
                print(f"PRED : {pred_text}")
                print("-" * 50)
            except Exception as e:
                print(f"‚ö†Ô∏è Error at sample {idx}: {e}")

        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            counter = 0
            torch.save(model.state_dict(), os.path.join(output_dir, "best_gpt_model.pth"))
            print("‚úÖ Model saved!")
        else:
            counter += 1
            if counter >= patience:
                print("‚õî Early stopping triggered.")
                break

    plt.figure(figsize=(10, 6))
    plt.plot(train_losses, label="Train Loss", marker='o')
    plt.plot(val_losses, label="Validation Loss", marker='x')
    plt.title("Training vs Validation Loss per Epoch")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(output_dir, "loss_plot.png"))
    plt.close()

In [63]:
# Run Step 6
train(model, train_dataset, val_dataset, sp, max_len, test_data, output_dir="model", epochs=20)

  scaler = GradScaler()
  with autocast():
Epoch 1 Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 13332/13332 [06:45<00:00, 32.89it/s]
  with autocast():


Epoch 1, Train Loss: 2.8485, Val Loss: 2.8657
üß™ Sample Translations (5 sentences):
[1]
EN   : And I would ask him every once in a while , " Dad , do you want it to sound like the record ? "
REF  : ƒê√¥i khi , t√¥i h·ªèi √¥ng r·∫±ng : " B·ªë ∆°i , b·ªë c√≥ mu·ªën ch∆°i gi·ªëng nh∆∞ b·∫£n g·ªëc kh√¥ng ·∫° ? "
PRED : n√≥i v·ªõi √¥ng ·∫•y m·ªói l·∫ßn , " Dad , b·∫°n mu·ªën nghe √¢m thanh nh∆∞ ghi √¢m nh·∫°c v·∫≠y ? "
--------------------------------------------------
[2]
EN   : So , we 're only going to look at the stars inside that small square , although we 've looked at all of them .
REF  : V·∫≠y , ch√∫ng ta s·∫Ω ch·ªâ nh√¨n v√†o nh·ªØng ng√¥i sao b√™n trong h√¨nh vu√¥ng nh·ªè ƒë√≥ , m·∫∑t d√π ch√∫ng ta ƒë√£ nh√¨n t·∫•t c·∫£ ch√∫ng .
PRED : ch√∫ng ta ch·ªâ nh√¨n v√†o c√°c ng√¥i sao vu√¥ng trong vu√¥ng nh·ªè , m·∫∑c d√π ch√∫ng ta nh√¨n v√†o t·∫•t c·∫£ ch√∫ng .
--------------------------------------------------
[3]
EN   : And they travel out into the cosmos at the speed of light .
REF 

Epoch 2 Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 13332/13332 [06:48<00:00, 32.64it/s]


Epoch 2, Train Loss: 2.8198, Val Loss: 2.8489
üß™ Sample Translations (5 sentences):
[1]
EN   : And I would ask him every once in a while , " Dad , do you want it to sound like the record ? "
REF  : ƒê√¥i khi , t√¥i h·ªèi √¥ng r·∫±ng : " B·ªë ∆°i , b·ªë c√≥ mu·ªën ch∆°i gi·ªëng nh∆∞ b·∫£n g·ªëc kh√¥ng ·∫° ? "
PRED : h·ªèi √¥ng ·∫•y m·ªói l·∫ßn trong m·ªôt l√∫c , " Dad , b·∫°n mu·ªën n√≥ nghe nh∆∞ ghi √¢m thanh ? "
--------------------------------------------------
[2]
EN   : So , we 're only going to look at the stars inside that small square , although we 've looked at all of them .
REF  : V·∫≠y , ch√∫ng ta s·∫Ω ch·ªâ nh√¨n v√†o nh·ªØng ng√¥i sao b√™n trong h√¨nh vu√¥ng nh·ªè ƒë√≥ , m·∫∑t d√π ch√∫ng ta ƒë√£ nh√¨n t·∫•t c·∫£ ch√∫ng .
PRED : ch√∫ng ta ch·ªâ nh√¨n v√†o c√°c ng√¥i sao vu√¥ng trong vu√¥ng nh·ªè , m·∫∑c d√π ch√∫ng ta nh√¨n v√†o t·∫•t c·∫£ ch√∫ng .
--------------------------------------------------
[3]
EN   : And they travel out into the cosmos at the speed of light .
REF  

Epoch 3 Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 13332/13332 [06:51<00:00, 32.40it/s]


Epoch 3, Train Loss: 2.7992, Val Loss: 2.8371
üß™ Sample Translations (5 sentences):
[1]
EN   : And I would ask him every once in a while , " Dad , do you want it to sound like the record ? "
REF  : ƒê√¥i khi , t√¥i h·ªèi √¥ng r·∫±ng : " B·ªë ∆°i , b·ªë c√≥ mu·ªën ch∆°i gi·ªëng nh∆∞ b·∫£n g·ªëc kh√¥ng ·∫° ? "
PRED : t√¥i s·∫Ω h·ªèi √¥ng ta m·ªói l·∫ßn trong m·ªôt l√∫c , " Dad , b·∫°n mu·ªën n√≥ nghe nh∆∞ l√† ghi √¢m ? "
--------------------------------------------------
[2]
EN   : So , we 're only going to look at the stars inside that small square , although we 've looked at all of them .
REF  : V·∫≠y , ch√∫ng ta s·∫Ω ch·ªâ nh√¨n v√†o nh·ªØng ng√¥i sao b√™n trong h√¨nh vu√¥ng nh·ªè ƒë√≥ , m·∫∑t d√π ch√∫ng ta ƒë√£ nh√¨n t·∫•t c·∫£ ch√∫ng .
PRED : d·ª• ch√∫ng ta ch·ªâ nh√¨n v√†o c√°c ng√¥i sao nh·ªè b√© x√≠u , m·∫∑c d√π ch√∫ng ta nh√¨n v√†o t·∫•t c·∫£ ch√∫ng .
--------------------------------------------------
[3]
EN   : And they travel out into the cosmos at the speed of light .
REF  

Epoch 4 Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 13332/13332 [06:48<00:00, 32.62it/s]


Epoch 4, Train Loss: 2.7789, Val Loss: 2.8275
üß™ Sample Translations (5 sentences):
[1]
EN   : And I would ask him every once in a while , " Dad , do you want it to sound like the record ? "
REF  : ƒê√¥i khi , t√¥i h·ªèi √¥ng r·∫±ng : " B·ªë ∆°i , b·ªë c√≥ mu·ªën ch∆°i gi·ªëng nh∆∞ b·∫£n g·ªëc kh√¥ng ·∫° ? "
PRED : t√¥i h·ªèi √¥ng t·ª´ng l·∫ßn trong m·ªôt l√∫c , " Dad , anh mu·ªën n√≥ nghe nh∆∞ b·∫£n ghi √¢m nh·∫°c ? "
--------------------------------------------------
[2]
EN   : So , we 're only going to look at the stars inside that small square , although we 've looked at all of them .
REF  : V·∫≠y , ch√∫ng ta s·∫Ω ch·ªâ nh√¨n v√†o nh·ªØng ng√¥i sao b√™n trong h√¨nh vu√¥ng nh·ªè ƒë√≥ , m·∫∑t d√π ch√∫ng ta ƒë√£ nh√¨n t·∫•t c·∫£ ch√∫ng .
PRED : d·ª• ch√∫ng ta ch·ªâ nh√¨n v√†o c√°c ng√¥i sao vu√¥ng nh·ªè , m·∫∑c d√π ch√∫ng ta nh√¨n v√†o t·∫•t c·∫£ ch√∫ng .
--------------------------------------------------
[3]
EN   : And they travel out into the cosmos at the speed of light .
REF  : 

Epoch 5 Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 13332/13332 [06:49<00:00, 32.57it/s]


Epoch 5, Train Loss: 2.7610, Val Loss: 2.8139
üß™ Sample Translations (5 sentences):
[1]
EN   : And I would ask him every once in a while , " Dad , do you want it to sound like the record ? "
REF  : ƒê√¥i khi , t√¥i h·ªèi √¥ng r·∫±ng : " B·ªë ∆°i , b·ªë c√≥ mu·ªën ch∆°i gi·ªëng nh∆∞ b·∫£n g·ªëc kh√¥ng ·∫° ? "
PRED : t√¥i h·ªèi anh ta m·ªói l·∫ßn trong m·ªôt th·ªùi gian , " Dad , b·∫°n mu·ªën n√≥ nghe nh∆∞ ghi √¢m nh·∫°c ? "
--------------------------------------------------
[2]
EN   : So , we 're only going to look at the stars inside that small square , although we 've looked at all of them .
REF  : V·∫≠y , ch√∫ng ta s·∫Ω ch·ªâ nh√¨n v√†o nh·ªØng ng√¥i sao b√™n trong h√¨nh vu√¥ng nh·ªè ƒë√≥ , m·∫∑t d√π ch√∫ng ta ƒë√£ nh√¨n t·∫•t c·∫£ ch√∫ng .
PRED : ch√∫ng ta ch·ªâ nh√¨n v√†o c√°c ng√¥i sao nh·ªè trong h√¨nh vu√¥ng nh·ªè , m·∫∑c d√π ch√∫ng ta nh√¨n v√†o t·∫•t c·∫£ ch√∫ng .
--------------------------------------------------
[3]
EN   : And they travel out into the cosmos at the speed o

Epoch 6 Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 13332/13332 [06:48<00:00, 32.65it/s]


Epoch 6, Train Loss: 2.7456, Val Loss: 2.8067
üß™ Sample Translations (5 sentences):
[1]
EN   : And I would ask him every once in a while , " Dad , do you want it to sound like the record ? "
REF  : ƒê√¥i khi , t√¥i h·ªèi √¥ng r·∫±ng : " B·ªë ∆°i , b·ªë c√≥ mu·ªën ch∆°i gi·ªëng nh∆∞ b·∫£n g·ªëc kh√¥ng ·∫° ? "
PRED : t√¥i h·ªèi √¥ng t·ª´ng c√≥ m·ªôt l·∫ßn trong m·ªôt l·∫ßn , " Dad , b·∫°n mu·ªën n√≥ nghe nh∆∞ ghi √¢m nh·∫°c ? "
--------------------------------------------------
[2]
EN   : So , we 're only going to look at the stars inside that small square , although we 've looked at all of them .
REF  : V·∫≠y , ch√∫ng ta s·∫Ω ch·ªâ nh√¨n v√†o nh·ªØng ng√¥i sao b√™n trong h√¨nh vu√¥ng nh·ªè ƒë√≥ , m·∫∑t d√π ch√∫ng ta ƒë√£ nh√¨n t·∫•t c·∫£ ch√∫ng .
PRED : ch√∫ng ta ch·ªâ nh√¨n v√†o c√°c ng√¥i sao tr√™n h√¨nh vu√¥ng nh·ªè , m·∫∑c d√π ch√∫ng ta nh√¨n v√†o t·∫•t c·∫£ ch√∫ng .
--------------------------------------------------
[3]
EN   : And they travel out into the cosmos at the speed of l

Epoch 7 Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 13332/13332 [06:49<00:00, 32.55it/s]


Epoch 7, Train Loss: 2.7307, Val Loss: 2.7992
üß™ Sample Translations (5 sentences):
[1]
EN   : And I would ask him every once in a while , " Dad , do you want it to sound like the record ? "
REF  : ƒê√¥i khi , t√¥i h·ªèi √¥ng r·∫±ng : " B·ªë ∆°i , b·ªë c√≥ mu·ªën ch∆°i gi·ªëng nh∆∞ b·∫£n g·ªëc kh√¥ng ·∫° ? "
PRED : t√¥i h·ªèi √¥ng m·ªói l·∫ßn , " Dad , anh mu·ªën n√≥ nghe √¢m thanh nh∆∞ ghi √¢m ? "
--------------------------------------------------
[2]
EN   : So , we 're only going to look at the stars inside that small square , although we 've looked at all of them .
REF  : V·∫≠y , ch√∫ng ta s·∫Ω ch·ªâ nh√¨n v√†o nh·ªØng ng√¥i sao b√™n trong h√¨nh vu√¥ng nh·ªè ƒë√≥ , m·∫∑t d√π ch√∫ng ta ƒë√£ nh√¨n t·∫•t c·∫£ ch√∫ng .
PRED : ch√∫ng ta ch·ªâ nh√¨n nh·ªØng ng√¥i sao trong h√¨nh vu√¥ng nh·ªè , m·∫∑c d√π ch√∫ng ta nh√¨n v√†o t·∫•t c·∫£ ch√∫ng .
--------------------------------------------------
[3]
EN   : And they travel out into the cosmos at the speed of light .
REF  : V√† ch√∫ng ƒëi v

Epoch 8 Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 13332/13332 [06:46<00:00, 32.79it/s]


Epoch 8, Train Loss: 2.7161, Val Loss: 2.7928
üß™ Sample Translations (5 sentences):
[1]
EN   : And I would ask him every once in a while , " Dad , do you want it to sound like the record ? "
REF  : ƒê√¥i khi , t√¥i h·ªèi √¥ng r·∫±ng : " B·ªë ∆°i , b·ªë c√≥ mu·ªën ch∆°i gi·ªëng nh∆∞ b·∫£n g·ªëc kh√¥ng ·∫° ? "
PRED : h·ªèi anh ta m·ªói l·∫ßn trong m·ªôt l√∫c , " Dad , anh mu·ªën n√≥ nghe nh∆∞ l√† ghi √¢m ? "
--------------------------------------------------
[2]
EN   : So , we 're only going to look at the stars inside that small square , although we 've looked at all of them .
REF  : V·∫≠y , ch√∫ng ta s·∫Ω ch·ªâ nh√¨n v√†o nh·ªØng ng√¥i sao b√™n trong h√¨nh vu√¥ng nh·ªè ƒë√≥ , m·∫∑t d√π ch√∫ng ta ƒë√£ nh√¨n t·∫•t c·∫£ ch√∫ng .
PRED : ch√∫ng ta ch·ªâ nh√¨n v√†o c√°c ng√¥i sao trong h√¨nh vu√¥ng nh·ªè b√© , m·∫∑c d√π ch√∫ng ta nh√¨n v√†o t·∫•t c·∫£ ch√∫ng .
--------------------------------------------------
[3]
EN   : And they travel out into the cosmos at the speed of light .
REF  : V√

Epoch 9 Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 13332/13332 [06:46<00:00, 32.80it/s]


Epoch 9, Train Loss: 2.7043, Val Loss: 2.7914
üß™ Sample Translations (5 sentences):
[1]
EN   : And I would ask him every once in a while , " Dad , do you want it to sound like the record ? "
REF  : ƒê√¥i khi , t√¥i h·ªèi √¥ng r·∫±ng : " B·ªë ∆°i , b·ªë c√≥ mu·ªën ch∆°i gi·ªëng nh∆∞ b·∫£n g·ªëc kh√¥ng ·∫° ? "
PRED : t√¥i h·ªèi √¥ng ta m·ªói l·∫ßn trong m·ªôt th·ªùi gian , " Dad , anh mu·ªën n√≥ nghe nh∆∞ ghi √¢m nh·∫°c ? "
--------------------------------------------------
[2]
EN   : So , we 're only going to look at the stars inside that small square , although we 've looked at all of them .
REF  : V·∫≠y , ch√∫ng ta s·∫Ω ch·ªâ nh√¨n v√†o nh·ªØng ng√¥i sao b√™n trong h√¨nh vu√¥ng nh·ªè ƒë√≥ , m·∫∑t d√π ch√∫ng ta ƒë√£ nh√¨n t·∫•t c·∫£ ch√∫ng .
PRED : ch√∫ng ta ch·ªâ nh√¨n v√†o c√°c ng√¥i sao nh·ªè b√™n trong h√¨nh vu√¥ng nh·ªè , m·∫∑c d√π ch√∫ng ta nh√¨n v√†o t·∫•t c·∫£ ch√∫ng .
--------------------------------------------------
[3]
EN   : And they travel out into the cosmos at the spe

Epoch 10 Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 13332/13332 [06:43<00:00, 33.04it/s]


Epoch 10, Train Loss: 2.6919, Val Loss: 2.7811
üß™ Sample Translations (5 sentences):
[1]
EN   : And I would ask him every once in a while , " Dad , do you want it to sound like the record ? "
REF  : ƒê√¥i khi , t√¥i h·ªèi √¥ng r·∫±ng : " B·ªë ∆°i , b·ªë c√≥ mu·ªën ch∆°i gi·ªëng nh∆∞ b·∫£n g·ªëc kh√¥ng ·∫° ? "
PRED : t√¥i h·ªèi √¥ng ta m·ªói l·∫ßn trong m·ªôt th·ªùi gian , " B·ªë , anh mu·ªën n√≥ nghe nh∆∞ ghi √¢m ? "
--------------------------------------------------
[2]
EN   : So , we 're only going to look at the stars inside that small square , although we 've looked at all of them .
REF  : V·∫≠y , ch√∫ng ta s·∫Ω ch·ªâ nh√¨n v√†o nh·ªØng ng√¥i sao b√™n trong h√¨nh vu√¥ng nh·ªè ƒë√≥ , m·∫∑t d√π ch√∫ng ta ƒë√£ nh√¨n t·∫•t c·∫£ ch√∫ng .
PRED : ch√∫ng ta ch·ªâ nh√¨n v√†o nh·ªØng ng√¥i sao nh·ªè b√© nh·ªè , m·∫∑c d√π ch√∫ng ta nh√¨n v√†o t·∫•t c·∫£ ch√∫ng .
--------------------------------------------------
[3]
EN   : And they travel out into the cosmos at the speed of light .
REF  : V

Epoch 11 Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 13332/13332 [06:44<00:00, 32.95it/s]


Epoch 11, Train Loss: 2.6808, Val Loss: 2.7803
üß™ Sample Translations (5 sentences):
[1]
EN   : And I would ask him every once in a while , " Dad , do you want it to sound like the record ? "
REF  : ƒê√¥i khi , t√¥i h·ªèi √¥ng r·∫±ng : " B·ªë ∆°i , b·ªë c√≥ mu·ªën ch∆°i gi·ªëng nh∆∞ b·∫£n g·ªëc kh√¥ng ·∫° ? "
PRED : t√¥i h·ªèi √¥ng ·∫•y m·ªói l·∫ßn trong m·ªôt th·ªùi gian , " M·∫π , anh mu·ªën n√≥ nghe nh∆∞ ghi √¢m ? "
--------------------------------------------------
[2]
EN   : So , we 're only going to look at the stars inside that small square , although we 've looked at all of them .
REF  : V·∫≠y , ch√∫ng ta s·∫Ω ch·ªâ nh√¨n v√†o nh·ªØng ng√¥i sao b√™n trong h√¨nh vu√¥ng nh·ªè ƒë√≥ , m·∫∑t d√π ch√∫ng ta ƒë√£ nh√¨n t·∫•t c·∫£ ch√∫ng .
PRED : ch√∫ng ta ch·ªâ nh√¨n v√†o c√°c ng√¥i sao trong h√¨nh vu√¥ng nh·ªè , m·∫∑c d√π ch√∫ng ta nh√¨n v√†o t·∫•t c·∫£ ch√∫ng .
--------------------------------------------------
[3]
EN   : And they travel out into the cosmos at the speed of light .


Epoch 12 Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 13332/13332 [06:45<00:00, 32.89it/s]


Epoch 12, Train Loss: 2.6693, Val Loss: 2.7690
üß™ Sample Translations (5 sentences):
[1]
EN   : And I would ask him every once in a while , " Dad , do you want it to sound like the record ? "
REF  : ƒê√¥i khi , t√¥i h·ªèi √¥ng r·∫±ng : " B·ªë ∆°i , b·ªë c√≥ mu·ªën ch∆°i gi·ªëng nh∆∞ b·∫£n g·ªëc kh√¥ng ·∫° ? "
PRED : t√¥i h·ªèi √¥ng ta m·ªói khi trong m·ªôt th·ªùi gian , " B·ªë ∆°i , anh mu·ªën √¢m thanh ƒë√≥ nh∆∞ ghi √¢m ? "
--------------------------------------------------
[2]
EN   : So , we 're only going to look at the stars inside that small square , although we 've looked at all of them .
REF  : V·∫≠y , ch√∫ng ta s·∫Ω ch·ªâ nh√¨n v√†o nh·ªØng ng√¥i sao b√™n trong h√¨nh vu√¥ng nh·ªè ƒë√≥ , m·∫∑t d√π ch√∫ng ta ƒë√£ nh√¨n t·∫•t c·∫£ ch√∫ng .
PRED : ch√∫ng ta ch·ªâ nh√¨n v√†o c√°c ng√¥i sao trong h√¨nh vu√¥ng nh·ªè , m·∫∑c d√π ch√∫ng ta nh√¨n v√†o t·∫•t c·∫£ ch√∫ng .
--------------------------------------------------
[3]
EN   : And they travel out into the cosmos at the speed of li

Epoch 13 Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 13332/13332 [06:46<00:00, 32.77it/s]


Epoch 13, Train Loss: 2.6603, Val Loss: 2.7634
üß™ Sample Translations (5 sentences):
[1]
EN   : And I would ask him every once in a while , " Dad , do you want it to sound like the record ? "
REF  : ƒê√¥i khi , t√¥i h·ªèi √¥ng r·∫±ng : " B·ªë ∆°i , b·ªë c√≥ mu·ªën ch∆°i gi·ªëng nh∆∞ b·∫£n g·ªëc kh√¥ng ·∫° ? "
PRED : t√¥i h·ªèi √¥ng ta t·ª´ng c√≥ m·ªôt l·∫ßn trong m·ªôt l√∫c , " Dad , anh mu·ªën √¢m thanh ƒë√≥ nghe nh∆∞ l√† b·∫£n ghi √¢m ? "
--------------------------------------------------
[2]
EN   : So , we 're only going to look at the stars inside that small square , although we 've looked at all of them .
REF  : V·∫≠y , ch√∫ng ta s·∫Ω ch·ªâ nh√¨n v√†o nh·ªØng ng√¥i sao b√™n trong h√¨nh vu√¥ng nh·ªè ƒë√≥ , m·∫∑t d√π ch√∫ng ta ƒë√£ nh√¨n t·∫•t c·∫£ ch√∫ng .
PRED : ch√∫ng ta ch·ªâ nh√¨n v√†o c√°c ng√¥i sao trong h√¨nh vu√¥ng nh·ªè , m·∫∑c d√π ch√∫ng ta nh√¨n v√†o t·∫•t c·∫£ ch√∫ng .
--------------------------------------------------
[3]
EN   : And they travel out into the cosmos at

Epoch 14 Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 13332/13332 [06:47<00:00, 32.69it/s]


Epoch 14, Train Loss: 2.6498, Val Loss: 2.7593
üß™ Sample Translations (5 sentences):
[1]
EN   : And I would ask him every once in a while , " Dad , do you want it to sound like the record ? "
REF  : ƒê√¥i khi , t√¥i h·ªèi √¥ng r·∫±ng : " B·ªë ∆°i , b·ªë c√≥ mu·ªën ch∆°i gi·ªëng nh∆∞ b·∫£n g·ªëc kh√¥ng ·∫° ? "
PRED : t√¥i h·ªèi anh ta m·ªói khi trong m·ªôt th·ªùi gian , " Dad , anh mu·ªën n√≥ nghe √¢m thanh nh∆∞ b·∫£n ghi √¢m ? "
--------------------------------------------------
[2]
EN   : So , we 're only going to look at the stars inside that small square , although we 've looked at all of them .
REF  : V·∫≠y , ch√∫ng ta s·∫Ω ch·ªâ nh√¨n v√†o nh·ªØng ng√¥i sao b√™n trong h√¨nh vu√¥ng nh·ªè ƒë√≥ , m·∫∑t d√π ch√∫ng ta ƒë√£ nh√¨n t·∫•t c·∫£ ch√∫ng .
PRED : ch√∫ng ta ch·ªâ nh√¨n v√†o c√°c ng√¥i sao nh·ªè b√™n trong h√¨nh vu√¥ng nh·ªè , m·∫∑c d√π ch√∫ng ta nh√¨n v√†o t·∫•t c·∫£ ch√∫ng .
--------------------------------------------------
[3]
EN   : And they travel out into the cosmos at 

Epoch 15 Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 13332/13332 [06:51<00:00, 32.37it/s]


Epoch 15, Train Loss: 2.6419, Val Loss: 2.7579
üß™ Sample Translations (5 sentences):
[1]
EN   : And I would ask him every once in a while , " Dad , do you want it to sound like the record ? "
REF  : ƒê√¥i khi , t√¥i h·ªèi √¥ng r·∫±ng : " B·ªë ∆°i , b·ªë c√≥ mu·ªën ch∆°i gi·ªëng nh∆∞ b·∫£n g·ªëc kh√¥ng ·∫° ? "
PRED : t√¥i s·∫Ω h·ªèi anh ta m·ªói l·∫ßn trong m·ªôt l√∫c , " B·ªë , anh mu·ªën n√≥ nghe nh∆∞ b·∫£n l∆∞u tr·ªØ ? "
--------------------------------------------------
[2]
EN   : So , we 're only going to look at the stars inside that small square , although we 've looked at all of them .
REF  : V·∫≠y , ch√∫ng ta s·∫Ω ch·ªâ nh√¨n v√†o nh·ªØng ng√¥i sao b√™n trong h√¨nh vu√¥ng nh·ªè ƒë√≥ , m·∫∑t d√π ch√∫ng ta ƒë√£ nh√¨n t·∫•t c·∫£ ch√∫ng .
PRED : ch√∫ng ta ch·ªâ nh√¨n v√†o c√°c ng√¥i sao nh·ªè b√™n trong h√¨nh vu√¥ng nh·ªè , m·∫∑c d√π ch√∫ng ta nh√¨n v√†o t·∫•t c·∫£ ch√∫ng .
--------------------------------------------------
[3]
EN   : And they travel out into the cosmos at the sp

Epoch 16 Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 13332/13332 [06:52<00:00, 32.36it/s]


Epoch 16, Train Loss: 2.6333, Val Loss: 2.7550
üß™ Sample Translations (5 sentences):
[1]
EN   : And I would ask him every once in a while , " Dad , do you want it to sound like the record ? "
REF  : ƒê√¥i khi , t√¥i h·ªèi √¥ng r·∫±ng : " B·ªë ∆°i , b·ªë c√≥ mu·ªën ch∆°i gi·ªëng nh∆∞ b·∫£n g·ªëc kh√¥ng ·∫° ? "
PRED : h·ªèi anh ·∫•y m·ªôt l·∫ßn trong m·ªôt l√∫c , " B·ªë , anh mu·ªën n√≥ nghe nh∆∞ b·∫£n ghi √¢m ? "
--------------------------------------------------
[2]
EN   : So , we 're only going to look at the stars inside that small square , although we 've looked at all of them .
REF  : V·∫≠y , ch√∫ng ta s·∫Ω ch·ªâ nh√¨n v√†o nh·ªØng ng√¥i sao b√™n trong h√¨nh vu√¥ng nh·ªè ƒë√≥ , m·∫∑t d√π ch√∫ng ta ƒë√£ nh√¨n t·∫•t c·∫£ ch√∫ng .
PRED : ch√∫ng ta ch·ªâ nh√¨n v√†o c√°c ng√¥i sao nh·ªè b√™n trong h√¨nh vu√¥ng nh·ªè , m·∫∑c d√π ch√∫ng ta nh√¨n v√†o t·∫•t c·∫£ ch√∫ng .
--------------------------------------------------
[3]
EN   : And they travel out into the cosmos at the speed of ligh

Epoch 17 Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 13332/13332 [06:52<00:00, 32.31it/s]


Epoch 17, Train Loss: 2.6249, Val Loss: 2.7519
üß™ Sample Translations (5 sentences):
[1]
EN   : And I would ask him every once in a while , " Dad , do you want it to sound like the record ? "
REF  : ƒê√¥i khi , t√¥i h·ªèi √¥ng r·∫±ng : " B·ªë ∆°i , b·ªë c√≥ mu·ªën ch∆°i gi·ªëng nh∆∞ b·∫£n g·ªëc kh√¥ng ·∫° ? "
PRED : t√¥i h·ªèi anh ta m·ªói khi trong m·ªôt th·ªùi gian , " B·ªë ∆°i , anh mu·ªën n√≥ nh∆∞ b·∫£n ghi √¢m ? "
--------------------------------------------------
[2]
EN   : So , we 're only going to look at the stars inside that small square , although we 've looked at all of them .
REF  : V·∫≠y , ch√∫ng ta s·∫Ω ch·ªâ nh√¨n v√†o nh·ªØng ng√¥i sao b√™n trong h√¨nh vu√¥ng nh·ªè ƒë√≥ , m·∫∑t d√π ch√∫ng ta ƒë√£ nh√¨n t·∫•t c·∫£ ch√∫ng .
PRED : ch√∫ng ta ch·ªâ nh√¨n v√†o c√°c ng√¥i sao trong ng√¥i sao nh·ªè ƒë√≥ , m·∫∑c d√π ch√∫ng ta nh√¨n v√†o t·∫•t c·∫£ ch√∫ng .
--------------------------------------------------
[3]
EN   : And they travel out into the cosmos at the speed of light 

Epoch 18 Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 13332/13332 [06:51<00:00, 32.37it/s]


Epoch 18, Train Loss: 2.6165, Val Loss: 2.7483
üß™ Sample Translations (5 sentences):
[1]
EN   : And I would ask him every once in a while , " Dad , do you want it to sound like the record ? "
REF  : ƒê√¥i khi , t√¥i h·ªèi √¥ng r·∫±ng : " B·ªë ∆°i , b·ªë c√≥ mu·ªën ch∆°i gi·ªëng nh∆∞ b·∫£n g·ªëc kh√¥ng ·∫° ? "
PRED : h·ªèi anh ta m·ªói khi , " Dad , anh mu·ªën n√≥ nghe nh∆∞ b·∫£n ghi √¢m ? "
--------------------------------------------------
[2]
EN   : So , we 're only going to look at the stars inside that small square , although we 've looked at all of them .
REF  : V·∫≠y , ch√∫ng ta s·∫Ω ch·ªâ nh√¨n v√†o nh·ªØng ng√¥i sao b√™n trong h√¨nh vu√¥ng nh·ªè ƒë√≥ , m·∫∑t d√π ch√∫ng ta ƒë√£ nh√¨n t·∫•t c·∫£ ch√∫ng .
PRED : ch√∫ng ta ch·ªâ nh√¨n v√†o c√°c ng√¥i sao nh·ªè b√™n trong h√¨nh vu√¥ng nh·ªè ƒë√≥ , m·∫∑c d√π ch√∫ng ta nh√¨n v√†o ch√∫ng .
--------------------------------------------------
[3]
EN   : And they travel out into the cosmos at the speed of light .
REF  : V√† ch√∫ng ƒëi v√

Epoch 19 Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 13332/13332 [06:51<00:00, 32.38it/s]


Epoch 19, Train Loss: 2.6108, Val Loss: 2.7461
üß™ Sample Translations (5 sentences):
[1]
EN   : And I would ask him every once in a while , " Dad , do you want it to sound like the record ? "
REF  : ƒê√¥i khi , t√¥i h·ªèi √¥ng r·∫±ng : " B·ªë ∆°i , b·ªë c√≥ mu·ªën ch∆°i gi·ªëng nh∆∞ b·∫£n g·ªëc kh√¥ng ·∫° ? "
PRED : t√¥i s·∫Ω h·ªèi anh ta m·ªói khi , " B·ªë ∆°i , anh mu·ªën n√≥ nghe nh∆∞ b·∫£n ghi √¢m ? "
--------------------------------------------------
[2]
EN   : So , we 're only going to look at the stars inside that small square , although we 've looked at all of them .
REF  : V·∫≠y , ch√∫ng ta s·∫Ω ch·ªâ nh√¨n v√†o nh·ªØng ng√¥i sao b√™n trong h√¨nh vu√¥ng nh·ªè ƒë√≥ , m·∫∑t d√π ch√∫ng ta ƒë√£ nh√¨n t·∫•t c·∫£ ch√∫ng .
PRED : ch√∫ng t√¥i ch·ªâ nh√¨n v√†o c√°c ng√¥i sao nh·ªè b√™n trong h√¨nh vu√¥ng nh·ªè , m·∫∑c d√π ch√∫ng t√¥i ƒë√£ nh√¨n v√†o ch√∫ng .
--------------------------------------------------
[3]
EN   : And they travel out into the cosmos at the speed of light .
REF  

Epoch 20 Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 13332/13332 [06:51<00:00, 32.37it/s]


Epoch 20, Train Loss: 2.6035, Val Loss: 2.7440
üß™ Sample Translations (5 sentences):
[1]
EN   : And I would ask him every once in a while , " Dad , do you want it to sound like the record ? "
REF  : ƒê√¥i khi , t√¥i h·ªèi √¥ng r·∫±ng : " B·ªë ∆°i , b·ªë c√≥ mu·ªën ch∆°i gi·ªëng nh∆∞ b·∫£n g·ªëc kh√¥ng ·∫° ? "
PRED : h·ªèi √¥ng ·∫•y m·ªôt l·∫ßn trong m·ªôt l√∫c , " Dad , anh mu·ªën n√≥ nghe nh∆∞ b·∫£n ghi √¢m ? "
--------------------------------------------------
[2]
EN   : So , we 're only going to look at the stars inside that small square , although we 've looked at all of them .
REF  : V·∫≠y , ch√∫ng ta s·∫Ω ch·ªâ nh√¨n v√†o nh·ªØng ng√¥i sao b√™n trong h√¨nh vu√¥ng nh·ªè ƒë√≥ , m·∫∑t d√π ch√∫ng ta ƒë√£ nh√¨n t·∫•t c·∫£ ch√∫ng .
PRED : ch√∫ng ta ch·ªâ nh√¨n v√†o nh·ªØng ng√¥i sao nh·ªè b√™n trong h√¨nh vu√¥ng nh·ªè , m·∫∑c d√π ch√∫ng ta ƒë√£ nh√¨n v√†o t·∫•t c·∫£ ch√∫ng .
--------------------------------------------------
[3]
EN   : And they travel out into the cosmos at the speed

In [64]:
# Step 7: Evaluate Model
import evaluate
from tqdm import tqdm

def evaluate_model_metrics(model, sp, test_data, max_len, output_dir="eval"):
    os.makedirs(output_dir, exist_ok=True)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    rouge = evaluate.load('rouge')
    bleu = evaluate.load('sacrebleu')
    predictions = []
    references = []

    def translate_sample(input_text):
        input_sentence = f"[EN] {input_text.strip()} [VI]"
        input_ids = [1] + sp.encode(input_sentence, out_type=int)
        input_ids = input_ids[:max_len-1] + [2]
        input_tensor = torch.tensor(input_ids, dtype=torch.long).unsqueeze(0).to(device)

        with torch.no_grad():
            for _ in range(max_len - len(input_ids)):
                out = model(input_tensor)
                next_token = torch.argmax(out[:, -1, :], dim=-1).item()
                if next_token == 2:
                    break
                input_tensor = torch.cat([input_tensor, torch.tensor([[next_token]], device=device)], dim=1)

        decoded = sp.decode(input_tensor[0].tolist())
        return decoded.split("[VI]")[-1].strip() if "[VI]" in decoded else decoded.strip()

    for item in tqdm(test_data[:200], desc="Evaluating"):
        input_text = item.split('[VI]')[0].replace('[EN]', '').strip()
        ref = item.split('[VI]')[-1].strip()
        pred = translate_sample(input_text)
        predictions.append(pred)
        references.append([ref])

    rouge_result = rouge.compute(predictions=predictions, references=[r[0] for r in references])
    bleu_result = bleu.compute(predictions=predictions, references=references)

    print("ROUGE:", rouge_result)
    print(f"BLEU: {bleu_result['score']:.2f}")

    with open(os.path.join(output_dir, "eval_results.txt"), 'w') as f:
        f.write(f"ROUGE: {rouge_result}\n")
        f.write(f"BLEU: {bleu_result['score']:.2f}\n")

In [65]:
# Run Step 7
evaluate_model_metrics(model, sp, test_data, max_len, output_dir="eval")

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/8.15k [00:00<?, ?B/s]

Evaluating: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:24<00:00,  8.04it/s]


ROUGE: {'rouge1': 0.615170469779794, 'rouge2': 0.3711888026499641, 'rougeL': 0.5191040570084184, 'rougeLsum': 0.5190000486754944}
BLEU: 18.41


In [66]:
# Step 8: Translate Text
def translate(model, sp, input_text=None, test_data=None, max_len=128, num_examples=5):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    translations = []

    def translate_sample(text):
        input_sentence = f"[EN] {text.strip()} [VI]"
        input_ids = [1] + sp.encode(input_sentence, out_type=int)
        input_ids = input_ids[:max_len-1] + [2]
        input_tensor = torch.tensor(input_ids, dtype=torch.long).unsqueeze(0).to(device)

        with torch.no_grad():
            for _ in range(max_len - len(input_ids)):
                out = model(input_tensor)
                next_token = torch.argmax(out[:, -1, :], dim=-1).item()
                if next_token == 2:
                    break
                input_tensor = torch.cat([input_tensor, torch.tensor([[next_token]], device=device)], dim=1)

        decoded = sp.decode(input_tensor[0].tolist())
        return decoded.split("[VI]")[-1].strip() if "[VI]" in decoded else decoded.strip()

    if input_text:
        translation = translate_sample(input_text)
        translations.append({"input": input_text, "translation": translation, "reference": None})

    if test_data:
        for item in test_data[:num_examples]:
            en_part = item.split('[VI]')[0].replace('[EN]', '').strip()
            reference = item.split('[VI]')[-1].strip()
            translation = translate_sample(en_part)
            translations.append({"input": en_part, "translation": translation, "reference": reference})

    for i, t in enumerate(translations):
        print(f"\nExample {i + 1}:")
        print(f"Input (EN): {t['input']}")
        print(f"Translation (VI): {t['translation']}")
        if t['reference']:
            print(f"Reference (VI): {t['reference']}")

In [69]:
# Run Step 8
translate(model, sp, input_text="Hello, how are you?", test_data=val_data, max_len=max_len, num_examples=10)


Example 1:
Input (EN): Hello, how are you?
Translation (VI): trai : B·∫°n c√≥ th·ªÉ l√†m theo d√µi theo d√µi .

Example 2:
Input (EN): It 's already happening . It 's not science fiction .
Translation (VI): chƒÉng n·ªØa . N√≥ kh√¥ng ph·∫£i l√† khoa h·ªçc vi·ªÖn t∆∞·ªüng .
Reference (VI): ƒêi·ªÅu n√†y th·ª±c s·ª± ƒëang x·∫£y ra , kh√¥ng ph·∫£i trong khoa h·ªçc vi·ªÖn t∆∞·ªüng .

Example 3:
Input (EN): Anticipation made them happy .
Translation (VI): b·∫°i li·ªát khi·∫øn h·ªç h·∫°nh ph√∫c .
Reference (VI): Mong ƒë·ª£i khi·∫øn h·ªç vui .

Example 4:
Input (EN): He was 94 when this photograph was taken .
Translation (VI): trai 94 khi t·∫•m h√¨nh n√†y ƒë∆∞·ª£c ch·ª•p .
Reference (VI): B·ª©c ·∫£nh n√†y ch·ª•p khi √¥ng ·∫•y 94 tu·ªïi

Example 5:
Input (EN): Is this proposition true ? Is this theory a good theory ?
Translation (VI): g·ª£i √Ω ƒë√∫ng kh√¥ng ? L√Ω thuy·∫øt n√†y c√≥ ph·∫£i l√† l√Ω thuy·∫øt t·ªët kh√¥ng ?
Reference (VI): Li·ªáu ƒë·ªÅ xu·∫•t n√†y c√≥ ƒë√∫ng ? Li·ªáu l√Ω thuy·∫øt n√