In [1]:
# =============================================================================
# STEP 1: SETUP AND IMPORTS
# =============================================================================
# This cell installs necessary libraries, downloads the dataset, and imports modules.

!pip install torch torchtext==0.17.0 tqdm sacrebleu -q

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import unicodedata
import re
import random
from collections import Counter
from tqdm import tqdm
import math
import time
import sacrebleu

# Download and extract the English-Indonesian dataset
!wget -q http://www.manythings.org/anki/ind-eng.zip
!unzip -q ind-eng.zip

print("✅ Setup Complete. Dataset is ready.")




[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m892.5 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m755.5/755.5 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m410.6/410.6 MB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.1/14.1 MB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m38.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[

In [2]:
# =============================================================================
# STEP 2: DATA PREPARATION
# =============================================================================
# This section contains all functions for loading, cleaning, and preparing the data.

# --- Define special tokens and their indices ---
SPECIALS = ["<pad>", "<bos>", "<eos>", "<unk>"]
PAD_IDX, BOS_IDX, EOS_IDX, UNK_IDX = 0, 1, 2, 3

def normalize_and_tokenize(s: str):
    """Cleans and tokenizes a string."""
    s = s.lower().strip()
    # Add space before punctuation
    s = re.sub(r"([.!?])", r" \1", s)
    # Replace non-alphanumeric characters with spaces
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    s = re.sub(r"\s+", " ", s).strip()
    return s.split()

def load_pairs(path, max_pairs=10000):
    """Loads and tokenizes sentence pairs from a file."""
    pairs = []
    with open(path, encoding="utf-8") as f:
        for line in f:
            cols = line.rstrip("\n").split("\t")
            if len(cols) < 2: continue
            src, tgt = cols[0], cols[1] # English, Indonesian
            src_tokens = normalize_and_tokenize(src)
            tgt_tokens = normalize_and_tokenize(tgt)
            if src_tokens and tgt_tokens:
                pairs.append((src_tokens, tgt_tokens))
    random.shuffle(pairs)
    return pairs[:max_pairs]

def build_vocab(token_lists, min_freq=2):
    """Builds a vocabulary from a list of tokenized sentences."""
    counter = Counter(tok for tokens in token_lists for tok in tokens)
    vocab = {sp: i for i, sp in enumerate(SPECIALS)}
    for word, freq in counter.items():
        if freq >= min_freq:
            vocab[word] = len(vocab)
    itos = {i: w for w, i in vocab.items()}
    return vocab, itos

def to_ids(tokens, vocab):
    """Converts a list of tokens to a list of IDs."""
    return [BOS_IDX] + [vocab.get(t, UNK_IDX) for t in tokens] + [EOS_IDX]

class NMTDataset(Dataset):
    """Custom PyTorch Dataset for NMT."""
    def __init__(self, pairs, src_vocab, trg_vocab):
        self.data = []
        for src, trg in pairs:
            src_ids = torch.tensor(to_ids(src, src_vocab), dtype=torch.long)
            trg_ids = torch.tensor(to_ids(trg, trg_vocab), dtype=torch.long)
            self.data.append((src_ids, trg_ids))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

def collate_batch(batch):
    """Pads sequences in a batch to the same length."""
    src_list, trg_list = [], []
    for _src, _trg in batch:
        src_list.append(_src)
        trg_list.append(_trg)
    src_pad = nn.utils.rnn.pad_sequence(src_list, padding_value=PAD_IDX)
    trg_pad = nn.utils.rnn.pad_sequence(trg_list, padding_value=PAD_IDX)
    return src_pad, trg_pad

# --- Execute Data Preparation ---
pairs = load_pairs("ind.txt", max_pairs=15000)

# Split data: 80% train, 10% validation, 10% test
n_train = int(len(pairs) * 0.8)
n_val = int(len(pairs) * 0.1)
train_pairs, val_pairs, test_pairs = pairs[:n_train], pairs[n_train:n_train+n_val], pairs[n_train+n_val:]

# Build vocabularies from training data
en_vocab, en_itos = build_vocab([p[0] for p in train_pairs])
id_vocab, id_itos = build_vocab([p[1] for p in train_pairs])

# Create DataLoaders
BATCH_SIZE = 64
train_loader = DataLoader(NMTDataset(train_pairs, en_vocab, id_vocab), batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_batch)
val_loader = DataLoader(NMTDataset(val_pairs, en_vocab, id_vocab), batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_batch)
test_loader = DataLoader(NMTDataset(test_pairs, en_vocab, id_vocab), batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_batch)

print(f"✅ Data prepared: {len(train_pairs)} train, {len(val_pairs)} val, {len(test_pairs)} test pairs.")
print(f"   English vocab: {len(en_vocab)} | Indonesian vocab: {len(id_vocab)}")




✅ Data prepared: 11904 train, 1488 val, 1489 test pairs.
   English vocab: 2614 | Indonesian vocab: 2923


In [3]:
# =============================================================================
# STEP 3: MODEL DEFINITIONS
# =============================================================================
# This section contains the PyTorch classes for both the RNN and Transformer models.

# -----------------------------------------------------
# 3.1 Baseline: RNN with Bahdanau Attention
# -----------------------------------------------------
class BahdanauEncoder(nn.Module):
    def __init__(self, input_dim, emb_dim, enc_hid_dim, dec_hid_dim, dropout):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.gru = nn.GRU(emb_dim, enc_hid_dim, bidirectional=True)
        self.fc = nn.Linear(enc_hid_dim * 2, dec_hid_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src):
        embedded = self.dropout(self.embedding(src))
        outputs, hidden = self.gru(embedded)
        hidden = torch.tanh(self.fc(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1)))
        return outputs, hidden

class BahdanauAttention(nn.Module):
    def __init__(self, enc_hid_dim, dec_hid_dim):
        super().__init__()
        self.attn = nn.Linear((enc_hid_dim * 2) + dec_hid_dim, dec_hid_dim)
        self.v = nn.Linear(dec_hid_dim, 1, bias=False)

    def forward(self, hidden, encoder_outputs):
        batch_size = encoder_outputs.shape[1]
        src_len = encoder_outputs.shape[0]
        hidden = hidden.unsqueeze(1).repeat(1, src_len, 1)
        encoder_outputs = encoder_outputs.permute(1, 0, 2)
        energy = torch.tanh(self.attn(torch.cat((hidden, encoder_outputs), dim=2)))
        attention = self.v(energy).squeeze(2)
        return torch.softmax(attention, dim=1)

class BahdanauDecoder(nn.Module):
    def __init__(self, output_dim, emb_dim, enc_hid_dim, dec_hid_dim, dropout, attention):
        super().__init__()
        self.output_dim = output_dim
        self.attention = attention
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.gru = nn.GRU((enc_hid_dim * 2) + emb_dim, dec_hid_dim)
        self.fc_out = nn.Linear((enc_hid_dim * 2) + dec_hid_dim + emb_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input, hidden, encoder_outputs):
        input = input.unsqueeze(0)
        embedded = self.dropout(self.embedding(input))
        a = self.attention(hidden, encoder_outputs).unsqueeze(1)
        encoder_outputs = encoder_outputs.permute(1, 0, 2)
        weighted = torch.bmm(a, encoder_outputs).permute(1, 0, 2)
        rnn_input = torch.cat((embedded, weighted), dim=2)
        output, hidden = self.gru(rnn_input, hidden.unsqueeze(0))
        prediction = self.fc_out(torch.cat((output.squeeze(0), weighted.squeeze(0), embedded.squeeze(0)), dim=1))
        return prediction, hidden.squeeze(0)

class Seq2SeqRNN(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        trg_len, batch_size = trg.shape
        trg_vocab_size = self.decoder.output_dim
        outputs = torch.zeros(trg_len, batch_size, trg_vocab_size).to(self.device)
        encoder_outputs, hidden = self.encoder(src)
        input = trg[0,:]
        for t in range(1, trg_len):
            output, hidden = self.decoder(input, hidden, encoder_outputs)
            outputs[t] = output
            teacher_force = random.random() < teacher_forcing_ratio
            top1 = output.argmax(1)
            input = trg[t] if teacher_force else top1
        return outputs

    def greedy_decode(self, src, max_len=50):
        with torch.no_grad():
            encoder_outputs, hidden = self.encoder(src)
            ys = torch.ones(1, src.shape[1]).fill_(BOS_IDX).long().to(self.device)
            for _ in range(max_len - 1):
                input_t = ys[-1, :]
                output, hidden = self.decoder(input_t, hidden, encoder_outputs)
                pred_token = output.argmax(1)
                ys = torch.cat([ys, pred_token.unsqueeze(0)], dim=0)
                if (pred_token == EOS_IDX).all(): break
        return ys

# -----------------------------------------------------
# 3.2 Advanced: Transformer
# -----------------------------------------------------
class PositionalEncoding(nn.Module):
    def __init__(self, emb_size, dropout, maxlen=5000):
        super().__init__()
        den = torch.exp(-torch.arange(0, emb_size, 2) * math.log(10000) / emb_size)
        pos = torch.arange(0, maxlen).reshape(maxlen, 1)
        pos_embedding = torch.zeros((maxlen, emb_size))
        pos_embedding[:, 0::2] = torch.sin(pos * den)
        pos_embedding[:, 1::2] = torch.cos(pos * den)
        self.dropout = nn.Dropout(dropout)
        self.register_buffer('pos_embedding', pos_embedding.unsqueeze(-2))

    def forward(self, token_embedding):
        return self.dropout(token_embedding + self.pos_embedding[:token_embedding.size(0), :])

class TokenEmbedding(nn.Module):
    def __init__(self, vocab_size, emb_size):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_size)
        self.emb_size = emb_size
    def forward(self, tokens):
        return self.embedding(tokens.long()) * math.sqrt(self.emb_size)

class Seq2SeqTransformer(nn.Module):
    def __init__(self, num_enc_layers, num_dec_layers, emb_size, nhead,
                 src_vocab_size, tgt_vocab_size, dim_feedforward=512, dropout=0.1):
        super().__init__()
        self.transformer = nn.Transformer(d_model=emb_size, nhead=nhead,
                                          num_encoder_layers=num_enc_layers,
                                          num_decoder_layers=num_dec_layers,
                                          dim_feedforward=dim_feedforward, dropout=dropout)
        self.generator = nn.Linear(emb_size, tgt_vocab_size)
        self.src_tok_emb = TokenEmbedding(src_vocab_size, emb_size)
        self.tgt_tok_emb = TokenEmbedding(tgt_vocab_size, emb_size)
        self.positional_encoding = PositionalEncoding(emb_size, dropout=dropout)

    def forward(self, src, trg, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, memory_key_padding_mask):
        src_emb = self.positional_encoding(self.src_tok_emb(src))
        tgt_emb = self.positional_encoding(self.tgt_tok_emb(trg))
        outs = self.transformer(src_emb, tgt_emb, src_mask, tgt_mask, None,
                                src_padding_mask, tgt_padding_mask, memory_key_padding_mask)
        return self.generator(outs)

    def greedy_decode(self, src, max_len=50):
        with torch.no_grad():
            src_mask = torch.zeros((src.shape[0], src.shape[0]), device=src.device).type(torch.bool)
            memory = self.transformer.encoder(self.positional_encoding(self.src_tok_emb(src)), src_mask)
            ys = torch.ones(1, src.shape[1]).fill_(BOS_IDX).long().to(src.device)
            for _ in range(max_len - 1):
                tgt_mask = (nn.Transformer.generate_square_subsequent_mask(ys.size(0)).type(torch.bool)).to(src.device)
                out = self.transformer.decoder(self.positional_encoding(self.tgt_tok_emb(ys)), memory, tgt_mask)
                prob = self.generator(out[-1, :, :])
                _, next_word = torch.max(prob, dim=1)
                ys = torch.cat([ys, next_word.unsqueeze(0)], dim=0)
                if (next_word == EOS_IDX).all(): break
        return ys

print("✅ Model classes defined.")




✅ Model classes defined.


In [4]:
# =============================================================================
# STEP 4: TRAINING & EVALUATION UTILITIES
# =============================================================================
# This section contains helper functions for training, evaluation, and decoding.

def create_mask(src, tgt, device):
    """Creates masks for the Transformer model."""
    src_seq_len, tgt_seq_len = src.shape[0], tgt.shape[0]
    tgt_mask = nn.Transformer.generate_square_subsequent_mask(tgt_seq_len, device)
    src_mask = torch.zeros((src_seq_len, src_seq_len), device=device).type(torch.bool)
    src_padding_mask = (src == PAD_IDX).transpose(0, 1)
    tgt_padding_mask = (tgt == PAD_IDX).transpose(0, 1)
    return src_mask, tgt_mask, src_padding_mask, tgt_padding_mask

def train_epoch(model, loader, optimizer, criterion, clip, is_transformer=False):
    model.train()
    epoch_loss = 0
    for src, trg in tqdm(loader, desc="Training"):
        src, trg = src.to(device), trg.to(device)
        optimizer.zero_grad()
        if is_transformer:
            trg_input = trg[:-1, :]
            src_mask, tgt_mask, src_pad_mask, tgt_pad_mask = create_mask(src, trg_input, device)
            logits = model(src, trg_input, src_mask, tgt_mask, src_pad_mask, tgt_pad_mask, src_pad_mask)
            trg_out = trg[1:, :].reshape(-1)
            logits = logits.reshape(-1, logits.shape[-1])
        else: # RNN
            logits = model(src, trg)
            # FIX: Slice logits to match target shape, avoiding the ValueError
            trg_out = trg[1:, :].reshape(-1)
            logits = logits[1:].reshape(-1, logits.shape[-1])
        loss = criterion(logits, trg_out)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(loader)

def evaluate_epoch(model, loader, criterion, is_transformer=False):
    model.eval()
    epoch_loss = 0
    with torch.no_grad():
        for src, trg in tqdm(loader, desc="Evaluating"):
            src, trg = src.to(device), trg.to(device)
            if is_transformer:
                trg_input = trg[:-1, :]
                src_mask, tgt_mask, src_pad_mask, tgt_pad_mask = create_mask(src, trg_input, device)
                logits = model(src, trg_input, src_mask, tgt_mask, src_pad_mask, tgt_pad_mask, src_pad_mask)
                trg_out = trg[1:, :].reshape(-1)
                logits = logits.reshape(-1, logits.shape[-1])
            else: # RNN
                logits = model(src, trg, teacher_forcing_ratio=0.0)
                # FIX: Slice logits to match target shape
                trg_out = trg[1:, :].reshape(-1)
                logits = logits[1:].reshape(-1, logits.shape[-1])
            loss = criterion(logits, trg_out)
            epoch_loss += loss.item()
    return epoch_loss / len(loader)

def decode_ids(ids, itos):
    """Converts a tensor of IDs back to a string."""
    tokens = []
    for tok_id in ids:
        tok = tok_id.item()
        if tok == EOS_IDX: break
        if tok not in {BOS_IDX, PAD_IDX}:
            tokens.append(itos.get(tok, "<unk>"))
    return " ".join(tokens)

def calculate_bleu(model, loader, id_itos, device):
    """Calculates SacreBLEU score for the model on a given dataset."""
    model.eval()
    hypotheses, references = [], []
    with torch.no_grad():
        for src, trg in loader:
            src, trg = src.to(device), trg.to(device)
            pred_ids = model.greedy_decode(src)
            for b in range(src.size(1)):
                hypotheses.append(decode_ids(pred_ids[:, b], id_itos))
                references.append([decode_ids(trg[:, b], id_itos)])
    return sacrebleu.corpus_bleu(hypotheses, references).score

print("✅ Utility functions defined.")




✅ Utility functions defined.


In [11]:
# =============================================================================
# STEP 5: MAIN EXECUTION
# =============================================================================
# This is the main block to instantiate and train the models.

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"🚀 Using device: {device}")

# --- Hyperparameters ---
N_EPOCHS = 30
CLIP = 1.0
LEARNING_RATE = 0.0005

# --- Train and Evaluate RNN Baseline ---
print("\n--- Training Baseline RNN + Attention ---")
ENC_EMB_DIM = 256
DEC_EMB_DIM = 256
ENC_HID_DIM = 512
DEC_HID_DIM = 512
DROPOUT = 0.5

attn_rnn = BahdanauAttention(ENC_HID_DIM, DEC_HID_DIM)
# FIX: Corrected DEC_HID_dim to DEC_HID_DIM
encoder_rnn = BahdanauEncoder(len(en_vocab), ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, DROPOUT)
# FIX: Corrected DEC_HID_dim to DEC_HID_DIM
decoder_rnn = BahdanauDecoder(len(id_vocab), DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, DROPOUT, attn_rnn)
model_rnn = Seq2SeqRNN(encoder_rnn, decoder_rnn, device).to(device)

optimizer_rnn = optim.Adam(model_rnn.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss(ignore_index=PAD_IDX)

for epoch in range(N_EPOCHS):
    train_loss = train_epoch(model_rnn, train_loader, optimizer_rnn, criterion, CLIP)
    val_loss = evaluate_epoch(model_rnn, val_loader, criterion)
    print(f"Epoch {epoch+1:02} | Train Loss: {train_loss:.3f} | Val Loss: {val_loss:.3f}")

# --- Train and Evaluate Transformer ---
print("\n--- Training Transformer ---")
EMB_SIZE = 512
NHEAD = 8
FFN_HID_DIM = 512
NUM_ENC_LAYERS = 3
NUM_DEC_LAYERS = 3

model_transformer = Seq2SeqTransformer(NUM_ENC_LAYERS, NUM_DEC_LAYERS, EMB_SIZE, NHEAD,
                                       len(en_vocab), len(id_vocab), FFN_HID_DIM).to(device)
optimizer_transformer = optim.Adam(model_transformer.parameters(), lr=0.0001, betas=(0.9, 0.98), eps=1e-9)

for epoch in range(N_EPOCHS):
    train_loss = train_epoch(model_transformer, train_loader, optimizer_transformer, criterion, CLIP, is_transformer=True)
    val_loss = evaluate_epoch(model_transformer, val_loader, criterion, is_transformer=True)
    print(f"Epoch {epoch+1:02} | Train Loss: {train_loss:.3f} | Val Loss: {val_loss:.3f}")

# --- Final Evaluation ---
print("\n--- Final Evaluation on Test Set ---")
bleu_rnn = calculate_bleu(model_rnn, test_loader, id_itos, device)
bleu_transformer = calculate_bleu(model_transformer, test_loader, id_itos, device)
print(f"🏆 Final BLEU Score (RNN Baseline): {bleu_rnn:.2f}")
print(f"🏆 Final BLEU Score (Transformer): {bleu_transformer:.2f}")

# --- Show Example Translations ---
def show_examples(model, loader, en_itos, id_itos, n=3):
    print("\n--- Example Translations ---")
    model.eval()
    with torch.no_grad():
        for i, (src, trg) in enumerate(loader):
            if i >= n: break
            src, trg = src.to(device), trg.to(device)
            pred_ids = model.greedy_decode(src)
            src_text = decode_ids(src[:, 0], en_itos)
            trg_text = decode_ids(trg[:, 0], id_itos)
            pred_text = decode_ids(pred_ids[:, 0], id_itos)
            print(f"\n  SRC:  {src_text}")
            print(f"  TRG:  {trg_text}")
            print(f"  PRED: {pred_text}")

show_examples(model_transformer, test_loader, en_itos, id_itos)

🚀 Using device: cuda

--- Training Baseline RNN + Attention ---


Training: 100%|██████████| 186/186 [00:09<00:00, 20.58it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 62.88it/s]


Epoch 01 | Train Loss: 4.526 | Val Loss: 3.752


Training: 100%|██████████| 186/186 [00:09<00:00, 20.45it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 62.27it/s]


Epoch 02 | Train Loss: 3.287 | Val Loss: 2.958


Training: 100%|██████████| 186/186 [00:09<00:00, 20.52it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 62.29it/s]


Epoch 03 | Train Loss: 2.571 | Val Loss: 2.648


Training: 100%|██████████| 186/186 [00:09<00:00, 20.65it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 62.17it/s]


Epoch 04 | Train Loss: 2.112 | Val Loss: 2.499


Training: 100%|██████████| 186/186 [00:09<00:00, 20.56it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 62.49it/s]


Epoch 05 | Train Loss: 1.802 | Val Loss: 2.486


Training: 100%|██████████| 186/186 [00:09<00:00, 20.57it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 62.93it/s]


Epoch 06 | Train Loss: 1.548 | Val Loss: 2.430


Training: 100%|██████████| 186/186 [00:09<00:00, 20.56it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 62.42it/s]


Epoch 07 | Train Loss: 1.375 | Val Loss: 2.408


Training: 100%|██████████| 186/186 [00:09<00:00, 20.40it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 62.74it/s]


Epoch 08 | Train Loss: 1.220 | Val Loss: 2.426


Training: 100%|██████████| 186/186 [00:09<00:00, 20.55it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 59.77it/s]


Epoch 09 | Train Loss: 1.107 | Val Loss: 2.402


Training: 100%|██████████| 186/186 [00:09<00:00, 20.32it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 62.89it/s]


Epoch 10 | Train Loss: 1.023 | Val Loss: 2.407


Training: 100%|██████████| 186/186 [00:09<00:00, 20.29it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 63.04it/s]


Epoch 11 | Train Loss: 0.926 | Val Loss: 2.508


Training: 100%|██████████| 186/186 [00:09<00:00, 20.43it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 63.14it/s]


Epoch 12 | Train Loss: 0.837 | Val Loss: 2.513


Training: 100%|██████████| 186/186 [00:09<00:00, 20.51it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 61.85it/s]


Epoch 13 | Train Loss: 0.785 | Val Loss: 2.501


Training: 100%|██████████| 186/186 [00:08<00:00, 20.71it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 62.89it/s]


Epoch 14 | Train Loss: 0.726 | Val Loss: 2.607


Training: 100%|██████████| 186/186 [00:09<00:00, 20.58it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 63.05it/s]


Epoch 15 | Train Loss: 0.663 | Val Loss: 2.647


Training: 100%|██████████| 186/186 [00:09<00:00, 20.56it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 62.71it/s]


Epoch 16 | Train Loss: 0.615 | Val Loss: 2.693


Training: 100%|██████████| 186/186 [00:09<00:00, 20.66it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 63.12it/s]


Epoch 17 | Train Loss: 0.561 | Val Loss: 2.705


Training: 100%|██████████| 186/186 [00:09<00:00, 20.44it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 62.86it/s]


Epoch 18 | Train Loss: 0.536 | Val Loss: 2.724


Training: 100%|██████████| 186/186 [00:09<00:00, 20.21it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 61.92it/s]


Epoch 19 | Train Loss: 0.502 | Val Loss: 2.795


Training: 100%|██████████| 186/186 [00:09<00:00, 20.30it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 62.78it/s]


Epoch 20 | Train Loss: 0.478 | Val Loss: 2.819


Training: 100%|██████████| 186/186 [00:09<00:00, 20.37it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 63.07it/s]


Epoch 21 | Train Loss: 0.441 | Val Loss: 2.846


Training: 100%|██████████| 186/186 [00:09<00:00, 20.41it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 62.80it/s]


Epoch 22 | Train Loss: 0.418 | Val Loss: 2.835


Training: 100%|██████████| 186/186 [00:09<00:00, 20.60it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 62.53it/s]


Epoch 23 | Train Loss: 0.392 | Val Loss: 2.911


Training: 100%|██████████| 186/186 [00:09<00:00, 19.99it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 62.98it/s]


Epoch 24 | Train Loss: 0.369 | Val Loss: 2.981


Training: 100%|██████████| 186/186 [00:09<00:00, 20.40it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 62.66it/s]


Epoch 25 | Train Loss: 0.354 | Val Loss: 3.013


Training: 100%|██████████| 186/186 [00:09<00:00, 20.35it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 61.84it/s]


Epoch 26 | Train Loss: 0.343 | Val Loss: 3.037


Training: 100%|██████████| 186/186 [00:09<00:00, 20.43it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 62.28it/s]


Epoch 27 | Train Loss: 0.331 | Val Loss: 3.160


Training: 100%|██████████| 186/186 [00:09<00:00, 20.44it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 62.04it/s]


Epoch 28 | Train Loss: 0.326 | Val Loss: 3.081


Training: 100%|██████████| 186/186 [00:09<00:00, 20.40it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 61.55it/s]


Epoch 29 | Train Loss: 0.300 | Val Loss: 3.162


Training: 100%|██████████| 186/186 [00:08<00:00, 20.71it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 62.56it/s]


Epoch 30 | Train Loss: 0.287 | Val Loss: 3.162

--- Training Transformer ---


Training: 100%|██████████| 186/186 [00:05<00:00, 32.11it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 123.19it/s]


Epoch 01 | Train Loss: 4.669 | Val Loss: 3.875


Training: 100%|██████████| 186/186 [00:05<00:00, 32.16it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 123.55it/s]


Epoch 02 | Train Loss: 3.874 | Val Loss: 3.502


Training: 100%|██████████| 186/186 [00:05<00:00, 32.29it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 123.13it/s]


Epoch 03 | Train Loss: 3.511 | Val Loss: 3.225


Training: 100%|██████████| 186/186 [00:05<00:00, 32.26it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 124.08it/s]


Epoch 04 | Train Loss: 3.228 | Val Loss: 3.017


Training: 100%|██████████| 186/186 [00:05<00:00, 32.23it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 123.72it/s]


Epoch 05 | Train Loss: 2.995 | Val Loss: 2.860


Training: 100%|██████████| 186/186 [00:05<00:00, 32.13it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 123.33it/s]


Epoch 06 | Train Loss: 2.799 | Val Loss: 2.736


Training: 100%|██████████| 186/186 [00:05<00:00, 32.08it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 123.56it/s]


Epoch 07 | Train Loss: 2.619 | Val Loss: 2.608


Training: 100%|██████████| 186/186 [00:05<00:00, 31.97it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 123.75it/s]


Epoch 08 | Train Loss: 2.468 | Val Loss: 2.504


Training: 100%|██████████| 186/186 [00:05<00:00, 31.94it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 123.49it/s]


Epoch 09 | Train Loss: 2.316 | Val Loss: 2.420


Training: 100%|██████████| 186/186 [00:05<00:00, 31.86it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 124.14it/s]


Epoch 10 | Train Loss: 2.179 | Val Loss: 2.338


Training: 100%|██████████| 186/186 [00:05<00:00, 31.96it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 123.24it/s]


Epoch 11 | Train Loss: 2.054 | Val Loss: 2.297


Training: 100%|██████████| 186/186 [00:05<00:00, 32.16it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 124.21it/s]


Epoch 12 | Train Loss: 1.936 | Val Loss: 2.205


Training: 100%|██████████| 186/186 [00:05<00:00, 32.05it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 123.78it/s]


Epoch 13 | Train Loss: 1.826 | Val Loss: 2.159


Training: 100%|██████████| 186/186 [00:05<00:00, 32.09it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 124.02it/s]


Epoch 14 | Train Loss: 1.725 | Val Loss: 2.127


Training: 100%|██████████| 186/186 [00:05<00:00, 32.30it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 123.69it/s]


Epoch 15 | Train Loss: 1.631 | Val Loss: 2.054


Training: 100%|██████████| 186/186 [00:05<00:00, 32.27it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 123.90it/s]


Epoch 16 | Train Loss: 1.535 | Val Loss: 2.030


Training: 100%|██████████| 186/186 [00:05<00:00, 32.24it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 124.44it/s]


Epoch 17 | Train Loss: 1.444 | Val Loss: 1.979


Training: 100%|██████████| 186/186 [00:05<00:00, 32.40it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 123.62it/s]


Epoch 18 | Train Loss: 1.363 | Val Loss: 1.975


Training: 100%|██████████| 186/186 [00:05<00:00, 32.12it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 124.41it/s]


Epoch 19 | Train Loss: 1.282 | Val Loss: 1.957


Training: 100%|██████████| 186/186 [00:05<00:00, 32.24it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 123.55it/s]


Epoch 20 | Train Loss: 1.205 | Val Loss: 1.960


Training: 100%|██████████| 186/186 [00:05<00:00, 32.10it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 124.25it/s]


Epoch 21 | Train Loss: 1.137 | Val Loss: 1.905


Training: 100%|██████████| 186/186 [00:05<00:00, 32.10it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 124.04it/s]


Epoch 22 | Train Loss: 1.068 | Val Loss: 1.909


Training: 100%|██████████| 186/186 [00:05<00:00, 32.02it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 123.27it/s]


Epoch 23 | Train Loss: 1.007 | Val Loss: 1.887


Training: 100%|██████████| 186/186 [00:05<00:00, 32.08it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 123.48it/s]


Epoch 24 | Train Loss: 0.940 | Val Loss: 1.913


Training: 100%|██████████| 186/186 [00:05<00:00, 32.23it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 123.81it/s]


Epoch 25 | Train Loss: 0.887 | Val Loss: 1.886


Training: 100%|██████████| 186/186 [00:05<00:00, 32.04it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 123.76it/s]


Epoch 26 | Train Loss: 0.836 | Val Loss: 1.883


Training: 100%|██████████| 186/186 [00:05<00:00, 32.08it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 123.65it/s]


Epoch 27 | Train Loss: 0.784 | Val Loss: 1.900


Training: 100%|██████████| 186/186 [00:05<00:00, 32.21it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 123.53it/s]


Epoch 28 | Train Loss: 0.733 | Val Loss: 1.877


Training: 100%|██████████| 186/186 [00:05<00:00, 32.19it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 124.08it/s]


Epoch 29 | Train Loss: 0.693 | Val Loss: 1.890


Training: 100%|██████████| 186/186 [00:05<00:00, 32.14it/s]
Evaluating: 100%|██████████| 24/24 [00:00<00:00, 124.47it/s]


Epoch 30 | Train Loss: 0.645 | Val Loss: 1.881

--- Final Evaluation on Test Set ---
🏆 Final BLEU Score (RNN Baseline): 84.09
🏆 Final BLEU Score (Transformer): 18.00

--- Example Translations ---

  SRC:  i know tom will cry .
  TRG:  saya tahu tom akan menangis .
  PRED: aku tahu kalau tom jauh lebih dari jauh dari jauh dari jauh .

  SRC:  i must decline .
  TRG:  aku harus menolak .
  PRED: aku harus jauh dari jauh .

  SRC:  it s a pity that you can t join us .
  TRG:  sangat <unk> anda tidak dapat bergabung dengan kami .
  PRED: sayang sekali kamu tidak bisa bergabung dengan kami .


In [12]:
def translate_sentence(model, sentence, en_vocab, id_itos, device, max_len=50):
    """Menerjemahkan satu kalimat string menggunakan model yang sudah dilatih."""
    model.eval()  # Set model ke mode evaluasi

    # 1. Tokenisasi dan konversi ke ID
    tokens = normalize_and_tokenize(sentence)
    ids = to_ids(tokens, en_vocab)
    
    # 2. Konversi ke tensor dan tambahkan dimensi batch (batch_size=1)
    src_tensor = torch.LongTensor(ids).unsqueeze(1).to(device)

    # 3. Lakukan prediksi dengan greedy_decode
    with torch.no_grad():
        pred_ids = model.greedy_decode(src_tensor, max_len=max_len)

    # 4. Konversi ID hasil prediksi kembali ke teks
    translation = decode_ids(pred_ids[:, 0], id_itos)
    
    return translation

In [18]:
# --- Coba terjemahkan kalimat baru ---
print("\n--- Interactive Test ---")

kalimat_tes_1 = "Bad boy."
kalimat_tes_2 = "My name is Tom and I live in a small city."

# Menggunakan model Transformer
terjemahan_1 = translate_sentence(model_transformer, kalimat_tes_1, en_vocab, id_itos, device)
terjemahan_2 = translate_sentence(model_transformer, kalimat_tes_2, en_vocab, id_itos, device)

print(f"\nModel: Transformer")
print(f"English: {kalimat_tes_1}")
print(f"Indonesian: {terjemahan_1}")

print(f"\nEnglish: {kalimat_tes_2}")
print(f"Indonesian: {terjemahan_2}")


# Menggunakan model RNN (sebagai perbandingan)
terjemahan_rnn = translate_sentence(model_rnn, kalimat_tes_1, en_vocab, id_itos, device)
print(f"\nModel: RNN Baseline")
print(f"English: {kalimat_tes_1}")
print(f"Indonesian: {terjemahan_rnn}")


--- Interactive Test ---

Model: Transformer
English: Bad boy.
Indonesian: anak laki laki yang buruk .

English: My name is Tom and I live in a small city.
Indonesian: saya tinggal di depan saya tinggal .

Model: RNN Baseline
English: Bad boy.
Indonesian: buruk .
