In [4]:

import torch
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader
import io
import torch.nn as nn

In [5]:
# Locations of the raw training text files (English and French), given
# relative to the notebook's working directory.
filepath_train_en = '../data/raw/train.en' 
filepath_train_fr = '../data/raw/train.fr'

In [6]:
# Build the spaCy-backed tokenizers for English and French.
# The handler targets OSError, presumably what spaCy raises when the requested
# language model package is not installed (verify against the spaCy version in use).
try:
    en_tokenizer = get_tokenizer('spacy', language='en_core_web_sm')
    fr_tokenizer = get_tokenizer('spacy', language='fr_core_news_sm')
except OSError:
    print("ch∆∞a t·∫£i ƒë∆∞·ª£c g√≥i ng√¥n ng·ªØ!")
    # Re-raise after the friendly message: swallowing the error would leave
    # en_tokenizer / fr_tokenizer undefined and make later cells fail with an
    # unrelated-looking NameError instead of the real cause.
    raise

In [7]:
def yield_tokens(file_path, tokenizer):
    """Lazily tokenize a UTF-8 text file, one line at a time.

    Args:
        file_path: Path of the text file to read.
        tokenizer: Callable applied to each line after surrounding whitespace
            has been stripped.

    Yields:
        Whatever ``tokenizer`` returns for each line, in file order. Blank
        lines are not skipped; the tokenizer receives an empty string.
    """
    with io.open(file_path, encoding='utf-8') as handle:
        # The generator keeps the file open only while it is being consumed.
        yield from (tokenizer(raw_line.strip()) for raw_line in handle)

In [8]:

# Reserved tokens shared by both vocabularies: unknown word, padding,
# start-of-sentence and end-of-sentence markers.
special_tokens = ['<unk>', '<pad>', '<sos>', '<eos>']


# English vocabulary, built by streaming the tokenized training file.
# min_freq / max_tokens bound the vocabulary (minimum token count and maximum
# vocabulary size, per torchtext's build_vocab_from_iterator).
vocab_en = build_vocab_from_iterator(
    yield_tokens(filepath_train_en, en_tokenizer),
    min_freq=2,
    specials=special_tokens,
    max_tokens=10000     
)

In [9]:
# Tokens missing from the English vocabulary resolve to the '<unk>' index.
vocab_en.set_default_index(vocab_en['<unk>'])

# French vocabulary, built with the same settings as the English one.
vocab_fr = build_vocab_from_iterator(
    yield_tokens(filepath_train_fr, fr_tokenizer),
    min_freq=2,
    specials=special_tokens,
    max_tokens=10000
)
# Tokens missing from the French vocabulary resolve to the '<unk>' index.
vocab_fr.set_default_index(vocab_fr['<unk>'])

In [10]:
# Indices of the special tokens, looked up in the English vocabulary.
# NOTE(review): PAD_IDX is also used to pad the French batches in
# collate_batch; this presumably works because both vocabularies are built
# from the same special_tokens list and so share these indices — confirm.
PAD_IDX = vocab_en['<pad>']
SOS_IDX = vocab_en['<sos>']
EOS_IDX = vocab_en['<eos>']

In [11]:
def text_transform(tokenizer, vocab, text):
    """Convert one raw sentence into a 1-D tensor of token indices.

    The sentence is stripped, tokenized, mapped through ``vocab`` and wrapped
    in start/end-of-sentence markers.

    The markers are looked up in the ``vocab`` that was passed in rather than
    taken from module-level constants derived from the English vocabulary, so
    the result stays correct for any vocabulary that defines '<sos>' and
    '<eos>', even if its special tokens sit at different indices.

    Args:
        tokenizer: Callable turning a string into a list of tokens.
        vocab: Mapping from token to integer index (supports ``vocab[token]``);
            must contain '<sos>' and '<eos>'.
        text: Raw sentence; leading/trailing whitespace is ignored.

    Returns:
        ``torch.long`` tensor ``[<sos>, tok_1, ..., tok_n, <eos>]``.
    """
    token_ids = [vocab[token] for token in tokenizer(text.strip())]
    wrapped_ids = [vocab['<sos>']] + token_ids + [vocab['<eos>']]
    return torch.tensor(wrapped_ids, dtype=torch.long)

In [12]:
def collate_batch(batch):
    """Turn a list of raw (English, French) sentence pairs into padded tensors.

    Each sentence is numericalized with ``text_transform``; the pairs are then
    ordered by descending source length (ties keep their original order),
    because the encoder packs the source with ``enforce_sorted=True``.

    Args:
        batch: Iterable of ``(src_text, trg_text)`` string pairs.

    Returns:
        Tuple ``(src_batch, trg_batch, src_lens)`` where the two batches are
        padded with ``PAD_IDX`` and laid out as ``[seq len, batch size]``
        (``pad_sequence`` default), and ``src_lens`` holds the unpadded source
        lengths in the same (sorted) order.
    """
    # Numericalize every pair up front.
    encoded_pairs = [
        (
            text_transform(en_tokenizer, vocab_en, src_text),
            text_transform(fr_tokenizer, vocab_fr, trg_text),
        )
        for src_text, trg_text in batch
    ]

    # Longest source sentence first; list.sort is stable, also with reverse=True.
    encoded_pairs.sort(key=lambda pair: len(pair[0]), reverse=True)
    src_seqs, trg_seqs = zip(*encoded_pairs)

    # True (unpadded) source lengths, needed later by pack_padded_sequence.
    src_lens = torch.tensor([len(seq) for seq in src_seqs])

    # Pad the shorter sentences with PAD_IDX up to the longest in the batch.
    src_batch = pad_sequence(src_seqs, padding_value=PAD_IDX)
    trg_batch = pad_sequence(trg_seqs, padding_value=PAD_IDX)

    return src_batch, trg_batch, src_lens

In [13]:
BATCH_SIZE = 64  # number of sentence pairs per mini-batch passed to the DataLoader

# ƒê·ªçc d·ªØ li·ªáu th√¥ t·ª´ file v√†o list (ƒë·ªÉ ƒë∆∞a v√†o DataLoader)
def read_raw_data(path_en, path_fr):
    """Load two line-aligned text files into a list of sentence pairs.

    Args:
        path_en: Path of the UTF-8 English file.
        path_fr: Path of the UTF-8 French file.

    Returns:
        List of ``(english_line, french_line)`` tuples; lines keep their
        trailing newline.

    Raises:
        ValueError: If the two files do not contain the same number of lines.
            A plain ``zip`` would silently drop the surplus lines and hide a
            misaligned corpus.
    """
    with open(path_en, encoding='utf-8') as f_en, open(path_fr, encoding='utf-8') as f_fr:
        lines_en = f_en.readlines()
        lines_fr = f_fr.readlines()

    if len(lines_en) != len(lines_fr):
        raise ValueError(
            f"Line count mismatch: {path_en} has {len(lines_en)} lines, "
            f"{path_fr} has {len(lines_fr)} lines"
        )

    return list(zip(lines_en, lines_fr))


# Load every (English, French) line pair of the training set into memory.
train_data = read_raw_data(filepath_train_en, filepath_train_fr)

# Mini-batch iterator over the raw pairs; collate_batch converts each list of
# string pairs into padded index tensors plus the source lengths.
train_loader = DataLoader(
    train_data, 
    batch_size=BATCH_SIZE, 
    collate_fn=collate_batch,
    shuffle=True # shuffle the data when training
)

In [14]:
# Sanity check of the DataLoader: pull one batch and inspect the tensor
# shapes and the source-length vector.
print("\n=== KI·ªÇM TRA DATALOADER (PADDING & PACKING) ===")
src, trg, src_len = next(iter(train_loader))

print(f"‚úÖ K√≠ch th∆∞·ªõc Source Batch: {src.shape}")
print(f"   (D√†i nh·∫•t trong batch x Batch Size)")
print(f"‚úÖ K√≠ch th∆∞·ªõc Target Batch: {trg.shape}")
print(f"‚úÖ Danh s√°ch ƒë·ªô d√†i (ƒë√£ s·∫Øp x·∫øp gi·∫£m d·∫ßn ch∆∞a?):")
print(src_len) # print to check that the lengths form a non-increasing sequence (e.g. 20, 19, 15, 10...)


=== KI·ªÇM TRA DATALOADER (PADDING & PACKING) ===
‚úÖ K√≠ch th∆∞·ªõc Source Batch: torch.Size([25, 64])
   (D√†i nh·∫•t trong batch x Batch Size)
‚úÖ K√≠ch th∆∞·ªõc Target Batch: torch.Size([34, 64])
‚úÖ Danh s√°ch ƒë·ªô d√†i (ƒë√£ s·∫Øp x·∫øp gi·∫£m d·∫ßn ch∆∞a?):
tensor([25, 25, 22, 22, 22, 21, 20, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 17,
        17, 16, 16, 15, 15, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14,
        14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 11,
        11, 11, 11, 11, 11, 10, 10, 10, 10,  9])


In [15]:
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence

class Encoder(nn.Module):
    """LSTM encoder that summarizes a padded source batch into its final states.

    Args:
        input_dim: Size of the source vocabulary (number of embedding rows).
        emb_dim: Dimension of the token embeddings.
        hid_dim: Hidden size of the LSTM.
        n_layers: Number of stacked LSTM layers.
        dropout: Dropout probability, applied to the embeddings and passed to
            the LSTM as its ``dropout`` argument.
    """

    def __init__(self, input_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()

        # Exposed as attributes; Seq2Seq compares them with the decoder's.
        self.hid_dim, self.n_layers = hid_dim, n_layers

        # Token index -> dense vector.
        self.embedding = nn.Embedding(num_embeddings=input_dim, embedding_dim=emb_dim)

        # Regularization on the embedded tokens.
        self.dropout = nn.Dropout(p=dropout)

        # Sequence model (time-major input, i.e. batch_first is left at False).
        self.rnn = nn.LSTM(
            input_size=emb_dim,
            hidden_size=hid_dim,
            num_layers=n_layers,
            dropout=dropout,
        )

    def forward(self, src, src_len):
        """Encode a batch of source sentences.

        Args:
            src: Token indices, ``[src len, batch size]``.
            src_len: Unpadded length of each sentence, ``[batch size]``; must
                be in descending order because packing uses
                ``enforce_sorted=True``.

        Returns:
            ``(hidden, cell)``: the LSTM's final hidden and cell states, each
            ``[n layers, batch size, hid dim]``.
        """
        # [src len, batch size] -> [src len, batch size, emb dim]
        token_vectors = self.embedding(src)
        token_vectors = self.dropout(token_vectors)

        # Pack so the LSTM only processes real tokens and skips the padded
        # positions; the lengths are moved to the CPU for packing.
        packed_input = pack_padded_sequence(
            token_vectors, src_len.cpu(), enforce_sorted=True
        )

        # The per-step outputs are not needed here, only the final states.
        # (pad_packed_sequence could unpack them if they were.)
        _packed_outputs, final_state = self.rnn(packed_input)
        hidden, cell = final_state

        return hidden, cell

In [16]:
# --- HYPERPARAMETERS (per the assignment requirements) ---
INPUT_DIM = len(vocab_en)  # English vocabulary size (about 6191)
ENC_EMB_DIM = 256          # embedding dimension
HID_DIM = 512              # LSTM hidden size
N_LAYERS = 2               # number of stacked LSTM layers
ENC_DROPOUT = 0.5          # dropout probability

# Instantiate the model
encoder = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
print("‚úÖ ƒê√£ kh·ªüi t·∫°o Encoder:")
print(encoder)

# --- TRIAL RUN ON ONE BATCH ---
# Take one batch from the DataLoader built in the earlier step
src, trg, src_len = next(iter(train_loader))

print(f"\n‚è≥ ƒêang test v·ªõi Batch Size = {src.shape[1]}...")
print(f"   ƒê·ªô d√†i c√¢u d√†i nh·∫•t trong batch = {src.shape[0]}")

# Run the forward pass
# Note: both src and src_len must be passed in
hidden, cell = encoder(src, src_len)

# Expected state shape, derived from the configuration and the batch that was
# actually drawn, so the check stays valid if the hyperparameters change or the
# batch holds fewer than 64 sentences.
expected_state_shape = (N_LAYERS, src.shape[1], HID_DIM)

print("\n=== K·∫æT QU·∫¢ TEST ENCODER ===")
print(f"‚úÖ Hidden State Shape: {hidden.shape}")
print(f"   (Mong ƒë·ª£i: [{N_LAYERS}, {src.shape[1]}, {HID_DIM}]) -> [2, 64, 512]")
print(f"‚úÖ Cell State Shape:   {cell.shape}")
print(f"   (Mong ƒë·ª£i: [{N_LAYERS}, {src.shape[1]}, {HID_DIM}]) -> [2, 64, 512]")

# Both final states are verified; the cell state was previously printed but
# never checked.
if hidden.shape == expected_state_shape and cell.shape == expected_state_shape:
    print("\nüéâ CH√öC M·ª™NG! Encoder ho·∫°t ƒë·ªông ho√†n h·∫£o.")
else:
    print("\n‚ö†Ô∏è C√≥ g√¨ ƒë√≥ sai sai v·ªÅ k√≠ch th∆∞·ªõc output.")

‚úÖ ƒê√£ kh·ªüi t·∫°o Encoder:
Encoder(
  (embedding): Embedding(6191, 256)
  (dropout): Dropout(p=0.5, inplace=False)
  (rnn): LSTM(256, 512, num_layers=2, dropout=0.5)
)

‚è≥ ƒêang test v·ªõi Batch Size = 64...
   ƒê·ªô d√†i c√¢u d√†i nh·∫•t trong batch = 26

=== K·∫æT QU·∫¢ TEST ENCODER ===
‚úÖ Hidden State Shape: torch.Size([2, 64, 512])
   (Mong ƒë·ª£i: [2, 64, 512]) -> [2, 64, 512]
‚úÖ Cell State Shape:   torch.Size([2, 64, 512])
   (Mong ƒë·ª£i: [2, 64, 512]) -> [2, 64, 512]

üéâ CH√öC M·ª™NG! Encoder ho·∫°t ƒë·ªông ho√†n h·∫£o.


In [17]:
class Decoder(nn.Module):
    """LSTM decoder that predicts the next target token one step at a time.

    Args:
        output_dim: Size of the target vocabulary; also the width of the
            prediction layer.
        emb_dim: Dimension of the token embeddings.
        hid_dim: Hidden size of the LSTM.
        n_layers: Number of stacked LSTM layers.
        dropout: Dropout probability, applied to the embeddings and passed to
            the LSTM as its ``dropout`` argument.
    """

    def __init__(self, output_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()

        # Exposed as attributes; Seq2Seq reads output_dim and compares
        # hid_dim / n_layers with the encoder's.
        self.output_dim = output_dim
        self.hid_dim = hid_dim
        self.n_layers = n_layers

        # Token index -> dense vector.
        self.embedding = nn.Embedding(num_embeddings=output_dim, embedding_dim=emb_dim)

        # Recurrent core: emb_dim inputs, hid_dim hidden units.
        self.rnn = nn.LSTM(
            input_size=emb_dim,
            hidden_size=hid_dim,
            num_layers=n_layers,
            dropout=dropout,
        )

        # Projects a hidden state to one unnormalized score per target token;
        # no softmax is applied inside this module.
        self.fc_out = nn.Linear(in_features=hid_dim, out_features=output_dim)

        # Regularization on the embedded input token.
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, input, hidden, cell):
        """Run a single decoding step.

        Args:
            input: Current token index of every sentence, ``[batch size]``.
            hidden: Previous hidden state, ``[n layers, batch size, hid dim]``.
            cell: Previous cell state, ``[n layers, batch size, hid dim]``.

        Returns:
            ``(prediction, hidden, cell)`` where ``prediction`` is
            ``[batch size, output dim]`` and the states are the updated LSTM
            states to feed into the next step.
        """
        # The LSTM expects a leading sequence axis; a single step has length 1.
        # [batch size] -> [1, batch size]
        step_tokens = input[None]

        # [1, batch size] -> [1, batch size, emb dim]
        step_vectors = self.dropout(self.embedding(step_tokens))

        # step_output: [1, batch size, hid dim]
        previous_state = (hidden, cell)
        step_output, (hidden, cell) = self.rnn(step_vectors, previous_state)

        # Drop the length-1 sequence axis before the linear projection.
        prediction = self.fc_out(step_output.squeeze(dim=0))

        return prediction, hidden, cell

In [18]:
# --- DECODER HYPERPARAMETERS ---
OUTPUT_DIM = len(vocab_fr) # French vocabulary size (6555)
DEC_EMB_DIM = 256
HID_DIM = 512
N_LAYERS = 2
DEC_DROPOUT = 0.5

# Instantiate the Decoder
decoder = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT)
print("‚úÖ ƒê√£ kh·ªüi t·∫°o Decoder:")
print(decoder)

# --- TRIAL RUN WITH SYNTHETIC INPUT ---
# Pretend we are at the very first decoding step: the input is a batch made up
# entirely of <sos> (start-of-sentence) tokens.
# The batch size is read from the encoder state instead of being hard-coded to
# 64, so the input always matches `hidden` / `cell` even for a smaller batch.
test_batch_size = hidden.shape[1]
input_test = torch.tensor([vocab_fr['<sos>']] * test_batch_size)

# hidden, cell: taken from the Encoder test above (the context vectors).
# (Note: the Encoder test cell must have been run first.)

print(f"\n‚è≥ ƒêang test Decoder 1 b∆∞·ªõc...")
prediction, hidden_new, cell_new = decoder(input_test, hidden, cell)

print("\n=== K·∫æT QU·∫¢ TEST DECODER ===")
print(f"‚úÖ Prediction Shape: {prediction.shape}")
print(f"   (Mong ƒë·ª£i: [Batch Size, Vocab Size]) -> [{test_batch_size}, {OUTPUT_DIM}]")

print(f"‚úÖ Hidden State Shape: {hidden_new.shape}")
print(f"   (Mong ƒë·ª£i: [Layers, Batch, Hid]) -> [{N_LAYERS}, {test_batch_size}, {HID_DIM}]")

# Expected shapes follow the configuration rather than literal numbers.
prediction_ok = prediction.shape == (test_batch_size, OUTPUT_DIM)
state_ok = hidden_new.shape == (N_LAYERS, test_batch_size, HID_DIM)
if prediction_ok and state_ok:
    print("\nüéâ CH√öC M·ª™NG! Decoder ho·∫°t ƒë·ªông ho√†n h·∫£o.")
else:
    print("\n‚ö†Ô∏è K√≠ch th∆∞·ªõc ƒë·∫ßu ra ch∆∞a ƒë√∫ng, ki·ªÉm tra l·∫°i code.")

‚úÖ ƒê√£ kh·ªüi t·∫°o Decoder:
Decoder(
  (embedding): Embedding(6555, 256)
  (rnn): LSTM(256, 512, num_layers=2, dropout=0.5)
  (fc_out): Linear(in_features=512, out_features=6555, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

‚è≥ ƒêang test Decoder 1 b∆∞·ªõc...

=== K·∫æT QU·∫¢ TEST DECODER ===
‚úÖ Prediction Shape: torch.Size([64, 6555])
   (Mong ƒë·ª£i: [Batch Size, Vocab Size]) -> [64, 6555]
‚úÖ Hidden State Shape: torch.Size([2, 64, 512])
   (Mong ƒë·ª£i: [Layers, Batch, Hid]) -> [2, 64, 512]

üéâ CH√öC M·ª™NG! Decoder ho·∫°t ƒë·ªông ho√†n h·∫£o.


In [19]:
import random

class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper that decodes step by step with teacher forcing.

    Args:
        encoder: Module called as ``encoder(src, src_len)`` that returns
            ``(hidden, cell)``; must expose ``hid_dim`` and ``n_layers``.
        decoder: Module called as ``decoder(input, hidden, cell)`` that returns
            ``(prediction, hidden, cell)``; must expose ``hid_dim``,
            ``n_layers`` and ``output_dim``.
        device: Device on which the output tensor is allocated.

    Raises:
        AssertionError: If encoder and decoder disagree on ``hid_dim`` or
            ``n_layers`` (the encoder states are fed to the decoder as-is).
    """

    def __init__(self, encoder, decoder, device):
        super().__init__()

        self.encoder = encoder
        self.decoder = decoder
        self.device = device

        # The encoder's final states initialize the decoder, so the state
        # shapes of the two networks have to line up.
        assert encoder.hid_dim == decoder.hid_dim, \
            "Hidden dimensions of encoder and decoder must be equal!"
        assert encoder.n_layers == decoder.n_layers, \
            "Encoder and decoder must have equal number of layers!"

    def forward(self, src, trg, src_len, teacher_forcing_ratio=0.5):
        """Produce decoder predictions for every target position.

        Args:
            src: Source token indices, ``[src len, batch size]``.
            trg: Target token indices, ``[trg len, batch size]``; row 0 is fed
                to the decoder as the first input.
            src_len: Unpadded source lengths, ``[batch size]``.
            teacher_forcing_ratio: Probability, drawn once per step for the
                whole batch, of feeding the ground-truth token instead of the
                decoder's own top prediction as the next input.

        Returns:
            Tensor ``[trg len, batch size, output dim]`` of decoder outputs.
            Row 0 is never written and stays all zeros.
        """
        trg_len, batch_size = trg.shape[0], src.shape[1]

        # Pre-allocated result buffer, zero-filled.
        outputs = torch.zeros(
            trg_len, batch_size, self.decoder.output_dim, device=self.device
        )

        # Encode the whole source sentence into the initial decoder states.
        hidden, cell = self.encoder(src, src_len)

        # First decoder input: row 0 of the target batch.
        decoder_input = trg[0, :]

        for step in range(1, trg_len):
            # One decoding step; the states are carried over to the next one.
            step_output, hidden, cell = self.decoder(decoder_input, hidden, cell)
            outputs[step] = step_output

            # Teacher forcing: coin flip between ground truth and own guess.
            use_ground_truth = random.random() < teacher_forcing_ratio
            greedy_tokens = step_output.argmax(1)
            decoder_input = trg[step] if use_ground_truth else greedy_tokens

        return outputs

In [20]:
# --- 1. DEVICE CONFIGURATION ---
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"üîπ ƒêang s·ª≠ d·ª•ng thi·∫øt b·ªã: {device}")

# --- 2. BUILD THE MODELS ---
# Encoder and Decoder (classes declared in the earlier steps); the
# hyperparameters are restated here so this cell is self-contained.
INPUT_DIM = len(vocab_en)
OUTPUT_DIM = len(vocab_fr)
ENC_EMB_DIM = 256
DEC_EMB_DIM = 256
HID_DIM = 512
N_LAYERS = 2
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5

enc = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT)

# The combined Seq2Seq model, moved to the selected device
model = Seq2Seq(enc, dec, device).to(device)

print("‚úÖ ƒê√£ kh·ªüi t·∫°o th√†nh c√¥ng Seq2Seq Model!")

# --- 3. WEIGHT INITIALIZATION (optional but recommended) ---
# Intended to help the model train faster by drawing all parameters from a
# small, fixed uniform range.
def init_weights(m):
    """Re-draw every parameter of ``m`` from U(-0.08, 0.08), in place.

    All parameters reachable from ``m`` are visited, including those of nested
    submodules. When this is passed to ``Module.apply``, which itself walks the
    submodules, nested parameters are therefore drawn more than once; the
    final values are still uniform samples from the same range.

    Args:
        m: The ``nn.Module`` whose parameters are re-initialized.
    """
    for param in m.parameters():
        nn.init.uniform_(param.data, a=-0.08, b=0.08)
        
model.apply(init_weights)


# --- 4. TRIAL RUN ON ONE BATCH ---
# Fetch one batch of data
src, trg, src_len = next(iter(train_loader))

# Move the data to the selected device (CPU/GPU)
src = src.to(device)
trg = trg.to(device)
# src_len is not moved with .to(device): pack_padded_sequence needs it on the CPU

print(f"\n‚è≥ ƒêang ch·∫°y th·ª≠ Forward Pass...")
output = model(src, trg, src_len)

# Expected shape, derived from the batch that was actually drawn instead of a
# hard-coded batch size of 64, so the check also holds for a smaller batch.
expected_output_shape = (trg.shape[0], src.shape[1], OUTPUT_DIM)

print("\n=== K·∫æT QU·∫¢ KI·ªÇM TRA SEQ2SEQ ===")
print(f"‚úÖ Output Shape: {output.shape}")
print(f"   (Mong ƒë·ª£i: [Trg Len, Batch Size, Vocab Size]) -> [{trg.shape[0]}, {src.shape[1]}, {OUTPUT_DIM}]")

# Check for a dimension mismatch
if output.shape == expected_output_shape:
    print("\nüéâ TUY·ªÜT V·ªúI! M√¥ h√¨nh ƒë√£ s·∫µn s√†ng ƒë·ªÉ hu·∫•n luy·ªán.")
else:
    print("\n‚ö†Ô∏è K√≠ch th∆∞·ªõc ƒë·∫ßu ra ch∆∞a ƒë√∫ng.")

üîπ ƒêang s·ª≠ d·ª•ng thi·∫øt b·ªã: cpu
‚úÖ ƒê√£ kh·ªüi t·∫°o th√†nh c√¥ng Seq2Seq Model!

‚è≥ ƒêang ch·∫°y th·ª≠ Forward Pass...

=== K·∫æT QU·∫¢ KI·ªÇM TRA SEQ2SEQ ===
‚úÖ Output Shape: torch.Size([33, 64, 6555])
   (Mong ƒë·ª£i: [Trg Len, Batch Size, Vocab Size]) -> [33, 64, 6555]

üéâ TUY·ªÜT V·ªúI! M√¥ h√¨nh ƒë√£ s·∫µn s√†ng ƒë·ªÉ hu·∫•n luy·ªán.
