In [1]:
import torch
import torch.nn as nn

# Toy hyperparameters for the seq2seq demo in this notebook.
INPUT_DIM = 10   # source vocabulary size (rows of the encoder embedding table)
OUTPUT_DIM = 10  # target vocabulary size (decoder embedding rows and logit width)
EMB_DIM = 8      # embedding vector width, passed to both Encoder and Decoder
HID_DIM = 16     # GRU hidden-state width, passed to both Encoder and Decoder
BATCH_SIZE = 1   # NOTE(review): not referenced in the visible cells; the sample tensors hard-code a batch of one

# Encoder
class Encoder(nn.Module):
    """Embed source token ids and summarise them with a single GRU.

    Args:
        input_dim: number of rows in the embedding table (source vocab size).
        emb_dim: width of each embedding vector.
        hid_dim: width of the GRU hidden state.
    """

    def __init__(self, input_dim, emb_dim, hid_dim):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=input_dim, embedding_dim=emb_dim)
        self.rnn = nn.GRU(input_size=emb_dim, hidden_size=hid_dim, batch_first=True)

    def forward(self, src):
        """Return the GRU's final hidden state for ``src``.

        For batched input, ``src`` holds integer token ids laid out
        batch-first, ``(batch, src_len)``, and the result has shape
        ``(1, batch, hid_dim)``.
        """
        # The GRU returns (per-step outputs, h_n); only h_n is handed back.
        return self.rnn(self.embedding(src))[1]

# Decoder (no attention)
class Decoder(nn.Module):
    """Single-step GRU decoder without attention.

    Args:
        output_dim: target vocab size; sets both the embedding-table rows
            and the width of the returned logits.
        emb_dim: width of each embedding vector.
        hid_dim: width of the GRU hidden state.
    """

    def __init__(self, output_dim, emb_dim, hid_dim):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=output_dim, embedding_dim=emb_dim)
        self.rnn = nn.GRU(input_size=emb_dim, hidden_size=hid_dim, batch_first=True)
        self.fc_out = nn.Linear(in_features=hid_dim, out_features=output_dim)

    def forward(self, input, hidden):
        """Advance the decoder by one time step.

        Args:
            input: token ids for the current step, one per batch element,
                shape ``(batch,)``.
            hidden: GRU state to start from, shape ``(1, batch, hid_dim)``.

        Returns:
            ``(prediction, hidden)``: logits of shape ``(batch, output_dim)``
            and the updated GRU state.
        """
        # Insert a length-1 time axis: (batch,) -> (batch, 1), as batch_first=True expects.
        step_tokens = input.unsqueeze(1)
        step_output, next_hidden = self.rnn(self.embedding(step_tokens), hidden)
        # Drop the time axis again before projecting onto the vocabulary.
        logits = self.fc_out(step_output.squeeze(1))
        return logits, next_hidden


In [2]:
# Sample source/target sequences of token indices, laid out batch-first
# because both GRUs are built with batch_first=True.
torch.manual_seed(0)  # fix parameter initialisation so a fresh run reproduces the logits

src = torch.tensor([[1, 3, 5, 7]], dtype=torch.long)  # shape (batch_size, src_len) = (1, 4)
trg = torch.tensor([[1, 2, 3]], dtype=torch.long)     # shape (batch_size, trg_len) = (1, 3)

encoder = Encoder(INPUT_DIM, EMB_DIM, HID_DIM)
decoder = Decoder(OUTPUT_DIM, EMB_DIM, HID_DIM)

hidden = encoder(src)  # final encoder state becomes the decoder's initial state

# Teacher forcing: every step is fed the ground-truth token from trg rather
# than the decoder's own previous prediction. The last target token is never
# fed in, so the loop produces trg_len - 1 sets of logits.
input_token = trg[:, 0]
for t in range(1, trg.shape[1]):
    output, hidden = decoder(input_token, hidden)
    print(f"Step {t}, Output logits: {output}")
    input_token = trg[:, t]


Step 1, Output logits: tensor([[ 0.2996,  0.1627,  0.3123,  0.3613,  0.2641, -0.2629,  0.4856, -0.2422,
         -0.1786,  0.1801]], grad_fn=<AddmmBackward0>)
Step 2, Output logits: tensor([[ 0.2931,  0.0007,  0.3797,  0.2226,  0.2565, -0.3488,  0.5331, -0.1346,
         -0.0955,  0.2163]], grad_fn=<AddmmBackward0>)
