In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

from src.dataset import load_parallel, get_tokenizers, build_dataloader
from src.vocab import build_vocab
from src.utils import set_seed
from src.train import train_model

from src.models.encoder import Encoder
from src.models.decoder import Decoder
from src.models.seq2seq import Seq2Seq

# Use the CUDA device when torch reports it as available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", device)

# Call the project's seeding helper with a fixed value before any later cell
# loads data or builds the model.
set_seed(42)


ModuleNotFoundError: No module named 'torch'

In [None]:
# Each split is read from a (.en, .fr) file pair under data/, in the order
# train -> val -> test.
train_pairs, val_pairs, test_pairs = (
    load_parallel(en_path, fr_path)
    for en_path, fr_path in (
        ("data/train.en", "data/train.fr"),
        ("data/val.en", "data/val.fr"),
        ("data/test.en", "data/test.fr"),
    )
)

# Quick sanity check on how many examples each split contains.
print("Train samples:", len(train_pairs))
print("Val samples:", len(val_pairs))
print("Test samples:", len(test_pairs))


In [None]:
# get_tokenizers() returns two tokenizers; the first is used for the source
# (.en) side and the second for the target (.fr) side in the cells below.
(tokenizer_en, tokenizer_fr) = get_tokenizers()


In [None]:
# Vocabularies are built from the training split only: element 0 of each pair
# feeds the source vocab, element 1 feeds the target vocab.
train_src_texts = [pair[0] for pair in train_pairs]
src_stoi, src_itos = build_vocab(train_src_texts, tokenizer_en)

train_trg_texts = [pair[1] for pair in train_pairs]
trg_stoi, trg_itos = build_vocab(train_trg_texts, tokenizer_fr)

# Report the size of each vocabulary.
print("SRC vocab:", len(src_itos))
print("TRG vocab:", len(trg_itos))


In [None]:
# Both loaders share the tokenizers, the vocab lookups, the batch size and the
# max length; only the data split and the shuffle flag differ.
loader_args = (tokenizer_en, tokenizer_fr, src_stoi, trg_stoi)
loader_opts = {"batch_size": 16, "max_len": 50}

# Training batches are shuffled; validation batches keep their original order.
train_loader = build_dataloader(train_pairs, *loader_args, shuffle=True, **loader_opts)
val_loader = build_dataloader(val_pairs, *loader_args, shuffle=False, **loader_opts)


In [None]:
# Settings passed identically to the encoder and the decoder.
shared_arch = {
    "embed_dim": 256,
    "hidden_size": 512,
    "num_layers": 3,
    "dropout": 0.3,
}

# Padding indices looked up once per vocabulary.
src_pad_idx = src_stoi["<pad>"]
trg_pad_idx = trg_stoi["<pad>"]

# Encoder is sized by the source vocab, decoder by the target vocab.
encoder = Encoder(vocab_size=len(src_itos), pad_idx=src_pad_idx, **shared_arch)
decoder = Decoder(vocab_size=len(trg_itos), pad_idx=trg_pad_idx, **shared_arch)

# Wrap both halves in the Seq2Seq model and move it to the selected device.
# The special-token indices all come from the target vocabulary.
model = Seq2Seq(
    encoder,
    decoder,
    device,
    sos_idx=trg_stoi["<sos>"],
    eos_idx=trg_stoi["<eos>"],
    pad_idx=trg_pad_idx,
    max_len=50,
).to(device)

# Target padding positions are excluded from the loss via ignore_index.
criterion = nn.CrossEntropyLoss(ignore_index=trg_pad_idx)
optimizer = optim.Adam(model.parameters(), lr=1e-3)


In [None]:
import os

# The run is told to write its checkpoint under checkpoints/. Create that
# directory up front so a missing folder cannot abort training at save time
# (e.g. on a fresh checkout).
# NOTE(review): train_model may already create the directory itself; this
# call is a no-op in that case.
os.makedirs("checkpoints", exist_ok=True)

# Train with the loaders, optimizer and loss built in the cells above.
#   n_epochs            - passed as 10
#   clip                - passed as 1 (presumably a gradient-clipping
#                         threshold; confirm against src.train)
#   early_stop_patience - passed as 5
#   save_path           - where the checkpoint file is written
train_model(
    model,
    train_loader,
    val_loader,
    optimizer,
    criterion,
    n_epochs=10,
    clip=1,
    device=device,
    save_path="checkpoints/best_model.pth",
    early_stop_patience=5
)


In [None]:
import os
import pickle

# Make sure the output directory exists so this cell also works when it is
# run on its own (open(..., "wb") raises FileNotFoundError otherwise).
os.makedirs("checkpoints", exist_ok=True)

# Persist the four vocabulary objects returned by build_vocab so that code
# loading the checkpoint later can reuse exactly the same token indices.
# NOTE: this is a pickle file; only ever load it from a trusted location.
with open("checkpoints/vocab.pkl", "wb") as f:
    pickle.dump({
        "src_stoi": src_stoi,
        "src_itos": src_itos,
        "trg_stoi": trg_stoi,
        "trg_itos": trg_itos
    }, f)

print("Saved vocab to checkpoints/vocab.pkl")
