In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell executes and edits to the project's .py files
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import torch

# Prefer the GPU, but fall back to the CPU when CUDA is not available so the
# notebook still runs (more slowly) instead of failing on the first
# `.to(device)` / `device=device` call.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Single place for every tunable value used by the experiment.
config = {
    # Experiment identity and data preparation.
    'model_name': 'LSTM_2',
    'feature': 'regularized',
    'max_len': 48,          # length cut-off passed to the training dataset
    'min_freq_src': 4,      # minimum token frequency for the source vocabulary
    'min_freq_trg': 4,      # minimum token frequency for the target vocabulary

    # Model size and regularisation.
    'embedding_dim': 128,
    'hidden_size': 256,
    'num_epochs': 15,
    'weight_decay': 1e-5,
    'label_smoothing': 0.1,
    'dropout': 0.2,

    # Optimiser / learning-rate schedule.
    # NOTE(review): gamma/patience/threshold are presumably the
    # ReduceLROnPlateau settings seen in the disabled training cell - confirm.
    'learning_rate': 1e-3,
    'gamma': 0.2,
    'patience': 0,
    'threshold': 0.001
}

In [3]:
from dataset import TranslationDataset, Vocab, TrainDataLoader, TestDataLoader
from config import filenames, folders
from lstm2 import LSTM_2


# Both vocabularies are built from the training files, each with its own
# minimum-frequency setting taken from `config`.
vocab_src = Vocab(
    filenames['train_src'],
    min_freq=config['min_freq_src'],
)
vocab_trg = Vocab(
    filenames['train_trg'],
    min_freq=config['min_freq_trg'],
)

# Training pairs, limited to config['max_len'].
train_dataset = TranslationDataset(
    vocab_src,
    vocab_trg,
    filenames['train_src'],
    filenames['train_trg'],
    max_len=config['max_len'],
    device=device,
)

# Validation pairs come from the test files, use a larger length limit (72)
# than training, and are requested with sort_lengths=True.
val_dataset = TranslationDataset(
    vocab_src,
    vocab_trg,
    filenames['test_src'],
    filenames['test_trg'],
    max_len=72,
    device=device,
    sort_lengths=True,
)

# Special-token ids.
# NOTE(review): hard-coded here - presumably they must match the layout that
# Vocab uses internally; confirm against dataset.Vocab.
unk_idx = 0
pad_idx = 1
bos_idx = 2
eos_idx = 3

train_loader = TrainDataLoader(train_dataset, shuffle=True)
val_loader = TestDataLoader(val_dataset)

src_vocab_size, trg_vocab_size = len(vocab_src), len(vocab_trg)

# Build the network from the configured sizes, then move it to the device.
model = LSTM_2(
    src_vocab_size=src_vocab_size,
    trg_vocab_size=trg_vocab_size,
    embedding_dim=config['embedding_dim'],
    hidden_size=config['hidden_size'],
    dropout=config['dropout'],
)
model = model.to(device)
print(model)

100%|██████████| 195915/195915 [00:24<00:00, 7996.68it/s]
100%|██████████| 986/986 [00:00<00:00, 8507.37it/s]


LSTM_2(
  (src_embedding): Embedding(30249, 128)
  (trg_embedding): Embedding(21950, 128)
  (encoder): LSTM(128, 256, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
  (encoder_output_proj): Linear(in_features=512, out_features=256, bias=True)
  (decoder): LSTM(128, 256, num_layers=2, batch_first=True, dropout=0.2)
  (encoder_hidden_proj): ModuleList(
    (0-1): 2 x Linear(in_features=512, out_features=256, bias=True)
  )
  (encoder_cell_proj): ModuleList(
    (0-1): 2 x Linear(in_features=512, out_features=256, bias=True)
  )
  (fc): Linear(in_features=512, out_features=21950, bias=True)
)


In [6]:
from matplotlib import pyplot as plt
def plot_losses(train_losses, val_losses):
    """Show the training and validation loss curves on a single figure.

    Args:
        train_losses: sequence of training-loss values, plotted against
            their index (the x-axis is labelled "Epoch").
        val_losses: sequence of validation-loss values, plotted the same way.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    curves = (
        (train_losses, "Training Loss"),
        (val_losses, "Validation Loss"),
    )

    plt.figure(figsize=(10, 6))
    for losses, legend_label in curves:
        plt.plot(losses, label=legend_label)

    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.title("Training and Validation Loss Over Epochs")
    plt.legend()
    plt.grid()
    plt.show()

In [9]:
# Load a saved checkpoint into the model; the training cell in this notebook
# is disabled, so these weights are what gets evaluated.
# NOTE(review): `model.load` is a project method - the (filename, folder)
# argument meaning is assumed from this call; confirm against lstm2.LSTM_2.
checkpoint_file = 'lstm-save-14.pt'
model.load(checkpoint_file, folders['weights'])

In [5]:
# Disabled training run, kept for reference only (every line is commented out).
# NOTE(review): as written it uses names that are not defined in the visible
# cells (`optim`, `ReduceLROnPlateau`, `train`, `weight_decay`, `gamma`,
# `num_epochs`, `test_loader`, `criterion`); these would have to be imported
# or taken from `config` / `val_loader` before re-enabling.
# optimizer = optim.AdamW(model.parameters(), lr=0.0001, weight_decay=weight_decay)
# scheduler = ReduceLROnPlateau(optimizer, patience=2, factor=gamma, verbose=True, threshold=1e-3)
# train_losses, val_losses = train(model, 
#                                  optimizer, 
#                                  num_epochs, 
#                                  train_loader, 
#                                  test_loader, 
#                                  criterion, 
#                                  vocab_trg, 
#                                  scheduler)
# plot_losses(train_losses, val_losses)

In [12]:
from submission import get_bleu

In [13]:
# Move the model to the evaluation device before scoring.
model.to(device)

# Score the model on the validation loader; the bare name on the last line
# keeps the value as the cell's displayed result.
val_bleu = get_bleu(model, val_loader, vocab_trg, device=device)
val_bleu

100%|██████████| 4/4 [00:09<00:00,  2.43s/it]


28.2

In [8]:
# model.save('lstm-deep-cut-vocab-12epoch.pt')


In [None]:
from submission import make_submission
from dataset import SubmissionDataset, SubmissionDataLoader
# Build the submission dataset from filenames['submission'] with the source
# vocabulary, wrap it in its loader, and hand it to make_submission.
submission_dataset = SubmissionDataset(
    filenames['submission'],
    vocab_src,
    device=device,
)
ldr = SubmissionDataLoader(submission_dataset)

# NOTE(review): the output saved with this cell is
# `KeyError: 'submission_filename'`. That key is not looked up in this cell,
# so the failing lookup is presumably inside the project code (or the output
# is stale) - confirm which key `config.filenames` actually provides.
make_submission(
    model,
    ldr,
    vocab_trg,
    device=device,
)

KeyError: 'submission_filename'