In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
device = 'cpu'

config = {
    'model_name': 'LSTM_3',
    'feature': 'testing-teacher-forcing',
    'max_len': 42,
    'min_freq_src': 5,
    'min_freq_trg': 5,
    
    'src_vocab_size': 24991,
    'trg_vocab_size': 18710,

    'embedding_dim': 128,
    'hidden_size': 256,
    'num_layers': 3,

    'num_epochs': 15,
    'weight_decay': 1e-5,
    'label_smoothing': 0.1,

    'dropout_enc': 0.1,
    'dropout_dec': 0.1,
    'dropout_emb': 0.1,
    'dropout_attention': 0.1,

    'learning_rate': 1e-3,
    'gamma': 0.2,
    'patience': 2,
    'threshold': 5e-4,
    'batch_size': 128
}

In [3]:
from dataset import TranslationDataset, Vocab, TrainDataLoader, TestDataLoader
from config import filenames, folders
from lstm3 import LSTM_3

In [4]:
vocab_src = Vocab(filenames['train_src'], min_freq=config['min_freq_src'])
vocab_trg = Vocab(filenames['train_trg'], min_freq=config['min_freq_trg'])

train_dataset = TranslationDataset(vocab_src, 
                                vocab_trg, 
                                filenames['train_src'], 
                                filenames['train_trg'], 
                                max_len=config['max_len'], 
                                device=device)

val_dataset = TranslationDataset(vocab_src, 
                                vocab_trg, 
                                filenames['test_src'], 
                                filenames['test_trg'], 
                                max_len=72, 
                                device=device, 
                                sort_lengths=False)

100%|██████████| 195915/195915 [00:12<00:00, 15432.21it/s]
100%|██████████| 986/986 [00:00<00:00, 15688.98it/s]


In [5]:
print(len(vocab_src), len(vocab_trg))

24991 18710


In [443]:
unk_idx, pad_idx, bos_idx, eos_idx = 0, 1, 2, 3

train_loader = TrainDataLoader(train_dataset, shuffle=True)
val_loader = TestDataLoader(val_dataset, shuffle=False)

weights_filename = folders['weights'] + 'lstm-save-15.pt'

model = LSTM_3(config=config, weights_filename=weights_filename).to(device)
print(model)

LSTM_3(
  (src_embedding): Embedding(24991, 128, padding_idx=1)
  (trg_embedding): Embedding(18710, 128, padding_idx=1)
  (emb_dropout): Dropout(p=0.1, inplace=False)
  (enc_dropout): Dropout(p=0.1, inplace=False)
  (dec_dropout): Dropout(p=0.1, inplace=False)
  (attention_dropout): Dropout(p=0.1, inplace=False)
  (encoder): LSTM(128, 256, num_layers=3, batch_first=True, dropout=0.1, bidirectional=True)
  (encoder_output_proj): Linear(in_features=512, out_features=256, bias=True)
  (decoder): LSTM(128, 256, num_layers=3, batch_first=True, dropout=0.1)
  (encoder_hidden_proj): ModuleList(
    (0-2): 3 x Linear(in_features=512, out_features=256, bias=True)
  )
  (encoder_cell_proj): ModuleList(
    (0-2): 3 x Linear(in_features=512, out_features=256, bias=True)
  )
  (fc): Linear(in_features=512, out_features=18710, bias=True)
)


In [7]:
from matplotlib import pyplot as plt
def plot_losses(train_losses, val_losses):
    plt.figure(figsize=(10, 6))
    plt.plot(train_losses, label="Training Loss")
    plt.plot(val_losses, label="Validation Loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.title("Training and Validation Loss Over Epochs")
    plt.legend()
    plt.grid()
    plt.show()

In [8]:
model.load(weights_filename)

In [9]:
import numpy as np
np.load('../weights/train.npy')

array([7.98365283, 8.00939226, 7.99636304, 7.95958185, 8.03385913,
       8.08714366, 5.78621149, 5.70957637, 5.64270401, 5.58472025,
       5.5331763 , 5.47639728, 5.42644572, 5.33323669, 5.34423482])

In [10]:
from submission import get_bleu

In [11]:
from dataset import RawDataset
raw_dataset = RawDataset(filenames['test_src'])

In [444]:
model.to(device)
get_bleu(model, dataloader=val_loader, vocab_trg=vocab_trg, filenames=filenames, device=device, raw_dataset=raw_dataset)

100%|██████████| 4/4 [00:12<00:00,  3.24s/it]


0.73

In [None]:
# model.save('lstm-deep-cut-vocab-12epoch.pt')

In [375]:
import torch
val_loader = TestDataLoader(val_dataset, batch_size=1, shuffle=False)
for src_seq, _ in val_loader:
    batch_size = src_seq.size(1)
    trg_seq = torch.tensor([[bos_idx]] * batch_size, dtype=torch.long).to(device)  # (batch_size, 1)
    print(batch_size)
    break

20


In [429]:
vocab_trg.decode_idx(18611)

'years'

In [None]:
import torch
val_loader = TestDataLoader(val_dataset, batch_size=128, shuffle=False)
for src_seq, _ in val_loader:
    batch_size = src_seq.size(1)
    trg_seq = torch.tensor([[bos_idx]] * batch_size, dtype=torch.long).to(device)  # (batch_size, 1)
    # print(batch_size)
    break
from time import sleep
for batch_idx, (src, trg) in enumerate(val_loader):
    predictions = model.inference(src, device=device) # batch
    for i in range(len(src)):
        print(list(src[i]).index(eos_idx),list(trg[i]).index(eos_idx), list(predictions[i]).index(eos_idx))
        print("src:\t", " ".join(vocab_src.decode(src[i])))
        print("trg:\t", " ".join(vocab_trg.decode(trg[i])))
        print("pred:\t", " ".join(vocab_trg.decode(predictions[i])))
        sleep(3)

19 21 17
als ich <NUM> jahre alt war , wurde ich eines morgens von den klängen heller freude geweckt .
when i was <NUM> , i remember waking up one morning to the sound of joy in my house .
i was <NUM> years , i was one morning
17 15 15
mein vater hörte sich auf seinem kleinen , grauen radio die der bbc an .
my father was listening to bbc news on his small , gray radio .
father was on his ,
20 21 17
er sah sehr glücklich aus , was damals ziemlich ungewöhnlich war , da ihn die nachrichten meistens .
there was a big smile on his face which was unusual then , because the news mostly depressed him .
he very happy
11 12 9
er rief : " die taliban sind weg ! "
" the taliban are gone ! " my father .
said , " the taliban are gone !
20 22 17
ich wusste nicht , was das bedeutete , aber es machte meinen vater offensichtlich sehr , sehr glücklich .
i didn 't know what it meant , but i could see that my father was very , very happy .
didn 't know what it meant but
15 15 12
" jetzt kannst du auf eine 

KeyboardInterrupt: 

In [None]:
raw_submission = RawDataset(filenames['submission_src'])

In [176]:
from submission import make_submission
from dataset import SubmissionDataset, SubmissionDataLoader
submission_dataset = SubmissionDataset(filenames['submission_src'], vocab_src, device=device)
ldr = SubmissionDataLoader(submission_dataset)
make_submission(model, ldr, vocab_trg, filenames, device=device, raw_dataset=raw_submission)

NameError: name 'raw_submission' is not defined

In [15]:
src, trg = val_dataset[0]

In [210]:
from dataset import TestDataLoader
ldr = TestDataLoader(val_dataset, batch_size=2)
src, trg = ldr[0]
src.size(1)

20

In [215]:
predictions = model.inference(src, device='cpu')
predictions

tensor([[    2,  8125, 18184,     4, 18611,  8125, 18184, 18184, 11593, 10819,
         10819, 10819, 10819, 10819, 16813, 11529, 16813, 11529,    32,    32,
            32,    32,     3],
        [    2,  6209, 18184, 11587,  7839,    28,  9758,    28,    28,  1543,
            28,    28, 13407, 16813, 16813, 11529, 16813, 11529, 16813,    32,
             3,     3,     3]])

In [218]:
import torch
from torch.nn.functional import softmax
trg_words = ['<BOS>', 'i', 'was', '<NUM>', 'years', 'i', 'was']
i = len(trg_words) - 1
trg_pred = torch.tensor([vocab_trg.encode_word(word) for word in trg_words], dtype=torch.long).unsqueeze(0)
predictions = model.inference(src, device='cpu')
print(vocab_trg.decode(trg_pred.squeeze(0).numpy()))
for idx in predictions[0]:
    print(vocab_trg.decode_idx(idx.item()), end=' ')

['i', 'was', '<NUM>', 'years', 'i', 'was']
<BOS> i was <NUM> years i was was one morning morning morning morning morning the of the of . . . . <EOS> 

In [183]:
vocab_trg.decode(model.inference(src, device='cpu').squeeze(0))

['i',
 'was',
 '<NUM>',
 'years',
 'i',
 'was',
 'was',
 'one',
 'morning',
 'morning',
 'morning',
 'morning',
 'morning',
 'the',
 'of',
 'the',
 'of',
 '.',
 '.',
 '.',
 '.']

In [154]:
trg_pred.squeeze(0).numpy()

array([    2,  8125, 18184,     4, 18611,  8125, 18184])