In [1]:
# Reload edited project modules before each cell runs, so changes to the local
# .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from matplotlib import pyplot as plt
import torch

from config import filenames, folders
from lstm3 import train

# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing at the first tensor allocation.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Run settings and hyperparameters passed to `train` and read by later cells.
# A smaller setup that was also tried: max_len=24, embedding_dim=64,
# hidden_size=128, num_layers=2.
config = {
    # Run identification.
    'model_name': 'LSTM_3',
    'feature': 'testing-teacher-forcing',

    # Data preparation: `max_len` is passed to the training dataset and the
    # `min_freq_*` values to the source/target `Vocab` constructors.
    'max_len': 42,
    'min_freq_src': 5,
    'min_freq_trg': 5,

    # These equal len(vocab_src) / len(vocab_trg) as printed after the
    # vocabularies are built; update them if the min_freq_* values or the
    # training files change.
    'src_vocab_size': 24991,
    'trg_vocab_size': 18710,

    # Model size.
    'embedding_dim': 128,
    'hidden_size': 256,
    'num_layers': 3,

    # Training length and regularisation.
    'num_epochs': 15,
    'weight_decay': 1e-5,
    'label_smoothing': 0.1,

    'dropout_enc': 0.1,
    'dropout_dec': 0.1,
    'dropout_emb': 0.1,
    'dropout_attention': 0.1,

    # Optimisation. NOTE(review): gamma / patience / threshold look like
    # learning-rate scheduler settings -- confirm against lstm3.train.
    'learning_rate': 1e-3,
    'gamma': 0.2,
    'patience': 2,
    'threshold': 5e-4,
    'batch_size': 128,
}

def plot_losses(train_losses, val_losses):
    """Plot training and validation loss curves on one figure.

    Args:
        train_losses: Sized sequence of training-loss values, expected to hold
            one entry per completed epoch.
        val_losses: Sized sequence of validation-loss values, expected to hold
            one entry per completed epoch.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    _, ax = plt.subplots(figsize=(10, 6))

    # Number epochs from 1 so the x-axis lines up with the "Epoch [1/15]"
    # progress lines printed during training, not the 0-based list index.
    ax.plot(range(1, len(train_losses) + 1), train_losses, label="Training Loss")
    ax.plot(range(1, len(val_losses) + 1), val_losses, label="Validation Loss")

    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    ax.set_title("Training and Validation Loss Over Epochs")
    ax.legend()
    ax.grid()
    plt.show()

In [3]:
from dataset import Vocab
# Build the source and target vocabularies from the training files, each with
# its own minimum-frequency setting from `config`.
vocab_src, vocab_trg = (
    Vocab(filenames['train_src'], min_freq=config['min_freq_src']),
    Vocab(filenames['train_trg'], min_freq=config['min_freq_trg']),
)

In [4]:
# Show the vocabulary sizes produced by the current min_freq_* settings.
print(len(vocab_src))
print(len(vocab_trg))

# `config` hard-codes these sizes; fail here, before any training starts, if
# they have drifted from the vocabularies that were actually built.
assert len(vocab_src) == config['src_vocab_size'], (
    f"config['src_vocab_size']={config['src_vocab_size']} but len(vocab_src)={len(vocab_src)}"
)
assert len(vocab_trg) == config['trg_vocab_size'], (
    f"config['trg_vocab_size']={config['trg_vocab_size']} but len(vocab_trg)={len(vocab_trg)}"
)

24991
18710


In [5]:
from dataset import TranslationDataset
# Training set: built from the train_* files with the `max_len` from config.
train_dataset = TranslationDataset(
    vocab_src,
    vocab_trg,
    filenames['train_src'],
    filenames['train_trg'],
    max_len=config['max_len'],
    device=device,
)

# Validation set: note that it is built from the test_* files. It uses its own,
# larger max_len; this can be overridden with config['val_max_len'] and falls
# back to the previous hard-coded value of 72.
val_max_len = config.get('val_max_len', 72)
val_dataset = TranslationDataset(
    vocab_src,
    vocab_trg,
    filenames['test_src'],
    filenames['test_trg'],
    max_len=val_max_len,
    device=device,
    sort_lengths=False,
)


100%|██████████| 195915/195915 [00:41<00:00, 4755.82it/s]
100%|██████████| 986/986 [00:00<00:00, 4469.54it/s]


In [None]:
# Collect everything `train` needs in one place, reusing the vocabularies and
# datasets that were built in the cells above.
train_kwargs = dict(
    config=config,
    filenames=filenames,
    folders=folders,
    use_wandb=False,
    device=device,
    vocab_src=vocab_src,
    vocab_trg=vocab_trg,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
)
train_losses, val_losses = train(**train_kwargs)

# Plot the loss curves returned by the run.
plot_losses(train_losses, val_losses)

LSTM_3(
  (src_embedding): Embedding(24991, 128, padding_idx=1)
  (trg_embedding): Embedding(18710, 128, padding_idx=1)
  (emb_dropout): Dropout(p=0.1, inplace=False)
  (enc_dropout): Dropout(p=0.1, inplace=False)
  (dec_dropout): Dropout(p=0.1, inplace=False)
  (attention_dropout): Dropout(p=0.1, inplace=False)
  (encoder): LSTM(128, 256, num_layers=3, batch_first=True, dropout=0.1, bidirectional=True)
  (encoder_output_proj): Linear(in_features=512, out_features=256, bias=True)
  (decoder): LSTM(128, 256, num_layers=3, batch_first=True, dropout=0.1)
  (encoder_hidden_proj): ModuleList(
    (0-2): 3 x Linear(in_features=512, out_features=256, bias=True)
  )
  (encoder_cell_proj): ModuleList(
    (0-2): 3 x Linear(in_features=512, out_features=256, bias=True)
  )
  (fc): Linear(in_features=512, out_features=18710, bias=True)
)


100%|██████████| 1531/1531 [03:47<00:00,  6.73it/s]
100%|██████████| 4/4 [00:00<00:00,  4.04it/s]


Epoch [1/15]	Train Loss: 5.2859	Val Loss: 7.9641


 21%|██▏       | 326/1531 [00:58<03:51,  5.21it/s]

In [None]:
# lstm-2: earlier run of `train` without pre-built vocabularies or datasets.
# NOTE(review): the recorded output below shows an LSTM_2 model, but this
# notebook now imports `train` from lstm3 and config['model_name'] is 'LSTM_3',
# so this cell is presumably stale. Confirm whether lstm3.train accepts a call
# without vocab/dataset arguments before re-running; it also overwrites
# train_losses / val_losses from the run above.
train_losses, val_losses = train(config=config, 
                                 filenames=filenames, 
                                 folders=folders, 
                                 use_wandb=False, 
                                 device=device)

# Plot the loss curves returned by this run.
plot_losses(train_losses, val_losses)

100%|██████████| 195915/195915 [00:30<00:00, 6329.81it/s]
100%|██████████| 986/986 [00:00<00:00, 5973.34it/s]


LSTM_2(
  (src_embedding): Embedding(24991, 128, padding_idx=1)
  (trg_embedding): Embedding(18710, 128, padding_idx=1)
  (encoder): LSTM(128, 256, num_layers=3, batch_first=True, dropout=0.1, bidirectional=True)
  (encoder_output_proj): Linear(in_features=512, out_features=256, bias=True)
  (decoder): LSTM(128, 256, num_layers=3, batch_first=True, dropout=0.1)
  (encoder_hidden_proj): ModuleList(
    (0-2): 3 x Linear(in_features=512, out_features=256, bias=True)
  )
  (encoder_cell_proj): ModuleList(
    (0-2): 3 x Linear(in_features=512, out_features=256, bias=True)
  )
  (fc): Linear(in_features=512, out_features=18710, bias=True)
)


  0%|          | 6/1531 [01:05<4:09:37,  9.82s/it]