In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell executes so that edits to the project's .py files are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os

import psutil
import torch
from matplotlib import pyplot as plt

from config import filenames, folders
from lstm3 import train

# Prefer the GPU, but fall back to the CPU when CUDA is not available so the
# notebook still runs (more slowly) on machines without a usable GPU instead
# of failing when the device string is first used.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Hyperparameters and run settings. This mapping is passed to `train` and is
# also read when the vocabularies and the training dataset are built in the
# following cells.
config = dict(
    # Run identification.
    model_name='LSTM_3',
    feature='testing-teacher-forcing',

    # Data preparation: sequence-length cap and per-side minimum frequency.
    # (Alternative max_len kept for reference: 48.)
    max_len=24,
    min_freq_src=4,
    min_freq_trg=4,

    # Hard-coded vocabulary sizes.
    # NOTE(review): presumably these must match the vocabularies built with
    # the min_freq values above -- confirm whenever min_freq changes.
    src_vocab_size=29798,
    trg_vocab_size=21555,

    # Model size.
    # (Larger alternative kept for reference:
    #  embedding_dim=128, hidden_size=256, num_layers=3.)
    embedding_dim=64,
    hidden_size=128,
    num_layers=2,

    # Training length and regularisation.
    num_epochs=15,
    weight_decay=1e-5,
    label_smoothing=0.1,

    # Dropout probabilities, one per model component.
    dropout_enc=0.1,
    dropout_dec=0.1,
    dropout_emb=0.1,
    dropout_attention=0.1,

    # Optimisation settings.
    # NOTE(review): gamma / patience / threshold look like learning-rate
    # scheduler parameters -- confirm against lstm3.train.
    learning_rate=1e-3,
    gamma=0.2,
    patience=2,
    threshold=5e-4,
    batch_size=128,
)

def plot_losses(train_losses, val_losses):
    """Show the training and validation loss curves on a single figure.

    Each sequence is plotted against its index, which the x-axis labels as
    the epoch. The figure is displayed with ``plt.show()``; nothing is
    returned.

    Args:
        train_losses: Sequence of training-loss values.
        val_losses: Sequence of validation-loss values.
    """
    _, ax = plt.subplots(figsize=(10, 6))

    curves = (
        (train_losses, "Training Loss"),
        (val_losses, "Validation Loss"),
    )
    for values, legend_label in curves:
        ax.plot(values, label=legend_label)

    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    ax.set_title("Training and Validation Loss Over Epochs")
    ax.legend()
    ax.grid()
    plt.show()

In [None]:
from dataset import Vocab

# Build one vocabulary per language side from the training files, each with
# the minimum-frequency setting configured for that side.
vocab_src = Vocab(
    filenames['train_src'],
    min_freq=config['min_freq_src'],
)
vocab_trg = Vocab(
    filenames['train_trg'],
    min_freq=config['min_freq_trg'],
)

In [None]:
from dataset import TranslationDataset

# Training pairs: length cap taken from the shared config.
train_dataset = TranslationDataset(
    vocab_src,
    vocab_trg,
    filenames['train_src'],
    filenames['train_trg'],
    max_len=config['max_len'],
    device=device,
)

# Validation pairs come from the *test* files. Unlike the training set, this
# uses a fixed cap of 72 instead of config['max_len'] and passes
# sort_lengths=False.
val_dataset = TranslationDataset(
    vocab_src,
    vocab_trg,
    filenames['test_src'],
    filenames['test_trg'],
    max_len=72,
    device=device,
    sort_lengths=False,
)


In [None]:
# Run training with the settings above (Weights & Biases logging disabled).
# `train` returns two loss histories, which are plotted straight away.
train_losses, val_losses = train(
    config=config,
    device=device,
    use_wandb=False,
    filenames=filenames,
    folders=folders,
    vocab_src=vocab_src,
    vocab_trg=vocab_trg,
)

plot_losses(train_losses, val_losses)

100%|██████████| 195915/195915 [00:36<00:00, 5333.92it/s]
100%|██████████| 986/986 [00:00<00:00, 5215.30it/s]


LSTM_3(
  (src_embedding): Embedding(29798, 64, padding_idx=1)
  (trg_embedding): Embedding(21555, 64, padding_idx=1)
  (emb_dropout): Dropout(p=0.1, inplace=False)
  (enc_dropout): Dropout(p=0.1, inplace=False)
  (dec_dropout): Dropout(p=0.1, inplace=False)
  (attention_dropout): Dropout(p=0.1, inplace=False)
  (encoder): LSTM(64, 128, num_layers=2, batch_first=True, dropout=0.1, bidirectional=True)
  (encoder_output_proj): Linear(in_features=256, out_features=128, bias=True)
  (decoder): LSTM(64, 128, num_layers=2, batch_first=True, dropout=0.1)
  (encoder_hidden_proj): ModuleList(
    (0-1): 2 x Linear(in_features=256, out_features=128, bias=True)
  )
  (encoder_cell_proj): ModuleList(
    (0-1): 2 x Linear(in_features=256, out_features=128, bias=True)
  )
  (fc): Linear(in_features=256, out_features=21555, bias=True)
)


 25%|██▍       | 378/1531 [01:16<03:49,  5.03it/s]