**Step:1**

In [None]:
train_data_path='/kaggle/input/dakshina-dataset/dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.train.tsv'
valid_data_path='/kaggle/input/dakshina-dataset/dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.dev.tsv'
test_data_path='/kaggle/input/dakshina-dataset/dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.test.tsv'

**Step:2**

In [None]:
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset
def load_and_prepare_data(path, batch_size=32):
    df = pd.read_csv(path, delimiter="\t", header=None)
    df.columns = ['target_word', 'input_word', 'dummy']
    df = df.drop(columns=['dummy'])
    df = df.dropna()
    df = df.reset_index(drop=True)
    df['input_word'] = df['input_word'].astype(str)
    df['target_word'] = df['target_word'].astype(str)

    max_input_len = max(len(word) for word in df['input_word'])
    max_target_len = max(len(word) for word in df['target_word'])

    input_letter_vocab = {'<pad>': 0, '<sos>': 1, '<eos>': 2}
    target_letter_vocab = {'<pad>': 0, '<sos>': 1, '<eos>': 2}
    letter_idx = 3

    for letter in sorted(set(''.join(df['input_word']))):
        input_letter_vocab[letter] = letter_idx
        letter_idx += 1

    letter_idx = 3
    for letter in sorted(set(''.join(df['target_word']))):
        if letter not in target_letter_vocab:
            target_letter_vocab[letter] = letter_idx
            letter_idx += 1

    def encode_input_letters(word):
        token_ids = [input_letter_vocab[char] for char in word if char in input_letter_vocab]
        padded = token_ids[:max_input_len] + [input_letter_vocab['<pad>']] * (max_input_len - len(token_ids))
        return padded

    def encode_target_letters(word):
        token_ids = [target_letter_vocab[char] for char in word if char in target_letter_vocab]
        padded = [target_letter_vocab['<pad>']] + token_ids[:max_target_len] + [target_letter_vocab['<pad>']] * (max_target_len - len(token_ids))
        return padded

    input_tensors = [torch.tensor(encode_input_letters(word)) for word in df['input_word']]
    target_tensors = [torch.tensor(encode_target_letters(word)) for word in df['target_word']]

    input_tensors = torch.stack(input_tensors)
    target_tensors = torch.stack(target_tensors)

    dataset = TensorDataset(input_tensors, target_tensors)
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    return dataset, data_loader, input_letter_vocab, target_letter_vocab, max_input_len, max_target_len


**Step:3**

In [None]:
training_dataset,train_loader_ben,train_input_vocab,train_target_vocab,max_train_input_len,max_train_target_len =load_and_prepare_data(train_data_path,batch_size = 64)
print(train_input_vocab,train_target_vocab,max_train_input_len,max_train_target_len)

{'<pad>': 0, '<sos>': 1, '<eos>': 2, 'a': 3, 'b': 4, 'c': 5, 'd': 6, 'e': 7, 'f': 8, 'g': 9, 'h': 10, 'i': 11, 'j': 12, 'k': 13, 'l': 14, 'm': 15, 'n': 16, 'o': 17, 'p': 18, 'q': 19, 'r': 20, 's': 21, 't': 22, 'u': 23, 'v': 24, 'w': 25, 'x': 26, 'y': 27, 'z': 28} {'<pad>': 0, '<sos>': 1, '<eos>': 2, 'ঁ': 3, 'ং': 4, 'ঃ': 5, 'অ': 6, 'আ': 7, 'ই': 8, 'ঈ': 9, 'উ': 10, 'ঊ': 11, 'ঋ': 12, 'এ': 13, 'ঐ': 14, 'ও': 15, 'ঔ': 16, 'ক': 17, 'খ': 18, 'গ': 19, 'ঘ': 20, 'ঙ': 21, 'চ': 22, 'ছ': 23, 'জ': 24, 'ঝ': 25, 'ঞ': 26, 'ট': 27, 'ঠ': 28, 'ড': 29, 'ঢ': 30, 'ণ': 31, 'ত': 32, 'থ': 33, 'দ': 34, 'ধ': 35, 'ন': 36, 'প': 37, 'ফ': 38, 'ব': 39, 'ভ': 40, 'ম': 41, 'য': 42, 'র': 43, 'ল': 44, 'শ': 45, 'ষ': 46, 'স': 47, 'হ': 48, '়': 49, 'া': 50, 'ি': 51, 'ী': 52, 'ু': 53, 'ূ': 54, 'ৃ': 55, 'ে': 56, 'ৈ': 57, 'ো': 58, 'ৌ': 59, '্': 60, 'ৎ': 61, '২': 62} 22 22


**Step:4**

In [None]:
import pandas as pd
import torch
from torch.utils.data import TensorDataset, DataLoader

def load_and_prepare_test_valid_data(path, batch_size=32,
                          input_letter_vocab=None,
                          target_letter_vocab=None,
                          max_input_len=None,
                          max_target_len=None):
    df = pd.read_csv(path, delimiter="\t", header=None)
    df.columns = ['target_word', 'input_word', 'dummy']
    df = df.drop(columns=['dummy'])
    df = df.dropna()
    df = df.reset_index(drop=True)
    df['input_word'] = df['input_word'].astype(str)
    df['target_word'] = df['target_word'].astype(str)
    if input_letter_vocab is None:
        input_letter_vocab = {'<pad>': 0, '<sos>': 1, '<eos>': 2}
        letter_idx = 3
        for letter in sorted(set(''.join(df['input_word']))):
            input_letter_vocab[letter] = letter_idx
            letter_idx += 1
    if target_letter_vocab is None:
        target_letter_vocab = {'<pad>': 0, '<sos>': 1, '<eos>': 2}
        letter_idx = 3
        for letter in sorted(set(''.join(df['target_word']))):
            if letter not in target_letter_vocab:
                target_letter_vocab[letter] = letter_idx
                letter_idx += 1
    if max_input_len is None:
        max_input_len = max(len(word) for word in df['input_word'])
    if max_target_len is None:
        max_target_len = max(len(word) for word in df['target_word'])
    def encode_input_letters(word):
        token_ids = [input_letter_vocab[char] for char in word if char in input_letter_vocab]
        padded = token_ids[:max_input_len] + [input_letter_vocab['<pad>']] * (max_input_len - len(token_ids))
        return padded

    def encode_target_letters(word):
        token_ids = [target_letter_vocab[char] for char in word if char in target_letter_vocab]
        padded = [target_letter_vocab['<sos>']] + token_ids[:max_target_len] + [target_letter_vocab['<eos>']]
        padded += [target_letter_vocab['<pad>']] * (max_target_len + 2 - len(padded))
        return padded
    input_tensors = [torch.tensor(encode_input_letters(word)) for word in df['input_word']]
    target_tensors = [torch.tensor(encode_target_letters(word)) for word in df['target_word']]
    input_tensors = torch.stack(input_tensors)
    target_tensors = torch.stack(target_tensors)
    dataset = TensorDataset(input_tensors, target_tensors)
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    return dataset, data_loader, input_letter_vocab, target_letter_vocab, max_input_len, max_target_len

**Step:5**

In [None]:
val_dataset, val_data_loader, val_input_letter_vocab, val_target_letter_vocab,val_max_input_len, val_max_target_len=load_and_prepare_test_valid_data(valid_data_path,64,train_input_vocab,train_target_vocab,max_train_input_len,max_train_target_len)
test_dataset, test_data_loader, test_input_letter_vocab, test_target_letter_vocab, test_max_input_len, test_max_target_len=load_and_prepare_test_valid_data(test_data_path,64,train_input_vocab,train_target_vocab,max_train_input_len,max_train_target_len)

**Step:6**

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
import torch.optim as optim

class TextEncoder(nn.Module):
    def __init__(self, input_size, hidden_size, embed_size, encoder_layers=1, drop_prob=0.5, cell_type='gru', bidirectional=False):
        super(TextEncoder, self).__init__()
        self.embedding = nn.Embedding(input_size, embed_size)
        self.dropout = nn.Dropout(drop_prob)
        self.cell_type = cell_type
        self.bidirectional = bidirectional

        rnn_cls = {'lstm': nn.LSTM, 'gru': nn.GRU, 'rnn': nn.RNN}[cell_type]
        self.rnn = rnn_cls(embed_size, hidden_size, encoder_layers,
                           dropout=drop_prob, bidirectional=bidirectional, batch_first=True)

    def forward(self, x):
        embedded = self.dropout(self.embedding(x))
        outputs, hidden = self.rnn(embedded)
        return outputs, hidden

class Decoder(nn.Module):
    def __init__(self, hidden_size, embed_size, output_size, decoder_layers=1, drop_prob=0.5, cell_type='gru'):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(output_size, embed_size)
        self.dropout = nn.Dropout(drop_prob)

        rnn_cls = {'lstm': nn.LSTM, 'gru': nn.GRU, 'rnn': nn.RNN}[cell_type]
        self.rnn = rnn_cls(embed_size, hidden_size, decoder_layers,
                           dropout=drop_prob, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        x = x.unsqueeze(1)
        embedded = self.dropout(self.embedding(x))
        output, hidden = self.rnn(embedded, hidden)
        output = self.fc(output.squeeze(1))
        return output, hidden

class Seq2SeqModel(nn.Module):
    def __init__(self, input_size, output_size, hidden_size, embed_size, beam_width,
                 encoder_layers=1, decoder_layers=1, drop_prob=0.3, cell_type='gru', bidirectional=True):
        super(Seq2SeqModel, self).__init__()
        self.encoder = TextEncoder(input_size, hidden_size, embed_size,
                                   encoder_layers, drop_prob, cell_type, bidirectional)

        self.bidirectional = bidirectional
        self.beam_width = beam_width
        enc_hidden_size = 2 * hidden_size if bidirectional else hidden_size

        self.decoder = Decoder(hidden_size, embed_size, output_size,
                               decoder_layers, drop_prob, cell_type)

        self.cell_type = cell_type
        self.encoder_layers = encoder_layers
        self.decoder_layers = decoder_layers

    def forward(self, source, target, teacher_forcing_ratio=0.5):
        batch_size = source.size(0)
        target_len = target.size(1)
        output_vocab_size = self.decoder.fc.out_features

        outputs = torch.zeros(batch_size, target_len, output_vocab_size).to(source.device)

        encoder_outputs, encoder_hidden = self.encoder(source)
        decoder_hidden = self._init_decoder_hidden(encoder_hidden)
        decoder_input = target[:, 0]

        for t in range(1, target_len):
            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden)
            outputs[:, t] = decoder_output
            teacher_force = torch.rand(1).item() < teacher_forcing_ratio
            decoder_input = target[:, t] if teacher_force else decoder_output.argmax(1)

        return outputs, None

    def _init_decoder_hidden(self, encoder_hidden):
        decoder_layers = self.decoder.rnn.num_layers
        if self.cell_type == 'lstm':
            h, c = encoder_hidden
            if self.bidirectional:
                h = self._merge_bidirectional(h)
                c = self._merge_bidirectional(c)
            h = self._pad_or_trim(h, decoder_layers)
            c = self._pad_or_trim(c, decoder_layers)
            return (h, c)
        else:
            h = encoder_hidden
            if self.bidirectional:
                h = self._merge_bidirectional(h)
            h = self._pad_or_trim(h, decoder_layers)
            return h

    def _merge_bidirectional(self, hidden):
        return hidden.view(self.encoder.rnn.num_layers, 2, hidden.size(1), hidden.size(2)).sum(1)

    def _pad_or_trim(self, hidden, target_layers):
        if hidden.shape[0] < target_layers:
            pad = torch.zeros(target_layers - hidden.shape[0], *hidden.shape[1:], device=hidden.device)
            return torch.cat([hidden, pad], dim=0)
        return hidden[:target_layers]

    def beam_search_decode(self, source, sos_idx, eos_idx, max_len=50):
        device = source.device
        batch_size = source.size(0)
        assert batch_size == 1, "Beam search decoding supports batch size 1 for simplicity."

        encoder_outputs, encoder_hidden = self.encoder(source)
        decoder_hidden = self._init_decoder_hidden(encoder_hidden)

        beams = [(0.0, [sos_idx], decoder_hidden)]
        completed_sequences = []

        for _ in range(max_len):
            new_beams = []
            for log_prob, seq, hidden in beams:
                decoder_input = torch.tensor([[seq[-1]]], device=device)
                with torch.no_grad():
                    decoder_output, new_hidden = self.decoder(decoder_input, hidden)
                    probs = F.log_softmax(decoder_output, dim=1)
                    topk_probs, topk_indices = probs.topk(self.beam_width)

                for k in range(self.beam_width):
                    next_token = topk_indices[0, k].item()
                    next_log_prob = log_prob + topk_probs[0, k].item()
                    new_seq = seq + [next_token]

                    if next_token == eos_idx:
                        completed_sequences.append((next_log_prob, new_seq))
                    else:
                        new_beams.append((next_log_prob, new_seq, new_hidden))

            beams = sorted(new_beams, key=lambda x: x[0], reverse=True)[:self.beam_width]

            if len(completed_sequences) >= self.beam_width:
                break

        if not completed_sequences:
            completed_sequences = [(log_prob, seq) for log_prob, seq, _ in beams]

        completed_sequences = sorted(completed_sequences, key=lambda x: x[0], reverse=True)
        best_log_prob, best_seq = completed_sequences[0]
        return best_seq, None

def train_model(model, data_loader, loss_function, optimizer, device):
    model.train()
    total_loss = 0

    for input_data, target_data in data_loader:
        input_data = input_data.to(device)
        target_data = target_data.to(device)

        optimizer.zero_grad()
        predictions, _ = model(input_data, target_data)
        output_size = predictions.shape[-1]
        predictions = predictions.view(-1, output_size)
        target_data = target_data.view(-1)

        loss = loss_function(predictions, target_data)
        total_loss += loss.item()

        loss.backward()
        optimizer.step()

    return model, total_loss / len(data_loader)

def evaluate_model(model, data_loader, loss_function, device, pad_token_id=0):
    model.eval()
    total_loss = 0
    total_tokens = 0
    correct_tokens = 0

    with torch.no_grad():
        for input_data, target_data in data_loader:
            input_data = input_data.to(device)
            target_data = target_data.to(device)

            predictions, _ = model(input_data, target_data, teacher_forcing_ratio=0.0)
            output_size = predictions.shape[-1]
            loss = loss_function(predictions.view(-1, output_size), target_data.view(-1))
            total_loss += loss.item()

            predicted_tokens = predictions.argmax(dim=-1)
            mask = target_data != pad_token_id
            correct = (predicted_tokens == target_data) & mask
            correct_tokens += correct.sum().item()
            total_tokens += mask.sum().item()

    average_loss = total_loss / len(data_loader)
    accuracy = correct_tokens / total_tokens if total_tokens > 0 else 0.0
    return average_loss, accuracy * 100

**Step:7**

In [None]:
import wandb
import numpy as np
from types import SimpleNamespace
import random

In [None]:
import getpass
api_key = getpass.getpass("Enter your W&B API Key: ")
wandb.login(key=api_key)

Enter your W&B API Key:  ········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mma24m022[0m ([33mma24m022-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

**Step:8**

In [None]:
sweep_config = {
    'method': 'bayes',
    'metric': {
        'name': 'val_accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        'embedding_size': {
            'values': [256, 192, 128, 64, 32]
        },
        'dropout': {
            'values': [0.0,0.1,0.2,0.3,0.4,0.5]
        },
        'encoder_layers': {
            'values': [1,2,3]
        },
        'decoder_layers': {
            'values': [1,2,3]
        },
        'hidden_layer_size': {
            'values': [512, 256, 192, 128, 64]
        },
        'cell_type': {
            'values': ['lstm', 'rnn', 'gru']
        },
        'bidirectional': {
            'values': [True, False]
        },
        'batch_size': {
            'values': [128, 64, 32, 16]
        },
        'num_epochs': {
            'values': [5]
        },
        'learning_rate': {
            'values': [0.0001, 0.001, 0.005, 0.01]
        },
         'beam_width': {
            'values': [10, 5, 3, 2, 1]
        }
    }
}
sweep_id = wandb.sweep(sweep=sweep_config, project='DL_Translation')


Create sweep with ID: is1xsx5h
Sweep URL: https://wandb.ai/ma24m022-indian-institute-of-technology-madras/DL_Translation/sweeps/is1xsx5h


**Step:9**

In [None]:
def main():

    with wandb.init() as run:
        run_name="ct-"+str(wandb.config.cell_type)+"_el-"+str(wandb.config.encoder_layers)+"_dl-"+str(wandb.config.decoder_layers)+"_drop-"+str(wandb.config.dropout)+"_es-"+str(wandb.config.embedding_size)+"_hs-"+str(wandb.config.hidden_layer_size)+"_bs-"+str(wandb.config.batch_size)+"_ep-"+str(wandb.config.num_epochs)+"lr"+str(wandb.config.learning_rate)
        wandb.run.name=run_name

        model = Seq2SeqModel(input_size=29, output_size=63, hidden_size=wandb.config.hidden_layer_size,embed_size=wandb.config.embedding_size,beam_width=wandb.config.beam_width,encoder_layers=wandb.config.encoder_layers,
                        decoder_layers=wandb.config.decoder_layers,drop_prob=wandb.config.dropout, cell_type=wandb.config.cell_type, bidirectional=wandb.config.bidirectional)
        print(model)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=wandb.config.learning_rate)

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        custom_dataset1,train_loader_ben,a,b,_,_ = load_and_prepare_data(train_data_path,batch_size = wandb.config.batch_size)
        custom_dataset,val_loader_ben,_,_,_,_ =load_and_prepare_data(valid_data_path,batch_size = wandb.config.batch_size)
        for epoch in range(wandb.config.num_epochs):
            trained_model, train_loss = train_model(model, train_loader_ben, criterion, optimizer, device)
            val_loss, val_accuracy = evaluate_model(trained_model,val_data_loader, criterion, device)
            model = trained_model
            wandb.log({'Epoch': epoch, 'train_loss': train_loss , ' val_loss': val_loss, 'val_accuracy':val_accuracy})
            print(f'Epoch {epoch+1}/{wandb.config.num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')
wandb.agent(sweep_id, function= main,count=15)
# wandb.finish()

[34m[1mwandb[0m: Agent Starting Run: k2t8ani9 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Seq2SeqModel(
  (encoder): TextEncoder(
    (embedding): Embedding(29, 128)
    (dropout): Dropout(p=0.1, inplace=False)
    (rnn): LSTM(128, 512, num_layers=3, batch_first=True, dropout=0.1, bidirectional=True)
  )
  (decoder): Decoder(
    (embedding): Embedding(63, 128)
    (dropout): Dropout(p=0.1, inplace=False)
    (rnn): LSTM(128, 512, num_layers=2, batch_first=True, dropout=0.1)
    (fc): Linear(in_features=512, out_features=63, bias=True)
  )
)
Epoch 1/5, Train Loss: 1.1670, Val Loss: 1.6708, Val Accuracy: 14.2717
Epoch 2/5, Train Loss: 0.9251, Val Loss: 1.5821, Val Accuracy: 18.6613
Epoch 3/5, Train Loss: 0.7578, Val Loss: 1.5518, Val Accuracy: 21.8558
Epoch 4/5, Train Loss: 0.6486, Val Loss: 1.5401, Val Accuracy: 26.1459
Epoch 5/5, Train Loss: 0.5715, Val Loss: 1.5499, Val Accuracy: 30.0332


0,1
val_loss,█▃▂▁▂
Epoch,▁▃▅▆█
train_loss,█▅▃▂▁
val_accuracy,▁▃▄▆█

0,1
val_loss,1.54994
Epoch,4.0
train_loss,0.57152
val_accuracy,30.03324


[34m[1mwandb[0m: Agent Starting Run: 8jyn0gvv with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5




Seq2SeqModel(
  (encoder): TextEncoder(
    (embedding): Embedding(29, 32)
    (dropout): Dropout(p=0.4, inplace=False)
    (rnn): LSTM(32, 256, batch_first=True, dropout=0.4)
  )
  (decoder): Decoder(
    (embedding): Embedding(63, 32)
    (dropout): Dropout(p=0.4, inplace=False)
    (rnn): LSTM(32, 256, batch_first=True, dropout=0.4)
    (fc): Linear(in_features=256, out_features=63, bias=True)
  )
)
Epoch 1/5, Train Loss: 1.1245, Val Loss: 1.8845, Val Accuracy: 7.0406
Epoch 2/5, Train Loss: 1.0449, Val Loss: 1.9984, Val Accuracy: 9.7668
Epoch 3/5, Train Loss: 0.9800, Val Loss: 2.0912, Val Accuracy: 10.0544
Epoch 4/5, Train Loss: 0.9365, Val Loss: 2.2039, Val Accuracy: 10.3698
Epoch 5/5, Train Loss: 0.8930, Val Loss: 2.1727, Val Accuracy: 12.1788


0,1
val_loss,▁▃▆█▇
Epoch,▁▃▅▆█
train_loss,█▆▄▂▁
val_accuracy,▁▅▅▆█

0,1
val_loss,2.1727
Epoch,4.0
train_loss,0.893
val_accuracy,12.17879


[34m[1mwandb[0m: Agent Starting Run: 6ascm9tk with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beam_width: 2
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_epochs: 5




Seq2SeqModel(
  (encoder): TextEncoder(
    (embedding): Embedding(29, 32)
    (dropout): Dropout(p=0.5, inplace=False)
    (rnn): LSTM(32, 64, num_layers=3, batch_first=True, dropout=0.5, bidirectional=True)
  )
  (decoder): Decoder(
    (embedding): Embedding(63, 32)
    (dropout): Dropout(p=0.5, inplace=False)
    (rnn): LSTM(32, 64, batch_first=True, dropout=0.5)
    (fc): Linear(in_features=64, out_features=63, bias=True)
  )
)
Epoch 1/5, Train Loss: 1.0821, Val Loss: 2.0259, Val Accuracy: 7.2687
Epoch 2/5, Train Loss: 1.0782, Val Loss: 2.1355, Val Accuracy: 6.8040
Epoch 3/5, Train Loss: 1.0858, Val Loss: 2.1997, Val Accuracy: 8.2187
Epoch 4/5, Train Loss: 1.0850, Val Loss: 2.3844, Val Accuracy: 7.6691
Epoch 5/5, Train Loss: 1.0866, Val Loss: 2.4973, Val Accuracy: 8.8484


0,1
val_loss,▁▃▄▆█
Epoch,▁▃▅▆█
train_loss,▄▁▇▇█
val_accuracy,▃▁▆▄█

0,1
val_loss,2.49728
Epoch,4.0
train_loss,1.08661
val_accuracy,8.84837


[34m[1mwandb[0m: Agent Starting Run: w5fwau0g with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5




Seq2SeqModel(
  (encoder): TextEncoder(
    (embedding): Embedding(29, 64)
    (dropout): Dropout(p=0.5, inplace=False)
    (rnn): LSTM(64, 256, batch_first=True, dropout=0.5)
  )
  (decoder): Decoder(
    (embedding): Embedding(63, 64)
    (dropout): Dropout(p=0.5, inplace=False)
    (rnn): LSTM(64, 256, batch_first=True, dropout=0.5)
    (fc): Linear(in_features=256, out_features=63, bias=True)
  )
)
Epoch 1/5, Train Loss: 1.3391, Val Loss: 1.6487, Val Accuracy: 6.3466
Epoch 2/5, Train Loss: 1.1970, Val Loss: 1.6579, Val Accuracy: 7.8353
Epoch 3/5, Train Loss: 1.1544, Val Loss: 1.6888, Val Accuracy: 8.0051
Epoch 4/5, Train Loss: 1.1109, Val Loss: 1.7070, Val Accuracy: 9.9694
Epoch 5/5, Train Loss: 1.0704, Val Loss: 1.6684, Val Accuracy: 11.8937


0,1
val_loss,▁▂▆█▃
Epoch,▁▃▅▆█
train_loss,█▄▃▂▁
val_accuracy,▁▃▃▆█

0,1
val_loss,1.66836
Epoch,4.0
train_loss,1.07037
val_accuracy,11.89367


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: dm9wtx78 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beam_width: 10
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 5


Seq2SeqModel(
  (encoder): TextEncoder(
    (embedding): Embedding(29, 64)
    (dropout): Dropout(p=0.2, inplace=False)
    (rnn): RNN(64, 256, num_layers=3, batch_first=True, dropout=0.2)
  )
  (decoder): Decoder(
    (embedding): Embedding(63, 64)
    (dropout): Dropout(p=0.2, inplace=False)
    (rnn): RNN(64, 256, num_layers=3, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=256, out_features=63, bias=True)
  )
)
Epoch 1/5, Train Loss: 1.2124, Val Loss: 2.0543, Val Accuracy: 3.8800
Epoch 2/5, Train Loss: 1.1657, Val Loss: 2.0482, Val Accuracy: 4.8409
Epoch 3/5, Train Loss: 1.1447, Val Loss: 2.1122, Val Accuracy: 4.6019
Epoch 4/5, Train Loss: 1.1334, Val Loss: 2.1230, Val Accuracy: 4.8409
Epoch 5/5, Train Loss: 1.1209, Val Loss: 2.1715, Val Accuracy: 6.0615


0,1
val_loss,▁▁▅▅█
Epoch,▁▃▅▆█
train_loss,█▄▃▂▁
val_accuracy,▁▄▃▄█

0,1
val_loss,2.17153
Epoch,4.0
train_loss,1.1209
val_accuracy,6.06149


[34m[1mwandb[0m: Agent Starting Run: pvcao9da with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 10
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 5


Seq2SeqModel(
  (encoder): TextEncoder(
    (embedding): Embedding(29, 64)
    (dropout): Dropout(p=0.1, inplace=False)
    (rnn): LSTM(64, 256, num_layers=3, batch_first=True, dropout=0.1)
  )
  (decoder): Decoder(
    (embedding): Embedding(63, 64)
    (dropout): Dropout(p=0.1, inplace=False)
    (rnn): LSTM(64, 256, num_layers=3, batch_first=True, dropout=0.1)
    (fc): Linear(in_features=256, out_features=63, bias=True)
  )
)
Epoch 1/5, Train Loss: 1.2940, Val Loss: 1.7062, Val Accuracy: 7.5720
Epoch 2/5, Train Loss: 1.1600, Val Loss: 1.7622, Val Accuracy: 9.3592
Epoch 3/5, Train Loss: 1.1009, Val Loss: 1.8186, Val Accuracy: 9.2961
Epoch 4/5, Train Loss: 1.0658, Val Loss: 1.8763, Val Accuracy: 9.7680
Epoch 5/5, Train Loss: 1.0386, Val Loss: 1.9767, Val Accuracy: 10.3298


0,1
val_loss,▁▂▄▅█
Epoch,▁▃▅▆█
train_loss,█▄▃▂▁
val_accuracy,▁▆▅▇█

0,1
val_loss,1.97666
Epoch,4.0
train_loss,1.03861
val_accuracy,10.32977


[34m[1mwandb[0m: Agent Starting Run: t3my86lv with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 192
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5




Seq2SeqModel(
  (encoder): TextEncoder(
    (embedding): Embedding(29, 192)
    (dropout): Dropout(p=0.2, inplace=False)
    (rnn): LSTM(192, 128, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
  )
  (decoder): Decoder(
    (embedding): Embedding(63, 192)
    (dropout): Dropout(p=0.2, inplace=False)
    (rnn): LSTM(192, 128, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=128, out_features=63, bias=True)
  )
)
Epoch 1/5, Train Loss: 1.1045, Val Loss: 1.4995, Val Accuracy: 16.8802
Epoch 2/5, Train Loss: 0.8800, Val Loss: 1.4398, Val Accuracy: 19.9158
Epoch 3/5, Train Loss: 0.7807, Val Loss: 1.4236, Val Accuracy: 23.4634
Epoch 4/5, Train Loss: 0.7210, Val Loss: 1.4044, Val Accuracy: 26.8508
Epoch 5/5, Train Loss: 0.6809, Val Loss: 1.4193, Val Accuracy: 29.0359


0,1
val_loss,█▄▂▁▂
Epoch,▁▃▅▆█
train_loss,█▄▃▂▁
val_accuracy,▁▃▅▇█

0,1
val_loss,1.4193
Epoch,4.0
train_loss,0.6809
val_accuracy,29.03594


[34m[1mwandb[0m: Agent Starting Run: r3syosu7 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 5


Seq2SeqModel(
  (encoder): TextEncoder(
    (embedding): Embedding(29, 64)
    (dropout): Dropout(p=0.2, inplace=False)
    (rnn): RNN(64, 128, num_layers=3, batch_first=True, dropout=0.2)
  )
  (decoder): Decoder(
    (embedding): Embedding(63, 64)
    (dropout): Dropout(p=0.2, inplace=False)
    (rnn): RNN(64, 128, num_layers=2, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=128, out_features=63, bias=True)
  )
)
Epoch 1/5, Train Loss: 1.2786, Val Loss: 1.9334, Val Accuracy: 5.5932
Epoch 2/5, Train Loss: 1.1891, Val Loss: 2.0562, Val Accuracy: 5.5932
Epoch 3/5, Train Loss: 1.1649, Val Loss: 2.1248, Val Accuracy: 5.1006
Epoch 4/5, Train Loss: 1.1544, Val Loss: 2.2015, Val Accuracy: 5.6211
Epoch 5/5, Train Loss: 1.1459, Val Loss: 2.2767, Val Accuracy: 5.2425


0,1
val_loss,▁▄▅▆█
Epoch,▁▃▅▆█
train_loss,█▃▂▁▁
val_accuracy,██▁█▃

0,1
val_loss,2.27674
Epoch,4.0
train_loss,1.14593
val_accuracy,5.24253


[34m[1mwandb[0m: Agent Starting Run: 0qves886 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 5


Seq2SeqModel(
  (encoder): TextEncoder(
    (embedding): Embedding(29, 128)
    (dropout): Dropout(p=0.2, inplace=False)
    (rnn): LSTM(128, 512, num_layers=3, batch_first=True, dropout=0.2, bidirectional=True)
  )
  (decoder): Decoder(
    (embedding): Embedding(63, 128)
    (dropout): Dropout(p=0.2, inplace=False)
    (rnn): LSTM(128, 512, num_layers=2, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=512, out_features=63, bias=True)
  )
)
Epoch 1/5, Train Loss: 1.0702, Val Loss: 1.7160, Val Accuracy: 14.3081
Epoch 2/5, Train Loss: 0.8010, Val Loss: 1.6474, Val Accuracy: 20.3574
Epoch 3/5, Train Loss: 0.6393, Val Loss: 1.6386, Val Accuracy: 26.3376
Epoch 4/5, Train Loss: 0.5435, Val Loss: 1.6956, Val Accuracy: 29.6717
Epoch 5/5, Train Loss: 0.4808, Val Loss: 1.7281, Val Accuracy: 31.6517


0,1
val_loss,▇▂▁▅█
Epoch,▁▃▅▆█
train_loss,█▅▃▂▁
val_accuracy,▁▃▆▇█

0,1
val_loss,1.72807
Epoch,4.0
train_loss,0.48083
val_accuracy,31.65174


[34m[1mwandb[0m: Agent Starting Run: 14s258wa with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 192
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5




Seq2SeqModel(
  (encoder): TextEncoder(
    (embedding): Embedding(29, 192)
    (dropout): Dropout(p=0.3, inplace=False)
    (rnn): RNN(192, 64, num_layers=2, batch_first=True, dropout=0.3)
  )
  (decoder): Decoder(
    (embedding): Embedding(63, 192)
    (dropout): Dropout(p=0.3, inplace=False)
    (rnn): RNN(192, 64, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=64, out_features=63, bias=True)
  )
)
Epoch 1/5, Train Loss: 1.2621, Val Loss: 7.3271, Val Accuracy: 0.0000
Epoch 2/5, Train Loss: 1.2097, Val Loss: 2.0700, Val Accuracy: 4.6335
Epoch 3/5, Train Loss: 1.2065, Val Loss: 2.3000, Val Accuracy: 4.7512
Epoch 4/5, Train Loss: 1.1978, Val Loss: 2.1079, Val Accuracy: 6.9205
Epoch 5/5, Train Loss: 1.1886, Val Loss: 2.5584, Val Accuracy: 1.5530


0,1
val_loss,█▁▁▁▂
Epoch,▁▃▅▆█
train_loss,█▃▃▂▁
val_accuracy,▁▆▆█▃

0,1
val_loss,2.55839
Epoch,4.0
train_loss,1.18862
val_accuracy,1.55298


[34m[1mwandb[0m: Agent Starting Run: 05ph0owe with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 192
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 5




Seq2SeqModel(
  (encoder): TextEncoder(
    (embedding): Embedding(29, 192)
    (dropout): Dropout(p=0.2, inplace=False)
    (rnn): RNN(192, 64, num_layers=3, batch_first=True, dropout=0.2)
  )
  (decoder): Decoder(
    (embedding): Embedding(63, 192)
    (dropout): Dropout(p=0.2, inplace=False)
    (rnn): RNN(192, 64, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=64, out_features=63, bias=True)
  )
)
Epoch 1/5, Train Loss: 1.7158, Val Loss: 1.7229, Val Accuracy: 6.8817
Epoch 2/5, Train Loss: 1.3697, Val Loss: 1.7854, Val Accuracy: 6.8768
Epoch 3/5, Train Loss: 1.3254, Val Loss: 1.8110, Val Accuracy: 6.9071
Epoch 4/5, Train Loss: 1.3021, Val Loss: 1.8433, Val Accuracy: 6.9071
Epoch 5/5, Train Loss: 1.2834, Val Loss: 1.9004, Val Accuracy: 6.8817


0,1
val_loss,▁▃▄▆█
Epoch,▁▃▅▆█
train_loss,█▂▂▁▁
val_accuracy,▂▁██▂

0,1
val_loss,1.90045
Epoch,4.0
train_loss,1.28341
val_accuracy,6.88166


[34m[1mwandb[0m: Agent Starting Run: st7lihyf with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5


Seq2SeqModel(
  (encoder): TextEncoder(
    (embedding): Embedding(29, 64)
    (dropout): Dropout(p=0.3, inplace=False)
    (rnn): RNN(64, 256, num_layers=3, batch_first=True, dropout=0.3, bidirectional=True)
  )
  (decoder): Decoder(
    (embedding): Embedding(63, 64)
    (dropout): Dropout(p=0.3, inplace=False)
    (rnn): RNN(64, 256, num_layers=3, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=256, out_features=63, bias=True)
  )
)
Epoch 1/5, Train Loss: 1.2132, Val Loss: 2.1782, Val Accuracy: 5.0278
Epoch 2/5, Train Loss: 1.2048, Val Loss: 2.2743, Val Accuracy: 5.6587
Epoch 3/5, Train Loss: 1.1996, Val Loss: 2.4859, Val Accuracy: 5.8795
Epoch 4/5, Train Loss: 1.1953, Val Loss: 3.2902, Val Accuracy: 5.4148
Epoch 5/5, Train Loss: 1.2063, Val Loss: 2.0636, Val Accuracy: 4.2440


0,1
val_loss,▂▂▃█▁
Epoch,▁▃▅▆█
train_loss,█▅▃▁▅
val_accuracy,▄▇█▆▁

0,1
val_loss,2.06357
Epoch,4.0
train_loss,1.20625
val_accuracy,4.24401


[34m[1mwandb[0m: Agent Starting Run: 067gpqm7 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embedding_size: 192
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5


Seq2SeqModel(
  (encoder): TextEncoder(
    (embedding): Embedding(29, 192)
    (dropout): Dropout(p=0.4, inplace=False)
    (rnn): RNN(192, 256, num_layers=3, batch_first=True, dropout=0.4, bidirectional=True)
  )
  (decoder): Decoder(
    (embedding): Embedding(63, 192)
    (dropout): Dropout(p=0.4, inplace=False)
    (rnn): RNN(192, 256, num_layers=2, batch_first=True, dropout=0.4)
    (fc): Linear(in_features=256, out_features=63, bias=True)
  )
)
Epoch 1/5, Train Loss: 1.2287, Val Loss: 2.0408, Val Accuracy: 3.9735
Epoch 2/5, Train Loss: 1.1897, Val Loss: 2.1313, Val Accuracy: 5.8213
Epoch 3/5, Train Loss: 1.1595, Val Loss: 2.1818, Val Accuracy: 3.9917
Epoch 4/5, Train Loss: 1.2045, Val Loss: 2.2627, Val Accuracy: 4.4721
Epoch 5/5, Train Loss: 1.1955, Val Loss: 2.6668, Val Accuracy: 3.1982


0,1
val_loss,▁▂▃▃█
Epoch,▁▃▅▆█
train_loss,█▄▁▆▅
val_accuracy,▃█▃▄▁

0,1
val_loss,2.66684
Epoch,4.0
train_loss,1.19547
val_accuracy,3.19818


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: bihbeo5c with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 192
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5


Seq2SeqModel(
  (encoder): TextEncoder(
    (embedding): Embedding(29, 192)
    (dropout): Dropout(p=0.3, inplace=False)
    (rnn): LSTM(192, 128, num_layers=3, batch_first=True, dropout=0.3, bidirectional=True)
  )
  (decoder): Decoder(
    (embedding): Embedding(63, 192)
    (dropout): Dropout(p=0.3, inplace=False)
    (rnn): LSTM(192, 128, num_layers=2, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=128, out_features=63, bias=True)
  )
)
Epoch 1/5, Train Loss: 1.0519, Val Loss: 1.6233, Val Accuracy: 14.4063
Epoch 2/5, Train Loss: 0.8435, Val Loss: 1.5272, Val Accuracy: 20.8342
Epoch 3/5, Train Loss: 0.7381, Val Loss: 1.4867, Val Accuracy: 25.8535
Epoch 4/5, Train Loss: 0.6729, Val Loss: 1.4817, Val Accuracy: 29.4218
Epoch 5/5, Train Loss: 0.6223, Val Loss: 1.5364, Val Accuracy: 31.5523


0,1
val_loss,█▃▁▁▄
Epoch,▁▃▅▆█
train_loss,█▅▃▂▁
val_accuracy,▁▄▆▇█

0,1
val_loss,1.53637
Epoch,4.0
train_loss,0.62232
val_accuracy,31.55226


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: glub3fmp with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5


Seq2SeqModel(
  (encoder): TextEncoder(
    (embedding): Embedding(29, 128)
    (dropout): Dropout(p=0.3, inplace=False)
    (rnn): LSTM(128, 256, num_layers=3, batch_first=True, dropout=0.3)
  )
  (decoder): Decoder(
    (embedding): Embedding(63, 128)
    (dropout): Dropout(p=0.3, inplace=False)
    (rnn): LSTM(128, 256, num_layers=3, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=256, out_features=63, bias=True)
  )
)
Epoch 1/5, Train Loss: 1.0789, Val Loss: 2.0595, Val Accuracy: 7.3949
Epoch 2/5, Train Loss: 0.9829, Val Loss: 2.0825, Val Accuracy: 9.5642
Epoch 3/5, Train Loss: 0.9169, Val Loss: 2.1210, Val Accuracy: 11.6692
Epoch 4/5, Train Loss: 0.8435, Val Loss: 2.2055, Val Accuracy: 13.0936
Epoch 5/5, Train Loss: 0.7789, Val Loss: 2.3384, Val Accuracy: 13.6832


0,1
val_loss,▁▂▃▅█
Epoch,▁▃▅▆█
train_loss,█▆▄▃▁
val_accuracy,▁▃▆▇█

0,1
val_loss,2.33838
Epoch,4.0
train_loss,0.77888
val_accuracy,13.68324


**Step:10**

In [None]:
input_size = 29
output_size = 63
embed_size = 128
beam_width=5
hidden_size = 512
encoder_layers = 3
decoder_layers = 2
cell_type = 'lstm'
drop_prob = 0.2
learning_rate = 0.0001
bidirectional=True
batch_size = 16
num_epochs = 35
Best_model = Seq2SeqModel(input_size, output_size, hidden_size,embed_size,beam_width, encoder_layers,decoder_layers,drop_prob, cell_type,bidirectional)
print(Best_model)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Best_model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Best_model.parameters(), lr=learning_rate)

Seq2SeqModel(
  (encoder): TextEncoder(
    (embedding): Embedding(29, 128)
    (dropout): Dropout(p=0.2, inplace=False)
    (rnn): LSTM(128, 512, num_layers=3, batch_first=True, dropout=0.2, bidirectional=True)
  )
  (decoder): Decoder(
    (embedding): Embedding(63, 128)
    (dropout): Dropout(p=0.2, inplace=False)
    (rnn): LSTM(128, 512, num_layers=2, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=512, out_features=63, bias=True)
  )
)


**Step:11**

In [None]:
final_model = Best_model.to(device)
for epoch in range(35):
    print(f"\nEpoch {epoch+1} started.")
    final_model, _ = train_model(final_model, train_loader_ben, criterion, optimizer, device)
    print(f"Finished training for epoch {epoch+1}")
    train_loss, train_accuracy = evaluate_model(final_model, train_loader_ben, criterion, device)
    val_loss, val_accuracy = evaluate_model(final_model, val_data_loader, criterion, device)
    print(f'Epoch {epoch+1}/35')
    print(f' - Train Loss      : {train_loss:.4f}, Train Accuracy      : {train_accuracy:.2f}%')
    print(f' - Validation Loss : {val_loss:.4f}, Validation Accuracy : {val_accuracy:.2f}%')



Epoch 1 started.
Finished training for epoch 1
Epoch 1/35
 - Train Loss      : 1.3527, Train Accuracy      : 10.99%
 - Validation Loss : 1.6545, Validation Accuracy : 8.74%

Epoch 2 started.
Finished training for epoch 2
Epoch 2/35
 - Train Loss      : 1.2575, Train Accuracy      : 17.55%
 - Validation Loss : 1.5851, Validation Accuracy : 14.04%

Epoch 3 started.
Finished training for epoch 3
Epoch 3/35
 - Train Loss      : 1.1505, Train Accuracy      : 23.09%
 - Validation Loss : 1.5164, Validation Accuracy : 18.13%

Epoch 4 started.
Finished training for epoch 4
Epoch 4/35
 - Train Loss      : 1.0531, Train Accuracy      : 27.86%
 - Validation Loss : 1.4615, Validation Accuracy : 22.30%

Epoch 5 started.
Finished training for epoch 5
Epoch 5/35
 - Train Loss      : 0.9910, Train Accuracy      : 31.70%
 - Validation Loss : 1.4300, Validation Accuracy : 24.83%

Epoch 6 started.
Finished training for epoch 6
Epoch 6/35
 - Train Loss      : 0.9378, Train Accuracy      : 36.32%
 - Valida

**Step:12**

In [None]:
test_loss, test_accuracy = evaluate_model(final_model,test_data_loader, criterion, device)
print(f' Test Accuracy: {test_accuracy:.2f}')

 Test Accuracy: 46.74


**Step:13**

In [None]:
def run_inference(model, dataloader, device):
    model.eval()
    predictions = []
    actual = []
    with torch.no_grad():
        for latin, devanagari in dataloader:
            latin = latin.to(device)
            devanagari = devanagari.to(device)
            output, _ = model(latin, devanagari, 0)
            deb = devanagari.cpu().numpy()
            actual.append(deb)
            if output.dim() == 3:
                output = output.argmax(2)
            elif output.dim() == 2:
                output = output.argmax(1)
            else:
                print("Unexpected output dimension:", output.dim())
                continue

            latin = latin.cpu().numpy()
            output = output.cpu().numpy()
            predictions.append((latin, output))
    return predictions, actual
latin_idx2token = {idx: char for char, idx in test_input_letter_vocab.items()}
bangla_idx2token = {idx: char for char, idx in test_target_letter_vocab.items()}

**Step:14**

In [None]:
def decode_sequence(indices, idx2token, target_vocab):
    valid_indices = []
    for idx in indices:
        if idx in idx2token and idx not in (target_vocab['<pad>'], target_vocab['<sos>'], target_vocab['<eos>']):
            valid_indices.append(idx)
    decoded_text = ''
    for idx in valid_indices:
        decoded_text += idx2token[idx]
    return decoded_text

**Step:15**

In [None]:
def process_output_indices(indices, idx2token, target_vocab):
    decoded_text = ''
    for idx in indices:
        if idx == target_vocab.get('<eos>'):
            break
        if idx in (target_vocab.get('<pad>'), target_vocab.get('<sos>')):
            continue
        decoded_text += idx2token.get(idx, '')
    return decoded_text

**Step:16**

In [None]:
import pandas as pd
from pathlib import Path
test_predictions, actual = run_inference(final_model, test_data_loader, device)
seq2seq_results = []
for (src_indices, output_indices), act_ind in zip(test_predictions, actual):
    for i in range(src_indices.shape[0]):
        input_text = decode_sequence(src_indices[i], latin_idx2token, test_input_letter_vocab)
        actual_target_text = decode_sequence(act_ind[i], bangla_idx2token, test_target_letter_vocab)
        predicted_text = process_output_indices(output_indices[i], bangla_idx2token, test_target_letter_vocab)
        seq2seq_results.append([input_text, actual_target_text, predicted_text])
results_df = pd.DataFrame(seq2seq_results, columns=["Input", "Actual", "Predicted"])
def char_level_accuracy(actual, predicted):
    matches = sum(a == b for a, b in zip(actual, predicted))
    return matches / max(len(actual), len(predicted)) if max(len(actual), len(predicted)) > 0 else 0

results_df["Accuracy"] = results_df.apply(lambda row: char_level_accuracy(row["Actual"], row["Predicted"]), axis=1)
total = len(results_df)
count_100 = (results_df["Accuracy"] == 1.0).sum()
count_75 = ((results_df["Accuracy"] > 0.75) & (results_df["Accuracy"] < 1.0)).sum()
count_50 = ((results_df["Accuracy"] > 0.5) & (results_df["Accuracy"] <= 0.75)).sum()
count_25 = ((results_df["Accuracy"] > 0.25) & (results_df["Accuracy"] <= 0.5)).sum()
count_0 = (results_df["Accuracy"] <= 0.25).sum()
def accuracy_highlighter(row):
    acc = row.Accuracy
    if acc == 1.0:
        color = 'background-color: #d4edda'  # Green
    elif acc > 0.75:
        color = 'background-color: #cce5ff'  # Light blue
    elif acc > 0.5:
        color = 'background-color: #fff3cd'  # Light yellow
    elif acc > 0.25:
        color = 'background-color: #f8d7da'  # Light pink
    else:
        color = ''
    return [color, color, color, '']
styled_table = results_df.style.set_properties(**{
    'border': '1px solid black',
    'text-align': 'left',
    'padding': '6px'
}).apply(accuracy_highlighter, axis=1).hide(axis="columns", subset=["Accuracy"])

html_content = styled_table.to_html()
summary_html = f"""
<div class="legend">
    <h3>Prediction Accuracy Summary</h3>
    <ul>
        <li><strong>Total Predictions:</strong> {total}</li>
        <li style="background-color: #d4edda; padding: 6px;">✅ 100% Match: {count_100}</li>
        <li style="background-color: #cce5ff; padding: 6px;">✅ Above 75%: {count_75}</li>
        <li style="background-color: #fff3cd; padding: 6px;">✅ Above 50%: {count_50}</li>
        <li style="background-color: #f8d7da; padding: 6px;">✅ Above 25%: {count_25}</li>
        <li>❌ ≤ 25% Match: {count_0}</li>
    </ul>
</div>
"""
html_full = f"""
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Seq2Seq Prediction Results</title>
    <style>
        body {{
            font-family: Arial, sans-serif;
            background-color: #f5f5f5;
            padding: 30px;
        }}
        h2 {{
            text-align: center;
            color: #333;
        }}
        .legend {{
            max-width: 600px;
            margin: 0 auto 30px auto;
            padding: 15px;
            border: 1px solid #ccc;
            background-color: #fff;
        }}
        .legend h3 {{
            margin-top: 0;
        }}
        table {{
            margin: auto;
            border-collapse: collapse;
            box-shadow: 0 0 10px rgba(0,0,0,0.1);
        }}
        th {{
            background-color: #4CAF50;
            color: white;
        }}
        td, th {{
            padding: 10px 15px;
            border: 1px solid #ddd;
        }}
        tr:nth-child(even) {{
            background-color: #f9f9f9;
        }}
        tr:hover {{
            background-color: #f1f1f1;
        }}
    </style>
</head>
<body>
    <h2>Character-Level Transliteration Predictions using Seq2Seq(without attention)</h2>
    {summary_html}
    {html_content}
</body>
</html>
"""
with open(" predictions_vanilla.html", "w", encoding="utf-8") as f:
    f.write(html_full)