In [1]:
!pip install wandb



In [2]:
import wandb
key = input('Enter your API:')
wandb.login(key=key)

Enter your API: 580e769ee2f34eafdded556ce52aaf31c265ad3b


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mma23m011[0m ([33mma23m011-iit-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import wandb
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import pandas as pd

# Dataset
class TransliterationDataset(Dataset):
    def __init__(self, data, src_vocab, tgt_vocab):
        self.data = data
        self.src_vocab = src_vocab
        self.tgt_vocab = tgt_vocab

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        src, tgt = self.data[idx]
        # for c in src:
        #     print(c)
        src_ids = [self.src_vocab['<sos>']] + [self.src_vocab.get(c, self.src_vocab['<unk>']) for c in src] + [self.src_vocab['<eos>']]
        tgt_ids = [self.tgt_vocab['<sos>']] + [self.tgt_vocab.get(c, self.tgt_vocab['<unk>']) for c in tgt] + [self.tgt_vocab['<eos>']]
        return torch.tensor(src_ids), torch.tensor(tgt_ids)

def collate_fn(batch):
    src_seqs, tgt_seqs = zip(*batch)
    src_padded = nn.utils.rnn.pad_sequence(src_seqs, batch_first=True, padding_value=0)
    tgt_padded = nn.utils.rnn.pad_sequence(tgt_seqs, batch_first=True, padding_value=0)
    return src_padded, tgt_padded

# Vocab 
def build_vocab(data):
    vocab = {'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3}
    idx = 4
    for word in data:
        if isinstance(word, str):
            for char in word:
                if char not in vocab:
                    vocab[char] = idx
                    idx += 1
    return vocab

# Model
class Seq2Seq(nn.Module):
    def __init__(self, config, src_vocab_size, tgt_vocab_size):
        super().__init__()
        self.config = config
        self.embedding_src = nn.Embedding(src_vocab_size, config['embedding_dim'], padding_idx=0)
        self.embedding_tgt = nn.Embedding(tgt_vocab_size, config['embedding_dim'], padding_idx=0)

        rnn_cell = {'RNN': nn.RNN, 'GRU': nn.GRU, 'LSTM': nn.LSTM}[config['cell_type']]
        self.encoder = rnn_cell(config['embedding_dim'], config['hidden_size'], config['encoder_layers'], dropout=config['dropout'], batch_first=True)
        self.decoder = rnn_cell(config['embedding_dim'], config['hidden_size'], config['decoder_layers'], dropout=config['dropout'], batch_first=True)
        
        self.fc_out = nn.Linear(config['hidden_size'], tgt_vocab_size)

    def forward(self, src, tgt):
        embedded_src = self.embedding_src(src)
        embedded_tgt = self.embedding_tgt(tgt)

        _, hidden = self.encoder(embedded_src)

        def expand_hidden(h_enc, required_layers):
            # h_enc: (num_layers_enc, batch, hidden_size)
            num_enc_layers = h_enc.size(0)
            if num_enc_layers < required_layers:
                # Create additional zero layers
                extra = torch.zeros(
                    required_layers - num_enc_layers,
                    h_enc.size(1),
                    h_enc.size(2),
                    device=h_enc.device,
                    dtype=h_enc.dtype
                )
                h_enc = torch.cat([h_enc, extra], dim=0)
            else:
                h_enc = h_enc[-required_layers:]  # if more, take only needed
            return h_enc
        
        if isinstance(hidden, tuple):  # LSTM
            h, c = hidden
            h = expand_hidden(h, self.config['decoder_layers'])
            c = expand_hidden(c, self.config['decoder_layers'])
            decoder_output, _ = self.decoder(embedded_tgt, (h, c))
        else:  # GRU / RNN
            hidden = expand_hidden(hidden, self.config['decoder_layers'])
            decoder_output, _ = self.decoder(embedded_tgt, hidden)

        output = self.fc_out(decoder_output)
        return output

# Accuracy 
def calculate_accuracy(output, target, pad_idx):
    preds = output.argmax(2)
    mask = (target != pad_idx)
    correct = (preds == target) & mask
    return correct.sum().item() / mask.sum().item()

def compute_word_accuracy(output, target, tgt_index_to_token, pad_idx):
    preds = output.argmax(dim=2)  # [batch, seq_len]
    correct = 0
    total = 0

    for pred_seq, tgt_seq in zip(preds, target):
        # Convert to tokens
        pred_tokens = [tgt_index_to_token[idx.item()] for idx in pred_seq if idx.item() != pad_idx]
        tgt_tokens = [tgt_index_to_token[idx.item()] for idx in tgt_seq if idx.item() != pad_idx]

        # Remove after <eos> if exists
        if '<eos>' in pred_tokens:
            pred_tokens = pred_tokens[:pred_tokens.index('<eos>')]
        if '<eos>' in tgt_tokens:
            tgt_tokens = tgt_tokens[:tgt_tokens.index('<eos>')]

        if pred_tokens == tgt_tokens:
            correct += 1
        total += 1

    return correct / total if total > 0 else 0.0

# Training 
def train(model, dataloader, optimizer, criterion, tgt_pad_idx, tgt_index_to_token):
    model.train()
    total_loss, total_acc, total_word_acc = 0, 0, 0
    for src, tgt in dataloader:
        src, tgt = src.to(device), tgt.to(device)
        optimizer.zero_grad()
        output = model(src, tgt[:, :-1])
        loss = criterion(output.reshape(-1, output.shape[-1]), tgt[:, 1:].reshape(-1))
        acc = calculate_accuracy(output, tgt[:, 1:], tgt_pad_idx)
        word_acc = compute_word_accuracy(output, tgt[:, 1:], tgt_index_to_token, tgt_pad_idx)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        total_acc += acc
        total_word_acc += word_acc
    return total_loss / len(dataloader), total_acc / len(dataloader), total_word_acc / len(dataloader)

def evaluate(model, dataloader, criterion, tgt_pad_idx, tgt_index_to_token):
    model.eval()
    total_loss, total_acc, total_word_acc = 0, 0, 0
    with torch.no_grad():
        for src, tgt in dataloader:
            src, tgt = src.to(device), tgt.to(device)
            output = model(src, tgt[:, :-1])
            loss = criterion(output.reshape(-1, output.shape[-1]), tgt[:, 1:].reshape(-1))
            acc = calculate_accuracy(output, tgt[:, 1:], tgt_pad_idx)
            word_acc = compute_word_accuracy(output, tgt[:, 1:], tgt_index_to_token, tgt_pad_idx)
            total_loss += loss.item()
            total_acc += acc
            total_word_acc += word_acc
    return total_loss / len(dataloader), total_acc / len(dataloader), total_word_acc / len(dataloader)


# Sweep 
def sweep_train():
    wandb.init()
    config = wandb.config
    train_df = pd.read_csv("/kaggle/input/dfefeff/dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.train.tsv", sep="\t", header=None, names=["tgt", "src", "freq"])
    train_df = train_df.loc[train_df.index.repeat(train_df['freq'])].reset_index(drop=True)
    dev_df = pd.read_csv("/kaggle/input/dfefeff/dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.dev.tsv", sep="\t", header=None, names=["tgt", "src", "freq"])
    train_df['src'] = train_df['src'].astype(str)
    train_df['tgt'] = train_df['tgt'].astype(str)

    src_vocab = build_vocab(train_df['src'])
    tgt_vocab = build_vocab(train_df['tgt'])
    print(src_vocab)
    print(tgt_vocab)
    tgt_index_to_token = {v: k for k, v in tgt_vocab.items()}
    idx_to_tgt = {v: k for k, v in tgt_vocab.items()}

    train_data = list(zip(train_df['src'], train_df['tgt']))
    dev_data = list(zip(dev_df['src'], dev_df['tgt']))

    train_dataset = TransliterationDataset(train_data, src_vocab, tgt_vocab)
    dev_dataset = TransliterationDataset(dev_data, src_vocab, tgt_vocab)

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
    dev_loader = DataLoader(dev_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)

    model = Seq2Seq(config, len(src_vocab), len(tgt_vocab)).to(device)
    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss(ignore_index=tgt_vocab['<pad>'])

    for epoch in range(config['epochs']):
        train_loss, train_acc, train_word_acc = train(model, train_loader, optimizer, criterion, tgt_vocab['<pad>'], tgt_index_to_token)
        val_loss, val_acc, val_word_acc = evaluate(model, dev_loader, criterion, tgt_vocab['<pad>'], tgt_index_to_token)
        print(f"Epoch {epoch + 1}")
        print(f"{'train_loss:':20} {train_loss:.4f}")
        print(f"{'val_loss:':20} {val_loss:.4f}")
        print(f"{'train_accuracy:':20} {train_acc * 100:.2f}%")
        print(f"{'val_accuracy:':20} {val_acc * 100:.2f}%")
        print(f"{'train_word_accuracy:':20} {train_word_acc * 100:.2f}%")
        print(f"{'val_word_accuracy:':20} {val_word_acc * 100:.2f}%")

        wandb.log({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "val_loss": val_loss,
            "train_accuracy": train_acc * 100,
            "val_accuracy": val_acc * 100,
            "train_word_accuracy": train_word_acc * 100,
            "val_word_accuracy": val_word_acc * 100
        })

    # predict_and_show(model, dev_dataset, src_vocab, tgt_vocab, idx_to_tgt, num_samples=100)


# Run 
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

sweep_config = {
    'method': 'random',
    'name': 'DakshinaSweepForPred-3',
    'metric': {'name': 'val_accuracy', 'goal': 'maximize'},
    'parameters': {
        'embedding_dim': {'values': [16,32, 64, 128,256]},
        'hidden_size': {'values': [16,32,64, 128,256]},
        'encoder_layers': {'values': [1,2,3]},
        'decoder_layers': {'values': [1,2,3]},
        'cell_type': {'values': ['RNN','GRU', 'LSTM']},
        'dropout': {'values': [0.2, 0.3]},
        'epochs': {'values': [5,10,13,15,17,20]},
        'beam_width': {'values': [1, 3, 5]}
    }
}

sweep_id = wandb.sweep(sweep_config, project="DL_A3")
wandb.agent(sweep_id, function=sweep_train, count = 20)



Create sweep with ID: 5tiratk7
Sweep URL: https://wandb.ai/ma23m011-iit-madras/DL_A3/sweeps/5tiratk7


[34m[1mwandb[0m: Agent Starting Run: ihui3ixb with config:
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'a': 4, 'n': 5, 'g': 6, 'k': 7, 'i': 8, 't': 9, 'o': 10, 'e': 11, 'r': 12, 's': 13, 'h': 14, 'y': 15, 'w': 16, 'u': 17, 'l': 18, 'd': 19, 'j': 20, 'b': 21, 'm': 22, 'c': 23, 'q': 24, 'z': 25, 'p': 26, 'x': 27, 'v': 28, 'f': 29}
{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'অ': 4, 'ং': 5, 'ক': 6, 'ি': 7, 'ত': 8, 'ে': 9, 'র': 10, 'শ': 11, 'ই': 12, 'ও': 13, 'গ': 14, 'ু': 15, 'ল': 16, 'ো': 17, '্': 18, 'হ': 19, 'ণ': 20, 'া': 21, 'ী': 22, 'দ': 23, 'ন': 24, 'ট': 25, 'ব': 26, 'ষ': 27, 'ম': 28, 'স': 29, 'খ': 30, 'য': 31, 'ড': 32, 'ৎ': 33, 'ধ': 34, 'ঠ': 35, 'জ': 36, 'প': 37, 'ূ': 38, 'চ': 39, 'ছ': 40, 'ভ': 41, 'ঘ': 42, 'ঙ': 43, 'ৈ': 44, 'ঞ': 45, '়': 46, 'ঃ': 47, 'এ': 48, 'থ': 49, 'ৃ': 50, 'ৌ': 51, 'ফ': 52, 'ঝ': 53, 'আ': 54, 'উ': 55, 'ঁ': 56, 'ঈ': 57, 'ঊ': 58, 'ঋ': 59, 'ঐ': 60, 'ঔ': 61, 'ঢ': 62, '২': 63}




Epoch 1
train_loss:          2.4077
val_loss:            2.3966
train_accuracy:      32.10%
val_accuracy:        32.51%
train_word_accuracy: 0.00%
val_word_accuracy:   0.00%
Epoch 2
train_loss:          2.2130
val_loss:            2.3978
train_accuracy:      36.89%
val_accuracy:        32.43%
train_word_accuracy: 0.01%
val_word_accuracy:   0.00%
Epoch 3
train_loss:          2.1615
val_loss:            2.3043
train_accuracy:      38.04%
val_accuracy:        34.98%
train_word_accuracy: 0.02%
val_word_accuracy:   0.00%
Epoch 4
train_loss:          2.1246
val_loss:            2.3161
train_accuracy:      38.97%
val_accuracy:        35.34%
train_word_accuracy: 0.02%
val_word_accuracy:   0.00%
Epoch 5
train_loss:          2.1321
val_loss:            2.3603
train_accuracy:      38.82%
val_accuracy:        33.57%
train_word_accuracy: 0.01%
val_word_accuracy:   0.00%
Epoch 6
train_loss:          2.1271
val_loss:            2.3198
train_accuracy:      38.94%
val_accuracy:        35.01%
train_word

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train_accuracy,▁▅▆▇▇▇▇▇▇▇█████
train_loss,█▄▃▂▂▂▂▂▂▂▁▁▁▁▁
train_word_accuracy,▁▄▅▅▃▄▆▆▃▆▅▄▆█▇
val_accuracy,▁▁▆▇▃▆▅▆▆▆██▇██
val_loss,██▂▃▅▃▃▃▃▃▂▁▂▂▁
val_word_accuracy,▁▁▁▁▁▁█▁▁▁▁▁▁▁▁

0,1
epoch,15.0
train_accuracy,40.54409
train_loss,2.06841
train_word_accuracy,0.02343
val_accuracy,35.90404
val_loss,2.29468
val_word_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: om3kaupo with config:
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	hidden_size: 32


{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'a': 4, 'n': 5, 'g': 6, 'k': 7, 'i': 8, 't': 9, 'o': 10, 'e': 11, 'r': 12, 's': 13, 'h': 14, 'y': 15, 'w': 16, 'u': 17, 'l': 18, 'd': 19, 'j': 20, 'b': 21, 'm': 22, 'c': 23, 'q': 24, 'z': 25, 'p': 26, 'x': 27, 'v': 28, 'f': 29}
{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'অ': 4, 'ং': 5, 'ক': 6, 'ি': 7, 'ত': 8, 'ে': 9, 'র': 10, 'শ': 11, 'ই': 12, 'ও': 13, 'গ': 14, 'ু': 15, 'ল': 16, 'ো': 17, '্': 18, 'হ': 19, 'ণ': 20, 'া': 21, 'ী': 22, 'দ': 23, 'ন': 24, 'ট': 25, 'ব': 26, 'ষ': 27, 'ম': 28, 'স': 29, 'খ': 30, 'য': 31, 'ড': 32, 'ৎ': 33, 'ধ': 34, 'ঠ': 35, 'জ': 36, 'প': 37, 'ূ': 38, 'চ': 39, 'ছ': 40, 'ভ': 41, 'ঘ': 42, 'ঙ': 43, 'ৈ': 44, 'ঞ': 45, '়': 46, 'ঃ': 47, 'এ': 48, 'থ': 49, 'ৃ': 50, 'ৌ': 51, 'ফ': 52, 'ঝ': 53, 'আ': 54, 'উ': 55, 'ঁ': 56, 'ঈ': 57, 'ঊ': 58, 'ঋ': 59, 'ঐ': 60, 'ঔ': 61, 'ঢ': 62, '২': 63}
Epoch 1
train_loss:          1.9465
val_loss:            1.3778
train_accuracy:      45.88%
val_accuracy:        58.86%
train_word_accuracy:

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train_accuracy,▁▅▆▆▇▇▇▇▇██████
train_loss,█▄▃▃▂▂▂▂▁▁▁▁▁▁▁
train_word_accuracy,▁▂▃▄▅▅▆▆▇▇▇▇███
val_accuracy,▁▄▅▆▆▇▇▇▇▇█████
val_loss,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁
val_word_accuracy,▁▂▃▄▅▅▆▆▇▇▇▇███

0,1
epoch,15.0
train_accuracy,77.25295
train_loss,0.71624
train_word_accuracy,17.98273
val_accuracy,76.56914
val_loss,0.75516
val_word_accuracy,16.10053


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pmkc85lt with config:
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 128
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 16


{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'a': 4, 'n': 5, 'g': 6, 'k': 7, 'i': 8, 't': 9, 'o': 10, 'e': 11, 'r': 12, 's': 13, 'h': 14, 'y': 15, 'w': 16, 'u': 17, 'l': 18, 'd': 19, 'j': 20, 'b': 21, 'm': 22, 'c': 23, 'q': 24, 'z': 25, 'p': 26, 'x': 27, 'v': 28, 'f': 29}
{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'অ': 4, 'ং': 5, 'ক': 6, 'ি': 7, 'ত': 8, 'ে': 9, 'র': 10, 'শ': 11, 'ই': 12, 'ও': 13, 'গ': 14, 'ু': 15, 'ল': 16, 'ো': 17, '্': 18, 'হ': 19, 'ণ': 20, 'া': 21, 'ী': 22, 'দ': 23, 'ন': 24, 'ট': 25, 'ব': 26, 'ষ': 27, 'ম': 28, 'স': 29, 'খ': 30, 'য': 31, 'ড': 32, 'ৎ': 33, 'ধ': 34, 'ঠ': 35, 'জ': 36, 'প': 37, 'ূ': 38, 'চ': 39, 'ছ': 40, 'ভ': 41, 'ঘ': 42, 'ঙ': 43, 'ৈ': 44, 'ঞ': 45, '়': 46, 'ঃ': 47, 'এ': 48, 'থ': 49, 'ৃ': 50, 'ৌ': 51, 'ফ': 52, 'ঝ': 53, 'আ': 54, 'উ': 55, 'ঁ': 56, 'ঈ': 57, 'ঊ': 58, 'ঋ': 59, 'ঐ': 60, 'ঔ': 61, 'ঢ': 62, '২': 63}
Epoch 1
train_loss:          2.4101
val_loss:            2.0556
train_accuracy:      34.13%
val_accuracy:        42.00%
train_word_accuracy:

0,1
epoch,▁▃▅▆█
train_accuracy,▁▅▆▇█
train_loss,█▄▃▂▁
train_word_accuracy,▁▂▄▆█
val_accuracy,▁▃▆▇█
val_loss,█▅▃▂▁
val_word_accuracy,▁▃▄▇█

0,1
epoch,5.0
train_accuracy,50.8338
train_loss,1.64472
train_word_accuracy,0.95851
val_accuracy,51.87148
val_loss,1.60264
val_word_accuracy,0.74353


[34m[1mwandb[0m: Agent Starting Run: 2il5q7us with config:
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 128
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 16


{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'a': 4, 'n': 5, 'g': 6, 'k': 7, 'i': 8, 't': 9, 'o': 10, 'e': 11, 'r': 12, 's': 13, 'h': 14, 'y': 15, 'w': 16, 'u': 17, 'l': 18, 'd': 19, 'j': 20, 'b': 21, 'm': 22, 'c': 23, 'q': 24, 'z': 25, 'p': 26, 'x': 27, 'v': 28, 'f': 29}
{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'অ': 4, 'ং': 5, 'ক': 6, 'ি': 7, 'ত': 8, 'ে': 9, 'র': 10, 'শ': 11, 'ই': 12, 'ও': 13, 'গ': 14, 'ু': 15, 'ল': 16, 'ো': 17, '্': 18, 'হ': 19, 'ণ': 20, 'া': 21, 'ী': 22, 'দ': 23, 'ন': 24, 'ট': 25, 'ব': 26, 'ষ': 27, 'ম': 28, 'স': 29, 'খ': 30, 'য': 31, 'ড': 32, 'ৎ': 33, 'ধ': 34, 'ঠ': 35, 'জ': 36, 'প': 37, 'ূ': 38, 'চ': 39, 'ছ': 40, 'ভ': 41, 'ঘ': 42, 'ঙ': 43, 'ৈ': 44, 'ঞ': 45, '়': 46, 'ঃ': 47, 'এ': 48, 'থ': 49, 'ৃ': 50, 'ৌ': 51, 'ফ': 52, 'ঝ': 53, 'আ': 54, 'উ': 55, 'ঁ': 56, 'ঈ': 57, 'ঊ': 58, 'ঋ': 59, 'ঐ': 60, 'ঔ': 61, 'ঢ': 62, '২': 63}
Epoch 1
train_loss:          2.5852
val_loss:            2.2405
train_accuracy:      29.35%
val_accuracy:        36.76%
train_word_accuracy:

0,1
epoch,▁▃▅▆█
train_accuracy,▁▅▆▇█
train_loss,█▄▃▂▁
train_word_accuracy,▁▂▃▆█
val_accuracy,▁▄▆▇█
val_loss,█▅▃▂▁
val_word_accuracy,▁▂▄▅█

0,1
epoch,5.0
train_accuracy,52.14395
train_loss,1.62257
train_word_accuracy,1.25333
val_accuracy,55.17958
val_loss,1.51266
val_word_accuracy,1.64871


[34m[1mwandb[0m: Agent Starting Run: h0cajvna with config:
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_size: 32


{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'a': 4, 'n': 5, 'g': 6, 'k': 7, 'i': 8, 't': 9, 'o': 10, 'e': 11, 'r': 12, 's': 13, 'h': 14, 'y': 15, 'w': 16, 'u': 17, 'l': 18, 'd': 19, 'j': 20, 'b': 21, 'm': 22, 'c': 23, 'q': 24, 'z': 25, 'p': 26, 'x': 27, 'v': 28, 'f': 29}
{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'অ': 4, 'ং': 5, 'ক': 6, 'ি': 7, 'ত': 8, 'ে': 9, 'র': 10, 'শ': 11, 'ই': 12, 'ও': 13, 'গ': 14, 'ু': 15, 'ল': 16, 'ো': 17, '্': 18, 'হ': 19, 'ণ': 20, 'া': 21, 'ী': 22, 'দ': 23, 'ন': 24, 'ট': 25, 'ব': 26, 'ষ': 27, 'ম': 28, 'স': 29, 'খ': 30, 'য': 31, 'ড': 32, 'ৎ': 33, 'ধ': 34, 'ঠ': 35, 'জ': 36, 'প': 37, 'ূ': 38, 'চ': 39, 'ছ': 40, 'ভ': 41, 'ঘ': 42, 'ঙ': 43, 'ৈ': 44, 'ঞ': 45, '়': 46, 'ঃ': 47, 'এ': 48, 'থ': 49, 'ৃ': 50, 'ৌ': 51, 'ফ': 52, 'ঝ': 53, 'আ': 54, 'উ': 55, 'ঁ': 56, 'ঈ': 57, 'ঊ': 58, 'ঋ': 59, 'ঐ': 60, 'ঔ': 61, 'ঢ': 62, '২': 63}




Epoch 1
train_loss:          2.5438
val_loss:            2.4614
train_accuracy:      29.50%
val_accuracy:        31.80%
train_word_accuracy: 0.00%
val_word_accuracy:   0.00%
Epoch 2
train_loss:          2.3415
val_loss:            2.4097
train_accuracy:      34.29%
val_accuracy:        32.72%
train_word_accuracy: 0.00%
val_word_accuracy:   0.00%
Epoch 3
train_loss:          2.2888
val_loss:            2.3805
train_accuracy:      35.36%
val_accuracy:        33.55%
train_word_accuracy: 0.00%
val_word_accuracy:   0.00%
Epoch 4
train_loss:          2.2538
val_loss:            2.3565
train_accuracy:      36.15%
val_accuracy:        34.35%
train_word_accuracy: 0.01%
val_word_accuracy:   0.01%
Epoch 5
train_loss:          2.2314
val_loss:            2.3437
train_accuracy:      36.69%
val_accuracy:        34.29%
train_word_accuracy: 0.01%
val_word_accuracy:   0.00%
Epoch 6
train_loss:          2.2171
val_loss:            2.3456
train_accuracy:      37.00%
val_accuracy:        34.35%
train_word

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▄▅▆▆▆▇▇▇▇▇▇▇▇█▇████
train_loss,█▅▄▃▃▃▃▂▂▂▂▁▂▁▁▁▁▁▁▁
train_word_accuracy,▁▁▁▂▂▃▅▄▃▄▅▆▃▄▆▆▅▅██
val_accuracy,▁▂▃▄▄▄▄▅▅▆▅▆▆▅▆▇▇▇█▆
val_loss,█▇▆▅▅▅▄▄▄▃▂▂▂▄▂▂▁▁▁▂
val_word_accuracy,▁▁▁▅▁▁▁▁▁▁▅▁█▁▁▅▁▁▁▅

0,1
epoch,20.0
train_accuracy,39.13738
train_loss,2.10115
train_word_accuracy,0.02495
val_accuracy,35.66347
val_loss,2.23581
val_word_accuracy,0.01078


[34m[1mwandb[0m: Agent Starting Run: ammodhwh with config:
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	epochs: 17
[34m[1mwandb[0m: 	hidden_size: 32


{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'a': 4, 'n': 5, 'g': 6, 'k': 7, 'i': 8, 't': 9, 'o': 10, 'e': 11, 'r': 12, 's': 13, 'h': 14, 'y': 15, 'w': 16, 'u': 17, 'l': 18, 'd': 19, 'j': 20, 'b': 21, 'm': 22, 'c': 23, 'q': 24, 'z': 25, 'p': 26, 'x': 27, 'v': 28, 'f': 29}
{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'অ': 4, 'ং': 5, 'ক': 6, 'ি': 7, 'ত': 8, 'ে': 9, 'র': 10, 'শ': 11, 'ই': 12, 'ও': 13, 'গ': 14, 'ু': 15, 'ল': 16, 'ো': 17, '্': 18, 'হ': 19, 'ণ': 20, 'া': 21, 'ী': 22, 'দ': 23, 'ন': 24, 'ট': 25, 'ব': 26, 'ষ': 27, 'ম': 28, 'স': 29, 'খ': 30, 'য': 31, 'ড': 32, 'ৎ': 33, 'ধ': 34, 'ঠ': 35, 'জ': 36, 'প': 37, 'ূ': 38, 'চ': 39, 'ছ': 40, 'ভ': 41, 'ঘ': 42, 'ঙ': 43, 'ৈ': 44, 'ঞ': 45, '়': 46, 'ঃ': 47, 'এ': 48, 'থ': 49, 'ৃ': 50, 'ৌ': 51, 'ফ': 52, 'ঝ': 53, 'আ': 54, 'উ': 55, 'ঁ': 56, 'ঈ': 57, 'ঊ': 58, 'ঋ': 59, 'ঐ': 60, 'ঔ': 61, 'ঢ': 62, '২': 63}




Epoch 1
train_loss:          2.6367
val_loss:            2.5256
train_accuracy:      26.98%
val_accuracy:        29.55%
train_word_accuracy: 0.00%
val_word_accuracy:   0.00%
Epoch 2
train_loss:          2.4456
val_loss:            2.4637
train_accuracy:      31.62%
val_accuracy:        31.10%
train_word_accuracy: 0.01%
val_word_accuracy:   0.00%
Epoch 3
train_loss:          2.4034
val_loss:            2.4365
train_accuracy:      32.56%
val_accuracy:        31.82%
train_word_accuracy: 0.01%
val_word_accuracy:   0.00%
Epoch 4
train_loss:          2.3803
val_loss:            2.4101
train_accuracy:      32.94%
val_accuracy:        32.07%
train_word_accuracy: 0.01%
val_word_accuracy:   0.00%
Epoch 5
train_loss:          2.3632
val_loss:            2.3871
train_accuracy:      33.28%
val_accuracy:        32.84%
train_word_accuracy: 0.01%
val_word_accuracy:   0.00%
Epoch 6
train_loss:          2.3510
val_loss:            2.3748
train_accuracy:      33.55%
val_accuracy:        33.49%
train_word

0,1
epoch,▁▁▂▂▃▃▄▄▅▅▅▆▆▇▇██
train_accuracy,▁▅▆▆▇▇▇▇▇▇███████
train_loss,█▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁
train_word_accuracy,▁█▅▅▅▄▆▅▄▅▃▄▄▃▂█▇
val_accuracy,▁▃▄▅▆▇▇▆▇▇▆▇████▆
val_loss,█▆▅▄▃▃▂▂▂▂▂▁▁▁▁▁▂
val_word_accuracy,▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁

0,1
epoch,17.0
train_accuracy,34.91209
train_loss,2.27615
train_word_accuracy,0.00983
val_accuracy,33.27119
val_loss,2.34894
val_word_accuracy,0.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: c5k9qyg0 with config:
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32


{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'a': 4, 'n': 5, 'g': 6, 'k': 7, 'i': 8, 't': 9, 'o': 10, 'e': 11, 'r': 12, 's': 13, 'h': 14, 'y': 15, 'w': 16, 'u': 17, 'l': 18, 'd': 19, 'j': 20, 'b': 21, 'm': 22, 'c': 23, 'q': 24, 'z': 25, 'p': 26, 'x': 27, 'v': 28, 'f': 29}
{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'অ': 4, 'ং': 5, 'ক': 6, 'ি': 7, 'ত': 8, 'ে': 9, 'র': 10, 'শ': 11, 'ই': 12, 'ও': 13, 'গ': 14, 'ু': 15, 'ল': 16, 'ো': 17, '্': 18, 'হ': 19, 'ণ': 20, 'া': 21, 'ী': 22, 'দ': 23, 'ন': 24, 'ট': 25, 'ব': 26, 'ষ': 27, 'ম': 28, 'স': 29, 'খ': 30, 'য': 31, 'ড': 32, 'ৎ': 33, 'ধ': 34, 'ঠ': 35, 'জ': 36, 'প': 37, 'ূ': 38, 'চ': 39, 'ছ': 40, 'ভ': 41, 'ঘ': 42, 'ঙ': 43, 'ৈ': 44, 'ঞ': 45, '়': 46, 'ঃ': 47, 'এ': 48, 'থ': 49, 'ৃ': 50, 'ৌ': 51, 'ফ': 52, 'ঝ': 53, 'আ': 54, 'উ': 55, 'ঁ': 56, 'ঈ': 57, 'ঊ': 58, 'ঋ': 59, 'ঐ': 60, 'ঔ': 61, 'ঢ': 62, '২': 63}
Epoch 1
train_loss:          2.5667
val_loss:            2.4726
train_accuracy:      28.85%
val_accuracy:        31.02%
train_word_accuracy:

0,1
epoch,▁▃▅▆█
train_accuracy,▁▅▇██
train_loss,█▄▃▁▁
train_word_accuracy,▁▃▇█▆
val_accuracy,▁▂▇▂█
val_loss,█▇▃▅▁
val_word_accuracy,▁▁█▁█

0,1
epoch,5.0
train_accuracy,35.54992
train_loss,2.24082
train_word_accuracy,0.01436
val_accuracy,35.12356
val_loss,2.24975
val_word_accuracy,0.03233


[34m[1mwandb[0m: Agent Starting Run: 0pq5ttsa with config:
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	hidden_size: 64


{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'a': 4, 'n': 5, 'g': 6, 'k': 7, 'i': 8, 't': 9, 'o': 10, 'e': 11, 'r': 12, 's': 13, 'h': 14, 'y': 15, 'w': 16, 'u': 17, 'l': 18, 'd': 19, 'j': 20, 'b': 21, 'm': 22, 'c': 23, 'q': 24, 'z': 25, 'p': 26, 'x': 27, 'v': 28, 'f': 29}
{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'অ': 4, 'ং': 5, 'ক': 6, 'ি': 7, 'ত': 8, 'ে': 9, 'র': 10, 'শ': 11, 'ই': 12, 'ও': 13, 'গ': 14, 'ু': 15, 'ল': 16, 'ো': 17, '্': 18, 'হ': 19, 'ণ': 20, 'া': 21, 'ী': 22, 'দ': 23, 'ন': 24, 'ট': 25, 'ব': 26, 'ষ': 27, 'ম': 28, 'স': 29, 'খ': 30, 'য': 31, 'ড': 32, 'ৎ': 33, 'ধ': 34, 'ঠ': 35, 'জ': 36, 'প': 37, 'ূ': 38, 'চ': 39, 'ছ': 40, 'ভ': 41, 'ঘ': 42, 'ঙ': 43, 'ৈ': 44, 'ঞ': 45, '়': 46, 'ঃ': 47, 'এ': 48, 'থ': 49, 'ৃ': 50, 'ৌ': 51, 'ফ': 52, 'ঝ': 53, 'আ': 54, 'উ': 55, 'ঁ': 56, 'ঈ': 57, 'ঊ': 58, 'ঋ': 59, 'ঐ': 60, 'ঔ': 61, 'ঢ': 62, '২': 63}
Epoch 1
train_loss:          2.4614
val_loss:            2.4242
train_accuracy:      31.29%
val_accuracy:        32.24%
train_word_accuracy:

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train_accuracy,▁▄▅▅▆▆▇▇▇▇▇████
train_loss,█▆▅▄▃▃▃▂▂▂▂▁▁▁▁
train_word_accuracy,▁▁▂▃▃▄▄▆▅▅█▆▆▇█
val_accuracy,▁▃▄▄▅▅▅▇▆▇▇▇███
val_loss,█▇▄▄▄▄▄▂▂▃▂▂▁▁▁
val_word_accuracy,▁▁▆▁▅▃▁▁▃▁█▅▅▆▆

0,1
epoch,15.0
train_accuracy,41.86144
train_loss,1.96034
train_word_accuracy,0.0514
val_accuracy,39.23011
val_loss,2.06315
val_word_accuracy,0.03233


[34m[1mwandb[0m: Agent Starting Run: va5h8k9l with config:
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128


{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'a': 4, 'n': 5, 'g': 6, 'k': 7, 'i': 8, 't': 9, 'o': 10, 'e': 11, 'r': 12, 's': 13, 'h': 14, 'y': 15, 'w': 16, 'u': 17, 'l': 18, 'd': 19, 'j': 20, 'b': 21, 'm': 22, 'c': 23, 'q': 24, 'z': 25, 'p': 26, 'x': 27, 'v': 28, 'f': 29}
{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'অ': 4, 'ং': 5, 'ক': 6, 'ি': 7, 'ত': 8, 'ে': 9, 'র': 10, 'শ': 11, 'ই': 12, 'ও': 13, 'গ': 14, 'ু': 15, 'ল': 16, 'ো': 17, '্': 18, 'হ': 19, 'ণ': 20, 'া': 21, 'ী': 22, 'দ': 23, 'ন': 24, 'ট': 25, 'ব': 26, 'ষ': 27, 'ম': 28, 'স': 29, 'খ': 30, 'য': 31, 'ড': 32, 'ৎ': 33, 'ধ': 34, 'ঠ': 35, 'জ': 36, 'প': 37, 'ূ': 38, 'চ': 39, 'ছ': 40, 'ভ': 41, 'ঘ': 42, 'ঙ': 43, 'ৈ': 44, 'ঞ': 45, '়': 46, 'ঃ': 47, 'এ': 48, 'থ': 49, 'ৃ': 50, 'ৌ': 51, 'ফ': 52, 'ঝ': 53, 'আ': 54, 'উ': 55, 'ঁ': 56, 'ঈ': 57, 'ঊ': 58, 'ঋ': 59, 'ঐ': 60, 'ঔ': 61, 'ঢ': 62, '২': 63}
Epoch 1
train_loss:          1.1819
val_loss:            0.6292
train_accuracy:      66.23%
val_accuracy:        80.13%
train_word_accuracy:

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▆▇▇▇█████
train_loss,█▃▂▂▂▁▁▁▁▁
train_word_accuracy,▁▄▅▆▇▇▇███
val_accuracy,▁▆▇███████
val_loss,█▂▁▁▂▃▃▄▄▆
val_word_accuracy,▁▅▇▇▇▇▇██▇

0,1
epoch,10.0
train_accuracy,95.24148
train_loss,0.14445
train_word_accuracy,68.9163
val_accuracy,84.81067
val_loss,0.58713
val_word_accuracy,30.89718


[34m[1mwandb[0m: Agent Starting Run: 6uql0w2p with config:
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64


{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'a': 4, 'n': 5, 'g': 6, 'k': 7, 'i': 8, 't': 9, 'o': 10, 'e': 11, 'r': 12, 's': 13, 'h': 14, 'y': 15, 'w': 16, 'u': 17, 'l': 18, 'd': 19, 'j': 20, 'b': 21, 'm': 22, 'c': 23, 'q': 24, 'z': 25, 'p': 26, 'x': 27, 'v': 28, 'f': 29}
{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'অ': 4, 'ং': 5, 'ক': 6, 'ি': 7, 'ত': 8, 'ে': 9, 'র': 10, 'শ': 11, 'ই': 12, 'ও': 13, 'গ': 14, 'ু': 15, 'ল': 16, 'ো': 17, '্': 18, 'হ': 19, 'ণ': 20, 'া': 21, 'ী': 22, 'দ': 23, 'ন': 24, 'ট': 25, 'ব': 26, 'ষ': 27, 'ম': 28, 'স': 29, 'খ': 30, 'য': 31, 'ড': 32, 'ৎ': 33, 'ধ': 34, 'ঠ': 35, 'জ': 36, 'প': 37, 'ূ': 38, 'চ': 39, 'ছ': 40, 'ভ': 41, 'ঘ': 42, 'ঙ': 43, 'ৈ': 44, 'ঞ': 45, '়': 46, 'ঃ': 47, 'এ': 48, 'থ': 49, 'ৃ': 50, 'ৌ': 51, 'ফ': 52, 'ঝ': 53, 'আ': 54, 'উ': 55, 'ঁ': 56, 'ঈ': 57, 'ঊ': 58, 'ঋ': 59, 'ঐ': 60, 'ঔ': 61, 'ঢ': 62, '২': 63}




Epoch 1
train_loss:          2.3860
val_loss:            2.4009
train_accuracy:      32.90%
val_accuracy:        32.55%
train_word_accuracy: 0.01%
val_word_accuracy:   0.00%
Epoch 2
train_loss:          2.1425
val_loss:            2.3438
train_accuracy:      38.09%
val_accuracy:        33.40%
train_word_accuracy: 0.02%
val_word_accuracy:   0.00%
Epoch 3
train_loss:          2.0567
val_loss:            2.2978
train_accuracy:      40.08%
val_accuracy:        35.09%
train_word_accuracy: 0.03%
val_word_accuracy:   0.00%
Epoch 4
train_loss:          2.0106
val_loss:            2.2769
train_accuracy:      41.23%
val_accuracy:        35.25%
train_word_accuracy: 0.03%
val_word_accuracy:   0.04%
Epoch 5
train_loss:          1.9800
val_loss:            2.2841
train_accuracy:      42.01%
val_accuracy:        35.30%
train_word_accuracy: 0.03%
val_word_accuracy:   0.01%
Epoch 6
train_loss:          1.9543
val_loss:            2.3322
train_accuracy:      42.45%
val_accuracy:        33.22%
train_word

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▆▆▇▇▇███
train_loss,█▄▃▃▂▂▁▁▁▁
train_word_accuracy,▁▃▄▅▅▅▆███
val_accuracy,▁▃▆▆▆▂▆█▇▇
val_loss,█▆▄▃▄▆▃▁▂▂
val_word_accuracy,▁▁▁▇▂▁▁██▁

0,1
epoch,10.0
train_accuracy,43.78159
train_loss,1.89906
train_word_accuracy,0.05216
val_accuracy,35.93264
val_loss,2.23173
val_word_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: m998bmne with config:
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 128
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	hidden_size: 32


{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'a': 4, 'n': 5, 'g': 6, 'k': 7, 'i': 8, 't': 9, 'o': 10, 'e': 11, 'r': 12, 's': 13, 'h': 14, 'y': 15, 'w': 16, 'u': 17, 'l': 18, 'd': 19, 'j': 20, 'b': 21, 'm': 22, 'c': 23, 'q': 24, 'z': 25, 'p': 26, 'x': 27, 'v': 28, 'f': 29}
{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'অ': 4, 'ং': 5, 'ক': 6, 'ি': 7, 'ত': 8, 'ে': 9, 'র': 10, 'শ': 11, 'ই': 12, 'ও': 13, 'গ': 14, 'ু': 15, 'ল': 16, 'ো': 17, '্': 18, 'হ': 19, 'ণ': 20, 'া': 21, 'ী': 22, 'দ': 23, 'ন': 24, 'ট': 25, 'ব': 26, 'ষ': 27, 'ম': 28, 'স': 29, 'খ': 30, 'য': 31, 'ড': 32, 'ৎ': 33, 'ধ': 34, 'ঠ': 35, 'জ': 36, 'প': 37, 'ূ': 38, 'চ': 39, 'ছ': 40, 'ভ': 41, 'ঘ': 42, 'ঙ': 43, 'ৈ': 44, 'ঞ': 45, '়': 46, 'ঃ': 47, 'এ': 48, 'থ': 49, 'ৃ': 50, 'ৌ': 51, 'ফ': 52, 'ঝ': 53, 'আ': 54, 'উ': 55, 'ঁ': 56, 'ঈ': 57, 'ঊ': 58, 'ঋ': 59, 'ঐ': 60, 'ঔ': 61, 'ঢ': 62, '২': 63}
Epoch 1
train_loss:          1.9271
val_loss:            1.2945
train_accuracy:      46.39%
val_accuracy:        61.38%
train_word_accuracy:

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train_accuracy,▁▅▆▆▇▇▇▇███████
train_loss,█▄▃▂▂▂▂▂▁▁▁▁▁▁▁
train_word_accuracy,▁▃▄▅▅▆▆▇▇▇▇████
val_accuracy,▁▄▅▆▆▇▇▇▇▇█████
val_loss,█▅▄▃▂▂▂▂▂▁▁▁▁▁▁
val_word_accuracy,▁▂▄▅▅▆▆▇▇▇▇▇███

0,1
epoch,15.0
train_accuracy,79.60549
train_loss,0.63986
train_word_accuracy,21.50686
val_accuracy,78.86837
val_loss,0.67621
val_word_accuracy,20.38828


[34m[1mwandb[0m: Agent Starting Run: ai2w3syi with config:
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	epochs: 17
[34m[1mwandb[0m: 	hidden_size: 64


{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'a': 4, 'n': 5, 'g': 6, 'k': 7, 'i': 8, 't': 9, 'o': 10, 'e': 11, 'r': 12, 's': 13, 'h': 14, 'y': 15, 'w': 16, 'u': 17, 'l': 18, 'd': 19, 'j': 20, 'b': 21, 'm': 22, 'c': 23, 'q': 24, 'z': 25, 'p': 26, 'x': 27, 'v': 28, 'f': 29}
{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'অ': 4, 'ং': 5, 'ক': 6, 'ি': 7, 'ত': 8, 'ে': 9, 'র': 10, 'শ': 11, 'ই': 12, 'ও': 13, 'গ': 14, 'ু': 15, 'ল': 16, 'ো': 17, '্': 18, 'হ': 19, 'ণ': 20, 'া': 21, 'ী': 22, 'দ': 23, 'ন': 24, 'ট': 25, 'ব': 26, 'ষ': 27, 'ম': 28, 'স': 29, 'খ': 30, 'য': 31, 'ড': 32, 'ৎ': 33, 'ধ': 34, 'ঠ': 35, 'জ': 36, 'প': 37, 'ূ': 38, 'চ': 39, 'ছ': 40, 'ভ': 41, 'ঘ': 42, 'ঙ': 43, 'ৈ': 44, 'ঞ': 45, '়': 46, 'ঃ': 47, 'এ': 48, 'থ': 49, 'ৃ': 50, 'ৌ': 51, 'ফ': 52, 'ঝ': 53, 'আ': 54, 'উ': 55, 'ঁ': 56, 'ঈ': 57, 'ঊ': 58, 'ঋ': 59, 'ঐ': 60, 'ঔ': 61, 'ঢ': 62, '২': 63}




Epoch 1
train_loss:          2.5154
val_loss:            2.1204
train_accuracy:      30.58%
val_accuracy:        39.98%
train_word_accuracy: 0.02%
val_word_accuracy:   0.01%
Epoch 2
train_loss:          1.7027
val_loss:            1.3164
train_accuracy:      50.65%
val_accuracy:        59.95%
train_word_accuracy: 0.95%
val_word_accuracy:   2.60%
Epoch 3
train_loss:          1.1659
val_loss:            0.9980
train_accuracy:      64.19%
val_accuracy:        68.75%
train_word_accuracy: 4.71%
val_word_accuracy:   7.14%
Epoch 4
train_loss:          0.9329
val_loss:            0.8670
train_accuracy:      70.67%
val_accuracy:        72.59%
train_word_accuracy: 9.37%
val_word_accuracy:   10.03%
Epoch 5
train_loss:          0.8042
val_loss:            0.7682
train_accuracy:      74.45%
val_accuracy:        75.72%
train_word_accuracy: 13.21%
val_word_accuracy:   13.43%
Epoch 6
train_loss:          0.7240
val_loss:            0.7102
train_accuracy:      76.83%
val_accuracy:        77.25%
train_w

0,1
epoch,▁▁▂▂▃▃▄▄▅▅▅▆▆▇▇██
train_accuracy,▁▄▅▆▇▇▇▇▇████████
train_loss,█▅▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁
train_word_accuracy,▁▁▂▃▄▅▅▆▆▇▇▇▇▇███
val_accuracy,▁▄▆▆▇▇▇▇█████████
val_loss,█▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁
val_word_accuracy,▁▂▃▄▅▅▆▆▇▇▇▇▇████

0,1
epoch,17.0
train_accuracy,84.82975
train_loss,0.46443
train_word_accuracy,30.66642
val_accuracy,82.18044
val_loss,0.57598
val_word_accuracy,25.7119


[34m[1mwandb[0m: Agent Starting Run: d8a817t8 with config:
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	hidden_size: 32


{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'a': 4, 'n': 5, 'g': 6, 'k': 7, 'i': 8, 't': 9, 'o': 10, 'e': 11, 'r': 12, 's': 13, 'h': 14, 'y': 15, 'w': 16, 'u': 17, 'l': 18, 'd': 19, 'j': 20, 'b': 21, 'm': 22, 'c': 23, 'q': 24, 'z': 25, 'p': 26, 'x': 27, 'v': 28, 'f': 29}
{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'অ': 4, 'ং': 5, 'ক': 6, 'ি': 7, 'ত': 8, 'ে': 9, 'র': 10, 'শ': 11, 'ই': 12, 'ও': 13, 'গ': 14, 'ু': 15, 'ল': 16, 'ো': 17, '্': 18, 'হ': 19, 'ণ': 20, 'া': 21, 'ী': 22, 'দ': 23, 'ন': 24, 'ট': 25, 'ব': 26, 'ষ': 27, 'ম': 28, 'স': 29, 'খ': 30, 'য': 31, 'ড': 32, 'ৎ': 33, 'ধ': 34, 'ঠ': 35, 'জ': 36, 'প': 37, 'ূ': 38, 'চ': 39, 'ছ': 40, 'ভ': 41, 'ঘ': 42, 'ঙ': 43, 'ৈ': 44, 'ঞ': 45, '়': 46, 'ঃ': 47, 'এ': 48, 'থ': 49, 'ৃ': 50, 'ৌ': 51, 'ফ': 52, 'ঝ': 53, 'আ': 54, 'উ': 55, 'ঁ': 56, 'ঈ': 57, 'ঊ': 58, 'ঋ': 59, 'ঐ': 60, 'ঔ': 61, 'ঢ': 62, '২': 63}
Epoch 1
train_loss:          2.5718
val_loss:            2.4967
train_accuracy:      28.90%
val_accuracy:        30.69%
train_word_accuracy:

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train_accuracy,▁▄▅▆▆▇▇▇▇▇██▇▇█
train_loss,█▅▄▃▃▂▂▂▂▂▁▁▂▂▁
train_word_accuracy,▁▃▃▃▄▄▄▅▄▆▇▆▇▆█
val_accuracy,▁▃▅▄▄▇▆▅▇▇▇█▇██
val_loss,█▆▄▄▄▂▃▄▂▂▂▁▂▁▁
val_word_accuracy,▁▆█▁▁▃▁▁▁▁▆▁▁▁▁

0,1
epoch,15.0
train_accuracy,37.7882
train_loss,2.14157
train_word_accuracy,0.02797
val_accuracy,36.51089
val_loss,2.20061
val_word_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: zmi0fsdu with config:
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 128
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 13
[34m[1mwandb[0m: 	hidden_size: 128


{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'a': 4, 'n': 5, 'g': 6, 'k': 7, 'i': 8, 't': 9, 'o': 10, 'e': 11, 'r': 12, 's': 13, 'h': 14, 'y': 15, 'w': 16, 'u': 17, 'l': 18, 'd': 19, 'j': 20, 'b': 21, 'm': 22, 'c': 23, 'q': 24, 'z': 25, 'p': 26, 'x': 27, 'v': 28, 'f': 29}
{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'অ': 4, 'ং': 5, 'ক': 6, 'ি': 7, 'ত': 8, 'ে': 9, 'র': 10, 'শ': 11, 'ই': 12, 'ও': 13, 'গ': 14, 'ু': 15, 'ল': 16, 'ো': 17, '্': 18, 'হ': 19, 'ণ': 20, 'া': 21, 'ী': 22, 'দ': 23, 'ন': 24, 'ট': 25, 'ব': 26, 'ষ': 27, 'ম': 28, 'স': 29, 'খ': 30, 'য': 31, 'ড': 32, 'ৎ': 33, 'ধ': 34, 'ঠ': 35, 'জ': 36, 'প': 37, 'ূ': 38, 'চ': 39, 'ছ': 40, 'ভ': 41, 'ঘ': 42, 'ঙ': 43, 'ৈ': 44, 'ঞ': 45, '়': 46, 'ঃ': 47, 'এ': 48, 'থ': 49, 'ৃ': 50, 'ৌ': 51, 'ফ': 52, 'ঝ': 53, 'আ': 54, 'উ': 55, 'ঁ': 56, 'ঈ': 57, 'ঊ': 58, 'ঋ': 59, 'ঐ': 60, 'ঔ': 61, 'ঢ': 62, '২': 63}




Epoch 1
train_loss:          1.3462
val_loss:            0.7298
train_accuracy:      61.33%
val_accuracy:        77.24%
train_word_accuracy: 8.37%
val_word_accuracy:   17.12%
Epoch 2
train_loss:          0.5120
val_loss:            0.5969
train_accuracy:      83.93%
val_accuracy:        81.43%
train_word_accuracy: 29.16%
val_word_accuracy:   23.50%
Epoch 3
train_loss:          0.3797
val_loss:            0.5702
train_accuracy:      87.90%
val_accuracy:        82.65%
train_word_accuracy: 39.39%
val_word_accuracy:   25.67%
Epoch 4
train_loss:          0.3144
val_loss:            0.5558
train_accuracy:      89.94%
val_accuracy:        83.00%
train_word_accuracy: 45.94%
val_word_accuracy:   26.91%
Epoch 5
train_loss:          0.2738
val_loss:            0.5572
train_accuracy:      91.20%
val_accuracy:        83.39%
train_word_accuracy: 50.64%
val_word_accuracy:   28.55%
Epoch 6
train_loss:          0.2451
val_loss:            0.5712
train_accuracy:      92.12%
val_accuracy:        83.34%
t

0,1
epoch,▁▂▂▃▃▄▅▅▆▆▇▇█
train_accuracy,▁▆▇▇▇▇███████
train_loss,█▃▂▂▂▂▁▁▁▁▁▁▁
train_word_accuracy,▁▃▅▆▆▇▇▇▇████
val_accuracy,▁▆▇▇███████▇▇
val_loss,█▃▂▁▁▂▂▃▃▄▄▅▅
val_word_accuracy,▁▅▆▇██▇▇█▇█▇▇

0,1
epoch,13.0
train_accuracy,94.79912
train_loss,0.15946
train_word_accuracy,66.65533
val_accuracy,82.95842
val_loss,0.66676
val_word_accuracy,27.56639


[34m[1mwandb[0m: Agent Starting Run: vxt4bskd with config:
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32


{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'a': 4, 'n': 5, 'g': 6, 'k': 7, 'i': 8, 't': 9, 'o': 10, 'e': 11, 'r': 12, 's': 13, 'h': 14, 'y': 15, 'w': 16, 'u': 17, 'l': 18, 'd': 19, 'j': 20, 'b': 21, 'm': 22, 'c': 23, 'q': 24, 'z': 25, 'p': 26, 'x': 27, 'v': 28, 'f': 29}
{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'অ': 4, 'ং': 5, 'ক': 6, 'ি': 7, 'ত': 8, 'ে': 9, 'র': 10, 'শ': 11, 'ই': 12, 'ও': 13, 'গ': 14, 'ু': 15, 'ল': 16, 'ো': 17, '্': 18, 'হ': 19, 'ণ': 20, 'া': 21, 'ী': 22, 'দ': 23, 'ন': 24, 'ট': 25, 'ব': 26, 'ষ': 27, 'ম': 28, 'স': 29, 'খ': 30, 'য': 31, 'ড': 32, 'ৎ': 33, 'ধ': 34, 'ঠ': 35, 'জ': 36, 'প': 37, 'ূ': 38, 'চ': 39, 'ছ': 40, 'ভ': 41, 'ঘ': 42, 'ঙ': 43, 'ৈ': 44, 'ঞ': 45, '়': 46, 'ঃ': 47, 'এ': 48, 'থ': 49, 'ৃ': 50, 'ৌ': 51, 'ফ': 52, 'ঝ': 53, 'আ': 54, 'উ': 55, 'ঁ': 56, 'ঈ': 57, 'ঊ': 58, 'ঋ': 59, 'ঐ': 60, 'ঔ': 61, 'ঢ': 62, '২': 63}




Epoch 1
train_loss:          2.6168
val_loss:            2.3027
train_accuracy:      28.69%
val_accuracy:        35.73%
train_word_accuracy: 0.01%
val_word_accuracy:   0.02%
Epoch 2
train_loss:          2.0639
val_loss:            1.8465
train_accuracy:      41.08%
val_accuracy:        45.63%
train_word_accuracy: 0.12%
val_word_accuracy:   0.19%
Epoch 3
train_loss:          1.7078
val_loss:            1.5157
train_accuracy:      49.32%
val_accuracy:        54.27%
train_word_accuracy: 0.59%
val_word_accuracy:   1.22%
Epoch 4
train_loss:          1.4733
val_loss:            1.3047
train_accuracy:      55.29%
val_accuracy:        59.76%
train_word_accuracy: 1.59%
val_word_accuracy:   2.07%
Epoch 5
train_loss:          1.3250
val_loss:            1.2259
train_accuracy:      59.04%
val_accuracy:        61.72%
train_word_accuracy: 2.51%
val_word_accuracy:   2.78%
Epoch 6
train_loss:          1.2392
val_loss:            1.1513
train_accuracy:      61.30%
val_accuracy:        64.06%
train_word

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▅▆▇▇▇███
train_loss,█▅▄▃▂▂▁▁▁▁
train_word_accuracy,▁▁▂▃▄▅▆▇▇█
val_accuracy,▁▃▅▆▇▇▇███
val_loss,█▆▄▃▂▂▁▁▁▁
val_word_accuracy,▁▁▂▄▄▆▆▇██

0,1
epoch,10.0
train_accuracy,65.87491
train_loss,1.0732
train_word_accuracy,5.4404
val_accuracy,67.64969
val_loss,1.01147
val_word_accuracy,5.63578


[34m[1mwandb[0m: Agent Starting Run: u4c7zn8q with config:
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	hidden_size: 16


{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'a': 4, 'n': 5, 'g': 6, 'k': 7, 'i': 8, 't': 9, 'o': 10, 'e': 11, 'r': 12, 's': 13, 'h': 14, 'y': 15, 'w': 16, 'u': 17, 'l': 18, 'd': 19, 'j': 20, 'b': 21, 'm': 22, 'c': 23, 'q': 24, 'z': 25, 'p': 26, 'x': 27, 'v': 28, 'f': 29}
{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'অ': 4, 'ং': 5, 'ক': 6, 'ি': 7, 'ত': 8, 'ে': 9, 'র': 10, 'শ': 11, 'ই': 12, 'ও': 13, 'গ': 14, 'ু': 15, 'ল': 16, 'ো': 17, '্': 18, 'হ': 19, 'ণ': 20, 'া': 21, 'ী': 22, 'দ': 23, 'ন': 24, 'ট': 25, 'ব': 26, 'ষ': 27, 'ম': 28, 'স': 29, 'খ': 30, 'য': 31, 'ড': 32, 'ৎ': 33, 'ধ': 34, 'ঠ': 35, 'জ': 36, 'প': 37, 'ূ': 38, 'চ': 39, 'ছ': 40, 'ভ': 41, 'ঘ': 42, 'ঙ': 43, 'ৈ': 44, 'ঞ': 45, '়': 46, 'ঃ': 47, 'এ': 48, 'থ': 49, 'ৃ': 50, 'ৌ': 51, 'ফ': 52, 'ঝ': 53, 'আ': 54, 'উ': 55, 'ঁ': 56, 'ঈ': 57, 'ঊ': 58, 'ঋ': 59, 'ঐ': 60, 'ঔ': 61, 'ঢ': 62, '২': 63}
Epoch 1
train_loss:          2.6765
val_loss:            2.3876
train_accuracy:      26.92%
val_accuracy:        32.89%
train_word_accuracy:

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train_accuracy,▁▃▄▅▆▆▆▇▇▇▇████
train_loss,█▆▅▄▃▃▃▂▂▂▁▁▁▁▁
train_word_accuracy,▁▁▂▂▂▃▃▄▄▅▆▇▇██
val_accuracy,▁▃▄▅▅▆▆▇▇▇█████
val_loss,█▆▅▄▄▃▃▂▂▂▂▁▁▁▁
val_word_accuracy,▁▁▁▂▂▃▃▄▅▅▆▇▇██

0,1
epoch,15.0
train_accuracy,55.24957
train_loss,1.48421
train_word_accuracy,2.32447
val_accuracy,58.58585
val_loss,1.33888
val_word_accuracy,3.15733


[34m[1mwandb[0m: Agent Starting Run: at2th45n with config:
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 128
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	hidden_size: 64


{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'a': 4, 'n': 5, 'g': 6, 'k': 7, 'i': 8, 't': 9, 'o': 10, 'e': 11, 'r': 12, 's': 13, 'h': 14, 'y': 15, 'w': 16, 'u': 17, 'l': 18, 'd': 19, 'j': 20, 'b': 21, 'm': 22, 'c': 23, 'q': 24, 'z': 25, 'p': 26, 'x': 27, 'v': 28, 'f': 29}
{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'অ': 4, 'ং': 5, 'ক': 6, 'ি': 7, 'ত': 8, 'ে': 9, 'র': 10, 'শ': 11, 'ই': 12, 'ও': 13, 'গ': 14, 'ু': 15, 'ল': 16, 'ো': 17, '্': 18, 'হ': 19, 'ণ': 20, 'া': 21, 'ী': 22, 'দ': 23, 'ন': 24, 'ট': 25, 'ব': 26, 'ষ': 27, 'ম': 28, 'স': 29, 'খ': 30, 'য': 31, 'ড': 32, 'ৎ': 33, 'ধ': 34, 'ঠ': 35, 'জ': 36, 'প': 37, 'ূ': 38, 'চ': 39, 'ছ': 40, 'ভ': 41, 'ঘ': 42, 'ঙ': 43, 'ৈ': 44, 'ঞ': 45, '়': 46, 'ঃ': 47, 'এ': 48, 'থ': 49, 'ৃ': 50, 'ৌ': 51, 'ফ': 52, 'ঝ': 53, 'আ': 54, 'উ': 55, 'ঁ': 56, 'ঈ': 57, 'ঊ': 58, 'ঋ': 59, 'ঐ': 60, 'ঔ': 61, 'ঢ': 62, '২': 63}
Epoch 1
train_loss:          1.6046
val_loss:            0.9055
train_accuracy:      54.09%
val_accuracy:        71.84%
train_word_accuracy:

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train_accuracy,▁▅▆▇▇▇▇████████
train_loss,█▃▃▂▂▂▂▁▁▁▁▁▁▁▁
train_word_accuracy,▁▃▄▅▆▆▆▇▇▇▇████
val_accuracy,▁▄▆▆▇▇▇▇███████
val_loss,█▄▃▂▂▂▂▁▁▁▁▁▁▁▁
val_word_accuracy,▁▄▅▆▆▇▇▇▇██████

0,1
epoch,15.0
train_accuracy,86.34136
train_loss,0.41589
train_word_accuracy,34.54055
val_accuracy,83.27929
val_loss,0.53206
val_word_accuracy,28.22442


[34m[1mwandb[0m: Agent Starting Run: 12giq82e with config:
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 13
[34m[1mwandb[0m: 	hidden_size: 256


{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'a': 4, 'n': 5, 'g': 6, 'k': 7, 'i': 8, 't': 9, 'o': 10, 'e': 11, 'r': 12, 's': 13, 'h': 14, 'y': 15, 'w': 16, 'u': 17, 'l': 18, 'd': 19, 'j': 20, 'b': 21, 'm': 22, 'c': 23, 'q': 24, 'z': 25, 'p': 26, 'x': 27, 'v': 28, 'f': 29}
{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'অ': 4, 'ং': 5, 'ক': 6, 'ি': 7, 'ত': 8, 'ে': 9, 'র': 10, 'শ': 11, 'ই': 12, 'ও': 13, 'গ': 14, 'ু': 15, 'ল': 16, 'ো': 17, '্': 18, 'হ': 19, 'ণ': 20, 'া': 21, 'ী': 22, 'দ': 23, 'ন': 24, 'ট': 25, 'ব': 26, 'ষ': 27, 'ম': 28, 'স': 29, 'খ': 30, 'য': 31, 'ড': 32, 'ৎ': 33, 'ধ': 34, 'ঠ': 35, 'জ': 36, 'প': 37, 'ূ': 38, 'চ': 39, 'ছ': 40, 'ভ': 41, 'ঘ': 42, 'ঙ': 43, 'ৈ': 44, 'ঞ': 45, '়': 46, 'ঃ': 47, 'এ': 48, 'থ': 49, 'ৃ': 50, 'ৌ': 51, 'ফ': 52, 'ঝ': 53, 'আ': 54, 'উ': 55, 'ঁ': 56, 'ঈ': 57, 'ঊ': 58, 'ঋ': 59, 'ঐ': 60, 'ঔ': 61, 'ঢ': 62, '২': 63}
Epoch 1
train_loss:          2.2679
val_loss:            2.2622
train_accuracy:      35.48%
val_accuracy:        35.43%
train_word_accuracy:

0,1
epoch,▁▂▂▃▃▄▅▅▆▆▇▇█
train_accuracy,▁▅▆▇▇███████▇
train_loss,█▄▃▂▂▁▁▁▁▁▁▁▂
train_word_accuracy,▁▂▄█▇▄▆▆█▇█▅▆
val_accuracy,▁▆▆█▆▇▇▅▆▅▆▅▆
val_loss,█▂▂▁▄▄▂▅▆▇▅██
val_word_accuracy,▁█▁▁▁▁▁▁█▆█▁▁

0,1
epoch,13.0
train_accuracy,45.78378
train_loss,1.8455
train_word_accuracy,0.02192
val_accuracy,37.31072
val_loss,2.26606
val_word_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: j5ywj5uw with config:
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64


{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'a': 4, 'n': 5, 'g': 6, 'k': 7, 'i': 8, 't': 9, 'o': 10, 'e': 11, 'r': 12, 's': 13, 'h': 14, 'y': 15, 'w': 16, 'u': 17, 'l': 18, 'd': 19, 'j': 20, 'b': 21, 'm': 22, 'c': 23, 'q': 24, 'z': 25, 'p': 26, 'x': 27, 'v': 28, 'f': 29}
{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'অ': 4, 'ং': 5, 'ক': 6, 'ি': 7, 'ত': 8, 'ে': 9, 'র': 10, 'শ': 11, 'ই': 12, 'ও': 13, 'গ': 14, 'ু': 15, 'ল': 16, 'ো': 17, '্': 18, 'হ': 19, 'ণ': 20, 'া': 21, 'ী': 22, 'দ': 23, 'ন': 24, 'ট': 25, 'ব': 26, 'ষ': 27, 'ম': 28, 'স': 29, 'খ': 30, 'য': 31, 'ড': 32, 'ৎ': 33, 'ধ': 34, 'ঠ': 35, 'জ': 36, 'প': 37, 'ূ': 38, 'চ': 39, 'ছ': 40, 'ভ': 41, 'ঘ': 42, 'ঙ': 43, 'ৈ': 44, 'ঞ': 45, '়': 46, 'ঃ': 47, 'এ': 48, 'থ': 49, 'ৃ': 50, 'ৌ': 51, 'ফ': 52, 'ঝ': 53, 'আ': 54, 'উ': 55, 'ঁ': 56, 'ঈ': 57, 'ঊ': 58, 'ঋ': 59, 'ঐ': 60, 'ঔ': 61, 'ঢ': 62, '২': 63}




Epoch 1
train_loss:          2.5144
val_loss:            2.4326
train_accuracy:      29.89%
val_accuracy:        32.14%
train_word_accuracy: 0.00%
val_word_accuracy:   0.00%
Epoch 2
train_loss:          2.3218
val_loss:            2.3588
train_accuracy:      34.32%
val_accuracy:        33.83%
train_word_accuracy: 0.01%
val_word_accuracy:   0.00%
Epoch 3
train_loss:          2.2663
val_loss:            2.3234
train_accuracy:      35.65%
val_accuracy:        34.47%
train_word_accuracy: 0.01%
val_word_accuracy:   0.00%
Epoch 4
train_loss:          2.2355
val_loss:            2.2964
train_accuracy:      36.36%
val_accuracy:        35.07%
train_word_accuracy: 0.01%
val_word_accuracy:   0.00%
Epoch 5
train_loss:          2.2125
val_loss:            2.2972
train_accuracy:      36.91%
val_accuracy:        35.23%
train_word_accuracy: 0.01%
val_word_accuracy:   0.00%
Epoch 6
train_loss:          2.1927
val_loss:            2.2960
train_accuracy:      37.36%
val_accuracy:        35.24%
train_word

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▄▃▃▃▂▁▁
train_word_accuracy,▁▂▂▂▃▄▃▄▄█
val_accuracy,▁▃▄▅▅▅▅▇▇█
val_loss,█▆▆▅▅▅▅▂▁▁
val_word_accuracy,▁▁▁▁▁▁▁▂▃█

0,1
epoch,10.0
train_accuracy,39.28086
train_loss,2.06181
train_word_accuracy,0.02646
val_accuracy,37.85267
val_loss,2.12548
val_word_accuracy,0.08621


[34m[1mwandb[0m: Agent Starting Run: 9jz3xaxq with config:
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128


{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'a': 4, 'n': 5, 'g': 6, 'k': 7, 'i': 8, 't': 9, 'o': 10, 'e': 11, 'r': 12, 's': 13, 'h': 14, 'y': 15, 'w': 16, 'u': 17, 'l': 18, 'd': 19, 'j': 20, 'b': 21, 'm': 22, 'c': 23, 'q': 24, 'z': 25, 'p': 26, 'x': 27, 'v': 28, 'f': 29}
{'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3, 'অ': 4, 'ং': 5, 'ক': 6, 'ি': 7, 'ত': 8, 'ে': 9, 'র': 10, 'শ': 11, 'ই': 12, 'ও': 13, 'গ': 14, 'ু': 15, 'ল': 16, 'ো': 17, '্': 18, 'হ': 19, 'ণ': 20, 'া': 21, 'ী': 22, 'দ': 23, 'ন': 24, 'ট': 25, 'ব': 26, 'ষ': 27, 'ম': 28, 'স': 29, 'খ': 30, 'য': 31, 'ড': 32, 'ৎ': 33, 'ধ': 34, 'ঠ': 35, 'জ': 36, 'প': 37, 'ূ': 38, 'চ': 39, 'ছ': 40, 'ভ': 41, 'ঘ': 42, 'ঙ': 43, 'ৈ': 44, 'ঞ': 45, '়': 46, 'ঃ': 47, 'এ': 48, 'থ': 49, 'ৃ': 50, 'ৌ': 51, 'ফ': 52, 'ঝ': 53, 'আ': 54, 'উ': 55, 'ঁ': 56, 'ঈ': 57, 'ঊ': 58, 'ঋ': 59, 'ঐ': 60, 'ঔ': 61, 'ঢ': 62, '২': 63}




Epoch 1
train_loss:          2.2980
val_loss:            2.3117
train_accuracy:      34.43%
val_accuracy:        33.94%
train_word_accuracy: 0.01%
val_word_accuracy:   0.00%
Epoch 2
train_loss:          2.0696
val_loss:            2.2798
train_accuracy:      40.20%
val_accuracy:        35.75%
train_word_accuracy: 0.01%
val_word_accuracy:   0.00%
Epoch 3
train_loss:          1.9898
val_loss:            2.2579
train_accuracy:      42.40%
val_accuracy:        35.79%
train_word_accuracy: 0.02%
val_word_accuracy:   0.00%
Epoch 4
train_loss:          1.9441
val_loss:            2.2816
train_accuracy:      43.52%
val_accuracy:        36.12%
train_word_accuracy: 0.02%
val_word_accuracy:   0.00%
Epoch 5
train_loss:          1.9134
val_loss:            2.2682
train_accuracy:      44.32%
val_accuracy:        36.05%
train_word_accuracy: 0.02%
val_word_accuracy:   0.00%
Epoch 6
train_loss:          1.8876
val_loss:            2.2640
train_accuracy:      44.93%
val_accuracy:        36.49%
train_word

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▃▃▂▂▁▁▁
train_word_accuracy,▁▂▃▃▃▅▅▅██
val_accuracy,▁▅▅▆▅▆▆██▇
val_loss,█▅▄▆▅▄▃▁▂▃
val_word_accuracy,▁▁▁▁▁▁█▁▁▁

0,1
epoch,10.0
train_accuracy,47.07848
train_loss,1.79137
train_word_accuracy,0.05518
val_accuracy,36.9288
val_loss,2.25032
val_word_accuracy,0.0
