In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/3dataset/dakshina_dataset_v1.0/README.md
/kaggle/input/3dataset/dakshina_dataset_v1.0/mr/lexicons/mr.translit.sampled.test.tsv
/kaggle/input/3dataset/dakshina_dataset_v1.0/mr/lexicons/mr.translit.sampled.train.tsv
/kaggle/input/3dataset/dakshina_dataset_v1.0/mr/lexicons/mr.translit.sampled.dev.tsv
/kaggle/input/3dataset/dakshina_dataset_v1.0/mr/native_script_wikipedia/mr.wiki-filt.train.text.sorted.tsv/wiki-filt.train.text.sorted.tsv
/kaggle/input/3dataset/dakshina_dataset_v1.0/mr/native_script_wikipedia/mr.wiki-full.nonblock.sections.list.txt/wiki-full.nonblock.sections.list.txt
/kaggle/input/3dataset/dakshina_dataset_v1.0/mr/native_script_wikipedia/mr.wiki-filt.valid.text.shuf.txt/wiki-filt.valid.text.shuf.txt
/kaggle/input/3dataset/dakshina_dataset_v1.0/mr/native_script_wikipedia/mr.wiki-full.urls.tsv/wiki-full.urls.tsv
/kaggle/input/3dataset/dakshina_dataset_v1.0/mr/native_script_wikipedia/mr.wiki-full.omit_pages.txt/wiki-full.omit_pages.txt
/kaggle/input/3dataset/dak

# Step 1 : Setting up the wandb

In [2]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("wandb")

In [3]:
import wandb
wandb.login(key=secret_value_0)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mma23m020[0m ([33msnehalma23m020-iit-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# Step 2 : importing the important libraries and loading the path of datafiles

In [1]:
import torch
import torch.nn as nn
import os
import random
import pandas as pd
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import wandb
from IPython.display import HTML, display

train_path = "/kaggle/input/snehal/hi.translit.sampled.train.tsv"
dev_path   = "/kaggle/input/snehal/hi.translit.sampled.dev.tsv"
test_path  = "/kaggle/input/snehal/hi.translit.sampled.test.tsv"

# Step 3 : Implementing a flexible RNN-based character-level Seq2Seq model with configurable RNN cell type (RNN/LSTM/GRU), number of layers, and embedding/hidden dimensions. It uses an encoder-decoder architecture to translate sequences from a source vocabulary (e.g., Latin) to a target vocabulary (e.g., Devanagari).

In [4]:
class CharSeq2Seq(nn.Module):
    def __init__(self, input_vocab_size,target_vocab_size,embedding_dim,hidden_size,num_layers,cell_type='LSTM'):
        super(CharSeq2Seq, self).__init__()

           # RNN types
        assert cell_type in ['RNN', 'LSTM', 'GRU'], "Unsupported RNN cell type"

        self.cell_type = cell_type
        self.embedding_dim = embedding_dim
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Input Embedding layers
        self.encoder_embedding = nn.Embedding(input_vocab_size, embedding_dim) # for encoder
        self.decoder_embedding = nn.Embedding(target_vocab_size, embedding_dim) # for decoder

        # RNN cell type constructor (RNN / LSTM / GRU)
        rnn_cell = getattr(nn, cell_type)

        # Encoder rnn
        self.encoder_rnn = rnn_cell(embedding_dim, hidden_size, num_layers, batch_first=True)
        
        # Decoder rnn
        self.decoder_rnn = rnn_cell(embedding_dim, hidden_size, num_layers, batch_first=True)

        #  Output projection from decoder hidden state to target vocab size
        self.fc_out = nn.Linear(hidden_size, target_vocab_size)

    def forward(self, src, tgt):
        """
        src: [batch_size, src_seq_len] - input/source sequence
        tgt: [batch_size, tgt_seq_len] - output/target sequence
        """
        batch_size = src.size(0)

        #fFor Encoder
        embedded_src = self.encoder_embedding(src)  # [batch_size, src_len, embed_dim]
        encoder_outputs, hidden = self.encoder_rnn(embedded_src)  # hidden: [num_layers, batch, hidden_size]

        # For Decoder
        embedded_tgt = self.decoder_embedding(tgt)  # [batch_size, tgt_len, embed_dim]
        decoder_outputs, _ = self.decoder_rnn(embedded_tgt, hidden)  # [batch, tgt_len, hidden_size]

        # Project decoder outputs to target vocab space
        logits = self.fc_out(decoder_outputs)  # [batch, tgt_len, target_vocab_size]

        return logits


# Step 4 : set the seed for reproducibility and setting up the device

In [None]:
def set_seed(seed=42):
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

set_seed()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Step 5 : Loading the dataset ( Dakshina dataset)

In [None]:
def load_dakshina_pairs(path):
    """
    Returns list of (latin, devanagari) pairs with all NaNs removed.
    """
    df = (
        pd.read_csv(path,sep="\t",names=["native", "latin", "count"],
            dtype={"native": str, "latin": str, "count": str}  # force strings
        )
        .dropna(subset=["native", "latin"])       # <-- NEW: keep only complete rows
    )

    # strip any stray whitespace
    df["native"] = df["native"].str.strip()
    df["latin"]  = df["latin"].str.strip()

    # remove empty strings (after strip)
    df = df[(df["native"] != "") & (df["latin"] != "")]
    return list(zip(df["latin"], df["native"]))

# Step 6 : The build_vocab function constructs character-level vocabularies for source and target scripts, including special tokens. The TransliterationDataset prepares source-target sequence pairs with proper tokenization and padding for training a character-level Seq2Seq transliteration model.

In [5]:

# Function to build character vocabularies for input (Latin) and target (Devanagari)
def build_vocab(pairs, special_tokens=["<pad>", "<sos>", "<eos>", "<unk>"]):
    # Extract unique characters from source (Latin) and target (Devanagari) strings
    input_chars = sorted(set("".join(src for src, _ in pairs)))
    target_chars = sorted(set("".join(trg for _, trg in pairs)))
    ## Prepend special tokens to character vocabularies
    input_vocab = special_tokens + input_chars
    target_vocab = special_tokens + target_chars
    return input_vocab, target_vocab

# Dataset for training
# Dataset class for preparing tokenized and padded (src, tgt) character sequences
class TransliterationDataset(Dataset):
    def __init__(self, pairs, input_char2idx, target_char2idx, max_input_len, max_output_len):
        self.pairs = pairs
        self.input_char2idx = input_char2idx # mapping for input (Latin) characters
        self.target_char2idx = target_char2idx # mapping for outut (devnagri) characters
        self.max_input_len = max_input_len   # max allowed input length
        self.max_output_len = max_output_len # max allowed output length

    def __len__(self):
        return len(self.pairs)
    def __getitem__(self, idx):
        latin, devanagari = self.pairs[idx]

        # Convert Latin characters to indices with <sos> and <eos>
        src_seq = [self.input_char2idx["<sos>"]] + [self.input_char2idx.get(ch, self.input_char2idx["<unk>"]) for ch in latin] + [self.input_char2idx["<eos>"]]

        # Convert Devanagari characters to indices with <sos> and <eos>
        tgt_seq = [self.target_char2idx["<sos>"]] + [self.target_char2idx.get(ch, self.target_char2idx["<unk>"]) for ch in devanagari] + [self.target_char2idx["<eos>"]]
       
        # Pad sequences to max lengths
        src_seq += [self.input_char2idx["<pad>"]] * (self.max_input_len - len(src_seq))
        tgt_seq += [self.target_char2idx["<pad>"]] * (self.max_output_len - len(tgt_seq))
        return torch.tensor(src_seq), torch.tensor(tgt_seq)
        

# step 7 : Basic Seq2Seq Model
A flexible Seq2Seq model for character-level transliteration, supporting RNN/LSTM/GRU cells, optional bidirectional encoding, and configurable layers. It embeds input/output tokens, encodes the source, and then decodes using the last hidden state to predict target sequences.

In [6]:
# Basic Seq2Seq Model for Transliteration
class Seq2Seq(nn.Module):
    def __init__(self, input_vocab_size, target_vocab_size, embedding_dim, hidden_size,encoder_layers, decoder_layers, rnn_type="LSTM", dropout=0.2, bidirectional=False):
        super(Seq2Seq, self).__init__()
        assert rnn_type in ["RNN", "LSTM", "GRU"]
        self.rnn_type = rnn_type
        self.bidirectional = bidirectional
        self.encoder_directions = 2 if bidirectional else 1

        # Embedding layers for source and target
        self.encoder_embedding = nn.Embedding(input_vocab_size, embedding_dim)
        self.decoder_embedding = nn.Embedding(target_vocab_size, embedding_dim)

        rnn_cls = getattr(nn, rnn_type)

          # Encoder RNN (can be multi-layered and bidirectional)
        self.encoder_rnn = rnn_cls(embedding_dim, hidden_size, encoder_layers,batch_first=True,dropout=dropout if encoder_layers > 1 else 0, bidirectional=bidirectional)

        # Decoder RNN 
        self.decoder_rnn = rnn_cls(embedding_dim, hidden_size, decoder_layers,batch_first=True,dropout=dropout if decoder_layers > 1 else 0)

        self.dropout = nn.Dropout(dropout)  # Dropout regularization

        # Final linear layer to project decoder output to vocabulary space
        self.fc_out = nn.Linear(hidden_size, target_vocab_size)

        if bidirectional:
            self.bridge = nn.Linear(hidden_size * 2, hidden_size)

        self.encoder_layers = encoder_layers
        self.decoder_layers = decoder_layers
        self.hidden_size = hidden_size

    def forward(self, src, tgt):

         # Embed sequences
        src_embed = self.dropout(self.encoder_embedding(src))
        tgt_embed = self.dropout(self.decoder_embedding(tgt))

        # Encode input using encoder RNN
        encoder_outputs, encoder_hidden = self.encoder_rnn(src_embed)

        # Prepare initial hidden state for decoder
        def _bridge(hidden):
            if self.rnn_type == "LSTM":
                h, c = hidden
                h_fwd = h[-2, :, :]
                h_bwd = h[-1, :, :]
                h_cat = torch.cat([h_fwd, h_bwd], dim=1)
                h_proj = self.bridge(h_cat).unsqueeze(0).repeat(self.decoder_layers, 1, 1)

                c_fwd = c[-2, :, :]
                c_bwd = c[-1, :, :]
                c_cat = torch.cat([c_fwd, c_bwd], dim=1)
                c_proj = self.bridge(c_cat).unsqueeze(0).repeat(self.decoder_layers, 1, 1)
                return (h_proj, c_proj)
            else:
                h_fwd = hidden[-2, :, :]
                h_bwd = hidden[-1, :, :]
                h_cat = torch.cat([h_fwd, h_bwd], dim=1)
                h_proj = self.bridge(h_cat).unsqueeze(0).repeat(self.decoder_layers, 1, 1)
                return h_proj

        if self.bidirectional:
            decoder_hidden = _bridge(encoder_hidden)
        else:
            if self.rnn_type == "LSTM":
                h, c = encoder_hidden
                if h.shape[0] != self.decoder_layers:
                    h = h[-1:, :, :].repeat(self.decoder_layers, 1, 1)
                    c = c[-1:, :, :].repeat(self.decoder_layers, 1, 1)
                decoder_hidden = (h, c)
            else:
                h = encoder_hidden
                if h.shape[0] != self.decoder_layers:
                    h = h[-1:, :, :].repeat(self.decoder_layers, 1, 1)
                decoder_hidden = h

         # Decode target sequence
        decoder_outputs, _ = self.decoder_rnn(tgt_embed, decoder_hidden)

          # Project decoder outputs to target vocabulary logits
        return self.fc_out(decoder_outputs)


# Step 8 : Training function for WandB

In [7]:

def train(config=None):
    with wandb.init(config=config):
        config = wandb.config  # load hyperparameters from sweep config


        # Load training and validation data
        train_pairs = load_dakshina_pairs(train_path)
        dev_pairs = load_dakshina_pairs(dev_path)

         # Build vocabularies and mappings
        input_vocab, target_vocab = build_vocab(train_pairs)
        input_char2idx = {c: i for i, c in enumerate(input_vocab)}
        target_char2idx = {c: i for i, c in enumerate(target_vocab)}

        
        # Compute max sequence lengths (+2 for <sos>, <eos>)
        max_input_len = max(len(x[0]) for x in train_pairs) + 2
        max_output_len = max(len(x[1]) for x in train_pairs) + 2

         # DataLoaders
        train_ds = TransliterationDataset(train_pairs, input_char2idx, target_char2idx, max_input_len, max_output_len)
        dev_ds = TransliterationDataset(dev_pairs, input_char2idx, target_char2idx, max_input_len, max_output_len)
        train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
        dev_loader = DataLoader(dev_ds, batch_size=64)

        PAD_IDX  = target_char2idx["<pad>"]
        EOS_IDX  = target_char2idx["<eos>"]
        target_idx2char = {i: c for c, i in target_char2idx.items()}

        #  Evaluation function to compute accuracy
        def evaluate():
            model.eval()
            corr_chars, tot_chars = 0, 0
            corr_words, tot_words = 0, 0

            with torch.no_grad():
                for src, tgt in dev_loader:
                    src, tgt = src.to(device), tgt.to(device)
                    logits = model(src, tgt[:, :-1])
                    pred   = torch.argmax(logits, dim=-1)

                    # Character-level accuracy
                    mask = tgt[:, 1:] != PAD_IDX
                    corr_chars += (pred == tgt[:,1:]).masked_select(mask).sum().item()
                    tot_chars   += mask.sum().item()

                    # word‐level exact match
                    B, T = pred.size()
                    for i in range(B):
                        gold_idxs = tgt[i,1:].tolist()
                        pred_idxs = pred[i].tolist()

                        if EOS_IDX in gold_idxs:
                            gold_idxs = gold_idxs[:gold_idxs.index(EOS_IDX)]
                        if EOS_IDX in pred_idxs:
                            pred_idxs = pred_idxs[:pred_idxs.index(EOS_IDX)]

                        gold_str = "".join(target_idx2char[idx] for idx in gold_idxs)
                        pred_str = "".join(target_idx2char[idx] for idx in pred_idxs)

                        if pred_str == gold_str:
                            corr_words += 1
                        tot_words += 1

            char_acc = corr_chars / tot_chars
            word_acc = corr_words / tot_words
            return char_acc, word_acc

        
        # Instantiate model with sweep-configured hyperparameters
        model = Seq2Seq(
            input_vocab_size=len(input_vocab),
            target_vocab_size=len(target_vocab),
            embedding_dim=config.embedding_dim,
            hidden_size=config.hidden_size,
            encoder_layers=config.encoder_layers,
            decoder_layers=config.decoder_layers,
            rnn_type=config.cell_type,
            dropout=config.dropout,
            bidirectional=config.bidirectional 
        ).to(device)

        criterion = nn.CrossEntropyLoss(ignore_index=target_char2idx["<pad>"])
        optimizer = optim.Adam(model.parameters(), lr=0.001)

        # def evaluate():
        #     model.eval()
        #     correct, total = 0, 0
        #     with torch.no_grad():
        #         for src, tgt in dev_loader:
        #             src, tgt = src.to(device), tgt.to(device)
        #             output = model(src, tgt[:, :-1])
        #             pred = torch.argmax(output, dim=-1)
        #             mask = tgt[:, 1:] != target_char2idx["<pad>"]
        #             correct += (pred == tgt[:, 1:]).masked_select(mask).sum().item()
        #             total += mask.sum().item()
        #     return correct / total

        
        # Training loop for 10 epochs
        for epoch in range(10):
            model.train()
            epoch_loss = 0
            for src, tgt in train_loader:
                src, tgt = src.to(device), tgt.to(device)
                output = model(src, tgt[:, :-1])
                loss = criterion(output.reshape(-1, output.shape[-1]), tgt[:, 1:].reshape(-1))
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                epoch_loss += loss.item()

            val_char_acc, val_word_acc = evaluate()#evaluate(model, dev_loader)
            wandb.log({
                "epoch": epoch+1,
                "val_char_accuracy": val_char_acc,
                "val_word_accuracy": val_word_acc,
            })

# Step 9 : WandB sweep configuration

In [18]:

sweep_config = {
    'method': 'bayes',
    'name': 'hindi-transliteration-sweep',
    'metric': {
        'name': 'val_accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        'embedding_dim': {'values': [16, 32, 64, 256]},
        'hidden_size': {'values': [16, 32, 64, 256]},
        'encoder_layers': {'values': [1, 2, 3]},
        'decoder_layers': {'values': [1, 2, 3]},
        'cell_type': {'values': ['RNN', 'GRU', 'LSTM']},
        'dropout': {'values': [0.2, 0.3]},
        'beam_size': {'values': [1, 3, 5]}, # Not used in decoding here, but swept
        'bidirectional': {'values': [True, False]}

    }
}

# Run sweep
sweep_id = wandb.sweep(sweep_config, project="dakshina-transliteration-hi")
#sweep_id= 'evp2r03p'
wandb.agent(sweep_id, function=train, count=100, project="dakshina-transliteration-hi", entity='snehalma23m020-iit-madras')


Create sweep with ID: gnn2a4fm
Sweep URL: https://wandb.ai/snehalma23m020-iit-madras/dakshina-transliteration-hi/sweeps/gnn2a4fm


[34m[1mwandb[0m: Agent Starting Run: wazqszn7 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 64


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▅▅▆▇▇▆▇██
val_word_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
val_char_accuracy,0.28081
val_word_accuracy,0.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6p7q1ie2 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 64


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▄▅▅▆▇▇██
val_word_accuracy,▁▁▂▂▂▃▅▆▆█

0,1
epoch,10.0
val_char_accuracy,0.55017
val_word_accuracy,0.02363


[34m[1mwandb[0m: Agent Starting Run: 5cycaj67 with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▂▅▆▇▇██▇█
val_word_accuracy,▁▁▁▁▁▁█▁▁▁

0,1
epoch,10.0
val_char_accuracy,0.33477
val_word_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: kcarls8t with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▅▆▇██████
val_word_accuracy,▁▅▇▇██████

0,1
epoch,10.0
val_char_accuracy,0.83807
val_word_accuracy,0.32813


[34m[1mwandb[0m: Agent Starting Run: ha8xhaiv with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▂▄▅▅▆▇▇██
val_word_accuracy,▁▁▁▂▃▄▄▆▇█

0,1
epoch,10.0
val_char_accuracy,0.67644
val_word_accuracy,0.10028


[34m[1mwandb[0m: Agent Starting Run: 143pb3ob with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▆▇███████
val_word_accuracy,▁▄▆▇▇▇████

0,1
epoch,10.0
val_char_accuracy,0.85869
val_word_accuracy,0.3777


[34m[1mwandb[0m: Agent Starting Run: k46xwozv with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▄▅▆▆▇▇██
val_word_accuracy,▁▂▂▃▃▅▅▆▇█

0,1
epoch,10.0
val_char_accuracy,0.45461
val_word_accuracy,0.00826


[34m[1mwandb[0m: Agent Starting Run: d0xsd8l8 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▄▄▅▅▆▇▇█
val_word_accuracy,▁▁▁▁▁▁▁▁▁█

0,1
epoch,10.0
val_char_accuracy,0.41998
val_word_accuracy,0.00092


[34m[1mwandb[0m: Agent Starting Run: ye0x4d0r with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▅▇▇██████
val_word_accuracy,▁▄▆▇▇█████

0,1
epoch,10.0
val_char_accuracy,0.84163
val_word_accuracy,0.33639


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7pd2gdqu with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▆▆▇█████
val_word_accuracy,▁▃▅▆▆█▇███

0,1
epoch,10.0
val_char_accuracy,0.71823
val_word_accuracy,0.15076


[34m[1mwandb[0m: Agent Starting Run: 0cgpziud with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▄▅▆▆▇▇██
val_word_accuracy,▁▂▁▂▄▄▄▆▇█

0,1
epoch,10.0
val_char_accuracy,0.54067
val_word_accuracy,0.02134


[34m[1mwandb[0m: Agent Starting Run: p31i7rys with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▄▆▆▇▇█▇█
val_word_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
val_char_accuracy,0.30954
val_word_accuracy,0.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: q814exvi with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▄▅▅▆▇▇██
val_word_accuracy,▁▁▂▄▄▄▆▇▇█

0,1
epoch,10.0
val_char_accuracy,0.4903
val_word_accuracy,0.00665


[34m[1mwandb[0m: Agent Starting Run: rraw3pu2 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 64


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▅▆▆▇▇███
val_word_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
val_char_accuracy,0.2877
val_word_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: m3fhulb6 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▆▇▇▇▇▇██
val_word_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
val_char_accuracy,0.26798
val_word_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: 7bzx8hiu with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▅▅▆▆▆▇▇█
val_word_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
val_char_accuracy,0.27985
val_word_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: tsa6iv99 with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▅▆▆▇▇███
val_word_accuracy,▁▂▃▅▅▆▇▇██

0,1
epoch,10.0
val_char_accuracy,0.6832
val_word_accuracy,0.1028


[34m[1mwandb[0m: Agent Starting Run: p9a4w4am with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 64


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▅▆▇▇▇███
val_word_accuracy,▁▁▂▂▄▅▆▇▇█

0,1
epoch,10.0
val_char_accuracy,0.70316
val_word_accuracy,0.12368


[34m[1mwandb[0m: Agent Starting Run: qxs3p58r with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▆▆▇▇▇█▇█
val_word_accuracy,▁▃▅▅▆▆▇▇▇█

0,1
epoch,10.0
val_char_accuracy,0.68268
val_word_accuracy,0.1067


[34m[1mwandb[0m: Agent Starting Run: ad512mku with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▆▇▇██████
val_word_accuracy,▁▄▆▇█▇████

0,1
epoch,10.0
val_char_accuracy,0.85516
val_word_accuracy,0.36737


[34m[1mwandb[0m: Agent Starting Run: qvbt3p61 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▅▇▇██████
val_word_accuracy,▁▂▅▆▇█████

0,1
epoch,10.0
val_char_accuracy,0.85354
val_word_accuracy,0.36783


[34m[1mwandb[0m: Agent Starting Run: wlwyhfup with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▆▇▇▇█▇██
val_word_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
val_char_accuracy,0.27411
val_word_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: i33y6tuy with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▆▆▇█████
val_word_accuracy,▁▃▅▆▇▇▇▇▇█

0,1
epoch,10.0
val_char_accuracy,0.70735
val_word_accuracy,0.13837


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: g9k9nref with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▄▅▆▆▇▇██
val_word_accuracy,▁▂▃▄▄▅▆▇██

0,1
epoch,10.0
val_char_accuracy,0.51877
val_word_accuracy,0.01262


[34m[1mwandb[0m: Agent Starting Run: 30nm0ofr with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▂▂▄▅▅▆▇██
val_word_accuracy,▁▁▁▁▂▂▃▄▆█

0,1
epoch,10.0
val_char_accuracy,0.55337
val_word_accuracy,0.02593


[34m[1mwandb[0m: Agent Starting Run: 6j1hyhsm with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▄▅▆▇▆▇▇█
val_word_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
val_char_accuracy,0.31633
val_word_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: emvkq1fu with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▆▆▇▇████
val_word_accuracy,▁▃▅▆▇▇█▇██

0,1
epoch,10.0
val_char_accuracy,0.74515
val_word_accuracy,0.18908


[34m[1mwandb[0m: Agent Starting Run: smjah4it with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▄▆▆▇▆▇██
val_word_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
val_char_accuracy,0.27612
val_word_accuracy,0.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: cia9epvy with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 64


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▅▆▆▇▇███
val_word_accuracy,▁▁▁▃▄▅▅▇▇█

0,1
epoch,10.0
val_char_accuracy,0.68802
val_word_accuracy,0.10578


[34m[1mwandb[0m: Agent Starting Run: wfa0c0om with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▂▃▅▅▆▇▇██
val_word_accuracy,▁▁▂▂▂▃▃▅▅█

0,1
epoch,10.0
val_char_accuracy,0.58801
val_word_accuracy,0.02983


[34m[1mwandb[0m: Agent Starting Run: 929td1b7 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▅▅▆▆▆▇██
val_word_accuracy,▁▁▁▁▁▁▁▁▁█

0,1
epoch,10.0
val_char_accuracy,0.40299
val_word_accuracy,0.00023


[34m[1mwandb[0m: Agent Starting Run: apwyl17u with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▅▇▇██████
val_word_accuracy,▁▃▅▆▇▇████

0,1
epoch,10.0
val_char_accuracy,0.8544
val_word_accuracy,0.36921


[34m[1mwandb[0m: Agent Starting Run: cev73hy1 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▅▆▇▇▇███
val_word_accuracy,▁▂▃▄▅▆▇███

0,1
epoch,10.0
val_char_accuracy,0.65985
val_word_accuracy,0.10211


[34m[1mwandb[0m: Agent Starting Run: sqcg3zlb with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▄▅▆▆▇▇██
val_word_accuracy,▁▁▂▂▃▅▄▆▆█

0,1
epoch,10.0
val_char_accuracy,0.47981
val_word_accuracy,0.00849


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0v2k075o with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▅▆▇██████
val_word_accuracy,▁▅▆▇█▇████

0,1
epoch,10.0
val_char_accuracy,0.85358
val_word_accuracy,0.36737


[34m[1mwandb[0m: Agent Starting Run: qhm7nsde with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▄▅▅▆▇▇██
val_word_accuracy,▁▁▁▂▃▄▅▇██

0,1
epoch,10.0
val_char_accuracy,0.47368
val_word_accuracy,0.0039


[34m[1mwandb[0m: Agent Starting Run: 73w414ry with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▆▆▇▇████
val_word_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
val_char_accuracy,0.25139
val_word_accuracy,0.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: bjiqgxxn with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▄▄▆▇▇███
val_word_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
val_char_accuracy,0.27566
val_word_accuracy,0.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: np8j5963 with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▃▄▆▆▇▇██
val_word_accuracy,▁▂▂▂▂▅▇▇██

0,1
epoch,10.0
val_char_accuracy,0.50073
val_word_accuracy,0.00551


[34m[1mwandb[0m: Agent Starting Run: 5wl6xpyo with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▅▆▇▇▇██▇█
val_word_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
val_char_accuracy,0.25656
val_word_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: ipii47pw with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▄▅▆▇▇▇██
val_word_accuracy,▁▂▂▂▄▅▆▇██

0,1
epoch,10.0
val_char_accuracy,0.47064
val_word_accuracy,0.00505


[34m[1mwandb[0m: Agent Starting Run: q32dzyix with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▂▄▆▆▆▆▇▇█
val_word_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
val_char_accuracy,0.36265
val_word_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: r2ujf3q8 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▄▅▆▆▇▇▇█
val_word_accuracy,▁▁▃▃▁▃███▃

0,1
epoch,10.0
val_char_accuracy,0.37258
val_word_accuracy,0.00023


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4kntvdux with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▅▆▆▇▇▇██
val_word_accuracy,▁▅▅▅▅▅▅▆██

0,1
epoch,10.0
val_char_accuracy,0.39068
val_word_accuracy,0.00092


[34m[1mwandb[0m: Agent Starting Run: 08oxwbjx with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▇▇██████
val_word_accuracy,▁▁▄▆▇▇████

0,1
epoch,10.0
val_char_accuracy,0.85668
val_word_accuracy,0.37173


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7uwz57hd with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▆▇██████
val_word_accuracy,▁▁▄▆▇▇████

0,1
epoch,10.0
val_char_accuracy,0.86268
val_word_accuracy,0.37678


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: b75igd3z with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▅▇▇██▇██
val_word_accuracy,▁▁▁▁▁█▁▁▁▁

0,1
epoch,10.0
val_char_accuracy,0.31904
val_word_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: rf7rvlsz with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 64


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▅▆▇▇▇███
val_word_accuracy,▁▁▂▃▄▅▆▇▇█

0,1
epoch,10.0
val_char_accuracy,0.7783
val_word_accuracy,0.22832


[34m[1mwandb[0m: Agent Starting Run: flyeg6q9 with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▄▆▆▇▇▇██
val_word_accuracy,▁▂▃▄▅▆▆▇██

0,1
epoch,10.0
val_char_accuracy,0.70699
val_word_accuracy,0.13814


[34m[1mwandb[0m: Agent Starting Run: o9d979oa with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▁▁▂▂▃▄▅▇█
val_word_accuracy,▁▁▁▁▁▁▁▂▅█

0,1
epoch,10.0
val_char_accuracy,0.76949
val_word_accuracy,0.2056


[34m[1mwandb[0m: Agent Starting Run: 8a243mfb with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▆▇███████
val_word_accuracy,▁▄▆▇▇█████

0,1
epoch,10.0
val_char_accuracy,0.85608
val_word_accuracy,0.36852


[34m[1mwandb[0m: Agent Starting Run: 6zkbdpwt with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▂▃▄▅▆▇▇██
val_word_accuracy,▁▁▁▂▃▃▅▆█▇

0,1
epoch,10.0
val_char_accuracy,0.49924
val_word_accuracy,0.0062


[34m[1mwandb[0m: Agent Starting Run: k6qoymwy with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 64


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▅▆▆▇▇███
val_word_accuracy,▁▂▃▄▅▆▇▇██

0,1
epoch,10.0
val_char_accuracy,0.74165
val_word_accuracy,0.18862


[34m[1mwandb[0m: Agent Starting Run: njucff3h with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 64


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▃▄▅▆▆▇██
val_word_accuracy,▁▁▂▂▂▄▅▅▇█

0,1
epoch,10.0
val_char_accuracy,0.59144
val_word_accuracy,0.04222


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: g4kuci69 with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▂▂▃▄▄▅▆▇█
val_word_accuracy,▁▁▂▁▁▁▁▁▃█

0,1
epoch,10.0
val_char_accuracy,0.35836
val_word_accuracy,0.00138


[34m[1mwandb[0m: Agent Starting Run: jjl3noov with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▂▂▄▅▅▆▇██
val_word_accuracy,▁▁▁▁█▁▁▅██

0,1
epoch,10.0
val_char_accuracy,0.33738
val_word_accuracy,0.00046


[34m[1mwandb[0m: Agent Starting Run: q1y0qhyp with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 64


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▂▄▆▆▇▇███
val_word_accuracy,▁▁▁▂▄▅▆▆▇█

0,1
epoch,10.0
val_char_accuracy,0.73911
val_word_accuracy,0.16774


[34m[1mwandb[0m: Agent Starting Run: e9niu3w4 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▄▆▆▇▇███
val_word_accuracy,▁▁▂▃▄▅▆▆▇█

0,1
epoch,10.0
val_char_accuracy,0.51026
val_word_accuracy,0.01308


[34m[1mwandb[0m: Agent Starting Run: 9xseb06n with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▂▄▆▆▇▇▇██
val_word_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
val_char_accuracy,0.24776
val_word_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: 3dyld9uz with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▄▄▅▆▇▇██
val_word_accuracy,▁▁▁▁▁▁▁▆██

0,1
epoch,10.0
val_char_accuracy,0.40573
val_word_accuracy,0.00069


[34m[1mwandb[0m: Agent Starting Run: ltbnepms with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▆▆▇▇▇▇██
val_word_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
val_char_accuracy,0.30136
val_word_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: pkkf1lwo with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▅▆▇▇█████
val_word_accuracy,▁▅▆▇▇▇███▇

0,1
epoch,10.0
val_char_accuracy,0.8417
val_word_accuracy,0.33983


[34m[1mwandb[0m: Agent Starting Run: lftp022f with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 64


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▄▅▅▆▆▇▇█
val_word_accuracy,▁▁▂▂▃▃▄▅▇█

0,1
epoch,10.0
val_char_accuracy,0.58448
val_word_accuracy,0.03855


[34m[1mwandb[0m: Agent Starting Run: 7o6mtvah with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 64


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▅▆▇▇▇███
val_word_accuracy,▁▃▄▅▆▆▇▇██

0,1
epoch,10.0
val_char_accuracy,0.78497
val_word_accuracy,0.2492


[34m[1mwandb[0m: Agent Starting Run: 6jk2hu47 with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▃▄▅▆▇▇██
val_word_accuracy,▁▁▁▂▃▃▄▅▆█

0,1
epoch,10.0
val_char_accuracy,0.52669
val_word_accuracy,0.02088


[34m[1mwandb[0m: Agent Starting Run: zl1t4bxa with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▃▅▆▆▇▇██
val_word_accuracy,▁▁▁▂▃▄▃▅▆█

0,1
epoch,10.0
val_char_accuracy,0.54282
val_word_accuracy,0.01606


[34m[1mwandb[0m: Agent Starting Run: m9k257uf with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▆▇▇▇████
val_word_accuracy,▁▃▆▇▇▇████

0,1
epoch,10.0
val_char_accuracy,0.82234
val_word_accuracy,0.28912


[34m[1mwandb[0m: Agent Starting Run: 3cl4cw5d with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▅▆▇▇█████
val_word_accuracy,▁▅▆▇▇██▇██

0,1
epoch,10.0
val_char_accuracy,0.85196
val_word_accuracy,0.36462


[34m[1mwandb[0m: Agent Starting Run: ziaiaqk1 with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▅▆▆▇▇▇██
val_word_accuracy,▁▁▂▂▄▆▆▃▇█

0,1
epoch,10.0
val_char_accuracy,0.46992
val_word_accuracy,0.00298


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: rwdjaujo with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▂▃▄▅▅▆▇▇█
val_word_accuracy,▁▁▁▁▂▂▂▄▆█

0,1
epoch,10.0
val_char_accuracy,0.52883
val_word_accuracy,0.0156


[34m[1mwandb[0m: Agent Starting Run: 0e9qh0w5 with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▄▅▆▇▇███
val_word_accuracy,▁▁▂▂▃▄▆█▇█

0,1
epoch,10.0
val_char_accuracy,0.50211
val_word_accuracy,0.014


[34m[1mwandb[0m: Agent Starting Run: iafowyk7 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▄▅▅▆▇▇▇█
val_word_accuracy,▁▁▃▃▃▁▃█▆▃

0,1
epoch,10.0
val_char_accuracy,0.42991
val_word_accuracy,0.00023


[34m[1mwandb[0m: Agent Starting Run: lf5blx6a with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▂▄▄▅▅▇▇█▇
val_word_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
val_char_accuracy,0.24291
val_word_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: 2cign262 with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▅▅▆▇▇▇██
val_word_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
val_char_accuracy,0.27523
val_word_accuracy,0.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 74y2pqbj with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▅▆▇▇█████
val_word_accuracy,▁▄▆▆▇▇████

0,1
epoch,10.0
val_char_accuracy,0.8511
val_word_accuracy,0.35796


[34m[1mwandb[0m: Agent Starting Run: bst81z16 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▂▃▄▅▆▇▇██
val_word_accuracy,▁▁▁▁▂▃▅▆▇█

0,1
epoch,10.0
val_char_accuracy,0.58609
val_word_accuracy,0.03121


[34m[1mwandb[0m: Agent Starting Run: 8qsi37km with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▄▅▅▆▇▇▇█
val_word_accuracy,▁▁▁▁▁▁▅█▅█

0,1
epoch,10.0
val_char_accuracy,0.36133
val_word_accuracy,0.00046


[34m[1mwandb[0m: Agent Starting Run: b45bbado with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 32
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▂▃▅▆▆▇▇▇█
val_word_accuracy,▁▁▁▁▁██▁██

0,1
epoch,10.0
val_char_accuracy,0.31241
val_word_accuracy,0.00023


[34m[1mwandb[0m: Agent Starting Run: xg5hw27t with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 64


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▅▆▇▇▇███
val_word_accuracy,▁▂▃▄▅▆▆▇██

0,1
epoch,10.0
val_char_accuracy,0.76527
val_word_accuracy,0.18678


[34m[1mwandb[0m: Agent Starting Run: tdodvgaw with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 64


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▆▆▇▇▇███
val_word_accuracy,▁▂▄▅▆▆▇▇██

0,1
epoch,10.0
val_char_accuracy,0.79677
val_word_accuracy,0.25746


[34m[1mwandb[0m: Agent Starting Run: h09z244l with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▄▅▅▆▇▇██
val_word_accuracy,▁▁█▁▁▁▃▁▃▁

0,1
epoch,10.0
val_char_accuracy,0.3701
val_word_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: 9sne2q2c with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▅▇▇▇█████
val_word_accuracy,▁▆▇▇▇███▇█

0,1
epoch,10.0
val_char_accuracy,0.85113
val_word_accuracy,0.35659


[34m[1mwandb[0m: Agent Starting Run: h7y2oun3 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▄▅▅▆▇▇██
val_word_accuracy,▁▁▁▂▂▃▅▅▇█

0,1
epoch,10.0
val_char_accuracy,0.4807
val_word_accuracy,0.00551


[34m[1mwandb[0m: Agent Starting Run: qbasw2sh with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▅▅▆▆▆▆▇█
val_word_accuracy,▁▁▁▁▁▃▃▅██

0,1
epoch,10.0
val_char_accuracy,0.34797
val_word_accuracy,0.00092


[34m[1mwandb[0m: Agent Starting Run: 1u58kar0 with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▄▅▆▆▇▇██
val_word_accuracy,▁▁▂▂▃▄▅▆▇█

0,1
epoch,10.0
val_char_accuracy,0.61278
val_word_accuracy,0.05599


[34m[1mwandb[0m: Agent Starting Run: 838erhfv with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 64


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▅▅▇▇▇███
val_word_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
val_char_accuracy,0.28473
val_word_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: bsebrg39 with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▅▇▇▇▇███
val_word_accuracy,▁▃▄▆▆▇▇███

0,1
epoch,10.0
val_char_accuracy,0.67802
val_word_accuracy,0.10005


[34m[1mwandb[0m: Agent Starting Run: 1cxbxmfj with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 64


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▅▅▆▇▇▇▇█
val_word_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
val_char_accuracy,0.29397
val_word_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: mcp8v3da with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▄▅▆▆▇▇██
val_word_accuracy,▁▁▂▂▅▇▅▇▆█

0,1
epoch,10.0
val_char_accuracy,0.43561
val_word_accuracy,0.00229


[34m[1mwandb[0m: Agent Starting Run: lkabc8b6 with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▆▇▇██████
val_word_accuracy,▁▅▆▇▇▇████

0,1
epoch,10.0
val_char_accuracy,0.86769
val_word_accuracy,0.39399


[34m[1mwandb[0m: Agent Starting Run: xu1g32v2 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▅▆▇▇▇███
val_word_accuracy,▁▃▄▅▆▇▇██▇

0,1
epoch,10.0
val_char_accuracy,0.72731
val_word_accuracy,0.16659


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jtu37usb with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▄▅▅▆▇▇██
val_word_accuracy,▁▁▅▁██▅▁▁█

0,1
epoch,10.0
val_char_accuracy,0.43964
val_word_accuracy,0.00046


[34m[1mwandb[0m: Agent Starting Run: qlxf3fu4 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 256
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▅▆▆▇▇▇▆█
val_word_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
val_char_accuracy,0.32323
val_word_accuracy,0.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9wir44fj with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▇▇██████
val_word_accuracy,▁▂▅▇▇▇████

0,1
epoch,10.0
val_char_accuracy,0.85902
val_word_accuracy,0.37265


[34m[1mwandb[0m: Agent Starting Run: crlrrrdo with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▅▇▇▇█████
val_word_accuracy,▁▅▆▇▇█████

0,1
epoch,10.0
val_char_accuracy,0.84945
val_word_accuracy,0.35085


[34m[1mwandb[0m: Agent Starting Run: pc4c8112 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_size: 16


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▅▅▆▆▇▇██
val_word_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
val_char_accuracy,0.36248
val_word_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: b4nzmhae with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 32


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▅▆▆▆▇▇██
val_word_accuracy,▅▁▁▁▁▁▁█▅▅

0,1
epoch,10.0
val_char_accuracy,0.41021
val_word_accuracy,0.00023


[34m[1mwandb[0m: Agent Starting Run: pim62ra4 with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▃▅▇▇█████
val_word_accuracy,▁▁▂▅▆▇▇███

0,1
epoch,10.0
val_char_accuracy,0.85915
val_word_accuracy,0.37816


[34m[1mwandb[0m: Agent Starting Run: 6artny3a with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_dim: 16
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▄▅▅▅▇████
val_word_accuracy,▁▁▁▁▁▁▁▁▁█

0,1
epoch,10.0
val_char_accuracy,0.29704
val_word_accuracy,0.00023


[34m[1mwandb[0m: Agent Starting Run: o7g2wssh with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_dim: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_size: 256


0,1
epoch,▁▂▃▃▄▅▆▆▇█
val_char_accuracy,▁▅▆▇███▇██
val_word_accuracy,▁▅▆▇███▇██

0,1
epoch,10.0
val_char_accuracy,0.85249
val_word_accuracy,0.35911


# Step 10:  The best hyperparametrs obtained with the help of Wandb are :
# beam_size: 1
# bidirectional: False
# cell_type: LSTM
# decoder_layers: 3
# dropout: 0.3
# embedding_dim:256
# encoder_layers: 2
# hidden_size: 256

# Step 11 : Now training the model with best hyperparameters and evaluate on test set

# note : beam size = 1

In [8]:
# Load Latin-Devanagari pairs from each file
train_pairs = load_dakshina_pairs(train_path)
dev_pairs   = load_dakshina_pairs(dev_path)
test_pairs  = load_dakshina_pairs(test_path)

# Build vocabularies
input_vocab, target_vocab = build_vocab(train_pairs)

# Create character-to-index mappings
input_char2idx  = {c:i for i,c in enumerate(input_vocab)}
target_char2idx = {c:i for i,c in enumerate(target_vocab)}
target_idx2char = {i:c for c,i in target_char2idx.items()}

# Special token indices
PAD_IDX = target_char2idx["<pad>"]
EOS_IDX = target_char2idx["<eos>"]

# Define max sequence lengths (+2 for <sos> and <eos>)
max_in  = max(len(s) for s,_ in train_pairs) + 2
max_out = max(len(t) for _,t in train_pairs) + 2

# Create PyTorch datasets for training, validation and test
train_ds = TransliterationDataset(train_pairs, input_char2idx, target_char2idx, max_in, max_out)
dev_ds   = TransliterationDataset(dev_pairs,   input_char2idx, target_char2idx, max_in, max_out)
test_ds  = TransliterationDataset(test_pairs,  input_char2idx, target_char2idx, max_in, max_out)

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
dev_loader   = DataLoader(dev_ds,   batch_size=64)
test_loader  = DataLoader(test_ds,  batch_size=64)

# Instantiate the best model —
# best hyperparameters
best_hparams = dict(
    embedding_dim  = 256,hidden_size = 256, encoder_layers = 2, decoder_layers = 3, rnn_type = "LSTM", dropout = 0.3, bidirectional  = False)

 #  Seq2Seq model using best hyperparameters found from sweep
model = Seq2Seq(
    input_vocab_size = len(input_vocab),
    target_vocab_size= len(target_vocab),
    embedding_dim    = best_hparams["embedding_dim"],
    hidden_size      = best_hparams["hidden_size"],
    encoder_layers   = best_hparams["encoder_layers"],
    decoder_layers   = best_hparams["decoder_layers"],
    rnn_type         = best_hparams["rnn_type"],
    dropout          = best_hparams["dropout"],
    bidirectional    = best_hparams["bidirectional"]
).to(device)

# Define optimiser and loss function
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss(ignore_index=PAD_IDX)

# Computes exact-match word-level accuracy over a given data split
def evaluate_split(loader):
    model.eval()
    corr, total = 0, 0
    with torch.no_grad():
        for src, tgt in loader:
            src, tgt = src.to(device), tgt.to(device)

            # Forward pass through model
            logits = model(src, tgt[:, :-1])
            preds  = torch.argmax(logits, dim=-1)
            B, T    = preds.size()
            
            for i in range(B):
                # Convert target sequence to string
                gold_seq = tgt[i,1:].tolist()
                if EOS_IDX in gold_seq:
                    gold_seq = gold_seq[:gold_seq.index(EOS_IDX)]
                gold = "".join(target_idx2char[x] for x in gold_seq)
                # extract pred string
                p_seq = preds[i].tolist()
                if EOS_IDX in p_seq:
                    p_seq = p_seq[:p_seq.index(EOS_IDX)]
                pred = "".join(target_idx2char[x] for x in p_seq)
                # tally
                corr  += (pred == gold)
                total += 1
    return corr/total



# Training + validation loop —
num_epochs = 10
best_dev_acc = 0.0

for epoch in range(1, num_epochs+1):
    model.train()
    running_loss = 0.0

    for src, tgt in train_loader:
        src, tgt = src.to(device), tgt.to(device)

        # Forward pass
        logits   = model(src, tgt[:, :-1])
        loss     = criterion(logits.reshape(-1, logits.size(-1)),tgt[:,1:].reshape(-1))

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    avg_train_loss = running_loss / len(train_loader)
    dev_acc = evaluate_split(dev_loader)

    print(f"Epoch {epoch:2d}  Train Loss: {avg_train_loss:.4f}  |  Dev Acc: {dev_acc:.4f}")

    # save best model
    if dev_acc > best_dev_acc:
        best_dev_acc = dev_acc
        torch.save(model.state_dict(), "best_seq2seq.pt")

#  Final test evaluation —
model.load_state_dict(torch.load("best_seq2seq.pt"))
test_acc = evaluate_split(test_loader)
print(f"\nFinal Test Exact‐Match Acc: {test_acc:.4f}")


Epoch  1  Train Loss: 2.5220  |  Dev Acc: 0.0190
Epoch  2  Train Loss: 1.0691  |  Dev Acc: 0.2150
Epoch  3  Train Loss: 0.6484  |  Dev Acc: 0.3040
Epoch  4  Train Loss: 0.5142  |  Dev Acc: 0.3309
Epoch  5  Train Loss: 0.4386  |  Dev Acc: 0.3504
Epoch  6  Train Loss: 0.3879  |  Dev Acc: 0.3639
Epoch  7  Train Loss: 0.3483  |  Dev Acc: 0.3871
Epoch  8  Train Loss: 0.3165  |  Dev Acc: 0.3816
Epoch  9  Train Loss: 0.2897  |  Dev Acc: 0.3834
Epoch 10  Train Loss: 0.2665  |  Dev Acc: 0.3899

Final Test Exact‐Match Acc: 0.3994


# Step 12 : Storing predictions on the entire test set

In [12]:
# Ensure the output folder exists
os.makedirs("predictions_vanilla", exist_ok=True)

# Build a DataFrame 
df = pd.DataFrame(predictions, columns=["latin", "gold", "predicted"])

# Write out as CSV
csv_path = os.path.join("predictions_vanilla", "predictions.csv")
df.to_csv(csv_path, index=False, encoding="utf-8")
print(f"→ Wrote all predictions to {csv_path}")

# write out as TSV 
tsv_path = os.path.join("predictions_vanilla", "predictions.tsv")
df.to_csv(tsv_path, sep="\t", index=False, encoding="utf-8")
print(f"→ Wrote all predictions to {tsv_path}")


→ Wrote all predictions to predictions_vanilla/predictions.csv
→ Wrote all predictions to predictions_vanilla/predictions.tsv


# Step 13 : displaying the Transliteration grid

In [12]:
try:
    predictions
except NameError:
    df = pd.read_csv("predictions_vanilla/predictions.tsv", sep="\t", encoding="utf-8")
    predictions = list(zip(df['latin'], df['gold'], df['predicted']))


# format of the grid
def display_colored_prediction_grid(predictions, n=20):
    """
    Display a styled table with model predictions vs ground truth.
    Args:
        predictions (list of tuples): (latin_input, reference_output, predicted_output)
        n (int): number of rows to show
    """
    df = pd.DataFrame(predictions[:n], columns=["Latin", "Reference", "Predicted"])

    html = '<h3> Hindi Transliteration: Prediction Grid</h3>'
    html += """
    <style>
        table.pred-table {
            border-collapse: collapse;
            font-family: monospace;
            margin: 10px 0;
        }
        table.pred-table th, table.pred-table td {
            border: 1px solid #ccc;
            padding: 6px 14px;
            text-align: center;
        }
    </style>
    """
    html += '<table class="pred-table">'
    html += '<thead><tr><th>#</th><th>Latin Input</th><th>Predicted</th><th>Reference</th><th>Status</th></tr></thead><tbody>'

    for i, row in df.iterrows():
        match        = row["Predicted"] == row["Reference"]
        bg_color     = "#d4edda" if match else "#f8d7da"
        border_color = "#28a745" if match else "#dc3545"
        status_icon  = "CORRECT" if match else "INCORRECT"

        html += f'''
        <tr style="background-color: {bg_color}; border: 2px solid {border_color};">
            <td>{i+1}</td>
            <td><b>{row["Latin"]}</b></td>
            <td style="border: 2px solid {border_color};">{row["Predicted"]}</td>
            <td style="border: 2px solid {border_color};">{row["Reference"]}</td>
            <td>{status_icon}</td>
        </tr>
        '''

    html += '</tbody></table>'
    display(HTML(html))




# show the 20 examples
display_colored_prediction_grid(predictions, n=20)


#,Latin Input,Predicted,Reference,Status
1,ank,अंक,अंक,CORRECT
2,anka,अंका,अंक,INCORRECT
3,ankit,अनकित,अंकित,INCORRECT
4,anakon,अनकों,अंकों,INCORRECT
5,ankhon,अंखों,अंकों,INCORRECT
6,ankon,अंकों,अंकों,CORRECT
7,angkor,अंगोकक,अंकोर,INCORRECT
8,ankor,अनकोर,अंकोर,INCORRECT
9,angaarak,अंगारक,अंगारक,CORRECT
10,angarak,अंगररक,अंगारक,INCORRECT
