In [1]:
import csv
import random

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.font_manager import FontProperties

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

import wandb

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:

# DATA LOADING & ANALYSIS

def data_load_tsv(path):
    """
    Load data from TSV files with source-target pairs
    """
    df = pd.read_csv(
        path,
        sep='\t',
        header=None,
        dtype=str,
        quoting=csv.QUOTE_NONE
    )
    df = df.dropna(subset=[0,1])
    return df[0].tolist(), df[1].tolist()

# create char set from multiple lists
def create_char_set(*datasets):
    char_set = set()
    for data in datasets:
        for word in data:
            char_set.update(word)
    return char_set


train_input, train_output = data_load_tsv(
    "/kaggle/input/dakshina-dataset/dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.train.tsv"
)
val_input, val_output = data_load_tsv(
    "/kaggle/input/dakshina-dataset/dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.dev.tsv"
)
test_input, test_output = data_load_tsv(
    "/kaggle/input/dakshina-dataset/dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.test.tsv"
)


# Print sizes
print(f"Number of training samples:   {len(train_input)}")
print(f"Number of validation samples: {len(val_input)}")
print(f"Number of test samples:       {len(test_input)}")

# Build character sets
src_chars = create_char_set(train_input, val_input, test_input)
tgt_chars = create_char_set(train_output, val_output, test_output)

print("\nSource Character Set:")
print(f"Total characters: {len(src_chars)}")
print(sorted(src_chars))

print("\nTarget Character Set:")
print(f"Total characters: {len(tgt_chars)}")
print(sorted(tgt_chars))

# Max seq lengths 
max_seq_src = max(len(w) for w in train_input + val_input + test_input) + 2
max_seq_tgt = max(len(w) for w in train_output + val_output + test_output) + 2
print(f"\nMax source seq length (with tokens): {max_seq_src}")
print(f"Max target seq length (with tokens): {max_seq_tgt}")


# INDEX MAPPINGS

special_tokens = {'<pad>': 0, '<sow>': 1, '<eow>': 2}

src2idx = {ch: i+3 for i, ch in enumerate(sorted(src_chars))}
src2idx.update(special_tokens)
print("\nSource Indices:")
print(src2idx)

idx2src = {i: ch for ch, i in src2idx.items()}

tgt2idx = {ch: i+3 for i, ch in enumerate(sorted(tgt_chars))}
tgt2idx.update(special_tokens)
print("\nTarget Indices:")
print(tgt2idx)

idx2tgt = {i: ch for ch, i in tgt2idx.items()}

SRC_VOCAB = len(src2idx)
TGT_VOCAB = len(tgt2idx)

# embedding dims 
SRC_EMB_DIM = 64
TGT_EMB_DIM = 64


# PREPROCESSING

def tsv_preprocessor(data, max_len, vocab):
    processed = []
    for w in data:
        seq = ['<sow>'] + list(w) + ['<eow>']
        seq += ['<pad>'] * (max_len - len(seq))
        indices = [vocab.get(c, vocab['<pad>']) for c in seq]
        processed.append(torch.LongTensor(indices))
    return torch.stack(processed)

train_src = tsv_preprocessor(train_input, max_seq_src, src2idx)
train_tgt = tsv_preprocessor(train_output, max_seq_tgt, tgt2idx)
val_src   = tsv_preprocessor(val_input,   max_seq_src, src2idx)
val_tgt   = tsv_preprocessor(val_output,  max_seq_tgt, tgt2idx)
test_src  = tsv_preprocessor(test_input,  max_seq_src, src2idx)
test_tgt  = tsv_preprocessor(test_output, max_seq_tgt, tgt2idx)


# DATASET & DATALOADER
class TSVDataset(Dataset):
    def __init__(self, src, tgt):
        self.src = src
        self.tgt = tgt
    def __len__(self):
        return len(self.src)
    def __getitem__(self, idx):
        return self.src[idx], self.tgt[idx]

# custom collate to pad along seq dim

def collate_fn(batch):
    src_batch, tgt_batch = zip(*batch)
    src_padded = nn.utils.rnn.pad_sequence(src_batch, batch_first=False, padding_value=special_tokens['<pad>'])
    tgt_padded = nn.utils.rnn.pad_sequence(tgt_batch, batch_first=False, padding_value=special_tokens['<pad>'])
    return src_padded, tgt_padded

BATCH_SIZE = 256
train_loader = DataLoader(
    TSVDataset(train_src, train_tgt),
    batch_size=BATCH_SIZE, shuffle=True,
    collate_fn=collate_fn
)
val_loader = DataLoader(
    TSVDataset(val_src, val_tgt),
    batch_size=BATCH_SIZE,
    collate_fn=collate_fn
)
test_loader = DataLoader(
    TSVDataset(test_src, test_tgt),
    batch_size=BATCH_SIZE,
    collate_fn=collate_fn
)

Number of training samples:   94543
Number of validation samples: 9279
Number of test samples:       9228

Source Character Set:
Total characters: 60
['ঁ', 'ং', 'ঃ', 'অ', 'আ', 'ই', 'ঈ', 'উ', 'ঊ', 'ঋ', 'এ', 'ঐ', 'ও', 'ঔ', 'ক', 'খ', 'গ', 'ঘ', 'ঙ', 'চ', 'ছ', 'জ', 'ঝ', 'ঞ', 'ট', 'ঠ', 'ড', 'ঢ', 'ণ', 'ত', 'থ', 'দ', 'ধ', 'ন', 'প', 'ফ', 'ব', 'ভ', 'ম', 'য', 'র', 'ল', 'শ', 'ষ', 'স', 'হ', '়', 'া', 'ি', 'ী', 'ু', 'ূ', 'ৃ', 'ে', 'ৈ', 'ো', 'ৌ', '্', 'ৎ', '২']

Target Character Set:
Total characters: 26
['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

Max source seq length (with tokens): 24
Max target seq length (with tokens): 24

Source Indices:
{'ঁ': 3, 'ং': 4, 'ঃ': 5, 'অ': 6, 'আ': 7, 'ই': 8, 'ঈ': 9, 'উ': 10, 'ঊ': 11, 'ঋ': 12, 'এ': 13, 'ঐ': 14, 'ও': 15, 'ঔ': 16, 'ক': 17, 'খ': 18, 'গ': 19, 'ঘ': 20, 'ঙ': 21, 'চ': 22, 'ছ': 23, 'জ': 24, 'ঝ': 25, 'ঞ': 26, 'ট': 27, 'ঠ': 28, 'ড': 29, 'ঢ': 30, 'ণ': 31, 'ত': 32, 'থ': 33, 'দ'

In [3]:

# ENCODER CLASS

class Encoder(nn.Module):
    def __init__(
        self,
        vocab_size: int,
        emb_dim: int,
        hid_dim: int,
        rnn_type: str = 'gru',
        num_layers: int = 1,
        dropout: float = 0.0,
        bidir: bool = False
    ):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim)
        rnn_cls = nn.LSTM if rnn_type.lower() == 'lstm' else nn.GRU
        self.rnn = rnn_cls(
            input_size=emb_dim,
            hidden_size=hid_dim,
            num_layers=num_layers,
            dropout=dropout if num_layers > 1 else 0.0,
            bidirectional=bidir,
            batch_first=True
        )

    def forward(
        self,
        x: torch.Tensor,
        hidden: torch.Tensor = None,
        cell: torch.Tensor = None
    ):
        emb = self.embedding(x)  # (batch, seq_len, emb_dim)
        if isinstance(self.rnn, nn.LSTM):
            if hidden is None or cell is None:
                out, (h, c) = self.rnn(emb)
            else:
                out, (h, c) = self.rnn(emb, (hidden, cell))
            return out, h, c
        else:
            if hidden is None:
                out, h = self.rnn(emb)
            else:
                out, h = self.rnn(emb, hidden)
            return out, h, None

In [4]:

# DECODER CLASS

class Decoder(nn.Module):
    def __init__(
        self,
        vocab_size: int,
        emb_dim: int,
        hid_dim: int,
        rnn_type: str = 'gru',
        num_layers: int = 1,
        dropout: float = 0.0,
        bidir: bool = False,
        use_attention: bool = False
    ):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim)
        rnn_cls = nn.LSTM if rnn_type.lower() == 'lstm' else nn.GRU
        self.rnn = rnn_cls(
            input_size=emb_dim,
            hidden_size=hid_dim,
            num_layers=num_layers,
            dropout=dropout if num_layers > 1 else 0.0,
            bidirectional=bidir,
            batch_first=True
        )
        # adjust output linear to match bidirectional RNN hidden dim
        out_dim = hid_dim * (2 if bidir else 1)
        self.out = nn.Linear(out_dim, vocab_size)
        self.use_attention = use_attention

    def forward(
        self,
        x: torch.Tensor,
        prev_hidden: torch.Tensor,
        prev_cell: torch.Tensor = None,
        encoder_outputs: torch.Tensor = None
    ):
        emb = self.embedding(x)  # (batch, 1, emb_dim)
        if isinstance(self.rnn, nn.LSTM):
            if prev_hidden is None or prev_cell is None:
                dec_out, (h, c) = self.rnn(emb)
            else:
                dec_out, (h, c) = self.rnn(emb, (prev_hidden, prev_cell))
        else:
            if prev_hidden is None:
                dec_out, h = self.rnn(emb)
                c = None
            else:
                dec_out, h = self.rnn(emb, prev_hidden)
                c = None
        logits = self.out(dec_out)  # dec_out shape: (batch, 1, out_dim)
        return logits, h, c


In [5]:

# SEQ2SEQ CLASS
class Seq2Seq(nn.Module):
    def __init__(
        self,
        encoder: Encoder,
        decoder: Decoder,
        max_tgt_len: int,
        teacher_force_rate: float = 0.5
    ):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.max_tgt_len = max_tgt_len
        self.teacher_force_rate = teacher_force_rate

    def forward(
        self,
        src: torch.Tensor,
        tgt: torch.Tensor = None,
        teacher_forcing: bool = True,
        training: bool = True
    ):
        # Encode source sequence
        enc_outputs, h_enc, c_enc = self.encoder(src)

        # Align hidden cell to decoder's layers
        rnn = self.decoder.rnn
        dirs = 2 if rnn.bidirectional else 1
        exp_layers = rnn.num_layers * dirs
        L_enc, B, H = h_enc.size()
        if L_enc != exp_layers:
            if L_enc > exp_layers:
                h = h_enc[:exp_layers]
                c = c_enc[:exp_layers] if c_enc is not None else None
            else:
                pad = exp_layers - L_enc
                h = torch.cat([h_enc, h_enc.new_zeros(pad, B, H)], 0)
                if c_enc is not None:
                    c = torch.cat([c_enc, c_enc.new_zeros(pad, B, H)], 0)
                else:
                    c = None
        else:
            h, c = h_enc, c_enc

        # Prepare initial decoder input 
        dec_in = torch.full((B,1), 1, dtype=torch.long, device=src.device)
        outputs = torch.zeros(
            self.max_tgt_len, B, self.decoder.out.out_features,
            device=src.device
        )

        for t in range(self.max_tgt_len):
            logits, h, c = self.decoder(dec_in, h, c, enc_outputs)
            outputs[t] = logits.squeeze(1)
            if training and teacher_forcing and random.random() < self.teacher_force_rate:
                dec_in = tgt[:,t].unsqueeze(1)
            else:
                dec_in = logits.argmax(dim=2)

        return outputs, enc_outputs


In [6]:
def train1(model, train_loader, val_loader, epochs):
    model.to(device)
    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()

    for epoch in tqdm(range(1, epochs + 1), desc="Epochs"):
        # ------------------
        # Training Phase
        # ------------------
        model.train()
        train_loss = 0.0
        train_hits = 0

        for src_batch, tgt_batch in train_loader:
            # transpose from (seq_len, B) -> (B, seq_len)
            src = src_batch.transpose(0, 1).to(device)
            tgt = tgt_batch.transpose(0, 1).to(device)

            optimizer.zero_grad()
            outputs, _ = model(src, tgt, teacher_forcing=True, training=True)
            # outputs: (T, B, V) -> (B*T, V) for loss
            logits = outputs.permute(1, 0, 2).reshape(-1, TGT_VOCAB)
            truth  = tgt.reshape(-1)

            loss = criterion(logits, truth)
            loss.backward()
            optimizer.step()

            train_loss += loss.item() * src.size(0)
            train_hits += (logits.argmax(dim=1) == truth).sum().item()

        avg_train_loss = train_loss / len(train_loader.dataset)
        train_acc      = train_hits / (len(train_loader.dataset) * max_seq_tgt)

        
        # Validation Phase
    
        model.eval()
        val_loss       = 0.0
        val_word_hits  = 0

        with torch.no_grad():
            for src_batch, tgt_batch in val_loader:
                src = src_batch.transpose(0, 1).to(device)
                tgt = tgt_batch.transpose(0, 1).to(device)

                outputs, _ = model(src, tgt, teacher_forcing=False, training=False)
                # token-level loss
                logits = outputs.permute(1, 0, 2).reshape(-1, TGT_VOCAB)
                truth  = tgt.reshape(-1)
                loss   = criterion(logits, truth)

                val_loss += loss.item() * src.size(0)

                # word-level accuracy: count sequences where all tokens match
                pred_tokens = outputs.argmax(dim=2).transpose(0, 1)  # (B, T)
                val_word_hits += (pred_tokens == tgt).all(dim=1).sum().item()

        avg_val_loss    = val_loss / len(val_loader.dataset)
        validation_acc  = val_word_hits / len(val_loader.dataset)


        # Logging
        print(
            f"Epoch {epoch}: "
            f"train_loss={avg_train_loss:.4f}, train_acc={train_acc:.4f} | "
            f"val_loss={avg_val_loss:.4f}, validation_acc={validation_acc:.4f}"
        )
        wandb.log({
            "epoch": epoch,
            "train_loss":     avg_train_loss   * 100,
            "train_acc":      train_acc        * 100,
            "val_loss":       avg_val_loss     * 100,
            "validation_acc": validation_acc   * 100
        })


In [7]:
import wandb
import numpy as np
from types import SimpleNamespace
import random

In [8]:
wandb.login(key='1df7a902fa4a610500b8e79e21818419d5facdbb')#


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mma23m018[0m ([33mma23m018-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

**Find best validation accuracy without attention**

In [9]:

# SWEEP CONFIGURATION (Bayesian, no attention)
sweep_config = {
    'method': 'bayes',
    'name'  : 'sweep - no attention',
    'metric': {
      'goal': 'maximize',
      'name': 'validation_accuracy'
    },
    'parameters': {
        'input_embedding_size': {'values': [128, 256, 512]},
        'enc_layers':            {'values': [1, 2, 3]},
        'dec_layers':            {'values': [1, 2, 3]},
        'hidden_size':           {'values': [ 128, 256, 512]},
        'cell_type':             {'values': ['lstm', 'rnn', 'gru']},
        'bidirectional':         {'values': [True]},
        'dropout':               {'values': [0.1, 0.2, 0.3]},
        'beam_size':             {'values': [1, 3, 5]}
    }
}

# define sweep_id 
sweep_id = wandb.sweep(
    sweep   = sweep_config,
    entity  = "ma23m018-indian-institute-of-technology-madras",
    project = "DA6401_A3"
)

# MAIN FUNCTION
def main():
    with wandb.init():
        cfg = wandb.config
        wandb.run.name = (
            f"cell-{cfg.cell_type}_hid-{cfg.hidden_size}"
            f"_emb-{cfg.input_embedding_size}"
            f"_enc-{cfg.enc_layers}_dec-{cfg.dec_layers}"
            f"_drop{cfg.dropout}_beam{cfg.beam_size}"
        )

        encoder = Encoder(
            vocab_size = SRC_VOCAB,
            emb_dim    = cfg.input_embedding_size,
            hid_dim    = cfg.hidden_size,
            rnn_type   = cfg.cell_type,
            num_layers = cfg.enc_layers,
            dropout    = cfg.dropout,
            bidir      = cfg.bidirectional
        )

        decoder = Decoder(
            vocab_size    = TGT_VOCAB,
            emb_dim       = cfg.input_embedding_size,
            hid_dim       = cfg.hidden_size,
            rnn_type      = cfg.cell_type,
            num_layers    = cfg.dec_layers,
            dropout       = cfg.dropout,
            bidir         = cfg.bidirectional,
            use_attention = False
        )

        model = Seq2Seq(
            encoder            = encoder,
            decoder            = decoder,
            max_tgt_len        = max_seq_tgt,
            teacher_force_rate = 0.5
        ).to(device)

    
        train1(model, train_loader, val_loader, epochs=10)

# launch 15 sweep runs of main()
wandb.agent(sweep_id, function=main, count=15)
wandb.finish()


Create sweep with ID: ggd6ozc5
Sweep URL: https://wandb.ai/ma23m018-indian-institute-of-technology-madras/DA6401_A3/sweeps/ggd6ozc5


[34m[1mwandb[0m: Agent Starting Run: gi8hv6a2 with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 512


Epochs:  10%|█         | 1/10 [00:40<06:06, 40.76s/it]

Epoch 1: train_loss=0.5927, train_acc=0.8155 | val_loss=0.5811, validation_acc=0.1140


Epochs:  20%|██        | 2/10 [01:23<05:36, 42.06s/it]

Epoch 2: train_loss=0.3411, train_acc=0.8868 | val_loss=0.5451, validation_acc=0.1579


Epochs:  30%|███       | 3/10 [02:09<05:07, 43.90s/it]

Epoch 3: train_loss=0.2968, train_acc=0.9002 | val_loss=0.5695, validation_acc=0.1651


Epochs:  40%|████      | 4/10 [02:56<04:28, 44.82s/it]

Epoch 4: train_loss=0.2743, train_acc=0.9067 | val_loss=0.5336, validation_acc=0.1766


Epochs:  50%|█████     | 5/10 [03:42<03:46, 45.33s/it]

Epoch 5: train_loss=0.2642, train_acc=0.9090 | val_loss=0.5393, validation_acc=0.1746


Epochs:  60%|██████    | 6/10 [04:28<03:02, 45.62s/it]

Epoch 6: train_loss=0.2581, train_acc=0.9108 | val_loss=0.5515, validation_acc=0.1822


Epochs:  70%|███████   | 7/10 [05:14<02:17, 45.81s/it]

Epoch 7: train_loss=0.2496, train_acc=0.9129 | val_loss=0.5521, validation_acc=0.1747


Epochs:  80%|████████  | 8/10 [06:01<01:32, 46.01s/it]

Epoch 8: train_loss=0.2527, train_acc=0.9114 | val_loss=0.5420, validation_acc=0.1767


Epochs:  90%|█████████ | 9/10 [06:47<00:46, 46.07s/it]

Epoch 9: train_loss=0.2449, train_acc=0.9134 | val_loss=0.5681, validation_acc=0.1858


Epochs: 100%|██████████| 10/10 [07:33<00:00, 45.35s/it]

Epoch 10: train_loss=0.2384, train_acc=0.9153 | val_loss=0.5585, validation_acc=0.1831





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇██████
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,█▃▆▁▂▄▄▂▆▅
validation_acc,▁▅▆▇▇█▇▇██

0,1
epoch,10.0
train_acc,91.53436
train_loss,23.84129
val_loss,55.84949
validation_acc,18.31016


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: v8ipoj8q with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 128


Epochs:  10%|█         | 1/10 [00:31<04:46, 31.87s/it]

Epoch 1: train_loss=0.6499, train_acc=0.8001 | val_loss=0.5672, validation_acc=0.1023


Epochs:  20%|██        | 2/10 [01:03<04:15, 31.88s/it]

Epoch 2: train_loss=0.3682, train_acc=0.8786 | val_loss=0.5325, validation_acc=0.1421


Epochs:  30%|███       | 3/10 [01:35<03:43, 31.91s/it]

Epoch 3: train_loss=0.3195, train_acc=0.8936 | val_loss=0.5263, validation_acc=0.1521


Epochs:  40%|████      | 4/10 [02:08<03:13, 32.20s/it]

Epoch 4: train_loss=0.2902, train_acc=0.9023 | val_loss=0.5230, validation_acc=0.1693


Epochs:  50%|█████     | 5/10 [02:41<02:42, 32.52s/it]

Epoch 5: train_loss=0.2757, train_acc=0.9062 | val_loss=0.5378, validation_acc=0.1626


Epochs:  60%|██████    | 6/10 [03:14<02:10, 32.59s/it]

Epoch 6: train_loss=0.2689, train_acc=0.9074 | val_loss=0.5376, validation_acc=0.1672


Epochs:  70%|███████   | 7/10 [03:47<01:38, 32.75s/it]

Epoch 7: train_loss=0.2595, train_acc=0.9099 | val_loss=0.5557, validation_acc=0.1714


Epochs:  80%|████████  | 8/10 [04:20<01:05, 32.85s/it]

Epoch 8: train_loss=0.2546, train_acc=0.9111 | val_loss=0.5185, validation_acc=0.1633


Epochs:  90%|█████████ | 9/10 [04:53<00:32, 32.99s/it]

Epoch 9: train_loss=0.2489, train_acc=0.9122 | val_loss=0.5557, validation_acc=0.1763


Epochs: 100%|██████████| 10/10 [05:26<00:00, 32.64s/it]

Epoch 10: train_loss=0.2404, train_acc=0.9147 | val_loss=0.5506, validation_acc=0.1757





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇▇█████
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,█▃▂▂▄▄▆▁▆▆
validation_acc,▁▅▆▇▇▇█▇██

0,1
epoch,10.0
train_acc,91.46517
train_loss,24.04347
val_loss,55.06104
validation_acc,17.56655


[34m[1mwandb[0m: Agent Starting Run: 5nyouscy with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 256


Epochs:  10%|█         | 1/10 [01:54<17:07, 114.16s/it]

Epoch 1: train_loss=0.5528, train_acc=0.8278 | val_loss=0.5581, validation_acc=0.1480


Epochs:  20%|██        | 2/10 [03:47<15:11, 113.89s/it]

Epoch 2: train_loss=0.3026, train_acc=0.8992 | val_loss=0.5301, validation_acc=0.1769


Epochs:  30%|███       | 3/10 [05:41<13:15, 113.58s/it]

Epoch 3: train_loss=0.2776, train_acc=0.9050 | val_loss=0.5025, validation_acc=0.1740


Epochs:  40%|████      | 4/10 [07:34<11:21, 113.63s/it]

Epoch 4: train_loss=0.2535, train_acc=0.9115 | val_loss=0.5283, validation_acc=0.1809


Epochs:  50%|█████     | 5/10 [09:28<09:27, 113.60s/it]

Epoch 5: train_loss=0.2442, train_acc=0.9138 | val_loss=0.5216, validation_acc=0.1822


Epochs:  60%|██████    | 6/10 [11:22<07:34, 113.71s/it]

Epoch 6: train_loss=0.2396, train_acc=0.9144 | val_loss=0.5366, validation_acc=0.1831


Epochs:  70%|███████   | 7/10 [13:16<05:41, 113.88s/it]

Epoch 7: train_loss=0.2338, train_acc=0.9158 | val_loss=0.5560, validation_acc=0.1848


Epochs:  80%|████████  | 8/10 [15:10<03:47, 113.79s/it]

Epoch 8: train_loss=0.2293, train_acc=0.9168 | val_loss=0.5225, validation_acc=0.1821


Epochs:  90%|█████████ | 9/10 [17:03<01:53, 113.73s/it]

Epoch 9: train_loss=0.2303, train_acc=0.9158 | val_loss=0.5592, validation_acc=0.1871


Epochs: 100%|██████████| 10/10 [18:57<00:00, 113.71s/it]

Epoch 10: train_loss=0.2265, train_acc=0.9169 | val_loss=0.5740, validation_acc=0.1855





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▇▇███████
train_loss,█▃▂▂▁▁▁▁▁▁
val_loss,▆▄▁▄▃▄▆▃▇█
validation_acc,▁▆▆▇▇▇█▇██

0,1
epoch,10.0
train_acc,91.68787
train_loss,22.64735
val_loss,57.40048
validation_acc,18.54726


[34m[1mwandb[0m: Agent Starting Run: 1pigqq74 with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 512


Epochs:  10%|█         | 1/10 [00:46<06:58, 46.50s/it]

Epoch 1: train_loss=0.6049, train_acc=0.8112 | val_loss=0.5617, validation_acc=0.1266


Epochs:  20%|██        | 2/10 [01:32<06:09, 46.22s/it]

Epoch 2: train_loss=0.3424, train_acc=0.8871 | val_loss=0.5583, validation_acc=0.1626


Epochs:  30%|███       | 3/10 [02:18<05:23, 46.24s/it]

Epoch 3: train_loss=0.3031, train_acc=0.8983 | val_loss=0.5589, validation_acc=0.1624


Epochs:  40%|████      | 4/10 [03:05<04:37, 46.24s/it]

Epoch 4: train_loss=0.2820, train_acc=0.9042 | val_loss=0.5325, validation_acc=0.1624


Epochs:  50%|█████     | 5/10 [03:51<03:51, 46.32s/it]

Epoch 5: train_loss=0.2701, train_acc=0.9073 | val_loss=0.5106, validation_acc=0.1656


Epochs:  60%|██████    | 6/10 [04:37<03:05, 46.30s/it]

Epoch 6: train_loss=0.2566, train_acc=0.9110 | val_loss=0.5474, validation_acc=0.1854


Epochs:  70%|███████   | 7/10 [05:24<02:18, 46.29s/it]

Epoch 7: train_loss=0.2535, train_acc=0.9114 | val_loss=0.5591, validation_acc=0.1805


Epochs:  80%|████████  | 8/10 [06:10<01:32, 46.28s/it]

Epoch 8: train_loss=0.2422, train_acc=0.9149 | val_loss=0.5757, validation_acc=0.1815


Epochs:  90%|█████████ | 9/10 [06:56<00:46, 46.34s/it]

Epoch 9: train_loss=0.2417, train_acc=0.9144 | val_loss=0.5668, validation_acc=0.1864


Epochs: 100%|██████████| 10/10 [07:43<00:00, 46.30s/it]

Epoch 10: train_loss=0.2399, train_acc=0.9147 | val_loss=0.5719, validation_acc=0.1842





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇▇█████
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,▆▆▆▃▁▅▆█▇█
validation_acc,▁▅▅▅▆█▇▇██

0,1
epoch,10.0
train_acc,91.46526
train_loss,23.99126
val_loss,57.18613
validation_acc,18.41793


[34m[1mwandb[0m: Agent Starting Run: 3rm0uue7 with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 512


Epochs:  10%|█         | 1/10 [00:23<03:27, 23.09s/it]

Epoch 1: train_loss=0.6258, train_acc=0.8118 | val_loss=0.5820, validation_acc=0.1111


Epochs:  20%|██        | 2/10 [00:46<03:04, 23.08s/it]

Epoch 2: train_loss=0.3648, train_acc=0.8820 | val_loss=0.5578, validation_acc=0.1318


Epochs:  30%|███       | 3/10 [01:09<02:41, 23.12s/it]

Epoch 3: train_loss=0.3272, train_acc=0.8930 | val_loss=0.5388, validation_acc=0.1498


Epochs:  40%|████      | 4/10 [01:32<02:17, 22.98s/it]

Epoch 4: train_loss=0.2996, train_acc=0.9014 | val_loss=0.5441, validation_acc=0.1460


Epochs:  50%|█████     | 5/10 [01:54<01:54, 22.92s/it]

Epoch 5: train_loss=0.2903, train_acc=0.9033 | val_loss=0.5510, validation_acc=0.1551


Epochs:  60%|██████    | 6/10 [02:17<01:31, 22.91s/it]

Epoch 6: train_loss=0.2812, train_acc=0.9058 | val_loss=0.5541, validation_acc=0.1488


Epochs:  70%|███████   | 7/10 [02:40<01:08, 22.93s/it]

Epoch 7: train_loss=0.2744, train_acc=0.9076 | val_loss=0.5577, validation_acc=0.1650


Epochs:  80%|████████  | 8/10 [03:03<00:45, 22.99s/it]

Epoch 8: train_loss=0.2679, train_acc=0.9093 | val_loss=0.5357, validation_acc=0.1532


Epochs:  90%|█████████ | 9/10 [03:26<00:22, 22.93s/it]

Epoch 9: train_loss=0.2593, train_acc=0.9117 | val_loss=0.5235, validation_acc=0.1554


Epochs: 100%|██████████| 10/10 [03:49<00:00, 22.95s/it]

Epoch 10: train_loss=0.2553, train_acc=0.9124 | val_loss=0.5455, validation_acc=0.1601





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇▇█████
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,█▅▃▃▄▅▅▂▁▄
validation_acc,▁▄▆▆▇▆█▆▇▇

0,1
epoch,10.0
train_acc,91.24208
train_loss,25.52591
val_loss,54.55354
validation_acc,16.01466


[34m[1mwandb[0m: Agent Starting Run: h4qytx7w with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 128


Epochs:  10%|█         | 1/10 [02:12<19:53, 132.59s/it]

Epoch 1: train_loss=0.6129, train_acc=0.8117 | val_loss=0.5297, validation_acc=0.1236


Epochs:  20%|██        | 2/10 [04:23<17:34, 131.81s/it]

Epoch 2: train_loss=0.3148, train_acc=0.8955 | val_loss=0.5140, validation_acc=0.1629


Epochs:  30%|███       | 3/10 [06:34<15:20, 131.51s/it]

Epoch 3: train_loss=0.2710, train_acc=0.9077 | val_loss=0.5392, validation_acc=0.1676


Epochs:  40%|████      | 4/10 [08:46<13:08, 131.41s/it]

Epoch 4: train_loss=0.2600, train_acc=0.9099 | val_loss=0.4979, validation_acc=0.1834


Epochs:  50%|█████     | 5/10 [10:58<10:58, 131.68s/it]

Epoch 5: train_loss=0.2465, train_acc=0.9131 | val_loss=0.5142, validation_acc=0.1798


Epochs:  60%|██████    | 6/10 [13:09<08:46, 131.60s/it]

Epoch 6: train_loss=0.2328, train_acc=0.9166 | val_loss=0.5067, validation_acc=0.1805


Epochs:  70%|███████   | 7/10 [15:21<06:34, 131.54s/it]

Epoch 7: train_loss=0.2285, train_acc=0.9173 | val_loss=0.5207, validation_acc=0.1759


Epochs:  80%|████████  | 8/10 [17:33<04:23, 131.66s/it]

Epoch 8: train_loss=0.2248, train_acc=0.9172 | val_loss=0.5184, validation_acc=0.1836


Epochs:  90%|█████████ | 9/10 [19:45<02:11, 131.85s/it]

Epoch 9: train_loss=0.2191, train_acc=0.9191 | val_loss=0.5012, validation_acc=0.1932


Epochs: 100%|██████████| 10/10 [21:57<00:00, 131.71s/it]

Epoch 10: train_loss=0.2200, train_acc=0.9180 | val_loss=0.5205, validation_acc=0.1873





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇██████
train_loss,█▃▂▂▁▁▁▁▁▁
val_loss,▆▄█▁▄▃▅▄▂▅
validation_acc,▁▅▅▇▇▇▆▇█▇

0,1
epoch,10.0
train_acc,91.79734
train_loss,22.00167
val_loss,52.04633
validation_acc,18.73047


[34m[1mwandb[0m: Agent Starting Run: wb2disxj with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 256


Epochs:  10%|█         | 1/10 [02:17<20:38, 137.56s/it]

Epoch 1: train_loss=0.5574, train_acc=0.8267 | val_loss=0.5145, validation_acc=0.1323


Epochs:  20%|██        | 2/10 [04:34<18:17, 137.17s/it]

Epoch 2: train_loss=0.3106, train_acc=0.8961 | val_loss=0.5294, validation_acc=0.1732


Epochs:  30%|███       | 3/10 [06:50<15:57, 136.78s/it]

Epoch 3: train_loss=0.2761, train_acc=0.9057 | val_loss=0.5203, validation_acc=0.1806


Epochs:  40%|████      | 4/10 [09:07<13:40, 136.81s/it]

Epoch 4: train_loss=0.2579, train_acc=0.9101 | val_loss=0.5080, validation_acc=0.1765


Epochs:  50%|█████     | 5/10 [11:24<11:23, 136.76s/it]

Epoch 5: train_loss=0.2428, train_acc=0.9142 | val_loss=0.5426, validation_acc=0.1885


Epochs:  60%|██████    | 6/10 [13:41<09:07, 136.83s/it]

Epoch 6: train_loss=0.2376, train_acc=0.9148 | val_loss=0.5389, validation_acc=0.1884


Epochs:  70%|███████   | 7/10 [15:58<06:50, 136.86s/it]

Epoch 7: train_loss=0.2337, train_acc=0.9154 | val_loss=0.5284, validation_acc=0.1873


Epochs:  80%|████████  | 8/10 [18:14<04:33, 136.82s/it]

Epoch 8: train_loss=0.2300, train_acc=0.9158 | val_loss=0.5024, validation_acc=0.1866


Epochs:  90%|█████████ | 9/10 [20:31<02:16, 136.84s/it]

Epoch 9: train_loss=0.2184, train_acc=0.9192 | val_loss=0.5248, validation_acc=0.1915


Epochs: 100%|██████████| 10/10 [22:48<00:00, 136.89s/it]

Epoch 10: train_loss=0.2143, train_acc=0.9201 | val_loss=0.5287, validation_acc=0.1905





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇██████
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,▃▆▄▂█▇▆▁▅▆
validation_acc,▁▆▇▆███▇██

0,1
epoch,10.0
train_acc,92.01241
train_loss,21.43323
val_loss,52.86672
validation_acc,19.05378


[34m[1mwandb[0m: Agent Starting Run: a5pr7gjj with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	input_embedding_size: 128


Epochs:  10%|█         | 1/10 [00:33<05:05, 33.90s/it]

Epoch 1: train_loss=0.7966, train_acc=0.7642 | val_loss=0.6285, validation_acc=0.0551


Epochs:  20%|██        | 2/10 [01:07<04:28, 33.62s/it]

Epoch 2: train_loss=0.4356, train_acc=0.8578 | val_loss=0.5669, validation_acc=0.1128


Epochs:  30%|███       | 3/10 [01:40<03:54, 33.44s/it]

Epoch 3: train_loss=0.3757, train_acc=0.8763 | val_loss=0.5587, validation_acc=0.1307


Epochs:  40%|████      | 4/10 [02:14<03:21, 33.54s/it]

Epoch 4: train_loss=0.3416, train_acc=0.8871 | val_loss=0.5388, validation_acc=0.1231


Epochs:  50%|█████     | 5/10 [02:47<02:47, 33.47s/it]

Epoch 5: train_loss=0.3227, train_acc=0.8926 | val_loss=0.5272, validation_acc=0.1414


Epochs:  60%|██████    | 6/10 [03:20<02:13, 33.45s/it]

Epoch 6: train_loss=0.3028, train_acc=0.8991 | val_loss=0.5314, validation_acc=0.1569


Epochs:  70%|███████   | 7/10 [03:54<01:40, 33.44s/it]

Epoch 7: train_loss=0.2965, train_acc=0.9004 | val_loss=0.5571, validation_acc=0.1659


Epochs:  80%|████████  | 8/10 [04:27<01:06, 33.48s/it]

Epoch 8: train_loss=0.2863, train_acc=0.9031 | val_loss=0.5452, validation_acc=0.1696


Epochs:  90%|█████████ | 9/10 [05:01<00:33, 33.43s/it]

Epoch 9: train_loss=0.2802, train_acc=0.9048 | val_loss=0.5407, validation_acc=0.1676


Epochs: 100%|██████████| 10/10 [05:34<00:00, 33.47s/it]

Epoch 10: train_loss=0.2746, train_acc=0.9060 | val_loss=0.5269, validation_acc=0.1610





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇▇█████
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,█▄▃▂▁▁▃▂▂▁
validation_acc,▁▅▆▅▆▇███▇

0,1
epoch,10.0
train_acc,90.6
train_loss,27.45579
val_loss,52.6907
validation_acc,16.10087


[34m[1mwandb[0m: Agent Starting Run: 6qxfsdvm with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 256


Epochs:  10%|█         | 1/10 [02:19<20:59, 139.91s/it]

Epoch 1: train_loss=0.5361, train_acc=0.8327 | val_loss=0.5469, validation_acc=0.1317


Epochs:  20%|██        | 2/10 [04:39<18:36, 139.51s/it]

Epoch 2: train_loss=0.3054, train_acc=0.8978 | val_loss=0.5041, validation_acc=0.1617


Epochs:  30%|███       | 3/10 [06:58<16:17, 139.61s/it]

Epoch 3: train_loss=0.2675, train_acc=0.9081 | val_loss=0.5512, validation_acc=0.1716


Epochs:  40%|████      | 4/10 [09:18<13:58, 139.72s/it]

Epoch 4: train_loss=0.2525, train_acc=0.9115 | val_loss=0.5216, validation_acc=0.1747


Epochs:  50%|█████     | 5/10 [11:38<11:38, 139.61s/it]

Epoch 5: train_loss=0.2421, train_acc=0.9138 | val_loss=0.5438, validation_acc=0.1731


Epochs:  60%|██████    | 6/10 [13:58<09:19, 139.86s/it]

Epoch 6: train_loss=0.2390, train_acc=0.9141 | val_loss=0.5424, validation_acc=0.1851


Epochs:  70%|███████   | 7/10 [16:18<06:59, 139.89s/it]

Epoch 7: train_loss=0.2351, train_acc=0.9147 | val_loss=0.5403, validation_acc=0.1831


Epochs:  80%|████████  | 8/10 [18:38<04:39, 139.87s/it]

Epoch 8: train_loss=0.2263, train_acc=0.9169 | val_loss=0.5522, validation_acc=0.1791


Epochs:  90%|█████████ | 9/10 [20:57<02:19, 139.81s/it]

Epoch 9: train_loss=0.2217, train_acc=0.9185 | val_loss=0.5710, validation_acc=0.1891


Epochs: 100%|██████████| 10/10 [23:17<00:00, 139.78s/it]

Epoch 10: train_loss=0.2208, train_acc=0.9180 | val_loss=0.5485, validation_acc=0.1844





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇██████
train_loss,█▃▂▂▁▁▁▁▁▁
val_loss,▅▁▆▃▅▅▅▆█▆
validation_acc,▁▅▆▆▆█▇▇█▇

0,1
epoch,10.0
train_acc,91.801
train_loss,22.08233
val_loss,54.85142
validation_acc,18.43949


[34m[1mwandb[0m: Agent Starting Run: 47ed92t7 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 512


Epochs:  10%|█         | 1/10 [02:35<23:15, 155.04s/it]

Epoch 1: train_loss=0.6454, train_acc=0.8024 | val_loss=0.5268, validation_acc=0.1407


Epochs:  20%|██        | 2/10 [05:09<20:37, 154.68s/it]

Epoch 2: train_loss=0.3132, train_acc=0.8954 | val_loss=0.5198, validation_acc=0.1722


Epochs:  30%|███       | 3/10 [07:43<18:01, 154.49s/it]

Epoch 3: train_loss=0.2712, train_acc=0.9076 | val_loss=0.5048, validation_acc=0.1881


Epochs:  40%|████      | 4/10 [10:18<15:27, 154.52s/it]

Epoch 4: train_loss=0.2539, train_acc=0.9120 | val_loss=0.4744, validation_acc=0.1859


Epochs:  50%|█████     | 5/10 [12:52<12:52, 154.47s/it]

Epoch 5: train_loss=0.2439, train_acc=0.9143 | val_loss=0.5023, validation_acc=0.1937


Epochs:  60%|██████    | 6/10 [15:27<10:18, 154.52s/it]

Epoch 6: train_loss=0.2358, train_acc=0.9158 | val_loss=0.5035, validation_acc=0.1910


Epochs:  70%|███████   | 7/10 [18:01<07:43, 154.53s/it]

Epoch 7: train_loss=0.2276, train_acc=0.9182 | val_loss=0.5136, validation_acc=0.1925


Epochs:  80%|████████  | 8/10 [20:36<05:09, 154.60s/it]

Epoch 8: train_loss=0.2223, train_acc=0.9190 | val_loss=0.5519, validation_acc=0.1981


Epochs:  90%|█████████ | 9/10 [23:11<02:34, 154.61s/it]

Epoch 9: train_loss=0.2190, train_acc=0.9193 | val_loss=0.5493, validation_acc=0.1950


Epochs: 100%|██████████| 10/10 [25:46<00:00, 154.60s/it]

Epoch 10: train_loss=0.2156, train_acc=0.9202 | val_loss=0.5585, validation_acc=0.1929





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▇▇███████
train_loss,█▃▂▂▁▁▁▁▁▁
val_loss,▅▅▄▁▃▃▄▇▇█
validation_acc,▁▅▇▇▇▇▇██▇

0,1
epoch,10.0
train_acc,92.01845
train_loss,21.55878
val_loss,55.84626
validation_acc,19.29087


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: hhxzyznl with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	input_embedding_size: 256


Epochs:  10%|█         | 1/10 [00:47<07:07, 47.53s/it]

Epoch 1: train_loss=0.7645, train_acc=0.7705 | val_loss=0.5835, validation_acc=0.0706


Epochs:  20%|██        | 2/10 [01:33<06:13, 46.69s/it]

Epoch 2: train_loss=0.4027, train_acc=0.8681 | val_loss=0.5528, validation_acc=0.1222


Epochs:  30%|███       | 3/10 [02:20<05:26, 46.60s/it]

Epoch 3: train_loss=0.3413, train_acc=0.8874 | val_loss=0.5216, validation_acc=0.1404


Epochs:  40%|████      | 4/10 [03:06<04:38, 46.44s/it]

Epoch 4: train_loss=0.3106, train_acc=0.8970 | val_loss=0.5298, validation_acc=0.1606


Epochs:  50%|█████     | 5/10 [03:52<03:52, 46.42s/it]

Epoch 5: train_loss=0.2989, train_acc=0.8997 | val_loss=0.5594, validation_acc=0.1732


Epochs:  60%|██████    | 6/10 [04:39<03:05, 46.46s/it]

Epoch 6: train_loss=0.2866, train_acc=0.9033 | val_loss=0.5323, validation_acc=0.1765


Epochs:  70%|███████   | 7/10 [05:25<02:19, 46.38s/it]

Epoch 7: train_loss=0.2745, train_acc=0.9063 | val_loss=0.5355, validation_acc=0.1686


Epochs:  80%|████████  | 8/10 [06:11<01:32, 46.39s/it]

Epoch 8: train_loss=0.2691, train_acc=0.9078 | val_loss=0.5495, validation_acc=0.1771


Epochs:  90%|█████████ | 9/10 [06:58<00:46, 46.36s/it]

Epoch 9: train_loss=0.2607, train_acc=0.9101 | val_loss=0.5599, validation_acc=0.1767


Epochs: 100%|██████████| 10/10 [07:44<00:00, 46.46s/it]

Epoch 10: train_loss=0.2576, train_acc=0.9105 | val_loss=0.5321, validation_acc=0.1767





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇▇█████
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,█▅▁▂▅▂▃▄▅▂
validation_acc,▁▄▆▇██▇███

0,1
epoch,10.0
train_acc,91.05319
train_loss,25.76178
val_loss,53.21241
validation_acc,17.67432


[34m[1mwandb[0m: Agent Starting Run: qa35iwoe with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	input_embedding_size: 128


Epochs:  10%|█         | 1/10 [00:31<04:46, 31.82s/it]

Epoch 1: train_loss=0.7999, train_acc=0.7617 | val_loss=0.6419, validation_acc=0.0655


Epochs:  20%|██        | 2/10 [01:03<04:13, 31.72s/it]

Epoch 2: train_loss=0.4428, train_acc=0.8552 | val_loss=0.5550, validation_acc=0.1020


Epochs:  30%|███       | 3/10 [01:35<03:42, 31.73s/it]

Epoch 3: train_loss=0.3769, train_acc=0.8760 | val_loss=0.5409, validation_acc=0.1375


Epochs:  40%|████      | 4/10 [02:07<03:10, 31.78s/it]

Epoch 4: train_loss=0.3376, train_acc=0.8886 | val_loss=0.5425, validation_acc=0.1409


Epochs:  50%|█████     | 5/10 [02:38<02:38, 31.79s/it]

Epoch 5: train_loss=0.3217, train_acc=0.8931 | val_loss=0.5309, validation_acc=0.1581


Epochs:  60%|██████    | 6/10 [03:10<02:07, 31.77s/it]

Epoch 6: train_loss=0.3102, train_acc=0.8961 | val_loss=0.5190, validation_acc=0.1599


Epochs:  70%|███████   | 7/10 [03:42<01:35, 31.77s/it]

Epoch 7: train_loss=0.2988, train_acc=0.8996 | val_loss=0.5696, validation_acc=0.1665


Epochs:  80%|████████  | 8/10 [04:14<01:03, 31.85s/it]

Epoch 8: train_loss=0.2861, train_acc=0.9032 | val_loss=0.5421, validation_acc=0.1653


Epochs:  90%|█████████ | 9/10 [04:46<00:31, 31.83s/it]

Epoch 9: train_loss=0.2830, train_acc=0.9037 | val_loss=0.5493, validation_acc=0.1639


Epochs: 100%|██████████| 10/10 [05:17<00:00, 31.80s/it]

Epoch 10: train_loss=0.2738, train_acc=0.9067 | val_loss=0.5426, validation_acc=0.1697





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇▇▇████
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,█▃▂▂▂▁▄▂▃▂
validation_acc,▁▃▆▆▇▇████

0,1
epoch,10.0
train_acc,90.66611
train_loss,27.37947
val_loss,54.25726
validation_acc,16.97381


[34m[1mwandb[0m: Agent Starting Run: yk3winu7 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 256


Epochs:  10%|█         | 1/10 [01:24<12:39, 84.43s/it]

Epoch 1: train_loss=0.5325, train_acc=0.8317 | val_loss=0.5809, validation_acc=0.1355


Epochs:  20%|██        | 2/10 [02:48<11:13, 84.23s/it]

Epoch 2: train_loss=0.3148, train_acc=0.8949 | val_loss=0.5279, validation_acc=0.1587


Epochs:  30%|███       | 3/10 [04:13<09:50, 84.34s/it]

Epoch 3: train_loss=0.2853, train_acc=0.9026 | val_loss=0.5252, validation_acc=0.1669


Epochs:  40%|████      | 4/10 [05:37<08:26, 84.41s/it]

Epoch 4: train_loss=0.2659, train_acc=0.9081 | val_loss=0.5353, validation_acc=0.1734


Epochs:  50%|█████     | 5/10 [07:02<07:02, 84.56s/it]

Epoch 5: train_loss=0.2540, train_acc=0.9111 | val_loss=0.5294, validation_acc=0.1679


Epochs:  60%|██████    | 6/10 [08:26<05:37, 84.42s/it]

Epoch 6: train_loss=0.2460, train_acc=0.9127 | val_loss=0.5653, validation_acc=0.1734


Epochs:  70%|███████   | 7/10 [09:50<04:13, 84.34s/it]

Epoch 7: train_loss=0.2450, train_acc=0.9124 | val_loss=0.5468, validation_acc=0.1744


Epochs:  80%|████████  | 8/10 [11:14<02:48, 84.32s/it]

Epoch 8: train_loss=0.2431, train_acc=0.9127 | val_loss=0.5814, validation_acc=0.1819


Epochs:  90%|█████████ | 9/10 [12:39<01:24, 84.32s/it]

Epoch 9: train_loss=0.2373, train_acc=0.9142 | val_loss=0.5397, validation_acc=0.1804


Epochs: 100%|██████████| 10/10 [14:03<00:00, 84.37s/it]

Epoch 10: train_loss=0.2332, train_acc=0.9152 | val_loss=0.5543, validation_acc=0.1712





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇██████
train_loss,█▃▂▂▁▁▁▁▁▁
val_loss,█▁▁▂▂▆▄█▃▅
validation_acc,▁▅▆▇▆▇▇██▆

0,1
epoch,10.0
train_acc,91.51832
train_loss,23.32248
val_loss,55.42625
validation_acc,17.12469


[34m[1mwandb[0m: Agent Starting Run: a80od6jl with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 128


Epochs:  10%|█         | 1/10 [00:29<04:26, 29.62s/it]

Epoch 1: train_loss=0.7281, train_acc=0.7822 | val_loss=0.5911, validation_acc=0.0804


Epochs:  20%|██        | 2/10 [00:59<03:56, 29.60s/it]

Epoch 2: train_loss=0.3913, train_acc=0.8726 | val_loss=0.5417, validation_acc=0.1278


Epochs:  30%|███       | 3/10 [01:28<03:26, 29.50s/it]

Epoch 3: train_loss=0.3351, train_acc=0.8897 | val_loss=0.5354, validation_acc=0.1471


Epochs:  40%|████      | 4/10 [01:57<02:56, 29.41s/it]

Epoch 4: train_loss=0.3023, train_acc=0.8998 | val_loss=0.5367, validation_acc=0.1525


Epochs:  50%|█████     | 5/10 [02:27<02:26, 29.36s/it]

Epoch 5: train_loss=0.2895, train_acc=0.9032 | val_loss=0.5158, validation_acc=0.1526


Epochs:  60%|██████    | 6/10 [02:56<01:57, 29.35s/it]

Epoch 6: train_loss=0.2753, train_acc=0.9070 | val_loss=0.5266, validation_acc=0.1572


Epochs:  70%|███████   | 7/10 [03:25<01:28, 29.36s/it]

Epoch 7: train_loss=0.2660, train_acc=0.9091 | val_loss=0.5331, validation_acc=0.1663


Epochs:  80%|████████  | 8/10 [03:55<00:58, 29.46s/it]

Epoch 8: train_loss=0.2602, train_acc=0.9107 | val_loss=0.5382, validation_acc=0.1631


Epochs:  90%|█████████ | 9/10 [04:24<00:29, 29.46s/it]

Epoch 9: train_loss=0.2545, train_acc=0.9120 | val_loss=0.5541, validation_acc=0.1651


Epochs: 100%|██████████| 10/10 [04:54<00:00, 29.44s/it]

Epoch 10: train_loss=0.2459, train_acc=0.9143 | val_loss=0.5277, validation_acc=0.1509





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇▇█████
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,█▃▃▃▁▂▃▃▅▂
validation_acc,▁▅▆▇▇▇███▇

0,1
epoch,10.0
train_acc,91.43344
train_loss,24.59138
val_loss,52.7682
validation_acc,15.08783


[34m[1mwandb[0m: Agent Starting Run: q6wy6ivo with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	input_embedding_size: 256


Epochs:  10%|█         | 1/10 [00:44<06:36, 44.04s/it]

Epoch 1: train_loss=0.8700, train_acc=0.7424 | val_loss=0.6687, validation_acc=0.0386


Epochs:  20%|██        | 2/10 [01:27<05:47, 43.43s/it]

Epoch 2: train_loss=0.4698, train_acc=0.8462 | val_loss=0.6326, validation_acc=0.1066


Epochs:  30%|███       | 3/10 [02:10<05:03, 43.37s/it]

Epoch 3: train_loss=0.3912, train_acc=0.8708 | val_loss=0.5669, validation_acc=0.1207


Epochs:  40%|████      | 4/10 [02:53<04:19, 43.27s/it]

Epoch 4: train_loss=0.3521, train_acc=0.8833 | val_loss=0.5831, validation_acc=0.1440


Epochs:  50%|█████     | 5/10 [03:36<03:36, 43.35s/it]

Epoch 5: train_loss=0.3214, train_acc=0.8932 | val_loss=0.5756, validation_acc=0.1464


Epochs:  60%|██████    | 6/10 [04:20<02:53, 43.28s/it]

Epoch 6: train_loss=0.3067, train_acc=0.8974 | val_loss=0.5480, validation_acc=0.1545


Epochs:  70%|███████   | 7/10 [05:03<02:09, 43.24s/it]

Epoch 7: train_loss=0.2974, train_acc=0.8999 | val_loss=0.5586, validation_acc=0.1552


Epochs:  80%|████████  | 8/10 [05:46<01:26, 43.24s/it]

Epoch 8: train_loss=0.2824, train_acc=0.9045 | val_loss=0.5452, validation_acc=0.1585


Epochs:  90%|█████████ | 9/10 [06:29<00:43, 43.24s/it]

Epoch 9: train_loss=0.2798, train_acc=0.9046 | val_loss=0.5476, validation_acc=0.1580


Epochs: 100%|██████████| 10/10 [07:13<00:00, 43.32s/it]

Epoch 10: train_loss=0.2679, train_acc=0.9081 | val_loss=0.5724, validation_acc=0.1638





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▅▆▇▇█████
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,█▆▂▃▃▁▂▁▁▃
validation_acc,▁▅▆▇▇▇████

0,1
epoch,10.0
train_acc,90.81291
train_loss,26.78848
val_loss,57.2426
validation_acc,16.38108


**Finding the test accuracy with the best validation accuracy giving hyperparameter**

In [12]:
import torch.nn.functional as F

def compute_score(preds: torch.Tensor, targets: torch.Tensor) -> int:
    
    # each row is True if all tokens match
    correct_seq = (preds == targets).all(dim=1)
    return correct_seq.sum().item()


def calc_test_acc(model, loader):
    model.eval()
    total_loss  = 0
    total_score = 0
    criterion   = nn.CrossEntropyLoss()

    for inputs, targets in loader:
        # inputs, targets
        inputs, targets = inputs.transpose(0,1).to(device), targets.transpose(0,1).to(device)
        outputs, _ = model(inputs, None, False, False)

        # Calculate accuracy
        preds = torch.argmax(F.softmax(outputs, dim=2), dim=2).T  # (batch, seq)
        total_score += compute_score(preds, targets)

        # Reshape outputs and targets for loss calculation
        outputs = outputs.permute(1, 0, 2).reshape(-1, TGT_VOCAB)
        targets = F.one_hot(targets, num_classes=TGT_VOCAB).float().reshape(-1, TGT_VOCAB)

        # Calculate loss
        loss = criterion(outputs, targets)
        total_loss += loss.item()

    avg_loss  = total_loss  / len(loader)
    avg_score = total_score / len(loader.dataset)

    print(f'Test Loss: {avg_loss:.4f} \t Test Accuracy: {avg_score:.4f}')
    wandb.log({'test_accuracy': avg_score * 100, 'test_loss': avg_loss})





In [13]:

#  Instantiate best_model with best valiation hyperparameter
best_encoder = Encoder(
    vocab_size = SRC_VOCAB,
    emb_dim    = 512,
    hid_dim    = 512,
    rnn_type   = 'lstm',
    num_layers = 2,
    dropout    = 0.3,
    bidir      = True
)

best_decoder = Decoder(
    vocab_size    = TGT_VOCAB,
    emb_dim       = 512,
    hid_dim       = 512,
    rnn_type      = 'lstm',
    num_layers    = 3,
    dropout       = 0.3,
    bidir         = True,
    use_attention = False
)

best_model = Seq2Seq(
    encoder            = best_encoder,
    decoder            = best_decoder,
    max_tgt_len        = max_seq_tgt,
    teacher_force_rate = 0.5
).to(device)


#train on train+val 
wandb.init(
    project = "MA23M018_assignment3",
    entity  = "ma23m018-indian-institute-of-technology-madras",
    name    = "final_train_on_trainval"
)
train1(best_model, train_loader, val_loader, epochs=10)
wandb.finish()



#Grid‐sweep config to run test‐set eval
sweep_config = {
    'method':  'grid',
    'name'  :  'testset run',
    'metric': {'goal': 'maximize', 'name': 'test_accuracy'},
    'parameters': {
        'beam_size': {'values': [1]}
    }
}

#sweep_id for test-set 
sweep_id = wandb.sweep(
    sweep   = sweep_config,
    entity  = "ma23m018-indian-institute-of-technology-madras",
    project = "DA6401_TEST_A3"
)



#  main() that just calls calc_test_acc on the test set
def main():
    with wandb.init():
        wandb.run.name = "test_set_run"
        calc_test_acc(best_model, test_loader)

# launch run for test-set
wandb.agent(sweep_id, function=main, count=1)
wandb.finish()


0,1
epoch,▁█
train_acc,▁█
train_loss,█▁
val_loss,█▁
validation_acc,▁█

0,1
epoch,2.0
train_acc,89.43805
train_loss,31.65786
val_loss,53.92083
validation_acc,17.34023


Epochs:  10%|█         | 1/10 [02:35<23:16, 155.17s/it]

Epoch 1: train_loss=0.6259, train_acc=0.8083 | val_loss=0.5568, validation_acc=0.1417


Epochs:  20%|██        | 2/10 [05:09<20:37, 154.66s/it]

Epoch 2: train_loss=0.3115, train_acc=0.8960 | val_loss=0.5141, validation_acc=0.1747


Epochs:  30%|███       | 3/10 [07:44<18:03, 154.73s/it]

Epoch 3: train_loss=0.2728, train_acc=0.9069 | val_loss=0.5323, validation_acc=0.1845


Epochs:  40%|████      | 4/10 [10:19<15:29, 154.92s/it]

Epoch 4: train_loss=0.2564, train_acc=0.9112 | val_loss=0.5311, validation_acc=0.1932


Epochs:  50%|█████     | 5/10 [12:54<12:54, 154.90s/it]

Epoch 5: train_loss=0.2416, train_acc=0.9149 | val_loss=0.5507, validation_acc=0.1899


Epochs:  60%|██████    | 6/10 [15:29<10:19, 154.95s/it]

Epoch 6: train_loss=0.2344, train_acc=0.9162 | val_loss=0.4881, validation_acc=0.1827


Epochs:  70%|███████   | 7/10 [18:03<07:44, 154.69s/it]

Epoch 7: train_loss=0.2267, train_acc=0.9180 | val_loss=0.5321, validation_acc=0.1909


Epochs:  80%|████████  | 8/10 [20:38<05:09, 154.66s/it]

Epoch 8: train_loss=0.2223, train_acc=0.9189 | val_loss=0.5233, validation_acc=0.1864


Epochs:  90%|█████████ | 9/10 [23:12<02:34, 154.67s/it]

Epoch 9: train_loss=0.2189, train_acc=0.9197 | val_loss=0.5153, validation_acc=0.1849


Epochs: 100%|██████████| 10/10 [25:47<00:00, 154.78s/it]

Epoch 10: train_loss=0.2145, train_acc=0.9203 | val_loss=0.5514, validation_acc=0.1970





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇██████
train_loss,█▃▂▂▁▁▁▁▁▁
val_loss,█▄▆▅▇▁▅▅▄▇
validation_acc,▁▅▆█▇▆▇▇▆█

0,1
epoch,10.0
train_acc,92.03233
train_loss,21.44666
val_loss,55.14309
validation_acc,19.7004


Create sweep with ID: 6tedvxsl
Sweep URL: https://wandb.ai/ma23m018-indian-institute-of-technology-madras/DA6401_TEST_A3/sweeps/6tedvxsl


[34m[1mwandb[0m: Agent Starting Run: 1kh413a7 with config:
[34m[1mwandb[0m: 	beam_size: 1


Test Loss: 0.5460 	 Test Accuracy: 0.1967


0,1
test_accuracy,▁
test_loss,▁

0,1
test_accuracy,19.6684
test_loss,0.54597


**Save the predictions on test set in a csv file**

In [14]:

# decode a sequence of indices into a string
def decode_sequence(idx_seq, idx2char):
   
    chars = []
    for idx in idx_seq:
        # stop at end‐of‐word
        if idx == special_tokens['<eow>']:
            break
        # skip pad and start‐of‐word
        if idx in (special_tokens['<pad>'], special_tokens['<sow>']):
            continue
        chars.append(idx2char[idx])
    return "".join(chars)


# run the model on the test set and collect rows
best_model.eval()
rows = []

with torch.no_grad():
    for src_batch, tgt_batch in test_loader:
        # transpose to (batch, seq)
        src = src_batch.transpose(0,1).to(device)
        tgt = tgt_batch.transpose(0,1).to(device)

        # get model outputs (T, B, V)
        outputs, _ = best_model(src, None, False, False)

        # predicted indices
        pred_idxs = outputs.argmax(dim=2).transpose(0,1).cpu().tolist()

        for inp_idxs, true_idxs, pr_idxs in zip(
            src.cpu().tolist(),
            tgt.cpu().tolist(),
            pred_idxs
        ):
            inp_str  = decode_sequence(inp_idxs, idx2src)
            true_str = decode_sequence(true_idxs, idx2tgt)
            pred_str = decode_sequence(pr_idxs, idx2tgt)
            correctness = "Correct" if pred_str == true_str else "Incorrect"

            rows.append({
                "Input-English":        inp_str,
                "Output-Bengali":     true_str,
                "Predicted-Bengali":  pred_str,
                "Correct/Incorrect":    correctness
            })


#  CSV
import pandas as pd

df = pd.DataFrame(rows, columns=[
    "Input-English",
    "Output-Bengali",
    "Predicted-Bengali",
    "Correct/Incorrect"
])
df.to_csv("predictions.csv", index=False)
print(f"Saved predictions.csv with {len(df)} rows.")


Saved predictions.csv with 9228 rows.


**With attention**

In [15]:

# SWEEP CONFIGURATION (Bayesian, with attention)

sweep_config = {
    'method': 'bayes',
    'name'  : 'sweep - attention',
    'metric': {
      'goal': 'maximize',
      'name': 'validation_accuracy'
    },
    'parameters': {
        'input_embedding_size': {'values': [128, 256, 512]},
        'enc_layers':            {'values': [1, 2, 3]},
        'dec_layers':            {'values': [1, 2, 3]},
        'hidden_size':           {'values': [128, 256, 512]},
        'cell_type':             {'values': ['lstm', 'gru', 'rnn']},
        'bidirectional':         {'values': [True]},
        'dropout':               {'values': [0.1, 0.2, 0.3]},
        'beam_size':             {'values': [1, 3, 5]}
    }
}

sweep_id = wandb.sweep(
    sweep   = sweep_config,
    entity  = "ma23m018-indian-institute-of-technology-madras",
    project = "DA6401_A3_ATTN"
)

# MAIN FUNCTION FOR ATTENTION SWEEP
def main():
    with wandb.init():
        cfg = wandb.config
        wandb.run.name = (
            f"attn_cell-{cfg.cell_type}_hid-{cfg.hidden_size}"
            f"_emb-{cfg.input_embedding_size}"
            f"_enc-{cfg.enc_layers}_dec-{cfg.dec_layers}"
            f"_drop{cfg.dropout}_beam{cfg.beam_size}"
        )

        encoder = Encoder(
            vocab_size = SRC_VOCAB,
            emb_dim    = cfg.input_embedding_size,
            hid_dim    = cfg.hidden_size,
            rnn_type   = cfg.cell_type,
            num_layers = cfg.enc_layers,
            dropout    = cfg.dropout,
            bidir      = cfg.bidirectional
        )

        # decoder with attention enabled
        decoder = Decoder(
            vocab_size    = TGT_VOCAB,
            emb_dim       = cfg.input_embedding_size,
            hid_dim       = cfg.hidden_size,
            rnn_type      = cfg.cell_type,
            num_layers    = cfg.dec_layers,
            dropout       = cfg.dropout,
            bidir         = cfg.bidirectional,
            use_attention = True
        )

        model = Seq2Seq(
            encoder            = encoder,
            decoder            = decoder,
            max_tgt_len        = max_seq_tgt,
            teacher_force_rate = 0.5
        ).to(device)

        # train & compute word‐level validation accuracy
        train1(model, train_loader, val_loader, epochs=10)

wandb.agent(sweep_id, function=main, count=15)
wandb.finish()

Create sweep with ID: icx3riaa
Sweep URL: https://wandb.ai/ma23m018-indian-institute-of-technology-madras/DA6401_A3_ATTN/sweeps/icx3riaa


[34m[1mwandb[0m: Agent Starting Run: v04cruhl with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	input_embedding_size: 128


Epochs:  10%|█         | 1/10 [00:20<03:05, 20.60s/it]

Epoch 1: train_loss=0.9075, train_acc=0.7382 | val_loss=0.7531, validation_acc=0.0213


Epochs:  20%|██        | 2/10 [00:41<02:45, 20.69s/it]

Epoch 2: train_loss=0.5405, train_acc=0.8251 | val_loss=0.6662, validation_acc=0.0671


Epochs:  30%|███       | 3/10 [01:01<02:22, 20.39s/it]

Epoch 3: train_loss=0.4393, train_acc=0.8567 | val_loss=0.6034, validation_acc=0.0924


Epochs:  40%|████      | 4/10 [01:21<02:01, 20.25s/it]

Epoch 4: train_loss=0.3953, train_acc=0.8708 | val_loss=0.5930, validation_acc=0.1212


Epochs:  50%|█████     | 5/10 [01:41<01:41, 20.23s/it]

Epoch 5: train_loss=0.3623, train_acc=0.8814 | val_loss=0.5946, validation_acc=0.1277


Epochs:  60%|██████    | 6/10 [02:02<01:21, 20.40s/it]

Epoch 6: train_loss=0.3434, train_acc=0.8873 | val_loss=0.5793, validation_acc=0.1242


Epochs:  70%|███████   | 7/10 [02:22<01:00, 20.32s/it]

Epoch 7: train_loss=0.3300, train_acc=0.8912 | val_loss=0.5741, validation_acc=0.1376


Epochs:  80%|████████  | 8/10 [02:42<00:40, 20.23s/it]

Epoch 8: train_loss=0.3180, train_acc=0.8947 | val_loss=0.5644, validation_acc=0.1326


Epochs:  90%|█████████ | 9/10 [03:02<00:20, 20.21s/it]

Epoch 9: train_loss=0.3098, train_acc=0.8971 | val_loss=0.5837, validation_acc=0.1411


Epochs: 100%|██████████| 10/10 [03:23<00:00, 20.31s/it]

Epoch 10: train_loss=0.3021, train_acc=0.8995 | val_loss=0.5812, validation_acc=0.1424





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▅▆▇▇▇████
train_loss,█▄▃▂▂▁▁▁▁▁
val_loss,█▅▂▂▂▂▁▁▂▂
validation_acc,▁▄▅▇▇▇█▇██

0,1
epoch,10.0
train_acc,89.95285
train_loss,30.21479
val_loss,58.11818
validation_acc,14.23645


[34m[1mwandb[0m: Agent Starting Run: dn7cjrdy with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	input_embedding_size: 256


Epochs:  10%|█         | 1/10 [00:46<07:00, 46.71s/it]

Epoch 1: train_loss=0.7653, train_acc=0.7730 | val_loss=0.5956, validation_acc=0.0795


Epochs:  20%|██        | 2/10 [01:32<06:08, 46.06s/it]

Epoch 2: train_loss=0.4100, train_acc=0.8663 | val_loss=0.5402, validation_acc=0.1263


Epochs:  30%|███       | 3/10 [02:18<05:21, 46.00s/it]

Epoch 3: train_loss=0.3470, train_acc=0.8862 | val_loss=0.5281, validation_acc=0.1390


Epochs:  40%|████      | 4/10 [03:04<04:35, 46.00s/it]

Epoch 4: train_loss=0.3210, train_acc=0.8938 | val_loss=0.5284, validation_acc=0.1500


Epochs:  50%|█████     | 5/10 [03:50<03:49, 45.99s/it]

Epoch 5: train_loss=0.3025, train_acc=0.8990 | val_loss=0.5249, validation_acc=0.1545


Epochs:  60%|██████    | 6/10 [04:36<03:03, 45.97s/it]

Epoch 6: train_loss=0.2882, train_acc=0.9032 | val_loss=0.5147, validation_acc=0.1571


Epochs:  70%|███████   | 7/10 [05:22<02:17, 45.98s/it]

Epoch 7: train_loss=0.2803, train_acc=0.9050 | val_loss=0.5161, validation_acc=0.1485


Epochs:  80%|████████  | 8/10 [06:08<01:32, 46.03s/it]

Epoch 8: train_loss=0.2717, train_acc=0.9072 | val_loss=0.5206, validation_acc=0.1626


Epochs:  90%|█████████ | 9/10 [06:54<00:46, 46.02s/it]

Epoch 9: train_loss=0.2628, train_acc=0.9097 | val_loss=0.5257, validation_acc=0.1715


Epochs: 100%|██████████| 10/10 [07:40<00:00, 46.03s/it]

Epoch 10: train_loss=0.2582, train_acc=0.9110 | val_loss=0.5243, validation_acc=0.1704





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇▇█████
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,█▃▂▂▂▁▁▂▂▂
validation_acc,▁▅▆▆▇▇▆▇██

0,1
epoch,10.0
train_acc,91.09805
train_loss,25.81926
val_loss,52.4269
validation_acc,17.03847


[34m[1mwandb[0m: Agent Starting Run: r41e0lwb with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 512


Epochs:  10%|█         | 1/10 [00:46<06:57, 46.40s/it]

Epoch 1: train_loss=0.5298, train_acc=0.8361 | val_loss=0.5380, validation_acc=0.1340


Epochs:  20%|██        | 2/10 [01:31<06:07, 45.88s/it]

Epoch 2: train_loss=0.3213, train_acc=0.8945 | val_loss=0.5428, validation_acc=0.1596


Epochs:  30%|███       | 3/10 [02:18<05:21, 45.98s/it]

Epoch 3: train_loss=0.2890, train_acc=0.9032 | val_loss=0.5573, validation_acc=0.1643


Epochs:  40%|████      | 4/10 [03:03<04:35, 45.95s/it]

Epoch 4: train_loss=0.2707, train_acc=0.9084 | val_loss=0.5290, validation_acc=0.1606


Epochs:  50%|█████     | 5/10 [03:50<03:50, 46.04s/it]

Epoch 5: train_loss=0.2568, train_acc=0.9119 | val_loss=0.5514, validation_acc=0.1674


Epochs:  60%|██████    | 6/10 [04:36<03:04, 46.22s/it]

Epoch 6: train_loss=0.2518, train_acc=0.9127 | val_loss=0.5328, validation_acc=0.1656


Epochs:  70%|███████   | 7/10 [05:22<02:18, 46.20s/it]

Epoch 7: train_loss=0.2441, train_acc=0.9145 | val_loss=0.5500, validation_acc=0.1738


Epochs:  80%|████████  | 8/10 [06:08<01:32, 46.14s/it]

Epoch 8: train_loss=0.2382, train_acc=0.9159 | val_loss=0.5452, validation_acc=0.1645


Epochs:  90%|█████████ | 9/10 [06:54<00:46, 46.10s/it]

Epoch 9: train_loss=0.2364, train_acc=0.9161 | val_loss=0.5443, validation_acc=0.1716


Epochs: 100%|██████████| 10/10 [07:41<00:00, 46.12s/it]

Epoch 10: train_loss=0.2315, train_acc=0.9172 | val_loss=0.5555, validation_acc=0.1702





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇██████
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,▃▄█▁▇▂▆▅▅█
validation_acc,▁▆▆▆▇▇█▆█▇

0,1
epoch,10.0
train_acc,91.71797
train_loss,23.14788
val_loss,55.55254
validation_acc,17.01692


[34m[1mwandb[0m: Agent Starting Run: y37mmiw9 with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	input_embedding_size: 512


Epochs:  10%|█         | 1/10 [00:51<07:43, 51.45s/it]

Epoch 1: train_loss=0.7601, train_acc=0.7701 | val_loss=0.6784, validation_acc=0.0896


Epochs:  20%|██        | 2/10 [01:41<06:46, 50.83s/it]

Epoch 2: train_loss=0.4369, train_acc=0.8562 | val_loss=0.5622, validation_acc=0.1293


Epochs:  30%|███       | 3/10 [02:32<05:55, 50.81s/it]

Epoch 3: train_loss=0.3755, train_acc=0.8761 | val_loss=0.5455, validation_acc=0.1426


Epochs:  40%|████      | 4/10 [03:23<05:05, 50.87s/it]

Epoch 4: train_loss=0.3403, train_acc=0.8874 | val_loss=0.5838, validation_acc=0.1607


Epochs:  50%|█████     | 5/10 [04:14<04:14, 50.81s/it]

Epoch 5: train_loss=0.3193, train_acc=0.8938 | val_loss=0.5430, validation_acc=0.1678


Epochs:  60%|██████    | 6/10 [05:05<03:23, 50.78s/it]

Epoch 6: train_loss=0.3086, train_acc=0.8964 | val_loss=0.5623, validation_acc=0.1714


Epochs:  70%|███████   | 7/10 [05:55<02:32, 50.76s/it]

Epoch 7: train_loss=0.3041, train_acc=0.8977 | val_loss=0.5240, validation_acc=0.1720


Epochs:  80%|████████  | 8/10 [06:46<01:41, 50.83s/it]

Epoch 8: train_loss=0.2915, train_acc=0.9016 | val_loss=0.5485, validation_acc=0.1751


Epochs:  90%|█████████ | 9/10 [07:37<00:50, 50.82s/it]

Epoch 9: train_loss=0.2855, train_acc=0.9032 | val_loss=0.5416, validation_acc=0.1785


Epochs: 100%|██████████| 10/10 [08:28<00:00, 50.82s/it]

Epoch 10: train_loss=0.2789, train_acc=0.9049 | val_loss=0.5746, validation_acc=0.1822





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▅▇▇▇█████
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,█▃▂▄▂▃▁▂▂▃
validation_acc,▁▄▅▆▇▇▇▇██

0,1
epoch,10.0
train_acc,90.48837
train_loss,27.89342
val_loss,57.45546
validation_acc,18.22395


[34m[1mwandb[0m: Agent Starting Run: 18vgt8ct with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	input_embedding_size: 128


Epochs:  10%|█         | 1/10 [00:40<06:07, 40.84s/it]

Epoch 1: train_loss=0.9181, train_acc=0.7339 | val_loss=0.6853, validation_acc=0.0438


Epochs:  20%|██        | 2/10 [01:21<05:24, 40.59s/it]

Epoch 2: train_loss=0.4604, train_acc=0.8502 | val_loss=0.5712, validation_acc=0.1046


Epochs:  30%|███       | 3/10 [02:01<04:42, 40.37s/it]

Epoch 3: train_loss=0.3812, train_acc=0.8751 | val_loss=0.5465, validation_acc=0.1318


Epochs:  40%|████      | 4/10 [02:41<04:01, 40.23s/it]

Epoch 4: train_loss=0.3421, train_acc=0.8874 | val_loss=0.5259, validation_acc=0.1500


Epochs:  50%|█████     | 5/10 [03:21<03:20, 40.16s/it]

Epoch 5: train_loss=0.3179, train_acc=0.8947 | val_loss=0.5394, validation_acc=0.1607


Epochs:  60%|██████    | 6/10 [04:01<02:40, 40.21s/it]

Epoch 6: train_loss=0.2994, train_acc=0.9002 | val_loss=0.5264, validation_acc=0.1583


Epochs:  70%|███████   | 7/10 [04:41<02:00, 40.16s/it]

Epoch 7: train_loss=0.2852, train_acc=0.9044 | val_loss=0.5508, validation_acc=0.1611


Epochs:  80%|████████  | 8/10 [05:21<01:20, 40.11s/it]

Epoch 8: train_loss=0.2772, train_acc=0.9064 | val_loss=0.5487, validation_acc=0.1704


Epochs:  90%|█████████ | 9/10 [06:01<00:40, 40.08s/it]

Epoch 9: train_loss=0.2689, train_acc=0.9087 | val_loss=0.5396, validation_acc=0.1724


Epochs: 100%|██████████| 10/10 [06:42<00:00, 40.20s/it]

Epoch 10: train_loss=0.2664, train_acc=0.9088 | val_loss=0.5245, validation_acc=0.1665





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇▇█████
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,█▃▂▁▂▁▂▂▂▁
validation_acc,▁▄▆▇▇▇▇███

0,1
epoch,10.0
train_acc,90.88259
train_loss,26.64468
val_loss,52.45207
validation_acc,16.6505


[34m[1mwandb[0m: Agent Starting Run: y1c5gy6e with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	input_embedding_size: 512


Epochs:  10%|█         | 1/10 [00:46<06:56, 46.25s/it]

Epoch 1: train_loss=0.8385, train_acc=0.7518 | val_loss=0.7031, validation_acc=0.0556


Epochs:  20%|██        | 2/10 [01:31<06:06, 45.86s/it]

Epoch 2: train_loss=0.4909, train_acc=0.8384 | val_loss=0.6014, validation_acc=0.0968


Epochs:  30%|███       | 3/10 [02:17<05:20, 45.80s/it]

Epoch 3: train_loss=0.4131, train_acc=0.8635 | val_loss=0.5794, validation_acc=0.1118


Epochs:  40%|████      | 4/10 [03:03<04:34, 45.83s/it]

Epoch 4: train_loss=0.3801, train_acc=0.8740 | val_loss=0.5682, validation_acc=0.1387


Epochs:  50%|█████     | 5/10 [03:49<03:48, 45.78s/it]

Epoch 5: train_loss=0.3512, train_acc=0.8832 | val_loss=0.5538, validation_acc=0.1398


Epochs:  60%|██████    | 6/10 [04:34<03:02, 45.74s/it]

Epoch 6: train_loss=0.3322, train_acc=0.8893 | val_loss=0.5932, validation_acc=0.1498


Epochs:  70%|███████   | 7/10 [05:20<02:17, 45.73s/it]

Epoch 7: train_loss=0.3214, train_acc=0.8924 | val_loss=0.5621, validation_acc=0.1509


Epochs:  80%|████████  | 8/10 [06:06<01:31, 45.80s/it]

Epoch 8: train_loss=0.3092, train_acc=0.8962 | val_loss=0.6243, validation_acc=0.1561


Epochs:  90%|█████████ | 9/10 [06:52<00:45, 45.78s/it]

Epoch 9: train_loss=0.3065, train_acc=0.8967 | val_loss=0.5697, validation_acc=0.1586


Epochs: 100%|██████████| 10/10 [07:37<00:00, 45.80s/it]

Epoch 10: train_loss=0.2945, train_acc=0.9005 | val_loss=0.5605, validation_acc=0.1601





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▅▆▇▇▇████
train_loss,█▄▃▂▂▁▁▁▁▁
val_loss,█▃▂▂▁▃▁▄▂▁
validation_acc,▁▄▅▇▇▇▇███

0,1
epoch,10.0
train_acc,90.04862
train_loss,29.44823
val_loss,56.05351
validation_acc,16.01466


[34m[1mwandb[0m: Agent Starting Run: jpaw9vh2 with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	input_embedding_size: 512


Epochs:  10%|█         | 1/10 [00:46<06:57, 46.39s/it]

Epoch 1: train_loss=0.8403, train_acc=0.7519 | val_loss=0.6598, validation_acc=0.0501


Epochs:  20%|██        | 2/10 [01:32<06:08, 46.10s/it]

Epoch 2: train_loss=0.4867, train_acc=0.8403 | val_loss=0.6123, validation_acc=0.0940


Epochs:  30%|███       | 3/10 [02:18<05:21, 45.96s/it]

Epoch 3: train_loss=0.4120, train_acc=0.8637 | val_loss=0.5865, validation_acc=0.1167


Epochs:  40%|████      | 4/10 [03:03<04:35, 45.87s/it]

Epoch 4: train_loss=0.3739, train_acc=0.8759 | val_loss=0.5892, validation_acc=0.1378


Epochs:  50%|█████     | 5/10 [03:49<03:49, 45.80s/it]

Epoch 5: train_loss=0.3460, train_acc=0.8852 | val_loss=0.5500, validation_acc=0.1455


Epochs:  60%|██████    | 6/10 [04:35<03:03, 45.87s/it]

Epoch 6: train_loss=0.3331, train_acc=0.8889 | val_loss=0.5885, validation_acc=0.1455


Epochs:  70%|███████   | 7/10 [05:21<02:17, 45.82s/it]

Epoch 7: train_loss=0.3202, train_acc=0.8927 | val_loss=0.5464, validation_acc=0.1564


Epochs:  80%|████████  | 8/10 [06:06<01:31, 45.80s/it]

Epoch 8: train_loss=0.3119, train_acc=0.8951 | val_loss=0.5495, validation_acc=0.1512


Epochs:  90%|█████████ | 9/10 [06:52<00:45, 45.79s/it]

Epoch 9: train_loss=0.3063, train_acc=0.8965 | val_loss=0.5681, validation_acc=0.1514


Epochs: 100%|██████████| 10/10 [07:38<00:00, 45.87s/it]

Epoch 10: train_loss=0.2954, train_acc=0.8999 | val_loss=0.5699, validation_acc=0.1552





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▅▆▇▇▇████
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,█▅▃▄▁▄▁▁▂▂
validation_acc,▁▄▅▇▇▇████

0,1
epoch,10.0
train_acc,89.98621
train_loss,29.54318
val_loss,56.9882
validation_acc,15.51891


[34m[1mwandb[0m: Agent Starting Run: 7uorljzy with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 256


Epochs:  10%|█         | 1/10 [00:26<04:01, 26.78s/it]

Epoch 1: train_loss=0.6653, train_acc=0.7958 | val_loss=0.5965, validation_acc=0.0923


Epochs:  20%|██        | 2/10 [00:53<03:33, 26.70s/it]

Epoch 2: train_loss=0.3709, train_acc=0.8777 | val_loss=0.5416, validation_acc=0.1362


Epochs:  30%|███       | 3/10 [01:19<03:06, 26.60s/it]

Epoch 3: train_loss=0.3142, train_acc=0.8953 | val_loss=0.5319, validation_acc=0.1599


Epochs:  40%|████      | 4/10 [01:46<02:40, 26.70s/it]

Epoch 4: train_loss=0.2852, train_acc=0.9041 | val_loss=0.5557, validation_acc=0.1652


Epochs:  50%|█████     | 5/10 [02:13<02:13, 26.71s/it]

Epoch 5: train_loss=0.2783, train_acc=0.9053 | val_loss=0.5711, validation_acc=0.1743


Epochs:  60%|██████    | 6/10 [02:40<01:46, 26.68s/it]

Epoch 6: train_loss=0.2645, train_acc=0.9088 | val_loss=0.5707, validation_acc=0.1704


Epochs:  70%|███████   | 7/10 [03:06<01:19, 26.57s/it]

Epoch 7: train_loss=0.2576, train_acc=0.9106 | val_loss=0.5402, validation_acc=0.1697


Epochs:  80%|████████  | 8/10 [03:33<00:53, 26.66s/it]

Epoch 8: train_loss=0.2526, train_acc=0.9114 | val_loss=0.5397, validation_acc=0.1706


Epochs:  90%|█████████ | 9/10 [03:59<00:26, 26.63s/it]

Epoch 9: train_loss=0.2468, train_acc=0.9128 | val_loss=0.5323, validation_acc=0.1597


Epochs: 100%|██████████| 10/10 [04:26<00:00, 26.62s/it]

Epoch 10: train_loss=0.2359, train_acc=0.9163 | val_loss=0.5398, validation_acc=0.1776





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇▇█████
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,█▂▁▄▅▅▂▂▁▂
validation_acc,▁▅▇▇█▇▇▇▇█

0,1
epoch,10.0
train_acc,91.62581
train_loss,23.59022
val_loss,53.98332
validation_acc,17.76053


[34m[1mwandb[0m: Agent Starting Run: fjq7olqp with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 256


Epochs:  10%|█         | 1/10 [00:42<06:22, 42.46s/it]

Epoch 1: train_loss=0.6219, train_acc=0.8065 | val_loss=0.5712, validation_acc=0.1202


Epochs:  20%|██        | 2/10 [01:24<05:38, 42.27s/it]

Epoch 2: train_loss=0.3515, train_acc=0.8837 | val_loss=0.5621, validation_acc=0.1683


Epochs:  30%|███       | 3/10 [02:06<04:55, 42.21s/it]

Epoch 3: train_loss=0.3044, train_acc=0.8984 | val_loss=0.5291, validation_acc=0.1677


Epochs:  40%|████      | 4/10 [02:48<04:12, 42.10s/it]

Epoch 4: train_loss=0.2855, train_acc=0.9035 | val_loss=0.5478, validation_acc=0.1819


Epochs:  50%|█████     | 5/10 [03:30<03:30, 42.03s/it]

Epoch 5: train_loss=0.2747, train_acc=0.9061 | val_loss=0.5103, validation_acc=0.1809


Epochs:  60%|██████    | 6/10 [04:12<02:48, 42.07s/it]

Epoch 6: train_loss=0.2657, train_acc=0.9083 | val_loss=0.5358, validation_acc=0.1813


Epochs:  70%|███████   | 7/10 [04:54<02:06, 42.06s/it]

Epoch 7: train_loss=0.2572, train_acc=0.9104 | val_loss=0.5539, validation_acc=0.1844


Epochs:  80%|████████  | 8/10 [05:36<01:24, 42.08s/it]

Epoch 8: train_loss=0.2531, train_acc=0.9110 | val_loss=0.5553, validation_acc=0.1835


Epochs:  90%|█████████ | 9/10 [06:18<00:42, 42.09s/it]

Epoch 9: train_loss=0.2504, train_acc=0.9119 | val_loss=0.5374, validation_acc=0.1835


Epochs: 100%|██████████| 10/10 [07:01<00:00, 42.11s/it]

Epoch 10: train_loss=0.2428, train_acc=0.9140 | val_loss=0.5692, validation_acc=0.1886





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇▇█████
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,█▇▃▅▁▄▆▆▄█
validation_acc,▁▆▆▇▇▇█▇▇█

0,1
epoch,10.0
train_acc,91.40012
train_loss,24.27816
val_loss,56.92425
validation_acc,18.85979


[34m[1mwandb[0m: Agent Starting Run: izi1ljju with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 128


Epochs:  10%|█         | 1/10 [00:34<05:06, 34.01s/it]

Epoch 1: train_loss=0.7245, train_acc=0.7810 | val_loss=0.5695, validation_acc=0.0949


Epochs:  20%|██        | 2/10 [01:07<04:31, 33.94s/it]

Epoch 2: train_loss=0.3738, train_acc=0.8773 | val_loss=0.5130, validation_acc=0.1220


Epochs:  30%|███       | 3/10 [01:41<03:57, 33.97s/it]

Epoch 3: train_loss=0.3195, train_acc=0.8939 | val_loss=0.5227, validation_acc=0.1629


Epochs:  40%|████      | 4/10 [02:16<03:24, 34.03s/it]

Epoch 4: train_loss=0.2945, train_acc=0.9008 | val_loss=0.5134, validation_acc=0.1638


Epochs:  50%|█████     | 5/10 [02:50<02:50, 34.06s/it]

Epoch 5: train_loss=0.2734, train_acc=0.9068 | val_loss=0.5251, validation_acc=0.1769


Epochs:  60%|██████    | 6/10 [03:24<02:16, 34.01s/it]

Epoch 6: train_loss=0.2616, train_acc=0.9101 | val_loss=0.5158, validation_acc=0.1751


Epochs:  70%|███████   | 7/10 [03:57<01:41, 33.95s/it]

Epoch 7: train_loss=0.2553, train_acc=0.9113 | val_loss=0.5061, validation_acc=0.1764


Epochs:  80%|████████  | 8/10 [04:31<01:07, 33.93s/it]

Epoch 8: train_loss=0.2480, train_acc=0.9129 | val_loss=0.5295, validation_acc=0.1760


Epochs:  90%|█████████ | 9/10 [05:06<00:34, 34.03s/it]

Epoch 9: train_loss=0.2432, train_acc=0.9138 | val_loss=0.5358, validation_acc=0.1826


Epochs: 100%|██████████| 10/10 [05:40<00:00, 34.02s/it]

Epoch 10: train_loss=0.2343, train_acc=0.9164 | val_loss=0.5133, validation_acc=0.1851





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇██████
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,█▂▃▂▃▂▁▄▄▂
validation_acc,▁▃▆▆▇▇▇▇██

0,1
epoch,10.0
train_acc,91.64437
train_loss,23.43369
val_loss,51.32548
validation_acc,18.51493


[34m[1mwandb[0m: Agent Starting Run: fo81lknq with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 512


Epochs:  10%|█         | 1/10 [00:39<05:56, 39.58s/it]

Epoch 1: train_loss=0.6080, train_acc=0.8120 | val_loss=0.5527, validation_acc=0.1187


Epochs:  20%|██        | 2/10 [01:19<05:15, 39.50s/it]

Epoch 2: train_loss=0.3347, train_acc=0.8890 | val_loss=0.5377, validation_acc=0.1522


Epochs:  30%|███       | 3/10 [01:58<04:36, 39.54s/it]

Epoch 3: train_loss=0.2920, train_acc=0.9019 | val_loss=0.5498, validation_acc=0.1536


Epochs:  40%|████      | 4/10 [02:37<03:56, 39.42s/it]

Epoch 4: train_loss=0.2739, train_acc=0.9066 | val_loss=0.5352, validation_acc=0.1649


Epochs:  50%|█████     | 5/10 [03:17<03:16, 39.39s/it]

Epoch 5: train_loss=0.2602, train_acc=0.9099 | val_loss=0.5382, validation_acc=0.1787


Epochs:  60%|██████    | 6/10 [03:56<02:37, 39.37s/it]

Epoch 6: train_loss=0.2510, train_acc=0.9124 | val_loss=0.5679, validation_acc=0.1912


Epochs:  70%|███████   | 7/10 [04:36<01:58, 39.46s/it]

Epoch 7: train_loss=0.2459, train_acc=0.9133 | val_loss=0.5145, validation_acc=0.1801


Epochs:  80%|████████  | 8/10 [05:15<01:18, 39.41s/it]

Epoch 8: train_loss=0.2403, train_acc=0.9147 | val_loss=0.5868, validation_acc=0.1815


Epochs:  90%|█████████ | 9/10 [05:54<00:39, 39.32s/it]

Epoch 9: train_loss=0.2354, train_acc=0.9157 | val_loss=0.5162, validation_acc=0.1717


Epochs: 100%|██████████| 10/10 [06:33<00:00, 39.36s/it]

Epoch 10: train_loss=0.2359, train_acc=0.9152 | val_loss=0.5462, validation_acc=0.1795





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇██████
train_loss,█▃▂▂▁▁▁▁▁▁
val_loss,▅▃▄▃▃▆▁█▁▄
validation_acc,▁▄▄▅▇█▇▇▆▇

0,1
epoch,10.0
train_acc,91.51687
train_loss,23.59411
val_loss,54.61758
validation_acc,17.95452


[34m[1mwandb[0m: Agent Starting Run: 08epy7x7 with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 512


Epochs:  10%|█         | 1/10 [00:35<05:16, 35.19s/it]

Epoch 1: train_loss=0.6774, train_acc=0.7926 | val_loss=0.6085, validation_acc=0.0870


Epochs:  20%|██        | 2/10 [01:10<04:40, 35.11s/it]

Epoch 2: train_loss=0.3834, train_acc=0.8728 | val_loss=0.5469, validation_acc=0.1310


Epochs:  30%|███       | 3/10 [01:44<04:04, 34.93s/it]

Epoch 3: train_loss=0.3282, train_acc=0.8907 | val_loss=0.5650, validation_acc=0.1484


Epochs:  40%|████      | 4/10 [02:19<03:28, 34.80s/it]

Epoch 4: train_loss=0.3010, train_acc=0.8986 | val_loss=0.5538, validation_acc=0.1605


Epochs:  50%|█████     | 5/10 [02:54<02:53, 34.69s/it]

Epoch 5: train_loss=0.2842, train_acc=0.9032 | val_loss=0.5530, validation_acc=0.1597


Epochs:  60%|██████    | 6/10 [03:28<02:18, 34.64s/it]

Epoch 6: train_loss=0.2716, train_acc=0.9068 | val_loss=0.5849, validation_acc=0.1605


Epochs:  70%|███████   | 7/10 [04:03<01:43, 34.63s/it]

Epoch 7: train_loss=0.2614, train_acc=0.9098 | val_loss=0.5790, validation_acc=0.1641


Epochs:  80%|████████  | 8/10 [04:37<01:09, 34.52s/it]

Epoch 8: train_loss=0.2587, train_acc=0.9103 | val_loss=0.5590, validation_acc=0.1695


Epochs:  90%|█████████ | 9/10 [05:12<00:34, 34.66s/it]

Epoch 9: train_loss=0.2536, train_acc=0.9114 | val_loss=0.5764, validation_acc=0.1669


Epochs: 100%|██████████| 10/10 [05:47<00:00, 34.72s/it]

Epoch 10: train_loss=0.2465, train_acc=0.9133 | val_loss=0.5818, validation_acc=0.1712





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇▇█████
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,█▁▃▂▂▅▅▂▄▅
validation_acc,▁▅▆▇▇▇▇███

0,1
epoch,10.0
train_acc,91.32635
train_loss,24.64542
val_loss,58.18144
validation_acc,17.12469


[34m[1mwandb[0m: Agent Starting Run: 5xcvqv0a with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 128


Epochs:  10%|█         | 1/10 [00:26<03:57, 26.43s/it]

Epoch 1: train_loss=0.7517, train_acc=0.7749 | val_loss=0.6106, validation_acc=0.0747


Epochs:  20%|██        | 2/10 [00:52<03:31, 26.42s/it]

Epoch 2: train_loss=0.4087, train_acc=0.8661 | val_loss=0.5994, validation_acc=0.1222


Epochs:  30%|███       | 3/10 [01:19<03:04, 26.39s/it]

Epoch 3: train_loss=0.3404, train_acc=0.8886 | val_loss=0.5430, validation_acc=0.1265


Epochs:  40%|████      | 4/10 [01:45<02:38, 26.34s/it]

Epoch 4: train_loss=0.3123, train_acc=0.8963 | val_loss=0.5460, validation_acc=0.1329


Epochs:  50%|█████     | 5/10 [02:11<02:11, 26.35s/it]

Epoch 5: train_loss=0.2938, train_acc=0.9015 | val_loss=0.5429, validation_acc=0.1506


Epochs:  60%|██████    | 6/10 [02:37<01:45, 26.27s/it]

Epoch 6: train_loss=0.2772, train_acc=0.9062 | val_loss=0.5488, validation_acc=0.1596


Epochs:  70%|███████   | 7/10 [03:04<01:18, 26.33s/it]

Epoch 7: train_loss=0.2687, train_acc=0.9081 | val_loss=0.5636, validation_acc=0.1587


Epochs:  80%|████████  | 8/10 [03:30<00:52, 26.28s/it]

Epoch 8: train_loss=0.2624, train_acc=0.9097 | val_loss=0.5537, validation_acc=0.1613


Epochs:  90%|█████████ | 9/10 [03:56<00:26, 26.28s/it]

Epoch 9: train_loss=0.2553, train_acc=0.9115 | val_loss=0.5702, validation_acc=0.1575


Epochs: 100%|██████████| 10/10 [04:23<00:00, 26.31s/it]

Epoch 10: train_loss=0.2511, train_acc=0.9123 | val_loss=0.5764, validation_acc=0.1604





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇▇█████
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,█▇▁▁▁▂▃▂▄▄
validation_acc,▁▅▅▆▇█████

0,1
epoch,10.0
train_acc,91.23089
train_loss,25.11197
val_loss,57.64355
validation_acc,16.03621


[34m[1mwandb[0m: Agent Starting Run: gg64uxb7 with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 256


Epochs:  10%|█         | 1/10 [01:54<17:09, 114.39s/it]

Epoch 1: train_loss=0.5543, train_acc=0.8274 | val_loss=0.5214, validation_acc=0.1368


Epochs:  20%|██        | 2/10 [03:47<15:11, 113.89s/it]

Epoch 2: train_loss=0.3040, train_acc=0.8984 | val_loss=0.5512, validation_acc=0.1701


Epochs:  30%|███       | 3/10 [05:41<13:17, 113.92s/it]

Epoch 3: train_loss=0.2705, train_acc=0.9074 | val_loss=0.5037, validation_acc=0.1759


Epochs:  40%|████      | 4/10 [07:35<11:23, 113.86s/it]

Epoch 4: train_loss=0.2531, train_acc=0.9118 | val_loss=0.5279, validation_acc=0.1779


Epochs:  50%|█████     | 5/10 [09:29<09:29, 113.84s/it]

Epoch 5: train_loss=0.2474, train_acc=0.9124 | val_loss=0.5434, validation_acc=0.1833


Epochs:  60%|██████    | 6/10 [11:23<07:35, 113.81s/it]

Epoch 6: train_loss=0.2388, train_acc=0.9147 | val_loss=0.5461, validation_acc=0.1754


Epochs:  70%|███████   | 7/10 [13:16<05:41, 113.70s/it]

Epoch 7: train_loss=0.2360, train_acc=0.9147 | val_loss=0.5785, validation_acc=0.1886


Epochs:  80%|████████  | 8/10 [15:10<03:47, 113.70s/it]

Epoch 8: train_loss=0.2291, train_acc=0.9166 | val_loss=0.5482, validation_acc=0.1847


Epochs:  90%|█████████ | 9/10 [17:04<01:53, 113.93s/it]

Epoch 9: train_loss=0.2251, train_acc=0.9175 | val_loss=0.5780, validation_acc=0.1898


Epochs: 100%|██████████| 10/10 [18:58<00:00, 113.88s/it]

Epoch 10: train_loss=0.2212, train_acc=0.9186 | val_loss=0.5693, validation_acc=0.1854





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇██████
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,▃▅▁▃▅▅█▅█▇
validation_acc,▁▅▆▆▇▆█▇█▇

0,1
epoch,10.0
train_acc,91.86212
train_loss,22.11523
val_loss,56.93443
validation_acc,18.53648


[34m[1mwandb[0m: Agent Starting Run: 6406lqs7 with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 256


Epochs:  10%|█         | 1/10 [00:30<04:33, 30.44s/it]

Epoch 1: train_loss=0.5793, train_acc=0.8211 | val_loss=0.5712, validation_acc=0.1114


Epochs:  20%|██        | 2/10 [01:01<04:04, 30.60s/it]

Epoch 2: train_loss=0.3346, train_acc=0.8894 | val_loss=0.5406, validation_acc=0.1406


Epochs:  30%|███       | 3/10 [01:30<03:31, 30.24s/it]

Epoch 3: train_loss=0.2915, train_acc=0.9018 | val_loss=0.5367, validation_acc=0.1457


Epochs:  40%|████      | 4/10 [02:01<03:02, 30.45s/it]

Epoch 4: train_loss=0.2712, train_acc=0.9072 | val_loss=0.5564, validation_acc=0.1571


Epochs:  50%|█████     | 5/10 [02:32<02:32, 30.50s/it]

Epoch 5: train_loss=0.2547, train_acc=0.9115 | val_loss=0.5411, validation_acc=0.1555


Epochs:  60%|██████    | 6/10 [03:03<02:02, 30.56s/it]

Epoch 6: train_loss=0.2472, train_acc=0.9130 | val_loss=0.5573, validation_acc=0.1652


Epochs:  70%|███████   | 7/10 [03:34<01:32, 30.73s/it]

Epoch 7: train_loss=0.2437, train_acc=0.9133 | val_loss=0.5711, validation_acc=0.1701


Epochs:  80%|████████  | 8/10 [04:04<01:01, 30.75s/it]

Epoch 8: train_loss=0.2433, train_acc=0.9130 | val_loss=0.5456, validation_acc=0.1503


Epochs:  90%|█████████ | 9/10 [04:35<00:30, 30.72s/it]

Epoch 9: train_loss=0.2351, train_acc=0.9151 | val_loss=0.5513, validation_acc=0.1590


Epochs: 100%|██████████| 10/10 [05:06<00:00, 30.62s/it]

Epoch 10: train_loss=0.2271, train_acc=0.9174 | val_loss=0.5707, validation_acc=0.1572





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇██████
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,█▂▁▅▂▅█▃▄█
validation_acc,▁▄▅▆▆▇█▆▇▆

0,1
epoch,10.0
train_acc,91.74357
train_loss,22.71203
val_loss,57.07346
validation_acc,15.72368


**Apply the best validation hyperparameter on test data**

In [17]:
best_encoder_attn = Encoder(
    vocab_size = SRC_VOCAB,
    emb_dim    = 256,
    hid_dim    = 256,
    rnn_type   = 'rnn',
    num_layers = 3,
    dropout    = 0.2,
    bidir      = True
)

best_decoder_attn = Decoder(
    vocab_size    = TGT_VOCAB,
    emb_dim       = 256,
    hid_dim       = 256,
    rnn_type      = 'rnn',
    num_layers    = 3,
    dropout       = 0.2,
    bidir         = True,
    use_attention = True
)

best_model_attn = Seq2Seq(
    encoder            = best_encoder_attn,
    decoder            = best_decoder_attn,
    max_tgt_len        = max_seq_tgt,
    teacher_force_rate = 0.5
).to(device)

# retrain on train+val to finalize
wandb.init(
    project = "MA23M018_assignment3",
    entity  = "ma23m018-indian-institute-of-technology-madras",
    name    = "final_train_with_attention"
)
train1(best_model_attn, train_loader, val_loader, epochs=10)
wandb.finish()

# TEST-SET EVAL SWEEP (Attention)
sweep_config = {
    'method': 'grid',
    'name'  : 'testset_run_attention',
    'metric': {'goal': 'maximize', 'name': 'test_accuracy'},
    'parameters': {
        
        'beam_size': {'values': [1]}
    }
}

sweep_id = wandb.sweep(
    sweep   = sweep_config,
    entity  = "ma23m018-indian-institute-of-technology-madras",
    project = "DA6401_A3_TEST_ATTN"
)

def test_main():
    with wandb.init():
        cfg = wandb.config
        wandb.run.name = f"test_set_run_attn_beam{cfg.beam_size}"
        # If you had a beam‐search decode, you'd pass cfg.beam_size here.
        calc_test_acc(best_model_attn, test_loader)

# Run 1 test‐set evaluation
wandb.agent(sweep_id, function=test_main, count=1)
wandb.finish()


Epochs:  10%|█         | 1/10 [00:42<06:22, 42.48s/it]

Epoch 1: train_loss=0.6070, train_acc=0.8106 | val_loss=0.5629, validation_acc=0.1278


Epochs:  20%|██        | 2/10 [01:23<05:35, 41.91s/it]

Epoch 2: train_loss=0.3442, train_acc=0.8859 | val_loss=0.5656, validation_acc=0.1660


Epochs:  30%|███       | 3/10 [02:06<04:53, 41.99s/it]

Epoch 3: train_loss=0.2996, train_acc=0.8993 | val_loss=0.5307, validation_acc=0.1678


Epochs:  40%|████      | 4/10 [02:47<04:11, 41.91s/it]

Epoch 4: train_loss=0.2757, train_acc=0.9063 | val_loss=0.5287, validation_acc=0.1716


Epochs:  50%|█████     | 5/10 [03:29<03:29, 41.82s/it]

Epoch 5: train_loss=0.2672, train_acc=0.9077 | val_loss=0.5213, validation_acc=0.1720


Epochs:  60%|██████    | 6/10 [04:11<02:47, 41.85s/it]

Epoch 6: train_loss=0.2572, train_acc=0.9104 | val_loss=0.5359, validation_acc=0.1856


Epochs:  70%|███████   | 7/10 [04:53<02:05, 41.88s/it]

Epoch 7: train_loss=0.2527, train_acc=0.9113 | val_loss=0.5263, validation_acc=0.1731


Epochs:  80%|████████  | 8/10 [05:34<01:23, 41.79s/it]

Epoch 8: train_loss=0.2449, train_acc=0.9132 | val_loss=0.5459, validation_acc=0.1826


Epochs:  90%|█████████ | 9/10 [06:16<00:41, 41.71s/it]

Epoch 9: train_loss=0.2392, train_acc=0.9147 | val_loss=0.5460, validation_acc=0.1848


Epochs: 100%|██████████| 10/10 [06:58<00:00, 41.80s/it]

Epoch 10: train_loss=0.2365, train_acc=0.9152 | val_loss=0.5448, validation_acc=0.1860





0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▇▇▇█████
train_loss,█▃▂▂▂▁▁▁▁▁
val_loss,██▂▂▁▃▂▅▅▅
validation_acc,▁▆▆▆▆█▆███

0,1
epoch,10.0
train_acc,91.51973
train_loss,23.65463
val_loss,54.4817
validation_acc,18.60114


Create sweep with ID: kacz1wga
Sweep URL: https://wandb.ai/ma23m018-indian-institute-of-technology-madras/DA6401_A3_TEST_ATTN/sweeps/kacz1wga


[34m[1mwandb[0m: Agent Starting Run: s6cn3nnr with config:
[34m[1mwandb[0m: 	beam_size: 1


Test Loss: 0.5267 	 Test Accuracy: 0.1856


0,1
test_accuracy,▁
test_loss,▁

0,1
test_accuracy,18.56307
test_loss,0.52675
