In [2]:
import numpy as np 
import pandas as pd 
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/aksharantar-sampled/aksharantar_sampled/brx/brx_test.csv
/kaggle/input/aksharantar-sampled/aksharantar_sampled/brx/brx_valid.csv
/kaggle/input/aksharantar-sampled/aksharantar_sampled/brx/brx_train.csv
/kaggle/input/aksharantar-sampled/aksharantar_sampled/tam/tam_valid.csv
/kaggle/input/aksharantar-sampled/aksharantar_sampled/tam/tam_train.csv
/kaggle/input/aksharantar-sampled/aksharantar_sampled/tam/tam_test.csv
/kaggle/input/aksharantar-sampled/aksharantar_sampled/mni/mni_valid.csv
/kaggle/input/aksharantar-sampled/aksharantar_sampled/mni/mni_test.csv
/kaggle/input/aksharantar-sampled/aksharantar_sampled/mni/mni_train.csv
/kaggle/input/aksharantar-sampled/aksharantar_sampled/urd/urd_train.csv
/kaggle/input/aksharantar-sampled/aksharantar_sampled/urd/urd_valid.csv
/kaggle/input/aksharantar-sampled/aksharantar_sampled/urd/urd_test.csv
/kaggle/input/aksharantar-sampled/aksharantar_sampled/kok/kok_valid.csv
/kaggle/input/aksharantar-sampled/aksharantar_sampled/kok/kok_train.

In [1]:
!pip install wandb
import wandb
wandb.login()



[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [3]:
#import necessary libraries
import os
import wandb
import torch
import torch.nn as nn
import random
from torch.autograd import Variable
from torch.utils.data import DataLoader
import pandas as pd
import torch.optim as optim
import torch.nn.functional as Function
import argparse

# Check if CUDA is available
use_cuda = torch.cuda.is_available()

# Set the device type to CUDA if available, otherwise use CPU
if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
  
F=Function
Start_Symbol, End_Symbol, Unknown, Padding = 0, 1, 2, 3

class Vocabulary:
    def __init__(self):
        self.char2count = {}
        self.char2index = {}
        self.n_chars = 4
        self.index2char = {0: "<", 1: ">", 2: "?", 3: "."}


    def addWord(self, word):
        for char in word:
            if char not in self.char2index:
                self.char2index[char] = self.n_chars
                self.index2char[self.n_chars] = char
                self.char2count[char] = 1
                self.n_chars += 1
            else:
                self.char2count[char] += 1

            
def prepareData(dir):

    input_lang = Vocabulary()
    output_lang = Vocabulary()

    data = pd.read_csv(dir, sep=",", names=["input", "target"])

    input_list = data["input"].to_list()
    target_list = data["target"].to_list()

    max_target_length = max([len(txt) for txt in data["target"].to_list()])

    pairs = []
    for i in range(len(target_list)):
        pairs.append([input_list[i], target_list[i]])

    max_input_length = max([len(txt) for txt in data["input"].to_list()])
    for pair in pairs:
        input_lang.addWord(pair[0])
        output_lang.addWord(pair[1])

    prepared_data = {
        "input_lang": input_lang,
        "output_lang": output_lang,
        "pairs": pairs,
        "max_input_length": max_input_length,
        "max_target_length": max_target_length,
    }

    return prepared_data

def helpindex(lang, word):
    l=[]
    for i in range(len(word)):
        if word[i] not in lang.char2index.keys():
            l.append(Unknown)
        else:
            l.append(lang.char2index[word[i]])
    return l

def helpTensor(lang, word, max_length):
    indexes = helpindex(lang, word)
    indexes.append(End_Symbol)
    indexes.extend([Padding] * (max_length - len(indexes)))
    result = torch.LongTensor(indexes)
    if use_cuda==False:
        return result
    else:
        return result.cuda()

def MakeTensor(input_lang, output_lang, pairs, max_length):
    res = []
    for pair in pairs:
        input_variable = helpTensor(input_lang, pair[0], max_length)
        target_variable = helpTensor(output_lang, pair[1], max_length)
        res.append((input_variable, target_variable))
    return res


class EncoderRNN(nn.Module):
    def __init__(self, input_size, embedding_size,hidden_size,num_layers_encoder,cell_type,drop_out,bi_directional):
        super(EncoderRNN, self).__init__()

        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.num_layers_encoder = num_layers_encoder
        self.cell_type = cell_type
        self.drop_out = drop_out
        self.bi_directional = bi_directional

        self.embedding = nn.Embedding(input_size, self.embedding_size)
        self.dropout = nn.Dropout(self.drop_out)

        cell_map = {"RNN": nn.RNN, "GRU": nn.GRU, "LSTM": nn.LSTM}
        self.cell_layer = cell_map[self.cell_type](
            self.embedding_size,
            self.hidden_size,
            num_layers=self.num_layers_encoder,
            dropout=self.drop_out,
            bidirectional=self.bi_directional,
        )

    def forward(self, input, batch_size, hidden):
        embedded = self.dropout(self.embedding(input).view(1, batch_size, -1))
        output, hidden = self.cell_layer(embedded, hidden)
        return output, hidden

    def initHidden(self, batch_size, num_layers_enc):
        res = torch.zeros(
            num_layers_enc * 2 if self.bi_directional else num_layers_enc,
            batch_size,
            self.hidden_size,
        )
        if use_cuda== False:
            return res
        else:
            return res.cuda()



class DecoderAttention(nn.Module):
    def __init__(
        self,
        hidden_size,
        embedding_size,
        cell_type,
        num_layers_decoder,
        drop_out,
        max_length_word,
        output_size,
    ):

        super(DecoderAttention, self).__init__()

        self.hidden_size = hidden_size
        self.embedding_size = embedding_size
        self.cell_type = cell_type
        self.num_layers_decoder = num_layers_decoder
        self.drop_out = drop_out
        self.max_length_word = max_length_word

        self.embedding = nn.Embedding(output_size, embedding_dim=self.embedding_size)
        self.attention_layer = nn.Linear(
            self.embedding_size + self.hidden_size, self.max_length_word
        )
        self.attention_combine = nn.Linear(
            self.embedding_size + self.hidden_size, self.embedding_size
        )
        self.dropout = nn.Dropout(self.drop_out)

        self.cell_layer = None
        cell_map = {"RNN": nn.RNN, "GRU": nn.GRU, "LSTM": nn.LSTM}

        if self.cell_type in cell_map:
            self.cell_layer = cell_map[self.cell_type](
                self.embedding_size,
                self.hidden_size,
                num_layers=self.num_layers_decoder,
                dropout=self.drop_out,
            )

        self.out = nn.Linear(self.hidden_size, output_size)

    def forward(self, input, batch_size, hidden, encoder_outputs):

        embedded = self.embedding(input).view(1, batch_size, -1)

        attention_weights = None
        if self.cell_type == "LSTM":
            attention_weights = Function.softmax(
                self.attention_layer(torch.cat((embedded[0], hidden[0][0]), 1)), dim=1
            )

        else:
            attention_weights = Function.softmax(
                self.attention_layer(torch.cat((embedded[0], hidden[0]), 1)), dim=1
            )

        attention_applied = torch.bmm(
            attention_weights.view(batch_size, 1, self.max_length_word),
            encoder_outputs,
        ).view(1, batch_size, -1)
        output = torch.cat((embedded[0], attention_applied[0]), 1)
        output = self.attention_combine(output).unsqueeze(0)
        output = Function.relu(output)
        # if self.cell_type=RNN" :
        output, hidden = self.cell_layer(output, hidden)
        output = Function.log_softmax(self.out(output[0]), dim=1)

        return output, hidden, attention_weights



def train_and_val_with_attn(
    encoder,
    decoder,
    encoder_optimizer,
    decoder_optimizer,
    input_tensor,
    target_tensor,
    criterion,
    batch_size,
    cell_type,
    num_layers_enc,
    max_length,is_training,
    teacher_forcing_ratio=0.5,
):

    encoder_hidden = encoder.initHidden(batch_size, num_layers_enc)

    if cell_type == "LSTM":
        encoder_cell_state = encoder.initHidden(batch_size, num_layers_enc)
        encoder_hidden = (encoder_hidden, encoder_cell_state)

    if is_training:
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = Variable(torch.zeros(max_length, batch_size, encoder.hidden_size))
    encoder_outputs = encoder_outputs.cuda() if use_cuda else encoder_outputs

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_tensor[ei], batch_size, encoder_hidden
        )
        encoder_outputs[ei] = encoder_output[0]

    decoder_input = Variable(torch.LongTensor([Start_Symbol] * batch_size))
    decoder_input = decoder_input.cuda() if use_cuda else decoder_input

    decoder_hidden = encoder_hidden
    if is_training:
        use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

        if use_teacher_forcing == False:
            for di in range(target_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input,
                    batch_size,
                    decoder_hidden,
                    encoder_outputs.reshape(batch_size, max_length, encoder.hidden_size),
                )
                #2 for loop ko bhar dal de
                topv, topi = decoder_output.data.topk(1)
                decoder_input = torch.cat(tuple(topi))

                decoder_input = decoder_input.cuda() if use_cuda else decoder_input

                loss += criterion(decoder_output, target_tensor[di])
        else:
            for di in range(target_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input,
                    batch_size,
                    decoder_hidden,
                    encoder_outputs.reshape(batch_size, max_length, encoder.hidden_size),
                )
                loss += criterion(decoder_output, target_tensor[di])
                decoder_input = target_tensor[di]
            

        loss.backward()

        encoder_optimizer.step()
        decoder_optimizer.step()
    else :
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input,
                batch_size,
                decoder_hidden,
                encoder_outputs.reshape(batch_size, max_length, encoder.hidden_size),
            )
            topv, topi = decoder_output.data.topk(1)
            decoder_input = torch.cat(tuple(topi))

            decoder_input = decoder_input.cuda() if use_cuda else decoder_input
            loss += criterion(decoder_output, target_tensor[di])


    return loss.item() / target_length


# batch_size,num_layers_enc,cell_type,output_lang,criterion,
def accuracy_with_attention(
    encoder,
    decoder,
    loader,
    batch_size,
    num_layers_enc,
    cell_type,
    output_lang,
    criterion,
    max_length,
):

    with torch.no_grad():

        # batch_size = configuration["batch_size"]
        total = 0
        correct = 0

        for batch_x, batch_y in loader:

            encoder_hidden = encoder.initHidden(batch_size, num_layers_enc)

            input_variable = Variable(batch_x.transpose(0, 1))
            target_variable = Variable(batch_y.transpose(0, 1))

            if cell_type == "LSTM":
                encoder_cell_state = encoder.initHidden(batch_size, num_layers_enc)
                encoder_hidden = (encoder_hidden, encoder_cell_state)

            input_length = input_variable.size()[0]
            target_length = target_variable.size()[0]

            output = torch.LongTensor(target_length, batch_size)

            encoder_outputs = Variable(
                torch.zeros(max_length, batch_size, encoder.hidden_size)
            )
            encoder_outputs = encoder_outputs.cuda() if use_cuda else encoder_outputs

            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder(
                    input_variable[ei], batch_size, encoder_hidden
                )
                encoder_outputs[ei] = encoder_output[0]

            decoder_input = Variable(torch.LongTensor([Start_Symbol] * batch_size))
            decoder_input = decoder_input.cuda() if use_cuda else decoder_input

            decoder_hidden = encoder_hidden

            for di in range(target_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input,
                    batch_size,
                    decoder_hidden,
                    encoder_outputs.reshape(
                        batch_size, max_length, encoder.hidden_size
                    ),
                )
                topv, topi = decoder_output.data.topk(1)
                decoder_input = torch.cat(tuple(topi))
                output[di] = torch.cat(tuple(topi))

            output = output.transpose(0, 1)
            for di in range(output.size()[0]):
                ignore = [Start_Symbol, End_Symbol, Padding]
                sent = [
                    output_lang.index2char[letter.item()]
                    for letter in output[di]
                    if letter not in ignore
                ]
                y = [
                    output_lang.index2char[letter.item()]
                    for letter in batch_y[di]
                    if letter not in ignore
                ]
                if sent == y:
                    correct += 1
                total += 1

    return (correct / total) * 100


def cal_val_loss_with_attn(
    encoder,
    decoder,
    input_tensor,
    target_tensor,
    batch_size,
    criterion,
    cell_type,
    num_layers_enc,
    max_length,
):

    with torch.no_grad():

        encoder_hidden = encoder.initHidden(batch_size, num_layers_enc)

        if cell_type == "LSTM":
            encoder_cell_state = encoder.initHidden(batch_size, num_layers_enc)
            encoder_hidden = (encoder_hidden, encoder_cell_state)

        input_length = input_tensor.size()[0]
        target_length = target_tensor.size()[0]

        encoder_outputs = Variable(
            torch.zeros(max_length, batch_size, encoder.hidden_size)
        )
        encoder_outputs = encoder_outputs.cuda() if use_cuda else encoder_outputs

        loss = 0

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(
                input_tensor[ei], batch_size, encoder_hidden
            )
            encoder_outputs[ei] = encoder_output[0]

        decoder_input = Variable(torch.LongTensor([Start_Symbol] * batch_size))
        if use_cuda== True:
            decoder_input = decoder_input.cuda()  
        else :
            decoder_input = decoder_input

        decoder_hidden = encoder_hidden

        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input,
                batch_size,
                decoder_hidden,
                encoder_outputs.reshape(batch_size, max_length, encoder.hidden_size),
            )
            topv, topi = decoder_output.data.topk(1)
            decoder_input = torch.cat(tuple(topi))

            if use_cuda== True:
                decoder_input = decoder_input.cuda()  
            else :
                decoder_input = decoder_input
            loss += criterion(decoder_output, target_tensor[di])

    return loss.item() / target_length


def Attention_seq2seq(
    encoder,
    decoder,
    train_loader,
    val_loader,
    test_loader,
    learning_rate,
    optimizer,
    epochs,
    max_length_word,
    attention,
    num_layers_enc,
    output_lang,
    batch_size,
    cell_type
):
    max_length = max_length_word - 1
    batch_size = 1024
    encoder_optimizer = (
        optim.NAdam(encoder.parameters(), lr=learning_rate)
        if optimizer == "nadam"
        else optim.Adam(encoder.parameters(), lr=learning_rate)
    )
    decoder_optimizer = (
        optim.NAdam(decoder.parameters(), lr=learning_rate)
        if optimizer == "nadam"
        else optim.Adam(decoder.parameters(), lr=learning_rate)
    )
    criterion = nn.NLLLoss()

    for epoch in range(epochs):
        train_loss_total, val_loss_total  =0, 0
        
        for batchx, batchy in train_loader:
            batchx = Variable(batchx.transpose(0, 1))
            batchy = Variable(batchy.transpose(0, 1))
            loss = train_and_val_with_attn(
                encoder,
                decoder,
                encoder_optimizer,
                decoder_optimizer,
                batchx,
                batchy,
                criterion,
                batch_size,
                cell_type,
                num_layers_enc,
                max_length + 1,
                True, #is_training
            )
            train_loss_total += loss

        train_loss_avg = train_loss_total / len(train_loader)
        print(f"Epoch: {epoch} | Train Loss: {train_loss_avg:.4f} | ", end="")

        for batchx, batchy in val_loader:
            batchx = Variable(batchx.transpose(0, 1))
            batchy = Variable(batchy.transpose(0, 1))
            loss = train_and_val_with_attn(
                encoder,
                decoder,
                encoder_optimizer,
                decoder_optimizer,
                batchx,
                batchy,
                criterion,
                batch_size,
                cell_type,
                num_layers_enc,
                max_length + 1,
                False,#is_training=
            )
            val_loss_total += loss

        val_loss_avg = val_loss_total / len(val_loader)
        print(f"Val Loss: {val_loss_avg:.4f} | ", end="")
        val_acc = accuracy_with_attention(
            encoder,
            decoder,
            val_loader,
            batch_size,
            num_layers_enc,
            cell_type,
            output_lang,
            criterion,
            max_length + 1,
        )
        val_acc = val_acc / 100
        print(f"Val Accuracy: {val_acc:.4%}")
        wandb.log({
            "epoch": epoch + 1,
            "training_loss": train_loss_avg,
            "validation_accuracy": val_acc,
            "validation_loss": val_loss_avg
        })
        if epochs-1==epoch:
            test_acc = accuracy_with_attention(
            encoder,
            decoder,
            test_loader,
            batch_size,
            num_layers_enc,
            cell_type,
            output_lang,
            criterion,
            max_length + 1,
        )
            test_acc = test_acc / 100
            print(f"Test Accuracy: {test_acc:.4%}")
            wandb.log({"test_accuracy": test_acc})

def to_dict(input_lang,output_lang,pairs,max_len):
    d = {
        "input_lang": input_lang,
        "output_lang": output_lang,
        "pairs": pairs,
        "max_len": max_len
    }
    return d


   

In [5]:
sweep_configuration = {
    "method": "bayes",
    "metric": {
        "name": "validation_accuracy",
        "goal": "maximize"
    },
    "parameters": {
        "embed_size": {
            "values": [32,64,128]
        },
        "hidden_size": {
            "values": [128, 256, 512]
        },
        "cell_type": {
            "values": ["GRU", "LSTM", "RNN"]
        },
        "num_layers": {
            "values": [1, 2, 3]
        },
        "dropout": {
            "values": [0, 0.1, 0.2]
        },
        "learning_rate": {
            "values": [0.0005, 0.001, 0.005]
        },
        "optimizer": {
            "values": ["Adam","Nadam"]
        },
        "teacher_forcing_ratio": {
            "values": [0.5, 0.75, 0.25]
        }
    }
}

In [7]:
count = 0

def train_sweep():
    global count
    count += 1
    run = wandb.init()
    config = wandb.config
    run.name = "Attn_Model_embed_size {}_hidden_size {}_cell_type {}_num_layers {} _dropout {} _learning_rate {} _optimizer {} _teacher_forcing_ratio {}".format(config.embed_size, config.hidden_size, config.cell_type, config.num_layers, config.dropout, config.learning_rate, config.optimizer, config.teacher_forcing_ratio)
    
    optimizer = config.optimizer
    learning_rate = config.learning_rate
    teacher_forcing_ratio = config.teacher_forcing_ratio
    hidden_size = config.hidden_size
    input_lang = "eng"
    target_lang = "hin"
    cell_type = "LSTM"
    num_layers_encoder = config.num_layers
    num_layers_decoder = config.num_layers
    drop_out = config.dropout
    epochs = 60
    embedding_size = config.embed_size
    bi_directional = False
    batch_size = 1024
    
    
    
    
    
    train_path = "/kaggle/input/aksharantar-sampled/aksharantar_sampled/hin/hin_train.csv"
    validation_path = "/kaggle/input/aksharantar-sampled/aksharantar_sampled/hin/hin_valid.csv"
    test_path = "/kaggle/input/aksharantar-sampled/aksharantar_sampled/hin/hin_test.csv"
    train_prepared_data = prepareData(train_path)
    input_langs, output_langs, pairs = (
            train_prepared_data["input_lang"],
            train_prepared_data["output_lang"],
            train_prepared_data["pairs"],
        )

    max_input_length, max_target_length = (
            train_prepared_data["max_input_length"],
            train_prepared_data["max_target_length"],
        )

        # validation
    val_prepared_data = prepareData(validation_path)
    val_pairs = val_prepared_data["pairs"]
        # Test
    max_input_length_val, max_target_length_val = (
    val_prepared_data["max_input_length"],
    val_prepared_data["max_target_length"],
        )
    test_prepared_data = prepareData(validation_path)
    test_pairs = test_prepared_data["pairs"]

    max_input_length_test, max_target_length_test = (
    test_prepared_data["max_input_length"],
    test_prepared_data["max_target_length"],
        )
    max_len_all = (
            max(
                max_input_length,
                max_target_length,
                max_input_length_val,
                max_target_length_val,
                max_input_length_test,
                max_target_length_test,
            )
            + 1
        )

    max_len = max(max_input_length, max_target_length) + 3


    pairs = MakeTensor(input_langs, output_langs, pairs, max_len)
    val_pairs = MakeTensor(input_langs, output_langs, val_pairs, max_len)
    test_pairs = MakeTensor(input_langs, output_langs, test_pairs, max_len)

    train_loader = DataLoader(pairs, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_pairs, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_pairs, batch_size=batch_size, shuffle=True)
    encoder1 = EncoderRNN(
            input_langs.n_chars,
            embedding_size,
            hidden_size,
            num_layers_encoder,
            cell_type,
            drop_out,
            bi_directional,
        )
    attndecoder1 = DecoderAttention(
            hidden_size,
            embedding_size,
            cell_type,
            num_layers_decoder,
            drop_out,
            max_len,
            output_langs.n_chars,
        )
    if use_cuda== True:
            encoder1 = encoder1.cuda()
            attndecoder1 = attndecoder1.cuda()
    print("with attention")
    attention = True
    Attention_seq2seq(
            encoder1,
            attndecoder1,
            train_loader,
            val_loader,
            test_loader,
            learning_rate,
            optimizer,
            epochs,
            max_len,
            attention,
            num_layers_encoder,
            output_langs,
            batch_size,
            cell_type
        )

In [8]:
wandb_id = wandb.sweep(sweep_configuration, project="DL_A-03_AttentionRNN")
wandb.agent(wandb_id, train_sweep, count=10)

Create sweep with ID: pvpipb29
Sweep URL: https://wandb.ai/believer12/DL_A-03_AttentionRNN/sweeps/pvpipb29


[34m[1mwandb[0m: Agent Starting Run: 911n2p4h with config:
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.75
[34m[1mwandb[0m: Currently logged in as: [33mma22c019[0m ([33mbeliever12[0m). Use [1m`wandb login --relogin`[0m to force relogin


with attention
Epoch: 0 | Train Loss: 1.8920 | Val Loss: 1.1099 | Val Accuracy: 0.0000%
Epoch: 1 | Train Loss: 1.2380 | Val Loss: 1.0867 | Val Accuracy: 0.0000%
Epoch: 2 | Train Loss: 1.2111 | Val Loss: 1.0270 | Val Accuracy: 0.0000%
Epoch: 3 | Train Loss: 1.1219 | Val Loss: 0.9395 | Val Accuracy: 0.0000%
Epoch: 4 | Train Loss: 1.0877 | Val Loss: 0.9335 | Val Accuracy: 0.0000%
Epoch: 5 | Train Loss: 1.0803 | Val Loss: 0.9249 | Val Accuracy: 0.0000%
Epoch: 6 | Train Loss: 1.0733 | Val Loss: 0.9184 | Val Accuracy: 0.0000%
Epoch: 7 | Train Loss: 1.0633 | Val Loss: 0.9191 | Val Accuracy: 0.0000%
Epoch: 8 | Train Loss: 1.0593 | Val Loss: 0.9161 | Val Accuracy: 0.0000%
Epoch: 9 | Train Loss: 1.0469 | Val Loss: 0.9106 | Val Accuracy: 0.0000%
Epoch: 10 | Train Loss: 1.0568 | Val Loss: 0.9853 | Val Accuracy: 0.0000%
Epoch: 11 | Train Loss: 1.0356 | Val Loss: 0.9196 | Val Accuracy: 0.0000%
Epoch: 12 | Train Loss: 1.0086 | Val Loss: 0.8887 | Val Accuracy: 0.0000%
Epoch: 13 | Train Loss: 0.9741 | 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_accuracy,▁
training_loss,█▅▅▄▄▄▄▄▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
validation_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▃▃▄▄▅▅▅▆▆▆▇▆▇▇▇▇▇▇██
validation_loss,██▆▆▆▆▆▇▆▆▆▆▅▅▄▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,60.0
test_accuracy,0.26782
training_loss,0.28677
validation_accuracy,0.26611
validation_loss,0.3651


[34m[1mwandb[0m: Agent Starting Run: 6s9qq9db with config:
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: Nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.25


with attention
Epoch: 0 | Train Loss: 1.8448 | Val Loss: 1.1046 | Val Accuracy: 0.0000%
Epoch: 1 | Train Loss: 1.2293 | Val Loss: 1.0415 | Val Accuracy: 0.0000%
Epoch: 2 | Train Loss: 1.1425 | Val Loss: 0.9570 | Val Accuracy: 0.0000%
Epoch: 3 | Train Loss: 1.1041 | Val Loss: 0.9409 | Val Accuracy: 0.0000%
Epoch: 4 | Train Loss: 1.0906 | Val Loss: 0.9365 | Val Accuracy: 0.0000%
Epoch: 5 | Train Loss: 1.0787 | Val Loss: 0.9254 | Val Accuracy: 0.0000%
Epoch: 6 | Train Loss: 1.0597 | Val Loss: 0.9235 | Val Accuracy: 0.0000%
Epoch: 7 | Train Loss: 1.0469 | Val Loss: 0.9319 | Val Accuracy: 0.0000%
Epoch: 8 | Train Loss: 1.0442 | Val Loss: 0.9570 | Val Accuracy: 0.0244%
Epoch: 9 | Train Loss: 1.0357 | Val Loss: 0.9228 | Val Accuracy: 0.0000%
Epoch: 10 | Train Loss: 1.0206 | Val Loss: 0.9264 | Val Accuracy: 0.0000%
Epoch: 11 | Train Loss: 1.0061 | Val Loss: 0.9126 | Val Accuracy: 0.0000%
Epoch: 12 | Train Loss: 1.0117 | Val Loss: 0.8931 | Val Accuracy: 0.0000%
Epoch: 13 | Train Loss: 0.9857 | 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_accuracy,▁
training_loss,█▅▅▅▄▄▄▄▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
validation_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▃▃▃▄▄▅▅▆▅▆▆▆▆▆▆▆▇▇██▇▇█
validation_loss,█▇▇▆▆▆▆▆▆▆▅▅▅▄▄▃▃▃▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,60.0
test_accuracy,0.2771
training_loss,0.28431
validation_accuracy,0.27441
validation_loss,0.33964


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ghi09al0 with config:
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.75




with attention
Epoch: 0 | Train Loss: 2.2979 | Val Loss: 1.1615 | Val Accuracy: 0.0000%
Epoch: 1 | Train Loss: 1.2564 | Val Loss: 1.1085 | Val Accuracy: 0.0000%
Epoch: 2 | Train Loss: 1.2130 | Val Loss: 1.0808 | Val Accuracy: 0.0000%
Epoch: 3 | Train Loss: 1.1328 | Val Loss: 0.9813 | Val Accuracy: 0.0000%
Epoch: 4 | Train Loss: 1.0976 | Val Loss: 0.9638 | Val Accuracy: 0.0000%
Epoch: 5 | Train Loss: 1.0602 | Val Loss: 0.9512 | Val Accuracy: 0.0000%
Epoch: 6 | Train Loss: 1.0536 | Val Loss: 0.9415 | Val Accuracy: 0.0000%
Epoch: 7 | Train Loss: 1.0364 | Val Loss: 0.9461 | Val Accuracy: 0.0000%
Epoch: 8 | Train Loss: 1.0329 | Val Loss: 0.9445 | Val Accuracy: 0.0000%
Epoch: 9 | Train Loss: 1.0203 | Val Loss: 0.9302 | Val Accuracy: 0.0000%
Epoch: 10 | Train Loss: 1.0188 | Val Loss: 0.9374 | Val Accuracy: 0.0000%
Epoch: 11 | Train Loss: 1.0189 | Val Loss: 0.9316 | Val Accuracy: 0.0000%
Epoch: 12 | Train Loss: 0.9967 | Val Loss: 0.9224 | Val Accuracy: 0.0000%
Epoch: 13 | Train Loss: 1.0096 | 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_accuracy,▁
training_loss,█▄▃▃▃▃▃▃▃▃▃▃▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
validation_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▃▂▂▃▄▄▄▆▆▇▇▇▇█
validation_loss,█▇▆▆▅▆▅▅▅▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁

0,1
epoch,60.0
test_accuracy,0.05249
training_loss,0.61677
validation_accuracy,0.05225
validation_loss,0.55681


[34m[1mwandb[0m: Agent Starting Run: 0wutm11c with config:
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	optimizer: Nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.25




with attention
Epoch: 0 | Train Loss: 1.7433 | Val Loss: 1.0974 | Val Accuracy: 0.0000%
Epoch: 1 | Train Loss: 1.2177 | Val Loss: 1.0819 | Val Accuracy: 0.0000%
Epoch: 2 | Train Loss: 1.1852 | Val Loss: 1.0690 | Val Accuracy: 0.0000%
Epoch: 3 | Train Loss: 1.1679 | Val Loss: 1.1283 | Val Accuracy: 0.0000%
Epoch: 4 | Train Loss: 1.1693 | Val Loss: 1.0964 | Val Accuracy: 0.0000%
Epoch: 5 | Train Loss: 1.1284 | Val Loss: 1.0172 | Val Accuracy: 0.0000%
Epoch: 6 | Train Loss: 1.0721 | Val Loss: 1.0388 | Val Accuracy: 0.0000%
Epoch: 7 | Train Loss: 1.0309 | Val Loss: 0.9356 | Val Accuracy: 0.0000%
Epoch: 8 | Train Loss: 1.0334 | Val Loss: 0.9424 | Val Accuracy: 0.0000%
Epoch: 9 | Train Loss: 1.0111 | Val Loss: 0.9111 | Val Accuracy: 0.0000%
Epoch: 10 | Train Loss: 0.9826 | Val Loss: 0.9277 | Val Accuracy: 0.0244%
Epoch: 11 | Train Loss: 0.9854 | Val Loss: 0.8788 | Val Accuracy: 0.0244%
Epoch: 12 | Train Loss: 0.9754 | Val Loss: 0.8688 | Val Accuracy: 0.0000%
Epoch: 13 | Train Loss: 0.9392 | 

VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.18696998250100955, max=1.…

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_accuracy,▁
training_loss,█▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
validation_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▃▃▄▄▄▅▄▅▆▆▆▆▇▇▇▇▇███▇███
validation_loss,████▇▆▆▆▆▆▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,60.0
test_accuracy,0.28564
training_loss,0.27295
validation_accuracy,0.28467
validation_loss,0.33385


[34m[1mwandb[0m: Agent Starting Run: 3fte60tb with config:
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	optimizer: Nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.25




with attention
Epoch: 0 | Train Loss: 1.7437 | Val Loss: 1.0853 | Val Accuracy: 0.0000%
Epoch: 1 | Train Loss: 1.2108 | Val Loss: 1.0824 | Val Accuracy: 0.0000%
Epoch: 2 | Train Loss: 1.1828 | Val Loss: 1.0675 | Val Accuracy: 0.0000%
Epoch: 3 | Train Loss: 1.1801 | Val Loss: 1.0984 | Val Accuracy: 0.0000%
Epoch: 4 | Train Loss: 1.1669 | Val Loss: 1.0920 | Val Accuracy: 0.0000%
Epoch: 5 | Train Loss: 1.1343 | Val Loss: 1.0536 | Val Accuracy: 0.0000%
Epoch: 6 | Train Loss: 1.0976 | Val Loss: 0.9952 | Val Accuracy: 0.0000%
Epoch: 7 | Train Loss: 1.0442 | Val Loss: 0.9410 | Val Accuracy: 0.0000%
Epoch: 8 | Train Loss: 1.0206 | Val Loss: 0.9370 | Val Accuracy: 0.0000%
Epoch: 9 | Train Loss: 1.0123 | Val Loss: 0.9230 | Val Accuracy: 0.0000%
Epoch: 10 | Train Loss: 0.9930 | Val Loss: 0.9177 | Val Accuracy: 0.0000%
Epoch: 11 | Train Loss: 0.9845 | Val Loss: 0.9120 | Val Accuracy: 0.0000%
Epoch: 12 | Train Loss: 0.9734 | Val Loss: 0.8724 | Val Accuracy: 0.0000%
Epoch: 13 | Train Loss: 0.9339 | 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_accuracy,▁
training_loss,█▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
validation_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▄▄▅▅▅▆▅▆▆▆▆▇▇▇▇▇▇▇▇███
validation_loss,████▇▇▆▆▆▆▅▅▄▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,60.0
test_accuracy,0.29785
training_loss,0.27008
validation_accuracy,0.30322
validation_loss,0.34522


[34m[1mwandb[0m: Agent Starting Run: wvcq42kz with config:
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	optimizer: Nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.25




with attention
Epoch: 0 | Train Loss: 1.6825 | Val Loss: 1.0862 | Val Accuracy: 0.0000%
Epoch: 1 | Train Loss: 1.1954 | Val Loss: 1.0964 | Val Accuracy: 0.0000%
Epoch: 2 | Train Loss: 1.1674 | Val Loss: 1.0267 | Val Accuracy: 0.0000%
Epoch: 3 | Train Loss: 1.1210 | Val Loss: 0.9970 | Val Accuracy: 0.0000%
Epoch: 4 | Train Loss: 1.0639 | Val Loss: 0.9338 | Val Accuracy: 0.0000%
Epoch: 5 | Train Loss: 1.0274 | Val Loss: 0.9398 | Val Accuracy: 0.0000%
Epoch: 6 | Train Loss: 0.9975 | Val Loss: 0.9298 | Val Accuracy: 0.0000%
Epoch: 7 | Train Loss: 0.9806 | Val Loss: 0.9040 | Val Accuracy: 0.0244%
Epoch: 8 | Train Loss: 0.9779 | Val Loss: 0.8526 | Val Accuracy: 0.0732%
Epoch: 9 | Train Loss: 0.8797 | Val Loss: 0.8408 | Val Accuracy: 0.2197%
Epoch: 10 | Train Loss: 0.8749 | Val Loss: 0.7929 | Val Accuracy: 0.2930%
Epoch: 11 | Train Loss: 0.8378 | Val Loss: 0.7624 | Val Accuracy: 0.2441%
Epoch: 12 | Train Loss: 0.7896 | Val Loss: 0.7243 | Val Accuracy: 0.8789%
Epoch: 13 | Train Loss: 0.7277 | 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_accuracy,▁
training_loss,█▆▅▅▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
validation_accuracy,▁▁▁▁▁▁▁▁▁▁▂▂▃▃▄▄▄▄▅▅▅▆▆▆▇▆▇▇▇▇▇▇▇▇████▇█
validation_loss,██▇▇▆▆▆▅▅▄▃▃▃▂▂▂▂▂▂▂▂▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,60.0
test_accuracy,0.32617
training_loss,0.21634
validation_accuracy,0.32617
validation_loss,0.3392


[34m[1mwandb[0m: Agent Starting Run: vvb29xf5 with config:
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: Nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.75


with attention
Epoch: 0 | Train Loss: 1.3823 | Val Loss: 1.0658 | Val Accuracy: 0.0000%
Epoch: 1 | Train Loss: 1.2049 | Val Loss: 1.0422 | Val Accuracy: 0.0000%
Epoch: 2 | Train Loss: 1.1317 | Val Loss: 0.9565 | Val Accuracy: 0.0000%
Epoch: 3 | Train Loss: 1.0917 | Val Loss: 0.9466 | Val Accuracy: 0.0000%
Epoch: 4 | Train Loss: 1.0935 | Val Loss: 0.9345 | Val Accuracy: 0.0000%
Epoch: 5 | Train Loss: 1.0820 | Val Loss: 1.0058 | Val Accuracy: 0.0000%
Epoch: 6 | Train Loss: 1.0837 | Val Loss: 1.0022 | Val Accuracy: 0.0000%
Epoch: 7 | Train Loss: 1.0663 | Val Loss: 0.9221 | Val Accuracy: 0.0000%
Epoch: 8 | Train Loss: 1.0573 | Val Loss: 0.9091 | Val Accuracy: 0.0000%
Epoch: 9 | Train Loss: 1.0249 | Val Loss: 0.8968 | Val Accuracy: 0.0000%
Epoch: 10 | Train Loss: 0.9973 | Val Loss: 0.8542 | Val Accuracy: 0.0000%
Epoch: 11 | Train Loss: 0.9707 | Val Loss: 0.8620 | Val Accuracy: 0.0000%
Epoch: 12 | Train Loss: 0.9605 | Val Loss: 0.8330 | Val Accuracy: 0.0244%
Epoch: 13 | Train Loss: 0.9469 | 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_accuracy,▁
training_loss,█▇▆▆▆▆▆▆▆▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
validation_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▃▃▄▄▅▅▅▆▆▆▆▇▇▇▇▇████
validation_loss,██▇▇▇▇▆▆▆▅▅▅▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,60.0
test_accuracy,0.27515
training_loss,0.18824
validation_accuracy,0.26782
validation_loss,0.38928


[34m[1mwandb[0m: Agent Starting Run: xclnet9j with config:
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: Nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.25


with attention
Epoch: 0 | Train Loss: 1.5887 | Val Loss: 1.0814 | Val Accuracy: 0.0000%
Epoch: 1 | Train Loss: 1.1964 | Val Loss: 0.9735 | Val Accuracy: 0.0000%
Epoch: 2 | Train Loss: 1.1060 | Val Loss: 0.9331 | Val Accuracy: 0.0000%
Epoch: 3 | Train Loss: 1.0846 | Val Loss: 0.9269 | Val Accuracy: 0.0244%
Epoch: 4 | Train Loss: 1.0712 | Val Loss: 0.9225 | Val Accuracy: 0.0244%
Epoch: 5 | Train Loss: 1.0664 | Val Loss: 0.9312 | Val Accuracy: 0.0000%
Epoch: 6 | Train Loss: 1.0481 | Val Loss: 0.9295 | Val Accuracy: 0.0000%
Epoch: 7 | Train Loss: 1.0285 | Val Loss: 0.9402 | Val Accuracy: 0.0000%
Epoch: 8 | Train Loss: 1.0191 | Val Loss: 0.9538 | Val Accuracy: 0.0000%
Epoch: 9 | Train Loss: 0.9942 | Val Loss: 0.8970 | Val Accuracy: 0.0000%
Epoch: 10 | Train Loss: 0.9810 | Val Loss: 0.8907 | Val Accuracy: 0.0000%
Epoch: 11 | Train Loss: 0.9642 | Val Loss: 0.8352 | Val Accuracy: 0.0488%
Epoch: 12 | Train Loss: 0.8995 | Val Loss: 0.7952 | Val Accuracy: 0.0732%
Epoch: 13 | Train Loss: 0.8736 | 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_accuracy,▁
training_loss,█▆▆▆▅▅▅▅▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
validation_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▃▃▄▅▅▅▅▆▆▆▇▆▇▇▇█████████████
validation_loss,█▇▇▇▇▇▆▆▅▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂

0,1
epoch,60.0
test_accuracy,0.34229
training_loss,0.10372
validation_accuracy,0.3418
validation_loss,0.36565


[34m[1mwandb[0m: Agent Starting Run: f72road4 with config:
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: Nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.25


with attention
Epoch: 0 | Train Loss: 1.4723 | Val Loss: 1.0920 | Val Accuracy: 0.0000%
Epoch: 1 | Train Loss: 1.2071 | Val Loss: 1.0824 | Val Accuracy: 0.0000%
Epoch: 2 | Train Loss: 1.1693 | Val Loss: 1.0342 | Val Accuracy: 0.0000%
Epoch: 3 | Train Loss: 1.1139 | Val Loss: 0.9294 | Val Accuracy: 0.0000%
Epoch: 4 | Train Loss: 1.0413 | Val Loss: 0.9053 | Val Accuracy: 0.0244%
Epoch: 5 | Train Loss: 0.9864 | Val Loss: 0.8520 | Val Accuracy: 0.0244%
Epoch: 6 | Train Loss: 0.9329 | Val Loss: 0.8083 | Val Accuracy: 0.1465%
Epoch: 7 | Train Loss: 0.8609 | Val Loss: 0.7355 | Val Accuracy: 0.1221%
Epoch: 8 | Train Loss: 0.8103 | Val Loss: 0.6690 | Val Accuracy: 0.5371%
Epoch: 9 | Train Loss: 0.7249 | Val Loss: 0.5947 | Val Accuracy: 1.9043%
Epoch: 10 | Train Loss: 0.5875 | Val Loss: 0.5441 | Val Accuracy: 5.9326%
Epoch: 11 | Train Loss: 0.5534 | Val Loss: 0.4541 | Val Accuracy: 10.7178%
Epoch: 12 | Train Loss: 0.4507 | Val Loss: 0.4564 | Val Accuracy: 16.0645%
Epoch: 13 | Train Loss: 0.4320 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_accuracy,▁
training_loss,█▇▆▆▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
validation_accuracy,▁▁▁▁▁▁▁▂▄▄▅▅▅▆▇▆▇▇▇▇▇▇███▇▇█████████████
validation_loss,██▇▆▅▅▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂

0,1
epoch,60.0
test_accuracy,0.36157
training_loss,0.03113
validation_accuracy,0.36987
validation_loss,0.47844


[34m[1mwandb[0m: Agent Starting Run: 7gg2pvgv with config:
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.25


with attention
Epoch: 0 | Train Loss: 1.4341 | Val Loss: 1.0865 | Val Accuracy: 0.0000%
Epoch: 1 | Train Loss: 1.2039 | Val Loss: 1.0282 | Val Accuracy: 0.0000%
Epoch: 2 | Train Loss: 1.1934 | Val Loss: 1.0467 | Val Accuracy: 0.0000%
Epoch: 3 | Train Loss: 1.1436 | Val Loss: 1.0648 | Val Accuracy: 0.0000%
Epoch: 4 | Train Loss: 1.1052 | Val Loss: 0.9437 | Val Accuracy: 0.0000%
Epoch: 5 | Train Loss: 1.0466 | Val Loss: 0.8992 | Val Accuracy: 0.0488%
Epoch: 6 | Train Loss: 1.0069 | Val Loss: 0.8633 | Val Accuracy: 0.0244%
Epoch: 7 | Train Loss: 0.9641 | Val Loss: 0.8312 | Val Accuracy: 0.0732%
Epoch: 8 | Train Loss: 0.9277 | Val Loss: 0.7627 | Val Accuracy: 0.1465%
Epoch: 9 | Train Loss: 0.8851 | Val Loss: 0.7721 | Val Accuracy: 0.0244%
Epoch: 10 | Train Loss: 0.8572 | Val Loss: 0.7389 | Val Accuracy: 0.1465%
Epoch: 11 | Train Loss: 0.8175 | Val Loss: 0.7084 | Val Accuracy: 0.2197%
Epoch: 12 | Train Loss: 0.8054 | Val Loss: 0.6824 | Val Accuracy: 0.3662%
Epoch: 13 | Train Loss: 0.7565 | 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_accuracy,▁
training_loss,█▇▆▆▆▅▅▅▅▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
validation_accuracy,▁▁▁▁▁▁▁▁▁▁▂▂▃▃▄▄▅▄▅▆▆▆▆▆▅▇▇▇▇▇█▇▇▇▇▇████
validation_loss,█▇█▇▆▅▅▅▄▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂

0,1
epoch,60.0
test_accuracy,0.25562
training_loss,0.16425
validation_accuracy,0.25195
validation_loss,0.43613
