In [1]:
# Install required packages
!pip install wandb
!pip install GPUtil

Collecting GPUtil
  Downloading GPUtil-1.4.0.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: GPUtil
  Building wheel for GPUtil (setup.py) ... [?25ldone
[?25h  Created wheel for GPUtil: filename=GPUtil-1.4.0-py3-none-any.whl size=7393 sha256=cdf160cd3253e13bc011d0e45be994e0ede7b8dfa6699511a5ea9a09d177ff6c
  Stored in directory: /root/.cache/pip/wheels/a9/8a/bd/81082387151853ab8b6b3ef33426e98f5cbfebc3c397a9d4d0
Successfully built GPUtil
Installing collected packages: GPUtil
Successfully installed GPUtil-1.4.0


In [8]:
import zipfile
import os
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
import gc
import random
import math
import wandb
from GPUtil import showUtilization as gpu_usage
from numba import cuda

wandb.login(key='4734e60951ce310dbe17484eeeb5b3366b54850f')

# zip_file_path = '/kaggle/input/aksharantar-sampled/aksharantar_sampled.zip'
# extracted_folder_path = '/kaggle/input/aksharantar-sampled'

# with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
#     zip_ref.extractall(extracted_folder_path)

# extracted_folder_contents = os.listdir(extracted_folder_path)
# print("Contents of extracted folder:", extracted_folder_contents)

if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Trained on: " + str(device))

train_dataset = pd.read_csv('/kaggle/input/aksharantar-sampled/aksharantar_sampled/hin/hin_train.csv', names=['English', 'Hindi'], header=None)
test_dataset = pd.read_csv('/kaggle/input/aksharantar-sampled/aksharantar_sampled/hin/hin_test.csv', names=['English', 'Hindi'], header=None)
val_dataset = pd.read_csv('/kaggle/input/aksharantar-sampled/aksharantar_sampled/hin/hin_valid.csv', names=['English', 'Hindi'], header=None)


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Trained on: cuda


# Support Functions:

In [9]:
def clear_gpu_cache():
    print("Initial GPU Usage")
    gpu_usage()
    torch.cuda.empty_cache()
    cuda.select_device(0)
    cuda.close()
    cuda.select_device(0)
    print("GPU Usage after emptying the cache")
    gpu_usage()

def split_into_tokens(word):
    tokens = []
    for x in word:
        tokens.append(x)
    return tokens

def encode_english(word):
    tokens = []
    for x in word:
        tokens.append(eng_dict[x])
    for x in range(len(tokens), max_english_length):
        tokens.append(eng_dict['<pad>'])
    return tokens

def encode_hindi(word):
    tokens = []
    for x in word:
        tokens.append(hin_dict[x])
    tokens.append(hin_dict['<eow>'])
    for x in range(len(tokens), max_hindi_length + 1):
        tokens.append(hin_dict['<pad>'])
    return tokens

def encode_test_english(word):
    tokens = []
    for x in word:
        tokens.append(eng_dict[x])
    for x in range(len(tokens), test_max_english_length):
        tokens.append(eng_dict['<pad>'])
    return tokens

def encode_test_hindi(word):
    tokens = []
    for x in word:
        tokens.append(hin_dict[x])
    tokens.append(hin_dict['<eow>'])
    for x in range(len(tokens), test_max_hindi_length):
        tokens.append(hin_dict['<pad>'])
    return tokens

def encode_val_english(word):
    tokens = []
    for x in word:
        tokens.append(eng_dict[x])
    for x in range(len(tokens), val_max_english_length):
        tokens.append(eng_dict['<pad>'])
    return tokens

def encode_val_hindi(word):
    tokens = []
    for x in word:
        tokens.append(hin_dict[x])
    tokens.append(hin_dict['<eow>'])
    for x in range(len(tokens), val_max_hindi_length):
        tokens.append(hin_dict['<pad>'])
    return tokens

def get_word(characters):
    return "".join(characters)

def calculate_accuracy(target, predictions, flag):
    total = 0
    for x in range(len(target)):
        if torch.equal(target[x], predictions[x]):
            total += 1
    return total

def translate_predictions(target, predictions, df):
    i = len(df)
    for x in range(len(predictions)):
        original = []
        for y in target[x]:
            if y != 1:
                original.append(y)
            else:
                break
        predicted = []
        for y in predictions[x]:
            if y != 1:
                predicted.append(y)
            else:
                break
        df.loc[i, ['Original']] = get_word([reverse_hin[x.item()] for x in original])
        df.loc[i, ['Predicted']] = get_word([reverse_hin[x.item()] for x in predicted])
        i += 1
    return df

# Creating initial variables:

In [11]:
split_into_tokens(train_dataset.iloc[0]['Hindi'])

max_english_length = 0
max_hindi_length = 0
test_max_english_length = 0
test_max_hindi_length = 0

for x in range(len(test_dataset)):
    temp = 0
    for y in test_dataset.iloc[x]['English']:
        temp += 1
    test_max_english_length = max(test_max_english_length, temp)

for x in range(len(test_dataset)):
    temp = 0
    for y in test_dataset.iloc[x]['Hindi']:
        temp += 1
    test_max_hindi_length = max(test_max_hindi_length, temp)

val_max_english_length = 0
val_max_hindi_length = 0

for x in range(len(val_dataset)):
    temp = 0
    for y in val_dataset.iloc[x]['English']:
        temp += 1
    val_max_english_length = max(val_max_english_length, temp)

for x in range(len(val_dataset)):
    temp = 0
    for y in val_dataset.iloc[x]['Hindi']:
        temp += 1
    val_max_hindi_length = max(val_max_hindi_length, temp)

english_vocab = []
for x in range(len(train_dataset)):
    temp = 0
    for y in train_dataset.iloc[x]['English']:
        temp += 1
        if y not in english_vocab:
            english_vocab.append(y)
    if temp > max_english_length:
        max_english_length = max(max_english_length, temp)

hindi_vocab = []
for x in range(len(train_dataset)):
    temp = 0
    for y in train_dataset.iloc[x]['Hindi']:
        temp += 1
        if y not in hindi_vocab:
            hindi_vocab.append(y)
    max_hindi_length = max(temp, max_hindi_length)
for x in range(len(test_dataset)):
    for y in test_dataset.iloc[x]['Hindi']:
        if y not in hindi_vocab:
            hindi_vocab.append(y)

english_vocab = sorted(english_vocab)
hindi_vocab = sorted(hindi_vocab)

eng_dict = {}
reverse_eng = {}

for x in range(len(english_vocab)):
    eng_dict[english_vocab[x]] = x + 3
    reverse_eng[x + 3] = english_vocab[x]
eng_dict['<sow>'] = 0
eng_dict['<eow>'] = 1
eng_dict['<pad>'] = 2
reverse_eng[0] = '<sow>'
reverse_eng[1] = '<eow>'
reverse_eng[2] = '<pad>'

hin_dict = {}
reverse_hin = {}
for x in range(len(hindi_vocab)):
    hin_dict[hindi_vocab[x]] = x + 3
    reverse_hin[x + 3] = hindi_vocab[x]
hin_dict['<sow>'] = 0
hin_dict['<eow>'] = 1
hin_dict['<pad>'] = 2
reverse_hin[0] = '<sow>'
reverse_hin[1] = '<eow>'
reverse_hin[2] = '<pad>'

encode_english(train_dataset.iloc[0]['English'])

eng_words = []
hin_words = []
for x in range(len(train_dataset)):
    eng_words.append(encode_english(train_dataset.iloc[x]['English']))
    hin_words.append(encode_hindi(train_dataset.iloc[x]['Hindi']))
eng_words = torch.tensor(eng_words)
hin_words = torch.tensor(hin_words)
max_hindi_length

max_hindi_length += 1
test_max_hindi_length += 1
val_max_hindi_length += 1
max_hindi_length

val_eng_words = []
val_hin_words = []
for x in range(len(val_dataset)):
    val_eng_words.append(encode_val_english(val_dataset.iloc[x]['English']))
    val_hin_words.append(encode_val_hindi(val_dataset.iloc[x]['Hindi']))
val_eng_words = torch.tensor(val_eng_words)
val_hin_words = torch.tensor(val_hin_words)

test_eng_words = []
test_hin_words = []
for x in range(len(test_dataset)):
    test_eng_words.append(encode_test_english(test_dataset.iloc[x]['English']))
    test_hin_words.append(encode_test_hindi(test_dataset.iloc[x]['Hindi']))
test_eng_words = torch.tensor(test_eng_words)
test_hin_words = torch.tensor(test_hin_words)

# Encoder-decoder without attention

In [12]:
class Encoder(nn.Module):
    def __init__(self, char_embed_size, hidden_size, no_of_layers, dropout, rnn):
        super(Encoder, self).__init__()
        self.layer = no_of_layers
        self.rnn = rnn
        self.embedding = nn.Embedding(len(eng_dict), char_embed_size).to(device)
        self.embedding.weight.requires_grad = True
        self.drop = nn.Dropout(dropout)
        self.LSTM = nn.LSTM(char_embed_size, hidden_size, self.layer, batch_first=True, bidirectional=True).to(device)
        self.RNN = nn.RNN(char_embed_size, hidden_size, self.layer, batch_first=True, bidirectional=True).to(device)
        self.GRU = nn.GRU(char_embed_size, hidden_size, self.layer, batch_first=True, bidirectional=True).to(device)

    def forward(self, input, hidden, cell):
        embedded = self.embedding(input)
        embedded1 = self.drop(embedded)
        cell1 = cell
        if self.rnn == 'RNN':
            output, hidden1 = self.RNN(embedded1, hidden)
        elif self.rnn == 'LSTM':
            output, (hidden1, cell1) = self.LSTM(embedded1, (hidden, cell))
        elif self.rnn == 'GRU':
            output, hidden1 = self.GRU(embedded1, hidden)
        return output, (hidden1, cell1)


class DecoderNoAttention(nn.Module):
    def __init__(self, char_embed_size, hidden_size, no_of_layers, dropout, batchsize, rnn):
        super(DecoderNoAttention, self).__init__()
        self.layer = no_of_layers
        self.batchsize = batchsize
        self.hidden_size = hidden_size
        self.rnn = rnn
        self.embedding = nn.Embedding(len(hin_dict), char_embed_size).to(device)
        self.drop = nn.Dropout(dropout)
        self.embedding.weight.requires_grad = True
        self.LSTM = nn.LSTM(char_embed_size + hidden_size * 2, hidden_size, self.layer, batch_first=True).to(device)
        self.RNN = nn.RNN(char_embed_size + hidden_size * 2, hidden_size, self.layer, batch_first=True).to(device)
        self.GRU = nn.GRU(char_embed_size + hidden_size * 2, hidden_size, self.layer, batch_first=True).to(device)
        self.linear = nn.Linear(hidden_size, len(hin_dict), bias=True).to(device)
        self.softmax = nn.Softmax(dim=2).to(device)

    def forward(self, input, hidden, cell, og_hidden, matrix):
        embedded = self.embedding(input)
        s1 = og_hidden.size()[1]
        s2 = og_hidden.size()[2]
        embedded1 = torch.cat((embedded, og_hidden[0].resize(s1, 1, s2), og_hidden[1].resize(s1, 1, s2)), dim=2)
        embedded2 = self.drop(embedded1)
        cell1 = cell
        if self.rnn == 'LSTM':
            output, (hidden1, cell1) = self.LSTM(embedded2, (hidden, cell))
        elif self.rnn == 'RNN':
            output, hidden1 = self.RNN(embedded2, hidden)
        elif self.rnn == 'GRU':
            output, hidden1 = self.GRU(embedded2, hidden)
        output1 = self.linear(output)
        return output1, (hidden1, cell1)

def val_evaluate(attention, val_eng_words, val_hin_words, encoder, decoder, batch_size, hidden_size, char_embed_size, no_of_layers):
    with torch.no_grad():
        total_loss = 0
        total_acc = 0
        for x in range(0, len(val_dataset), batch_size):
            loss = 0
            input_tensor = val_eng_words[x:x + batch_size].to(device)
            if input_tensor.size()[0] < batch_size:
                break
            en_hidden = torch.zeros(2 * no_of_layers, batch_size, hidden_size).to(device)
            en_cell = torch.zeros(2 * no_of_layers, batch_size, hidden_size).to(device)
            output, (hidden, cell) = encoder.forward(input_tensor, en_hidden, en_cell)
            del input_tensor
            del en_hidden
            del en_cell
            output = torch.split(output, [hidden_size, hidden_size], dim=2)
            output = torch.add(output[0], output[1]) / 2
            input2 = []
            for y in range(batch_size):
                input2.append([0])
            input2 = torch.tensor(input2).to(device)
            hidden = hidden.resize(2, no_of_layers, batch_size, hidden_size)
            hidden1 = torch.add(hidden[0], hidden[1]) / 2
            cell = cell.resize(2, no_of_layers, batch_size, hidden_size)
            cell1 = torch.add(cell[0], cell[1]) / 2
            OGhidden = hidden1
            predicted = []
            predictions = []
            if attention:
                temp = output
            else:
                temp = OGhidden
            for i in range(val_max_hindi_length):
                output1, (hidden1, cell1) = decoder.forward(input2, hidden1, cell1, temp, False)
                predicted.append(output1)
                output2 = decoder.softmax(output1)
                output3 = torch.argmax(output2, dim=2)
                predictions.append(output3)
                input2 = output3
            predicted = torch.cat(tuple(x for x in predicted), dim=1).to(device).resize(val_max_hindi_length * batch_size, len(hin_dict))
            predictions = torch.cat(tuple(x for x in predictions), dim=1).to(device)
            total_acc += calculate_accuracy(val_hin_words[x:x + batch_size].to(device), predictions, x)
            loss = nn.CrossEntropyLoss(reduction='sum')(predicted, val_hin_words[x:x + batch_size].reshape(-1).to(device))
            with torch.no_grad():
                total_loss += loss.item()
        validation_loss = total_loss / (len(val_dataset) * val_max_hindi_length)
        validation_accuracy = (total_acc / len(val_dataset)) * 100
        del predictions
        del predicted
        del input2
        del output1
        del output2
        del output3
        del hidden1
        del cell1
        del OGhidden
        del output
        del cell
        return validation_loss, validation_accuracy

In [13]:
def train(batch_size, hidden_size, char_embed_size, no_of_layers, dropout, epochs, rnn):
    gc.collect()
    torch.autograd.set_detect_anomaly(True)
    encoder = Encoder(char_embed_size, hidden_size, no_of_layers, dropout, rnn).to(device)
    decoder = DecoderNoAttention(char_embed_size, hidden_size, no_of_layers, dropout, batch_size, rnn).to(device)
    # print(encoder.parameters)
    # print(decoder.parameters)
    opt_encoder = optim.Adam(encoder.parameters(), lr=0.001)
    opt_decoder = optim.Adam(decoder.parameters(), lr=0.001)
    teacher_ratio = 0.5
    epoch_count = 0
    for _ in range(epochs):
        total_loss = 0
        total_acc = 0
        for x in range(0, len(train_dataset), batch_size):
            loss = 0
            opt_encoder.zero_grad()
            opt_decoder.zero_grad()
            input_tensor = eng_words[x:x + batch_size].to(device)
            # taking initial hidden and cell states as (2* no_of_layers, hidden_size, hidden_size) because I have considered encoder to be bidirectional
            en_hidden = torch.zeros(2 * no_of_layers, batch_size, hidden_size).to(device)
            en_cell = torch.zeros(2 * no_of_layers, batch_size, hidden_size).to(device)
            if input_tensor.size()[0] < batch_size:
                break
            output, (hidden, cell) = encoder.forward(input_tensor, en_hidden, en_cell)
            del en_hidden
            del en_cell
            del input_tensor
            input2 = []
            for y in range(batch_size):
                input2.append([0])
            input2 = torch.tensor(input2).to(device)
            hidden = hidden.resize(2, no_of_layers, batch_size, hidden_size)
            cell = cell.resize(2, no_of_layers, batch_size, hidden_size)
            # averaging due to bidirectional encoder
            hidden1 = torch.add(hidden[0], hidden[1]) / 2
            cell1 = torch.add(cell[0], cell[1]) / 2
            OGhidden = hidden1
            predicted = []
            predictions = []
            use_teacher_forcing = True if random.random() < teacher_ratio else False
            if use_teacher_forcing:
                for i in range(max_hindi_length):
                    output1, (hidden1, cell1) = decoder.forward(input2, hidden1, cell1, OGhidden, False)
                    predicted.append(output1)
                    output2 = decoder.softmax(output1)
                    output3 = torch.argmax(output2, dim=2)
                    predictions.append(output3)
                    input2 = hin_words[x:x + batch_size, i].to(device).resize(batch_size, 1)
            else:
                for i in range(max_hindi_length):
                    output1, (hidden1, cell1) = decoder.forward(input2, hidden1, cell1, OGhidden, False)
                    predicted.append(output1)
                    output2 = decoder.softmax(output1)
                    output3 = torch.argmax(output2, dim=2)
                    predictions.append(output3)
                    input2 = output3
            predicted = torch.cat(tuple(x for x in predicted), dim=1).to(device).resize(max_hindi_length * batch_size, len(hin_dict))
            predictions = torch.cat(tuple(x for x in predictions), dim=1).to(device)
            total_acc += calculate_accuracy(hin_words[x:x + batch_size].to(device), predictions, x)
            loss = nn.CrossEntropyLoss(reduction='sum')(predicted, hin_words[x:x + batch_size].reshape(-1).to(device))
            with torch.no_grad():
                total_loss += loss.item()
            loss.backward(retain_graph=True)
            torch.nn.utils.clip_grad_norm_(encoder.parameters(), max_norm=1)
            torch.nn.utils.clip_grad_norm_(decoder.parameters(), max_norm=1)
            opt_encoder.step()
            opt_decoder.step()
        del predictions
        del predicted
        del input2
        del output1
        del output2
        del output3
        del hidden1
        del cell1
        del OGhidden
        del output
        del cell
        training_loss = total_loss / (51200 * max_hindi_length)
        training_accuracy = total_acc / 512
        validation_loss, validation_accuracy = val_evaluate(False, val_eng_words, val_hin_words, encoder, decoder, batch_size, hidden_size, char_embed_size, no_of_layers)
        wandb.log({'training_accuracy': training_accuracy, 'validation_accuracy': validation_accuracy, 'training_loss': training_loss, 'validation_loss': validation_loss, 'epoch': epoch_count + 1})
        print("Epoch: " + str(epoch_count + 1) + "/" + str(epochs) + "; Train loss: " + str(training_loss) + "; Val loss: " + str(validation_loss))
        epoch_count += 1
    return encoder, decoder


# Sweep:

In [None]:
# helper function for sweep
def withoutattention():
    wandb.init(project='CS6910_assignment_3', entity='sumanta_roy')
    config = wandb.config
    wandb.run.name = "withoutatt_ctype_{}_nlayers_{}_hsize_{}_drop_{}_emb_{}_bs{}".format(config.cell_type,config.no_of_layers,config.hidden_size,config.dropout,config.input_embedding_size,config.batch_size)
    hidden_size = config.hidden_size
    char_embed_size = config.input_embedding_size
    no_of_layers = config.no_of_layers
    dropout = config.dropout
    rnn = config.cell_type
    epochs = 10
    batchsize = config.batch_size
    Encoder1,Decoder1 = train(batchsize,hidden_size,char_embed_size,no_of_layers,dropout,epochs,rnn)
    clear_gpu_cache()

sweep_configuration = {
    'method': 'bayes',
    'name': 'sweep no attention',
    'metric': {
      'name': 'validation_accuracy',
      'goal': 'maximize'
    },
    'parameters': {
        'batch_size': {
            'values': [32,64,128,256]
        },
        'input_embedding_size': {
            'values': [16,32,64,256]
        },
        'no_of_layers': {
            'values': [1,2,3]
        },
        'hidden_size': {
            'values': [16,32,64,256]
        },
        'cell_type': {
            'values': ['LSTM','GRU','RNN']
        },
        'dropout': {
            'values': [0.2,0.3]
        },
    }
}
sweep_id = wandb.sweep(sweep = sweep_configuration,project = 'CS6910_assignment_3')
wandb.agent(sweep_id,function=withoutattention,count = 50)
wandb.finish()

Create sweep with ID: hasvsplb
Sweep URL: https://wandb.ai/sumanta_roy/CS6910_assignment_3/sweeps/hasvsplb


[34m[1mwandb[0m: Agent Starting Run: j3f6c1hh with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	input_embedding_size: 16
[34m[1mwandb[0m: 	no_of_layers: 3




Epoch: 1/10; Train loss: 1.706637475831168; Val loss: 1.244556820108777
Epoch: 2/10; Train loss: 1.2987215783482506; Val loss: 1.1454314532734098
Epoch: 3/10; Train loss: 1.2394102626755124; Val loss: 1.113411429382506
Epoch: 4/10; Train loss: 1.2024151508013408; Val loss: 1.0878860240890866
Epoch: 5/10; Train loss: 1.185526586714245; Val loss: 1.077739912839163
Epoch: 6/10; Train loss: 1.170848401274; Val loss: 1.06685038123812
Epoch: 7/10; Train loss: 1.16487208706992; Val loss: 1.0544828588054294
Epoch: 8/10; Train loss: 1.1543370974631537; Val loss: 1.038617927403677
Epoch: 9/10; Train loss: 1.1424242183140345; Val loss: 1.0266104468277522
Epoch: 10/10; Train loss: 1.123856818335397; Val loss: 1.0213286521888914
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 13% |  2% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 13% |  1% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▅▅▁▅▅▅▅█▅
training_loss,█▃▂▂▂▂▁▁▁▁
validation_accuracy,▁▁▃▅▁▃▃█▁▆
validation_loss,█▅▄▃▃▂▂▂▁▁

0,1
epoch,10.0
training_accuracy,0.00195
training_loss,1.12386
validation_accuracy,0.07324
validation_loss,1.02133


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4orwrznx with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 64
[34m[1mwandb[0m: 	no_of_layers: 2




Epoch: 1/10; Train loss: 1.1835017293975467; Val loss: 0.7171221744446528
Epoch: 2/10; Train loss: 0.6813692403974987; Val loss: 0.5762890052227747
Epoch: 3/10; Train loss: 0.5240337464355287; Val loss: 0.4912290573120117
Epoch: 4/10; Train loss: 0.4655042135147821; Val loss: 0.4834337007431757
Epoch: 5/10; Train loss: 0.42887497538612; Val loss: 0.442183921734492
Epoch: 6/10; Train loss: 0.3844568288893927; Val loss: 0.4275137455690475
Epoch: 7/10; Train loss: 0.36737219767911095; Val loss: 0.425193462343443
Epoch: 8/10; Train loss: 0.3479856344064077; Val loss: 0.40096879715011235
Epoch: 9/10; Train loss: 0.3221687213295982; Val loss: 0.4171911847023737
Epoch: 10/10; Train loss: 0.2994639443783533; Val loss: 0.41628095720495495
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 35% |  4% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 35% |  2% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▂▃▄▅▆▆▇▇█
training_loss,█▄▃▂▂▂▂▁▁▁
validation_accuracy,▁▃▅▆▆▇▇▇██
validation_loss,█▅▃▃▂▂▂▁▁▁

0,1
epoch,10.0
training_accuracy,36.80273
training_loss,0.29946
validation_accuracy,34.44824
validation_loss,0.41628


[34m[1mwandb[0m: Agent Starting Run: hyt5rw2b with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 32
[34m[1mwandb[0m: 	no_of_layers: 2




Epoch: 1/10; Train loss: 1.0892987094322841; Val loss: 0.8269400688863936
Epoch: 2/10; Train loss: 0.8441628762937727; Val loss: 0.7352935221223604
Epoch: 3/10; Train loss: 0.7679419568039122; Val loss: 0.683733140428861
Epoch: 4/10; Train loss: 0.7193877137558801; Val loss: 0.6707785086972373
Epoch: 5/10; Train loss: 0.6974799241622289; Val loss: 0.6625399064450037
Epoch: 6/10; Train loss: 0.6673806009831883; Val loss: 0.6379721384672892
Epoch: 7/10; Train loss: 0.6484806284308433; Val loss: 0.630034632626034
Epoch: 8/10; Train loss: 0.6397004221024968; Val loss: 0.6261021399072239
Epoch: 9/10; Train loss: 0.6284567116413797; Val loss: 0.6219313162423316
Epoch: 10/10; Train loss: 0.6270953099358649; Val loss: 0.6082245122109141
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 38% |  2% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 38% |  2% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▂▃▄▅▆▇▇██
training_loss,█▄▃▂▂▂▁▁▁▁
validation_accuracy,▁▂▄▅▅▆▇▇█▇
validation_loss,█▅▃▃▃▂▂▂▁▁

0,1
epoch,10.0
training_accuracy,8.13867
training_loss,0.6271
validation_accuracy,11.62109
validation_loss,0.60822


[34m[1mwandb[0m: Agent Starting Run: wljtuohj with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	input_embedding_size: 64
[34m[1mwandb[0m: 	no_of_layers: 2




Epoch: 1/10; Train loss: 1.6227987191790627; Val loss: 1.1403558992204212
Epoch: 2/10; Train loss: 1.2322385064760843; Val loss: 1.0406229822408586
Epoch: 3/10; Train loss: 1.119827141648247; Val loss: 0.9867672224839529
Epoch: 4/10; Train loss: 1.0634309683527265; Val loss: 0.9560986728895278
Epoch: 5/10; Train loss: 1.009445663520268; Val loss: 0.9170189953985668
Epoch: 6/10; Train loss: 0.9788757898693993; Val loss: 0.8814303129911423
Epoch: 7/10; Train loss: 0.945692857844489; Val loss: 0.8611852924029032
Epoch: 8/10; Train loss: 0.924655339717865; Val loss: 0.8412277982348487
Epoch: 9/10; Train loss: 0.9043202223664238; Val loss: 0.8246626357237498
Epoch: 10/10; Train loss: 0.8795883921214512; Val loss: 0.8147319207588831
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 14% |  2% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 14% |  1% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▁▁▂▂▃▃▅▆█
training_loss,█▄▃▃▂▂▂▁▁▁
validation_accuracy,▁▁▁▁▂▄▄▇▆█
validation_loss,█▆▅▄▃▂▂▂▁▁

0,1
epoch,10.0
training_accuracy,0.50781
training_loss,0.87959
validation_accuracy,1.97754
validation_loss,0.81473


[34m[1mwandb[0m: Agent Starting Run: 4wcf9tif with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 64
[34m[1mwandb[0m: 	no_of_layers: 3




Epoch: 1/10; Train loss: 1.1381972717103503; Val loss: 0.67083873351415
Epoch: 2/10; Train loss: 0.601467853926477; Val loss: 0.5211097803853807
Epoch: 3/10; Train loss: 0.48946408862159363; Val loss: 0.4749594838136718
Epoch: 4/10; Train loss: 0.41656898183482033; Val loss: 0.4548182693265733
Epoch: 5/10; Train loss: 0.3753643718645686; Val loss: 0.45032412452357157
Epoch: 6/10; Train loss: 0.3445314168078559; Val loss: 0.41650029129925226
Epoch: 7/10; Train loss: 0.32144336721726824; Val loss: 0.433564753759475
Epoch: 8/10; Train loss: 0.3008617274817966; Val loss: 0.4310787071784337
Epoch: 9/10; Train loss: 0.27303455743051713; Val loss: 0.42053197679065524
Epoch: 10/10; Train loss: 0.2567658414017586; Val loss: 0.4154432235019548
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 33% |  4% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 33% |  2% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▂▃▄▅▆▆▇▇█
training_loss,█▄▃▂▂▂▂▁▁▁
validation_accuracy,▁▄▅▆▇▇▇███
validation_loss,█▄▃▂▂▁▁▁▁▁

0,1
epoch,10.0
training_accuracy,44.75781
training_loss,0.25677
validation_accuracy,35.9375
validation_loss,0.41544


[34m[1mwandb[0m: Agent Starting Run: c19xu1vo with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	no_of_layers: 3




Epoch: 1/10; Train loss: 1.2634920481273106; Val loss: 0.7833633820215861
Epoch: 2/10; Train loss: 0.7146614401681083; Val loss: 0.5768500083968753
Epoch: 3/10; Train loss: 0.5385132178806123; Val loss: 0.5104978141330537
Epoch: 4/10; Train loss: 0.4471418941588629; Val loss: 0.48946028096335276
Epoch: 5/10; Train loss: 0.4133984726951236; Val loss: 0.4414904004051572
Epoch: 6/10; Train loss: 0.3716514475005014; Val loss: 0.4337640234402248
Epoch: 7/10; Train loss: 0.33865950453849064; Val loss: 0.4323085007213411
Epoch: 8/10; Train loss: 0.29171149151665826; Val loss: 0.46876061530340285
Epoch: 9/10; Train loss: 0.2822908175559271; Val loss: 0.44884030591873897
Epoch: 10/10; Train loss: 0.2628310580480666; Val loss: 0.46770865008944557
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 40% |  7% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 37% |  3% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▂▃▄▅▅▆▇▇█
training_loss,█▄▃▂▂▂▂▁▁▁
validation_accuracy,▁▂▅▇▇▇▇▇██
validation_loss,█▄▃▂▁▁▁▂▁▂

0,1
epoch,10.0
training_accuracy,45.26758
training_loss,0.26283
validation_accuracy,32.51953
validation_loss,0.46771


[34m[1mwandb[0m: Agent Starting Run: 551ugghw with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	no_of_layers: 3




Epoch: 1/10; Train loss: 1.4168081015632266; Val loss: 0.9152000177474249
Epoch: 2/10; Train loss: 0.8080702209472657; Val loss: 0.6089639351481483
Epoch: 3/10; Train loss: 0.5742055502391997; Val loss: 0.5315153854233878
Epoch: 4/10; Train loss: 0.5063411129088629; Val loss: 0.5111083927608672
Epoch: 5/10; Train loss: 0.45248760756992157; Val loss: 0.45623081638699486
Epoch: 6/10; Train loss: 0.4047857828367324; Val loss: 0.4998375688280378
Epoch: 7/10; Train loss: 0.3805683138256981; Val loss: 0.475804450966063
Epoch: 8/10; Train loss: 0.360584693976811; Val loss: 0.45765141929898945
Epoch: 9/10; Train loss: 0.3595557005064828; Val loss: 0.41161008392061504
Epoch: 10/10; Train loss: 0.33488879799842836; Val loss: 0.42645783651442754
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 47% |  8% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 47% |  4% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▁▃▄▅▆▆▇▇█
training_loss,█▄▃▂▂▁▁▁▁▁
validation_accuracy,▁▃▄▆▇▇████
validation_loss,█▄▃▂▂▂▂▂▁▁

0,1
epoch,10.0
training_accuracy,32.07617
training_loss,0.33489
validation_accuracy,29.88281
validation_loss,0.42646


[34m[1mwandb[0m: Agent Starting Run: 632thtx5 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 64
[34m[1mwandb[0m: 	no_of_layers: 3




Epoch: 1/10; Train loss: 1.142355299052738; Val loss: 0.70116045716263
Epoch: 2/10; Train loss: 0.6007528656437283; Val loss: 0.5269195302611306
Epoch: 3/10; Train loss: 0.48024801416056495; Val loss: 0.4982325626271112
Epoch: 4/10; Train loss: 0.4191255369498616; Val loss: 0.5032950795832134
Epoch: 5/10; Train loss: 0.38092034151156745; Val loss: 0.44882327176275705
Epoch: 6/10; Train loss: 0.35318144792602174; Val loss: 0.44335324601048515
Epoch: 7/10; Train loss: 0.3271816967072941; Val loss: 0.4424736772974332
Epoch: 8/10; Train loss: 0.30205077399810154; Val loss: 0.4262014602621396
Epoch: 9/10; Train loss: 0.27258099675178526; Val loss: 0.43738246992939994
Epoch: 10/10; Train loss: 0.25329966534461296; Val loss: 0.43782842301187064
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 37% |  4% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 37% |  2% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▂▃▄▅▆▆▇▇█
training_loss,█▄▃▂▂▂▂▁▁▁
validation_accuracy,▁▃▅▆▇▇▇███
validation_loss,█▄▃▃▂▁▁▁▁▁

0,1
epoch,10.0
training_accuracy,44.42188
training_loss,0.2533
validation_accuracy,33.69141
validation_loss,0.43783


[34m[1mwandb[0m: Agent Starting Run: wwsqrncj with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 64
[34m[1mwandb[0m: 	no_of_layers: 3




Epoch: 1/10; Train loss: 1.3475143975303288; Val loss: 0.8506745213554019
Epoch: 2/10; Train loss: 0.7671050098964146; Val loss: 0.609976970014118
Epoch: 3/10; Train loss: 0.5783404077802385; Val loss: 0.5206363229524522
Epoch: 4/10; Train loss: 0.484334728377206; Val loss: 0.48088528286843074
Epoch: 5/10; Train loss: 0.4459622295129867; Val loss: 0.4579975236029852
Epoch: 6/10; Train loss: 0.3988532262756711; Val loss: 0.4430681396098364
Epoch: 7/10; Train loss: 0.3681570687180474; Val loss: 0.42653061023780275
Epoch: 8/10; Train loss: 0.3521631688163394; Val loss: 0.4480253500597818
Epoch: 9/10; Train loss: 0.32642869217055187; Val loss: 0.4191303799549739
Epoch: 10/10; Train loss: 0.31017560374169123; Val loss: 0.45492305003461386
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 35% |  5% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 35% |  2% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▁▃▄▅▆▆▇██
training_loss,█▄▃▂▂▂▁▁▁▁
validation_accuracy,▁▃▅▆▆▇▇███
validation_loss,█▄▃▂▂▁▁▁▁▂

0,1
epoch,10.0
training_accuracy,35.07617
training_loss,0.31018
validation_accuracy,32.95898
validation_loss,0.45492


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 2x200ymk with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 64
[34m[1mwandb[0m: 	no_of_layers: 2




Epoch: 1/10; Train loss: 1.391437744867234; Val loss: 0.8911761556352887
Epoch: 2/10; Train loss: 0.8378941688083467; Val loss: 0.6297522527830941
Epoch: 3/10; Train loss: 0.6311048973174322; Val loss: 0.534723835332053
Epoch: 4/10; Train loss: 0.5263244007882618; Val loss: 0.49296919788633076
Epoch: 5/10; Train loss: 0.45962323790504817; Val loss: 0.447666639373416
Epoch: 6/10; Train loss: 0.4301951555978684; Val loss: 0.4484854255403791
Epoch: 7/10; Train loss: 0.37429751021521435; Val loss: 0.4399024759020124
Epoch: 8/10; Train loss: 0.35863147519883654; Val loss: 0.46255176407950266
Epoch: 9/10; Train loss: 0.343917015734173; Val loss: 0.42781623062633334
Epoch: 10/10; Train loss: 0.329557713724318; Val loss: 0.428117824452264
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 37% |  5% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 37% |  2% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▁▂▄▅▅▆▇██
training_loss,█▄▃▂▂▂▁▁▁▁
validation_accuracy,▁▂▄▅▆▇▇███
validation_loss,█▄▃▂▁▁▁▂▁▁

0,1
epoch,10.0
training_accuracy,32.87109
training_loss,0.32956
validation_accuracy,32.73926
validation_loss,0.42812


[34m[1mwandb[0m: Agent Starting Run: ua99di3a with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	no_of_layers: 2




Epoch: 1/10; Train loss: 0.9600080297106788; Val loss: 0.5868990279379345
Epoch: 2/10; Train loss: 0.5232342922119867; Val loss: 0.5035605600902012
Epoch: 3/10; Train loss: 0.44423311420849393; Val loss: 0.4453928711868468
Epoch: 4/10; Train loss: 0.39959594771975565; Val loss: 0.42811018725236255
Epoch: 5/10; Train loss: 0.35978712845416294; Val loss: 0.4154775646470842
Epoch: 6/10; Train loss: 0.3277378817683175; Val loss: 0.4362395930857885
Epoch: 7/10; Train loss: 0.30683329636142365; Val loss: 0.42493308796769097
Epoch: 8/10; Train loss: 0.29200161371912275; Val loss: 0.40453243326573146
Epoch: 9/10; Train loss: 0.2776687615542185; Val loss: 0.4428317035947527
Epoch: 10/10; Train loss: 0.24494951858406974; Val loss: 0.4349809955982935
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 37% |  4% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 37% |  3% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.019 MB of 0.019 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▃▄▅▅▆▆▇▇█
training_loss,█▄▃▃▂▂▂▁▁▁
validation_accuracy,▁▄▆▆▆▇▇▇██
validation_loss,█▅▃▂▁▂▂▁▂▂

0,1
epoch,10.0
training_accuracy,45.6582
training_loss,0.24495
validation_accuracy,36.66992
validation_loss,0.43498


[34m[1mwandb[0m: Agent Starting Run: fzviimjc with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	no_of_layers: 1




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run fzviimjc errored:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/tmp/ipykernel_34/2849900153.py", line 13, in withoutattention
    Encoder1,Decoder1 = train(batchsize,hidden_size,char_embed_size,no_of_layers,dropout,epochs,rnn)
  File "/tmp/ipykernel_34/3900707963.py", line 52, in train
    output1, (hidden1, cell1) = decoder.forward(input2, hidden1, cell1, OGhidden, False)
  File "/tmp/ipykernel_34/6578593.py", line 46, in forward
    embedded1 = torch.cat((embedded, og_hidden[0].resize(s1, 1, s2), og_hidden[1].resize(s1, 1, s2)), dim=2)
IndexError: index 1 is out of bounds for dimension 0 with size 1

[34m[1mwandb[0m: [32m[41mERROR[0m Run fzviimjc errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/opt/conda/lib/python3.10/site-packages/wandb/agents/pyagent.py", line 308, in _run_jo

Epoch: 1/10; Train loss: 1.0141207313537597; Val loss: 0.6171488988967169
Epoch: 2/10; Train loss: 0.5693782927876427; Val loss: 0.5296486091046106
Epoch: 3/10; Train loss: 0.46123056213061014; Val loss: 0.46351765734808786
Epoch: 4/10; Train loss: 0.38625736398356303; Val loss: 0.5077126111303057
Epoch: 5/10; Train loss: 0.34655222793420154; Val loss: 0.46488588409764425
Epoch: 6/10; Train loss: 0.33326195123649777; Val loss: 0.4937998709224519
Epoch: 7/10; Train loss: 0.28871640773046586; Val loss: 0.4780666615281786
Epoch: 8/10; Train loss: 0.2673649269626254; Val loss: 0.47867640001433237
Epoch: 9/10; Train loss: 0.24253261617251806; Val loss: 0.4744393548795155
Epoch: 10/10; Train loss: 0.23216841202406657; Val loss: 0.48818271287849974
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 33% |  6% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 33% |  4% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▂▄▄▅▆▆▇██
training_loss,█▄▃▂▂▂▂▁▁▁
validation_accuracy,▁▅▆▇▇▇████
validation_loss,█▄▁▃▁▂▂▂▁▂

0,1
epoch,10.0
training_accuracy,51.7793
training_loss,0.23217
validation_accuracy,33.00781
validation_loss,0.48818


[34m[1mwandb[0m: Agent Starting Run: adtxhw06 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	no_of_layers: 3




Epoch: 1/10; Train loss: 1.0153245250384013; Val loss: 0.623368379615602
Epoch: 2/10; Train loss: 0.5466772095362346; Val loss: 0.5024128797508421
Epoch: 3/10; Train loss: 0.46950496537344794; Val loss: 0.4739001308168684
Epoch: 4/10; Train loss: 0.40568576667990003; Val loss: 0.45554565106119427
Epoch: 5/10; Train loss: 0.363286366718156; Val loss: 0.4672278548989977
Epoch: 6/10; Train loss: 0.3190952642474856; Val loss: 0.4577542195717494
Epoch: 7/10; Train loss: 0.2802481717722757; Val loss: 0.49995891395069303
Epoch: 8/10; Train loss: 0.2654828914006551; Val loss: 0.45620691989149365
Epoch: 9/10; Train loss: 0.24378314933606557; Val loss: 0.4851389328638713
Epoch: 10/10; Train loss: 0.22873508258944467; Val loss: 0.5012017417521704
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 36% |  6% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 36% |  3% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▂▃▄▅▆▆▇▇█
training_loss,█▄▃▃▂▂▁▁▁▁
validation_accuracy,▁▄▅▇▇█████
validation_loss,█▃▂▁▁▁▃▁▂▃

0,1
epoch,10.0
training_accuracy,52.73633
training_loss,0.22874
validation_accuracy,33.42285
validation_loss,0.5012


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0b5uedc3 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	no_of_layers: 3


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112942488883467, max=1.0…



Epoch: 1/10; Train loss: 0.9416118871598017; Val loss: 0.594383180851028
Epoch: 2/10; Train loss: 0.523915027266457; Val loss: 0.5048093157155173
Epoch: 3/10; Train loss: 0.43841587943690163; Val loss: 0.47145716775031316
Epoch: 4/10; Train loss: 0.3970765275756518; Val loss: 0.42697635293006897
Epoch: 5/10; Train loss: 0.3562189293617294; Val loss: 0.4497822207354364
Epoch: 6/10; Train loss: 0.32860115542298274; Val loss: 0.42770644632123767
Epoch: 7/10; Train loss: 0.3049690703267143; Val loss: 0.40780504296223324
Epoch: 8/10; Train loss: 0.28066254435550597; Val loss: 0.43427974234024685
Epoch: 9/10; Train loss: 0.2610777406891187; Val loss: 0.45175293513706755
Epoch: 10/10; Train loss: 0.23713676737887518; Val loss: 0.4471616539217177
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 35% |  5% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 35% |  4% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▃▄▄▅▆▆▇▇█
training_loss,█▄▃▃▂▂▂▁▁▁
validation_accuracy,▁▄▅▆▆▇▇███
validation_loss,█▅▃▂▃▂▁▂▃▂

0,1
epoch,10.0
training_accuracy,47.4082
training_loss,0.23714
validation_accuracy,35.30273
validation_loss,0.44716


[34m[1mwandb[0m: Agent Starting Run: gjdnjiyb with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	no_of_layers: 3




Epoch: 1/10; Train loss: 1.2613789667401996; Val loss: 0.7756399114926656
Epoch: 2/10; Train loss: 0.7466641357966832; Val loss: 0.5920353900818598
Epoch: 3/10; Train loss: 0.5508693335169838; Val loss: 0.5298708251544407
Epoch: 4/10; Train loss: 0.4885837660517011; Val loss: 0.48192342406227473
Epoch: 5/10; Train loss: 0.4233970027878171; Val loss: 0.4383050203323364
Epoch: 6/10; Train loss: 0.37911893549419584; Val loss: 0.43125597068241667
Epoch: 7/10; Train loss: 0.35857482654707773; Val loss: 0.4543741373788743
Epoch: 8/10; Train loss: 0.32476418603034246; Val loss: 0.4118564242408389
Epoch: 9/10; Train loss: 0.30659315364701406; Val loss: 0.4257818517230806
Epoch: 10/10; Train loss: 0.2570990002722967; Val loss: 0.452132659299033
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 42% |  8% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 41% |  3% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▂▃▄▅▅▆▇▇█
training_loss,█▄▃▃▂▂▂▁▁▁
validation_accuracy,▁▄▆▅▇▇▇▇██
validation_loss,█▄▃▂▂▁▂▁▁▂

0,1
epoch,10.0
training_accuracy,44.16406
training_loss,0.2571
validation_accuracy,32.39746
validation_loss,0.45213


[34m[1mwandb[0m: Agent Starting Run: urm5abtl with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	no_of_layers: 3




Epoch: 1/10; Train loss: 1.1534783975283305; Val loss: 0.6506754585674831
Epoch: 2/10; Train loss: 0.5996869734355381; Val loss: 0.5439011241708483
Epoch: 3/10; Train loss: 0.47151188770929975; Val loss: 0.48095674174172537
Epoch: 4/10; Train loss: 0.40790523829914277; Val loss: 0.4376241131907418
Epoch: 5/10; Train loss: 0.39191540366127375; Val loss: 0.43938732147216797
Epoch: 6/10; Train loss: 0.35422455577623274; Val loss: 0.42729057442574275
Epoch: 7/10; Train loss: 0.332593161378588; Val loss: 0.4119237122081575
Epoch: 8/10; Train loss: 0.31521604245617274; Val loss: 0.42603773943015505
Epoch: 9/10; Train loss: 0.27890667713823775; Val loss: 0.4310554088581176
Epoch: 10/10; Train loss: 0.2650426665374211; Val loss: 0.41204447405678885
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 36% |  6% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 36% |  3% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▂▄▅▅▆▆▇▇█
training_loss,█▄▃▂▂▂▂▁▁▁
validation_accuracy,▁▄▅▅▆▇▇▇██
validation_loss,█▅▃▂▂▁▁▁▂▁

0,1
epoch,10.0
training_accuracy,42.68359
training_loss,0.26504
validation_accuracy,35.96191
validation_loss,0.41204


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: w3jnqa9p with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	no_of_layers: 3




Epoch: 1/10; Train loss: 0.947409882687387; Val loss: 0.5867670689310346
Epoch: 2/10; Train loss: 0.5210630752926781; Val loss: 0.46988943148226964
Epoch: 3/10; Train loss: 0.4341507866552898; Val loss: 0.44715912852968487
Epoch: 4/10; Train loss: 0.38536190383491065; Val loss: 0.43356554671412423
Epoch: 5/10; Train loss: 0.3484727579213324; Val loss: 0.4325007576318014
Epoch: 6/10; Train loss: 0.33088022724503563; Val loss: 0.4216022108282362
Epoch: 7/10; Train loss: 0.3008312544226646; Val loss: 0.43461213402804877
Epoch: 8/10; Train loss: 0.2681870123531137; Val loss: 0.4553650284097308
Epoch: 9/10; Train loss: 0.2558678285706611; Val loss: 0.4426884172218187
Epoch: 10/10; Train loss: 0.22733508774922007; Val loss: 0.457383288868836
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 40% |  5% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 33% |  3% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.018 MB uploaded\r'), FloatProgress(value=0.07570093457943926, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▃▄▄▅▆▆▇▇█
training_loss,█▄▃▃▂▂▂▁▁▁
validation_accuracy,▁▄▆▆▇▇▇███
validation_loss,█▃▂▂▁▁▂▂▂▃

0,1
epoch,10.0
training_accuracy,48.64844
training_loss,0.22734
validation_accuracy,34.91211
validation_loss,0.45738


[34m[1mwandb[0m: Agent Starting Run: k5b7ga3k with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	no_of_layers: 3




Epoch: 1/10; Train loss: 1.4061690330505372; Val loss: 0.9209414550236293
Epoch: 2/10; Train loss: 0.7742803124019078; Val loss: 0.6512774683180309
Epoch: 3/10; Train loss: 0.5631498507090977; Val loss: 0.497496383530753
Epoch: 4/10; Train loss: 0.4879878174690973; Val loss: 0.4570255506606329
Epoch: 5/10; Train loss: 0.42700930970055717; Val loss: 0.4659511503719148
Epoch: 6/10; Train loss: 0.396539451167697; Val loss: 0.44450402685574125
Epoch: 7/10; Train loss: 0.3680822055680411; Val loss: 0.4425251909664699
Epoch: 8/10; Train loss: 0.33784825444221495; Val loss: 0.43333659569422406
Epoch: 9/10; Train loss: 0.3348192666825794; Val loss: 0.4005830202783857
Epoch: 10/10; Train loss: 0.285860314255669; Val loss: 0.4168704492705209
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 47% |  9% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 47% |  4% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.018 MB uploaded\r'), FloatProgress(value=0.07571272783922729, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▁▃▄▅▅▆▇▇█
training_loss,█▄▃▂▂▂▂▁▁▁
validation_accuracy,▁▃▄▅▇▇▇█▇█
validation_loss,█▄▂▂▂▂▂▁▁▁

0,1
epoch,10.0
training_accuracy,37.32617
training_loss,0.28586
validation_accuracy,33.05664
validation_loss,0.41687


[34m[1mwandb[0m: Agent Starting Run: vpd7vusf with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	no_of_layers: 3




Epoch: 1/10; Train loss: 0.7798005704368863; Val loss: 0.5324485152959824
Epoch: 2/10; Train loss: 0.461701283625194; Val loss: 0.4658959312807946
Epoch: 3/10; Train loss: 0.40143253830926756; Val loss: 0.43078693605604623
Epoch: 4/10; Train loss: 0.3593719742411659; Val loss: 0.4401047914510682
Epoch: 5/10; Train loss: 0.32497846877291087; Val loss: 0.45049133985525086
Epoch: 6/10; Train loss: 0.3006912658186186; Val loss: 0.4292509185061568
Epoch: 7/10; Train loss: 0.279557604718776; Val loss: 0.43679821242888767
Epoch: 8/10; Train loss: 0.2572918762585947; Val loss: 0.46212705153794514
Epoch: 9/10; Train loss: 0.24003363926850613; Val loss: 0.4466724283993244
Epoch: 10/10; Train loss: 0.22402604425237294; Val loss: 0.4712499587663582
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 34% |  5% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 34% |  3% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▃▄▅▅▆▆▇██
training_loss,█▄▃▃▂▂▂▁▁▁
validation_accuracy,▁▄▅▇▇▇████
validation_loss,█▃▁▂▂▁▂▃▂▄

0,1
epoch,10.0
training_accuracy,50.47461
training_loss,0.22403
validation_accuracy,34.32617
validation_loss,0.47125


[34m[1mwandb[0m: Agent Starting Run: vsdcjh0j with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	no_of_layers: 3




Epoch: 1/10; Train loss: 1.1585762501898267; Val loss: 0.6712070008118948
Epoch: 2/10; Train loss: 0.6113059933412642; Val loss: 0.5189828659806933
Epoch: 3/10; Train loss: 0.4937044332140968; Val loss: 0.4629302734420413
Epoch: 4/10; Train loss: 0.43857353380748204; Val loss: 0.4828089049884251
Epoch: 5/10; Train loss: 0.40022514593033565; Val loss: 0.43634202295825597
Epoch: 6/10; Train loss: 0.36761260747909547; Val loss: 0.44248960273606436
Epoch: 7/10; Train loss: 0.34107061933903465; Val loss: 0.43175371133145835
Epoch: 8/10; Train loss: 0.3167388016269321; Val loss: 0.41469764922346386
Epoch: 9/10; Train loss: 0.30067478035177503; Val loss: 0.42730310346399036
Epoch: 10/10; Train loss: 0.2846803113392421; Val loss: 0.4185128261645635
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 31% |  6% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 31% |  3% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▂▄▄▅▆▆▇▇█
training_loss,█▄▃▂▂▂▁▁▁▁
validation_accuracy,▁▃▅▆▇▇████
validation_loss,█▄▂▃▂▂▁▁▁▁

0,1
epoch,10.0
training_accuracy,39.36523
training_loss,0.28468
validation_accuracy,33.64258
validation_loss,0.41851


[34m[1mwandb[0m: Agent Starting Run: nccg15c8 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 64
[34m[1mwandb[0m: 	no_of_layers: 3




Epoch: 1/10; Train loss: 1.3448755393709455; Val loss: 0.8784490085783458
Epoch: 2/10; Train loss: 0.767744814327785; Val loss: 0.5932387113571167
Epoch: 3/10; Train loss: 0.5921627866654169; Val loss: 0.5049161371730623
Epoch: 4/10; Train loss: 0.4879154077030364; Val loss: 0.5019743683792296
Epoch: 5/10; Train loss: 0.4442544616404034; Val loss: 0.4545060240087055
Epoch: 6/10; Train loss: 0.4124255319436391; Val loss: 0.46287506960687186
Epoch: 7/10; Train loss: 0.3792462876864842; Val loss: 0.4258213575397219
Epoch: 8/10; Train loss: 0.35774403214454653; Val loss: 0.45032124363240744
Epoch: 9/10; Train loss: 0.3361634756553741; Val loss: 0.4185601756686256
Epoch: 10/10; Train loss: 0.30942357997099557; Val loss: 0.406534744870095
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 35% |  6% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 35% |  3% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▁▃▄▅▆▆▇▇█
training_loss,█▄▃▂▂▂▁▁▁▁
validation_accuracy,▁▃▄▆▆▇▇▇██
validation_loss,█▄▂▂▂▂▁▂▁▁

0,1
epoch,10.0
training_accuracy,34.61523
training_loss,0.30942
validation_accuracy,32.37305
validation_loss,0.40653


[34m[1mwandb[0m: Agent Starting Run: 9rcp3qt1 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	no_of_layers: 3




Epoch: 1/10; Train loss: 1.0390160732042222; Val loss: 0.6810741055579412
Epoch: 2/10; Train loss: 0.5863680823644002; Val loss: 0.5141920333816892
Epoch: 3/10; Train loss: 0.48203346490859983; Val loss: 0.4967244735785893
Epoch: 4/10; Train loss: 0.4083206030868349; Val loss: 0.4511074885016396
Epoch: 5/10; Train loss: 0.3611728819778987; Val loss: 0.4309945695457004
Epoch: 6/10; Train loss: 0.32057288669404527; Val loss: 0.444243202606837
Epoch: 7/10; Train loss: 0.3035358659710203; Val loss: 0.4783408585048857
Epoch: 8/10; Train loss: 0.2681768864677066; Val loss: 0.47077012062072754
Epoch: 9/10; Train loss: 0.2572856466543107; Val loss: 0.5008011928626469
Epoch: 10/10; Train loss: 0.23553735105764298; Val loss: 0.4991646054245177
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 35% |  6% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 35% |  3% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▂▃▄▅▆▆▇▇█
training_loss,█▄▃▃▂▂▂▁▁▁
validation_accuracy,▁▄▆▆▆▆▇█▇█
validation_loss,█▃▃▂▁▁▂▂▃▃

0,1
epoch,10.0
training_accuracy,50.21875
training_loss,0.23554
validation_accuracy,36.08398
validation_loss,0.49916


[34m[1mwandb[0m: Agent Starting Run: 2qjungw2 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	no_of_layers: 3




Epoch: 1/10; Train loss: 0.9341631642409733; Val loss: 0.5531874497731527
Epoch: 2/10; Train loss: 0.5114984550362541; Val loss: 0.5412943945044563
Epoch: 3/10; Train loss: 0.43195813786415826; Val loss: 0.453970519559724
Epoch: 4/10; Train loss: 0.38561937289578574; Val loss: 0.43051036447286606
Epoch: 5/10; Train loss: 0.35676061218693145; Val loss: 0.42025464170035864
Epoch: 6/10; Train loss: 0.3248747067224412; Val loss: 0.4388078071531795
Epoch: 7/10; Train loss: 0.30597308458316896; Val loss: 0.42073686456396464
Epoch: 8/10; Train loss: 0.2803291775924819; Val loss: 0.44396723097278956
Epoch: 9/10; Train loss: 0.25944013983011244; Val loss: 0.4473394421594484
Epoch: 10/10; Train loss: 0.23853550244654928; Val loss: 0.45684045233896803
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 36% |  5% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 30% |  3% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▃▄▅▅▆▆▇▇█
training_loss,█▄▃▂▂▂▂▁▁▁
validation_accuracy,▁▅▅▇▆▇▇███
validation_loss,█▇▃▂▁▂▁▂▂▃

0,1
epoch,10.0
training_accuracy,47.24219
training_loss,0.23854
validation_accuracy,36.23047
validation_loss,0.45684


[34m[1mwandb[0m: Agent Starting Run: yeazqt70 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	no_of_layers: 2




Epoch: 1/10; Train loss: 0.8213842109555289; Val loss: 0.5696084847052892
Epoch: 2/10; Train loss: 0.48128057022889453; Val loss: 0.47365615020195645
Epoch: 3/10; Train loss: 0.40696500840641203; Val loss: 0.4487497728495371
Epoch: 4/10; Train loss: 0.3684616581882749; Val loss: 0.4192034240279879
Epoch: 5/10; Train loss: 0.3412001155955451; Val loss: 0.42108325766665594
Epoch: 6/10; Train loss: 0.30983836530219944; Val loss: 0.4124781677410716
Epoch: 7/10; Train loss: 0.2902458545565605; Val loss: 0.4324939169344448
Epoch: 8/10; Train loss: 0.2818252511677288; Val loss: 0.43294421831766766
Epoch: 9/10; Train loss: 0.2666201466463861; Val loss: 0.41017784604004454
Epoch: 10/10; Train loss: 0.2478728669597989; Val loss: 0.43345470868405844
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 34% |  4% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 34% |  3% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▃▄▅▅▆▇▇██
training_loss,█▄▃▂▂▂▂▁▁▁
validation_accuracy,▁▄▆▆▇▇▇▇██
validation_loss,█▄▃▁▁▁▂▂▁▂

0,1
epoch,10.0
training_accuracy,46.34375
training_loss,0.24787
validation_accuracy,37.10938
validation_loss,0.43345


[34m[1mwandb[0m: Agent Starting Run: 22y4a6bv with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	no_of_layers: 2




Epoch: 1/10; Train loss: 0.7828876503592446; Val loss: 0.5235635652428582
Epoch: 2/10; Train loss: 0.4596713584377652; Val loss: 0.46894577358450207
Epoch: 3/10; Train loss: 0.3927089924471719; Val loss: 0.4262449805225645
Epoch: 4/10; Train loss: 0.3561484653183392; Val loss: 0.4338567129203251
Epoch: 5/10; Train loss: 0.3237172264996029; Val loss: 0.40922697810899644
Epoch: 6/10; Train loss: 0.2954283404917944; Val loss: 0.4238710531166622
Epoch: 7/10; Train loss: 0.26824843852292923; Val loss: 0.4144610996757235
Epoch: 8/10; Train loss: 0.25024014646098725; Val loss: 0.44252740911075045
Epoch: 9/10; Train loss: 0.23231930979660578; Val loss: 0.43939217357408433
Epoch: 10/10; Train loss: 0.22166679073657308; Val loss: 0.439725161308334
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 38% |  4% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 28% |  3% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.019 MB of 0.019 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▃▄▅▅▆▇▇██
training_loss,█▄▃▃▂▂▂▁▁▁
validation_accuracy,▁▄▅▇▆▇███▇
validation_loss,█▅▂▃▁▂▁▃▃▃

0,1
epoch,10.0
training_accuracy,51.39062
training_loss,0.22167
validation_accuracy,36.03516
validation_loss,0.43973


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8uwaq4tv with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	no_of_layers: 3




Epoch: 1/10; Train loss: 0.7951328099767367; Val loss: 0.5331302301159927
Epoch: 2/10; Train loss: 0.46731582078195755; Val loss: 0.4898726223480134
Epoch: 3/10; Train loss: 0.4037213013285682; Val loss: 0.4278997651168278
Epoch: 4/10; Train loss: 0.3583952979388691; Val loss: 0.45844907597416923
Epoch: 5/10; Train loss: 0.3355236108104388; Val loss: 0.40455780054132146
Epoch: 6/10; Train loss: 0.2986525982547374; Val loss: 0.4355945840832733
Epoch: 7/10; Train loss: 0.2813870633322568; Val loss: 0.4249773073409285
Epoch: 8/10; Train loss: 0.26762521031711783; Val loss: 0.4351799651270821
Epoch: 9/10; Train loss: 0.24736698809478963; Val loss: 0.45195772250493366
Epoch: 10/10; Train loss: 0.2243968548448313; Val loss: 0.4666368154187997
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 34% |  4% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 29% |  3% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▃▄▅▅▆▇▇▇█
training_loss,█▄▃▃▂▂▂▂▁▁
validation_accuracy,▁▅▆▇▇█████
validation_loss,█▆▂▄▁▃▂▃▄▄

0,1
epoch,10.0
training_accuracy,49.94531
training_loss,0.2244
validation_accuracy,35.5957
validation_loss,0.46664


[34m[1mwandb[0m: Agent Starting Run: 6zmrect4 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	no_of_layers: 2




Epoch: 1/10; Train loss: 0.8143632810456413; Val loss: 0.5292704020227704
Epoch: 2/10; Train loss: 0.4696939469802947; Val loss: 0.4955718417962392
Epoch: 3/10; Train loss: 0.40529380897680917; Val loss: 0.4535914829799107
Epoch: 4/10; Train loss: 0.3713316374023755; Val loss: 0.4347369018055144
Epoch: 5/10; Train loss: 0.34479155346041634; Val loss: 0.4197609864530109
Epoch: 6/10; Train loss: 0.3110156669361251; Val loss: 0.41955813268820447
Epoch: 7/10; Train loss: 0.2783420411036128; Val loss: 0.41959207504987717
Epoch: 8/10; Train loss: 0.27426367122502554; Val loss: 0.4180212336636725
Epoch: 9/10; Train loss: 0.2483479197536196; Val loss: 0.43055185214394615
Epoch: 10/10; Train loss: 0.23188758978531473; Val loss: 0.4399903884955815
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 36% |  4% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 36% |  3% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▃▄▅▅▆▇▇██
training_loss,█▄▃▃▂▂▂▂▁▁
validation_accuracy,▁▅▅▇▇▇████
validation_loss,█▆▃▂▁▁▁▁▂▂

0,1
epoch,10.0
training_accuracy,48.60742
training_loss,0.23189
validation_accuracy,35.18066
validation_loss,0.43999


[34m[1mwandb[0m: Agent Starting Run: vn6mprpr with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 64
[34m[1mwandb[0m: 	no_of_layers: 3




Epoch: 1/10; Train loss: 1.12588813214075; Val loss: 0.6770826507182348
Epoch: 2/10; Train loss: 0.5906533005975542; Val loss: 0.5362006858700797
Epoch: 3/10; Train loss: 0.45920020940757933; Val loss: 0.47808629735594704
Epoch: 4/10; Train loss: 0.4145887103818712; Val loss: 0.4405072636547543
Epoch: 5/10; Train loss: 0.36937514842975705; Val loss: 0.44210844735304516
Epoch: 6/10; Train loss: 0.3483811861702374; Val loss: 0.40704531754766193
Epoch: 7/10; Train loss: 0.3195512365585282; Val loss: 0.42598930178653627
Epoch: 8/10; Train loss: 0.2964397738661085; Val loss: 0.4292465934441203
Epoch: 9/10; Train loss: 0.27047446607124237; Val loss: 0.4194127225450107
Epoch: 10/10; Train loss: 0.2543595310690857; Val loss: 0.4450557210615703
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 39% |  5% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 31% |  4% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▂▄▄▅▆▆▇██
training_loss,█▄▃▂▂▂▂▁▁▁
validation_accuracy,▁▄▅▆▇▇████
validation_loss,█▄▃▂▂▁▁▂▁▂

0,1
epoch,10.0
training_accuracy,44.67969
training_loss,0.25436
validation_accuracy,34.64355
validation_loss,0.44506


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: g9qdy01p with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	no_of_layers: 3




Epoch: 1/10; Train loss: 1.1599456583885919; Val loss: 0.6592192082178026
Epoch: 2/10; Train loss: 0.606011308715457; Val loss: 0.5345589830761864
Epoch: 3/10; Train loss: 0.49604145663125176; Val loss: 0.5120600320044018
Epoch: 4/10; Train loss: 0.43370156878516786; Val loss: 0.4708567126875832
Epoch: 5/10; Train loss: 0.3926756752104986; Val loss: 0.4444099927232379
Epoch: 6/10; Train loss: 0.3597916568460919; Val loss: 0.44793010254700977
Epoch: 7/10; Train loss: 0.3329562645866757; Val loss: 0.45756937634377254
Epoch: 8/10; Train loss: 0.3032624908855983; Val loss: 0.4389546023947852
Epoch: 9/10; Train loss: 0.28679381200245446; Val loss: 0.4396777067865644
Epoch: 10/10; Train loss: 0.26661173636005037; Val loss: 0.4407134893394652
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 | 33% |  6% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 | 33% |  3% |
|  1 |  0% |  0% |


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▂▃▄▅▆▆▇▇█
training_loss,█▄▃▂▂▂▂▁▁▁
validation_accuracy,▁▃▅▆▆▇████
validation_loss,█▄▃▂▁▁▂▁▁▁

0,1
epoch,10.0
training_accuracy,42.27344
training_loss,0.26661
validation_accuracy,34.32617
validation_loss,0.44071


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ns3gjtbc with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	input_embedding_size: 16
[34m[1mwandb[0m: 	no_of_layers: 1




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run ns3gjtbc errored:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/tmp/ipykernel_34/2849900153.py", line 13, in withoutattention
    Encoder1,Decoder1 = train(batchsize,hidden_size,char_embed_size,no_of_layers,dropout,epochs,rnn)
  File "/tmp/ipykernel_34/3900707963.py", line 44, in train
    output1, (hidden1, cell1) = decoder.forward(input2, hidden1, cell1, OGhidden, False)
  File "/tmp/ipykernel_34/6578593.py", line 46, in forward
    embedded1 = torch.cat((embedded, og_hidden[0].resize(s1, 1, s2), og_hidden[1].resize(s1, 1, s2)), dim=2)
IndexError: index 1 is out of bounds for dimension 0 with size 1

[34m[1mwandb[0m: [32m[41mERROR[0m Run ns3gjtbc errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/opt/conda/lib/python3.10/site-packages/wandb/agents/pyagent.py", line 308, in _run_jo

Epoch: 1/10; Train loss: 1.1949039522806804; Val loss: 0.7672056697663807
