## IMPORTING LIBRARIES

In [1]:
import requests,zipfile,io
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from torch import optim
import numpy as np
import random
import torch.nn.functional as F
import warnings
warnings.filterwarnings("ignore")
import wandb

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.cuda.empty_cache()

In [3]:
print(device)

cuda


## DOWNLOADING AND UNZIPPING DATA

In [4]:
def download_data(url="https://drive.google.com/u/0/uc?id=1uRKU4as2NlS9i8sdLRS1e326vQRdhvfw&export=download"):
    response=requests.get(url)
    z = zipfile.ZipFile(io.BytesIO(response.content))
    z.extractall()

## METHODS FOR GETTING CHARACTERS FOR CORPUSS AND ADDING THEIR INDICES

In [5]:
def get_corpus(data):
    eng_corpus=set()
    hin_corpus=set()
    for i in range(0,len(data)):
        eng_word=data[0][i]
        hin_word=data[1][i]
        for ch in eng_word:
            eng_corpus.add(ch)
        for ch in hin_word:
            hin_corpus.add(ch)
        # End Delimiter
        eng_corpus.add('#')
        hin_corpus.add('#')
        hin_corpus.add('$')
        eng_corpus.add('$')
        # Start Delimiter
#         eng_corpus.add('^')
        hin_corpus.add('^')
    return hin_corpus,eng_corpus

In [6]:
def word2index(data):
    hin_corpus,eng_corpus=get_corpus(data)
    engchar_idx={}
    hinchar_idx={}
    idx_engchar={}
    idx_hinchar={}
    i=0
    for char in eng_corpus:
        engchar_idx[char]=i
        idx_engchar[i]=char
        i+=1
    i=0
    for char in hin_corpus:
        hinchar_idx[char]=i
        idx_hinchar[i]=char
        i+=1
    return engchar_idx,hinchar_idx,idx_engchar,idx_hinchar,len(eng_corpus),len(hin_corpus)

## DATA PREPROCESSING

In [7]:
def maxlen(data):
    maxlen_eng=0
    maxlen_hin=0
    for i in range(0,len(data)):
        eng_word=data[0][i]
        hin_word=data[1][i]
        if(len(eng_word)>maxlen_eng):
            maxlen_eng=len(eng_word)
        if(len(hin_word)>maxlen_hin):
            maxlen_hin=len(hin_word)
    return maxlen_eng,maxlen_hin

In [8]:
def pre_process(data,eng_to_idx,hin_to_idx):
    eng=[]
    hin=[]
    maxlen_eng,maxlen_hin=maxlen(data)
    
    unknown= eng_to_idx['$']
    for i in range(0,len(data)):
        sz=0
        eng_word=data[0][i]
        hin_word='^'+data[1][i]
        eng_word = eng_word.ljust(maxlen_eng+1, '#')
        hin_word = hin_word.ljust(maxlen_hin+1, '#')
        idx=[]
        for char in eng_word:
            if eng_to_idx.get(char) is not None:
                idx.append(eng_to_idx[char])
            else:
                idx.append(unknown)
        eng.append(idx)
        idx=[]
        for char in hin_word:
            if hin_to_idx.get(char) is not None:
                idx.append(hin_to_idx[char])
            else:
                idx.append(unknown)
        hin.append(idx)    
    return eng,hin

## LOADING OUR CUSTOM DATASET TO DATALOADER

In [9]:
class MyDataset(Dataset):
    def __init__(self, train_x,train_y, transform=None):
        self.train_x = train_x
        self.train_y = train_y
        self.transform = transform
        
    
    def __len__(self):
        return len(self.train_x)
    
    def __getitem__(self, idx):
        if self.transform:
            sample = self.transform(sample)
        return torch.tensor(self.train_x[idx]).to(device),torch.tensor(self.train_y[idx]).to(device)

def get_data():
    download_data()
    
    train_df=pd.read_csv("aksharantar_sampled/hin/hin_train.csv",header=None)
    test_df=pd.read_csv("aksharantar_sampled/hin/hin_test.csv",header=None)
    val_df=pd.read_csv("aksharantar_sampled/hin/hin_valid.csv",header=None)
    eng_to_idx,hin_to_idx,idx_to_eng,idx_to_hin,input_len,target_len=word2index(train_df)
    
    return train_df,test_df,val_df,eng_to_idx,hin_to_idx,idx_to_eng,idx_to_hin,input_len,target_len

## Seq2Seq MODEL

In [10]:
class EncoderGRU(nn.Module):
    def __init__(self,input_size,hidden_size,embedding_size,num_of_layers,batch_size,bi_directional,dropout_p=0.1):
        super(EncoderGRU,self).__init__()
        self.hidden_size=hidden_size
        self.batch_size=batch_size
        self.input_size=input_size
        self.embedding_size=embedding_size
        self.embedding=nn.Embedding(input_size,embedding_size)
        self.num_of_layers=num_of_layers
        self.bi_directional=bi_directional
        if(bi_directional=="Yes"):
            flag=True
        else:
            flag=False
        self.gru = nn.GRU(embedding_size,hidden_size,num_of_layers,bidirectional=flag)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self,input,hidden):
        embedded=self.embedding(input).view(-1,self.batch_size, self.embedding_size)
        embedded = self.dropout(embedded)
        output,hidden=self.gru(embedded,hidden)
    
        if self.bi_directional=="Yes":
            hidden=hidden.resize(2,self.num_of_layers,self.batch_size,self.hidden_size)
            hidden=torch.add(hidden[0],hidden[1])/2
            
        return output,hidden

    def initHidden(self):
        if(self.bi_directional=="Yes"):
            return torch.zeros(2*self.num_of_layers,self.batch_size,self.hidden_size,device=device)
        else:
            return torch.zeros(self.num_of_layers,self.batch_size,self.hidden_size,device=device)

class DecoderGRU(nn.Module):
    def __init__(self, output_size,hidden_size, embedding_size, decoder_layers,batch_size,dropout_p=0.1):
        super(DecoderGRU, self).__init__()
        self.hidden_size = hidden_size
        self.embedding_size=embedding_size
        self.embedding = nn.Embedding(output_size, embedding_size)
        self.gru = nn.GRU(embedding_size,hidden_size, decoder_layers,dropout = dropout_p)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=2)
        self.batch_size=batch_size
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(-1, self.batch_size, self.embedding_size)
#         embedded = self.dropout(embedded)
        output, hidden = self.gru(embedded, hidden)
        output = self.softmax(self.out(output))
        return output, hidden

In [11]:
class EncoderRNN(nn.Module):
    def __init__(self,input_size,hidden_size,embedding_size,num_of_layers,batch_size,bi_directional,dropout_p=0.1):
        super(EncoderRNN,self).__init__()
        self.hidden_size=hidden_size
        self.batch_size=batch_size
        self.input_size=input_size
        self.embedding_size=embedding_size
        self.embedding=nn.Embedding(input_size,embedding_size)
        self.num_of_layers=num_of_layers
        self.bi_directional=bi_directional
        if(bi_directional=="Yes"):
            flag=True
        else:
            flag=False
        self.rnn = nn.RNN(embedding_size,hidden_size,num_of_layers,bidirectional=flag)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self,input,hidden):
        embedded=self.embedding(input).view(-1,self.batch_size, self.embedding_size)
        embedded = self.dropout(embedded)
        output,hidden=self.rnn(embedded,hidden)
    
        if self.bi_directional=="Yes":
            hidden=hidden.resize(2,self.num_of_layers,self.batch_size,self.hidden_size)
            hidden=torch.add(hidden[0],hidden[1])/2
            
        return output,hidden

    def initHidden(self):
        if(self.bi_directional=="Yes"):
            return torch.zeros(2*self.num_of_layers,self.batch_size,self.hidden_size,device=device)
        else:
            return torch.zeros(self.num_of_layers,self.batch_size,self.hidden_size,device=device)

class DecoderRNN(nn.Module):
    def __init__(self, output_size,hidden_size, embedding_size, decoder_layers,batch_size,dropout_p=0.1):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding_size=embedding_size
        self.embedding = nn.Embedding(output_size, embedding_size)
        self.rnn = nn.RNN(embedding_size,hidden_size, decoder_layers,dropout = dropout_p)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=2)
        self.batch_size=batch_size
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(-1, self.batch_size, self.embedding_size)
#         embedded = self.dropout(embedded)
        output, hidden = self.rnn(embedded, hidden)
        output = self.softmax(self.out(output))
        return output, hidden

In [12]:
class EncoderLSTM(nn.Module):
    def __init__(self,input_size,hidden_size,embedding_size,num_of_layers,batch_size,bi_directional,dropout_p=0.1):
        super(EncoderLSTM,self).__init__()
        self.hidden_size=hidden_size
        self.batch_size=batch_size
        self.input_size=input_size
        self.embedding_size=embedding_size
        self.embedding=nn.Embedding(input_size,embedding_size)
        self.num_of_layers=num_of_layers
        self.bi_directional=bi_directional
        if(bi_directional=="Yes"):
            flag=True
        else:
            flag=False
        self.lstm = nn.LSTM(embedding_size,hidden_size,num_of_layers,bidirectional=flag)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self,input,hidden,state):
        embedded=self.embedding(input).view(-1,self.batch_size, self.embedding_size)
        embedded = self.dropout(embedded)
        output,(hidden,state)=self.lstm(embedded,(hidden,state))
    
        if self.bi_directional=="Yes":
            hidden=hidden.resize(2,self.num_of_layers,self.batch_size,self.hidden_size)
            state=state.resize(2,self.num_of_layers,self.batch_size,self.hidden_size)
            hidden=torch.add(hidden[0],hidden[1])/2
            state=torch.add(state[0],hidden[1])/2
            
        return output,hidden,state

    def initHidden(self):
        if(self.bi_directional=="Yes"):
            return torch.zeros(2*self.num_of_layers,self.batch_size,self.hidden_size,device=device)
        else:
            return torch.zeros(self.num_of_layers,self.batch_size,self.hidden_size,device=device)
    
    def initState(self):
        if(self.bi_directional=="Yes"):
            return torch.zeros(2*self.num_of_layers,self.batch_size,self.hidden_size,device=device)
        else:
            return torch.zeros(self.num_of_layers,self.batch_size,self.hidden_size,device=device)

class DecoderLSTM(nn.Module):
    def __init__(self, output_size,hidden_size, embedding_size, decoder_layers,batch_size,dropout_p=0.1):
        super(DecoderLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.embedding_size=embedding_size
        self.embedding = nn.Embedding(output_size, embedding_size)
        self.lstm = nn.LSTM(embedding_size,hidden_size,decoder_layers,dropout = dropout_p)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=2)
        self.batch_size=batch_size
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, input,hidden,state):
        embedded = self.embedding(input).view(-1, self.batch_size, self.embedding_size)
#         embedded = self.dropout(embedded)
        output,(hidden,state)=self.lstm(embedded,(hidden,state))
        output = self.softmax(self.out(output))
        return output,hidden,state

## ATTENTION MECHANISM

In [13]:
class AttnDecoder(nn.Module):
    def __init__(self,output_size,hidden_size,embedding_size,decoder_layers,batch_size,cell_type,dropout_p=0.1):
        super(AttnDecoder, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.batch_size=batch_size
        self.cell_type=cell_type
        self.embedding_size=embedding_size
        self.decoder_layers=decoder_layers
        
        self.embedding = nn.Embedding(self.output_size, self.embedding_size)
        self.dropout = nn.Dropout(self.dropout_p)

        self.U=nn.Linear(self.hidden_size,self.hidden_size,bias=False).to(device)
        self.W=nn.Linear(self.hidden_size,self.hidden_size,bias=False).to(device)
        self.V=nn.Linear(self.hidden_size,1,bias=False).to(device)
        
        self.linear=nn.Linear(self.hidden_size,output_size,bias=True)
        self.softmax=nn.LogSoftmax()
        self.softmax1=nn.LogSoftmax(dim=2)
        if(cell_type=="GRU"):
            self.gru = nn.GRU(self.embedding_size+self.hidden_size, self.hidden_size,self.decoder_layers,dropout = dropout_p)
        if(cell_type=="LSTM"):
            self.lstm = nn.LSTM(self.embedding_size+self.hidden_size, self.hidden_size,self.decoder_layers,dropout = dropout_p)
        if(cell_type=="RNN"):
            self.rnn = nn.RNN(self.embedding_size+self.hidden_size, self.hidden_size,self.decoder_layers,dropout = dropout_p)

    def forward(self, input, hidden,encoder_outputs,word_length,state=None):
        embedded = self.embedding(input).view(-1,self.batch_size, self.embedding_size)
#         embedded = self.dropout(embedded)
        T=word_length
        temp=self.W(hidden[-1])
        c=torch.zeros(self.batch_size,self.hidden_size).to(device)
        
        
        for j in range(0,T):
            e_j=self.V(torch.tanh(self.U(encoder_outputs[j])+temp))
            alpha_j=self.softmax(e_j)
            c+=alpha_j*encoder_outputs[j]
        

        final_input=torch.cat((embedded,c.unsqueeze(0)),dim=2)
        
        
        if(self.cell_type=="GRU"):
            output,hidden=self.gru(final_input,hidden)
        if(self.cell_type=="RNN"):
            output,hidden=self.rnn(final_input,hidden)
        if(self.cell_type=="LSTM"):
            output, (hidden,state) =self.lstm(final_input,(hidden,state))
        
        output1=self.softmax1(self.linear(output))
        
        if(self.cell_type=="GRU" or self.cell_type=="RNN"):
            return output1, hidden, c
        if(self.cell_type=="LSTM"):
            return output1, hidden, state, c

In [14]:
def train(train_data,encoder,decoder,loss_fun,encoder_optimizer,decoder_optimizer,encoder_layers,decoder_layers,batch_size,hidden_size,bi_directional,cell_type,attention):
    total_loss=0
    teacher_forcing_ratio=0.5
    for i,(train_x,train_y) in enumerate(train_data):
        loss=0
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()
        train_x=train_x.T
        train_y=train_y.T
        timesteps=len(train_x)
        
        if cell_type=='GRU' or cell_type=='RNN':
            
            encoder_hidden=encoder.initHidden()
            encoder_output,encoder_hidden=encoder(train_x,encoder_hidden)
            if(decoder_layers>encoder_layers):
                i = decoder_layers
                decoder_hidden=encoder_hidden

                while True:
                    if(i==encoder_layers):
                        break
                    # Concatenate the two tensors along the first dimension
                    decoder_hidden = torch.cat([decoder_hidden, encoder_hidden[-1].unsqueeze(0)], dim=0)
                    i-=1

            elif(decoder_layers<encoder_layers):
                decoder_hidden=encoder_hidden[-decoder_layers:]

            else:
                decoder_hidden=encoder_hidden
        
            decoder_input = train_y[0]
            
            if(bi_directional=="Yes"):
                split_tensor= torch.split(encoder_output, hidden_size, dim=-1)
                encoder_output=torch.add(split_tensor[0],split_tensor[1])/2
            
            
            use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
            if use_teacher_forcing:
                for i in range(0,len(train_y)):
                    if(attention=="Yes"):
                        decoder_output, decoder_hidden, attn_weights=decoder(decoder_input,decoder_hidden,encoder_output,len(train_x))
                        loss+=loss_fun(torch.squeeze(decoder_output), train_y[i])
                        decoder_input = train_y[i] 
                    else:
                        decoder_output, decoder_hidden= decoder(decoder_input, decoder_hidden)
                        loss+=loss_fun(torch.squeeze(decoder_output), train_y[i])
                        decoder_input = train_y[i]  # Teacher forcing
            else:
                for i in range(0,len(train_y)):
                    if(attention=="Yes"):
                        decoder_output, decoder_hidden, attn_weights=decoder(decoder_input,decoder_hidden,encoder_output,len(train_x))
                        max_prob,index=decoder_output.topk(1)
                        loss+=loss_fun(torch.squeeze(decoder_output), train_y[i])
                        decoder_input=index
                    else:
                        decoder_output,decoder_hidden=decoder(decoder_input,decoder_hidden)
                        max_prob,index=decoder_output.topk(1)
                        loss+=loss_fun(torch.squeeze(decoder_output), train_y[i])
                        decoder_input=index
            loss.backward()
            encoder_optimizer.step()
            decoder_optimizer.step()
            total_loss+=loss
        
        if cell_type=='LSTM':
    
            encoder_hidden=encoder.initHidden()
            encoder_state=encoder.initState()
            
            encoder_output,encoder_hidden,encoder_state=encoder(train_x,encoder_hidden,encoder_state)
        
            if(decoder_layers>encoder_layers):
                i = decoder_layers
                decoder_hidden=encoder_hidden
                decoder_state=encoder_state
                while True:
                    if(i==encoder_layers):
                        break
                    # Concatenate the two tensors along the first dimension
                    decoder_hidden = torch.cat([decoder_hidden, encoder_hidden[-1].unsqueeze(0)], dim=0)
                    decoder_state = torch.cat([decoder_state, encoder_state[-1].unsqueeze(0)], dim=0)
                    i-=1

            elif(decoder_layers<encoder_layers):
                decoder_hidden=encoder_hidden[-decoder_layers:]
                decoder_state=encoder_state[-decoder_layers:]

            else:
                decoder_hidden=encoder_hidden
                decoder_state=encoder_state
            
            
            if(bi_directional=="Yes"):
                split_tensor= torch.split(encoder_output, hidden_size, dim=-1)
                encoder_output=torch.add(split_tensor[0],split_tensor[1])/2
            
            decoder_input = train_y[0]
            use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
            if use_teacher_forcing:
                for i in range(0,len(train_y)):
                    if(attention=="Yes"):
                        decoder_output, decoder_hidden, decoder_state, attn_weights=decoder(decoder_input,decoder_hidden,encoder_output,len(train_x),decoder_state)
                        loss+=loss_fun(torch.squeeze(decoder_output), train_y[i])
                        decoder_input= train_y[i]
                    else:
                        decoder_output, decoder_hidden,decoder_state= decoder(decoder_input, decoder_hidden,decoder_state)
                        loss+=loss_fun(torch.squeeze(decoder_output), train_y[i])
                        decoder_input = train_y[i]  # Teacher forcing
            else:
                for i in range(0,len(train_y)):
                    if(attention=="Yes"):
                        decoder_output, decoder_hidden, decoder_state, attn_weights=decoder(decoder_input,decoder_hidden,encoder_output,len(train_x),decoder_state)
                        max_prob,index=decoder_output.topk(1)
                        loss+=loss_fun(torch.squeeze(decoder_output), train_y[i])
                        decoder_input=index
                    else:
                        decoder_output, decoder_hidden,decoder_state= decoder(decoder_input, decoder_hidden,decoder_state)
                        max_prob,index=decoder_output.topk(1)
                        loss+=loss_fun(torch.squeeze(decoder_output), train_y[i])
                        decoder_input=index
            loss.backward()
            encoder_optimizer.step()
            decoder_optimizer.step()
            total_loss+=loss

        
        
    return total_loss.item()/len(train_y),encoder,decoder

In [15]:
def train_iter(input_data,val_data,val_y,input_len,target_len,epochs,batch_size,embedding_size,encoder_layers,decoder_layers,hidden_size,cell_type,bi_directional,dropout,beam_size,attention):
    lr=0.001
    if(cell_type=='GRU'):
        encoder=EncoderGRU(input_len,hidden_size,embedding_size,encoder_layers,batch_size,bi_directional,dropout).to(device)
        if(attention=="Yes"):
            decoder=AttnDecoder(target_len,hidden_size,embedding_size,decoder_layers,batch_size,cell_type,dropout).to(device)
        else:
            decoder=DecoderGRU(target_len,hidden_size,embedding_size,decoder_layers,batch_size,dropout).to(device)
        
    if(cell_type=='RNN'):
        encoder=EncoderRNN(input_len,hidden_size,embedding_size,encoder_layers,batch_size,bi_directional,dropout).to(device)
        if(attention=="Yes"):
            decoder=AttnDecoder(target_len,hidden_size,embedding_size,decoder_layers,batch_size,cell_type,dropout).to(device)
        else:
            decoder=DecoderRNN(target_len,hidden_size,embedding_size,decoder_layers,batch_size,dropout).to(device)
    
    if cell_type=='LSTM':
        encoder=EncoderLSTM(input_len,hidden_size,embedding_size,encoder_layers,batch_size,bi_directional,dropout).to(device)
        if(attention=="Yes"):
            decoder=AttnDecoder(target_len,hidden_size,embedding_size,decoder_layers,batch_size,cell_type,dropout).to(device)
        else:
            decoder=DecoderLSTM(target_len,hidden_size,embedding_size,decoder_layers,batch_size,dropout).to(device)

    encoder_optimizer=optim.Adam(encoder.parameters(),lr)
    decoder_optimizer=optim.Adam(decoder.parameters(),lr)
    loss_fun=nn.CrossEntropyLoss(reduction="sum")
    epoch_train_loss=[]
    epoch_val_loss=[]
    epoch_val_acc=[]
    for i in range(0,epochs):
        loss,encoder,decoder=train(input_data,encoder,decoder,loss_fun,encoder_optimizer,decoder_optimizer,
                                   encoder_layers,decoder_layers,batch_size,hidden_size,bi_directional,
                                   cell_type,attention)
        val_predictions,val_loss=eval(val_data,encoder,decoder,encoder_layers,decoder_layers,
                                  batch_size,hidden_size,bi_directional,cell_type,attention)
        
        epoch_val_loss.append(val_loss)
        epoch_train_loss.append(loss/51200)
        
        val_acc=accuracy(val_predictions,val_y)
        epoch_val_acc.append(val_acc)
        print(loss/51200,val_loss,val_acc)
    
#     train_predictions,t=eval(input_data,encoder,decoder,encoder_layers,decoder_layers,batch_size,hidden_size,bi_directional,cell_type,attention)
    return epoch_train_loss,epoch_val_loss,epoch_val_acc,encoder,decoder,encoder_layers,decoder_layers

In [16]:
def eval(input_data,encoder,decoder,encoder_layers,decoder_layers,batch_size,hidden_size,bi_directional,cell_type,attention):
    with torch.no_grad():
        loss_fun=nn.CrossEntropyLoss(reduction="sum")
        total_loss=0
        pred_words=list()
        for x,y in input_data:
            loss=0
            decoder_words=[]
            x=x.T
            y=y.T
            encoder_hidden=encoder.initHidden()
            timesteps=len(x)
            if cell_type=='GRU' or cell_type=='RNN':

                encoder_hidden=encoder.initHidden()
                encoder_output,encoder_hidden=encoder(x,encoder_hidden)
                if(decoder_layers>encoder_layers):
                    i = decoder_layers
                    decoder_hidden=encoder_hidden

                    while True:
                        if(i==encoder_layers):
                            break
                        # Concatenate the two tensors along the first dimension
                        decoder_hidden = torch.cat([decoder_hidden, encoder_hidden[-1].unsqueeze(0)], dim=0)
                        i-=1

                elif(decoder_layers<encoder_layers):
                    decoder_hidden=encoder_hidden[-decoder_layers:]

                else:
                    decoder_hidden=encoder_hidden

                decoder_input = y[0]

                if(bi_directional=="Yes"):
                    split_tensor= torch.split(encoder_output, hidden_size, dim=-1)
                    encoder_output=torch.add(split_tensor[0],split_tensor[1])/2

                for i in range(0,len(y)):
                    if(attention=="Yes"):
                        decoder_output, decoder_hidden, attn_weights=decoder(decoder_input,decoder_hidden,encoder_output,len(x))
                        max_prob,index=decoder_output.topk(1)
                        loss+=loss_fun(torch.squeeze(decoder_output), y[i])
                        index=index.squeeze()
                        decoder_input=index
                        decoder_words.append(index.tolist())
                    else:
                        decoder_output,decoder_hidden=decoder(decoder_input,decoder_hidden)
                        max_prob,index=decoder_output.topk(1)
                        loss+=loss_fun(torch.squeeze(decoder_output), y[i])
                        index=index.squeeze()
                        decoder_input=index
                        decoder_words.append(index.tolist())
                decoder_words=np.array(decoder_words)
                pred_words.append(decoder_words.T)
                total_loss+=loss.item()


            if cell_type=='LSTM':

                encoder_hidden=encoder.initHidden()
                encoder_state=encoder.initState()

                encoder_output,encoder_hidden,encoder_state=encoder(x,encoder_hidden,encoder_state)

                if(decoder_layers>encoder_layers):
                    i = decoder_layers
                    decoder_hidden=encoder_hidden
                    decoder_state=encoder_state
                    while True:
                        if(i==encoder_layers):
                            break
                        # Concatenate the two tensors along the first dimension
                        decoder_hidden = torch.cat([decoder_hidden, encoder_hidden[-1].unsqueeze(0)], dim=0)
                        decoder_state = torch.cat([decoder_state, encoder_state[-1].unsqueeze(0)], dim=0)
                        i-=1

                elif(decoder_layers<encoder_layers):
                    decoder_hidden=encoder_hidden[-decoder_layers:]
                    decoder_state=encoder_state[-decoder_layers:]

                else:
                    decoder_hidden=encoder_hidden
                    decoder_state=encoder_state


                if(bi_directional=="Yes"):
                    split_tensor= torch.split(encoder_output, hidden_size, dim=-1)
                    encoder_output=torch.add(split_tensor[0],split_tensor[1])/2
                decoder_input = y[0]

                for i in range(0,len(y)):
                    if(attention=="Yes"):
                        decoder_output, decoder_hidden, decoder_state, attn_weights=decoder(decoder_input,decoder_hidden,encoder_output,len(x),decoder_state)
                        max_prob,index=decoder_output.topk(1)
                        loss+=loss_fun(torch.squeeze(decoder_output), y[i])
                        index=index.squeeze()
                        decoder_input=index
                        decoder_words.append(index.tolist())
                    else:
                        decoder_output, decoder_hidden,decoder_state= decoder(decoder_input, decoder_hidden,decoder_state)
                        max_prob,index=decoder_output.topk(1)
                        loss+=loss_fun(torch.squeeze(decoder_output), y[i])
                        index=index.squeeze()
                        decoder_input=index
                        decoder_words.append(index.tolist())
                decoder_words=np.array(decoder_words)
                pred_words.append(decoder_words.T)
                total_loss+=loss.item()


    predictions=[]
    for batch in pred_words:
        for word in batch:
            predictions.append(word)

    return predictions,total_loss/(len(predictions)*len(predictions[0]))

In [17]:
def accuracy(predictions,y):
    count=0
#     print(len(y),len(y[0]))
    for i in range(0,len(predictions)):
        p=predictions[i]
        if np.array_equal(p,y[i]):
            count+=1
    return (count/len(predictions))*100

## MODEL

In [18]:
# epochs=20
# batchsize=64
# embedding_size=256
# encoder_layers=2
# decoder_layers=3
# hidden_size=256
# cell_type="LSTM"
# bi_directional="Yes"
# dropout=0.2
# beam_size=16
# attention="No"

# train_df,test_df,val_df,eng_to_idx,hin_to_idx,idx_to_eng,idx_to_hin,input_len,target_len=get_data()

# train_x,train_y = pre_process(train_df,eng_to_idx,hin_to_idx)
# test_x,test_y = pre_process(test_df,eng_to_idx,hin_to_idx)
# val_x,val_y = pre_process(val_df,eng_to_idx,hin_to_idx)

# train_dataset=MyDataset(train_x,train_y)
# test_dataset=MyDataset(test_x,test_y)
# val_dataset=MyDataset(val_x,val_y)


# train_dataloader=DataLoader(train_dataset,batch_size=batchsize)
# test_dataloader=DataLoader(test_dataset,batch_size=batchsize)
# val_dataloader=DataLoader(val_dataset,batch_size=batchsize)

In [19]:
# epoch_train_loss,epoch_val_loss,epoch_val_acc,encoder,decoder,encoder_layers,decoder_layers=train_iter(train_dataloader,val_dataloader,val_y,input_len,target_len,epochs,batchsize,embedding_size,encoder_layers,decoder_layers,hidden_size,cell_type,bi_directional,dropout,beam_size,attention)

# print(epoch_val_acc)

# test_predictions,loss=eval(test_dataloader,encoder,decoder,encoder_layers,decoder_layers,batchsize,hidden_size,bi_directional,cell_type,attention)

# test_accuracy=accuracy(test_predictions,test_y)

# print(test_accuracy)

# train_predictions,loss=eval(train_dataloader,encoder,decoder,encoder_layers,decoder_layers,batchsize,hidden_size,bi_directional,cell_type,attention)
# train_accuracy=accuracy(train_predictions,train_y)

# print(train_accuracy)

## INTEGRATING WITH WANDB

In [20]:
def wandb_run_sweeps(train_dataset,val_dataset,test_dataset,train_y,val_y,test_y,input_len,target_len):
    
    config = {
        "project":"CS6910_Assignment3",
        "method": 'bayes',
        "metric": {
        'name': 'acc',
        'goal': 'maximize'
        },
        'parameters' :{
        "epochs": {"values":[15,20,25]},
        "batchsize": {"values": [64,128,256]},
        "embedding_size": {"values":[256, 512,1024]},
        "hidden_size": {"values":[256, 512,1024]},
        "encoder_layers": {"values":[2,3,4]},
        "decoder_layers": {"values":[2,3,4]},
        "cell_type": {"values":["RNN","GRU","LSTM"]},
        "bi_directional":{"values":["Yes"]},
        "dropout":{"values":[0.1,0.2,0.5]},
        "attention":{"values":["No"]},
        "beam_size":{"values":[5,10,15]}
        }
    }
    def train_rnn():
        wandb.init()

        name='_CT_'+str(wandb.config.cell_type)+"_BS_"+str(wandb.config.batchsize)+"_EPOCH_"+str(wandb.config.epochs)+"_ES_"+str(wandb.config.embedding_size)+"_HS_"+str(wandb.config.hidden_size)
        
        
        train_dataloader=DataLoader(train_dataset,batch_size=wandb.config.batchsize)
        test_dataloader=DataLoader(test_dataset,batch_size=wandb.config.batchsize)
        val_dataloader=DataLoader(val_dataset,batch_size=wandb.config.batchsize)
        
        epoch_train_loss,epoch_val_loss,epoch_val_acc,encoder,decoder,encoder_layers,decoder_layers=train_iter(train_dataloader,val_dataloader,val_y,input_len,target_len,wandb.config.epochs,wandb.config.batchsize,wandb.config.embedding_size,wandb.config.encoder_layers,wandb.config.decoder_layers,wandb.config.hidden_size,wandb.config.cell_type,wandb.config.bi_directional,wandb.config.dropout,wandb.config.beam_size,wandb.config.attention)

        for i in range(wandb.config.epochs):
            wandb.log({"loss":epoch_train_loss[i]})
            wandb.log({"val_loss":epoch_val_loss[i]})
            wandb.log({"val_acc":epoch_val_acc[i]})
            wandb.log({"epoch": (i+1)})
        wandb.log({"validation_accuracy":epoch_val_acc[-1]})    
        
        train_predictions,_=eval(train_dataloader,encoder,decoder,wandb.config.encoder_layers,
                              wandb.config.decoder_layers,wandb.config.batchsize,wandb.config.hidden_size,
                              wandb.config.bi_directional,wandb.config.cell_type,wandb.config.attention)

        train_accuracy=accuracy(train_predictions,train_y)
        wandb.log({"train_accuracy":train_accuracy})
        
        test_predictions,_=eval(test_dataloader,encoder,decoder,wandb.config.encoder_layers,
                              wandb.config.decoder_layers,wandb.config.batchsize,wandb.config.hidden_size,
                              wandb.config.bi_directional,wandb.config.cell_type,wandb.config.attention)

        test_accuracy=accuracy(test_predictions,test_y)
        wandb.log({"test_accuracy":test_accuracy})
        wandb.log({"acc":epoch_val_acc[-1]})
        wandb.run.name = name
        wandb.run.save()
        wandb.run.finish()
    wandb.login(key="aecb4b665a37b40204530b0627a42274aeddd3e1")
    sweep_id=wandb.sweep(config,project="CS6910_Assignment3")
    wandb.agent(sweep_id,function=train_rnn,count=20)

In [21]:
train_df,test_df,val_df,eng_to_idx,hin_to_idx,idx_to_eng,idx_to_hin,input_len,target_len=get_data()

train_x,train_y = pre_process(train_df,eng_to_idx,hin_to_idx)
test_x,test_y = pre_process(test_df,eng_to_idx,hin_to_idx)
val_x,val_y = pre_process(val_df,eng_to_idx,hin_to_idx)

train_dataset=MyDataset(train_x,train_y)
test_dataset=MyDataset(test_x,test_y)
val_dataset=MyDataset(val_x,val_y)

wandb_run_sweeps(train_dataset,val_dataset,test_dataset,train_y,val_y,test_y,input_len,target_len)

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: lj2769k5
Sweep URL: https://wandb.ai/cs22m078/CS6910_Assignment3/sweeps/lj2769k5


[34m[1mwandb[0m: Agent Starting Run: z2cl8mx2 with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 64
[34m[1mwandb[0m: 	beam_size: 15
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 4
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embedding_size: 1024
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: Currently logged in as: [33mcs22m078[0m. Use [1m`wandb login --relogin`[0m to force relogin


1.0143575613839286 0.5913486743257159 6.25
0.5714248511904763 0.4972603235925947 16.845703125
0.4634253801618303 0.4400637366232418 20.7763671875
0.41369410923549105 0.4103298829424949 23.73046875
0.37575198218936007 0.4005688322441919 26.8310546875
0.3472204299200149 0.39597362953992116 29.833984375
0.317063482375372 0.37313248749290195 27.9052734375
0.29717261904761905 0.380551444987456 30.6640625
0.28629350934709824 0.37137774874766666 31.1279296875
0.26753502255394346 0.3863221662385123 31.005859375
0.2546694800967262 0.3807802359972681 32.51953125
0.23892120361328126 0.39524541830732707 31.8359375
0.22990315755208332 0.4001039192080498 31.0546875
0.21178862072172622 0.40624770273764926 31.689453125
0.20801170712425596 0.3986086366432054 31.6650390625




0,1
acc,▁
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▄▅▆▆▇▇████████
val_loss,█▅▃▂▂▂▁▁▁▁▁▂▂▂▂
validation_accuracy,▁

0,1
acc,31.66504
epoch,15.0
loss,0.20801
test_accuracy,28.19824
train_accuracy,47.1543
val_acc,31.66504
val_loss,0.39861
validation_accuracy,31.66504


[34m[1mwandb[0m: Agent Starting Run: vw7kt0n4 with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 64
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_size: 1024


0.6234379650297619 0.3829964232586679 24.4140625
0.34916843959263394 0.35987177100919543 32.1044921875
0.28905215308779764 0.345000124048619 35.0341796875
0.2473912121000744 0.35071956117947894 35.6689453125
0.20733668736049107 0.36805268306107747 35.0830078125
0.1767747570219494 0.36919115270887104 34.423828125
0.14452183314732142 0.3867093125979106 35.6689453125
0.11976276216052828 0.41687552418027607 36.2060546875
0.10062826974051338 0.43422484468846095 35.2294921875
0.08449512300037203 0.45910344485725674 34.4970703125
0.07449044363839286 0.4792601473274685 35.83984375
0.06569330851236979 0.4897060025305975 35.3515625
0.06579849243164063 0.4922452100685665 35.9130859375
0.06203077770414806 0.4816363336784499 36.0107421875
0.058139012654622395 0.5185818413183803 36.4990234375
0.05160054161435082 0.5122279652527401 36.3037109375
0.05106428963797433 0.5148449430153483 36.0595703125
0.04901157924107143 0.5190813310799145 36.376953125
0.051456244332449774 0.5454491681995846 35.9375
0.05

0,1
acc,▁
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss,█▅▄▃▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▅▇█▇▇██▇▇█▇███████▇
val_loss,▂▂▁▁▂▂▂▄▄▅▆▆▆▆▇▇▇▇██
validation_accuracy,▁

0,1
acc,35.62012
epoch,20.0
loss,0.0509
test_accuracy,34.10645
train_accuracy,79.91602
val_acc,35.62012
val_loss,0.53861
validation_accuracy,35.62012


[34m[1mwandb[0m: Agent Starting Run: qb84m978 with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 64
[34m[1mwandb[0m: 	beam_size: 15
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_size: 1024


0.679706043061756 0.45464227277608144 19.2626953125
0.43956720261346727 0.4286373644357636 23.193359375
0.4013518705822172 0.4102372945774169 24.2431640625
0.38023899623325896 0.40727957373573664 23.8525390625
0.3706536865234375 0.41962036667835145 24.6337890625
0.3717205519903274 0.42817719529072445 23.3642578125
0.361325189499628 0.42417658120393753 25.5615234375
0.362428240094866 0.40711646739925655 24.8291015625
0.36195995512462803 0.4173880123666355 19.677734375
0.3520810953776041 0.4174968440617834 24.21875
0.36173086983816966 0.4135979318193027 23.14453125
0.351837652297247 0.40894595569088343 21.923828125
0.3641886683872768 0.40108570953210193 24.0478515625
0.35732506161644345 0.4075545448632467 23.9501953125
0.3593990362258184 0.4053247535512561 22.8271484375
0.3649365815662203 0.40729467428865884 24.4873046875
0.3620103236607143 0.4163528613391377 24.9755859375
0.35672264462425596 0.40775393162454876 23.14453125
0.35649710518973216 0.4172400218390283 22.265625
0.3585795084635

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
acc,▁
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▅▇▆▇▆█▇▁▇▅▄▆▆▅▇▇▅▄▆
val_loss,█▅▂▂▃▅▄▂▃▃▃▂▁▂▂▂▃▂▃▃
validation_accuracy,▁

0,1
acc,23.75488
epoch,20.0
loss,0.35858
test_accuracy,21.02051
train_accuracy,22.82031
val_acc,23.75488
val_loss,0.41368
validation_accuracy,23.75488


[34m[1mwandb[0m: Agent Starting Run: tiv922oc with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 64
[34m[1mwandb[0m: 	beam_size: 15
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_size: 1024


1.3339806547619049 1.1696289720989408 0.09765625
1.0943663969494049 1.0927887033848536 0.0732421875
1.0337486049107143 0.9257751532963344 0.1953125
0.9670201474144345 0.8803236967041379 0.341796875
0.92916259765625 0.8101519140459242 0.5615234375
0.8977426292782738 0.8062794570411954 1.0498046875
0.8868797084263393 0.7762270385310763 1.1474609375
0.865211181640625 0.7640138247183391 1.708984375
0.8544828869047619 0.7406407069592249 2.24609375
0.8511747233072917 0.736464737426667 2.2216796875
0.8350396437872024 0.7499363947482336 2.7587890625
0.8256007021949405 0.7375006831827617 2.9541015625
0.8210420154389881 0.7184307525555292 2.63671875
0.8266901506696429 0.7398082777148202 2.3193359375
0.8089544387090775 0.7387417065245765 3.0029296875
0.8120974586123512 0.7328648893606096 2.7587890625
0.812276843843006 0.7254642560368493 3.1494140625
0.8120859491257439 0.7180813487086978 2.8076171875
0.7996886044456845 0.7098556636344819 3.173828125
0.8136095028831845 0.7183817965643746 3.07617187

VBox(children=(Label(value='0.001 MB of 0.042 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.025671…

0,1
acc,▁
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▁▁▂▂▃▃▅▆▆▇█▇▆█▇█▇██
val_loss,█▇▄▄▃▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁
validation_accuracy,▁

0,1
acc,3.07617
epoch,20.0
loss,0.81361
test_accuracy,2.7832
train_accuracy,1.17969
val_acc,3.07617
val_loss,0.71838
validation_accuracy,3.07617


[34m[1mwandb[0m: Agent Starting Run: imme170c with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 64
[34m[1mwandb[0m: 	beam_size: 15
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 4
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 4
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_size: 256


0.9534549386160713 0.5185567275399253 9.5458984375
0.5095889718191965 0.41158428930100943 18.5791015625
0.42529209681919644 0.39493744501045774 23.0224609375
0.37827212379092257 0.3737987130880356 26.26953125
0.3457647995721726 0.3719262941962197 27.4169921875
0.3269134521484375 0.35780730098485947 29.638671875
0.3131927780877976 0.36111137164490564 31.54296875
0.2961505126953125 0.3562524967959949 27.7099609375
0.2765539260137649 0.35152131957667215 32.12890625
0.27038443429129466 0.34178553911901655 32.373046875
0.2608067685081845 0.35028775355645586 31.640625
0.24387326195126488 0.34491760638498126 34.033203125
0.23710985456194197 0.350482630942549 32.958984375
0.22341920398530504 0.34839247450942085 34.814453125
0.21934924897693453 0.3648751097775641 35.009765625
0.21279930478050596 0.3535481109505608 33.7890625
0.20029355003720237 0.3679866095383962 33.9599609375
0.19248647054036458 0.3644115424581936 34.912109375
0.1879256620861235 0.3627024424217996 34.3505859375
0.1791977800641

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
acc,▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
loss,█▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▃▅▆▆▇▇▆▇▇▇█▇███████▇██▇▇
val_loss,█▄▃▂▂▂▂▂▁▁▁▁▁▁▂▁▂▂▂▃▃▂▃▄▃
validation_accuracy,▁

0,1
acc,32.9834
epoch,25.0
loss,0.14594
test_accuracy,30.88379
train_accuracy,56.1875
val_acc,32.9834
val_loss,0.40377
validation_accuracy,32.9834


[34m[1mwandb[0m: Agent Starting Run: pqmbyxy9 with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 128
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_size: 1024


0.7946658761160714 0.430787022624697 18.5791015625
0.40473487490699406 0.38753323682716917 29.6630859375
0.3346808442615328 0.3447749756631397 28.90625
0.2901421828497024 0.33723861617701395 32.91015625
0.25279700869605654 0.33169818988868166 35.2294921875
0.22067718505859374 0.34956677187056767 36.279296875
0.18633793422154019 0.3678222710178012 36.767578125
0.164490719749814 0.3703996411391667 36.083984375
0.14096856980096725 0.38809184446221306 37.01171875
0.11367223103841147 0.39875335139887674 36.5234375
0.1029652114141555 0.4181135899963833 35.9130859375
0.0874451918829055 0.4598745483727682 35.6689453125
0.06675668625604539 0.47873990805376143 35.693359375
0.06690270560128347 0.4765616754690806 36.3525390625
0.06081590561639695 0.4901632780120486 35.888671875
0.056149106706891744 0.49513337583768935 36.03515625
0.05023676917666481 0.5090342043411165 36.2548828125
0.045684044247581844 0.5206287467763537 36.0595703125
0.04450775509788876 0.5125647761992046 36.42578125
0.0476202683

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
acc,▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
loss,█▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▅▅▆▇██████▇▇████████▇██▇
val_loss,▄▃▁▁▁▂▂▂▃▃▄▅▅▅▆▆▆▆▆▇▇▇▇▇█
validation_accuracy,▁

0,1
acc,35.30273
epoch,25.0
loss,0.0389
test_accuracy,32.6416
train_accuracy,84.19336
val_acc,35.30273
val_loss,0.57249
validation_accuracy,35.30273


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jh235ga2 with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 128
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_size: 1024


0.7602779134114583 0.4313429913350514 19.1162109375
0.40400335402715776 0.37094441269125256 27.9541015625
0.32998331705729167 0.37149412078516825 33.4716796875
0.2891951497395833 0.3409396353222075 33.544921875
0.25727294921875 0.35424744586149853 34.6923828125
0.2257375226702009 0.34118597989990596 34.1064453125
0.20015444800967264 0.3555612230584735 35.25390625
0.17425702776227678 0.37496634466307505 35.986328125
0.14427798316592264 0.3921994311468942 36.376953125
0.13274020240420387 0.3957694932108834 35.1318359375
0.11390462239583334 0.4236453076203664 35.400390625
0.0953776114327567 0.4349223935887927 35.009765625
0.08371375674293155 0.45240854152611326 36.5478515625
0.07679811023530507 0.45668613768759225 34.8876953125
0.06859778994605654 0.4703954025393441 35.498046875
0.0644408453078497 0.4793582203842345 35.791015625
0.059570301600864954 0.49526862019584295 35.595703125
0.05225920904250372 0.5105615116301037 35.7421875
0.05346217564174107 0.5023249699955895 35.0341796875
0.050

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
acc,▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
loss,█▅▄▃▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▅▇▇▇▇▇██▇█▇█▇████▇█▇███▇
val_loss,▄▂▂▁▁▁▁▂▃▃▄▄▅▅▅▆▆▇▆▇▇████
validation_accuracy,▁

0,1
acc,35.10742
epoch,25.0
loss,0.04386
test_accuracy,32.10449
train_accuracy,83.35742
val_acc,35.10742
val_loss,0.55257
validation_accuracy,35.10742


[34m[1mwandb[0m: Agent Starting Run: jrqpkycu with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 128
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 4
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_size: 512


0.8560149855840774 0.4540294791970934 14.697265625
0.4402794247581845 0.40413646470932735 22.509765625
0.3586700730096726 0.3725930927764802 28.6376953125
0.3204982503255209 0.3528459320465724 29.8828125
0.28910348074776787 0.3378071494045712 32.2021484375
0.26269981747581844 0.3612032170806612 34.1064453125
0.2311466471354167 0.3661546749728067 34.7412109375
0.20973154703776042 0.3580202524151121 34.228515625
0.18227386474609375 0.3687417358160019 34.9853515625
0.16149871826171874 0.3949479801314218 35.3271484375
0.14262908935546875 0.40560637201581684 34.375
0.12726559593563988 0.42030297006879536 35.64453125
0.10088946387881326 0.42471136933281306 34.912109375
0.09809398832775297 0.4692544447524207 34.66796875
0.08902523222423735 0.4561205676623753 35.498046875
0.07701848347981771 0.4707584267570859 34.9853515625
0.07019606090727307 0.49736313876651583 35.6689453125
0.06711041405087426 0.49963738662855967 34.716796875
0.06327611287434895 0.5039642949899038 34.521484375
0.06182458786

0,1
acc,▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
loss,█▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▄▆▆▇▇███████████████████
val_loss,▅▃▂▁▁▂▂▂▂▃▃▄▄▅▅▅▆▆▆▇▇▇███
validation_accuracy,▁

0,1
acc,35.03418
epoch,25.0
loss,0.04452
test_accuracy,32.56836
train_accuracy,82.73047
val_acc,35.03418
val_loss,0.55293
validation_accuracy,35.03418


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: odsi8yjw with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 256
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_size: 1024


1.1304253859747024 0.5889741806756883 3.3935546875
0.5755254255022322 0.43443379799524945 14.501953125
0.453128168015253 0.41100582622346427 18.45703125
0.38370338076636906 0.38087633961722966 25.7568359375
0.3617802501860119 0.3812981943289439 28.9794921875
0.32268534342447913 0.38402303911390756 30.9326171875
0.2956971958705357 0.3457386791706085 31.3232421875
0.27666262672061015 0.34610346385410856 32.2998046875
0.26159894670758926 0.34029085011709304 32.373046875
0.23393511090959823 0.3562294940153758 33.349609375
0.22249742780412948 0.36703272944404963 33.1298828125
0.18978390648251486 0.3527960479259491 33.8134765625
0.17974993024553573 0.3733875723112197 34.5947265625
0.16309769403366814 0.37103165473256794 33.203125
0.14810484386625744 0.3951394912742433 33.69140625
0.12499087379092262 0.4078571966716221 32.958984375
0.1143590073358445 0.41742014175369624 34.1796875
0.11083902994791667 0.40833337959789096 32.470703125
0.09390154157366071 0.440870463848114 32.6416015625
0.085961

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
acc,▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
loss,█▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▃▄▆▇▇▇▇█████████████████
val_loss,█▄▃▂▂▂▁▁▁▁▂▁▂▂▃▃▃▃▄▄▄▅▅▅▅
validation_accuracy,▁

0,1
acc,34.15527
epoch,25.0
loss,0.06372
test_accuracy,29.93164
train_accuracy,79.40234
val_acc,34.15527
val_loss,0.49325
validation_accuracy,34.15527


[34m[1mwandb[0m: Agent Starting Run: o2z2kk5l with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 256
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_size: 1024


1.046214657738095 0.5803577133587429 3.3203125
0.535989990234375 0.4309394671803429 19.873046875
0.41504952566964287 0.3868538510231745 24.8291015625
0.3768141392299107 0.36187193791071576 28.369140625
0.3391090901692709 0.34808758043107535 29.78515625
0.3067066010974703 0.35407850572041105 33.0322265625
0.2742221214657738 0.34776080931935993 34.5703125
0.25410042898995533 0.3556284521307264 35.400390625
0.23998663039434526 0.3478044597875504 35.4736328125
0.22149463471912204 0.3480219741662343 33.740234375
0.1952913556780134 0.349830877213251 33.9111328125
0.1867917015438988 0.37116895545096623 35.8154296875
0.162626211983817 0.35655011449541363 33.59375
0.15769538516090031 0.36011370165007456 33.642578125
0.1470282709030878 0.3930374398117974 34.1552734375
0.12977696010044643 0.40328574038687204 34.228515625
0.12237710135323661 0.399881253639857 33.4228515625
0.10410663423084078 0.41617661714553833 32.763671875
0.09399382091703869 0.4279584217639196 35.2294921875
0.08975414457775298 

0,1
acc,▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
loss,█▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▅▆▆▇▇██████████▇▇██▇▇▇▇▇
val_loss,█▄▂▁▁▁▁▁▁▁▁▂▁▁▂▃▃▃▃▄▃▄▄▅▅
validation_accuracy,▁

0,1
acc,32.27539
epoch,25.0
loss,0.0658
test_accuracy,31.44531
train_accuracy,78.05078
val_acc,32.27539
val_loss,0.47883
validation_accuracy,32.27539


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kullijox with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 128
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embedding_size: 1024
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	hidden_size: 1024


0.7435889834449405 0.4607523012728918 19.5068359375
0.40929495675223215 0.391942682720366 27.34375
0.3513713146391369 0.3660809454463777 29.6875
0.31906299409412203 0.34883494462285725 30.517578125
0.2703338913690476 0.363687344959804 34.1552734375
0.24321414039248512 0.3591855210917337 33.69140625
0.20632008870442708 0.3709956577845982 35.986328125
0.19202229817708333 0.3898987578494208 34.130859375
0.17016055152529763 0.3762488265832265 33.0810546875
0.14353912353515624 0.4146290293761662 34.1796875
0.11692228044782366 0.4222107607693899 34.2041015625
0.10706809634254093 0.4380736684515363 33.10546875
0.09744122459774925 0.44873141390936716 32.5439453125
0.08268280029296875 0.46309694789704825 31.9580078125
0.07716993786039807 0.4840985877173288 33.49609375


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
acc,▁
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
loss,█▄▄▄▃▃▂▂▂▂▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▄▅▆▇▇█▇▇▇▇▇▇▆▇
val_loss,▇▃▂▁▂▂▂▃▂▄▅▆▆▇█
validation_accuracy,▁

0,1
acc,33.49609
epoch,15.0
loss,0.07717
test_accuracy,30.85938
train_accuracy,73.74805
val_acc,33.49609
val_loss,0.4841
validation_accuracy,33.49609


[34m[1mwandb[0m: Agent Starting Run: iel199n1 with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 128
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 4
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 1024
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_size: 1024


0.8099623325892857 0.4463645468155543 14.8681640625
0.44732477097284223 0.4053931874888284 22.16796875
0.3764342680431547 0.37277316408497946 28.3203125
0.33288582938058037 0.3631368556192943 29.1748046875
0.3010525367373512 0.34716789495377315 29.5166015625
0.2783645484561012 0.3405914640142804 31.103515625
0.24345621744791668 0.3489048708052862 33.251953125
0.22469728015718005 0.3631797007152012 33.056640625
0.1980197288876488 0.36306690034412203 33.7646484375
0.18552179245721725 0.36564769844214123 33.5205078125
0.16001065208798362 0.3819324665126346 32.470703125
0.14465281168619792 0.4027381617398489 33.88671875
0.13003882998511906 0.4092149521623339 33.69140625
0.11429699125744047 0.4190338119154885 33.154296875
0.10593777611142112 0.429016527675447 33.4716796875
0.09671058291480655 0.4567740034489405 32.32421875
0.08513216291155133 0.4555005899497441 32.91015625
0.0775469752720424 0.46945463049979436 33.5205078125
0.07327234903971354 0.4844031383593877 32.568359375
0.069520757765

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
acc,▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
loss,█▅▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▄▆▆▆▇████▇████▇█████▇███
val_loss,▅▃▂▂▁▁▁▂▂▂▂▃▃▄▄▅▅▆▆▆▇▇███
validation_accuracy,▁

0,1
acc,32.95898
epoch,25.0
loss,0.04558
test_accuracy,29.88281
train_accuracy,78.94336
val_acc,32.95898
val_loss,0.53568
validation_accuracy,32.95898


[34m[1mwandb[0m: Agent Starting Run: i1jse58i with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 128
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_size: 512


0.817839587983631 0.4766740600268046 15.6494140625
0.45242486862909226 0.39870243625981466 25.048828125
0.37094401041666664 0.3906711375429517 28.61328125
0.34617637997581846 0.3557632217804591 29.8828125
0.3064273507254464 0.3578588998033887 31.54296875
0.28055126371837796 0.347292402670497 32.2998046875
0.2583926827566964 0.3601234931321371 34.1064453125
0.2320014154343378 0.3717026100272224 34.1552734375
0.21141383579799108 0.3786912936539877 34.619140625
0.1897165062313988 0.3720857969352177 33.88671875
0.1679683576311384 0.39839949068569 34.3505859375
0.15557598295665923 0.4187377748035249 34.8876953125
0.14255404517764136 0.42205535159224555 34.6923828125
0.13176907493954612 0.41869247598307474 33.4228515625
0.1144107709612165 0.4327765100059055 33.740234375
0.10720328921363467 0.44690899550914764 33.544921875
0.09761165073939733 0.44638338897909435 34.033203125
0.08944085984002977 0.47520512839158374 34.0087890625
0.0866773696172805 0.47851928500902086 34.3017578125
0.0763097853

0,1
acc,▁
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss,█▅▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▄▆▆▇▇███████▇█████▇
val_loss,▇▄▃▁▂▁▂▂▃▂▃▄▅▄▅▆▆▇▇█
validation_accuracy,▁

0,1
acc,32.78809
epoch,20.0
loss,0.07631
test_accuracy,30.71289
train_accuracy,77.29492
val_acc,32.78809
val_loss,0.49072
validation_accuracy,32.78809


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 98q5wh1l with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 256
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 4
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_size: 1024


1.156943591889881 0.6045994247709002 2.8564453125
0.612220226469494 0.4421575523558117 10.546875
0.4666005161830357 0.38354311386744183 20.8251953125
0.387635003952753 0.37182891085034325 25.8544921875
0.34687096005394347 0.36796210351444425 27.0751953125
0.3105198160807292 0.3422640264034271 30.2490234375
0.28274454752604167 0.3540021890685672 32.470703125
0.2435920642671131 0.34560398970331463 33.30078125
0.22427019391741074 0.34865522668475196 34.47265625
0.19303960890997027 0.35886102772894357 32.666015625
0.17663141159784224 0.36544320696876165 33.349609375
0.15864901588076638 0.380965157633736 34.1796875
0.14011021205357144 0.40531267864363535 33.6669921875
0.1302596464611235 0.40298435943467276 33.69140625
0.11059132167271205 0.41620296239852905 34.3994140625
0.09141730172293526 0.42124228818076 33.69140625
0.08318107241675968 0.45928220521836055 34.5703125
0.06801111130487351 0.4819848253613427 33.0810546875
0.0588278561546689 0.4873812028339931 34.5703125
0.052756881713867186 

0,1
acc,▁
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss,█▅▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▃▅▆▆▇██████████████
val_loss,█▄▂▂▂▁▁▁▁▁▂▂▃▃▃▃▄▅▅▅
validation_accuracy,▁

0,1
acc,32.86133
epoch,20.0
loss,0.05276
test_accuracy,30.85938
train_accuracy,77.52539
val_acc,32.86133
val_loss,0.48859
validation_accuracy,32.86133


[34m[1mwandb[0m: Agent Starting Run: b07pus3k with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 64
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_size: 512


0.76261962890625 0.4703058117911929 14.3798828125
0.4389919898623512 0.41913851137672153 24.4140625
0.3777348400297619 0.38140804036742165 25.7080078125
0.34189737955729166 0.3762745520188695 28.22265625
0.3208887881324405 0.36933114131291706 29.39453125
0.2981850179036458 0.35418053219715756 31.787109375
0.27413373674665176 0.3569349132123448 32.8125
0.2587236095610119 0.35602953143063043 32.6416015625
0.24742483956473216 0.34981003864890053 31.6162109375
0.23809456961495534 0.35761159587474095 33.5693359375
0.21810910179501486 0.36968333274126053 34.0087890625
0.20420366559709824 0.3663593331972758 33.5693359375
0.19989475795200892 0.37003318468729657 33.5205078125
0.19234892345610122 0.3680925234442666 32.71484375
0.17952412923177083 0.3815253266975993 33.935546875
0.17373087565104167 0.3770395737318766 33.3740234375
0.16350670224144345 0.38329328773986726 33.349609375
0.15737000964936756 0.40155874228193644 32.861328125
0.14970800490606398 0.3897771178966477 32.958984375
0.14178351

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
acc,▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
loss,█▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▅▅▆▆▇██▇███████████▇██▇▇
val_loss,█▅▃▃▂▁▁▁▁▁▂▂▂▂▃▃▃▄▃▄▄▅▅▆▆
validation_accuracy,▁

0,1
acc,32.27539
epoch,25.0
loss,0.11725
test_accuracy,29.5166
train_accuracy,65.63281
val_acc,32.27539
val_loss,0.43606
validation_accuracy,32.27539


[34m[1mwandb[0m: Agent Starting Run: da0wvtkz with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 128
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 4
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embedding_size: 1024
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_size: 1024


1.0006203497023811 0.5173528279576983 14.4775390625
0.44893330891927086 0.3901592180842445 23.33984375
0.3621880522228422 0.3805367350578308 31.7138671875
0.30977730887276783 0.3648537624449957 34.228515625
0.27888340541294643 0.3612537880738576 34.5947265625
0.2442867024739583 0.3451907223179227 36.3525390625
0.21515781947544643 0.37267205048175084 35.2294921875
0.19740522112165176 0.3675484997885568 36.669921875
0.17284008207775298 0.3758803598937534 35.9619140625
0.14348910377139137 0.39791221845717656 36.0107421875
0.12811276390438986 0.40656612458683195 35.5712890625
0.11749124436151413 0.42392503434703466 35.3515625
0.09817770095098587 0.4303572993902933 35.9130859375
0.08870269775390625 0.4430415800639561 34.7412109375
0.08448375156947545 0.45099136588119326 34.423828125
0.07930730183919271 0.4523356755574544 34.326171875
0.06898499988374257 0.4763037349496569 34.6435546875
0.06436337425595239 0.49005616704622906 35.205078125
0.05839508419945126 0.4996003196353004 35.7177734375


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
acc,▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
loss,█▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▄▆▇▇████████▇▇▇▇▇█████▇▇
val_loss,▇▃▂▂▂▁▂▂▂▃▃▄▄▄▅▅▆▆▆▇▇█▇▇█
validation_accuracy,▁

0,1
acc,35.15625
epoch,25.0
loss,0.05617
test_accuracy,32.51953
train_accuracy,80.74219
val_acc,35.15625
val_loss,0.53121
validation_accuracy,35.15625


[34m[1mwandb[0m: Agent Starting Run: sjmavlye with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 256
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 4
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_size: 256


1.2232445126488094 0.700664724622454 0.87890625
0.7314350818452381 0.5615278482437134 5.322265625
0.5971241978236607 0.473268639473688 12.4267578125
0.5127269345238095 0.4502139857837132 17.2607421875
0.4627638462611607 0.42499418485732304 20.849609375
0.4207128324962797 0.39617703216416494 21.9970703125
0.39231483096168157 0.39737547153518316 24.609375
0.3757888939267113 0.3766453195185888 26.3916015625
0.36220206124441967 0.37830511445090886 27.0263671875
0.33592569986979165 0.3855916815144675 28.6865234375
0.33861409505208334 0.3700102425756909 28.1982421875
0.3188077218191964 0.3719409533909389 30.1025390625
0.30865801130022325 0.3664779946917579 30.56640625
0.29468075706845237 0.3539439468156724 30.37109375
0.28554019019717264 0.3483526465438661 30.712890625
0.2747844005766369 0.3567281776950473 31.2744140625
0.2590169852120536 0.3597602205617087 32.2509765625
0.24968921479724704 0.3595169413657415 30.6884765625
0.24250546409970236 0.3522062599658966 31.8359375
0.2361759294782366 

0,1
acc,▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
loss,█▅▄▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▂▃▄▅▅▆▆▇▇▇▇▇▇▇▇█▇███████
val_loss,█▅▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▂▁▂▂
validation_accuracy,▁

0,1
acc,32.44629
epoch,25.0
loss,0.18723
test_accuracy,27.88086
train_accuracy,46.11914
val_acc,32.44629
val_loss,0.40057
validation_accuracy,32.44629


[34m[1mwandb[0m: Agent Starting Run: ijgwyacm with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 256
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embedding_size: 1024
[34m[1mwandb[0m: 	encoder_layers: 4
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_size: 1024


1.0778194754464285 0.5767203824860709 3.9306640625
0.48954961867559527 0.39794700486319406 19.7021484375
0.38608099074590774 0.38795008120082675 25.5126953125
0.33494166782924106 0.3637347079458691 31.3232421875
0.2970235479445684 0.3452838787010738 30.9814453125
0.27813255673363096 0.34673519077755155 34.1796875
0.24779352097284224 0.33712282351085116 35.8154296875
0.23332070486886158 0.3440701337087722 36.1328125
0.21153695242745535 0.34998103976249695 33.0078125
0.187202395484561 0.3559333284695943 33.984375
0.16628319149925594 0.36257878513563246 34.0576171875
0.16091350737072171 0.36620048256147475 34.8876953125
0.14244662330264138 0.379892688421976 32.9833984375
0.12979304722377233 0.3903951375257401 34.86328125
0.10806587582542783 0.4075120659101577 34.619140625
0.0986243911016555 0.42604551854587736 35.888671875
0.09239095052083333 0.42347211781002225 36.1328125
0.08421492803664435 0.44548687480744864 35.3271484375
0.07766103835332962 0.44863059123357135 34.86328125
0.068964785

0,1
acc,▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
loss,█▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▄▆▇▇███▇███▇████████████
val_loss,█▃▂▂▁▁▁▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▆▅▆
validation_accuracy,▁

0,1
acc,33.95996
epoch,25.0
loss,0.05487
test_accuracy,31.86035
train_accuracy,80.35352
val_acc,33.95996
val_loss,0.5053
validation_accuracy,33.95996


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: bvzhcmg9 with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 128
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_size: 1024


0.7633150809151786 0.4447657607850574 20.0439453125
0.4011928013392857 0.37048236316158656 25.87890625
0.32726643880208334 0.3652804210072472 32.763671875
0.2668686930338542 0.3564740518728892 35.8154296875
0.24630382719494048 0.34880085573309944 34.765625
0.21974654424758186 0.34608026487486704 36.083984375
0.18987439836774553 0.3639016463643029 37.5732421875
0.16021873837425596 0.3668081306275867 36.3037109375
0.13746679396856398 0.3899397225607009 34.8876953125
0.11451663789295015 0.4164829956633704 36.3525390625
0.10123794555664062 0.425079508906319 36.376953125
0.0849784923735119 0.43019638175056096 35.7421875
0.07641787574404763 0.44197555099214825 36.0595703125
0.06512513660249257 0.46050052486714865 36.2548828125
0.058810828072684146 0.49097793442862375 36.8408203125
0.05873615809849331 0.4888986476830074 36.03515625
0.04715756734212239 0.5085118554887318 36.9384765625
0.04335879734584263 0.5227621566681635 36.71875
0.04320492699032738 0.5298183574562981 36.62109375
0.039465361

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
acc,▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
loss,█▅▄▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▃▆▇▇▇█▇▇██▇▇▇█▇█████▇▇▇█
val_loss,▄▂▂▁▁▁▂▂▂▃▄▄▄▅▆▆▆▇▇██▇▇██
validation_accuracy,▁

0,1
acc,36.40137
epoch,25.0
loss,0.03319
test_accuracy,33.93555
train_accuracy,86.53906
val_acc,36.40137
val_loss,0.56193
validation_accuracy,36.40137


[34m[1mwandb[0m: Agent Starting Run: 9kmp2z62 with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 256
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embedding_size: 1024
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_size: 1024


1.0006211635044644 0.5689415704636347 5.224609375
0.49442452566964284 0.43135699204036165 22.3388671875
0.3992920212518601 0.40981892460868474 28.515625
0.3538254801432291 0.3744643117700304 27.7099609375
0.3223595028831845 0.35934424967992873 31.396484375
0.29057774135044645 0.3806578673067547 32.9345703125
0.26714088076636905 0.3555788681620643 33.740234375
0.23276476178850447 0.3650965591271718 34.0087890625
0.21910228910900298 0.3574165049053374 33.4716796875
0.1896871802920387 0.3818764090538025 33.154296875
0.1745669265020461 0.39612490222567603 34.4970703125
0.16145746140252976 0.39242874014945256 33.1298828125
0.146232183547247 0.40888698895772296 33.935546875
0.13149332682291667 0.4257855103129432 34.423828125
0.12009142194475446 0.42903208023025874 34.6923828125
0.11856634957449776 0.44025933742523193 32.861328125
0.1112275623139881 0.44058296510151457 33.544921875
0.10410496303013392 0.4471499579293387 33.3251953125
0.08839385259719122 0.473510898294903 33.30078125
0.0774868

VBox(children=(Label(value='0.001 MB of 0.042 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.025490…

0,1
acc,▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
loss,█▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▅▇▆▇████████████████████
val_loss,█▃▃▂▁▂▁▁▁▂▂▂▃▃▃▄▄▄▅▅▅▅▅▆▆
validation_accuracy,▁

0,1
acc,33.93555
epoch,25.0
loss,0.06391
test_accuracy,31.49414
train_accuracy,81.50977
val_acc,33.93555
val_loss,0.51736
validation_accuracy,33.93555
