## IMPORTING LIBRARIES

In [1]:
import requests,zipfile,io
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from torch import optim
import numpy as np
import random
import torch.nn.functional as F
import warnings
warnings.filterwarnings("ignore")
import wandb

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.cuda.empty_cache()

In [3]:
print(device)

cuda


## DOWNLOADING AND UNZIPPING DATA

In [4]:
def download_data(url="https://drive.google.com/u/0/uc?id=1uRKU4as2NlS9i8sdLRS1e326vQRdhvfw&export=download"):
    response=requests.get(url)
    z = zipfile.ZipFile(io.BytesIO(response.content))
    z.extractall()

## METHODS FOR GETTING CHARACTERS FOR CORPUSS AND ADDING THEIR INDICES

In [5]:
def get_corpus(data):
    eng_corpus=set()
    hin_corpus=set()
    for i in range(0,len(data)):
        eng_word=data[0][i]
        hin_word=data[1][i]
        for ch in eng_word:
            eng_corpus.add(ch)
        for ch in hin_word:
            hin_corpus.add(ch)
        # End Delimiter
        eng_corpus.add('#')
        hin_corpus.add('#')
        hin_corpus.add('$')
        eng_corpus.add('$')
        # Start Delimiter
#         eng_corpus.add('^')
        hin_corpus.add('^')
    return hin_corpus,eng_corpus

In [6]:
def word2index(data):
    hin_corpus,eng_corpus=get_corpus(data)
    engchar_idx={}
    hinchar_idx={}
    idx_engchar={}
    idx_hinchar={}
    i=0
    for char in eng_corpus:
        engchar_idx[char]=i
        idx_engchar[i]=char
        i+=1
    i=0
    for char in hin_corpus:
        hinchar_idx[char]=i
        idx_hinchar[i]=char
        i+=1
    return engchar_idx,hinchar_idx,idx_engchar,idx_hinchar,len(eng_corpus),len(hin_corpus)

## DATA PREPROCESSING

In [7]:
def maxlen(data):
    maxlen_eng=0
    maxlen_hin=0
    for i in range(0,len(data)):
        eng_word=data[0][i]
        hin_word=data[1][i]
        if(len(eng_word)>maxlen_eng):
            maxlen_eng=len(eng_word)
        if(len(hin_word)>maxlen_hin):
            maxlen_hin=len(hin_word)
    return maxlen_eng,maxlen_hin

In [8]:
def pre_process(data,eng_to_idx,hin_to_idx):
    eng=[]
    hin=[]
    maxlen_eng,maxlen_hin=maxlen(data)
    
    unknown= eng_to_idx['$']
    for i in range(0,len(data)):
        sz=0
        eng_word=data[0][i]
        hin_word='^'+data[1][i]
        eng_word = eng_word.ljust(maxlen_eng+1, '#')
        hin_word = hin_word.ljust(maxlen_hin+1, '#')
        idx=[]
        for char in eng_word:
            if eng_to_idx.get(char) is not None:
                idx.append(eng_to_idx[char])
            else:
                idx.append(unknown)
        eng.append(idx)
        idx=[]
        for char in hin_word:
            if hin_to_idx.get(char) is not None:
                idx.append(hin_to_idx[char])
            else:
                idx.append(unknown)
        hin.append(idx)    
    return eng,hin

## LOADING OUR CUSTOM DATASET TO DATALOADER

In [9]:
class MyDataset(Dataset):
    def __init__(self, train_x,train_y, transform=None):
        self.train_x = train_x
        self.train_y = train_y
        self.transform = transform
        
    
    def __len__(self):
        return len(self.train_x)
    
    def __getitem__(self, idx):
        if self.transform:
            sample = self.transform(sample)
        return torch.tensor(self.train_x[idx]).to(device),torch.tensor(self.train_y[idx]).to(device)

def get_data():
    download_data()
    
    train_df=pd.read_csv("aksharantar_sampled/hin/hin_train.csv",header=None)
    test_df=pd.read_csv("aksharantar_sampled/hin/hin_test.csv",header=None)
    val_df=pd.read_csv("aksharantar_sampled/hin/hin_valid.csv",header=None)
    eng_to_idx,hin_to_idx,idx_to_eng,idx_to_hin,input_len,target_len=word2index(train_df)
    
    return train_df,test_df,val_df,eng_to_idx,hin_to_idx,idx_to_eng,idx_to_hin,input_len,target_len

## Seq2Seq MODEL

In [10]:
class EncoderGRU(nn.Module):
    def __init__(self,input_size,hidden_size,embedding_size,num_of_layers,batch_size,bi_directional,dropout_p=0.1):
        super(EncoderGRU,self).__init__()
        self.hidden_size=hidden_size
        self.batch_size=batch_size
        self.input_size=input_size
        self.embedding_size=embedding_size
        self.embedding=nn.Embedding(input_size,embedding_size)
        self.num_of_layers=num_of_layers
        self.bi_directional=bi_directional
        if(bi_directional=="Yes"):
            flag=True
        else:
            flag=False
        self.gru = nn.GRU(embedding_size,hidden_size,num_of_layers,bidirectional=flag)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self,input,hidden):
        embedded=self.embedding(input).view(-1,self.batch_size, self.embedding_size)
        embedded = self.dropout(embedded)
        output,hidden=self.gru(embedded,hidden)
    
        if self.bi_directional=="Yes":
            hidden=hidden.resize(2,self.num_of_layers,self.batch_size,self.hidden_size)
            hidden=torch.add(hidden[0],hidden[1])/2
            
        return output,hidden

    def initHidden(self):
        if(self.bi_directional=="Yes"):
            return torch.zeros(2*self.num_of_layers,self.batch_size,self.hidden_size,device=device)
        else:
            return torch.zeros(self.num_of_layers,self.batch_size,self.hidden_size,device=device)

class DecoderGRU(nn.Module):
    def __init__(self, output_size,hidden_size, embedding_size, decoder_layers,batch_size,dropout_p=0.1):
        super(DecoderGRU, self).__init__()
        self.hidden_size = hidden_size
        self.embedding_size=embedding_size
        self.embedding = nn.Embedding(output_size, embedding_size)
        self.gru = nn.GRU(embedding_size,hidden_size, decoder_layers)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
        self.batch_size=batch_size
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(-1, self.batch_size, self.embedding_size)
        embedded = self.dropout(embedded)
        output, hidden = self.gru(embedded, hidden)
        output = self.softmax(self.out(output))
        return output, hidden

In [11]:
class EncoderRNN(nn.Module):
    def __init__(self,input_size,hidden_size,embedding_size,num_of_layers,batch_size,bi_directional,dropout_p=0.1):
        super(EncoderRNN,self).__init__()
        self.hidden_size=hidden_size
        self.batch_size=batch_size
        self.input_size=input_size
        self.embedding_size=embedding_size
        self.embedding=nn.Embedding(input_size,embedding_size)
        self.num_of_layers=num_of_layers
        self.bi_directional=bi_directional
        if(bi_directional=="Yes"):
            flag=True
        else:
            flag=False
        self.rnn = nn.RNN(embedding_size,hidden_size,num_of_layers,bidirectional=flag)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self,input,hidden):
        embedded=self.embedding(input).view(-1,self.batch_size, self.embedding_size)
        embedded = self.dropout(embedded)
        output,hidden=self.rnn(embedded,hidden)
    
        if self.bi_directional=="Yes":
            hidden=hidden.resize(2,self.num_of_layers,self.batch_size,self.hidden_size)
            hidden=torch.add(hidden[0],hidden[1])/2
            
        return output,hidden

    def initHidden(self):
        if(self.bi_directional=="Yes"):
            return torch.zeros(2*self.num_of_layers,self.batch_size,self.hidden_size,device=device)
        else:
            return torch.zeros(self.num_of_layers,self.batch_size,self.hidden_size,device=device)

class DecoderRNN(nn.Module):
    def __init__(self, output_size,hidden_size, embedding_size, decoder_layers,batch_size,dropout_p=0.1):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding_size=embedding_size
        self.embedding = nn.Embedding(output_size, embedding_size)
        self.rnn = nn.RNN(embedding_size,hidden_size, decoder_layers)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
        self.batch_size=batch_size
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(-1, self.batch_size, self.embedding_size)
        embedded = self.dropout(embedded)
        output, hidden = self.rnn(embedded, hidden)
        output = self.out(output)
        output = self.softmax(self.out(output))
        return output, hidden

In [12]:
class EncoderLSTM(nn.Module):
    def __init__(self,input_size,hidden_size,embedding_size,num_of_layers,batch_size,bi_directional,dropout_p=0.1):
        super(EncoderLSTM,self).__init__()
        self.hidden_size=hidden_size
        self.batch_size=batch_size
        self.input_size=input_size
        self.embedding_size=embedding_size
        self.embedding=nn.Embedding(input_size,embedding_size)
        self.num_of_layers=num_of_layers
        self.bi_directional=bi_directional
        if(bi_directional=="Yes"):
            flag=True
        else:
            flag=False
        self.lstm = nn.LSTM(embedding_size,hidden_size,num_of_layers,bidirectional=flag)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self,input,hidden,state):
        embedded=self.embedding(input).view(-1,self.batch_size, self.embedding_size)
        embedded = self.dropout(embedded)
        output,(hidden,state)=self.lstm(embedded,(hidden,state))
    
        if self.bi_directional=="Yes":
            hidden=hidden.resize(2,self.num_of_layers,self.batch_size,self.hidden_size)
            state=state.resize(2,self.num_of_layers,self.batch_size,self.hidden_size)
            hidden=torch.add(hidden[0],hidden[1])/2
            state=torch.add(state[0],hidden[1])/2
            
        return output,hidden,state

    def initHidden(self):
        if(self.bi_directional=="Yes"):
            return torch.zeros(2*self.num_of_layers,self.batch_size,self.hidden_size,device=device)
        else:
            return torch.zeros(self.num_of_layers,self.batch_size,self.hidden_size,device=device)
    
    def initState(self):
        if(self.bi_directional=="Yes"):
            return torch.zeros(2*self.num_of_layers,self.batch_size,self.hidden_size,device=device)
        else:
            return torch.zeros(self.num_of_layers,self.batch_size,self.hidden_size,device=device)

class DecoderLSTM(nn.Module):
    def __init__(self, output_size,hidden_size, embedding_size, decoder_layers,batch_size,dropout_p=0.1):
        super(DecoderLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.embedding_size=embedding_size
        self.embedding = nn.Embedding(output_size, embedding_size)
        self.lstm = nn.LSTM(embedding_size,hidden_size,decoder_layers)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
        self.batch_size=batch_size
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, input,hidden,state):
        embedded = self.embedding(input).view(-1, self.batch_size, self.embedding_size)
        embedded = self.dropout(embedded)
        output,(hidden,state)=self.lstm(embedded,(hidden,state))
        output = self.out(output)
        return output,hidden,state

## ATTENTION MECHANISM

In [13]:
class AttnDecoder(nn.Module):
    def __init__(self,output_size,hidden_size,embedding_size,decoder_layers,batch_size,cell_type,dropout_p=0.1):
        super(AttnDecoder, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.batch_size=batch_size
        self.cell_type=cell_type
        self.embedding_size=embedding_size
        self.decoder_layers=decoder_layers
        
        self.embedding = nn.Embedding(self.output_size, self.embedding_size)
        self.dropout = nn.Dropout(self.dropout_p)

        self.U=nn.Linear(self.hidden_size,self.hidden_size,bias=False).to(device)
        self.W=nn.Linear(self.hidden_size,self.hidden_size,bias=False).to(device)
        self.V=nn.Linear(self.hidden_size,1,bias=False).to(device)
        
        self.linear=nn.Linear(self.hidden_size,output_size,bias=True)
        self.softmax=nn.Softmax()
        if(cell_type=="GRU"):
            self.gru = nn.GRU(self.embedding_size+self.hidden_size, self.hidden_size,self.decoder_layers)
        if(cell_type=="LSTM"):
            self.lstm = nn.LSTM(self.embedding_size+self.hidden_size, self.hidden_size,self.decoder_layers)
        if(cell_type=="RNN"):
            self.rnn = nn.RNN(self.embedding_size+self.hidden_size, self.hidden_size,self.decoder_layers)

    def forward(self, input, hidden,encoder_outputs,word_length,state=None):
        embedded = self.embedding(input).view(-1,self.batch_size, self.embedding_size)
        embedded = self.dropout(embedded)
        T=word_length
        temp=self.W(hidden[-1])
        c=torch.zeros(self.batch_size,self.hidden_size).to(device)
        
        
        for j in range(0,T):
            e_j=self.V(torch.tanh(self.U(encoder_outputs[j])+temp))
            alpha_j=self.softmax(e_j)
            c+=alpha_j*encoder_outputs[j]
        

        final_input=torch.cat((embedded,c.unsqueeze(0)),dim=2)
        
        
        if(self.cell_type=="GRU"):
            output,hidden=self.gru(final_input,hidden)
        if(self.cell_type=="RNN"):
            output,hidden=self.rnn(final_input,hidden)
        if(self.cell_type=="LSTM"):
            output, (hidden,state) =self.lstm(final_input,(hidden,state))
        
        output1=self.linear(output)
        
        if(self.cell_type=="GRU" or self.cell_type=="RNN"):
            return output1, hidden, c
        if(self.cell_type=="LSTM"):
            return output1, hidden, state, c

In [14]:
def train(train_data,encoder,decoder,loss_fun,encoder_optimizer,decoder_optimizer,encoder_layers,decoder_layers,batch_size,hidden_size,bi_directional,cell_type,attention):
    total_loss=0
    teacher_forcing_ratio=0.5
    for i,(train_x,train_y) in enumerate(train_data):
        loss=0
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()
        train_x=train_x.T
        train_y=train_y.T
        timesteps=len(train_x)
        
        if cell_type=='GRU' or cell_type=='RNN':
            
            encoder_hidden=encoder.initHidden()
            encoder_output,encoder_hidden=encoder(train_x,encoder_hidden)
            if(decoder_layers>encoder_layers):
                i = decoder_layers
                decoder_hidden=encoder_hidden

                while True:
                    if(i==encoder_layers):
                        break
                    # Concatenate the two tensors along the first dimension
                    decoder_hidden = torch.cat([decoder_hidden, encoder_hidden[-1].unsqueeze(0)], dim=0)
                    i-=1

            elif(decoder_layers<encoder_layers):
                decoder_hidden=encoder_hidden[-decoder_layers:]

            else:
                decoder_hidden=encoder_hidden
        
            decoder_input = train_y[0]
            
            if(bi_directional=="Yes"):
                split_tensor= torch.split(encoder_output, hidden_size, dim=-1)
                encoder_output=torch.add(split_tensor[0],split_tensor[1])/2
            
            
            use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
            if use_teacher_forcing:
                for i in range(0,len(train_y)):
                    if(attention=="Yes"):
                        decoder_output, decoder_hidden, attn_weights=decoder(decoder_input,decoder_hidden,encoder_output,len(train_x))
                        loss+=loss_fun(torch.squeeze(decoder_output), train_y[i])
                        decoder_input = train_y[i] 
                    else:
                        decoder_output, decoder_hidden= decoder(decoder_input, decoder_hidden)
                        loss+=loss_fun(torch.squeeze(decoder_output), train_y[i])
                        decoder_input = train_y[i]  # Teacher forcing
            else:
                for i in range(0,len(train_y)):
                    if(attention=="Yes"):
                        decoder_output, decoder_hidden, attn_weights=decoder(decoder_input,decoder_hidden,encoder_output,len(train_x))
                        max_prob,index=decoder_output.topk(1)
                        loss+=loss_fun(torch.squeeze(decoder_output), train_y[i])
                        decoder_input=index
                    else:
                        decoder_output,decoder_hidden=decoder(decoder_input,decoder_hidden)
                        max_prob,index=decoder_output.topk(1)
                        loss+=loss_fun(torch.squeeze(decoder_output), train_y[i])
                        decoder_input=index
            loss.backward()
            encoder_optimizer.step()
            decoder_optimizer.step()
            total_loss+=loss
        
        if cell_type=='LSTM':
    
            encoder_hidden=encoder.initHidden()
            encoder_state=encoder.initState()
            
            encoder_output,encoder_hidden,encoder_state=encoder(train_x,encoder_hidden,encoder_state)
        
            if(decoder_layers>encoder_layers):
                i = decoder_layers
                decoder_hidden=encoder_hidden
                decoder_state=encoder_state
                while True:
                    if(i==encoder_layers):
                        break
                    # Concatenate the two tensors along the first dimension
                    decoder_hidden = torch.cat([decoder_hidden, encoder_hidden[-1].unsqueeze(0)], dim=0)
                    decoder_state = torch.cat([decoder_state, encoder_state[-1].unsqueeze(0)], dim=0)
                    i-=1

            elif(decoder_layers<encoder_layers):
                decoder_hidden=encoder_hidden[-decoder_layers:]
                decoder_state=encoder_state[-decoder_layers:]

            else:
                decoder_hidden=encoder_hidden
                decoder_state=encoder_state
            
            
            if(bi_directional=="Yes"):
                split_tensor= torch.split(encoder_output, hidden_size, dim=-1)
                encoder_output=torch.add(split_tensor[0],split_tensor[1])/2
            
            decoder_input = train_y[0]
            use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
            if use_teacher_forcing:
                for i in range(0,len(train_y)):
                    if(attention=="Yes"):
                        decoder_output, decoder_hidden, decoder_state, attn_weights=decoder(decoder_input,decoder_hidden,encoder_output,len(train_x),decoder_state)
                        loss+=loss_fun(torch.squeeze(decoder_output), train_y[i])
                        decoder_input= train_y[i]
                    else:
                        decoder_output, decoder_hidden,decoder_state= decoder(decoder_input, decoder_hidden,decoder_state)
                        loss+=loss_fun(torch.squeeze(decoder_output), train_y[i])
                        decoder_input = train_y[i]  # Teacher forcing
            else:
                for i in range(0,len(train_y)):
                    if(attention=="Yes"):
                        decoder_output, decoder_hidden, decoder_state, attn_weights=decoder(decoder_input,decoder_hidden,encoder_output,len(train_x),decoder_state)
                        max_prob,index=decoder_output.topk(1)
                        loss+=loss_fun(torch.squeeze(decoder_output), train_y[i])
                        decoder_input=index
                    else:
                        decoder_output, decoder_hidden,decoder_state= decoder(decoder_input, decoder_hidden,decoder_state)
                        max_prob,index=decoder_output.topk(1)
                        loss+=loss_fun(torch.squeeze(decoder_output), train_y[i])
                        decoder_input=index
            loss.backward()
            encoder_optimizer.step()
            decoder_optimizer.step()
            total_loss+=loss

        
        
    return total_loss.item()/len(train_y),encoder,decoder

In [15]:
def train_iter(input_data,val_data,val_y,input_len,target_len,epochs,batch_size,embedding_size,encoder_layers,decoder_layers,hidden_size,cell_type,bi_directional,dropout,beam_size,attention):
    lr=0.001
    if(cell_type=='GRU'):
        encoder=EncoderGRU(input_len,hidden_size,embedding_size,encoder_layers,batch_size,bi_directional,dropout).to(device)
        if(attention=="Yes"):
            decoder=AttnDecoder(target_len,hidden_size,embedding_size,decoder_layers,batch_size,cell_type,dropout).to(device)
        else:
            decoder=DecoderGRU(target_len,hidden_size,embedding_size,decoder_layers,batch_size,dropout).to(device)
        
    if(cell_type=='RNN'):
        encoder=EncoderRNN(input_len,hidden_size,embedding_size,encoder_layers,batch_size,bi_directional,dropout).to(device)
        if(attention=="Yes"):
            decoder=AttnDecoder(target_len,hidden_size,embedding_size,decoder_layers,batch_size,cell_type,dropout).to(device)
        else:
            decoder=DecoderRNN(target_len,hidden_size,embedding_size,decoder_layers,batch_size,dropout).to(device)
    
    if cell_type=='LSTM':
        encoder=EncoderLSTM(input_len,hidden_size,embedding_size,encoder_layers,batch_size,bi_directional,dropout).to(device)
        if(attention=="Yes"):
            decoder=AttnDecoder(target_len,hidden_size,embedding_size,decoder_layers,batch_size,cell_type,dropout).to(device)
        else:
            decoder=DecoderLSTM(target_len,hidden_size,embedding_size,decoder_layers,batch_size,dropout).to(device)

    encoder_optimizer=optim.Adam(encoder.parameters(),lr)
    decoder_optimizer=optim.Adam(decoder.parameters(),lr)
    loss_fun=nn.CrossEntropyLoss(reduction="sum")
    epoch_train_loss=[]
    epoch_val_loss=[]
    epoch_val_acc=[]
    for i in range(0,epochs):
        loss,encoder,decoder=train(input_data,encoder,decoder,loss_fun,encoder_optimizer,decoder_optimizer,
                                   encoder_layers,decoder_layers,batch_size,hidden_size,bi_directional,
                                   cell_type,attention)
        val_predictions,val_loss=eval(val_data,encoder,decoder,encoder_layers,decoder_layers,
                                  batch_size,hidden_size,bi_directional,cell_type,attention)
        
        epoch_val_loss.append(val_loss)
        epoch_train_loss.append(loss/51200)
        
        val_acc=accuracy(val_predictions,val_y)
        epoch_val_acc.append(val_acc)
        print(loss/51200,val_loss,val_acc)
    
#     train_predictions,t=eval(input_data,encoder,decoder,encoder_layers,decoder_layers,batch_size,hidden_size,bi_directional,cell_type,attention)
    return epoch_train_loss,epoch_val_loss,epoch_val_acc,encoder,decoder,encoder_layers,decoder_layers

In [16]:
def eval(input_data,encoder,decoder,encoder_layers,decoder_layers,batch_size,hidden_size,bi_directional,cell_type,attention):
    with torch.no_grad():
        loss_fun=nn.CrossEntropyLoss(reduction="sum")
        total_loss=0
        pred_words=list()
        for x,y in input_data:
            loss=0
            decoder_words=[]
            x=x.T
            y=y.T
            encoder_hidden=encoder.initHidden()
            timesteps=len(x)
            if cell_type=='GRU' or cell_type=='RNN':

                encoder_hidden=encoder.initHidden()
                encoder_output,encoder_hidden=encoder(x,encoder_hidden)
                if(decoder_layers>encoder_layers):
                    i = decoder_layers
                    decoder_hidden=encoder_hidden

                    while True:
                        if(i==encoder_layers):
                            break
                        # Concatenate the two tensors along the first dimension
                        decoder_hidden = torch.cat([decoder_hidden, encoder_hidden[-1].unsqueeze(0)], dim=0)
                        i-=1

                elif(decoder_layers<encoder_layers):
                    decoder_hidden=encoder_hidden[-decoder_layers:]

                else:
                    decoder_hidden=encoder_hidden

                decoder_input = y[0]

                if(bi_directional=="Yes"):
                    split_tensor= torch.split(encoder_output, hidden_size, dim=-1)
                    encoder_output=torch.add(split_tensor[0],split_tensor[1])/2

                for i in range(0,len(y)):
                    if(attention=="Yes"):
                        decoder_output, decoder_hidden, attn_weights=decoder(decoder_input,decoder_hidden,encoder_output,len(x))
                        max_prob,index=decoder_output.topk(1)
                        loss+=loss_fun(torch.squeeze(decoder_output), y[i])
                        index=index.squeeze()
                        decoder_input=index
                        decoder_words.append(index.tolist())
                    else:
                        decoder_output,decoder_hidden=decoder(decoder_input,decoder_hidden)
                        max_prob,index=decoder_output.topk(1)
                        loss+=loss_fun(torch.squeeze(decoder_output), y[i])
                        index=index.squeeze()
                        decoder_input=index
                        decoder_words.append(index.tolist())
                decoder_words=np.array(decoder_words)
                pred_words.append(decoder_words.T)
                total_loss+=loss.item()


            if cell_type=='LSTM':

                encoder_hidden=encoder.initHidden()
                encoder_state=encoder.initState()

                encoder_output,encoder_hidden,encoder_state=encoder(x,encoder_hidden,encoder_state)

                if(decoder_layers>encoder_layers):
                    i = decoder_layers
                    decoder_hidden=encoder_hidden
                    decoder_state=encoder_state
                    while True:
                        if(i==encoder_layers):
                            break
                        # Concatenate the two tensors along the first dimension
                        decoder_hidden = torch.cat([decoder_hidden, encoder_hidden[-1].unsqueeze(0)], dim=0)
                        decoder_state = torch.cat([decoder_state, encoder_state[-1].unsqueeze(0)], dim=0)
                        i-=1

                elif(decoder_layers<encoder_layers):
                    decoder_hidden=encoder_hidden[-decoder_layers:]
                    decoder_state=encoder_state[-decoder_layers:]

                else:
                    decoder_hidden=encoder_hidden
                    decoder_state=encoder_state


                if(bi_directional=="Yes"):
                    split_tensor= torch.split(encoder_output, hidden_size, dim=-1)
                    encoder_output=torch.add(split_tensor[0],split_tensor[1])/2
                decoder_input = y[0]

                for i in range(0,len(y)):
                    if(attention=="Yes"):
                        decoder_output, decoder_hidden, decoder_state, attn_weights=decoder(decoder_input,decoder_hidden,encoder_output,len(x),decoder_state)
                        max_prob,index=decoder_output.topk(1)
                        loss+=loss_fun(torch.squeeze(decoder_output), y[i])
                        index=index.squeeze()
                        decoder_input=index
                        decoder_words.append(index.tolist())
                    else:
                        decoder_output, decoder_hidden,decoder_state= decoder(decoder_input, decoder_hidden,decoder_state)
                        max_prob,index=decoder_output.topk(1)
                        loss+=loss_fun(torch.squeeze(decoder_output), y[i])
                        index=index.squeeze()
                        decoder_input=index
                        decoder_words.append(index.tolist())
                decoder_words=np.array(decoder_words)
                pred_words.append(decoder_words.T)
                total_loss+=loss.item()


    predictions=[]
    for batch in pred_words:
        for word in batch:
            predictions.append(word)

    return predictions,total_loss/(len(predictions)*len(predictions[0]))

In [17]:
def accuracy(predictions,y):
    count=0
#     print(len(y),len(y[0]))
    for i in range(0,len(predictions)):
        p=predictions[i]
        if np.array_equal(p,y[i]):
            count+=1
    return (count/len(predictions))*100

## MODEL

In [18]:
# epochs=20
# batchsize=64
# embedding_size=256
# encoder_layers=2
# decoder_layers=3
# hidden_size=256
# cell_type="LSTM"
# bi_directional="Yes"
# dropout=0.2
# beam_size=16
# attention="No"

In [19]:
# train_df,test_df,val_df,eng_to_idx,hin_to_idx,idx_to_eng,idx_to_hin,input_len,target_len=get_data()

# train_x,train_y = pre_process(train_df,eng_to_idx,hin_to_idx)
# test_x,test_y = pre_process(test_df,eng_to_idx,hin_to_idx)
# val_x,val_y = pre_process(val_df,eng_to_idx,hin_to_idx)

# train_dataset=MyDataset(train_x,train_y)
# test_dataset=MyDataset(test_x,test_y)
# val_dataset=MyDataset(val_x,val_y)


# train_dataloader=DataLoader(train_dataset,batch_size=batchsize)
# test_dataloader=DataLoader(test_dataset,batch_size=batchsize)
# val_dataloader=DataLoader(val_dataset,batch_size=batchsize)

In [20]:
# epoch_train_loss,epoch_val_loss,epoch_val_acc,encoder,decoder,encoder_layers,decoder_layers=train_iter(train_dataloader,val_dataloader,val_y,input_len,target_len,epochs,batchsize,embedding_size,encoder_layers,decoder_layers,hidden_size,cell_type,bi_directional,dropout,beam_size,attention)

In [21]:
# print(epoch_val_acc)

In [22]:
# test_predictions,loss=eval(test_dataloader,encoder,decoder,encoder_layers,decoder_layers,batchsize,hidden_size,bi_directional,cell_type,attention)

In [23]:
# test_accuracy=accuracy(test_predictions,test_y)

In [24]:
# print(test_accuracy)

## INTEGRATING WITH WANDB

In [25]:
def wandb_run_sweeps(train_dataset,val_dataset,test_dataset,train_y,val_y,test_y,input_len,target_len):
    
    config = {
        "project":"CS6910_Assignment3",
        "method": 'random',
        "metric": {
        'name': 'acc',
        'goal': 'maximize'
        },
        'parameters' :{
        "epochs": {"values":[10,20]},
        "batchsize": {"values": [64,128,256]},
        "embedding_size": {"values":[16, 32, 64, 256, 512]},
        "hidden_size": {"values":[16, 32, 64, 256, 512]},
        "encoder_layers": {"values":[2,3,4]},
        "decoder_layers": {"values":[2,3,4]},
        "cell_type": {"values":["RNN","GRU","LSTM"]},
        "bi_directional":{"values":["Yes","No"]},
        "dropout":{"values":[0.1,0.2,0.5]},
        "attention":{"values":["Yes","No"]},
        "beam_size":{"values":[5,10,15]}
        }
    }
    def train_rnn():
        wandb.init()

        name='_CT_'+str(wandb.config.cell_type)+"_BS_"+str(wandb.config.batchsize)+"_EPOCH_"+str(wandb.config.epochs)+"_ES_"+str(wandb.config.embedding_size)+"_HS_"+str(wandb.config.hidden_size)
        
        
        train_dataloader=DataLoader(train_dataset,batch_size=wandb.config.batchsize)
        test_dataloader=DataLoader(test_dataset,batch_size=wandb.config.batchsize)
        val_dataloader=DataLoader(val_dataset,batch_size=wandb.config.batchsize)
        
        epoch_train_loss,epoch_val_loss,epoch_val_acc,encoder,decoder,encoder_layers,decoder_layers=train_iter(train_dataloader,val_dataloader,val_y,input_len,target_len,wandb.config.epochs,wandb.config.batchsize,wandb.config.embedding_size,wandb.config.encoder_layers,wandb.config.decoder_layers,wandb.config.hidden_size,wandb.config.cell_type,wandb.config.bi_directional,wandb.config.dropout,wandb.config.beam_size,wandb.config.attention)

        for i in range(wandb.config.epochs):
            wandb.log({"loss":epoch_train_loss[i]})
            wandb.log({"val_loss":epoch_val_loss[i]})
            wandb.log({"val_acc":epoch_val_acc[i]})
            wandb.log({"epoch": (i+1)})
        wandb.log({"validation_accuracy":epoch_val_acc[-1]})    
        
        train_predictions,_=eval(train_dataloader,encoder,decoder,wandb.config.encoder_layers,
                              wandb.config.decoder_layers,wandb.config.batchsize,wandb.config.hidden_size,
                              wandb.config.bi_directional,wandb.config.cell_type,wandb.config.attention)

        train_accuracy=accuracy(train_predictions,train_y)
        wandb.log({"train_accuracy":train_accuracy})
        
        test_predictions,_=eval(test_dataloader,encoder,decoder,wandb.config.encoder_layers,
                              wandb.config.decoder_layers,wandb.config.batchsize,wandb.config.hidden_size,
                              wandb.config.bi_directional,wandb.config.cell_type,wandb.config.attention)

        test_accuracy=accuracy(test_predictions,test_y)
        wandb.log({"test_accuracy":test_accuracy})
        wandb.run.name = name
        wandb.run.save()
        wandb.run.finish()
    wandb.login(key="aecb4b665a37b40204530b0627a42274aeddd3e1")
    sweep_id=wandb.sweep(config,project="CS6910_Assignment3")
    wandb.agent(sweep_id,function=train_rnn,count=20)

In [26]:
train_df,test_df,val_df,eng_to_idx,hin_to_idx,idx_to_eng,idx_to_hin,input_len,target_len=get_data()

train_x,train_y = pre_process(train_df,eng_to_idx,hin_to_idx)
test_x,test_y = pre_process(test_df,eng_to_idx,hin_to_idx)
val_x,val_y = pre_process(val_df,eng_to_idx,hin_to_idx)

train_dataset=MyDataset(train_x,train_y)
test_dataset=MyDataset(test_x,test_y)
val_dataset=MyDataset(val_x,val_y)

wandb_run_sweeps(train_dataset,val_dataset,test_dataset,train_y,val_y,test_y,input_len,target_len)

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: qq1xrvam
Sweep URL: https://wandb.ai/cs22m078/CS6910_Assignment3/sweeps/qq1xrvam


[34m[1mwandb[0m: Agent Starting Run: pnlx7nyv with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 256
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	bi_directional: No
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: Currently logged in as: [33mcs22m078[0m. Use [1m`wandb login --relogin`[0m to force relogin


3.6452071707589284 2.266708805447533 0.0
2.140413643973214 1.641264211563837 0.0
2.029067847842262 1.5635643005371094 0.0
1.872418271019345 1.453818440437317 0.0
1.8069174339657739 1.4316247190747942 0.0
1.722648460751488 1.485798642748878 0.0
1.7285330636160714 1.4132286707560222 0.0
1.6430598958333333 1.3880329699743361 0.0
1.5322760881696427 1.3602327676046462 0.0
1.54264892578125 1.3991765124457223 0.0




0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▃▃▂▂▂▂▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,█▃▃▂▂▂▁▁▁▁
validation_accuracy,▁

0,1
epoch,10.0
loss,1.54265
test_accuracy,0.0
train_accuracy,0.0
val_acc,0.0
val_loss,1.39918
validation_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: e608sfms with config:
[34m[1mwandb[0m: 	attention: Yes
[34m[1mwandb[0m: 	batchsize: 128
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 4
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_size: 64


1.5078944614955359 1.1242254149346125 0.0
1.227099841889881 1.0460248844964164 0.0
1.1143226841517857 0.9781368601889837 0.048828125
1.014278855096726 0.8872702008201963 0.5126953125
0.9350557454427083 0.7979121676513127 0.927734375
0.8687228538876488 0.7507848257110232 2.3681640625
0.8076714797247024 0.7095664555118197 2.7587890625
0.7698392740885417 0.6907098988691965 3.6376953125
0.7314649600074405 0.6776277720928192 4.7607421875
0.7100859142485119 0.7002450071630024 4.736328125
0.6763249279203869 0.6470354724498022 6.15234375
0.6790083821614583 0.628226576816468 5.5419921875
0.6440534319196429 0.628407141992024 8.88671875
0.6306691196986607 0.637699218023391 5.908203125
0.6201559012276785 0.6438492309479487 3.61328125
0.6124056570870535 0.5804729688735235 9.9365234375
0.5963765462239583 0.5860636716797238 7.5439453125
0.5934049479166666 0.578402164436522 8.056640625
0.5794583565848215 0.5737541828836713 10.5712890625
0.569851539248512 0.5717161879653022 9.86328125


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss,█▆▅▄▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▁▁▁▂▃▃▃▄▄▅▅▇▅▃█▆▆██
val_loss,█▇▆▅▄▃▃▃▂▃▂▂▂▂▂▁▁▁▁▁
validation_accuracy,▁

0,1
epoch,20.0
loss,0.56985
test_accuracy,10.86426
train_accuracy,9.86719
val_acc,9.86328
val_loss,0.57172
validation_accuracy,9.86328


[34m[1mwandb[0m: Agent Starting Run: aeb3hj92 with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 256
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: No
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 4
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_size: 16


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016727921816664094, max=1.0…

3.6598879278273806 2.1147074812934514 0.0
2.124775623139881 1.5873959348315285 0.0
1.8862330264136904 1.500337663150969 0.0
1.7244796316964286 1.4781200942539034 0.0
1.61701904296875 1.4056756837027413 0.0
1.5579937453497024 1.3397579079582578 0.0
1.5132312593005952 1.3212640626089913 0.0
1.4748318917410714 1.312008539835612 0.0
1.6661879185267858 1.317755205290658 0.0
1.498856026785714 1.3130665506635393 0.0
1.5097958519345238 1.3669274818329584 0.0
1.4657875279017858 1.3191653717131842 0.0
1.7673840913318453 1.4989395311900549 0.0
1.6408936709449404 1.4299852507455009 0.0
1.574670642671131 1.3605351731890725 0.0
1.5202356538318451 1.3476599057515461 0.0
1.5018322172619047 1.3233928510120936 0.0
1.4800094168526785 1.3271236362911405 0.0
1.4840814499627977 1.3384075845990862 0.0
1.47290283203125 1.3500133866355533 0.0


VBox(children=(Label(value='0.001 MB of 0.049 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.021767…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss,█▃▂▂▁▁▁▁▂▁▁▁▂▂▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▃▃▂▂▁▁▁▁▁▁▁▃▂▁▁▁▁▁▁
validation_accuracy,▁

0,1
epoch,20.0
loss,1.4729
test_accuracy,0.0
train_accuracy,0.0
val_acc,0.0
val_loss,1.35001
validation_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: ve8sbl4y with config:
[34m[1mwandb[0m: 	attention: Yes
[34m[1mwandb[0m: 	batchsize: 256
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_size: 64


1.6260660807291665 1.1100455522537231 0.0244140625
1.1851858956473214 0.9343312694912865 0.146484375
0.9948218936011906 0.7993489645776295 0.87890625
0.8647417922247024 0.7249235823040917 3.5400390625
0.7700097074962798 0.6559115676652818 5.9814453125
0.7081473795572917 0.6088138847124009 6.9091796875
0.6491111537388393 0.5786170420192537 8.30078125
0.6093374488467262 0.555447172550928 11.81640625
0.5492519414992559 0.5307396111034212 13.6962890625
0.5436360677083334 0.501063312802996 12.8173828125
0.5126227097284226 0.5032814400536674 16.796875
0.497694324311756 0.49399417638778687 16.943359375
0.4971492513020834 0.47004391465868267 16.8212890625
0.4747247605096726 0.46488718475614277 18.994140625
0.4654097202845982 0.4497024388540359 17.28515625
0.44979561941964286 0.4487149090993972 19.5556640625
0.439529535202753 0.445219235760825 21.1669921875
0.42373758951822915 0.4611803832508269 20.60546875
0.41395751953125 0.4536298144431341 21.875
0.4067895217168899 0.42431904730342684 21.997

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss,█▅▄▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▁▁▂▃▃▄▅▅▅▆▆▆▇▆▇████
val_loss,█▆▅▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁
validation_accuracy,▁

0,1
epoch,20.0
loss,0.40679
test_accuracy,19.40918
train_accuracy,18.73242
val_acc,21.99707
val_loss,0.42432
validation_accuracy,21.99707


[34m[1mwandb[0m: Agent Starting Run: bg4a0g62 with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 64
[34m[1mwandb[0m: 	beam_size: 15
[34m[1mwandb[0m: 	bi_directional: No
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 512


1.6887877836681549 1.1481201705478488 0.0
1.1309307570684524 0.8187647930213383 2.5390625
0.7791654459635418 0.5899996062119802 13.623046875
0.6186449614025298 0.5635850543067569 15.7958984375
0.5498428780691964 0.49554593825624105 21.0205078125
0.5042256673177083 0.49523481123504187 21.6064453125
0.47896498907180063 0.5240118042344138 21.2890625
0.4628466506231399 0.4929049572064763 22.8515625
0.43891560872395835 0.48558351220119567 24.5361328125
0.41072701590401783 0.4817100518516132 25.87890625


0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▅▃▂▂▂▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▂▅▅▇▇▇▇██
val_loss,█▅▂▂▁▁▁▁▁▁
validation_accuracy,▁

0,1
epoch,10.0
loss,0.41073
test_accuracy,25.5127
train_accuracy,29.4668
val_acc,25.87891
val_loss,0.48171
validation_accuracy,25.87891


[34m[1mwandb[0m: Agent Starting Run: 58bsgk6g with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 64
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_size: 256


1.7286879185267858 1.0544386151291074 0.1220703125
1.0792431640625 0.7914380977551142 2.7587890625
0.8266656784784225 0.6843570485001519 5.078125
0.7107477097284226 0.592219679128556 10.2783203125
0.6273887997581845 0.5705622484286627 14.6728515625
0.5837894694010416 0.5610386743432 16.552734375
0.5394591703869047 0.5349180045581999 15.234375
0.5204429408482143 0.4940967006342752 20.60546875
0.4896371605282738 0.4991914370939845 22.9736328125
0.4649928501674107 0.5029051218714032 21.533203125
0.4499412899925595 0.5269965678453445 19.677734375
0.42910865420386907 0.5087079898942084 22.4365234375
0.4089274088541666 0.4831052379948752 23.53515625
0.40655770438058036 0.4836887266664278 23.779296875
0.38670601981026786 0.4846311169011252 25.6591796875
0.36794657389322916 0.50705593505076 26.806640625
0.35504461379278274 0.4969179431597392 26.4404296875
0.34149602980840776 0.5001436475486982 27.392578125
0.3296458798363095 0.49736563790412175 28.3203125
0.31981756301153275 0.5108089255435126

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss,█▅▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▂▂▄▅▅▅▆▇▆▆▇▇▇▇█████
val_loss,█▅▃▂▂▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁
validation_accuracy,▁

0,1
epoch,20.0
loss,0.31982
test_accuracy,24.58496
train_accuracy,39.61719
val_acc,26.63574
val_loss,0.51081
validation_accuracy,26.63574


[34m[1mwandb[0m: Agent Starting Run: lpr89cc7 with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 128
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: No
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 4
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_size: 512


Run lpr89cc7 errored: RuntimeError('mat1 and mat2 shapes cannot be multiplied (128x67 and 512x67)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run lpr89cc7 errored: RuntimeError('mat1 and mat2 shapes cannot be multiplied (128x67 and 512x67)')
[34m[1mwandb[0m: Agent Starting Run: osun1fq4 with config:
[34m[1mwandb[0m: 	attention: Yes
[34m[1mwandb[0m: 	batchsize: 64
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	bi_directional: No
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32


1.4792117745535713 1.1164581576983135 0.0
1.2061850120907738 1.073468926407042 0.0244140625
1.1135909598214286 0.995263166370846 0.09765625
1.0383623976934524 0.9479561632587796 0.2197265625
0.983651646205357 0.9352762826851436 0.3173828125
0.9325552804129464 0.8936477473803929 0.3173828125
0.8931516810825894 0.8563234919593448 0.8544921875
0.8571117582775298 0.8238834419420787 1.4892578125
0.8272445824032738 0.8210599912064416 1.1962890625
0.8021164085751488 0.7768813988992146 1.85546875


VBox(children=(Label(value='0.001 MB of 0.049 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.021858…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▅▄▃▃▂▂▂▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▁▁▂▂▂▄▇▆█
val_loss,█▇▆▅▄▃▃▂▂▁
validation_accuracy,▁

0,1
epoch,10.0
loss,0.80212
test_accuracy,1.75781
train_accuracy,1.44336
val_acc,1.85547
val_loss,0.77688
validation_accuracy,1.85547


[34m[1mwandb[0m: Agent Starting Run: bmt81rfz with config:
[34m[1mwandb[0m: 	attention: Yes
[34m[1mwandb[0m: 	batchsize: 256
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 4
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_size: 16


2.414844680059524 1.6832726910000755 0.0
1.6750816127232142 1.3300852151144118 0.0
1.4432939220610121 1.228223443031311 0.0
1.383665015811012 1.1880683160963512 0.0
1.353446103050595 1.1703499271756126 0.0
1.3249908156622023 1.1548658609390259 0.0
1.3065393647693453 1.1942712182090396 0.0
1.2907554408482143 1.151590591385251 0.0
1.2800296456473215 1.1337407884143649 0.0
1.2680898902529762 1.133822304861886 0.0
1.2554917689732144 1.1060669933046614 0.0
1.2469840494791666 1.0897489786148071 0.0
1.2231187220982143 1.081655848593939 0.0
1.212201915922619 1.0638353540783836 0.0244140625
1.20422119140625 1.0513230391911097 0.09765625
1.185678013392857 1.0534814198811848 0.048828125
1.172390369233631 1.0556718508402507 0.0244140625
1.161532738095238 1.035647602308364 0.0244140625
1.1521385556175594 1.037831913857233 0.0244140625
1.1322894577752975 1.033157711937314 0.0244140625


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss,█▄▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▁▁▁▁▁▁▁▁▁▁▁▁▃█▅▃▃▃▃
val_loss,█▄▃▃▂▂▃▂▂▂▂▂▂▁▁▁▁▁▁▁
validation_accuracy,▁

0,1
epoch,20.0
loss,1.13229
test_accuracy,0.0
train_accuracy,0.0
val_acc,0.02441
val_loss,1.03316
validation_accuracy,0.02441


[34m[1mwandb[0m: Agent Starting Run: paz6uafd with config:
[34m[1mwandb[0m: 	attention: Yes
[34m[1mwandb[0m: 	batchsize: 128
[34m[1mwandb[0m: 	beam_size: 15
[34m[1mwandb[0m: 	bi_directional: No
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 4
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 4
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 256


1.402033923921131 1.1091569718860446 0.0
1.177099609375 0.8551961864743914 0.341796875
0.8812330845424107 0.5651492362930661 5.3466796875
0.624630591982887 0.44630317319007146 10.791015625
0.5042997233072917 0.4104270317724773 18.3837890625
0.42641119094122026 0.3870077636979875 23.7060546875
0.3820793224516369 0.3831728469757807 24.755859375
0.3537638636997768 0.37625775308836074 25.2197265625
0.31884036109561015 0.3751656512419383 29.6875
0.2951102992466518 0.37463122109572095 31.2744140625


VBox(children=(Label(value='0.001 MB of 0.017 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.061774…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▇▅▃▂▂▂▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▁▂▃▅▆▇▇██
val_loss,█▆▃▂▁▁▁▁▁▁
validation_accuracy,▁

0,1
epoch,10.0
loss,0.29511
test_accuracy,29.00391
train_accuracy,34.88477
val_acc,31.27441
val_loss,0.37463
validation_accuracy,31.27441


[34m[1mwandb[0m: Agent Starting Run: fhn793qs with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 64
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	bi_directional: No
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_size: 256


1.7343958100818453 1.1561776498953502 0.048828125
1.2027103097098213 0.9012898930481502 0.9033203125
0.926719447544643 0.7830449818145662 4.296875
0.7471064685639881 0.6441534807284673 8.1787109375
0.6707380603608631 0.5841118494669596 13.037109375
0.5968757556733632 0.5670441268455415 16.0888671875
0.5694728306361607 0.527673961860793 17.9443359375
0.5315557570684524 0.5129931930984769 18.359375
0.5173953683035715 0.544204709075746 19.8486328125
0.49647548130580355 0.4993885067247209 20.21484375
0.4677088564918155 0.5044838296515601 21.38671875
0.452349126906622 0.4954837745144254 21.2890625
0.4463476853143601 0.5025171722684588 21.630859375
0.42054173060825895 0.5005846328678585 23.53515625
0.41645161946614584 0.4696543113816352 25.439453125
0.38871677943638394 0.4825081559164183 26.26953125
0.3788931129092262 0.4999442444670768 25.8544921875
0.3663177780877976 0.48528714797326494 25.9765625
0.35864844912574406 0.4948236420750618 25.9521484375
0.3569630359468006 0.4850740113428661 26

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss,█▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▁▂▃▄▅▆▆▆▆▇▇▇▇██████
val_loss,█▅▄▃▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁
validation_accuracy,▁

0,1
epoch,20.0
loss,0.35696
test_accuracy,24.8291
train_accuracy,36.54492
val_acc,26.66016
val_loss,0.48507
validation_accuracy,26.66016


[34m[1mwandb[0m: Agent Starting Run: m6z25zax with config:
[34m[1mwandb[0m: 	attention: Yes
[34m[1mwandb[0m: 	batchsize: 256
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32


2.1309595889136905 1.3591498988015311 0.0
1.4354656110491073 1.1960755359558832 0.0
1.357066708519345 1.160548170407613 0.0
1.3224982561383927 1.1634808154333205 0.0
1.2925154622395834 1.1254543293090093 0.0
1.2526938011532738 1.130185808454241 0.0
1.2177804129464287 1.082827715646653 0.0244140625
1.193592471168155 1.0802425884065174 0.0
1.1711812918526787 1.0523314532779513 0.0
1.1365445963541667 1.0209269637153262 0.048828125


VBox(children=(Label(value='0.001 MB of 0.017 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.062033…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▃▃▂▂▂▂▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▁▁▁▁▁▅▁▁█
val_loss,█▅▄▄▃▃▂▂▂▁
validation_accuracy,▁

0,1
epoch,10.0
loss,1.13654
test_accuracy,0.0
train_accuracy,0.01367
val_acc,0.04883
val_loss,1.02093
validation_accuracy,0.04883


[34m[1mwandb[0m: Agent Starting Run: aao0nz1w with config:
[34m[1mwandb[0m: 	attention: Yes
[34m[1mwandb[0m: 	batchsize: 64
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 4
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32


1.4906660388764879 1.1123728113515037 0.0
1.2164654250372025 0.9578331183819544 0.1220703125
1.0575985863095239 0.8765271646635873 0.341796875
0.9524348377046131 0.7891616714852197 0.5615234375
0.8725442359561012 0.7168566989047187 1.7578125
0.8013966006324406 0.6637705131655648 3.0517578125
0.73549560546875 0.6193073796374458 5.4443359375
0.6926386951264881 0.591267812819708 6.8603515625
0.6579611932663689 0.5628446645679928 7.51953125
0.6253197079613095 0.5619369454327083 11.083984375


0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▆▄▄▃▂▂▂▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▁▁▁▂▃▄▅▆█
val_loss,█▆▅▄▃▂▂▁▁▁
validation_accuracy,▁

0,1
epoch,10.0
loss,0.62532
test_accuracy,9.2041
train_accuracy,6.39844
val_acc,11.08398
val_loss,0.56194
validation_accuracy,11.08398


[34m[1mwandb[0m: Agent Starting Run: uc52swhn with config:
[34m[1mwandb[0m: 	attention: Yes
[34m[1mwandb[0m: 	batchsize: 64
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: No
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 256


1.2585663132440477 0.833143216513452 0.390625
0.7794820731026786 0.5449046790599823 4.6875
0.5863346935453869 0.44310133975176585 9.9609375
0.49590977260044644 0.43371880976926713 17.4072265625
0.43319888160342257 0.4086947962641716 20.21484375
0.4012484886532738 0.4112401416613942 19.5556640625
0.3760856119791666 0.43527641750517343 20.3125
0.35453453427269344 0.4110079287063508 22.119140625
0.3320077659970238 0.4061562234447116 23.193359375
0.32575416201636903 0.4093274850220907 22.0703125


VBox(children=(Label(value='0.001 MB of 0.049 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.021894…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▄▃▂▂▂▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▂▄▆▇▇▇███
val_loss,█▃▂▁▁▁▁▁▁▁
validation_accuracy,▁

0,1
epoch,10.0
loss,0.32575
test_accuracy,23.99902
train_accuracy,28.3125
val_acc,22.07031
val_loss,0.40933
validation_accuracy,22.07031


[34m[1mwandb[0m: Agent Starting Run: zcfv61fm with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 64
[34m[1mwandb[0m: 	beam_size: 15
[34m[1mwandb[0m: 	bi_directional: No
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 4
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64


2.0288409133184526 1.4119382259391604 0.0
1.59712890625 1.2796652189322881 0.0
1.5163268461681547 1.3282494715281896 0.0
1.4723300316220238 1.4925553585801805 0.0
1.3710956101190477 1.2654888927936554 0.0244140625
1.3334823753720237 1.2485774414879935 0.0
1.282146228608631 1.2469603362537565 0.0
1.2197607421875 1.1259913856074923 0.0244140625
1.186494373139881 1.126806575627554 0.0244140625
1.12552978515625 1.118520275467918 0.1220703125


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▅▄▄▃▃▂▂▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▁▁▁▂▁▁▂▂█
val_loss,▆▄▅█▄▃▃▁▁▁
validation_accuracy,▁

0,1
epoch,10.0
loss,1.12553
test_accuracy,0.09766
train_accuracy,0.03906
val_acc,0.12207
val_loss,1.11852
validation_accuracy,0.12207


[34m[1mwandb[0m: Agent Starting Run: r3jhd6gk with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 64
[34m[1mwandb[0m: 	beam_size: 15
[34m[1mwandb[0m: 	bi_directional: No
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 4
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 512


1.7557355608258929 1.3230597092991783 0.0
1.3588734654017858 1.1010677672567821 0.146484375
1.1543196614583333 1.2135037850765955 0.390625
0.9131734212239583 0.7795744587977728 5.1025390625
0.7727834356398811 0.6397450012820107 10.3759765625
0.66748779296875 0.6063901065360933 14.16015625
0.626137927827381 0.5967186966112682 15.1123046875
0.5857156808035715 0.576125389053708 16.30859375
0.5623193359375 0.5561215501456034 17.7490234375
0.5321393112909226 0.5269695733274732 20.703125


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▆▅▃▂▂▂▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▁▁▃▅▆▆▇▇█
val_loss,█▆▇▃▂▂▂▁▁▁
validation_accuracy,▁

0,1
epoch,10.0
loss,0.53214
test_accuracy,19.31152
train_accuracy,18.59375
val_acc,20.70312
val_loss,0.52697
validation_accuracy,20.70312


[34m[1mwandb[0m: Agent Starting Run: 76pqiaf9 with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 256
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 16


3.7201374162946426 2.1752912317003523 0.0
2.0600420851934524 1.589698973156157 0.0
1.7151310221354166 1.458583150591169 0.0
1.6462313988095238 1.3896115393865676 0.0
1.5618269856770834 1.3401772692089988 0.0
1.5111382765997023 1.2828459626152402 0.0
1.4656581333705359 1.2660952465874808 0.0
1.4697730654761905 1.2295193445114863 0.0
1.4258199637276787 1.2611784140268962 0.0
1.40858154296875 1.2256588027590798 0.0


VBox(children=(Label(value='0.001 MB of 0.049 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.021943…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▃▂▂▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▁▁▁▁▁▁▁▁▁
val_loss,█▄▃▂▂▁▁▁▁▁
validation_accuracy,▁

0,1
epoch,10.0
loss,1.40858
test_accuracy,0.0
train_accuracy,0.0
val_acc,0.0
val_loss,1.22566
validation_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: 8wpwyjse with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 128
[34m[1mwandb[0m: 	beam_size: 15
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_size: 256


Run 8wpwyjse errored: RuntimeError('mat1 and mat2 shapes cannot be multiplied (128x67 and 256x67)')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 8wpwyjse errored: RuntimeError('mat1 and mat2 shapes cannot be multiplied (128x67 and 256x67)')
[34m[1mwandb[0m: Agent Starting Run: s0r7cdcv with config:
[34m[1mwandb[0m: 	attention: No
[34m[1mwandb[0m: 	batchsize: 64
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: No
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 4
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_size: 32


1.5222963169642858 1.149841437737147 0.0
1.2971844773065475 1.1056677003701527 0.0
1.2147366768973213 1.0642197075344266 0.0
1.1519305710565475 1.0085400740305583 0.0244140625
1.0967921084449406 0.9481095529737926 0.0732421875
1.0468924386160714 0.9043796105044228 0.244140625
1.003049083891369 0.8903561333815256 0.2197265625
0.9712829008556548 0.8651931151038125 0.341796875
0.9403611537388393 0.848476672456378 0.341796875
0.9136065383184524 0.8363199659756252 0.5615234375
0.8798432849702381 0.8006411875997271 0.830078125
0.8630818684895832 0.7773186351571765 0.9521484375
0.8363820684523811 0.7608973128455025 1.5869140625
0.821109386625744 0.7632800965082078 1.9287109375
0.7972320847284226 0.7462748892250515 2.1728515625
0.778038562593006 0.7417981603315899 2.0263671875
0.7647118559337798 0.7247082662014734 3.0029296875
0.7467515345982143 0.7236880972271874 3.076171875
0.7280057198660714 0.7036481166169757 3.6865234375
0.7243287876674107 0.6901430913380214 3.02734375


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss,█▆▅▅▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▁▁▁▁▁▁▂▂▂▃▃▄▅▅▅▇▇█▇
val_loss,█▇▇▆▅▄▄▄▃▃▃▂▂▂▂▂▂▂▁▁
validation_accuracy,▁

0,1
epoch,20.0
loss,0.72433
test_accuracy,3.24707
train_accuracy,1.85352
val_acc,3.02734
val_loss,0.69014
validation_accuracy,3.02734


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ygux4yzd with config:
[34m[1mwandb[0m: 	attention: Yes
[34m[1mwandb[0m: 	batchsize: 128
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	bi_directional: Yes
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 4
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_size: 16


1.9614734468005952 1.2238169766607738 0.0
1.374841076078869 1.1583850270225888 0.0
1.309494861421131 1.1048751473426819 0.0
1.2534951636904763 1.05692210935411 0.0
1.2054427083333332 1.0003062401499068 0.0
1.1623709542410714 0.9755965982164655 0.0
1.1213631184895834 0.9357057639530727 0.0244140625
1.085332496279762 0.9077214513506208 0.0244140625
1.0500760323660714 0.8912830324400038 0.0732421875
1.0183858816964286 0.8699745280402047 0.1220703125
0.99902099609375 0.8664898106030056 0.1708984375
0.9832378859747024 0.8349934617678324 0.244140625
0.9630300176711311 0.8262676823706854 0.3173828125
0.9443851143973214 0.8046604664552779 0.48828125
0.9281647019159226 0.7960791871661231 0.48828125
0.9113262067522321 0.7917549524988446 0.3662109375
0.8997060430617561 0.7799451663380578 0.439453125
0.8831096540178571 0.7752973706949324 0.732421875
0.8771052478608631 0.7611341958954221 0.87890625
0.86157470703125 0.7532032146340325 0.68359375


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss,█▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁
test_accuracy,▁
train_accuracy,▁
val_acc,▁▁▁▁▁▁▁▁▂▂▂▃▄▅▅▄▅▇█▆
val_loss,█▇▆▆▅▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁
validation_accuracy,▁

0,1
epoch,20.0
loss,0.86157
test_accuracy,0.83008
train_accuracy,0.32617
val_acc,0.68359
val_loss,0.7532
validation_accuracy,0.68359
