In [None]:
import os
import wandb
import torch
import random
import argparse
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from datetime import datetime
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torch.autograd import Variable

### Create an argument parser object
# parser=argparse.ArgumentParser()

### Define command-line arguments with descriptions, data types, default values, and choices where applicable
### WandB project name
# parser.add_argument('-wp',      '--wandb_project',      help='project name',                                                    type=str,       default='CS6910_A3_')

### WandB entity name
# parser.add_argument('-we',      '--wandb_entity',       help='entity name',                                                     type=str,       default='cs22m064'  )

### Number of Epochs
# parser.add_argument('-e',       '--epochs',             help='epochs',                          choices=[5,10],                 type=int,       default=5           )

### Batch Size
# parser.add_argument('-b',       '--batch_size',         help='batch sizes',                     choices=[32,64,128],            type=int,       default=32          )

### Optimizer Choice
# parser.add_argument('-o',       '--optimizer',          help='optimizer',                       choices=['adam','nadam'],       type=str,       default='adam'      )

### Learning Rate
# parser.add_argument('-lr',      '--learning_rate',      help='learning rates',                  choices=[1e-2,1e-3],            type=float,     default=1e-2        )

### Number Of layers in Encoder
# parser.add_argument('-nle',     '--num_layers_en',      help='number of layers in encoder',     choices=[1,2,3],                type=int,       default=2           )

### Number of layers in Decoder
# parser.add_argument('-nld',     '--num_layers_dec',     help='number of layers in decoder',     choices=[1,2,3],                type=int,       default=2           )
# parser.add_argument('-sz',      '--hidden_size',        help='hidden layer size',               choices=[16,32,64,256],         type=int,       default=256         )

### Add Input Language
# parser.add_argument('-il',      '--input_lang',         help='input language',                  choices=['hin','tel'],          type=str,       default='hin'       )

### Add target Language
# parser.add_argument('-tl',      '--target_lang',        help='target language',                 choices=['hin','tel'],          type=str,       default='hin'       )

### Select Cell Type :LSTM , GRU, RNN
# parser.add_argument('-ct',      '--cell_type',          help='cell type',                       choices=['LSTM','GRU','RNN'],   type=str,       default='LSTM'      )

### Select Drop out value
# parser.add_argument('-do',      '--drop_out',           help='drop out',                        choices=[0.0,0.2,0.3],          type=float,     default='0.2'       )

### Select Embedding size
# parser.add_argument('-es',      '--embedding_size',     help='embedding size',                  choices=[16,32,64,256],         type=int,       default=256         )

### Bidirectional true or false
# parser.add_argument('-bd',      '--bidirectional',      help='bidirectional',                   choices=[True,False],           type=bool,      default=False       )

### Attention True or False
# parser.add_argument('-at',      '--attention',          help='attention',                       choices=[True,False],           type=bool,      default=True        )

# args=parser.parse_args()

project_name_ap     = 'Assifnment_3'
entity_name_ap      = 'samruddhipatil2526'
# epochs_ap           = args.epochs
# batch_size_ap       = args.batch_size
# optimizer_ap        = args.optimizer
# learning_rate_ap    = args.learning_rate
# num_layers_en_ap    = args.num_layers_en
# num_layers_dec_ap   = args.num_layers_dec
# hidden_size_ap      = args.hidden_size
input_lang_ap       = 'eng'
target_lang_ap      = 'hin'
# cell_type_ap        = args.cell_type
# drop_out_ap         = args.drop_out
# embedding_size_ap   = args.embedding_size
# bidirectional_ap    = args.bidirectional
attention_ap        = False

#dir = '/kaggle/input/aksharantar/aksharantar_sampled'
### Define the directory path where the data is located
dir = '/kaggle/working/aksharantar_sampled'

### Check if CUDA is available for GPU acceleration
use_cuda = torch.cuda.is_available()

# Define special tokens for the vocabulary
### Start-of-sequence token
SOS_token = 0
### End-of-sequence token
EOS_token = 1
### Unknown token
UNK_token = 3
### Padding token
PAD_token = 4


### # Define the sweep configuration using Bayesian optimization method
sweep_config ={
    'method':'bayes'
}

metric = {
    'name' : 'validation_accuracy',
    'goal' : 'maximize'
}
sweep_config['metric'] = metric

parameters_dict={
    'epochs':{
        'values' : [10]
    },
    'hidden_size':{
        'values' : [128,256,512]
    },
    'cell_type':{
        'values' : ['LSTM','RNN','GRU']
    },
    'learning_rate':{
        'values' : [1e-2,1e-3]
    },
    'num_layers_en':{
        'values' : [1,2,3]
    },
    'num_layers_dec':{
        'values' : [1,2,3]
    },
    'drop_out':{
        'values' : [0.0,0.2,0.3]
    },
    'embedding_size':{
        'values' : [64,128,256,512]
    },
    'batch_size':{
        'values' : [32,64,128]
    },
    'optimizer':{
        'values' : ['adam','nadam']
    },
    'bidirectional':{
        'values' : [True,False]
    }
}
sweep_config['parameters'] = parameters_dict

sweep_id = wandb.sweep(sweep_config, project=project_name_ap)

class Vocabulary:

    def __init__(self, name):
        self.char2count = {}
        self.char2index = {}
        self.n_chars = 4
        self.index2char = {0: '<', 1: '>',2 : '?', 3:'.'}
        self.name = name

    def addWord(self, word):
        for char in word:
            if char not in self.char2index:
                self.char2index[char] = self.n_chars
                self.index2char[self.n_chars] = char
                self.char2count[char] = 1
                self.n_chars += 1
            else:
                self.char2count[char] += 1
        

def prepareData(dir, lang1, lang2):

    data = pd.read_csv(dir,sep=",",names=['input', 'target'])
    max_input_length = max([len(txt) for txt in data['input'].to_list()])
    max_target_length = max([len(txt) for txt in data['target'].to_list()])

    input_lang = Vocabulary(lang1)
    output_lang = Vocabulary(lang2)

    pairs = []
    input_list,target_list = data['input'].to_list(),data['target'].to_list()
    for i in range(len(input_list)):
        pairs.append([input_list[i],target_list[i]])

    for pair in pairs:
        input_lang.addWord(pair[0])
        output_lang.addWord(pair[1])

    prepared_data = {
        'input_lang' : input_lang,
        'output_lang' : output_lang,
        'pairs' : pairs,
        'max_input_length' : max_input_length,
        'max_target_length' : max_target_length
    }

    return prepared_data

class EncoderRNN(nn.Module):
    def __init__(self, input_size, configuration):
        """
        Initialize the EncoderRNN module.

        Args:
            input_size (int): Size of the input vocabulary.
            configuration (dict): Configuration dictionary containing model parameters.

        Configuration Parameters:
            - embedding_size (int): Size of the embedding layer.
            - hidden_size (int): Size of the hidden state.
            - num_layers_encoder (int): Number of layers in the encoder.
            - cell_type (str): Type of RNN cell ('RNN', 'GRU', or 'LSTM').
            - drop_out (float): Dropout rate.
            - bi_directional (bool): Whether the encoder is bidirectional.
        """
        super(EncoderRNN, self).__init__()

        self.embedding_size = configuration['embedding_size']
        self.hidden_size = configuration['hidden_size']
        self.num_layers_encoder = configuration["num_layers_encoder"]
        self.cell_type = configuration["cell_type"]
        self.drop_out = configuration['drop_out']
        self.bi_directional = configuration['bi_directional']

        self.embedding = nn.Embedding(input_size, self.embedding_size)
        self.dropout = nn.Dropout(self.drop_out)
        self.cell_layer = None
        if self.cell_type == 'RNN':
            self.cell_layer = nn.RNN(self.embedding_size, self.hidden_size, num_layers = self.num_layers_encoder, dropout = self.drop_out, bidirectional = self.bi_directional)
        elif self.cell_type == 'GRU':
            self.cell_layer = nn.GRU(self.embedding_size, self.hidden_size, num_layers = self.num_layers_encoder, dropout = self.drop_out, bidirectional = self.bi_directional)
        elif self.cell_type == 'LSTM':
            self.cell_layer = nn.LSTM(self.embedding_size, self.hidden_size, num_layers = self.num_layers_encoder, dropout = self.drop_out, bidirectional = self.bi_directional)
 
    def forward(self, input, batch_size, hidden):
        """
        Forward pass of the EncoderRNN.

        Args:
            input (Tensor): Input tensor containing indices of input sequence.
            batch_size (int): Size of the input batch.
            hidden (Tensor): Initial hidden state.

        Returns:
            output (Tensor): Output tensor from the RNN layer.
            hidden (Tensor): Hidden state tensor.
        """
        embedded = self.dropout(self.embedding(input).view(1,batch_size, -1))
        output = embedded
        output, hidden = self.cell_layer(output, hidden)
        return output, hidden

    def initHidden(self ,batch_size, num_layers_enc):
        """
        Initialize the hidden state tensor.

        Args:
            batch_size (int): Size of the input batch.
            num_layers_enc (int): Number of layers in the encoder.

        Returns:
            Tensor: Initialized hidden state tensor.
        """
        res = None
        if self.bi_directional:
            res = torch.zeros(num_layers_enc* 2, batch_size, self.hidden_size)
        else:
            res = torch.zeros(num_layers_enc, batch_size, self.hidden_size)
        if use_cuda : 
            return res.cuda()
        else :
            return res

class DecoderRNN(nn.Module):
    def __init__(self, configuration,  output_size):
        super(DecoderRNN, self).__init__()
        """
        Initialize the DecoderRNN module.

        Args:
            configuration (dict): Configuration dictionary containing model parameters.
            output_size (int): Size of the output vocabulary.
        
        Configuration Parameters:
            - embedding_size (int): Size of the embedding layer.
            - hidden_size (int): Size of the hidden state.
            - num_layers_decoder (int): Number of layers in the decoder.
            - cell_type (str): Type of RNN cell ('RNN', 'GRU', or 'LSTM').
            - drop_out (float): Dropout rate.
            - bi_directional (bool): Whether the decoder is bidirectional.
        """
        self.embedding_size = configuration['embedding_size']
        self.hidden_size = configuration['hidden_size']
        self.num_layers_decoder = configuration["num_layers_decoder"]
        self.cell_type = configuration["cell_type"]
        self.drop_out = configuration["drop_out"]
        self.bi_directional = configuration["bi_directional"]
        self.dropout = nn.Dropout(self.drop_out)
        
        self.embedding = nn.Embedding(output_size, self.embedding_size)

        self.cell_layer = None
        if self.cell_type == 'RNN':
            self.cell_layer = nn.RNN(self.embedding_size, self.hidden_size, num_layers = self.num_layers_decoder, dropout = self.drop_out, bidirectional = self.bi_directional)
        elif self.cell_type == 'GRU':
            self.cell_layer =   nn.GRU(self.embedding_size, self.hidden_size, num_layers = self.num_layers_decoder, dropout = self.drop_out, bidirectional = self.bi_directional)
        elif self.cell_type == 'LSTM':
            self.cell_layer = nn.LSTM(self.embedding_size, self.hidden_size, num_layers = self.num_layers_decoder, dropout = self.drop_out, bidirectional = self.bi_directional)
        
        if self.bi_directional:
            self.out = nn.Linear(self.hidden_size * 2 ,output_size)
        else:
            self.out = nn.Linear(self.hidden_size, output_size)
        
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, batch_size, hidden):
        
        output = self.dropout(self.embedding(input).view(1,batch_size, -1))
        output = F.relu(output)
        output, hidden = self.cell_layer(output, hidden)
        
        output = self.softmax(self.out(output[0]))
        return output, hidden

def indexesFromWord(lang, word):
    """
    Convert a word into a list of indices based on the character-to-index mapping of the language.

    Args:
        lang (Lang): Language object containing the character-to-index mapping.
        word (str): Input word to convert into indices.

    Returns:
        index_list (list): List of indices corresponding to the characters in the word.
    """
    index_list = []
    for char in word:
        if char in lang.char2index.keys():
            index_list.append(lang.char2index[char])
        else:
            index_list.append(UNK_token)
    return index_list

def variableFromSentence(lang, word, max_length):
    """
    Convert a word into a PyTorch tensor variable.

    Args:
        lang (Lang): Language object.
        word (str): Input word to convert.
        max_length (int): Maximum length of the sequence.

    Returns:
        result (Tensor): PyTorch tensor variable representing the input word.
    """
    indexes = indexesFromWord(lang, word)
    indexes.append(EOS_token)
    indexes.extend([PAD_token] * (max_length - len(indexes)))
    result = torch.LongTensor(indexes)
    if use_cuda:
        return result.cuda()
    else:
        return result

def variablesFromPairs(input_lang, output_lang, pairs, max_length):
    """
    Convert a list of word pairs into PyTorch tensor variables.

    Args:
        input_lang (Lang): Language object for the input language.
        output_lang (Lang): Language object for the output language.
        pairs (list): List of word pairs.
        max_length (int): Maximum length of the sequence.

    Returns:
        res (list): List of tuples containing input and target tensor variables.
    """
    res = []
    for pair in pairs:
        input_variable = variableFromSentence(input_lang, pair[0], max_length)
        target_variable = variableFromSentence(output_lang, pair[1], max_length)
        res.append((input_variable, target_variable))
    return res

def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, configuration, max_length, teacher_forcing_ratio = 0.5):
    
    batch_size = configuration['batch_size']
    num_layers_enc = configuration['num_layers_encoder']
    encoder_hidden = encoder.initHidden(batch_size,num_layers_enc)

    input_tensor = Variable(input_tensor.transpose(0, 1))
    target_tensor = Variable(target_tensor.transpose(0, 1))

    if configuration["cell_type"] == "LSTM":
        encoder_cell_state = encoder.initHidden(batch_size,num_layers_enc)
        encoder_hidden = (encoder_hidden, encoder_cell_state)

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei], batch_size, encoder_hidden)

    decoder_input = Variable(torch.LongTensor([SOS_token]*batch_size))
    decoder_input = decoder_input.cuda() if use_cuda else decoder_input

    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        for di in range(target_length):
            decoder_output, decoder_hidden= decoder(decoder_input, batch_size, decoder_hidden)
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]

    else:
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input, batch_size,decoder_hidden)
            topv, topi = decoder_output.data.topk(1)
            decoder_input = torch.cat(tuple(topi))

            decoder_input = decoder_input.cuda() if use_cuda else decoder_input

            loss += criterion(decoder_output, target_tensor[di])

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length
  
def cal_val_loss(encoder, decoder, input_tensor, target_tensor, configuration, criterion , max_length):

    with torch.no_grad():

        batch_size = configuration['batch_size']
        num_layers_enc = configuration['num_layers_encoder']
        encoder_hidden = encoder.initHidden(batch_size,num_layers_enc)

        input_tensor = Variable(input_tensor.transpose(0, 1))
        target_tensor = Variable(target_tensor.transpose(0, 1))
            
        if configuration["cell_type"] == "LSTM":
            encoder_cell_state = encoder.initHidden(batch_size,num_layers_enc)
            encoder_hidden = (encoder_hidden, encoder_cell_state)

        input_length = input_tensor.size()[0]
        target_length = target_tensor.size()[0]

        loss = 0
            
        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei], batch_size, encoder_hidden)

        decoder_input = Variable(torch.LongTensor([SOS_token] * batch_size))
        decoder_input = decoder_input.cuda() if use_cuda else decoder_input

        decoder_hidden = encoder_hidden

        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input, batch_size, decoder_hidden)
            topv, topi = decoder_output.data.topk(1)
            decoder_input = torch.cat(tuple(topi))

            decoder_input = decoder_input.cuda() if use_cuda else decoder_input
            loss += criterion(decoder_output, target_tensor[di])

    return loss.item() / target_length

def evaluate(encoder, decoder, loader, configuration, criterion , max_length):

    with torch.no_grad():

        batch_size = configuration['batch_size']
        total = 0
        correct = 0
        
        for batch_x, batch_y in loader:
            num_layers_enc = configuration['num_layers_encoder']
            encoder_hidden = encoder.initHidden(batch_size,num_layers_enc)

            input_variable = Variable(batch_x.transpose(0, 1))
            target_variable = Variable(batch_y.transpose(0, 1))
            
            if configuration["cell_type"] == "LSTM":
                encoder_cell_state = encoder.initHidden(batch_size,num_layers_enc)
                encoder_hidden = (encoder_hidden, encoder_cell_state)

            input_length = input_variable.size()[0]
            target_length = target_variable.size()[0]

            output = torch.LongTensor(target_length, batch_size)

            encoder_outputs = Variable(torch.zeros(max_length, batch_size, encoder.hidden_size))
            encoder_outputs = encoder_outputs.cuda() if use_cuda else encoder_outputs
            
            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder(input_variable[ei], batch_size, encoder_hidden)

            decoder_input = Variable(torch.LongTensor([SOS_token] * batch_size))
            decoder_input = decoder_input.cuda() if use_cuda else decoder_input

            decoder_hidden = encoder_hidden

            for di in range(target_length):
                decoder_output, decoder_hidden = decoder(decoder_input, batch_size, decoder_hidden)
                topv, topi = decoder_output.data.topk(1)
                decoder_input = torch.cat(tuple(topi))
                output[di] = torch.cat(tuple(topi))

            output = output.transpose(0,1)
            for di in range(output.size()[0]):
                ignore = [SOS_token, EOS_token, PAD_token]
                sent = [configuration['output_lang'].index2char[letter.item()] for letter in output[di] if letter not in ignore]
                y = [configuration['output_lang'].index2char[letter.item()] for letter in batch_y[di] if letter not in ignore]
                if sent == y:
                    correct += 1
                total += 1

    return (correct/total)*100

def trainIters(encoder, decoder, train_loader, val_loader, test_loader, learning_rate, configuration, wandb_flag):

    max_length = configuration['max_length_word']

    encoder_optimizer, decoder_optimizer = None, None

    if configuration['optimizer']=='nadam':
        encoder_optimizer = optim.NAdam(encoder.parameters(),lr=learning_rate)
        decoder_optimizer = optim.NAdam(decoder.parameters(),lr=learning_rate)
    else:
        encoder_optimizer = optim.Adam(encoder.parameters(),lr=learning_rate)
        decoder_optimizer = optim.Adam(decoder.parameters(),lr=learning_rate)

    criterion = nn.NLLLoss()
    
    ep = configuration['epochs']

    for i in range(ep):

        train_loss_total = 0
        val_loss_total = 0

        for batchx, batchy in train_loader:
            loss = None

            if configuration['attention'] == False:
                loss = train(batchx, batchy, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, configuration, max_length)
            
            train_loss_total += loss
        
        train_loss_total = train_loss_total/len(train_loader)
        print('ep : ', i, ' | ', end='')
        print('train loss :', train_loss_total, ' | ', end='')

        for batchx, batchy in val_loader:
            loss = None

            if configuration['attention'] == False:
                loss = cal_val_loss(encoder, decoder, batchx, batchy, configuration, criterion , max_length)
            
            val_loss_total += loss

        val_loss_total = val_loss_total/len(val_loader)
        # train_acc = evaluate(encoder, decoder, train_loader, configuration, criterion, max_length)
        val_acc = evaluate(encoder, decoder, val_loader, configuration, criterion, max_length)
        
        # print("train accuracy : " ,train_acc, ' | ', end='')
        print('val loss :', val_loss_total, ' | ', end='')
        print("val accuracy : " ,val_acc)

        if wandb_flag == True:
            wandb.log({
                "train_loss"           : train_loss_total,
                "validation_loss"      : val_loss_total,
                # "train_accuracy"       : train_acc,
                "validation_accuracy"  : val_acc
                })

#     temp = configuration['batch_size']
#     configuration['batch_size'] = 1
#     print("test accuracy for the model : " ,evaluate(encoder, decoder, test_loader, configuration, criterion, max_length))
#     configuration['batch_size'] = temp


def main(config = None):

    with wandb.init(config = config, entity = entity_name_ap) as run:
        
        config = wandb.config
        run.name = 'hs_'+str(config.hidden_size)+'_bs_'+str(config.batch_size)+'_ct_'+config.cell_type+'_es_'+str(config.embedding_size)+'_do_'+str(config.drop_out)+'_nle_'+str(config.num_layers_en)+'_nld_'+str(config.num_layers_dec)+'_lr_'+str(config.learning_rate)+'_bd_'+str(config.bidirectional)

        configuration = {

                'hidden_size'         : config.hidden_size,
                'source_lang'         : input_lang_ap,
                'target_lang'         : target_lang_ap,
                'cell_type'           : config.cell_type,
                'num_layers_encoder'  : config.num_layers_en,
                'num_layers_decoder'  : config.num_layers_en,
                'drop_out'            : config.drop_out, 
                'embedding_size'      : config.embedding_size,
                'bi_directional'      : config.bidirectional,
                'batch_size'          : config.batch_size,
                'attention'           : attention_ap,
                'epochs'              : config.epochs,
                'optimizer'           : config.optimizer

            }
        
        
        train_path = os.path.join(dir, configuration['target_lang'], configuration['target_lang'] + '_train.csv')
        validation_path = os.path.join(dir, configuration['target_lang'], configuration['target_lang'] + '_valid.csv')
        test_path = os.path.join(dir, configuration['target_lang'], configuration['target_lang'] + '_test.csv')

        train_prepared_data= prepareData(train_path,configuration['source_lang'], configuration['target_lang'])

        input_lang = train_prepared_data['input_lang']
        output_lang = train_prepared_data['output_lang']
        pairs = train_prepared_data['pairs']
        max_input_length = train_prepared_data['max_input_length']
        max_target_length = train_prepared_data['max_target_length']
        
        val_prepared_data= prepareData(validation_path,configuration['source_lang'], configuration['target_lang'])

        val_pairs = val_prepared_data['pairs']
        max_input_length_val = val_prepared_data['max_input_length']
        max_target_length_val = val_prepared_data['max_target_length']

        test_prepared_data= prepareData(validation_path, configuration['source_lang'], configuration['target_lang'])

        test_pairs = test_prepared_data['pairs']
        max_input_length_test = test_prepared_data['max_input_length']
        max_target_length_test = test_prepared_data['max_target_length']

        max_list = [max_input_length, max_target_length, max_input_length_val, max_target_length_val, max_input_length_test, max_target_length_test]
        max_len_all = max(max_list)

        max_len = max(max_input_length, max_target_length) + 2

        configuration['input_lang'] = input_lang
        configuration['output_lang'] = output_lang
        configuration['max_length_word'] = max_len_all + 1

        encoder1 = EncoderRNN(input_lang.n_chars, configuration)
        decoder1 = DecoderRNN(configuration, output_lang.n_chars)
        if use_cuda:
            encoder1=encoder1.cuda()
            decoder1=decoder1.cuda()

        pairs = variablesFromPairs(configuration['input_lang'], configuration['output_lang'], pairs , configuration['max_length_word'])
        val_pairs = variablesFromPairs(configuration['input_lang'], configuration['output_lang'], val_pairs, configuration['max_length_word'])
        test_pairs = variablesFromPairs(configuration['input_lang'], configuration['output_lang'], test_pairs, configuration['max_length_word'])

        train_loader = torch.utils.data.DataLoader(pairs, batch_size=configuration['batch_size'], shuffle=True)
        val_loader = torch.utils.data.DataLoader(val_pairs, batch_size=configuration['batch_size'], shuffle=True)
        test_loader = torch.utils.data.DataLoader(test_pairs, batch_size=1, shuffle=True)

        trainIters(encoder1, decoder1, train_loader, val_loader, test_loader, config.learning_rate, configuration, True)

    # if configuration['attention'] == False :
    #     trainIters(encoder1, decoder1, train_loader, val_loader, test_loader, learning_rate, configuration, True)

# main()

#wandb.init(project = 'Assignment_3', entity = 'samruddhipatil2526')
wandb.agent(sweep_id, main, count = 20)

Create sweep with ID: wr4s1cxm
Sweep URL: https://wandb.ai/samruddhipatil2526/Assifnment_3/sweeps/wr4s1cxm


[34m[1mwandb[0m: Agent Starting Run: qny50gx2 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	drop_out: 0.2
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers_dec: 1
[34m[1mwandb[0m: 	num_layers_en: 1
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: Currently logged in as: [33msamruddhipatil2526[0m. Use [1m`wandb login --relogin`[0m to force relogin




ep :  0  | train loss : 0.9959798126220707  | val loss : 0.7639406120777131  | val accuracy :  1.46484375
ep :  1  | train loss : 0.7233202877044669  | val loss : 0.6227178502082825  | val accuracy :  5.6396484375
ep :  2  | train loss : 0.6250614918231958  | val loss : 0.5569117742776872  | val accuracy :  7.32421875
ep :  3  | train loss : 0.5612232528686522  | val loss : 0.5410967934131621  | val accuracy :  11.279296875
ep :  4  | train loss : 0.5246916293382642  | val loss : 0.49437784790992734  | val accuracy :  10.7177734375
ep :  5  | train loss : 0.5045923003196713  | val loss : 0.50852643430233  | val accuracy :  15.4296875
ep :  6  | train loss : 0.4823392650842662  | val loss : 0.47333507895469684  | val accuracy :  15.8935546875
ep :  7  | train loss : 0.45275799264907785  | val loss : 0.46301242411136634  | val accuracy :  18.3837890625
ep :  8  | train loss : 0.4481380415201184  | val loss : 0.4478187757730484  | val accuracy :  18.75
ep :  9  | train loss : 0.4313530162

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_loss,█▅▃▃▂▂▂▁▁▁
validation_accuracy,▁▃▃▅▅▇▇███
validation_loss,█▅▄▃▂▃▂▂▁▁

0,1
train_loss,0.43135
validation_accuracy,18.35938
validation_loss,0.43326


[34m[1mwandb[0m: Agent Starting Run: hchtcr0e with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	drop_out: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers_dec: 3
[34m[1mwandb[0m: 	num_layers_en: 1
[34m[1mwandb[0m: 	optimizer: nadam




ep :  0  | train loss : 1.24528905582428  | val loss : 0.9998929166793824  | val accuracy :  0.0244140625
ep :  1  | train loss : 0.9631988475799561  | val loss : 0.7957443022727968  | val accuracy :  0.927734375
ep :  2  | train loss : 0.7381002824783323  | val loss : 0.6151840567588804  | val accuracy :  3.6376953125
ep :  3  | train loss : 0.6075582278251642  | val loss : 0.5300202071666718  | val accuracy :  7.421875
ep :  4  | train loss : 0.5448929289817805  | val loss : 0.4969428479671479  | val accuracy :  11.1328125
ep :  5  | train loss : 0.49412689805030824  | val loss : 0.4658757352828979  | val accuracy :  14.208984375
ep :  6  | train loss : 0.4729110693454742  | val loss : 0.4493951773643493  | val accuracy :  16.69921875
ep :  7  | train loss : 0.43149660778045634  | val loss : 0.4317544674873352  | val accuracy :  17.0166015625
ep :  8  | train loss : 0.4177530562877655  | val loss : 0.4362046182155609  | val accuracy :  18.4326171875
ep :  9  | train loss : 0.41013629

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_loss,█▆▄▃▂▂▂▁▁▁
validation_accuracy,▁▁▂▃▅▆▇▇▇█
validation_loss,█▆▃▂▂▂▁▁▁▁

0,1
train_loss,0.41014
validation_accuracy,20.8252
validation_loss,0.41545


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7wkf9oj5 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	drop_out: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_layers_dec: 1
[34m[1mwandb[0m: 	num_layers_en: 1
[34m[1mwandb[0m: 	optimizer: adam




ep :  0  | train loss : 0.9349789519548399  | val loss : 0.7115571314096453  | val accuracy :  2.9052734375
ep :  1  | train loss : 0.7078579340934761  | val loss : 0.648515413105488  | val accuracy :  4.0283203125
ep :  2  | train loss : 0.6626938763380055  | val loss : 0.6094657742977143  | val accuracy :  6.2744140625
ep :  3  | train loss : 0.6216711493492132  | val loss : 0.5944718730449678  | val accuracy :  8.49609375
ep :  4  | train loss : 0.6004038566708565  | val loss : 0.5622344297170633  | val accuracy :  8.056640625
ep :  5  | train loss : 0.5676001241207118  | val loss : 0.5350189682841302  | val accuracy :  9.3017578125
ep :  6  | train loss : 0.5504084409713749  | val loss : 0.5407110011577604  | val accuracy :  10.7421875
ep :  7  | train loss : 0.5425971352100367  | val loss : 0.5361834958195681  | val accuracy :  11.23046875
ep :  8  | train loss : 0.5303929729342454  | val loss : 0.5335341700911521  | val accuracy :  11.1328125
ep :  9  | train loss : 0.52388855955

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_loss,█▄▃▃▂▂▁▁▁▁
validation_accuracy,▁▂▄▅▅▆▇█▇█
validation_loss,█▆▄▃▂▁▁▁▁▁

0,1
train_loss,0.52389
validation_accuracy,11.81641
validation_loss,0.53088


[34m[1mwandb[0m: Agent Starting Run: sd99xado with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	drop_out: 0.3
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_layers_dec: 3
[34m[1mwandb[0m: 	num_layers_en: 2
[34m[1mwandb[0m: 	optimizer: adam


ep :  0  | train loss : 1.0178848074913034  | val loss : 0.7467405450344088  | val accuracy :  0.5859375
ep :  1  | train loss : 0.710842105102539  | val loss : 0.5966389298439028  | val accuracy :  4.443359375
ep :  2  | train loss : 0.6230865775585169  | val loss : 0.5643476289510726  | val accuracy :  6.103515625
ep :  3  | train loss : 0.5723875119686125  | val loss : 0.5306826138496398  | val accuracy :  9.4970703125
ep :  4  | train loss : 0.5338493308544159  | val loss : 0.5228085517883301  | val accuracy :  11.279296875
ep :  5  | train loss : 0.5121717365264897  | val loss : 0.506967777609825  | val accuracy :  13.4033203125
ep :  6  | train loss : 0.501272924995423  | val loss : 0.49172202229499806  | val accuracy :  12.6708984375
ep :  7  | train loss : 0.48699014387130724  | val loss : 0.47876088500022906  | val accuracy :  15.0390625
ep :  8  | train loss : 0.4787479133605953  | val loss : 0.48505579471588145  | val accuracy :  14.94140625
ep :  9  | train loss : 0.4791667

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_loss,█▄▃▂▂▁▁▁▁▁
validation_accuracy,▁▃▃▅▆▇▆▇▇█
validation_loss,█▄▃▂▂▂▁▁▁▁

0,1
train_loss,0.47917
validation_accuracy,16.33301
validation_loss,0.47422


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: c1j06sba with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	drop_out: 0.2
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_layers_dec: 1
[34m[1mwandb[0m: 	num_layers_en: 3
[34m[1mwandb[0m: 	optimizer: adam


ep :  0  | train loss : 0.9736241504907609  | val loss : 0.721842506825924  | val accuracy :  1.0498046875
ep :  1  | train loss : 0.7075648931741725  | val loss : 0.6051110526919367  | val accuracy :  4.931640625
ep :  2  | train loss : 0.6145466021060946  | val loss : 0.5730796852707861  | val accuracy :  8.3251953125
ep :  3  | train loss : 0.5768445823431013  | val loss : 0.5466050943732262  | val accuracy :  10.888671875
ep :  4  | train loss : 0.5528649607896806  | val loss : 0.5331340792775153  | val accuracy :  11.9873046875
ep :  5  | train loss : 0.5384702794551847  | val loss : 0.5222534546256066  | val accuracy :  11.4501953125
ep :  6  | train loss : 0.5302940493345263  | val loss : 0.5085448932647707  | val accuracy :  12.451171875
ep :  7  | train loss : 0.5203341313362118  | val loss : 0.5006300371885299  | val accuracy :  12.890625
ep :  8  | train loss : 0.5048530788779263  | val loss : 0.5034084203839301  | val accuracy :  13.2080078125
ep :  9  | train loss : 0.5053

VBox(children=(Label(value='0.019 MB of 0.019 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_loss,█▄▃▂▂▂▁▁▁▁
validation_accuracy,▁▃▅▆▇▇▇███
validation_loss,█▄▃▃▂▂▁▁▁▁

0,1
train_loss,0.50536
validation_accuracy,13.7207
validation_loss,0.49874


[34m[1mwandb[0m: Agent Starting Run: ply3qhtj with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	drop_out: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers_dec: 3
[34m[1mwandb[0m: 	num_layers_en: 2
[34m[1mwandb[0m: 	optimizer: nadam


ep :  0  | train loss : 1.219251584815978  | val loss : 0.9901049947738648  | val accuracy :  0.0
ep :  1  | train loss : 0.8573052552223211  | val loss : 0.6378718447685242  | val accuracy :  3.9306640625
ep :  2  | train loss : 0.590393955421447  | val loss : 0.5045588397979737  | val accuracy :  8.984375
ep :  3  | train loss : 0.4922001732349394  | val loss : 0.4464535892009734  | val accuracy :  15.3076171875
ep :  4  | train loss : 0.4403254193782806  | val loss : 0.41514432311058047  | val accuracy :  17.8466796875
ep :  5  | train loss : 0.40543422098159765  | val loss : 0.3979798698425293  | val accuracy :  21.4111328125
ep :  6  | train loss : 0.3724530145168304  | val loss : 0.38017256855964665  | val accuracy :  23.095703125
ep :  7  | train loss : 0.3598144251823421  | val loss : 0.38194087088108064  | val accuracy :  24.9755859375
ep :  8  | train loss : 0.3407164212703709  | val loss : 0.36980876386165606  | val accuracy :  26.953125
ep :  9  | train loss : 0.32305150327

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_loss,█▅▃▂▂▂▁▁▁▁
validation_accuracy,▁▂▃▅▆▆▇▇██
validation_loss,█▄▃▂▂▁▁▁▁▁

0,1
train_loss,0.32305
validation_accuracy,27.44141
validation_loss,0.35954


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6pedb1v8 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	drop_out: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers_dec: 3
[34m[1mwandb[0m: 	num_layers_en: 3
[34m[1mwandb[0m: 	optimizer: nadam


ep :  0  | train loss : 1.2762180103301999  | val loss : 1.0213471627235413  | val accuracy :  0.0
ep :  1  | train loss : 1.116713551712036  | val loss : 0.9901698589324953  | val accuracy :  0.0
ep :  2  | train loss : 1.0305254903793333  | val loss : 0.8852300906181334  | val accuracy :  0.0244140625
ep :  3  | train loss : 0.938734447669983  | val loss : 0.7820861530303957  | val accuracy :  0.1953125
ep :  4  | train loss : 0.8081489223480236  | val loss : 0.6753667581081391  | val accuracy :  1.8798828125
ep :  5  | train loss : 0.6802726984024051  | val loss : 0.5664280891418456  | val accuracy :  5.517578125
ep :  6  | train loss : 0.5811729640007023  | val loss : 0.49810410380363473  | val accuracy :  9.08203125
ep :  7  | train loss : 0.5193462835311891  | val loss : 0.47394316911697393  | val accuracy :  10.7177734375
ep :  8  | train loss : 0.48419048924446095  | val loss : 0.4476900160312653  | val accuracy :  12.890625
ep :  9  | train loss : 0.45492207374572763  | val lo

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_loss,█▇▆▅▄▃▂▂▁▁
validation_accuracy,▁▁▁▁▂▃▅▆▇█
validation_loss,██▆▅▄▃▂▂▁▁

0,1
train_loss,0.45492
validation_accuracy,16.18652
validation_loss,0.42981


[34m[1mwandb[0m: Agent Starting Run: n3vragfa with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	drop_out: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers_dec: 2
[34m[1mwandb[0m: 	num_layers_en: 3
[34m[1mwandb[0m: 	optimizer: nadam


ep :  0  | train loss : 1.273778536033631  | val loss : 1.1543648290634156  | val accuracy :  0.0
ep :  1  | train loss : 1.1766351737976068  | val loss : 1.1443086075782778  | val accuracy :  0.0
ep :  2  | train loss : 1.1707219343185424  | val loss : 1.1586808180809018  | val accuracy :  0.0
ep :  3  | train loss : 1.1436465337753297  | val loss : 1.1658714866638185  | val accuracy :  0.0
ep :  4  | train loss : 1.1401755176544197  | val loss : 1.1543452620506287  | val accuracy :  0.0
ep :  5  | train loss : 1.1339416196823124  | val loss : 1.1478458166122432  | val accuracy :  0.0
ep :  6  | train loss : 1.1430768684387216  | val loss : 1.1664226579666142  | val accuracy :  0.0
ep :  7  | train loss : 1.1256816942214962  | val loss : 1.1613065910339355  | val accuracy :  0.0
ep :  8  | train loss : 1.1241129819869997  | val loss : 1.1542762756347655  | val accuracy :  0.0
ep :  9  | train loss : 1.135160351181031  | val loss : 1.1517285728454587  | val accuracy :  0.0


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_loss,█▃▃▂▂▁▂▁▁▂
validation_accuracy,▁▁▁▁▁▁▁▁▁▁
validation_loss,▄▁▆█▄▂█▆▄▃

0,1
train_loss,1.13516
validation_accuracy,0.0
validation_loss,1.15173


[34m[1mwandb[0m: Agent Starting Run: hxxkmgiz with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	drop_out: 0.3
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers_dec: 2
[34m[1mwandb[0m: 	num_layers_en: 1
[34m[1mwandb[0m: 	optimizer: nadam




ep :  0  | train loss : 1.2000474647521981  | val loss : 0.9412042522430422  | val accuracy :  0.048828125
ep :  1  | train loss : 0.9479376329421992  | val loss : 0.8351457643508912  | val accuracy :  0.48828125
ep :  2  | train loss : 0.839675189876557  | val loss : 0.7543781685829164  | val accuracy :  1.171875
ep :  3  | train loss : 0.7789453413963308  | val loss : 0.7150559091567992  | val accuracy :  2.3193359375
ep :  4  | train loss : 0.7232451472282406  | val loss : 0.6684839415550231  | val accuracy :  3.173828125
ep :  5  | train loss : 0.6926980728149412  | val loss : 0.6510516512393953  | val accuracy :  5.4443359375
ep :  6  | train loss : 0.6577730596542364  | val loss : 0.6144362080097198  | val accuracy :  6.73828125
ep :  7  | train loss : 0.6157169685363769  | val loss : 0.6306118047237399  | val accuracy :  7.763671875
ep :  8  | train loss : 0.6041767635345457  | val loss : 0.5660864722728729  | val accuracy :  7.5927734375
ep :  9  | train loss : 0.58034111452102

VBox(children=(Label(value='0.001 MB of 0.019 MB uploaded\r'), FloatProgress(value=0.07002066844784997, max=1.…

0,1
train_loss,█▅▄▃▃▂▂▁▁▁
validation_accuracy,▁▁▂▃▃▅▆▆▆█
validation_loss,█▆▅▄▃▃▂▃▁▁

0,1
train_loss,0.58034
validation_accuracy,9.93652
validation_loss,0.54283


[34m[1mwandb[0m: Agent Starting Run: kpw6bnvs with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	drop_out: 0.3
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers_dec: 3
[34m[1mwandb[0m: 	num_layers_en: 1
[34m[1mwandb[0m: 	optimizer: nadam




ep :  0  | train loss : 1.0859934443473827  | val loss : 0.8167866849899291  | val accuracy :  0.6591796875
ep :  1  | train loss : 0.7151913614749907  | val loss : 0.5961892509460451  | val accuracy :  6.4453125
ep :  2  | train loss : 0.5816034362792966  | val loss : 0.5093889749050141  | val accuracy :  9.8388671875
ep :  3  | train loss : 0.5082354935884474  | val loss : 0.46309256017208106  | val accuracy :  13.1591796875
ep :  4  | train loss : 0.4600799700498576  | val loss : 0.4559062570333481  | val accuracy :  15.673828125
ep :  5  | train loss : 0.43110210893154116  | val loss : 0.43192438185215004  | val accuracy :  18.603515625
ep :  6  | train loss : 0.4006398843526843  | val loss : 0.42150750994682334  | val accuracy :  20.9716796875
ep :  7  | train loss : 0.3828679634332658  | val loss : 0.426901484131813  | val accuracy :  22.8759765625
ep :  8  | train loss : 0.37957626168727837  | val loss : 0.39654004812240606  | val accuracy :  24.3408203125
ep :  9  | train loss 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_loss,█▄▃▂▂▂▁▁▁▁
validation_accuracy,▁▃▄▅▅▆▇███
validation_loss,█▄▃▂▂▂▁▂▁▁

0,1
train_loss,0.36453
validation_accuracy,24.38965
validation_loss,0.39403


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 02tvtr0j with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	drop_out: 0.2
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers_dec: 3
[34m[1mwandb[0m: 	num_layers_en: 3
[34m[1mwandb[0m: 	optimizer: nadam


ep :  0  | train loss : 1.245664974594117  | val loss : 1.1864336037635803  | val accuracy :  0.0
ep :  1  | train loss : 1.1554804235458371  | val loss : 1.148561136722565  | val accuracy :  0.0
ep :  2  | train loss : 1.143657032203674  | val loss : 1.1737605857849118  | val accuracy :  0.0
ep :  3  | train loss : 1.1193002632141107  | val loss : 1.1513422107696534  | val accuracy :  0.0
ep :  4  | train loss : 1.1172472663879391  | val loss : 1.1553745770454404  | val accuracy :  0.0
ep :  5  | train loss : 1.1335521484375004  | val loss : 1.1469634652137757  | val accuracy :  0.0
ep :  6  | train loss : 1.1307941366195673  | val loss : 1.1598538064956663  | val accuracy :  0.0
ep :  7  | train loss : 1.0981663305282594  | val loss : 1.1735327243804934  | val accuracy :  0.0
ep :  8  | train loss : 1.1151995445251475  | val loss : 1.1528307151794435  | val accuracy :  0.0
ep :  9  | train loss : 1.114324074363708  | val loss : 1.2687716245651244  | val accuracy :  0.0


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_loss,█▄▃▂▂▃▃▁▂▂
validation_accuracy,▁▁▁▁▁▁▁▁▁▁
validation_loss,▃▁▃▁▁▁▂▃▁█

0,1
train_loss,1.11432
validation_accuracy,0.0
validation_loss,1.26877


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fp66mmz0 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	drop_out: 0.3
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers_dec: 3
[34m[1mwandb[0m: 	num_layers_en: 1
[34m[1mwandb[0m: 	optimizer: nadam




ep :  0  | train loss : 1.1766086362838752  | val loss : 0.9206032562255859  | val accuracy :  0.1220703125
ep :  1  | train loss : 0.9114319215774536  | val loss : 0.8219615817070007  | val accuracy :  1.2939453125
ep :  2  | train loss : 0.797185865974427  | val loss : 0.7140752530097962  | val accuracy :  1.5625
ep :  3  | train loss : 0.7378139847755434  | val loss : 0.6912567460536956  | val accuracy :  4.4921875
ep :  4  | train loss : 0.6569687745094298  | val loss : 0.6441038620471955  | val accuracy :  6.3720703125
ep :  5  | train loss : 0.6216193286895756  | val loss : 0.6145094013214112  | val accuracy :  7.958984375
ep :  6  | train loss : 0.5994953090667718  | val loss : 0.575274385213852  | val accuracy :  9.0576171875
ep :  7  | train loss : 0.5689279286384583  | val loss : 0.5422014653682709  | val accuracy :  8.740234375
ep :  8  | train loss : 0.5574912074089049  | val loss : 0.5292287993431091  | val accuracy :  10.4736328125
ep :  9  | train loss : 0.53854595003128

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_loss,█▅▄▃▂▂▂▁▁▁
validation_accuracy,▁▂▂▄▅▆▆▆▇█
validation_loss,█▆▅▄▃▃▂▂▁▁

0,1
train_loss,0.53855
validation_accuracy,12.01172
validation_loss,0.50333


[34m[1mwandb[0m: Agent Starting Run: 3ilvu0y3 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	drop_out: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers_dec: 3
[34m[1mwandb[0m: 	num_layers_en: 2
[34m[1mwandb[0m: 	optimizer: nadam


ep :  0  | train loss : 1.2737018177032475  | val loss : 1.071655592918396  | val accuracy :  0.0
ep :  1  | train loss : 1.057853424644471  | val loss : 0.8843236613273622  | val accuracy :  0.1220703125
ep :  2  | train loss : 0.8691702001571653  | val loss : 0.7054474472999575  | val accuracy :  1.2939453125
ep :  3  | train loss : 0.7063236801147456  | val loss : 0.5859911799430847  | val accuracy :  4.58984375
ep :  4  | train loss : 0.6103523632049562  | val loss : 0.5456655144691467  | val accuracy :  9.1064453125
ep :  5  | train loss : 0.5484780768394472  | val loss : 0.4921502256393433  | val accuracy :  10.546875
ep :  6  | train loss : 0.5021689752578736  | val loss : 0.4609121191501617  | val accuracy :  11.81640625
ep :  7  | train loss : 0.4680393371582026  | val loss : 0.46707073211669925  | val accuracy :  15.9912109375
ep :  8  | train loss : 0.4532557086467745  | val loss : 0.4404950869083404  | val accuracy :  16.4306640625
ep :  9  | train loss : 0.4414731310367588

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_loss,█▆▅▃▂▂▂▁▁▁
validation_accuracy,▁▁▁▃▅▅▆▇▇█
validation_loss,█▆▄▃▂▂▁▁▁▁

0,1
train_loss,0.44147
validation_accuracy,18.18848
validation_loss,0.42566


[34m[1mwandb[0m: Agent Starting Run: 71u86c2y with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	drop_out: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers_dec: 2
[34m[1mwandb[0m: 	num_layers_en: 2
[34m[1mwandb[0m: 	optimizer: nadam


ep :  0  | train loss : 1.2452890508651728  | val loss : 1.1518842792510986  | val accuracy :  0.0
ep :  1  | train loss : 1.1671324789047244  | val loss : 1.174070043563843  | val accuracy :  0.0
ep :  2  | train loss : 1.1598476732254033  | val loss : 1.1434949374198913  | val accuracy :  0.0
ep :  3  | train loss : 1.1488455846786492  | val loss : 1.1509352731704712  | val accuracy :  0.0
ep :  4  | train loss : 1.1444977619171146  | val loss : 1.148844232559204  | val accuracy :  0.0
ep :  5  | train loss : 1.1309392295837404  | val loss : 1.1553643274307248  | val accuracy :  0.0
ep :  6  | train loss : 1.1166476455688472  | val loss : 1.1572041630744934  | val accuracy :  0.0
ep :  7  | train loss : 1.1357588922500614  | val loss : 1.1565524983406066  | val accuracy :  0.0
ep :  8  | train loss : 1.1151272731781001  | val loss : 1.1807154178619386  | val accuracy :  0.0
ep :  9  | train loss : 1.1417136358261109  | val loss : 1.1525188612937929  | val accuracy :  0.0


VBox(children=(Label(value='0.019 MB of 0.019 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_loss,█▄▃▃▃▂▁▂▁▂
validation_accuracy,▁▁▁▁▁▁▁▁▁▁
validation_loss,▃▇▁▂▂▃▄▃█▃

0,1
train_loss,1.14171
validation_accuracy,0.0
validation_loss,1.15252


[34m[1mwandb[0m: Agent Starting Run: r6yqbcin with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	drop_out: 0.2
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers_dec: 3
[34m[1mwandb[0m: 	num_layers_en: 1
[34m[1mwandb[0m: 	optimizer: nadam




ep :  0  | train loss : 1.1909214536190043  | val loss : 1.2255142039060594  | val accuracy :  0.0
ep :  1  | train loss : 1.1510698625087734  | val loss : 1.1411984521150593  | val accuracy :  0.0
ep :  2  | train loss : 1.1312039198875439  | val loss : 1.164978295564652  | val accuracy :  0.0
ep :  3  | train loss : 1.1240202140331286  | val loss : 1.179939859509468  | val accuracy :  0.0
ep :  4  | train loss : 1.1373410990238186  | val loss : 1.1551781928539278  | val accuracy :  0.0
ep :  5  | train loss : 1.1124553700447097  | val loss : 1.1565102928876867  | val accuracy :  0.0
ep :  6  | train loss : 1.1299103657245622  | val loss : 1.1912765753269197  | val accuracy :  0.0
ep :  7  | train loss : 1.1159791396141046  | val loss : 1.1580130988359454  | val accuracy :  0.0
ep :  8  | train loss : 1.1139803699016566  | val loss : 1.1887311321496967  | val accuracy :  0.0
ep :  9  | train loss : 1.1110150201320634  | val loss : 1.1756548112630854  | val accuracy :  0.0


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_loss,█▅▃▂▃▁▃▁▁▁
validation_accuracy,▁▁▁▁▁▁▁▁▁▁
validation_loss,█▁▃▄▂▂▅▂▅▄

0,1
train_loss,1.11102
validation_accuracy,0.0
validation_loss,1.17565


[34m[1mwandb[0m: Agent Starting Run: blklave9 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	drop_out: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers_dec: 3
[34m[1mwandb[0m: 	num_layers_en: 2
[34m[1mwandb[0m: 	optimizer: nadam


ep :  0  | train loss : 1.2694712799072252  | val loss : 1.0040684914588927  | val accuracy :  0.0
ep :  1  | train loss : 1.073971066093445  | val loss : 0.9206065392494202  | val accuracy :  0.0244140625
ep :  2  | train loss : 0.8709371803283691  | val loss : 0.6823148822784422  | val accuracy :  1.171875
ep :  3  | train loss : 0.6826630373001099  | val loss : 0.584095848798752  | val accuracy :  6.8359375
ep :  4  | train loss : 0.5884586327552791  | val loss : 0.5065111517906189  | val accuracy :  8.447265625
ep :  5  | train loss : 0.5053204417228695  | val loss : 0.48899513721466065  | val accuracy :  13.3544921875
ep :  6  | train loss : 0.475466038894653  | val loss : 0.45216178178787236  | val accuracy :  15.625
ep :  7  | train loss : 0.4453702092647553  | val loss : 0.45881771922111503  | val accuracy :  17.626953125
ep :  8  | train loss : 0.419848074245453  | val loss : 0.41997215628623963  | val accuracy :  19.189453125
ep :  9  | train loss : 0.4178171975135804  | val 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_loss,█▆▅▃▂▂▁▁▁▁
validation_accuracy,▁▁▁▃▄▆▆▇██
validation_loss,█▇▄▃▂▂▂▂▁▁

0,1
train_loss,0.41782
validation_accuracy,20.33691
validation_loss,0.40276


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 39siqbnl with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	drop_out: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers_dec: 3
[34m[1mwandb[0m: 	num_layers_en: 2
[34m[1mwandb[0m: 	optimizer: nadam


ep :  0  | train loss : 1.160268549728395  | val loss : 0.8233831357955934  | val accuracy :  0.2197265625
ep :  1  | train loss : 0.7058232266426084  | val loss : 0.5379182744026185  | val accuracy :  7.763671875
ep :  2  | train loss : 0.5199237816810608  | val loss : 0.47547334671020497  | val accuracy :  14.208984375
ep :  3  | train loss : 0.46061815171241755  | val loss : 0.4277462470531464  | val accuracy :  17.6025390625
ep :  4  | train loss : 0.4116797802448275  | val loss : 0.42270873546600335  | val accuracy :  21.6552734375
ep :  5  | train loss : 0.39128828792572007  | val loss : 0.3891694128513337  | val accuracy :  21.77734375


In [1]:
!pip install wandb
!pip install kaggle



In [2]:
!kaggle datasets download -d anon1729/aksharantar-sampled

Dataset URL: https://www.kaggle.com/datasets/anon1729/aksharantar-sampled
License(s): unknown
Downloading aksharantar-sampled.zip to /kaggle/working
 78%|█████████████████████████████▋        | 11.0M/14.1M [00:01<00:00, 15.5MB/s]
100%|██████████████████████████████████████| 14.1M/14.1M [00:01<00:00, 11.8MB/s]


In [3]:
!unzip aksharantar-sampled.zip


Archive:  aksharantar-sampled.zip
  inflating: aksharantar_sampled/asm/asm_test.csv  
  inflating: aksharantar_sampled/asm/asm_train.csv  
  inflating: aksharantar_sampled/asm/asm_valid.csv  
  inflating: aksharantar_sampled/ben/ben_test.csv  
  inflating: aksharantar_sampled/ben/ben_train.csv  
  inflating: aksharantar_sampled/ben/ben_valid.csv  
  inflating: aksharantar_sampled/brx/brx_test.csv  
  inflating: aksharantar_sampled/brx/brx_train.csv  
  inflating: aksharantar_sampled/brx/brx_valid.csv  
  inflating: aksharantar_sampled/guj/guj_test.csv  
  inflating: aksharantar_sampled/guj/guj_train.csv  
  inflating: aksharantar_sampled/guj/guj_valid.csv  
  inflating: aksharantar_sampled/hin/hin_test.csv  
  inflating: aksharantar_sampled/hin/hin_train.csv  
  inflating: aksharantar_sampled/hin/hin_valid.csv  
  inflating: aksharantar_sampled/kan/kan_test.csv  
  inflating: aksharantar_sampled/kan/kan_train.csv  
  inflating: aksharantar_sampled/kan/kan_valid.csv  
  inflating: aksha