In [None]:
pip install wandb

Collecting wandb
  Downloading wandb-0.16.6-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
Collecting GitPython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m18.6 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-2.0.1-py2.py3-none-any.whl (266 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m266.8/266.8 kB[0m [31m21.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting setproctitle (from wandb)
  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)
Collecting gitdb<5,>=4.0.1 (from GitPython!=3.1.29,>=1.0.0->wa

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F
import wandb
import numpy as np
import os
import pandas as pd
import zipfile
import random
from tqdm import tqdm
from google.colab import files
from IPython.display import display
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Define the model architecture
class Encoder(nn.Module):
    def __init__(self, input_size,embedding_size, hidden_size,batch_size,encoder_num_layers, cell_type, bidirectional, dropout):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, embedding_size)
        self.dropout = nn.Dropout(dropout)
        self.batch_size=batch_size
        self.embedding_size=embedding_size
        self.encoder_num_layers=encoder_num_layers
        self.cell_type = cell_type
        self.bidirectional=bidirectional



        if cell_type == "RNN":
            self.rnn = nn.RNN(self.embedding_size, self.hidden_size,self.encoder_num_layers, bidirectional=bidirectional, dropout=dropout)
        elif cell_type == "LSTM":
            self.rnn = nn.LSTM(self.embedding_size, self.hidden_size, self.encoder_num_layers, bidirectional=bidirectional, dropout=dropout)
        elif cell_type == "GRU":
            self.rnn = nn.GRU(self.embedding_size, self.hidden_size, self.encoder_num_layers, bidirectional=bidirectional, dropout=dropout)




    def forward(self, input_seq, hidden):
        embedded = self.dropout((self.embedding(input_seq.long())).view(-1,self.batch_size, self.embedding_size))
        outputs, hidden = self.rnn(embedded,hidden)


        if self.bidirectional:
            if self.cell_type == "LSTM":
                # Dividing the hidden state into parts for each direction
                hidden_state = hidden[0].view(2, self.encoder_num_layers, self.batch_size, self.hidden_size)
                cell_state = hidden[0].view(2, self.encoder_num_layers, self.batch_size, self.hidden_size)

                # Combining the hidden and cell states by taking their average
                hidden = (torch.add(hidden_state[0], hidden_state[1]) / 2, torch.add(cell_state[0], cell_state[1]) / 2)
            else:
                # Dividing the hidden state into parts for each direction
                hidden = hidden.view(2, self.encoder_num_layers, self.batch_size, self.hidden_size)

                # Combining the hidden states by taking their average
                hidden = torch.add(hidden[0], hidden[1]) / 2

            # Splitting the output tensor into parts for each direction
            split_tensor = torch.split(outputs, self.hidden_size, dim=-1)

            # Combining the outputs by taking their average
            output = torch.add(split_tensor[0], split_tensor[1]) / 2


        return outputs, hidden



    def initHidden(self):
            num_directions = 2 if self.bidirectional else 1  # For bidirectional, set to 2, otherwise 1
            if self.cell_type == "LSTM":
                return (torch.zeros(self.encoder_num_layers * num_directions, self.batch_size, self.hidden_size, device=device),
                        torch.zeros(self.encoder_num_layers * num_directions, self.batch_size, self.hidden_size, device=device))
            else:
                return torch.zeros(self.encoder_num_layers * num_directions, self.batch_size, self.hidden_size, device=device)

class Decoder(nn.Module):
    def __init__(self, output_size, embedding_size, hidden_size, batch_size, decoder_num_layers, cell_type, dropout, MAX_LENGTH):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.batch_size = batch_size
        self.embedding_size = embedding_size
        self.decoder_num_layers = decoder_num_layers
        self.MAX_LENGTH = MAX_LENGTH
        self.cell_type = cell_type
        self.dropout=dropout

        self.embedding = nn.Embedding(output_size, embedding_size)
        self.dropout = nn.Dropout(dropout)
        if cell_type == "RNN":
            self.rnn = nn.RNN(embedding_size, hidden_size, self.decoder_num_layers,dropout=dropout)
        elif cell_type == "LSTM":
            self.rnn = nn.LSTM(embedding_size, hidden_size, self.decoder_num_layers, dropout=dropout)
        elif cell_type == "GRU":
            self.rnn = nn.GRU(embedding_size, hidden_size, self.decoder_num_layers, dropout=dropout)


        self.out = nn.Linear(hidden_size, output_size)

        self.softmax = nn.LogSoftmax(dim=2)

    def forward(self, input, hidden):
        output = self.embedding(input.long()).view(-1,self.batch_size, self.embedding_size)

        output = F.relu(output)

        output, hidden = self.rnn(output, hidden)

        output = self.softmax(self.out(output))
        return output, hidden




In [None]:
def Datasetretrival(pathofzip, folder_name):
    # Define the path to your zip file
    zip_file_path = pathofzip

    # Extract the contents of the zip file
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall('extracted_data')

    # Define the path to the extracted data directory
    extracted_data_dir = 'extracted_data/aksharantar_sampled'
    contents = os.listdir(extracted_data_dir)

    # Initialize empty lists for train, test, and validation datasets
    train_datasets = []
    test_datasets = []
    val_datasets = []

 # Load train, test, and validation CSV files from the specified folder
    folder_path = os.path.join(extracted_data_dir, folder_name)

    # List all files in the folder
    folder_files = os.listdir(folder_path)

    # Filter files with the specified folder name as prefix
    foldername_prefix = folder_name + "_"
    folder_files_with_prefix = [file for file in folder_files if file.startswith(foldername_prefix)]

    for file in folder_files_with_prefix:
        file_path = os.path.join(folder_path, file)
        if 'train' in file:
            train_datasets.append(pd.read_csv(file_path,header=None))
        elif 'test' in file:
            test_datasets.append(pd.read_csv(file_path,header=None))
        elif 'val' in file:
            val_datasets.append(pd.read_csv(file_path,header=None))

    # Concatenate the loaded dataframes to create single train, test, and validation datasets
    train_dataset = pd.concat(train_datasets, ignore_index=True)
    test_dataset = pd.concat(test_datasets, ignore_index=True)
    val_dataset = pd.concat(val_datasets, ignore_index=True)


    return train_dataset, val_dataset, test_dataset

In [None]:
def vecorizeddata(data_pairs,index2char, char2index):

    # Tokens
    # 0 -> SOS
    # 1 -> EOS
    # 3 -> Pad

    maxlength_input = 0
    maxlength_output = 0


    # Adding in the main index2char and char2index dictionary
    for word_pair in data_pairs:
        maxlength_input = max(maxlength_input, len(word_pair[0]))

        for char in word_pair[0]:
            if char not in  char2index:
                char2index[char] = len(char2index)
                index2char[len(index2char)] = char



        maxlength_output = max(maxlength_output, len(word_pair[1]))

        for char in word_pair[1]:
            if char not in  char2index:
                char2index[char] = len(char2index)
                index2char[len(index2char)] = char


    MAX_LENGTH = max(maxlength_input, maxlength_output) + 2

    max_of_all = max(maxlength_input, maxlength_output)

    SOS_token = 0
    EOS_token = 1
    PAD_token = 2

    vec_pair_list = []
    for word_pair in data_pairs:


        vec_1 = []
        for char in word_pair[0]:
            vec_1.append(char2index[char])

        wordvec_1 = vec_1
        wordvec_1.append(EOS_token)

        for i in range(MAX_LENGTH - len(word_pair[0])):
            wordvec_1.append(PAD_token)
        wordvec_1 = torch.LongTensor(wordvec_1)
        eng_vec = wordvec_1


        vec_2 = []
        for char in word_pair[1]:
            vec_2.append(char2index[char])

        wordvec_2 = vec_2
        wordvec_2.append(EOS_token)

        for i in range(MAX_LENGTH - len(word_pair[1])):
            wordvec_2.append(PAD_token)
        wordvec_2 = torch.LongTensor(wordvec_2)
        guj_vec = wordvec_2


        vec_pair = (eng_vec, guj_vec)
        vec_pair_list.append(vec_pair)

    return vec_pair_list,char2index, index2char ,MAX_LENGTH


In [None]:
def train(encoder, decoder,train_loader, encoder_optimizer,decoder_optimizer,encoder_num_layers,decoder_num_layers,cell_type,criterion,char2index,index2char,MAX_LENGTH,teacher_forcing_ratio, device):
    total_loss = 0
    correct=0
    total=0
    encoder.to(device)
    decoder.to(device)
    encoder.train()
    decoder.train()
    target_length=0


    for data in tqdm(train_loader):

        input_tensor, target_tensor = data
        input_tensor = input_tensor.to(device)
        target_tensor=target_tensor.to(device)
        batch_size = input_tensor.shape[0]
        input_tensor=input_tensor.T
        target_tensor=target_tensor.T

        encoder_hidden = encoder.initHidden()

        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()


        input_length = len(input_tensor)
        target_length = len(target_tensor)


        encoder_output, encoder_hidden = encoder(input_tensor, encoder_hidden)


        decoder_input = target_tensor[0]
        # Handle different numbers of layers in the encoder and decoder
        if encoder_num_layers != decoder_num_layers:
            if encoder_num_layers < decoder_num_layers:
                remaining_layers = decoder_num_layers - encoder_num_layers
                # Copy all encoder hidden layers and then repeat the top layer
                if cell_type == "LSTM":
                    top_layer_hidden = (encoder_hidden[0][-1].unsqueeze(0), encoder_hidden[1][-1].unsqueeze(0))
                    extra_hidden = (top_layer_hidden[0].repeat(remaining_layers, 1, 1), top_layer_hidden[1].repeat(remaining_layers, 1, 1))
                    decoder_hidden = (torch.cat((encoder_hidden[0], extra_hidden[0]), dim=0), torch.cat((encoder_hidden[1], extra_hidden[1]), dim=0))
                else:
                    top_layer_hidden = encoder_hidden[-1].unsqueeze(0) #top_layer_hidden shape (1, batch_size, hidden_size)
                    extra_hidden = top_layer_hidden.repeat(remaining_layers, 1, 1)
                    decoder_hidden = torch.cat((encoder_hidden, extra_hidden), dim=0)

            else:
                # Slice the hidden states of the encoder to match the decoder layers
                if cell_type == "LSTM":
                    decoder_hidden = (encoder_hidden[0][-decoder_num_layers:], encoder_hidden[1][-decoder_num_layers:])
                else :
                    decoder_hidden = encoder_hidden[-decoder_num_layers:]
        else:
            decoder_hidden = encoder_hidden


        loss = 0

        pred=torch.zeros(len(target_tensor), batch_size).to(device)
        use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

        if use_teacher_forcing:

            for di in range(target_length):
                decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
                decoder_output = torch.squeeze(decoder_output)
                loss += criterion(decoder_output, target_tensor[di].long())


                topv, topi = decoder_output.topk(1)
                topi=torch.squeeze(topi)
                pred[di]=topi

                decoder_input = target_tensor[di]


        else:

            for di in range(target_length):
                decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
                decoder_output = torch.squeeze(decoder_output)
                loss += criterion(decoder_output, target_tensor[di].long())


                topv, topi = decoder_output.topk(1)
                topi = torch.squeeze(topi)
                pred[di]=topi
                decoder_input = topi

        pred = pred.T
        act = target_tensor.T

        for i in range(len(pred)):
            f=0
            for j in range(len(pred[i])):
                if(pred[i][j]!=act[i][j]):
                    f=1
                    break
            if(f==0):
                correct += 1

        total_loss += loss
        loss.backward()
        encoder_optimizer.step()
        decoder_optimizer.step()



    accuracy = correct / (len(train_loader) * batch_size )
    final_loss = total_loss /  (len(train_loader) * batch_size )
    return   final_loss , accuracy



In [None]:

# Define evaluation function
def evaluate(encoder, decoder, val_loader,encoder_num_layers,decoder_num_layers,cell_type, criterion,char2index,index2char,MAX_LENGTH, device):
    EOS_token=1
    SOS_token=0
    correct=0
    total_loss=0
    target_length=0
    pred=[]
    predictions = []
    Input = []
    Target = []

    with torch.no_grad():

        encoder.eval()
        decoder.eval()
        for data in tqdm(val_loader):

            input_tensor, target_tensor = data
            batch_size = input_tensor.shape[0]
            input_tensor = input_tensor.to(device)
            target_tensor=target_tensor.to(device)




            input_tensor=input_tensor.T
            target_tensor=target_tensor.T

            encoder_hidden = encoder.initHidden()

            input_length = len(input_tensor)
            target_length = len(target_tensor)


            encoder_output, encoder_hidden = encoder(input_tensor, encoder_hidden)


            decoder_input = target_tensor[0]
            # Handle different numbers of layers in the encoder and decoder
            if encoder_num_layers != decoder_num_layers:
                if encoder_num_layers < decoder_num_layers:
                    remaining_layers = decoder_num_layers - encoder_num_layers
                    # Copy all encoder hidden layers and then repeat the top layer
                    if cell_type == "LSTM":
                        top_layer_hidden = (encoder_hidden[0][-1].unsqueeze(0), encoder_hidden[1][-1].unsqueeze(0))
                        extra_hidden = (top_layer_hidden[0].repeat(remaining_layers, 1, 1), top_layer_hidden[1].repeat(remaining_layers, 1, 1))
                        decoder_hidden = (torch.cat((encoder_hidden[0], extra_hidden[0]), dim=0), torch.cat((encoder_hidden[1], extra_hidden[1]), dim=0))
                    else:
                        top_layer_hidden = encoder_hidden[-1].unsqueeze(0) #top_layer_hidden shape (1, batch_size, hidden_size)
                        extra_hidden = top_layer_hidden.repeat(remaining_layers, 1, 1)
                        decoder_hidden = torch.cat((encoder_hidden, extra_hidden), dim=0)

                else:
                    # Slice the hidden states of the encoder to match the decoder layers
                    if cell_type == "LSTM":
                        decoder_hidden = (encoder_hidden[0][-decoder_num_layers:], encoder_hidden[1][-decoder_num_layers:])
                    else :
                        decoder_hidden = encoder_hidden[-decoder_num_layers:]
            else:
                decoder_hidden = encoder_hidden


            loss = 0
            pred=torch.zeros(len(target_tensor), batch_size).to(device)


            for di in range(target_length):
                decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
                decoder_output = torch.squeeze(decoder_output)
                loss += criterion(decoder_output, target_tensor[di].long())


                topv, topi = decoder_output.topk(1)
                topi = torch.squeeze(topi)
                pred[di]=topi
                decoder_input = topi

            pred = pred.T
            act = target_tensor.T
            act_eng = input_tensor.T
            for i in range(batch_size):
                pred_word=""
                input_word=""
                target_word = ""

                f=0
                for j in range(len(act[i])):

                    if(int(pred[i][j].item()) > 2):
                        pred_word += index2char[int(pred[i][j].item())]
                    if(int(act[i][j].item()) > 2):
                        target_word += index2char[int(act[i][j].item())]

                    if(pred[i][j]!=act[i][j]):
                        f=1

                if(f==0):
                    correct += 1

                for j in range(len(act_eng[i])):

                      if(int(act_eng[i][j].item()) > 2):
                          input_word += index2char[int(act_eng[i][j].item())]

                predictions.append(pred_word)
                Input.append(input_word)
                Target.append(target_word)


        total_loss += loss
        accuracy = correct / (len(val_loader) * batch_size )
        final_loss = total_loss / (len(val_loader) * batch_size )
    return  final_loss , accuracy , predictions ,Input , Target


In [None]:

# Define training function
def arguments(input_embedding_size,encoder_num_layers,decoder_num_layers,hidden_size,cell_type,bidirectional,batch_size,learning_rate,num_epochs,dropout,teacher_forcing_ratio,pathofzip,Folder_name):

    train_dataset ,val_dataset ,test_dataset= Datasetretrival(pathofzip,Folder_name)

    # Convert DataFrame to list of tuples
    data_pairs_train = [list(row) for row in train_dataset.values]
    data_pairs_val = [list(row) for row in val_dataset.values]
    data_pairs_test = [list(row) for row in test_dataset.values]



    index2char = {0:'<', 1: '>', 2 : '.'}
    char2index = {'<' : 0, '>' : 1, '.' : 2 }


    vec_pair_list_train,char2index, index2char,MAX_LENGTH_1=vecorizeddata(data_pairs_train,index2char, char2index)
    vec_pair_list_val,_,_,MAX_LENGTH_2=vecorizeddata(data_pairs_val,index2char, char2index)
    vec_pair_list_test,_,_,MAX_LENGTH_3=vecorizeddata(data_pairs_test,index2char, char2index)


    predictions = []
    Input = []
    Target = []


    MAX_LENGTH= 30
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_loader = DataLoader(vec_pair_list_train, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(vec_pair_list_val, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(vec_pair_list_test, batch_size=batch_size, shuffle=False)

    # Define model
    encoder = Encoder(len(char2index),input_embedding_size, hidden_size,batch_size, encoder_num_layers, cell_type,bidirectional, dropout).to(device)
    decoder = Decoder(len(char2index),input_embedding_size, hidden_size,batch_size, decoder_num_layers, cell_type, dropout,MAX_LENGTH).to(device)
    #model = Seq2Seq(encoder, decoder, encoder_num_layers, decoder_num_layers,batch_size, hidden_size,bidirectional, cell_type,device).to(device)

    output_predictions=torch.tensor([]).to(device)
    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    #optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)


    # Training loop
    for epoch in range(num_epochs):


        # Train the model
        train_loss, train_accuracy = train(encoder,decoder, train_loader, encoder_optimizer,decoder_optimizer,encoder_num_layers,decoder_num_layers,cell_type, criterion,char2index,index2char,MAX_LENGTH,teacher_forcing_ratio, device)

        # Validation loop
        val_loss, val_accuracy,predictions ,Input , Target = evaluate(encoder,decoder, val_loader,encoder_num_layers,decoder_num_layers,cell_type, criterion,char2index,index2char,MAX_LENGTH, device)

        # Log metrics to Weights & Biases
        wandb.log({
            "Epoch": epoch + 1,
            "Train_Accuracy": train_accuracy,
            "Train_Loss": train_loss,
            "Val_Accuracy": val_accuracy,
            "Val_Loss": val_loss
        })

        # Print epoch results
        print(f"\nEpoch {epoch+1}/{num_epochs},\n Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f},\n Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}\n")

    print(f"\n Predictions Generated by current sweep and actual output:\n")
    dataframe = pd.DataFrame({"INPUT": Input, "PREDICTED": predictions,"ACTUAL":Target})
    dataframe.to_csv("predictions.csv", index=False)
    data = pd.read_csv("predictions.csv",header=None)
    #files.download("predictions.csv")
    display(data)



In [None]:
#arguments(32,2,2,1024,"RNN",True,128,0.001,1,0.2,0.5,'/content/sample_data/aksharantar_sampled.zip','guj')

In [None]:
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:

# Define hyperparameters to sweep
sweep_config = {
    "method": "bayes",
    'name'  : 'Train Dataset Run',
    'metric': {'goal': 'maximize', 'name': 'Val_Accuracy'},
    "parameters": {
        "input_embedding_size": {"values": [16, 32, 64, 256]},
        "encoder_num_layers": {"values": [1, 2, 3]},
        "decoder_num_layers": {"values": [1, 2, 3]},
        "hidden_size": {"values": [128,256,512,1024]},
        "cell_type": {"values": ["RNN", "GRU", "LSTM"]},
        "bidirectional": {"values": [True, False]},
        "batch_size": {"values": [32, 64]},
        "learning_rate": {"values": [0.001, 0.0001]},
        "num_epochs": {"values": [5,10]},
        "dropout": {"values": [0.2, 0.3]},
        "teacher_forcing_ratio" : {"values":[0.5]}
    }
}

# Initialize wandb sweep
sweep_id = wandb.sweep(sweep=sweep_config, project="DL_Assignment_3_CS23M046")






Create sweep with ID: 5cxoqh4z
Sweep URL: https://wandb.ai/cs23m046/DL_Assignment_3_CS23M046/sweeps/5cxoqh4z


In [None]:
def main_1():

    # Initialize wandb
    with wandb.init() as run:

        config = wandb.config

        run_name=str(config.cell_type)+"_embedding_"+str(config.input_embedding_size)+"_hidden_size_"+str(config.hidden_size)+"_bidirectional_"+str(config.bidirectional)+"_Encoder_layers_"+str(config.encoder_num_layers)+"_Decoder_layers_"+str(config.decoder_num_layers)
        wandb.run.name=run_name

        pathofzip='/content/sample_data/aksharantar_sampled.zip'
        Folder_name='guj'

        arguments(config.input_embedding_size,config.encoder_num_layers,config.decoder_num_layers,config.hidden_size,config.cell_type,config.bidirectional,config.batch_size,config.learning_rate,config.num_epochs,config.dropout,config.teacher_forcing_ratio,pathofzip,Folder_name)



# Run sweep
wandb.agent(sweep_id, function=main_1, count=5)

wandb.finish()

[34m[1mwandb[0m: Agent Starting Run: xy9fa7sn with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_num_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	encoder_num_layers: 2
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	input_embedding_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
[34m[1mwandb[0m: Currently logged in as: [33mcs23m046[0m. Use [1m`wandb login --relogin`[0m to force relogin


100%|██████████| 800/800 [00:39<00:00, 20.47it/s]
100%|██████████| 64/64 [00:13<00:00,  4.62it/s]



Epoch 1/5,
 Train Loss: 0.5007, Train Accuracy: 0.0000,
 Val Loss: 0.0056, Val Accuracy: 0.0005



100%|██████████| 800/800 [00:38<00:00, 20.63it/s]
100%|██████████| 64/64 [00:13<00:00,  4.80it/s]



Epoch 2/5,
 Train Loss: 0.3767, Train Accuracy: 0.0002,
 Val Loss: 0.0045, Val Accuracy: 0.0049



100%|██████████| 800/800 [00:40<00:00, 19.89it/s]
100%|██████████| 64/64 [00:13<00:00,  4.78it/s]



Epoch 3/5,
 Train Loss: 0.3031, Train Accuracy: 0.0040,
 Val Loss: 0.0035, Val Accuracy: 0.0261



100%|██████████| 800/800 [00:42<00:00, 18.76it/s]
100%|██████████| 64/64 [00:13<00:00,  4.77it/s]



Epoch 4/5,
 Train Loss: 0.2527, Train Accuracy: 0.0216,
 Val Loss: 0.0029, Val Accuracy: 0.0918



100%|██████████| 800/800 [00:44<00:00, 17.97it/s]
100%|██████████| 64/64 [00:13<00:00,  4.78it/s]


Epoch 5/5,
 Train Loss: 0.2174, Train Accuracy: 0.0564,
 Val Loss: 0.0026, Val Accuracy: 0.1553


 Predictions Generated by current sweep and actual output:






Unnamed: 0,0,1,2
0,INPUT,PREDICTED,ACTUAL
1,vasteena,વસ્તીના,વસતીના
2,gardanana,ગર્દનનાા,ગરદનના
3,roqsa,રોક્સા,રોક્સ
4,suparastarsa,સુપર્્્્્્સ,સુપરસ્ટાર્સ
...,...,...,...
4092,humirsir,હુમિરિર્ર,હમીરસર
4093,batata,બટતા,બટાટા
4094,dhovaai,ધોવાઈ,ધોવાઇ
4095,daridrataa,દરિદર્તા,દરિદ્રતા


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Epoch,▁▃▅▆█
Train_Accuracy,▁▁▁▄█
Train_Loss,█▅▃▂▁
Val_Accuracy,▁▁▂▅█
Val_Loss,█▅▃▂▁

0,1
Epoch,5.0
Train_Accuracy,0.05641
Train_Loss,0.21741
Val_Accuracy,0.15527
Val_Loss,0.0026


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: u70rwxet with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_num_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	encoder_num_layers: 2
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5


100%|██████████| 800/800 [00:33<00:00, 24.04it/s]
100%|██████████| 64/64 [00:11<00:00,  5.48it/s]



Epoch 1/10,
 Train Loss: 0.4434, Train Accuracy: 0.0002,
 Val Loss: 0.0043, Val Accuracy: 0.0051



100%|██████████| 800/800 [00:35<00:00, 22.84it/s]
100%|██████████| 64/64 [00:11<00:00,  5.37it/s]



Epoch 2/10,
 Train Loss: 0.3236, Train Accuracy: 0.0036,
 Val Loss: 0.0038, Val Accuracy: 0.0256



100%|██████████| 800/800 [00:36<00:00, 21.90it/s]
100%|██████████| 64/64 [00:11<00:00,  5.42it/s]



Epoch 3/10,
 Train Loss: 0.2794, Train Accuracy: 0.0135,
 Val Loss: 0.0033, Val Accuracy: 0.0588



100%|██████████| 800/800 [00:37<00:00, 21.58it/s]
100%|██████████| 64/64 [00:11<00:00,  5.36it/s]



Epoch 4/10,
 Train Loss: 0.2546, Train Accuracy: 0.0289,
 Val Loss: 0.0031, Val Accuracy: 0.0779



100%|██████████| 800/800 [00:38<00:00, 20.58it/s]
100%|██████████| 64/64 [00:11<00:00,  5.39it/s]



Epoch 5/10,
 Train Loss: 0.2346, Train Accuracy: 0.0485,
 Val Loss: 0.0030, Val Accuracy: 0.1025



100%|██████████| 800/800 [00:38<00:00, 20.80it/s]
100%|██████████| 64/64 [00:11<00:00,  5.35it/s]



Epoch 6/10,
 Train Loss: 0.2205, Train Accuracy: 0.0637,
 Val Loss: 0.0028, Val Accuracy: 0.1309



100%|██████████| 800/800 [00:39<00:00, 20.36it/s]
100%|██████████| 64/64 [00:12<00:00,  5.29it/s]



Epoch 7/10,
 Train Loss: 0.2142, Train Accuracy: 0.0748,
 Val Loss: 0.0029, Val Accuracy: 0.1340



100%|██████████| 800/800 [00:39<00:00, 20.06it/s]
100%|██████████| 64/64 [00:11<00:00,  5.39it/s]



Epoch 8/10,
 Train Loss: 0.2056, Train Accuracy: 0.0852,
 Val Loss: 0.0027, Val Accuracy: 0.1433



100%|██████████| 800/800 [00:40<00:00, 19.53it/s]
100%|██████████| 64/64 [00:11<00:00,  5.42it/s]



Epoch 9/10,
 Train Loss: 0.1992, Train Accuracy: 0.0978,
 Val Loss: 0.0025, Val Accuracy: 0.1501



100%|██████████| 800/800 [00:40<00:00, 19.89it/s]
100%|██████████| 64/64 [00:11<00:00,  5.43it/s]



Epoch 10/10,
 Train Loss: 0.1962, Train Accuracy: 0.1020,
 Val Loss: 0.0027, Val Accuracy: 0.1460


 Predictions Generated by current sweep and actual output:



Unnamed: 0,0,1,2
0,INPUT,PREDICTED,ACTUAL
1,vasteena,વસ્તિના,વસતીના
2,gardanana,ગર્ડનાના,ગરદનના
3,roqsa,રોક્સા,રોક્સ
4,suparastarsa,સુપરસ્ત્રરા,સુપરસ્ટાર્સ
...,...,...,...
4092,humirsir,હુમરિિતર,હમીરસર
4093,batata,બટટા,બટાટા
4094,dhovaai,ધોવાઈ,ધોવાઇ
4095,daridrataa,દરિદ્રતા,દરિદ્રતા


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train_Accuracy,▁▁▂▃▄▅▆▇██
Train_Loss,█▅▃▃▂▂▂▁▁▁
Val_Accuracy,▁▂▄▅▆▇▇███
Val_Loss,█▆▄▃▃▂▂▂▁▂

0,1
Epoch,10.0
Train_Accuracy,0.10201
Train_Loss,0.19622
Val_Accuracy,0.146
Val_Loss,0.00275


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jt0suosd with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_num_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	encoder_num_layers: 3
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	input_embedding_size: 16
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5


100%|██████████| 800/800 [00:35<00:00, 22.25it/s]
100%|██████████| 64/64 [00:12<00:00,  5.12it/s]



Epoch 1/10,
 Train Loss: 0.6391, Train Accuracy: 0.0000,
 Val Loss: 0.0083, Val Accuracy: 0.0000



100%|██████████| 800/800 [00:35<00:00, 22.42it/s]
100%|██████████| 64/64 [00:12<00:00,  5.14it/s]



Epoch 2/10,
 Train Loss: 0.4887, Train Accuracy: 0.0000,
 Val Loss: 0.0087, Val Accuracy: 0.0000



100%|██████████| 800/800 [00:36<00:00, 22.17it/s]
100%|██████████| 64/64 [00:12<00:00,  5.08it/s]



Epoch 3/10,
 Train Loss: 0.4738, Train Accuracy: 0.0000,
 Val Loss: 0.0086, Val Accuracy: 0.0000



100%|██████████| 800/800 [00:35<00:00, 22.36it/s]
100%|██████████| 64/64 [00:12<00:00,  5.10it/s]



Epoch 4/10,
 Train Loss: 0.4629, Train Accuracy: 0.0000,
 Val Loss: 0.0084, Val Accuracy: 0.0000



100%|██████████| 800/800 [00:36<00:00, 22.16it/s]
100%|██████████| 64/64 [00:12<00:00,  5.13it/s]



Epoch 5/10,
 Train Loss: 0.4523, Train Accuracy: 0.0000,
 Val Loss: 0.0081, Val Accuracy: 0.0000



100%|██████████| 800/800 [00:36<00:00, 22.10it/s]
100%|██████████| 64/64 [00:12<00:00,  5.07it/s]



Epoch 6/10,
 Train Loss: 0.4413, Train Accuracy: 0.0000,
 Val Loss: 0.0086, Val Accuracy: 0.0000



100%|██████████| 800/800 [00:36<00:00, 21.83it/s]
100%|██████████| 64/64 [00:12<00:00,  4.93it/s]



Epoch 7/10,
 Train Loss: 0.4308, Train Accuracy: 0.0000,
 Val Loss: 0.0083, Val Accuracy: 0.0000



100%|██████████| 800/800 [00:36<00:00, 21.84it/s]
100%|██████████| 64/64 [00:13<00:00,  4.91it/s]



Epoch 8/10,
 Train Loss: 0.4204, Train Accuracy: 0.0001,
 Val Loss: 0.0081, Val Accuracy: 0.0000



100%|██████████| 800/800 [00:36<00:00, 21.62it/s]
100%|██████████| 64/64 [00:12<00:00,  5.13it/s]



Epoch 9/10,
 Train Loss: 0.4076, Train Accuracy: 0.0001,
 Val Loss: 0.0080, Val Accuracy: 0.0000



100%|██████████| 800/800 [00:36<00:00, 21.81it/s]
100%|██████████| 64/64 [00:12<00:00,  5.10it/s]


Epoch 10/10,
 Train Loss: 0.3948, Train Accuracy: 0.0002,
 Val Loss: 0.0079, Val Accuracy: 0.0000


 Predictions Generated by current sweep and actual output:






Unnamed: 0,0,1,2
0,INPUT,PREDICTED,ACTUAL
1,vasteena,વેવ્ર્રાન,વસતીના
2,gardanana,અાાાાાાાાા,ગરદનના
3,roqsa,ર્ર્ર્સ,રોક્સ
4,suparastarsa,સ્ર્ર્ર્ર્ર્ર્ર,સુપરસ્ટાર્સ
...,...,...,...
4092,humirsir,મિર્ર્ર્સ્સ,હમીરસર
4093,batata,બરરારા,બટાટા
4094,dhovaai,વાવવાાા,ધોવાઇ
4095,daridrataa,હાર્ર્રરારા,દરિદ્રતા


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train_Accuracy,▁▁▁▁▁▁▁▃▃█
Train_Loss,█▄▃▃▃▂▂▂▁▁
Val_Accuracy,▁▁▁▁▁▁▁▁▁▁
Val_Loss,▅█▇▅▃▇▄▃▂▁

0,1
Epoch,10.0
Train_Accuracy,0.00018
Train_Loss,0.39484
Val_Accuracy,0.0
Val_Loss,0.00786


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 3j0alwxg with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_num_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	encoder_num_layers: 2
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5


100%|██████████| 800/800 [00:42<00:00, 18.74it/s]
100%|██████████| 64/64 [00:12<00:00,  5.32it/s]



Epoch 1/10,
 Train Loss: 0.3636, Train Accuracy: 0.0084,
 Val Loss: 0.0031, Val Accuracy: 0.1074



100%|██████████| 800/800 [00:49<00:00, 16.26it/s]
100%|██████████| 64/64 [00:11<00:00,  5.34it/s]



Epoch 2/10,
 Train Loss: 0.1884, Train Accuracy: 0.1033,
 Val Loss: 0.0023, Val Accuracy: 0.2358



100%|██████████| 800/800 [00:53<00:00, 14.85it/s]
100%|██████████| 64/64 [00:11<00:00,  5.42it/s]



Epoch 3/10,
 Train Loss: 0.1516, Train Accuracy: 0.1909,
 Val Loss: 0.0023, Val Accuracy: 0.2922



100%|██████████| 800/800 [00:55<00:00, 14.29it/s]
100%|██████████| 64/64 [00:11<00:00,  5.41it/s]



Epoch 4/10,
 Train Loss: 0.1290, Train Accuracy: 0.2579,
 Val Loss: 0.0023, Val Accuracy: 0.2603



100%|██████████| 800/800 [00:58<00:00, 13.68it/s]
100%|██████████| 64/64 [00:11<00:00,  5.37it/s]



Epoch 5/10,
 Train Loss: 0.1166, Train Accuracy: 0.2989,
 Val Loss: 0.0027, Val Accuracy: 0.3528



100%|██████████| 800/800 [00:59<00:00, 13.35it/s]
100%|██████████| 64/64 [00:11<00:00,  5.42it/s]



Epoch 6/10,
 Train Loss: 0.1072, Train Accuracy: 0.3364,
 Val Loss: 0.0024, Val Accuracy: 0.3396



100%|██████████| 800/800 [01:01<00:00, 12.99it/s]
100%|██████████| 64/64 [00:12<00:00,  5.07it/s]



Epoch 7/10,
 Train Loss: 0.0978, Train Accuracy: 0.3705,
 Val Loss: 0.0024, Val Accuracy: 0.3442



100%|██████████| 800/800 [01:03<00:00, 12.68it/s]
100%|██████████| 64/64 [00:12<00:00,  5.30it/s]



Epoch 8/10,
 Train Loss: 0.0907, Train Accuracy: 0.3925,
 Val Loss: 0.0024, Val Accuracy: 0.3784



100%|██████████| 800/800 [01:03<00:00, 12.51it/s]
100%|██████████| 64/64 [00:11<00:00,  5.40it/s]



Epoch 9/10,
 Train Loss: 0.0869, Train Accuracy: 0.4162,
 Val Loss: 0.0024, Val Accuracy: 0.3826



100%|██████████| 800/800 [01:03<00:00, 12.55it/s]
100%|██████████| 64/64 [00:12<00:00,  5.11it/s]



Epoch 10/10,
 Train Loss: 0.0846, Train Accuracy: 0.4303,
 Val Loss: 0.0026, Val Accuracy: 0.3955


 Predictions Generated by current sweep and actual output:



Unnamed: 0,0,1,2
0,INPUT,PREDICTED,ACTUAL
1,vasteena,વસ્તીના,વસતીના
2,gardanana,ગર્દનાના,ગરદનના
3,roqsa,રોક્સા,રોક્સ
4,suparastarsa,સુપરસાર્સ,સુપરસ્ટાર્સ
...,...,...,...
4092,humirsir,હુમુરસિર,હમીરસર
4093,batata,બટાટા,બટાટા
4094,dhovaai,ધોવાઈ,ધોવાઇ
4095,daridrataa,દરિદ્રતા,દરિદ્રતા


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train_Accuracy,▁▃▄▅▆▆▇▇██
Train_Loss,█▄▃▂▂▂▁▁▁▁
Val_Accuracy,▁▄▅▅▇▇▇███
Val_Loss,█▁▁▁▅▁▂▂▂▄

0,1
Epoch,10.0
Train_Accuracy,0.43029
Train_Loss,0.08457
Val_Accuracy,0.39551
Val_Loss,0.00259


[34m[1mwandb[0m: Agent Starting Run: xkctw9v0 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_num_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	encoder_num_layers: 3
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	input_embedding_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5


100%|██████████| 800/800 [00:38<00:00, 21.03it/s]
100%|██████████| 64/64 [00:12<00:00,  5.01it/s]



Epoch 1/10,
 Train Loss: 0.6511, Train Accuracy: 0.0000,
 Val Loss: 0.0063, Val Accuracy: 0.0000



100%|██████████| 800/800 [00:38<00:00, 20.76it/s]
100%|██████████| 64/64 [00:13<00:00,  4.68it/s]



Epoch 2/10,
 Train Loss: 0.4893, Train Accuracy: 0.0000,
 Val Loss: 0.0064, Val Accuracy: 0.0000



100%|██████████| 800/800 [00:38<00:00, 20.54it/s]
100%|██████████| 64/64 [00:13<00:00,  4.90it/s]



Epoch 3/10,
 Train Loss: 0.4811, Train Accuracy: 0.0000,
 Val Loss: 0.0063, Val Accuracy: 0.0000



100%|██████████| 800/800 [00:38<00:00, 20.80it/s]
100%|██████████| 64/64 [00:13<00:00,  4.90it/s]



Epoch 4/10,
 Train Loss: 0.4712, Train Accuracy: 0.0000,
 Val Loss: 0.0061, Val Accuracy: 0.0000



100%|██████████| 800/800 [00:39<00:00, 20.24it/s]
100%|██████████| 64/64 [00:13<00:00,  4.90it/s]



Epoch 5/10,
 Train Loss: 0.4596, Train Accuracy: 0.0000,
 Val Loss: 0.0060, Val Accuracy: 0.0000



100%|██████████| 800/800 [00:38<00:00, 20.79it/s]
100%|██████████| 64/64 [00:13<00:00,  4.70it/s]



Epoch 6/10,
 Train Loss: 0.4480, Train Accuracy: 0.0000,
 Val Loss: 0.0060, Val Accuracy: 0.0000



100%|██████████| 800/800 [00:39<00:00, 20.02it/s]
100%|██████████| 64/64 [00:12<00:00,  4.94it/s]



Epoch 7/10,
 Train Loss: 0.4338, Train Accuracy: 0.0000,
 Val Loss: 0.0060, Val Accuracy: 0.0002



100%|██████████| 800/800 [00:38<00:00, 20.57it/s]
100%|██████████| 64/64 [00:13<00:00,  4.89it/s]



Epoch 8/10,
 Train Loss: 0.4151, Train Accuracy: 0.0000,
 Val Loss: 0.0056, Val Accuracy: 0.0002



100%|██████████| 800/800 [00:39<00:00, 20.28it/s]
100%|██████████| 64/64 [00:13<00:00,  4.75it/s]



Epoch 9/10,
 Train Loss: 0.3968, Train Accuracy: 0.0000,
 Val Loss: 0.0054, Val Accuracy: 0.0002



100%|██████████| 800/800 [00:39<00:00, 20.39it/s]
100%|██████████| 64/64 [00:12<00:00,  4.93it/s]


Epoch 10/10,
 Train Loss: 0.3805, Train Accuracy: 0.0001,
 Val Loss: 0.0056, Val Accuracy: 0.0002


 Predictions Generated by current sweep and actual output:






Unnamed: 0,0,1,2
0,INPUT,PREDICTED,ACTUAL
1,vasteena,પ્ર્રનનન,વસતીના
2,gardanana,પર્રાનનના,ગરદનના
3,roqsa,સ્ર્ર,રોક્સ
4,suparastarsa,સ્ર્ર્ર્ર્ર્ર,સુપરસ્ટાર્સ
...,...,...,...
4092,humirsir,મ્ર્ર્ર્્,હમીરસર
4093,batata,પ્રાાા,બટાટા
4094,dhovaai,વાવાાા,ધોવાઇ
4095,daridrataa,ર્ર્રરરરાા,દરિદ્રતા


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train_Accuracy,▁▁▁▁▁▁▁▁▄█
Train_Loss,█▄▄▃▃▃▂▂▁▁
Val_Accuracy,▁▁▁▁▁▁████
Val_Loss,███▆▅▆▅▂▁▂

0,1
Epoch,10.0
Train_Accuracy,0.0001
Train_Loss,0.38053
Val_Accuracy,0.00024
Val_Loss,0.0056
