## **Import Libraries**

In [1]:
import wandb
import numpy as np
import pandas as pd
import os, string, glob
import random, math, time


# Visualization tools
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import clear_output
import tqdm as tqdm

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
from torch.optim.lr_scheduler import CosineAnnealingLR, StepLR


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## **Dataset Creation**

In [3]:
class Data_Preparation():
    def __init__(self, DATAPATH):
        self.train_path = glob.glob(DATAPATH + '/*')[1]
        self.val_path = glob.glob(DATAPATH + '/*')[2]
        self.test_path = glob.glob(DATAPATH + '/*')[0]

        self.train_df = pd.read_csv(self.train_path, names = ['English', 'Hindi'])
        self.val_df = pd.read_csv(self.val_path, names = ['English', 'Hindi'])
        self.test_df = pd.read_csv(self.test_path, names = ['English', 'Hindi'])


    
    def dictionary_lookup(self, vocab):
        char2int = dict([(char, i) for i, char in enumerate(vocab)])
        int2char = dict([(i, char) for char, i in char2int.items()])
        return char2int, int2char
    
    def encode(self, source, target, source_chars, target_chars, source_char2int=None, target_char2int=None):
        num_encoder_tokens = len(source_chars)
        num_decoder_tokens = len(target_chars)
        max_source_length = max([len(txt) for txt in source])
        max_target_length = max([len(txt) for txt in target])

        encoder_input_data = np.zeros((len(source), max_source_length, num_encoder_tokens), dtype="float32")
        decoder_input_data = np.zeros((len(target), max_target_length, num_decoder_tokens), dtype="float32")
        decoder_target_data = np.zeros((len(target), max_target_length, num_decoder_tokens), dtype="float32")

        source_vocab, target_vocab = None, None
        if source_char2int == None and target_char2int == None:
            
            source_char2int, source_int2char = self.dictionary_lookup(source_chars)
            target_char2int, target_int2char = self.dictionary_lookup(target_chars)

            source_vocab = (source_char2int, source_int2char)
            target_vocab = (target_char2int, target_int2char)

        for i, (input_text, target_text) in enumerate(zip(source, target)):
            for t, char in enumerate(input_text):
                encoder_input_data[i, t, source_char2int[char]] = 1.0
            encoder_input_data[i, t + 1 :,source_char2int["-PAD-"]] = 1.0
            for t, char in enumerate(target_text):
                # decoder_target_data is ahead of decoder_input_data by one timestep
                decoder_input_data[i, t, target_char2int[char]] = 1.0
                if t > 0:
                    # decoder_target_data will be ahead by one timestep
                    # and will not include the start character.
                    decoder_target_data[i, t - 1, target_char2int[char]] = 1.0
            decoder_input_data[i, t + 1:, target_char2int["-PAD-"]] = 1.0
            decoder_target_data[i, t:, target_char2int["-PAD-"]] = 1.0
        
        if source_vocab !=None and target_vocab !=None:
            return encoder_input_data, decoder_input_data, decoder_target_data, source_vocab, target_vocab

        else:
            return encoder_input_data, decoder_input_data, decoder_target_data
        

    def preprocess(self, source, target):
        source_chars = set(list(string.ascii_lowercase))
        #source_chars = set()
        target_chars = set([chr(alpha) for alpha in range(2304, 2432)])

        source = [str(x) for x in source]
        target = [str(x) for x in target]

        source_words = []
        target_words = []
        for src, tgt in zip(source, target):
            tgt = "\t" + tgt + "\n"
            source_words.append(src)
            target_words.append(tgt)
            for char in src:
                if char not in source_chars:
                    source_chars.add(char)
            for char in tgt:
                if char not in target_chars:
                    target_chars.add(char)
        
        source_chars = sorted(list(source_chars))
        target_chars = sorted(list(target_chars))

        # Adding pad token
        source_chars.append('-PAD-')
        target_chars.append('-PAD-')

        self.num_encoder_tokens = len(source_chars)
        self.num_decoder_tokens = len(target_chars)
        self.max_source_length = max([len(txt) for txt in source_words])
        self.max_target_length = max([len(txt) for txt in target_words])

        print("\n Number of samples:", len(source))
        print("Source Vocab length:", self.num_encoder_tokens)
        print("Target Vocab length:", self.num_decoder_tokens)
        print("Max sequence length for inputs:", self.max_source_length)
        print("Max sequence length for outputs:", self.max_target_length)
        
        return source_words, target_words, source_chars, target_chars
    
    def create_dataloaders(self, batch_size):
        train_source_words, train_target_words, train_source_chars, train_target_chars = self.preprocess(self.train_df["English"].to_list(), self.train_df["Hindi"].to_list())
        self.train_data = self.encode(train_source_words, train_target_words, train_source_chars, train_target_chars)
        (self.train_encoder_input, self.train_decoder_input, self.train_decoder_target, self.source_vocab, self.target_vocab) = self.train_data
        self.source_char2int, self.source_int2char = self.source_vocab
        self.target_char2int, self.target_int2char = self.target_vocab

        val_source_words, val_target_words, val_source_chars, val_target_chars  = self.preprocess(self.val_df["English"].to_list(), self.val_df["Hindi"].to_list())
        self.val_data = self.encode(val_source_words, val_target_words, list(self.source_char2int.keys()), list(self.target_char2int.keys()), source_char2int = self.source_char2int, target_char2int=self.target_char2int)
        self.val_encoder_input, self.val_decoder_input, self.val_decoder_target = self.val_data
        # self.source_char2int, self.source_int2char = self.source_vocab
        # self.target_char2int, self.target_int2char = self.target_vocab

        test_source_words, test_target_words, test_source_chars, test_target_chars  = self.preprocess(self.test_df["English"].to_list(), self.test_df["Hindi"].to_list())
        self.test_data = self.encode(test_source_words, test_target_words, list(self.source_char2int.keys()), list(self.target_char2int.keys()), source_char2int = self.source_char2int, target_char2int=self.target_char2int)
        self.test_encoder_input, self.test_decoder_input, self.test_decoder_target = self.test_data
        # self.source_char2int, self.source_int2char = self.source_vocab
        # self.target_char2int, self.target_int2char = self.target_vocab

        encoder_input_data_train = torch.stack([torch.from_numpy(np.array(i)) for i in self.train_encoder_input])
        decoder_input_data_train = torch.stack([torch.from_numpy(np.array(i)) for i in self.train_decoder_input])
    
        encoder_input_data_val = torch.stack([torch.from_numpy(np.array(i)) for i in self.val_encoder_input])
        decoder_input_data_val = torch.stack([torch.from_numpy(np.array(i)) for i in self.val_decoder_input])

        encoder_input_data_test = torch.stack([torch.from_numpy(np.array(i)).float() for i in self.test_encoder_input])
        decoder_input_data_test = torch.stack([torch.from_numpy(np.array(i)).float() for i in self.test_decoder_input])

        train_dataset = torch.utils.data.TensorDataset(encoder_input_data_train, decoder_input_data_train)
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        val_dataset = torch.utils.data.TensorDataset(encoder_input_data_val, decoder_input_data_val)
        val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True)


        test_dataset = torch.utils.data.TensorDataset(encoder_input_data_test, decoder_input_data_test)
        test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

        return train_loader, val_loader, test_loader

## **Model**

### **Note:** Model Building part has been inspired from below link

'https://github.com/bentrevett/pytorch-seq2seq/blob/main/1%20-%20Sequence%20to%20Sequence%20Learning%20with%20Neural%20Networks.ipynb'

In [21]:
class Attention(nn.Module):
    
    def __init__(self, hidden_size):
        super(Attention, self).__init__()
        self.hidden_size = hidden_size
        # Linear layer to combine hidden state and encoder outputs
        self.attn = nn.Linear(self.hidden_size * 2, hidden_size)
        # Parameter vector for attention
        self.v = nn.Parameter(torch.rand(hidden_size))
        # Initialize parameter vector
        stdv = 1. / math.sqrt(self.v.size(0))
        self.v.data.uniform_(-stdv, stdv)

    def forward(self, hidden, encoder_outputs):
        # Calculate attention energies
        attn_energies = self.score(hidden, encoder_outputs)
        # Return softmax normalized probabilities
        return F.softmax(attn_energies, dim=1).unsqueeze(1)
    
    def score(self, hidden, encoder_outputs):
        """
        Calculate the attention score for each encoder output.
        """
        # Repeat hidden state across all time steps
        timestep = encoder_outputs.size(0)
        h = hidden.repeat(timestep, 1, 1).transpose(0, 1)
        # Transpose encoder outputs for concatenation
        encoder_outputs = encoder_outputs.transpose(0, 1)
        # Concatenate hidden state and encoder outputs and pass through a linear layer
        energy = F.relu(self.attn(torch.cat([h, encoder_outputs], 2)))
        # Transpose energy for batch multiplication
        energy = energy.transpose(1, 2)
        # Repeat and reshape the parameter vector for batch multiplication
        v = self.v.unsqueeze(0).expand(encoder_outputs.size(0), -1).unsqueeze(1)
        # Compute the attention scores
        energy = torch.bmm(v, energy)
        return energy.squeeze(1)



In [22]:
class Encoder(nn.Module):
    def __init__(self, num_encoder_tokens, hidden_dim, n_layers, dropout, encoder_embedding_dim=0, cell_type="LSTM", verbose=False):
        """
        Initialize the Encoder.

        Parameters:
        num_encoder_tokens (int): Size of the input vocabulary.
        hidden_dim (int): Number of features in the hidden state.
        n_layers (int): Number of recurrent layers.
        dropout (float): Dropout probability.
        encoder_embedding_dim (int, optional): Dimension of the embeddings (0 if not using embeddings). Default is 0.
        cell_type (str, optional): Type of RNN cell ('LSTM', 'GRU', or 'RNN'). Default is 'LSTM'.
        verbose (bool, optional): If True, print shapes of tensors for debugging. Default is False.
        """
        super(Encoder, self).__init__()

        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.cell_type = cell_type
        self.embedding_dim = encoder_embedding_dim
        self.encoder_input_size = num_encoder_tokens
        self.verbose = verbose

        # Dropout layer for regularization
        self.dropout = nn.Dropout(dropout)

        # Adjust dropout for single-layer RNNs (no dropout if n_layers == 1)
        dropout = 0 if (n_layers == 1) else dropout

        # If embedding dimension is specified, add an embedding layer
        if self.embedding_dim != 0:
            self.encoder_input_size = self.embedding_dim
            self.embedding = nn.Embedding(num_encoder_tokens, self.embedding_dim, padding_idx=num_encoder_tokens-1)

        # Define the RNN cell (LSTM, GRU, or RNN)
        if self.cell_type == 'LSTM':
            self.rnn = nn.LSTM(self.encoder_input_size, hidden_dim, n_layers, dropout=dropout)
        elif self.cell_type == 'RNN':
            self.rnn = nn.RNN(self.encoder_input_size, hidden_dim, n_layers, dropout=dropout)
        elif self.cell_type == "GRU":
            self.rnn = nn.GRU(self.encoder_input_size, hidden_dim, n_layers, dropout=dropout)

    def forward(self, input):
        """
        Forward pass through the encoder.

        Parameters:
        input (Tensor): Input tensor of shape (batch_size, seq_length, num_encoder_tokens).

        Returns:
        outputs (Tensor): Output features from the last layer of the RNN for each time step of shape (seq_length, batch_size, hidden_dim).
        hidden_cell (Tensor or Tuple[Tensor, Tensor]): Hidden state (and cell state if LSTM) of shape (n_layers, batch_size, hidden_dim).
        """
        # Transpose input to have sequence length first
        input = input.transpose(0, 1)

        # If embedding is used, apply embedding and dropout
        if self.embedding_dim != 0:
            input = input.argmax(2)  # Convert one-hot encoding to indices
            input = self.dropout(self.embedding(input))  # Apply embedding and dropout

        if self.verbose:
            print(f"Input shape after embedding: {input.shape}")

        # Pass the input through the RNN
        outputs, hidden_cell = self.rnn(input)

        if self.verbose:
            print(f"Input shape: {input.shape}")
            print(f"Outputs shape: {outputs.shape}")
            print(f"Hidden/Cell state shape: {hidden_cell.shape}")

        return outputs, hidden_cell




In [23]:
import torch
import torch.nn as nn

class Decoder(nn.Module):
    def __init__(self, num_decoder_tokens, decoder_hidden_dim, n_layers, dropout, decoder_embedding_dim=0, cell_type='LSTM', atten=False, verbose=False):
        """
        Decoder module of a seq2seq model.

        Args:
        - num_decoder_tokens (int): Number of tokens in the decoder vocabulary.
        - decoder_hidden_dim (int): Dimensionality of the decoder hidden states.
        - n_layers (int): Number of layers in the decoder RNN.
        - dropout (float): Dropout probability.
        - decoder_embedding_dim (int, optional): Dimensionality of the decoder embeddings. Defaults to 0.
        - cell_type (str, optional): Type of RNN cell used in the decoder ('LSTM', 'GRU', or 'RNN'). Defaults to 'LSTM'.
        - atten (bool, optional): Whether to use attention mechanism. Defaults to False.
        - verbose (bool, optional): Whether to print verbose information during forward pass. Defaults to False.
        """
        super(Decoder, self).__init__()
        
        self.output_dim = num_decoder_tokens
        self.decoder_hidden_dim = decoder_hidden_dim
        self.cell_type = cell_type
        self.attention = atten
        self.n_layers = n_layers
        self.decoder_embedding_dim = decoder_embedding_dim
        self.decoder_input = num_decoder_tokens
        self.verbose = verbose
        self.dropout = nn.Dropout(dropout)

        # Adjust dropout for single-layer RNNs
        dropout = 0 if (n_layers == 1) else dropout

        if self.decoder_hidden_dim != 0:
            self.decoder_input = self.decoder_embedding_dim
            self.embedding = nn.Embedding(num_decoder_tokens, self.decoder_embedding_dim)

        if self.attention == False:
            if cell_type == 'LSTM':
                self.rnn = nn.LSTM(self.decoder_input, self.decoder_hidden_dim, n_layers, dropout=dropout)
            elif cell_type == 'RNN':
                self.rnn = nn.RNN(self.decoder_input, self.decoder_hidden_dim, n_layers, dropout=dropout)
            elif cell_type == 'GRU':
                self.rnn = nn.GRU(self.decoder_input, self.decoder_hidden_dim, n_layers, dropout=dropout)

            self.fc_out = nn.Linear(self.decoder_hidden_dim, self.output_dim)

        else:
            self.attention = Attention(self.decoder_hidden_dim)
            
            if cell_type == "LSTM":
                self.rnn = nn.LSTM(self.decoder_hidden_dim + self.decoder_input, decoder_hidden_dim, n_layers, dropout=dropout)

            elif cell_type == "RNN":
                self.rnn = nn.RNN(self.decoder_hidden_dim + self.decoder_input, decoder_hidden_dim, n_layers, dropout=dropout)

            elif cell_type == "GRU":
                self.rnn = nn.GRU(self.decoder_hidden_dim + self.decoder_input, decoder_hidden_dim, n_layers, dropout=dropout)

            self.fc_out = nn.Linear(self.decoder_hidden_dim * 2, self.output_dim)
    
    def forward(self, input, hidden_cell, encoder_states):
        """
        Perform forward pass.

        Args:
        - input (Tensor): Input tensor of shape (batch_size, 1, num_decoder_tokens).
        - hidden_cell (Tensor or Tuple[Tensor, Tensor]): Hidden state (and cell state if LSTM) from the encoder.
        - encoder_states (Tensor): Encoder outputs of shape (seq_length, batch_size, hidden_dim).

        Returns:
        - output (Tensor): Output tensor of shape (batch_size, num_decoder_tokens).
        - hidden (Tensor or Tuple[Tensor, Tensor]): Hidden state (and cell state if LSTM) of shape (n_layers, batch_size, hidden_dim).
        - attn_weights (Tensor, optional): Attention weights of shape (batch_size, 1, seq_length) if attention is used.
        """
        if isinstance(hidden_cell, tuple):
            hidden = hidden_cell[0]
            cell = hidden_cell[1]
        else:
            hidden = hidden_cell
        
        if self.decoder_embedding_dim != 0:
            input = input.argmax(2)
            input = self.dropout(self.embedding(input))

        if self.attention == False:
            if self.cell_type == "LSTM":
                output, hidden = self.rnn(input, (hidden, cell))
            else:
                output, hidden = self.rnn(input, hidden)

            prediction = self.fc_out(output.squeeze(0))

            return prediction, hidden
        
        else:
            attn_weights = self.attention(hidden[-1], encoder_states)
            context = attn_weights.bmm(encoder_states.transpose(0, 1))
            context = context.transpose(0, 1)
            rnn_input = torch.cat([input, context], 2)

            if self.cell_type == "LSTM":
                output, hidden = self.rnn(rnn_input, (hidden, cell))
            else:
                output, hidden = self.rnn(rnn_input, hidden)
            
            output = output.squeeze(0)
            context = context.squeeze(0)
            output = self.fc_out(torch.cat([output, context], 1))

            if self.verbose:
                print(output.shape)
                print(hidden.shape)

            return output, hidden, attn_weights


In [24]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, max_source_length, max_target_length, target_char2int, num_decoder_tokens, device):
        """
        Sequence-to-Sequence model composed of an encoder and a decoder.

        Args:
        - encoder (nn.Module): Encoder module.
        - decoder (nn.Module): Decoder module.
        - max_source_length (int): Maximum length of source sequences.
        - max_target_length (int): Maximum length of target sequences.
        - target_char2int (dict): Mapping from target characters to integers.
        - num_decoder_tokens (int): Number of tokens in the decoder vocabulary.
        - device (str): Device to run the model on ('cpu' or 'cuda').
        """
        super(Seq2Seq, self).__init__()

        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        self.atten = self.decoder.attention
        self.max_source_length = max_source_length
        self.max_target_length = max_target_length
        self.target_chr2int = target_char2int
        self.num_decoder_tokens = num_decoder_tokens

        assert encoder.hidden_dim == decoder.decoder_hidden_dim, "Hidden dimensions of encoder and decoder must be equal"
        assert encoder.n_layers == decoder.n_layers, "Encoder and decoder must have equal number of layers"
        
    def forward(self, src, trg, to_train, teacher_forcing_ratio=0.5, beam_width=3):
        """
        Forward pass of the seq2seq model.

        Args:
        - src (Tensor): Source sequence tensor of shape (max_source_length, batch_size).
        - trg (Tensor): Target sequence tensor of shape (max_target_length, batch_size).
        - to_train (bool): Whether to train the model or not.
        - teacher_forcing_ratio (float): Probability of teacher forcing during training.
        - beam_width (int): Beam width for beam search decoding.

        Returns:
        - outputs (Tensor): Model outputs of shape (max_target_length, batch_size, num_decoder_tokens).
        """
        if to_train:
            teacher_forcing_ratio = teacher_forcing_ratio
        else:
            teacher_forcing_ratio = 0

        trg = trg.transpose(0, 1)
        batch_size = trg.shape[1]
        trg_len = trg.shape[0]
        trg_vocab_size = self.decoder.output_dim

        outputs = torch.zeros(trg_len, batch_size, trg_vocab_size).to(self.device)

        encoder_output, hidden_cell = self.encoder(src)

        inp = trg[0, :]

        for t in range(1, trg_len):
            if self.atten == False:
                prediction, hidden_cell = self.decoder(inp.unsqueeze(0), hidden_cell, encoder_output)
            else:
                prediction, hidden_cell, attn_weights = self.decoder(inp.unsqueeze(0), hidden_cell, encoder_output)

            outputs[t] = prediction
            teacher_force = random.random() < teacher_forcing_ratio
            top1 = prediction.argmax(1)
            top1_one_hot = torch.zeros_like(prediction).to(self.device)
            top1_one_hot[:, top1] = 1.0

            inp = trg[t] if teacher_force else top1_one_hot

        return outputs

        
  

## **Helper Functions**

In [None]:
def accuracy_calc(target, output, train=True):
    """
    Calculate accuracy of model predictions.

    Args:
    - target (Tensor): Target sequences tensor of shape (max_target_length, batch_size).
    - output (Tensor): Model output sequences tensor of shape (max_target_length, batch_size, num_decoder_tokens).
    - train (bool): Whether the model is being trained or not.

    Returns:
    - num_correct (int): Number of correctly predicted sequences.
    - batch_size (int): Total number of sequences in the batch.
    """
    target = target.transpose(0, 1)
    num_correct = 0
    batch_size = target.shape[0]

    target_indices = (target == 1).nonzero()[:, 1]

    assert batch_size == len(target_indices)

    if train:
        output = output.argmax(2)  # LxB
        output = output.transpose(0, 1)
        output_indices = (output == 1).nonzero()[:, 1]
        for seq, i in zip(range(batch_size), target_indices):
            if torch.all(output[seq, :i + 1] == target[seq, :i + 1]):
                num_correct += 1
    else:
        for seq, i in zip(range(batch_size), target_indices):
            if torch.all(torch.tensor(output[seq][0]).to(device) == target[seq, :i + 1]):
                num_correct += 1

    return num_correct, batch_size


In [25]:
def train(model, iterator, optimizer, criterion, clip, teacher_forcing_ratio, device):
    """
    Train the model on the given data.

    Args:
    - model (nn.Module): The model to be trained.
    - iterator (DataLoader): Data iterator containing the training data.
    - optimizer (torch.optim.Optimizer): Optimizer for updating model parameters.
    - criterion (nn.Module): Loss function.
    - clip (float): Gradient clipping value.
    - teacher_forcing_ratio (float): Probability of teacher forcing during training.
    - device (str): Device to run the model on ('cpu' or 'cuda').

    Returns:
    - epoch_loss (float): Average loss per epoch.
    - accuracy (float): Accuracy of the model on the training data.
    """
    model.train()
    epoch_loss = 0.0
    total_no_correct = 0
    total_samples = 0

    for i, (src, trg) in enumerate(iterator):
        optimizer.zero_grad()
        src, trg = src.to(device), trg.to(device)
        output = model(src, trg, teacher_forcing_ratio=teacher_forcing_ratio, to_train=True).to(device)
        trg = trg.transpose(0, 1)
        trg = trg.argmax(2)
        num_correct, num_samples = accuracy_calc(trg, output, train=True)

        output_dim = output.shape[-1]
        output = output[1:].view(-1, output_dim)
        trg = trg[1:].reshape(-1)

        loss = criterion(output, trg)

        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)

        optimizer.step()

        epoch_loss += loss.item()
        total_no_correct += num_correct
        total_samples += num_samples

    return epoch_loss / len(iterator), total_no_correct / total_samples


In [26]:
def evaluate(model, iterator, criterion, beam_width, device):
    """
    Evaluate the model on the given data.

    Args:
    - model (nn.Module): The model to be evaluated.
    - iterator (DataLoader): Data iterator containing the evaluation data.
    - criterion (nn.Module): Loss function.
    - beam_width (int): Width of the beam for beam search.
    - device (str): Device to run the model on ('cpu' or 'cuda').

    Returns:
    - epoch_loss (float): Average loss per epoch.
    - accuracy (float): Accuracy of the model on the evaluation data.
    """
    model.eval()
    epoch_loss = 0
    total_no_correct = 0
    total_samples = 0

    with torch.no_grad():
        for i, (src, trg) in enumerate(iterator):
            src, trg = src.to(device), trg.to(device)
            output = model(src, trg, teacher_forcing_ratio=0, to_train=True).to(device)
            trg = trg.transpose(0, 1)
            trg = trg.argmax(2)
            num_correct, num_samples = accuracy_calc(trg, output, train=True)
            output_dim = output.shape[-1]
            output = output[1:].view(-1, output_dim)
            trg = trg[1:].reshape(-1)
            loss = criterion(output, trg)
            epoch_loss += loss.item()
            total_no_correct += num_correct
            total_samples += num_samples

    return epoch_loss / len(iterator), total_no_correct / total_samples


In [27]:
def train_loop(model, train_dataloader, validation_dataloader, device, criterion, config, SAVE_PATH, clip=1, sweep = True):
    """
    Train the model using the training data and evaluate it on the validation data.

    Args:
    - model (nn.Module): The model to be trained and evaluated.
    - train_dataloader (DataLoader): DataLoader containing the training data.
    - validation_dataloader (DataLoader): DataLoader containing the validation data.
    - device (str): Device to run the model on ('cpu' or 'cuda').
    - criterion (nn.Module): Loss function.
    - config (dict): Configuration parameters including optimizer, learning rate, weight decay, etc.
    - clip (float): Gradient clipping value (default=1).

    Returns:
    - epoch_loss_train (list): List of training losses for each epoch.
    - epoch_loss_val (list): List of validation losses for each epoch.
    - epoch_accuracy_train (list): List of training accuracies for each epoch.
    - epoch_accuracy_val (list): List of validation accuracies for each epoch.
    """
    if config.optimizer == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
        #scheduler = StepLR(optimizer, step_size = 5, gamma = 0.5)
        # scheduler = CosineAnnealingLR(optimizer, T_max=config.epochs/2, eta_min = 0.00001)
    elif config.optimizer == 'NAdam':
        optimizer = torch.optim.NAdam(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
        #scheduler = StepLR(optimizer, step_size = 5, gamma = 0.5)
        # scheduler = CosineAnnealingLR(optimizer, T_max=config.epochs/2, eta_min = 0.00001)
    elif config.optimizer == 'RAdam':
        optimizer = torch.optim.RAdam(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
        #scheduler = StepLR(optimizer, step_size = 5, gamma = 0.5)
        # scheduler = CosineAnnealingLR(optimizer, T_max=config.epochs/2, eta_min = 0.00001)
    elif config.optimizer == 'AdamW':
        optimizer = torch.optim.RAdam(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
        #scheduler = StepLR(optimizer, step_size = 5, gamma = 0.5)
        # scheduler = CosineAnnealingLR(optimizer, T_max=config.epochs/2, eta_min = 0.00001)

    epoch_loss_train = []
    epoch_loss_val = []
    epoch_accuracy_train = []
    epoch_accuracy_val = []

    for epoch in range(1, config.epochs + 1):
        print(f"\n EPOCH: {epoch}")

        train_loss, train_accuracy = train(model, train_dataloader, optimizer, criterion, clip, config.teacher_forcing_ratio, device)
        val_loss, val_accuracy = evaluate(model, validation_dataloader, criterion, config.beam_width, device)

        if sweep:
            wandb.log({"validation_accuracy": val_accuracy, "validation_loss": val_loss, "training_accuracy": train_accuracy, "training_loss": train_loss, "epochs": epoch})

        epoch_loss_train.append(train_loss)
        epoch_loss_val.append(val_loss)
        epoch_accuracy_train.append(train_accuracy)
        epoch_accuracy_val.append(val_accuracy)

        print(f"TRAINING LOSS :{train_loss}")
        print(f"TRAINING ACCURACY :{train_accuracy}")
        print(f"VALIDATION LOSS :{val_loss}")
        print(f"VALIDATION ACCURACY :{val_accuracy}")

        #scheduler.step()
    
    torch.save(model, os.path.join(SAVE_PATH + '/model.pth'))
    if not sweep:
        return epoch_loss_train, epoch_loss_val, epoch_accuracy_train, epoch_accuracy_val


## **Sweep**

In [29]:
def sweep_train(sweep_config=None):
    user = "Shashank M"
    project = "Assignment_3_trial"
    display_name = "ch23s019"
    wandb.init(entity=user, project=project, name=display_name, config = sweep_config)

    config_ = wandb.config
    wandb.run.name = "_cell_type_" + str(config_.cell_type) + "__embedding__" + str(config_.encoder_embedding_dim) + "__hidden__" + str(config_.hidden_dim) + "__attention__" + str(config_.attention) + "lr_" + str(config_.learning_rate) + "_opt_" + str(config_.optimizer) + "_epoch_" + str(config_.epochs) + "_bs_" + str(config_.batch_size) 

    # LOAD DATASET

    DATAPATH = '/home/fml-pc/Assignments/Assignment_3/Assignment_3_2024/aksharantar_sampled/aksharantar_sampled/hin'
    dataset_func = Data_Preparation(DATAPATH)
    train_dataloader, validation_dataloader, test_dataloader = dataset_func.create_dataloaders(config_.batch_size)

    num_encoder_tokens = dataset_func.num_encoder_tokens
    hidden_dim = config_.hidden_dim
    n_layers = config_.n_layers
    encoder_embedding_dim = config_.encoder_embedding_dim
    dropout = config_.dropout
    cell_type = config_.cell_type
    decoder_embedding_dim = config_.encoder_embedding_dim
    num_decoder_tokens = dataset_func.num_decoder_tokens
    
    attention = config_.attention

    device = device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    enc = Encoder(num_encoder_tokens, hidden_dim, n_layers, dropout, encoder_embedding_dim, cell_type, verbose=False)
    dec = Decoder(num_decoder_tokens, hidden_dim, n_layers, dropout, decoder_embedding_dim, cell_type, atten=attention, verbose=False)
    model = Seq2Seq(enc, dec, dataset_func.max_source_length, dataset_func.max_target_length, dataset_func.target_char2int, dataset_func.num_decoder_tokens, device)
    model = model.to(device)

    SAVE_PATH = '/home/fml-pc/Assignments/Assignment_3/Assignment_3_2024/'

    criterion = nn.CrossEntropyLoss(ignore_index = dataset_func.target_char2int["-PAD-"])
    train_loop(model, train_dataloader, validation_dataloader, device, criterion,config=config_, SAVE_PATH= SAVE_PATH, clip=1)

In [30]:
sweep_config = {
    'method': 'bayes',
    'metric':{
        'name' : 'val_accuracy',
        'goal' : 'maximize'},
    'parameters':{
        'epochs':{'values': [25, 30, 40]},
        'cell_type':{'values':['GRU', 'LSTM']},
        'n_layers':{'values':[1, 2, 3]},
        'hidden_dim':{'values':[256, 400, 512, 1024]},
        'encoder_embedding_dim':{'values':[200, 256, 300, 512]},
        'dropout':{'values':[0.2, 0.4, 0.5]},
        'teacher_forcing_ratio':{'values':[0.3, 0.35, 0.4, 0.45, 0.5]},
        'learning_rate': {'min': 0.0001, 'max': 0.001},
        'optimizer':{'values': ['Adam', 'NAdam', 'RAdam', 'AdamW']},
        'batch_size': {'values':[64, 128, 256]},
        'weight_decay':{'values':[0]},
        'attention':{'values':[True]},
        'beam_width': {'values': [1, 2, 3]}        
    },
}

In [20]:
print(sweep_config)
sweep_id = wandb.sweep(sweep_config,project="Assignment_3_start")
wandb.agent(sweep_id,function=sweep_train, count = 10)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


{'method': 'bayes', 'metric': {'name': 'val_accuracy', 'goal': 'maximize'}, 'parameters': {'epochs': {'values': [25, 30, 40]}, 'cell_type': {'values': ['GRU', 'LSTM']}, 'n_layers': {'values': [1, 2, 3]}, 'hidden_dim': {'values': [256, 400, 512, 1024]}, 'encoder_embedding_dim': {'values': [200, 256, 300, 512]}, 'dropout': {'values': [0.2, 0.4, 0.5]}, 'teacher_forcing_ratio': {'values': [0.3, 0.35, 0.4, 0.45, 0.5]}, 'learning_rate': {'min': 0.0001, 'max': 0.001}, 'optimizer': {'values': ['Adam', 'NAdam', 'RAdam', 'AdamW']}, 'batch_size': {'values': [64, 128, 256]}, 'weight_decay': {'values': [0]}, 'attention': {'values': [True]}, 'beam_width': {'values': [1, 2, 3]}}}
Create sweep with ID: 0l5hwc4d
Sweep URL: https://wandb.ai/ch23s019/Assignment_3_start/sweeps/0l5hwc4d


[34m[1mwandb[0m: Agent Starting Run: iku3rg3c with config:
[34m[1mwandb[0m: 	attention: True
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_width: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	encoder_embedding_dim: 256
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	hidden_dim: 1024
[34m[1mwandb[0m: 	learning_rate: 0.0002533453975210106
[34m[1mwandb[0m: 	n_layers: 3
[34m[1mwandb[0m: 	optimizer: NAdam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.45
[34m[1mwandb[0m: 	weight_decay: 0
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mch23s019[0m. Use [1m`wandb login --relogin`[0m to force relogin



 Number of samples: 51200
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 24
Max sequence length for outputs: 22

 Number of samples: 4096
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 22
Max sequence length for outputs: 22

 Number of samples: 4096
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 26
Max sequence length for outputs: 22

 EPOCH: 1
CrossEntropyLoss()
TRAINING LOSS :2.3740765592455864
TRAINING ACCURACY :0.00623046875
VALIDATION LOSS :1.4574610590934753
VALIDATION ACCURACY :0.007080078125

 EPOCH: 2
CrossEntropyLoss()
TRAINING LOSS :1.1313007655739784
TRAINING ACCURACY :0.08216796875
VALIDATION LOSS :1.0255829989910126
VALIDATION ACCURACY :0.116943359375

 EPOCH: 3
CrossEntropyLoss()
TRAINING LOSS :0.8367953509092331
TRAINING ACCURACY :0.1634375
VALIDATION LOSS :0.9488147888332605
VALIDATION ACCURACY :0.14453125

 EPOCH: 4
CrossEntropyLoss()
TRAINING LOSS :0.691118515506386

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
training_accuracy,▁▂▂▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
training_loss,█▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
validation_accuracy,▁▃▄▄▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇█▇████▇▇▇██
validation_loss,█▃▂▂▂▁▁▂▁▁▂▂▂▂▂▂▂▂▃▃▄▃▃▄▄▅▅▅▅▆

0,1
epochs,30.0
training_accuracy,0.80186
training_loss,0.10468
validation_accuracy,0.33252
validation_loss,1.23607


[34m[1mwandb[0m: Agent Starting Run: dvfhtdo8 with config:
[34m[1mwandb[0m: 	attention: True
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	encoder_embedding_dim: 300
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0004885708232072144
[34m[1mwandb[0m: 	n_layers: 1
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
[34m[1mwandb[0m: 	weight_decay: 0
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.



 Number of samples: 51200
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 24
Max sequence length for outputs: 22

 Number of samples: 4096
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 22
Max sequence length for outputs: 22

 Number of samples: 4096
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 26
Max sequence length for outputs: 22

 EPOCH: 1
CrossEntropyLoss()
TRAINING LOSS :2.0517615312337876
TRAINING ACCURACY :0.01892578125
VALIDATION LOSS :1.9935546554625034
VALIDATION ACCURACY :0.00146484375

 EPOCH: 2
CrossEntropyLoss()
TRAINING LOSS :1.1861068885028363
TRAINING ACCURACY :0.06931640625
VALIDATION LOSS :1.6571651957929134
VALIDATION ACCURACY :0.01220703125

 EPOCH: 3
CrossEntropyLoss()
TRAINING LOSS :0.9935606755316257
TRAINING ACCURACY :0.100703125
VALIDATION LOSS :1.2947236485779285
VALIDATION ACCURACY :0.052490234375

 EPOCH: 4
CrossEntropyLoss()
TRAINING LOSS :0.89273542672

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_accuracy,▁▂▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇████████
training_loss,█▄▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
validation_accuracy,▁▁▂▄▃▄▄▅▆▅▅▄▆▆▆▆▆▇▆▇▆▇▇▇▆▇▇████▇███▇▇▇██
validation_loss,█▆▄▃▃▂▂▂▂▂▂▃▂▂▁▁▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁

0,1
epochs,40.0
training_accuracy,0.39383
training_loss,0.39648
validation_accuracy,0.2749
validation_loss,0.90035


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: hq491a36 with config:
[34m[1mwandb[0m: 	attention: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 2
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	encoder_embedding_dim: 256
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_dim: 400
[34m[1mwandb[0m: 	learning_rate: 0.0005132905090003188
[34m[1mwandb[0m: 	n_layers: 3
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.4
[34m[1mwandb[0m: 	weight_decay: 0
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.



 Number of samples: 51200
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 24
Max sequence length for outputs: 22

 Number of samples: 4096
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 22
Max sequence length for outputs: 22

 Number of samples: 4096
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 26
Max sequence length for outputs: 22

 EPOCH: 1
CrossEntropyLoss()
TRAINING LOSS :1.6354526763409376
TRAINING ACCURACY :0.044609375
VALIDATION LOSS :1.469151696190238
VALIDATION ACCURACY :0.005859375

 EPOCH: 2
CrossEntropyLoss()
TRAINING LOSS :0.9423057006299496
TRAINING ACCURACY :0.128984375
VALIDATION LOSS :1.10893424320966
VALIDATION ACCURACY :0.0673828125

 EPOCH: 3
CrossEntropyLoss()
TRAINING LOSS :0.7822970907762646
TRAINING ACCURACY :0.18220703125
VALIDATION LOSS :1.205957479774952
VALIDATION ACCURACY :0.0615234375

 EPOCH: 4
CrossEntropyLoss()
TRAINING LOSS :0.6898278763145208
TRAIN

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
training_accuracy,▁▂▃▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
training_loss,█▄▄▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
validation_accuracy,▁▂▂▄▄▆▆▆▆▇▆▆▇██▇██▇▇█████
validation_loss,█▄▅▂▃▁▂▁▁▁▂▂▂▁▁▂▁▁▂▂▂▂▃▃▃

0,1
epochs,25.0
training_accuracy,0.50223
training_loss,0.3079
validation_accuracy,0.31396
validation_loss,1.00053


[34m[1mwandb[0m: Agent Starting Run: czz27iex with config:
[34m[1mwandb[0m: 	attention: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	encoder_embedding_dim: 200
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_dim: 512
[34m[1mwandb[0m: 	learning_rate: 0.0005397779295245354
[34m[1mwandb[0m: 	n_layers: 2
[34m[1mwandb[0m: 	optimizer: RAdam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.4
[34m[1mwandb[0m: 	weight_decay: 0
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.



 Number of samples: 51200
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 24
Max sequence length for outputs: 22

 Number of samples: 4096
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 22
Max sequence length for outputs: 22

 Number of samples: 4096
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 26
Max sequence length for outputs: 22

 EPOCH: 1
CrossEntropyLoss()
TRAINING LOSS :2.4238476176559924
TRAINING ACCURACY :0.00697265625
VALIDATION LOSS :1.3617533948272467
VALIDATION ACCURACY :0.03857421875

 EPOCH: 2
CrossEntropyLoss()
TRAINING LOSS :1.1732499679923059
TRAINING ACCURACY :0.07384765625
VALIDATION LOSS :1.10653021838516
VALIDATION ACCURACY :0.08984375

 EPOCH: 3
CrossEntropyLoss()
TRAINING LOSS :0.912893572896719
TRAINING ACCURACY :0.135625
VALIDATION LOSS :1.0244549103081226
VALIDATION ACCURACY :0.119384765625

 EPOCH: 4
CrossEntropyLoss()
TRAINING LOSS :0.7775288724899292
TRA

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
training_accuracy,▁▂▃▃▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
validation_accuracy,▁▂▃▃▃▆▆▆▆▆▆▇▇▇█▇▇█▇██████
validation_loss,█▅▄▄▃▁▂▁▂▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂

0,1
epochs,25.0
training_accuracy,0.53275
training_loss,0.27378
validation_accuracy,0.3374
validation_loss,0.91149


[34m[1mwandb[0m: Agent Starting Run: qcogci0k with config:
[34m[1mwandb[0m: 	attention: True
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	encoder_embedding_dim: 200
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00011855996115818143
[34m[1mwandb[0m: 	n_layers: 2
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.35
[34m[1mwandb[0m: 	weight_decay: 0
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.



 Number of samples: 51200
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 24
Max sequence length for outputs: 22

 Number of samples: 4096
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 22
Max sequence length for outputs: 22

 Number of samples: 4096
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 26
Max sequence length for outputs: 22

 EPOCH: 1
CrossEntropyLoss()
TRAINING LOSS :3.498865964412689
TRAINING ACCURACY :0.0
VALIDATION LOSS :3.1414350867271423
VALIDATION ACCURACY :0.0

 EPOCH: 2
CrossEntropyLoss()
TRAINING LOSS :2.9189817690849305
TRAINING ACCURACY :3.90625e-05
VALIDATION LOSS :2.723590597510338
VALIDATION ACCURACY :0.00048828125

 EPOCH: 3
CrossEntropyLoss()
TRAINING LOSS :2.5526170551776888
TRAINING ACCURACY :0.00013671875
VALIDATION LOSS :2.3603220880031586
VALIDATION ACCURACY :0.000244140625

 EPOCH: 4
CrossEntropyLoss()
TRAINING LOSS :2.240159876346588
TRAINING ACCURACY

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_accuracy,▁▁▁▁▁▁▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇█████
training_loss,█▇▆▅▄▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
validation_accuracy,▁▁▁▁▁▁▁▂▂▂▂▂▂▃▂▂▃▂▃▃▄▄▃▃▄▃▄▅▄▅▅▆▆▅▆█▆▅▇▇
validation_loss,█▇▆▅▄▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▁▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁

0,1
epochs,40.0
training_accuracy,0.21617
training_loss,0.68894
validation_accuracy,0.14575
validation_loss,0.97368


[34m[1mwandb[0m: Agent Starting Run: kbcfyknm with config:
[34m[1mwandb[0m: 	attention: True
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	encoder_embedding_dim: 512
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	hidden_dim: 400
[34m[1mwandb[0m: 	learning_rate: 0.0004432240510577218
[34m[1mwandb[0m: 	n_layers: 2
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.35
[34m[1mwandb[0m: 	weight_decay: 0
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.



 Number of samples: 51200
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 24
Max sequence length for outputs: 22

 Number of samples: 4096
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 22
Max sequence length for outputs: 22

 Number of samples: 4096
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 26
Max sequence length for outputs: 22

 EPOCH: 1
CrossEntropyLoss()
TRAINING LOSS :1.8499194526672362
TRAINING ACCURACY :0.02501953125
VALIDATION LOSS :1.4707735255360603
VALIDATION ACCURACY :0.007080078125

 EPOCH: 2
CrossEntropyLoss()
TRAINING LOSS :1.1383085753023625
TRAINING ACCURACY :0.07794921875
VALIDATION LOSS :1.3124392442405224
VALIDATION ACCURACY :0.01513671875

 EPOCH: 3
CrossEntropyLoss()
TRAINING LOSS :0.9545404380559921
TRAINING ACCURACY :0.12232421875
VALIDATION LOSS :1.0693686325103045
VALIDATION ACCURACY :0.091796875

 EPOCH: 4
CrossEntropyLoss()
TRAINING LOSS :0.87268731564

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_accuracy,▁▂▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇█████
training_loss,█▅▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
validation_accuracy,▁▁▃▃▃▃▄▃▄▅▅▆▅▅▇▆▆▆▆▇▆▆▇▇▇▇▇▇▇▇▇▇█▇█▇██▇▇
validation_loss,█▆▄▄▄▄▄▅▂▃▂▁▂▂▁▁▂▂▁▁▂▁▁▁▁▁▁▂▁▂▁▂▁▂▂▂▁▂▂▂

0,1
epochs,40.0
training_accuracy,0.43572
training_loss,0.37129
validation_accuracy,0.29321
validation_loss,0.94912


[34m[1mwandb[0m: Agent Starting Run: u5qquafi with config:
[34m[1mwandb[0m: 	attention: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 2
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	encoder_embedding_dim: 300
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	hidden_dim: 400
[34m[1mwandb[0m: 	learning_rate: 0.0001352414943957306
[34m[1mwandb[0m: 	n_layers: 2
[34m[1mwandb[0m: 	optimizer: RAdam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.4
[34m[1mwandb[0m: 	weight_decay: 0
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.



 Number of samples: 51200
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 24
Max sequence length for outputs: 22

 Number of samples: 4096
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 22
Max sequence length for outputs: 22

 Number of samples: 4096
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 26
Max sequence length for outputs: 22

 EPOCH: 1
CrossEntropyLoss()
TRAINING LOSS :2.8905639435350894
TRAINING ACCURACY :0.002265625
VALIDATION LOSS :1.9577084016054869
VALIDATION ACCURACY :0.003173828125

 EPOCH: 2
CrossEntropyLoss()
TRAINING LOSS :1.5075893422961235
TRAINING ACCURACY :0.035859375
VALIDATION LOSS :1.666482798755169
VALIDATION ACCURACY :0.003662109375

 EPOCH: 3
CrossEntropyLoss()
TRAINING LOSS :1.2067807547748088
TRAINING ACCURACY :0.07134765625
VALIDATION LOSS :1.3256480526179075
VALIDATION ACCURACY :0.033203125

 EPOCH: 4
CrossEntropyLoss()
TRAINING LOSS :1.050102429986
TR

VBox(children=(Label(value='0.004 MB of 0.038 MB uploaded\r'), FloatProgress(value=0.10918539821892581, max=1.…

0,1
epochs,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
training_accuracy,▁▂▂▃▃▃▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇████
training_loss,█▄▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
validation_accuracy,▁▁▂▂▂▄▃▄▅▄▄▅▅▅▅▅▆▆▆▆▇█▇█▇▇██▇█
validation_loss,█▆▄▃▄▂▂▂▂▂▂▂▂▂▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁

0,1
epochs,30.0
training_accuracy,0.38949
training_loss,0.41477
validation_accuracy,0.31592
validation_loss,0.85451


[34m[1mwandb[0m: Agent Starting Run: sz5uinq6 with config:
[34m[1mwandb[0m: 	attention: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	encoder_embedding_dim: 256
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	hidden_dim: 1024
[34m[1mwandb[0m: 	learning_rate: 0.0008765319780103813
[34m[1mwandb[0m: 	n_layers: 3
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.4
[34m[1mwandb[0m: 	weight_decay: 0
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.



 Number of samples: 51200
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 24
Max sequence length for outputs: 22

 Number of samples: 4096
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 22
Max sequence length for outputs: 22

 Number of samples: 4096
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 26
Max sequence length for outputs: 22

 EPOCH: 1
CrossEntropyLoss()
TRAINING LOSS :1.677952388599515
TRAINING ACCURACY :0.061015625
VALIDATION LOSS :1.053777546621859
VALIDATION ACCURACY :0.110595703125

 EPOCH: 2
CrossEntropyLoss()
TRAINING LOSS :0.8269407499209046
TRAINING ACCURACY :0.16998046875
VALIDATION LOSS :0.9219216257333755
VALIDATION ACCURACY :0.168212890625

 EPOCH: 3
CrossEntropyLoss()
TRAINING LOSS :0.6963038048520684
TRAINING ACCURACY :0.21744140625
VALIDATION LOSS :0.8454484278336167
VALIDATION ACCURACY :0.224853515625

 EPOCH: 4
CrossEntropyLoss()
TRAINING LOSS :0.62893269386

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_accuracy,▁▂▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇███████
training_loss,█▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
validation_accuracy,▁▃▄▄▅▅▆▇▆▇▇▇▇▇▇▇▇█▇▇▇█▇█▇▇███▇▇█████████
validation_loss,▆▄▂▂▂▂▁▁▁▁▂▁▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▆▆▇▇▆▇███

0,1
epochs,40.0
training_accuracy,0.64408
training_loss,0.18429
validation_accuracy,0.34351
validation_loss,1.1413


[34m[1mwandb[0m: Agent Starting Run: deqlbkki with config:
[34m[1mwandb[0m: 	attention: True
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	encoder_embedding_dim: 512
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0009682998704477824
[34m[1mwandb[0m: 	n_layers: 3
[34m[1mwandb[0m: 	optimizer: RAdam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.4
[34m[1mwandb[0m: 	weight_decay: 0
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.



 Number of samples: 51200
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 24
Max sequence length for outputs: 22

 Number of samples: 4096
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 22
Max sequence length for outputs: 22

 Number of samples: 4096
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 26
Max sequence length for outputs: 22

 EPOCH: 1
CrossEntropyLoss()
TRAINING LOSS :3.4515267145633697
TRAINING ACCURACY :0.0
VALIDATION LOSS :2.886043071746826
VALIDATION ACCURACY :0.0

 EPOCH: 2
CrossEntropyLoss()
TRAINING LOSS :1.9999766767024993
TRAINING ACCURACY :0.009375
VALIDATION LOSS :1.7026739418506622
VALIDATION ACCURACY :0.00244140625

 EPOCH: 3
CrossEntropyLoss()
TRAINING LOSS :1.4207876253128051
TRAINING ACCURACY :0.04150390625
VALIDATION LOSS :1.5833080038428307
VALIDATION ACCURACY :0.002685546875

 EPOCH: 4
CrossEntropyLoss()
TRAINING LOSS :1.1938881519436837
TRAINING ACCURACY 

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_accuracy,▁▁▂▂▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇███████
training_loss,█▅▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
validation_accuracy,▁▁▁▁▁▃▃▅▂▄▃▃▄▆▆▅▆▆▇▆▆▆▆▇▇▇█▇██▇▇███▇▇██▇
validation_loss,█▄▄▃▄▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epochs,40.0
training_accuracy,0.40807
training_loss,0.39535
validation_accuracy,0.2522
validation_loss,0.91713


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0dizh353 with config:
[34m[1mwandb[0m: 	attention: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	encoder_embedding_dim: 300
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	hidden_dim: 1024
[34m[1mwandb[0m: 	learning_rate: 0.000983339645625703
[34m[1mwandb[0m: 	n_layers: 1
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.35
[34m[1mwandb[0m: 	weight_decay: 0
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.



 Number of samples: 51200
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 24
Max sequence length for outputs: 22

 Number of samples: 4096
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 22
Max sequence length for outputs: 22

 Number of samples: 4096
Source Vocab length: 27
Target Vocab length: 131
Max sequence length for inputs: 26
Max sequence length for outputs: 22

 EPOCH: 1
CrossEntropyLoss()
TRAINING LOSS :1.2844227271527051
TRAINING ACCURACY :0.08013671875
VALIDATION LOSS :1.1829611472785473
VALIDATION ACCURACY :0.112548828125

 EPOCH: 2
CrossEntropyLoss()
TRAINING LOSS :0.7997508271038533
TRAINING ACCURACY :0.17697265625
VALIDATION LOSS :0.9140072669833899
VALIDATION ACCURACY :0.181396484375

 EPOCH: 3
CrossEntropyLoss()
TRAINING LOSS :0.6743815407529473
TRAINING ACCURACY :0.224453125
VALIDATION LOSS :0.8688921993598342
VALIDATION ACCURACY :0.213623046875

 EPOCH: 4
CrossEntropyLoss()
TRAINING LOSS :0.619760202

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epochs,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
training_accuracy,▁▂▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇██████
training_loss,█▅▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
validation_accuracy,▁▃▄▄▄▆▅▆▅▆▆▇▇▇▇▆▇▇▆▇▇▇▇▇▇▇█▇▇▇▇▇▇▇███▇█▇
validation_loss,▇▃▂▂▂▁▁▁▃▁▁▂▂▂▃▂▂▂▃▃▄▄▄▄▄▄▅▅▅▆▆▆▆▇▇▆▇███

0,1
epochs,40.0
training_accuracy,0.68365
training_loss,0.1623
validation_accuracy,0.33667
validation_loss,1.24121


## **MISC For Testing**

In [None]:
def accuracy_calc_test(target, output, train = True):
    Outputs, Targets = [], []
    target = target.transpose(0, 1)
    num_correct = 0
    batch_size = target.shape[0]

    target_indices = (target == 1).nonzero()[:, 1]

    # print(target.shape, output.shape )
    assert (batch_size == len(target_indices))

    if train:
        output = output.argmax(2) # LxB
        output = output.transpose(0, 1)
        output_indices = (output == 1).nonzero()[:, 1]
        for seq, i in zip(range(batch_size), target_indices):
            if torch.all(output[seq, :i+1] == target[seq, :i+1]):
                num_correct += 1
                # print(output[seq, :i+1], target[seq, :i+1])
        return num_correct, batch_size

    else:
        output = output.argmax(2) # LxB
        output = output.transpose(0, 1)
        output_indices = (output == 1).nonzero()[:, 1]
        for seq, i in zip(range(batch_size), target_indices):
            if torch.all(output[seq, :i+1] == target[seq, :i+1]):
                num_correct += 1
        Outputs.append(output)
        Targets.append(target)  
        return num_correct, batch_size, Outputs, Targets 

In [None]:
DATAPATH = '/home/fml-pc/Assignments/Assignment_3/Assignment_3_2024/aksharantar_sampled/aksharantar_sampled/hin'
dataset_func = Data_Preparation(DATAPATH)
train_dataloader, validation_dataloader, test_dataloader = dataset_func.create_dataloaders(batch_size=128)
criterion = nn.CrossEntropyLoss(ignore_index = dataset_func.target_char2int["-PAD-"])
int2char = dataset_func.target_int2char

In [None]:
def evaluate_test(model, iterator,criterion, beam_width, device):
    
    model.eval()
    
    epoch_loss = 0
    total_no_correct = 0
    total_samples = 0
    Final_Outputs, Final_Targets = [], []

    print(criterion)
    with torch.no_grad():
    
        for i, (src,trg) in enumerate(iterator):

            src, trg = src.to(device), trg.to(device)

            output = model(src, trg, teacher_forcing_ratio=0, to_train=True).to(device)
            
            trg = trg.transpose(0,1)

            trg = trg.argmax(2)

   

            num_correct, num_samples, Outputs, Targets = accuracy_calc_test(trg, output, train=False)

            Final_Outputs.extend(Outputs)
            Final_Targets.extend(Targets)
            


            output_dim = output.shape[-1]

            output = output[1:].view(-1, output_dim)

            trg = trg[1:].reshape(-1)

            loss = criterion(output, trg)

 

            epoch_loss += loss.item()
            
            total_no_correct += num_correct
            total_samples += num_samples
        # print(total_no_correct, total_samples)
    return epoch_loss / len(iterator), total_no_correct/total_samples, Final_Outputs, Final_Targets

In [None]:
config_ = {
    'method': 'bayes',
    'metric':{
        'name' : 'val_accuracy',
        'goal' : 'maximize'},
    'parameters':{
        'epochs':{'values': 28},
        'cell_type':{'values': 'LSTM'},
        'n_layers':{'values': 2 },
        'hidden_dim':{'values': 1024 },
        'encoder_embedding_dim':{'values': 200 },
        'dropout':{'values':0.5},
        'teacher_forcing_ratio':{'values': 0.4},
        'learning_rate': 0.00042645834279076306,
        'optimizer':{'values': 'RAdam'},
        'batch_size': {'values':128 },
        'weight_decay':{'values':[0]},
        'attention':{'values':True},
        'beam_width': {'values': 1}        
    },
}

DATAPATH = '/home/fml-pc/Assignments/Assignment_3/Assignment_3_2024/aksharantar_sampled/aksharantar_sampled/hin'
dataset_func = Data_Preparation(DATAPATH)
train_dataloader, validation_dataloader, test_dataloader = dataset_func.create_dataloaders(config_['parameters']['batch_size']['values'])

num_encoder_tokens = dataset_func.num_encoder_tokens
hidden_dim = config_['parameters']['hidden_dim']['values']
n_layers = config_['parameters']['n_layers']['values']
encoder_embedding_dim = config_['parameters']['encoder_embedding_dim']['values']
dropout = config_['parameters']['dropout']['values']
cell_type = config_['parameters']['cell_type']['values']
decoder_embedding_dim = config_['parameters']['encoder_embedding_dim']['values']
num_decoder_tokens = dataset_func.num_decoder_tokens

attention = config_['parameters']['attention']['values']

device = device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
enc = Encoder(num_encoder_tokens, hidden_dim, n_layers, dropout, encoder_embedding_dim, cell_type, verbose=False)
dec = Decoder(num_decoder_tokens, hidden_dim, n_layers, dropout, decoder_embedding_dim, cell_type, atten=attention, verbose=False)
model = Seq2Seq(enc, dec, dataset_func.max_source_length, dataset_func.max_target_length, dataset_func.target_char2int, dataset_func.num_decoder_tokens, device)
model = model.to(device)

# model = load_model_state(model, './model_state.pth')

model = torch.load('/home/fml-pc/Assignments/Assignment_3/Assignment_3_2024/model_with_attention.pth')
loss, acc, Outputs, Targets = evaluate_test(model, test_dataloader,criterion, 1, device)


loss, acc, Outputs, Targets = evaluate_test(model, test_dataloader,criterion, 1, device)

In [None]:
import itertools
flattened_outputs = list(itertools.chain.from_iterable(Outputs))
flattened_targets = list(itertools.chain.from_iterable(Targets))

In [None]:
test_df_1 = dataset_func.test_df

In [None]:
hin_outputs = []
for b, i in enumerate(flattened_outputs):
    # Convert tensor to numpy array
    Z = i.cpu().numpy()

    try:
        # Find the index of the first occurrence of 1
        idx = np.where(Z == 1)[0][0]
        # Slice the array up to the index
        A = Z[1:idx]
    except IndexError:
        # If 1 is not found, handle the exception
        print(f"No '1' found in the array at index {b}, using the whole array except the first element.")
        A = Z[1:]

    print(b, A)

    # Convert indices to characters
    hin_word = ''.join([int2char[int(j)] for j in A])
    hin_outputs.append(hin_word)


In [None]:
sum_all = 0
hin_targets = []
for b, i  in enumerate(flattened_targets):
    Z = i.cpu().numpy()
    # print('Before', Z)
    # if b == 1463:
    #    A =  Z[1:]
    # else:
    idx = np.where(Z == 1)[0][0]
    A = Z[1:idx]
    # print(b, Z[1:idx])
    hin_word = ''
    for j in A:
        hin_word += int2char[j]
    hin_targets.append(hin_word)

In [None]:
idxs = []
for i in hin_targets:
    idx = test_df_1.loc[test_df_1['Hindi'] == i].index[0]
    idxs.append(idx)

# Access the corresponding 'English' values using the obtained indices
english_values = test_df_1.loc[idxs, 'English']
e = list(english_values)

In [None]:
import pandas as pd

# Assuming english_values, hin_targets, and hin_outputs are lists
data = list(zip(e, hin_targets, hin_outputs))
df_preds = pd.DataFrame(data, columns=['English Values', 'Hindi Targets', 'Hindi Outputs'])
df_preds.to_csv('/home/fml-pc/Assignments/Assignment_3/Assignment_3_2024/Test_Predictions_without_attention.csv')