In [None]:
!pip install wandb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting wandb
  Downloading wandb-0.15.1-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m25.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pathtools
  Downloading pathtools-0.1.2.tar.gz (11 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting docker-pycreds>=0.4.0
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting setproctitle
  Downloading setproctitle-1.3.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)
Collecting GitPython!=3.1.29,>=1.0.0
  Downloading GitPython-3.1.31-py3-none-any.whl (184 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentry-sdk>=1.0.0
  Downloading sentry_sdk-1.21.1-py2.py3-none-any.whl (201 kB)
[2K     [90m

In [None]:
import pandas as pd
import torchtext
import numpy as np
import torch
import torch.nn as nn
import random
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
zip_path = "/content/drive/MyDrive/aksharantar_sampled.zip"
!cp "{zip_path}" .
!unzip -q aksharantar_sampled.zip
!rm aksharantar_sampled.zip 

In [None]:
def get_data(lang):
  train_csv=f"aksharantar_sampled/{lang}/{lang}_train.csv"
  test_csv=f"aksharantar_sampled/{lang}/{lang}_test.csv"
  val_csv=f"aksharantar_sampled/{lang}/{lang}_valid.csv"
  
  return train_csv,test_csv,val_csv



In [None]:
train,test,val=get_data("hin")

In [None]:
### For train
input_texts = []
target_texts = []

train_df = pd.read_csv(train, header=None, names=["1", "2"]).astype(str)

# Add all the input and target texts with start sequence and end sequence added to target 
for index, row in train_df.iterrows():
      input_text = row['1']
      target_text = row['2']
      if target_text == '' or input_text == '':
          continue
      target_text = "\t" + target_text + "\n"
      input_texts.append(input_text)
      target_texts.append(target_text)

english_tokens = set()
hindi_tokens = set()

for x,y in zip(input_texts,target_texts):
    for ch in x:
        english_tokens.add(ch)
    for ch in y:
        hindi_tokens.add(ch)
    
english_tokens = sorted(list(english_tokens))
hindi_tokens = sorted(list(hindi_tokens))

eng_token_map = dict([(ch,i+1) for i,ch in enumerate(english_tokens)])
hin_token_map = dict([(ch,i+1) for i,ch in enumerate(hindi_tokens)])

eng_token_map["<UNK>"]=len(english_tokens)+1
hin_token_map["<UNK>"]=len(hindi_tokens)+1
eng_token_map['<PAD>']=0
hin_token_map['<PAD>']=0

max_eng_len = max([len(i) for i in input_texts])
max_hin_len = max([len(i) for i in target_texts])

In [None]:
def pre_process(data):
    
    input_texts = []
    target_texts = []
    
    df = pd.read_csv(data, header=None, names=["1", "2"]).astype(str)

    for index, row in df.iterrows():
      input_text = row['1']
      target_text = row['2']
      if target_text == '' or input_text == '':
          continue
      target_text = "\t" + target_text + "\n"
      input_texts.append(input_text)
      target_texts.append(target_text)

    
    a = np.zeros((len(input_texts),max_eng_len+2),dtype="float32")
    b = np.zeros((len(target_texts),max_hin_len+2),dtype="float32")
    
    
    for i,(x,y) in enumerate(zip(input_texts,target_texts)):
        for j,ch in enumerate(x):
            a[i,j] = eng_token_map.get(ch,eng_token_map["<UNK>"])

        for j,ch in enumerate(y):
            b[i,j] = hin_token_map.get(ch,hin_token_map["<UNK>"])
        
      
    return a,b

In [None]:
trainx, trainy= pre_process(train)
valx, valy= pre_process(val)
testx,testy,= pre_process(test)

In [None]:
reverse_eng_map = dict([(i,char) for char,i in eng_token_map.items()])
reverse_hin_map = dict([(i,char) for char,i in hin_token_map.items()])

In [None]:

class CustomDataset(Dataset):
    def __init__(self, input_data, target_data):
        self.input_data = input_data
        self.target_data = target_data
    

    def __len__(self):
        return len(self.input_data)

    def __getitem__(self, idx):
        input_seq = self.input_data[idx]
        target_seq = self.target_data[idx]

        return input_seq, target_seq

def custom_collate(batch):
    input_seqs, target_seqs= zip(*batch)
    input_seqs = torch.from_numpy(np.stack(input_seqs, axis=1))
    target_seqs = torch.from_numpy(np.stack(target_seqs, axis=1))

    return input_seqs, target_seqs


train_dataset = CustomDataset(trainx, trainy)
#train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=custom_collate)

val_dataset = CustomDataset(valx, valy)
#val_dataloader = DataLoader(val_dataset, batch_size=8, shuffle=False, collate_fn=custom_collate)

test_dataset = CustomDataset(testx, testy)
test_dataloader = DataLoader(test_dataset, batch_size=8, shuffle=False, collate_fn=custom_collate)



In [None]:
a,b= next(iter(test_dataloader))



In [None]:
a[:,0]

tensor([20.,  8.,  5., 18., 13.,  1., 24.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [None]:
b[:,0]

tensor([ 1., 34., 44., 42., 62., 18., 66., 50.,  2.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'


class Encoder(nn.Module):
    def __init__(self, input_size, embed_dim, hidden_size, num_layers, dropout,cell_type):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, embed_dim,padding_idx=0)
        self.dropout = nn.Dropout(dropout)
        self.cell_type=cell_type

        if cell_type=="LSTM":
          self.rnn = nn.LSTM(embed_dim, hidden_size, num_layers, dropout=dropout)
        elif cell_type=="GRU":
          self.rnn=nn.GRU(embed_dim,hidden_size,num_layers,dropout=dropout)
        else:
          self.rnn=nn.RNN(embed_dim,hidden_size,num_layers,dropout=dropout)
    
    def forward(self, x):
        embedded = self.dropout(self.embedding(x))

        if self.cell_type=="LSTM":
          output, (hidden, cell) = self.rnn(embedded)
          return hidden, cell
        
        elif self.cell_type=="GRU":
          output, hidden = self.rnn(embedded)

          return output, hidden
        
        else:
          output, hidden = self.rnn(embedded)

          return output,hidden



class Decoder(nn.Module):
    def __init__(self, output_size, embed_dim, hidden_size, num_layers, dropout,cell_type):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.cell_type=cell_type
        self.embedding = nn.Embedding(output_size, embed_dim,padding_idx=0)
        if cell_type=="LSTM":
          self.rnn = nn.LSTM(embed_dim, hidden_size, num_layers,  dropout=dropout)
        elif cell_type=="GRU":
          self.rnn=nn.GRU(embed_dim,hidden_size,num_layers,dropout=dropout)
        else:
          self.rnn=nn.RNN(embed_dim,hidden_size,num_layers,dropout=dropout)

        self.fc = nn.Linear(hidden_size, output_size)
        self.dropout=nn.Dropout(dropout)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x, hidden, cell):
        x = x.unsqueeze(0)
        embedded = self.dropout(self.embedding(x))
        if self.cell_type=="LSTM":
          output, (hidden, cell) = self.rnn(embedded, (hidden, cell))
          output= self.fc(output)
          output = output.squeeze(0)
          return output, hidden, cell
        
        elif self.cell_type=="GRU":
          output, hidden=self.rnn(embedded,hidden)
          output=self.fc(output)
          output=output.squeeze(0)
          return output, hidden
        
        else:
          output, hidden=self.rnn(embedded,hidden)
          output=self.fc(output)
          output = output.squeeze(0)
          return output, hidden

class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
    
       
    def forward(self, source, target, teacher_forcing_ratio=0.5):
        batch_size = source.shape[1]
        target_len = target.shape[0]
        target_vocab_size = len(hin_token_map)

        outputs = torch.zeros(target_len,batch_size, target_vocab_size).to(device)
        if self.encoder.cell_type=="LSTM":
          hidden, cell = self.encoder(source)

          x = target[0]

          for t in range(1, target_len):
              output, hidden, cell = self.decoder(x, hidden, cell)
              outputs[t] = output
              top1 = output.argmax(1)
              if random.random() < teacher_forcing_ratio:
                  x = target[t]
              else:
                  x = top1

          return outputs
        
        elif self.encoder.cell_type=="GRU":
          enc_output,hidden = self.encoder(source)

          x = target[0]

          for t in range(1, target_len):
              output,hidden=self.decoder(x,hidden,None)
              outputs[t] = output
              top1= output.argmax(1)
              if random.random() < teacher_forcing_ratio:
                  x = target[t]
              else:
                  x = top1
          return outputs
        
        else:
          enc_output,hidden = self.encoder(source)

          x = target[0]

          for t in range(1, target_len):
              output,hidden=self.decoder(x,hidden,None)
              outputs[t] = output
              top1= output.argmax(1)
              if random.random() < teacher_forcing_ratio:
                  x = target[t]
              else:
                  x = top1
          return outputs


def build_model(cell = "LSTM",nunits = 64, enc_dec_layers = 2,embed_dim = 128,dense_size=128,dropout=0):
    encoder = Encoder(input_size=len(eng_token_map), embed_dim=embed_dim, hidden_size=nunits, num_layers=enc_dec_layers, dropout=dropout,cell_type=cell)
    decoder = Decoder(output_size=len(hin_token_map), embed_dim=embed_dim, hidden_size=nunits, num_layers=enc_dec_layers, dropout=dropout,cell_type=cell)
    model = Seq2Seq(encoder, decoder)
    return model


In [None]:
a,b=next(iter(train_dataloader))

In [None]:
encoder = Encoder(len(eng_token_map), 32, 128,2, 0.1,"GRU")
outputs, hidden = encoder(a.long())
outputs.shape, hidden.shape

(torch.Size([26, 8, 128]), torch.Size([2, 8, 128]))

In [None]:
decoder = Decoder(len(hin_token_map), 32, 128,2, 0.1,"GRU")
outputs, hidden = decoder(b.long()[0],hidden,None)
outputs.shape, hidden.shape

(torch.Size([8, 68]), torch.Size([2, 8, 128]))

In [None]:
print(device)

cuda


In [None]:
model(a.int(),b.int()).shape

torch.Size([24, 16, 68])

In [None]:


def train(model, dataloader, criterion, optimizer, device):
    model.train()
    total_loss = 0.0
    total_chars = 0
    correct_chars = 0

    for i, (input_seq, target_seq) in enumerate(dataloader):
        input_seq = input_seq.long().to(device)
        target_seq = target_seq.long().to(device)

        optimizer.zero_grad()

        output = model(input_seq, target_seq)
        _, predicted = torch.max(output, dim=2)


        for j in range(predicted.shape[1]):
              predicted_seq = predicted[:, j]
              targets_seq = target_seq[:, j]

              # Find the index of the first EOS token in the sequence (for character & word-level accuracy)
              eos_idx = (targets_seq == hin_token_map["\n"]).nonzero()
              if eos_idx.numel() > 0:
                  eos_idx = eos_idx[0][0]
                  predicted_seq = predicted_seq[:eos_idx]
                  targets_seq = targets_seq[:eos_idx]
        
        # reshape for cross-entropy loss
        output_flatten = output[1:].view(-1, output.shape[-1])
        trg_flatten = target_seq[1:].view(-1)

        loss = criterion(output_flatten, trg_flatten)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        predicted_seq=predicted_seq[1:].view(-1)
        targets_seq=targets_seq[1:].view(-1)
     
        correct_chars += torch.sum(predicted_seq == targets_seq).item()
        total_chars += targets_seq.numel()


    return total_loss / len(dataloader), correct_chars/total_chars

In [None]:
hin_token_map['\n']

2

In [None]:

def evaluate(model, dataloader, criterion, device):
    model.eval()
    total_loss = 0.0

    total_chars = 0
    correct_chars = 0

    with torch.no_grad():
        for i, (input_seq, target_seq) in enumerate(dataloader):
            input_seq = input_seq.long().to(device)
            target_seq = target_seq.long().to(device)

            output = model(input_seq, target_seq,0)
            _, predicted = torch.max(output, dim=2)


            for j in range(predicted.shape[1]):
                predicted_seq = predicted[:, j]
                targets_seq = target_seq[:, j]

                # Find the index of the first EOS token in the sequence
                eos_idx = (targets_seq == hin_token_map["\n"]).nonzero()
                if eos_idx.numel() > 0:
                    eos_idx = eos_idx[0][0]
                    predicted_seq = predicted_seq[:eos_idx]
                    targets_seq = targets_seq[:eos_idx]


            # reshape for cross-entropy loss
            output_flatten = output[1:].view(-1, output.shape[-1])
            trg_flatten = target_seq[1:].view(-1)

            loss = criterion(output_flatten, trg_flatten)

            total_loss += loss.item()

            predicted_seq=predicted_seq[1:].view(-1)
            targets_seq=targets_seq[1:].view(-1)
     
            correct_chars += torch.sum(predicted_seq == targets_seq).item()
            total_chars += targets_seq.numel()

    return total_loss / len(dataloader), correct_chars/total_chars

In [None]:
import torch
import torch.nn.functional as F

def beam_search_decoder(model, input_seq, beam_size, max_len, device):
    """
    Beam search decoding algorithm for seq2seq models.
    
    Args:
        model: seq2seq model
        input_seq: tensor of shape (seq_len, batch_size)
        beam_size: size of the beam
        max_len: maximum length of the output sequence
        device: torch device
    
    Returns:
        top_k: list of k most likely output sequences, each represented as a tensor of shape (seq_len, 1)
        scores: list of k scores corresponding to the top k output sequences
    """
    # Set model to evaluation mode
    model.eval()

    # Initialize candidate sequences with the start-of-sequence token
    start_token = torch.tensor([hin_token_map["\t"]], dtype=torch.long, device=device)
    candidates = [(start_token, 0)]

    # Iterate until all candidate sequences have reached the end-of-sequence token or max_len is reached
    for i in range(max_len):
        new_candidates = []

        # Iterate over each candidate sequence
        for seq, score in candidates:
            # Pad sequence to match input_seq length
            padded_seq = F.pad(seq, (0, input_seq.shape[0]-seq.shape[0]), value=hin_token_map["<PAD>"])

            # Make predictions for next token
            output = model(input_seq, padded_seq.unsqueeze(1))
            log_probs = F.log_softmax(output[-1], dim=1)

            # Select top k candidates and add to new_candidates
            for j in range(beam_size):
                token_idx = log_probs.argmax().item()
                token_score = log_probs[0, token_idx].item()
                new_seq = torch.cat([seq, torch.tensor([token_idx], dtype=torch.long, device=device)], dim=0)
                new_score = score + token_score
                new_candidates.append((new_seq, new_score))
                log_probs[0, token_idx] = float('-inf')

        # Select top k candidates and update candidate set
        candidates = sorted(new_candidates, key=lambda x: x[1], reverse=True)[:beam_size]

        # Check if all candidate sequences have reached the end-of-sequence token
        eos_count = sum([seq[-1] == hin_token_map["\n"] for seq, score in candidates])
        if eos_count == beam_size:
            break

    # Extract top k candidates and scores
    top_k = [seq.unsqueeze(1) for seq, score in candidates]
    scores = [score for seq, score in candidates]

    return top_k, scores


In [None]:
a,b=next(iter(test_dataloader))

In [None]:
model.load_state_dict(torch.load('tut1-model.pt'))


beam_search_decoder(model,a[:,5:6].long(),5,50,device)

([tensor([[1],
          [2],
          [2]]),
  tensor([[ 1],
          [23],
          [ 2]]),
  tensor([[ 1],
          [56],
          [ 2]]),
  tensor([[ 1],
          [66],
          [ 2]]),
  tensor([[ 1],
          [58],
          [ 2]])],
 [-1.059869259595871,
  -1.633980818092823,
  -2.0424399971961975,
  -3.086896002292633,
  -3.77677720785141])

In [None]:
model=build_model(cell="GRU")

In [None]:
print(model)

Seq2Seq(
  (encoder): Encoder(
    (embedding): Embedding(28, 128, padding_idx=0)
    (dropout): Dropout(p=0, inplace=False)
    (rnn): GRU(128, 64, num_layers=2)
  )
  (decoder): Decoder(
    (embedding): Embedding(68, 128, padding_idx=0)
    (rnn): GRU(128, 64, num_layers=2)
    (fc): Linear(in_features=64, out_features=68, bias=True)
    (dropout): Dropout(p=0, inplace=False)
  )
)


In [None]:
N_EPOCHS = 1
best_valid_loss = float('inf')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=custom_collate)

val_dataloader = DataLoader(val_dataset, batch_size=8, shuffle=False, collate_fn=custom_collate)
model=model.to(device)
criterion = nn.CrossEntropyLoss(ignore_index=hin_token_map["<PAD>"]) # ignore padding index
optimizer = optim.Adam(model.parameters(), lr=1e-3)
for epoch in range(N_EPOCHS):    
    train_loss,acc = train(model=model, dataloader=train_dataloader, optimizer=optimizer, criterion=criterion,device=device)
    valid_loss,ch = evaluate(model, val_dataloader, criterion,device)


    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut1-model.pt')

    # it's easier to see a change in perplexity between epoch as it's an exponential
    # of the loss, hence the scale of the measure is much bigger
    print(f'Epoch: {epoch+1}')
    print(f'\tTrain Loss: {train_loss}')
    print(f'\t Val. Loss: {valid_loss}')
    print(ch)
    print(acc)

Epoch: 1
	Train Loss: 2.890680533964187
	 Val. Loss: 2.3192018535919487
0.3226397800183318
0.18977003652125055


In [None]:
import wandb
def wandb_sweeps():
  config_defaults = {
        'cell': "LSTM",
        'hidden_units': 64,
        'enc_dec_layers': 2,
        'embed_dim': 128,
        'dense_size':128,
        'lr':1e-4,
        'dropout':0.1,
        'epochs':20,
        'batch_size':8
  }
  
  wandb.init(config=config_defaults)

  config = wandb.config

  
  cell=config.cell
  hidden_units=config.hidden_units
  enc_dec_layers=config.enc_dec_layers
  embed_dim=config.embed_dim
  dense_size=config.dense_size
  lr=config.lr
  epochs=config.epochs
  dropout=config.dropout
  batch_size=config.batch_size


  run_name = f"cell_{cell}_hunit_{hidden_units}_embed_dim_{embed_dim}_dense_{dense_size}_lr_{lr}_ep_{epochs}_enc_dec_layer_{enc_dec_layers}_dropout_{dropout}_bs{batch_size}"
  print(run_name)

  train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate)

  val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate)


  model=build_model(cell = cell,nunits = hidden_units, enc_dec_layers =enc_dec_layers,embed_dim = embed_dim,dense_size=dense_size,dropout=dropout)
  
  device = 'cuda' if torch.cuda.is_available() else 'cpu'

  model=model.to(device)

  
  criterion = nn.CrossEntropyLoss(ignore_index=hin_token_map["<PAD>"]) # ignore padding index
  optimizer = optim.Adam(model.parameters(), lr=lr)

  N_EPOCHS = epochs
 
  for epoch in range(N_EPOCHS):    
    train_loss,train_acc = train(model=model, dataloader=train_dataloader, optimizer=optimizer, criterion=criterion,device=device)
    valid_loss,val_acc = evaluate(model, val_dataloader, criterion,device)
    
    wandb.log({"training_acc": train_acc, "validation_accuracy": val_acc, "training_loss": train_loss, "validation_loss": valid_loss, "Epoch": epoch+1})

    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f} \tTraining Accuracy: {:.6f} \tValidation Accuracy: {:.6f}'.format(
        epoch+1, train_loss, valid_loss, train_acc, val_acc))
  
  wandb.run.name = run_name
  wandb.run.save()
  wandb.run.finish()




  


  


  


In [None]:
sweep_config = {
  "name": "Bayesian Sweep",
  "method": "bayes",
  "metric":{
  "name": "validation_accuracy",
  "goal": "maximize"
  },
  'early_terminate': {
        'type':'hyperband',
        'min_iter': 5,
        's': 2
  },
  "parameters": {
        "cell": {
            "values": [ "LSTM", "GRU", "RNN"]
        },
        "hidden_units": {
            "values": [32,64,256,512]
        },
        "enc_dec_layers": {
            "values": [1,2,3,4]
        },
        "embed_dim": {
            "values": [32,64,128,256]
        }, 
        "dense_size": {
            "values": [128,256,512]
        },
        "lr": {
            "values": [1e-5,1e-4,1e-3]
        },
        "epochs": {
            "values": [20,30]
        },
        "dropout": {
            "values": [0,0.1,0.2]
        },
        "batch_size": {
            "values": [8,16,32,64]
        }     
    }
}

sweep_id = wandb.sweep(sweep_config,  entity="shashwat_mm19b053", project="Assignment-3")
wandb.agent(sweep_id,project='Assignment-3', function=wandb_sweeps, count=90)

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: lo1gzlng
Sweep URL: https://wandb.ai/shashwat_mm19b053/Assignment-3/sweeps/lo1gzlng


[34m[1mwandb[0m: Agent Starting Run: k981czvb with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	cell: GRU
[34m[1mwandb[0m: 	dense_size: 256
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_dec_layers: 4
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 1e-05
[34m[1mwandb[0m: Currently logged in as: [33mmm19b053[0m ([33mshashwat_mm19b053[0m). Use [1m`wandb login --relogin`[0m to force relogin


cell_GRU_hunit_512_embed_dim_256_dense_256_lr_1e-05_ep_30_enc_dec_layer_4_dropout_0.1_bs8
Epoch: 1 	Training Loss: 3.182387 	Validation Loss: 3.047473 	Training Accuracy: 0.088441 	Validation Accuracy: 0.108158
Epoch: 2 	Training Loss: 2.950160 	Validation Loss: 2.720938 	Training Accuracy: 0.143284 	Validation Accuracy: 0.206538
Epoch: 3 	Training Loss: 2.655846 	Validation Loss: 2.446026 	Training Accuracy: 0.223948 	Validation Accuracy: 0.296670
Epoch: 4 	Training Loss: 2.416054 	Validation Loss: 2.176892 	Training Accuracy: 0.281528 	Validation Accuracy: 0.346777
Epoch: 5 	Training Loss: 2.197863 	Validation Loss: 1.954270 	Training Accuracy: 0.334476 	Validation Accuracy: 0.409105
Epoch: 6 	Training Loss: 1.990990 	Validation Loss: 1.733113 	Training Accuracy: 0.389452 	Validation Accuracy: 0.457379
Epoch: 7 	Training Loss: 1.786775 	Validation Loss: 1.523024 	Training Accuracy: 0.438936 	Validation Accuracy: 0.517568
Epoch: 8 	Training Loss: 1.587908 	Validation Loss: 1.340072 	T



Epoch: 30 	Training Loss: 0.627202 	Validation Loss: 0.663365 	Training Accuracy: 0.791777 	Validation Accuracy: 0.779713


0,1
Epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
training_acc,▁▂▂▃▃▄▄▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇████████
training_loss,█▇▇▆▅▅▄▄▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
validation_accuracy,▁▂▃▃▄▅▅▆▆▆▇▇▇▇▇▇▇▇████████████
validation_loss,█▇▆▅▅▄▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Epoch,30.0
training_acc,0.79178
training_loss,0.6272
validation_accuracy,0.77971
validation_loss,0.66336


[34m[1mwandb[0m: Agent Starting Run: mtv63y2g with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	cell: GRU
[34m[1mwandb[0m: 	dense_size: 256
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 128
[34m[1mwandb[0m: 	enc_dec_layers: 2
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_units: 64
[34m[1mwandb[0m: 	lr: 0.0001


cell_GRU_hunit_64_embed_dim_128_dense_256_lr_0.0001_ep_20_enc_dec_layer_2_dropout_0.2_bs16
Epoch: 1 	Training Loss: 3.205958 	Validation Loss: 2.837578 	Training Accuracy: 0.097885 	Validation Accuracy: 0.186316
Epoch: 2 	Training Loss: 2.747694 	Validation Loss: 2.503124 	Training Accuracy: 0.199457 	Validation Accuracy: 0.258400
Epoch: 3 	Training Loss: 2.504544 	Validation Loss: 2.240152 	Training Accuracy: 0.242237 	Validation Accuracy: 0.315211
Epoch: 4 	Training Loss: 2.294982 	Validation Loss: 2.038148 	Training Accuracy: 0.290404 	Validation Accuracy: 0.370189
Epoch: 5 	Training Loss: 2.130679 	Validation Loss: 1.866962 	Training Accuracy: 0.331403 	Validation Accuracy: 0.400122


VBox(children=(Label(value='0.001 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.405217…

0,1
Epoch,▁▃▅▆█
training_acc,▁▄▅▇█
training_loss,█▅▃▂▁
validation_accuracy,▁▃▅▇█
validation_loss,█▆▄▂▁

0,1
Epoch,5.0
training_acc,0.3314
training_loss,2.13068
validation_accuracy,0.40012
validation_loss,1.86696


[34m[1mwandb[0m: Agent Starting Run: zpe5r9t1 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	cell: LSTM
[34m[1mwandb[0m: 	dense_size: 128
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_dec_layers: 1
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 1e-05


cell_LSTM_hunit_256_embed_dim_256_dense_128_lr_1e-05_ep_30_enc_dec_layer_1_dropout_0.1_bs32




Epoch: 1 	Training Loss: 3.562024 	Validation Loss: 3.234492 	Training Accuracy: 0.062455 	Validation Accuracy: 0.106599
Epoch: 2 	Training Loss: 3.147462 	Validation Loss: 3.034097 	Training Accuracy: 0.104650 	Validation Accuracy: 0.116751
Epoch: 3 	Training Loss: 3.008401 	Validation Loss: 2.904694 	Training Accuracy: 0.136860 	Validation Accuracy: 0.144670
Epoch: 4 	Training Loss: 2.892285 	Validation Loss: 2.791108 	Training Accuracy: 0.164973 	Validation Accuracy: 0.172589
Epoch: 5 	Training Loss: 2.798315 	Validation Loss: 2.695712 	Training Accuracy: 0.180947 	Validation Accuracy: 0.189086
Epoch: 6 	Training Loss: 2.716187 	Validation Loss: 2.620044 	Training Accuracy: 0.193716 	Validation Accuracy: 0.210660


0,1
Epoch,▁▂▄▅▇█
training_acc,▁▃▅▆▇█
training_loss,█▅▃▂▂▁
validation_accuracy,▁▂▄▅▇█
validation_loss,█▆▄▃▂▁

0,1
Epoch,6.0
training_acc,0.19372
training_loss,2.71619
validation_accuracy,0.21066
validation_loss,2.62004


[34m[1mwandb[0m: Agent Starting Run: nu3byuqt with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	cell: GRU
[34m[1mwandb[0m: 	dense_size: 128
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_dec_layers: 1
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.0001


cell_GRU_hunit_512_embed_dim_32_dense_128_lr_0.0001_ep_20_enc_dec_layer_1_dropout_0.2_bs32




Epoch: 1 	Training Loss: 3.091116 	Validation Loss: 2.736837 	Training Accuracy: 0.110053 	Validation Accuracy: 0.176396
Epoch: 2 	Training Loss: 2.710880 	Validation Loss: 2.447178 	Training Accuracy: 0.187005 	Validation Accuracy: 0.243655
Epoch: 3 	Training Loss: 2.457617 	Validation Loss: 2.167317 	Training Accuracy: 0.256243 	Validation Accuracy: 0.303299
Epoch: 4 	Training Loss: 2.203920 	Validation Loss: 1.862911 	Training Accuracy: 0.307961 	Validation Accuracy: 0.411168
Epoch: 5 	Training Loss: 1.936242 	Validation Loss: 1.599136 	Training Accuracy: 0.384319 	Validation Accuracy: 0.480964
Epoch: 6 	Training Loss: 1.702969 	Validation Loss: 1.414605 	Training Accuracy: 0.438725 	Validation Accuracy: 0.540609
Epoch: 7 	Training Loss: 1.535864 	Validation Loss: 1.270909 	Training Accuracy: 0.493544 	Validation Accuracy: 0.592640
Epoch: 8 	Training Loss: 1.408239 	Validation Loss: 1.157280 	Training Accuracy: 0.525668 	Validation Accuracy: 0.626904
Epoch: 9 	Training Loss: 1.30996

0,1
Epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
training_acc,▁▂▃▃▄▅▅▆▆▆▇▇▇▇██████
training_loss,█▇▆▅▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁
validation_accuracy,▁▂▂▄▅▅▆▆▇▆▇▇▇▇█▇████
validation_loss,█▇▆▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁

0,1
Epoch,20.0
training_acc,0.72853
training_loss,0.79272
validation_accuracy,0.75
validation_loss,0.77431


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 53lbq0m1 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	cell: GRU
[34m[1mwandb[0m: 	dense_size: 512
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_dim: 128
[34m[1mwandb[0m: 	enc_dec_layers: 2
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	hidden_units: 64
[34m[1mwandb[0m: 	lr: 1e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016669651949996478, max=1.0…

cell_GRU_hunit_64_embed_dim_128_dense_512_lr_1e-05_ep_30_enc_dec_layer_2_dropout_0_bs32
Epoch: 1 	Training Loss: 3.903835 	Validation Loss: 3.494086 	Training Accuracy: 0.039197 	Validation Accuracy: 0.044416
Epoch: 2 	Training Loss: 3.417401 	Validation Loss: 3.350960 	Training Accuracy: 0.040160 	Validation Accuracy: 0.081218
Epoch: 3 	Training Loss: 3.339759 	Validation Loss: 3.308464 	Training Accuracy: 0.061152 	Validation Accuracy: 0.091371
Epoch: 4 	Training Loss: 3.304999 	Validation Loss: 3.282990 	Training Accuracy: 0.072296 	Validation Accuracy: 0.102792
Epoch: 5 	Training Loss: 3.278496 	Validation Loss: 3.256522 	Training Accuracy: 0.087009 	Validation Accuracy: 0.118020


VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.111153…

0,1
Epoch,▁▃▅▆█
training_acc,▁▁▄▆█
training_loss,█▃▂▁▁
validation_accuracy,▁▄▅▇█
validation_loss,█▄▃▂▁

0,1
Epoch,5.0
training_acc,0.08701
training_loss,3.2785
validation_accuracy,0.11802
validation_loss,3.25652


[34m[1mwandb[0m: Agent Starting Run: 0h2spg65 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell: GRU
[34m[1mwandb[0m: 	dense_size: 256
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_dim: 128
[34m[1mwandb[0m: 	enc_dec_layers: 4
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	hidden_units: 64
[34m[1mwandb[0m: 	lr: 0.001


cell_GRU_hunit_64_embed_dim_128_dense_256_lr_0.001_ep_30_enc_dec_layer_4_dropout_0_bs64
Epoch: 1 	Training Loss: 3.009610 	Validation Loss: 2.591196 	Training Accuracy: 0.128239 	Validation Accuracy: 0.208543
Epoch: 2 	Training Loss: 2.434342 	Validation Loss: 2.087399 	Training Accuracy: 0.267684 	Validation Accuracy: 0.356784
Epoch: 3 	Training Loss: 1.924770 	Validation Loss: 1.505431 	Training Accuracy: 0.409245 	Validation Accuracy: 0.547739
Epoch: 4 	Training Loss: 1.468483 	Validation Loss: 1.205120 	Training Accuracy: 0.553485 	Validation Accuracy: 0.630653
Epoch: 5 	Training Loss: 1.234476 	Validation Loss: 1.062509 	Training Accuracy: 0.616080 	Validation Accuracy: 0.663317
Epoch: 6 	Training Loss: 1.096001 	Validation Loss: 0.970916 	Training Accuracy: 0.631270 	Validation Accuracy: 0.746231
Epoch: 7 	Training Loss: 1.029311 	Validation Loss: 0.934955 	Training Accuracy: 0.658037 	Validation Accuracy: 0.693467
Epoch: 8 	Training Loss: 0.963560 	Validation Loss: 0.882538 	Tra

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
training_acc,▁▂▄▅▆▆▆▇▇▇▇▇▇▇████████████████
training_loss,█▆▅▄▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
validation_accuracy,▁▃▅▆▆▇▆▇▇▇▇▇▇▇▇▇█▇▇▇█▇▇███▇███
validation_loss,█▆▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Epoch,30.0
training_acc,0.79146
training_loss,0.60747
validation_accuracy,0.81156
validation_loss,0.73429


[34m[1mwandb[0m: Agent Starting Run: z7n9ayg8 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	cell: RNN
[34m[1mwandb[0m: 	dense_size: 128
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	enc_dec_layers: 4
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.0001


cell_RNN_hunit_256_embed_dim_64_dense_128_lr_0.0001_ep_30_enc_dec_layer_4_dropout_0_bs8
Epoch: 1 	Training Loss: 3.085120 	Validation Loss: 3.031897 	Training Accuracy: 0.107736 	Validation Accuracy: 0.127101


In [None]:
model

Seq2Seq(
  (encoder): Encoder(
    (embedding): Embedding(28, 32, padding_idx=0)
    (rnn): LSTM(32, 16, num_layers=2)
  )
  (decoder): Decoder(
    (embedding): Embedding(68, 32, padding_idx=0)
    (rnn): LSTM(32, 16, num_layers=2)
    (fc): Linear(in_features=16, out_features=68, bias=True)
  )
)

In [None]:
model.load_state_dict(torch.load('tut1-model.pt'))

test_loss,ch = evaluate(model, test_dataloader, criterion,device)
print(f'Test Loss: {test_loss:.9f}')
print(f'Acc: {ch:.6f}')

Test Loss: 0.884725921
Acc: 0.723820


In [None]:
target_texts

['\tशस्त्रागार\n',
 '\tबिन्द्या\n',
 '\tकिरणकांत\n',
 '\tयज्ञोपवीत\n',
 '\tरटानिया\n',
 '\tवागण्याचे\n',
 '\tदेशभरामध्ये\n',
 '\tसुघड़पन\n',
 '\tमोहीवाल\n',
 '\tसर्वसंग्रह\n',
 '\tबसेको\n',
 '\tतुमच्यापैकी\n',
 '\tकान्यकुब्ज\n',
 '\tइनटॉक्सिनेशन\n',
 '\tमेच्यूरिटी\n',
 '\tअगरी\n',
 '\tअनुक्रमानुपात\n',
 '\tधूलचन्द\n',
 '\tअवलेह\n',
 '\tएबरोर्ड\n',
 '\tबैलर्स\n',
 '\tबार्गली\n',
 '\tपंक्चर्ड\n',
 '\tहैंकर्स\n',
 '\tजवानोंके\n',
 '\tपौंकी\n',
 '\tजगनधाम\n',
 '\tपोन्नियम\n',
 '\tआईएनबीए\n',
 '\tवेदलम\n',
 '\tचिनवास\n',
 '\tमारवाड़ा\n',
 '\tअनसमझा\n',
 '\tइस्टूडेंट\n',
 '\tदुःखीत\n',
 '\tसिघांची\n',
 '\tशिक्षेचे\n',
 '\tउज्वलतम\n',
 '\tआपट्टन\n',
 '\tअम्बिकावन\n',
 '\tखगनी\n',
 '\tथियामिन\n',
 '\tसोशियोलाजिकल\n',
 '\tएप\n',
 '\tभाजपाको\n',
 '\tइस्तिथि\n',
 '\tछायाटांड\n',
 '\tतोपवाल\n',
 '\tगुदवाते\n',
 '\tविसलेरी\n',
 '\tथर्मलपावर\n',
 '\tसांचे\n',
 '\tपिंटिया\n',
 '\tकैंटाबिल\n',
 '\tअरेंजिंग\n',
 '\tवाहिन्यांसाठी\n',
 '\tआईटीपीए\n',
 '\tपिक्सलमेटर\n',
 '\tपहनायूंगा\n',
 '\tमेडसन\n',
 '\

In [None]:
a,b = next(iter(test_dataloader))


In [None]:
for _ in range(50):
        # Decode one token at a time
      output, decoder_hidden, decoder_cell = model.decoder(decoder_input, decoder_hidden, decoder_cell)
      output = F.softmax(output, dim=1)

        # Get the index of the predicted token with highest probability
      top1 = output.argmax(1)

        # Append the predicted token to the translation list
      #translation.append(hin_token_map[top1.item()])

        # If the predicted token is the end-of-sentence token, stop decoding
      #if top1.item() == hin_token_map['\n']:
      #    break

        # Otherwise, set the decoder input to the predicted token for the next decoding step
        #decoder_input = top1.unsqueeze(0)

RuntimeError: ignored

In [None]:

model.eval()
with torch.no_grad():
    outputs = model(a[:,15:16].long(), b[:,15:16].long(), teacher_forcing_ratio=0)
outputs.shape
output_idx = outputs[1:].squeeze(1).argmax(1)
#' '.join([target.vocab.itos[idx] for idx in output_idx])

In [None]:
reverse_hin_map[2]

'\n'

In [None]:
word_idx=[]
for idx in output_idx:
  num=int(idx.numpy())
  if num ==2:
    break
  else:
    word_idx.append(reverse_hin_map[num])
  

In [None]:
trainxx[1]

array([ 1., 40., 55., 37., 66., 35., 66., 43., 54.,  2.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
      dtype=float32)

In [None]:
b[:,15:16],output_idx

(tensor([[ 1.],
         [10.],
         [36.],
         [61.],
         [30.],
         [52.],
         [ 2.],
         [ 0.],
         [ 0.],
         [ 0.],
         [ 0.],
         [ 0.],
         [ 0.],
         [ 0.],
         [ 0.],
         [ 0.],
         [ 0.],
         [ 0.],
         [ 0.],
         [ 0.],
         [ 0.],
         [ 0.],
         [ 0.],
         [ 0.]]),
 tensor([10, 36, 61, 35,  2,  2,  2,  2,  2,  2,  2,  2, 23,  2,  2,  2,  2, 23,
          2,  2,  2,  2, 23]))

In [None]:
"".join(word_idx)

'उधेद'

In [None]:
output_idx = outputs[1:].squeeze(1).argmax(1)
' '.join([reverse_hin_map[idx.numpy()] for idx in output_idx])

TypeError: ignored

In [None]:
outputs[1:].squeeze(1).argmax(1)

tensor([[ 4,  4, 15,  ...,  5, 12,  4],
        [ 9,  9, 11,  ...,  0,  6,  9],
        [13, 14,  6,  ..., 12, 11, 14],
        ...,
        [15, 15, 13,  ...,  1,  2, 15],
        [15, 15, 13,  ...,  1,  2, 15],
        [15, 13, 11,  ..., 15,  2, 13]])

In [None]:
model()

Seq2Seq(
  (encoder): Encoder(
    (embedding): Embedding(28, 32, padding_idx=0)
    (rnn): LSTM(32, 16, batch_first=True)
  )
  (decoder): Decoder(
    (embedding): Embedding(68, 32, padding_idx=0)
    (rnn): LSTM(32, 16, batch_first=True)
    (fc): Linear(in_features=16, out_features=68, bias=True)
  )
)


In [None]:
model.encoder

Encoder(
  (embedding): Embedding(28, 32, padding_idx=0)
  (rnn): LSTM(32, 16, batch_first=True)
)

In [None]:
import torch.functional as F

In [None]:
def translate_sentence(model, sentence, eng_token_map, hin_token_map, device, max_length=50):
    model.eval()

    # Convert input sentence to tensor
    with torch.no_grad():
        input_seq = torch.tensor([eng_token_map[tok] for tok in sentence], dtype=torch.long, device=device).unsqueeze(0)

    # Initialize decoder input with SOS_token
    sos_idx = hin_token_map['\t']
    decoder_input = torch.tensor([sos_idx], dtype=torch.long, device=device).unsqueeze(0)

    # Initialize hidden state and cell state with encoder final states
    hidden, cell = model.encoder(input_seq)
    decoder_hidden, decoder_cell = hidden, cell

    # Initialize translation list to store output tokens
    translation = []

    # Decode the translation one token at a time
    for _ in range(max_length):
        # Decode one token at a time
        output, decoder_hidden, decoder_cell = model.decoder(decoder_input, decoder_hidden, decoder_cell)
        output = F.softmax(output, dim=1)

        # Get the index of the predicted token with highest probability
        top1 = output.argmax(1)

        # Append the predicted token to the translation list
        translation.append(hin_token_map[top1.item()])

        # If the predicted token is the end-of-sentence token, stop decoding
        if top1.item() == hin_token_map['\n']:
            break

        # Otherwise, set the decoder input to the predicted token for the next decoding step
        decoder_input = top1.unsqueeze(0)

    # Convert the translation list to a string and return it
    return ' '.join(translation)


In [None]:
input_texts[0]

'shastragaar'

In [None]:
hin_token_map

{'\t': 1,
 '\n': 2,
 'ँ': 3,
 'ं': 4,
 'ः': 5,
 'अ': 6,
 'आ': 7,
 'इ': 8,
 'ई': 9,
 'उ': 10,
 'ऊ': 11,
 'ऋ': 12,
 'ए': 13,
 'ऐ': 14,
 'ऑ': 15,
 'ओ': 16,
 'औ': 17,
 'क': 18,
 'ख': 19,
 'ग': 20,
 'घ': 21,
 'ङ': 22,
 'च': 23,
 'छ': 24,
 'ज': 25,
 'झ': 26,
 'ञ': 27,
 'ट': 28,
 'ठ': 29,
 'ड': 30,
 'ढ': 31,
 'ण': 32,
 'त': 33,
 'थ': 34,
 'द': 35,
 'ध': 36,
 'न': 37,
 'प': 38,
 'फ': 39,
 'ब': 40,
 'भ': 41,
 'म': 42,
 'य': 43,
 'र': 44,
 'ल': 45,
 'ळ': 46,
 'व': 47,
 'श': 48,
 'ष': 49,
 'स': 50,
 'ह': 51,
 '़': 52,
 'ऽ': 53,
 'ा': 54,
 'ि': 55,
 'ी': 56,
 'ु': 57,
 'ू': 58,
 'ृ': 59,
 'ॅ': 60,
 'े': 61,
 'ै': 62,
 'ॉ': 63,
 'ो': 64,
 'ौ': 65,
 '्': 66,
 '<UNK>': 67,
 '<PAD>': 0}

In [None]:
translate_sentence(model,input_texts[0],eng_token_map,hin_token_map,device)

AssertionError: ignored

In [None]:
encoder=Encoder(len(eng_token_map),32,16,2,0.1,'LSTM',True)

Encoder(
  (embedding): Embedding(28, 32, padding_idx=0)
  (rnn): LSTM(32, 16, num_layers=2, batch_first=True, dropout=0.1, bidirectional=True)
)

In [None]:
a,b,c=pre_process(test)

In [None]:
len(c[0][0])

68

In [None]:
b[0]

array([ 1., 34., 44., 42., 62., 18., 66., 50.,  2.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
      dtype=float32)

In [None]:
list(target_texts[3])

['\t', 'ट', '्', 'व', 'ि', 'ट', 'र', '्', 'स', '\n']

In [None]:
len(c[0][0])

68

In [None]:
b[3]

array([ 1., 28., 66., 47., 55., 28., 44., 66., 50.,  2.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
      dtype=float32)

In [None]:
a=torch.tensor(a)

In [None]:
a.dtype

torch.float32

In [None]:
a,b,c=pre_process(val)

67

In [None]:
input_texts[0]

'shastragaar'

In [None]:
uncommon_list1 = [x for x in hindi_tokens_train if x not in hindi_tokens_test]
uncommon_list2 = [x for x in hindi_tokens_test if x not in hindi_tokens_train]

# Print the result
print("Uncommon elements in list1:", uncommon_list1)
print("Uncommon elements in list2:", uncommon_list2)

Uncommon elements in list1: ['ङ', 'ळ']
Uncommon elements in list2: ['ॊ']


In [None]:
uncommon_elements

{'ङ', 'ळ', 'ॊ'}