In [33]:
!pip install torchtext==0.6.0
# import locale
# locale.getpreferredencoding = lambda: "UTF-8"
! pip install wget
! pip install gdown
! pip install --upgrade gdown



In [34]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import random
import torch.optim as optim
from torch.autograd import Variable
import os
import gdown
from tqdm import tqdm
# import wandb
from io import open
import string, time, math
import wget
from zipfile import ZipFile
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import clear_output
from torch.utils.data import Dataset
import re
# from torchtext.datasets import Multi30k
from torchtext.data import Field, TabularDataset, BucketIterator
# import numpy as np
import spacy
# import random
# from torch.utils.tensorboard import SummaryWriter # to print to tensorboard
# from utils import translate_sentence, bleu, save_checkpoint, load_checkpoint

In [35]:
seed = 42
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
# CUDA
torch.cuda.manual_seed_all(seed)
# torch.backends.cudnn.deterministic=True
# torch.backends.cudnn.benchmark=False

In [36]:
# Getting the Dataset
url = 'https://drive.google.com/uc?id=1uRKU4as2NlS9i8sdLRS1e326vQRdhvfw&export=download'
# filename = os.path.basename(url)
# print(filename)

if not os.path.exists("aksharantar_sampled"):
  filename = gdown.download(url = url, quiet=False, fuzzy=True)
  print(filename)
  with ZipFile(filename, 'r') as z:
    print('Extracting files...')
    z.extractall()
    print('Done!')
  os.remove(filename)

In [37]:
eng_alpha = 'abcdefghijklmnopqrstuvwxyz'
pad_char = '<PAD>'

eng_alpha2idx = {pad_char: 0}
for index, alpha in enumerate(eng_alpha):
  eng_alpha2idx[alpha] = index+1

print(eng_alpha2idx)

{'<PAD>': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26}


In [38]:
# Bengali Unicode Hex Range: 2432-2558
# Hindi Unicode Hex Range: 2304-2431

min_range = 2304
max_range = 2431

# if indic_lang == 'ben':
#   min_range = 2432
#   max_range = 2558
# elif indic_lang == 'hindi':
#   min_range = 2304
#   max_range = 2431

indic_alpha = [chr(alpha) for alpha in range(min_range, max_range + 1)]
print(indic_alpha)
indic_alpha_size = len(indic_alpha)

indic_alpha2idx = {pad_char: 0}
for index, alpha in enumerate(indic_alpha):
  indic_alpha2idx[alpha] = index+1

print(indic_alpha2idx)

['ऀ', 'ँ', 'ं', 'ः', 'ऄ', 'अ', 'आ', 'इ', 'ई', 'उ', 'ऊ', 'ऋ', 'ऌ', 'ऍ', 'ऎ', 'ए', 'ऐ', 'ऑ', 'ऒ', 'ओ', 'औ', 'क', 'ख', 'ग', 'घ', 'ङ', 'च', 'छ', 'ज', 'झ', 'ञ', 'ट', 'ठ', 'ड', 'ढ', 'ण', 'त', 'थ', 'द', 'ध', 'न', 'ऩ', 'प', 'फ', 'ब', 'भ', 'म', 'य', 'र', 'ऱ', 'ल', 'ळ', 'ऴ', 'व', 'श', 'ष', 'स', 'ह', 'ऺ', 'ऻ', '़', 'ऽ', 'ा', 'ि', 'ी', 'ु', 'ू', 'ृ', 'ॄ', 'ॅ', 'ॆ', 'े', 'ै', 'ॉ', 'ॊ', 'ो', 'ौ', '्', 'ॎ', 'ॏ', 'ॐ', '॑', '॒', '॓', '॔', 'ॕ', 'ॖ', 'ॗ', 'क़', 'ख़', 'ग़', 'ज़', 'ड़', 'ढ़', 'फ़', 'य़', 'ॠ', 'ॡ', 'ॢ', 'ॣ', '।', '॥', '०', '१', '२', '३', '४', '५', '६', '७', '८', '९', '॰', 'ॱ', 'ॲ', 'ॳ', 'ॴ', 'ॵ', 'ॶ', 'ॷ', 'ॸ', 'ॹ', 'ॺ', 'ॻ', 'ॼ', 'ॽ', 'ॾ', 'ॿ']
{'<PAD>': 0, 'ऀ': 1, 'ँ': 2, 'ं': 3, 'ः': 4, 'ऄ': 5, 'अ': 6, 'आ': 7, 'इ': 8, 'ई': 9, 'उ': 10, 'ऊ': 11, 'ऋ': 12, 'ऌ': 13, 'ऍ': 14, 'ऎ': 15, 'ए': 16, 'ऐ': 17, 'ऑ': 18, 'ऒ': 19, 'ओ': 20, 'औ': 21, 'क': 22, 'ख': 23, 'ग': 24, 'घ': 25, 'ङ': 26, 'च': 27, 'छ': 28, 'ज': 29, 'झ': 30, 'ञ': 31, 'ट': 32, 'ठ': 33, 'ड': 34, 'ढ': 35, 'ण': 36, 'त': 37, 'थ': 38, 'द': 39, 'ध':

In [39]:
indic_idx2alpha = {v: k for k, v in indic_alpha2idx.items()}
eng_idx2alpha = {v: k for k, v in eng_alpha2idx.items()}

In [40]:
def tokenize_indic(string):
  # return string.split()
  char_list =  [*string]
  char_list = [indic_alpha2idx[char] for char in char_list]
  return char_list

def tokenize_eng(string):
  # return string.split()
  char_list =  [*string]
  char_list = [eng_alpha2idx[char] for char in char_list]
  return char_list

In [41]:
print(tokenize_indic('बिन्द्या'))
print(tokenize_eng('hello'))

[45, 64, 41, 78, 39, 78, 48, 63]
[8, 5, 12, 12, 15]


In [42]:
# Change Indic Language here
# indic_lang = 'ben'
indic_lang = 'hin'

In [43]:
# importing python package
import pandas as pd
  
file_names = ['test', 'train', 'valid']

for index, file_name in enumerate(file_names):
  # read contents of csv file
  file = pd.read_csv(f'aksharantar_sampled/{indic_lang}/{indic_lang}_{file_name}.csv')
  # print("\nOriginal file:")
  # print(file)
    
  # adding header
  headerList = ['eng', f'{indic_lang}']
    
  # converting data frame to csv
  file.to_csv(f'aksharantar_sampled/{indic_lang}/{indic_lang}_{file_name}.csv', header=headerList, index=False)
    
  # display modified csv file
  # file2 = pd.read_csv(f'aksharantar_sampled/{indic_lang}/{indic_lang}_valid_2.csv')
  # print('\nModified file:')
  # print(file2)

In [44]:
eng = Field(sequential=True, use_vocab=True, tokenize=tokenize_eng, init_token='<sos>', eos_token='<eos>')
indic = Field(sequential=True, use_vocab=True, tokenize=tokenize_indic, init_token='<sos>', eos_token='<eos>')

In [45]:
fields={'eng': ('eng', eng), f'{indic_lang}': ('indic', indic)}

path_name = f'aksharantar_sampled/{indic_lang}'
train_name = f'{indic_lang}_train.csv'
val_name = f'{indic_lang}_valid.csv'
test_name = f'{indic_lang}_test.csv'
train_data, val_data, test_data = TabularDataset.splits(
    path= path_name,
    train=train_name,
    validation=val_name,
    test=test_name,
    format='csv',
    fields=fields
)

In [46]:
print(train_data[0].__dict__.keys())

dict_keys(['eng', 'indic'])


In [47]:
print(train_data[0].__dict__.values())

dict_values([[2, 9, 14, 4, 8, 25, 1], [45, 64, 41, 78, 39, 78, 48, 63]])


In [48]:
eng_w, indic_w = [i for i in train_data[0].__dict__.values()]
print(eng_w)
for val in eng_w:
  print(eng_idx2alpha[val])

[2, 9, 14, 4, 8, 25, 1]
b
i
n
d
h
y
a


In [49]:
eng.build_vocab(train_data, max_size = 1000, min_freq = 1)
indic.build_vocab(train_data, max_size = 1000, min_freq = 1)

In [50]:
len(eng.vocab)

30

In [51]:
print(eng.vocab.__dict__.keys())
# print(eng.vocab.help?)
print(help(eng.vocab))

dict_keys(['freqs', 'itos', 'unk_index', 'stoi', 'vectors'])
Help on Vocab in module torchtext.vocab object:

class Vocab(builtins.object)
 |  Vocab(counter, max_size=None, min_freq=1, specials=['<unk>', '<pad>'], vectors=None, unk_init=None, vectors_cache=None, specials_first=True)
 |  
 |  Defines a vocabulary object that will be used to numericalize a field.
 |  
 |  Attributes:
 |      freqs: A collections.Counter object holding the frequencies of tokens
 |          in the data used to build the Vocab.
 |      stoi: A collections.defaultdict instance mapping token strings to
 |          numerical identifiers.
 |      itos: A list of token strings indexed by their numerical identifiers.
 |  
 |  Methods defined here:
 |  
 |  __eq__(self, other)
 |      Return self==value.
 |  
 |  __getitem__(self, token)
 |  
 |  __getstate__(self)
 |  
 |  __init__(self, counter, max_size=None, min_freq=1, specials=['<unk>', '<pad>'], vectors=None, unk_init=None, vectors_cache=None, specials_firs

In [52]:
len(indic.vocab)

68

In [53]:
# indic_langs = sorted([indic_lang for indic_lang in os.listdir("aksharantar_sampled") if indic_lang != '.DS_Store'])
# print(indic_langs)

In [54]:
# class TransLit_DataLoader(Dataset):
#   def __init__(self, filename):
#     self.eng_lang_words, self.indic_lang_words = self.readDataset(filename)
#     self.shuffle_indices = list(range(len(self.eng_lang_words)))
#     random.shuffle(self.shuffle_indices)
#     self.shuffle_start_index = 0

#   def __len__(self):
#     return len(self.eng_lang_words)

#   def __getitem__(self, idx):
#     return self.eng_lang_words[idx], self.indic_lang_words[idx]

#   def readDataset(self, filename):
#     X = []
#     y = []
#     # data = []

#     with open(filename, 'r') as f:
#       for line in f:
#         line = line.split(',')
#         eng_word = line[0].strip()
#         indic_word = line[1].strip()
#         X.append(eng_word)
#         y.append(indic_word)
#         # data_train.append((eng_word, indic_word))
#     return X, y

#   def get_random_sample(self):
#     return self.__getitem__(np.random.randint(len(self.eng_lang_words)))

#   def get_batch_from_array(self, batch_size, array):
#     end = self.shuffle_start_index + batch_size
#     batch = []
#     if end >= len(self.eng_lang_words):
#       batch = [array[i] for i in self.shuffle_indices[0:end%len(self.eng_lang_words)]]
#     return batch + [array[i] for i in self.shuffle_indices[self.shuffle_start_index:end]]

#   def get_batch(self, batch_size, postprocess = True):
#     eng_lang_batch = self.get_batch_from_array(batch_size, self.eng_lang_words)
#     indic_lang_batch = self.get_batch_from_array(batch_size, self.indic_lang_words)
#     self.shuffle_start_index += batch_size + 1

#     # Reshuffle if 1 epoch is complete
#     if self.shuffle_start_index >= len(self.eng_lang_words):
#       random.shuffle(self.shuffle_indices)
#       self.shuffle_start_index = 0

#     return eng_lang_batch, indic_lang_batch

In [55]:
# data_train = TransLit_DataLoader(f'aksharantar_sampled/{indic_lang}/{indic_lang}_train.csv')
# data_val = TransLit_DataLoader(f'aksharantar_sampled/{indic_lang}/{indic_lang}_valid.csv')
# data_test = TransLit_DataLoader(f'aksharantar_sampled/{indic_lang}/{indic_lang}_test.csv')

In [56]:
def calc_accuracy(net, device = 'cpu', data = val_data):
    # net = net.eval().to(device)
    # predictions = []
    accuracy = 0
    count = 0
    for i in range(len(data)):
        eng_word, indic_word = [j for j in data[i].__dict__.values()]
        # gt = gt_rep(indic_word, indic_alpha2idx, device)

        # outputs = infer(net, eng_word, gt.shape[0], device)
        output = translit_infer(net, eng_word, eng, indic, device, max_length=50)
        correct = 0

        for index, char in output:
          if char == indic_word[index]:
            correct += 1


        char_level_acc = correct/len(indic_word)
        
        if char_level_acc == 1.0:
          count += 1
    print(count)
    accuracy = count/len(data)
    
    return accuracy

In [180]:
class Encoder(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, num_layers, p):
        super(Encoder, self).__init__()
        self.dropout = nn.Dropout(p)
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers, bidirectional=True, dropout=p)

        # Linear Layer takes the two hidden states from Bidirectional RNN and outputs a single hidden state with weights learnt by network
        self.fc_hidden = nn.Linear(hidden_size*2, hidden_size)
        # Same for cell/state
        self.fc_cell = nn.Linear(hidden_size*2, hidden_size)

    def forward(self, x):
        # x shape: (seq_length, N) where N is batch size

        embedding = self.dropout(self.embedding(x))
        # embedding shape: (seq_length, N, embedding_size)

        encoder_states, (hidden, cell) = self.rnn(embedding)
#         print(encoder_states.shape, hidden[0].shape, cell.shape)
        # outputs shape: (seq_length, N, hidden_size)

        # First Value for forward dir, scond value for forward dir
        #hidden shape: (2, N, hidden_size)
#         encoder_states = self.fc_hidden(torch.cat((encoder_states[0:1], encoder_states[1:2]), dim=2))
#         print(hidden.shape)
        #hidden shape: (1, N, hidden_size)
#         cell = self.fc_cell(torch.cat((cell[0:1], cell[1:2]), dim=2))

        # extra dimension due to bidirectional - one going forward, one going backward
        
        # Now for attention, we would need all the encoder_states to add attention, but the hidden one is just the final encoded output
        return encoder_states, hidden, cell


class Decoder(nn.Module):
    def __init__(
        self, input_size, embedding_size, hidden_size, output_size, num_layers, p
    ):
        super(Decoder, self).__init__()
        self.dropout = nn.Dropout(p)
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(input_size, embedding_size)
        # hidden_size*2 comes from the encoder
        self.rnn = nn.LSTM(hidden_size*2 + embedding_size, hidden_size, num_layers, dropout=p)

        # We would take the hidden states from the encoder, but we would also take the hidden state from the decoder (s(t-1), h(j))
        self.energy = nn.Linear(hidden_size*2, 1)
        self.softmax = nn.Softmax(dim=0)
        self.relu = nn.ReLU()

        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, encoder_states, hidden, cell):
        # x shape: (N) where N is for batch size, we want it to be (1, N), seq_length
        # is 1 here because we are sending in a single word and not a sentence
        x = x.unsqueeze(0)

        embedding = self.dropout(self.embedding(x))
        # embedding shape: (1, N, embedding_size)

        sequence_length = encoder_states.shape[0]

        # To add them together, we need to have the same dim along that axis
#         h_reshaped = hidden.repeat(sequence_length, 1, 1)

        # h_reshape -> (hidden_size), encoder_states -> (hidden_size*2)
#         energy = self.relu(self.energy(torch.cat((h_reshaped, encoder_states), dim = 2)))
        energy = self.relu(self.energy(encoder_states))

        # Compute attention values
        attention = self.softmax(energy)
        # (seq_length, N, 1) -> We'll normalize through the first dim - seq_length so that sum of attention scores equals 1 for a specific batch

        # Goal is to elementwise multiply attention with the encoder state
        attention = attention.permute(1,2,0) #-> changing order to (N,seq_length,1)
        encoder_states = encoder_states.permute(1,0,2) #-> changer order to (N, seq_length, hidden_size*2)

        # Multiplying the above by torch.bmm we'll get: (N, 1, hidden_size*2) -> permuting it -> (1,N, hidden_size*2)
        context_vector = torch.bmm(attention, encoder_states).permute(1,0,2)

        #context_vector is for a particular timestep for decoder, since decoding one word at a time
        # we'll concat along 3rd dim to get hidden_size*3 -> concatenating attention vector and embedding input
        rnn_input = torch.cat((context_vector, embedding), dim=2)

        #Send this through RNN
        outputs, (hidden, cell) = self.rnn(rnn_input, (hidden, cell))
        # outputs shape: (1, N, hidden_size)

        predictions = self.fc(outputs)

        # predictions shape: (1, N, length_target_vocabulary) to send it to
        # loss function we want it to be (N, length_target_vocabulary) so we're
        # just gonna remove the first dim
        predictions = predictions.squeeze(0)

        return predictions, hidden, cell


class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target, teacher_force_ratio=0.5):
        batch_size = source.shape[1]
        target_len = target.shape[0]
        target_vocab_size = len(indic.vocab)

        outputs = torch.zeros(target_len, batch_size, target_vocab_size).to(device)

        encoder_states, hidden, cell = self.encoder(source)

        # Grab the first input to the Decoder which will be <SOS> token
        x = target[0]

        for t in range(1, target_len):
            # Use previous hidden, cell as context from encoder at start
            print(hidden.shape, cell.shape)
            output, hidden, cell = self.decoder(x, encoder_states, hidden, cell)

            # Store next output prediction
            outputs[t] = output

            # Get the best word the Decoder predicted (index in the vocabulary)
            best_guess = output.argmax(1)

            # With probability of teacher_force_ratio we take the actual next word
            # otherwise we take the word that the Decoder predicted it to be.
            # Teacher Forcing is used so that the model gets used to seeing
            # similar inputs at training and testing time, if teacher forcing is 1
            # then inputs at test time might be completely different than what the
            # network is used to. This was a long comment.
            x = target[t] if random.random() < teacher_force_ratio else best_guess

        return outputs



In [181]:

def translit_infer(model, word, eng, indic, device, max_length=50):
    tokens = tokenize_eng(word)

    # Add <SOS> and <EOS> in beginning and end respectively
    tokens.insert(0, eng.init_token)
    tokens.append(eng.eos_token)


    text_to_indices = [eng.vocab.stoi[token] for token in tokens]

    # Convert to Tensor
    word_tensor = torch.LongTensor(text_to_indices).unsqueeze(1).to(device)

    # Build encoder hidden, cell state
    with torch.no_grad():
        encoder_states, hidden, cell = model.encoder(word_tensor)

    outputs = [indic.vocab.stoi["<sos>"]]

    for _ in range(max_length):
        previous_char = torch.LongTensor([outputs[-1]]).to(device)

        with torch.no_grad():
            output, hidden, cell = model.decoder(previous_char, encoder_states, hidden, cell)
            best_guess = output.argmax(1).item()

        outputs.append(best_guess)

        # Model predicts it's the end of the sentence
        if output.argmax(1).item() == indic.vocab.stoi["<eos>"]:
            break

    translit_res = [indic.vocab.itos[idx] for idx in outputs]

    # remove start token
    translit_res_word = ''
    translit_res = translit_res[1:]
    # return translit_res
    for i in translit_res:
      if i != "<eos>":
        translit_res_word += indic_idx2alpha[i]
      else:
        break
    return translit_res_word


In [182]:
def save_checkpoint(state, filename=f"{indic_lang}_2_checkpoint.pth.tar"):
    print("=> Saving checkpoint")
    torch.save(state, filename)


def load_checkpoint(checkpoint, model, optimizer):
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint["state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer"])

In [183]:
from IPython.utils.path import target_outdated
def check_accuracy(loader, model, input_shape=None, toggle_eval=True, print_accuracy=True):
    if toggle_eval:
        model.eval()
    device = next(model.parameters()).device
    num_correct = 0
    num_samples = 0

    with torch.no_grad():
        loader.create_batches()
        for batch in loader.batches:
          for example in batch:
            num_samples += 1
            eng_word = "".join([eng_idx2alpha[val] for val in example.eng])
            indic_word = "".join([indic_idx2alpha[val2] for val2 in example.indic])
            indic_pred = translit_infer(model, eng_word, eng, indic, device, max_length=50)
            
            if indic_pred == indic_word:
              num_correct += 1

    accuracy = num_correct / num_samples
    if toggle_eval:
        model.train()
#     if print_accuracy:
#         print(f"Accuracy on validation set: {accuracy * 100:.2f}%")
    return accuracy

In [184]:
### Now model is ready to train

In [185]:
%%time
# Training Hyperparameters
num_epochs =30
learning_rate = 0.001
batch_size = 64

# Model Hyperparameters
load_model = False
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
input_size_encoder = len(eng.vocab)
input_size_decoder = len(indic.vocab)
output_size = len(indic.vocab)
encoder_embedding_size = 300
decoder_embedding_size = 300
hidden_size = 512
num_layers = 2
enc_dropout = 0.5
dec_dropout = 0.5

CPU times: user 52 µs, sys: 0 ns, total: 52 µs
Wall time: 57.9 µs


In [186]:
def train():
    
    train_iterator, val_iterator, test_iterator = BucketIterator.splits(
    (train_data, val_data, test_data),
    batch_size = batch_size,
    # Examples of similar length will be in same batch to minimize padding and save on compute
    sort_within_batch = True,
    sort_key = lambda x: len(x.eng),
    device = device)



    encoder_net = Encoder(
        input_size_encoder, encoder_embedding_size, hidden_size, num_layers, enc_dropout
        ).to(device)
    decoder_net = Decoder(
        input_size_decoder, decoder_embedding_size, hidden_size, output_size, num_layers, dec_dropout
        ).to(device)

    model = Seq2Seq(encoder_net, decoder_net).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    pad_idx = indic.vocab.stoi['<pad>']
    # if all examples in batch are of similar length, don't incur penalty for this padding
    criterion = nn.CrossEntropyLoss(ignore_index = pad_idx)

    if load_model:
      load_checkpoint(torch.load(f'{indic_lang}_checkpoint.pth.tar'), model, optimizer)

    word = 'bachta'
    og_translit = 'बचता'
    acc_val_prev = 0
    acc_val_current = 0
    for epoch in range(num_epochs):
      print(f'Epoch [{epoch+1} / {num_epochs}]')

      checkpoint = {
          'state_dict': model.state_dict(),
          'optimizer': optimizer.state_dict()
      }
      if acc_val_current > acc_val_prev:

          if os.path.exists(f'{indic_lang}_checkpoint_new_{acc_val_prev*100:.2f}.pth.tar'):
              os.remove(f'{indic_lang}_checkpoint_new_{acc_val_prev*100:.2f}.pth.tar')
          acc_val_prev = acc_val_current
          save_checkpoint(checkpoint, f'{indic_lang}_checkpoint_new_{acc_val_current*100:.2f}.pth.tar')



      loop = tqdm(enumerate(train_iterator), total=len(train_iterator))
      for batch_idx, batch in loop:
        inp_data = batch.eng.to(device)
        target = batch.indic.to(device)

        output = model(inp_data, target)
        # output shape: (target_len, batch_size, output_dim)

        #basically reshape output keeping last output_dim same
        output = output[1:].reshape(-1, output.shape[2]) # so that first start token is not sent to out model
        # target -> (target_len, batch_size)
        target = target[1:].reshape(-1)
        optimizer.zero_grad()
        loss = criterion(output, target)

        loss.backward()

        # to avoid exploding gradients, clip them when they are above a threshold
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
        optimizer.step()

      model.eval() # turns off Dropout
      translit_res = translit_infer(model, word, eng, indic, device, max_length=50)
      print(f'Translated example word:  English: {word}, Actual: {og_translit}, Predicted: {translit_res}')
      model.train()

      print('Computing Loss and Validation Accuracy...')
      acc_val_current = check_accuracy(val_iterator, model, input_shape=None, toggle_eval=True, print_accuracy=True)
      print(f'Training Loss: {loss.item()}, Validation Accuracy: {acc_val_current * 100:.2f}%')
      print('--------------------------')



In [187]:
%%time
train()

Epoch [1 / 30]


  0%|                                                   | 0/800 [00:00<?, ?it/s]

torch.Size([4, 64, 512]) torch.Size([4, 64, 512])





RuntimeError: Expected hidden[0] size (2, 64, 512), got [4, 64, 512]

In [None]:
for batch_idx, batch in enumerate(val_iterator):
    inp_data = batch.eng
    target = batch.indic
    print(inp_data.shape, target.shape)
    break