# IMPORTS

In [1]:
import csv
import numpy as np
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.optim as optim
import heapq
import torch.nn.functional as F
from tqdm import tqdm
from torch.nn.utils import clip_grad_norm_
import random
import wandb
import warnings
warnings.filterwarnings("ignore")

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [3]:
!wandb login 3c81526a5ec348850a4c9d0f852f6631959307ed

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


# PREPROCESSING

In [4]:
def loadData(params):
    language = params['language']
    dataset_path = params['dataset_path']
    train_path = os.path.join(dataset_path, language, language + '_train.csv')
    val_path = os.path.join(dataset_path, language, language + '_valid.csv')
    test_path = os.path.join(dataset_path, language, language + '_test.csv')
    train_data = csv.reader(open(train_path,encoding='utf8'))
    val_data = csv.reader(open(val_path,encoding='utf8'))
    test_data = csv.reader(open(test_path,encoding='utf8'))
    train_words , train_translations = [], []
    val_words , val_translations = [], []
    test_words , test_translations = [], []
    pad, start, end ='', '^', '$'
    
    for pair in train_data:
        train_words.append(pair[0] + end)
        train_translations.append(start + pair[1] + end)
    for pair in val_data:
        val_words.append(pair[0] + end)
        val_translations.append(start + pair[1] + end)
    for pair in test_data:
        test_words.append(pair[0] + end)
        test_translations.append(start + pair[1] + end)
    
    train_words , train_translations = np.array(train_words), np.array(train_translations)
    val_words , val_translations = np.array(val_words), np.array(val_translations)
    test_words , test_translations = np.array(test_words), np.array(test_translations)
    input_vocab = set()
    output_vocab = set()
    
    for w in train_words:
        for c in w:
            input_vocab.add(c)
    for w in val_words:
        for c in w:
            input_vocab.add(c)
    for w in test_words:
        for c in w:
            input_vocab.add(c)
            
    for w in train_translations:
        for c in w:
            output_vocab.add(c)
    for w in val_translations:
        for c in w:
            output_vocab.add(c)
    for w in test_translations:
        for c in w:
            output_vocab.add(c)
    
    input_vocab.remove(end)
    output_vocab.remove(start)
    output_vocab.remove(end)  
    input_vocab, output_vocab = [pad, start, end] + list(sorted(input_vocab)), [pad, start, end] + list(sorted(output_vocab))
            
    input_index = {char: idx for idx, char in enumerate(input_vocab)}
    output_index = {char: idx for idx, char in enumerate(output_vocab)}
    # output_index =  dict([(char, idx) for idx, char in enumerate(output_vocab)])
    input_index_rev = {idx: char for char, idx in input_index.items()}
    output_index_rev = {idx: char for char, idx in output_index.items()}
    
    max_enc_len = max([len(word) for word in np.hstack((train_words, test_words, val_words))])
    max_dec_len = max([len(word) for word in np.hstack((train_translations, val_translations, test_translations))])
    max_len = max(max_enc_len, max_dec_len)
        
    preprocessed_data = {
        'SOS' : start,
        'EOS' : end,
        'PAD' : pad,
        'train_words' : train_words,
        'train_translations' : train_translations,
        'val_words' : val_words,
        'val_translations' : val_translations,
        'test_words' : test_words,
        'test_translations' : test_translations,
        'max_enc_len' : max_enc_len,
        'max_dec_len' : max_dec_len,
        'max_len' : max_len,
        'input_index' : input_index,
        'output_index' : output_index,
        'input_index_rev' : input_index_rev,
        'output_index_rev' : output_index_rev
    }
    return preprocessed_data

In [5]:
def create_tensor(preprocessed_data):
    input_data = np.zeros((preprocessed_data['max_len'],len(preprocessed_data['train_words'])), dtype = 'int64')
    output_data = np.zeros((preprocessed_data['max_len'],len(preprocessed_data['train_words'])), dtype = 'int64')
    
    val_input_data = np.zeros((preprocessed_data['max_len'],len(preprocessed_data['val_words'])), dtype = 'int64')
    val_output_data = np.zeros((preprocessed_data['max_len'],len(preprocessed_data['val_words'])), dtype = 'int64')
    
    test_input_data = np.zeros((preprocessed_data['max_len'],len(preprocessed_data['test_words'])), dtype = 'int64')
    test_output_data = np.zeros((preprocessed_data['max_len'],len(preprocessed_data['test_words'])), dtype = 'int64')
    
    for idx, (w, t) in enumerate(zip(preprocessed_data['train_words'], preprocessed_data['train_translations'])):
        for i, char in enumerate(w):
            input_data[i, idx] = preprocessed_data['input_index'][char]
        for i, char in enumerate(t):
            output_data[i, idx] = preprocessed_data['output_index'][char]
        
    for idx, (w, t) in enumerate(zip(preprocessed_data['val_words'], preprocessed_data['val_translations'])):
        for i, char in enumerate(w):
            val_input_data[i, idx] = preprocessed_data['input_index'][char]
        for i, char in enumerate(t):
            val_output_data[i, idx] = preprocessed_data['output_index'][char]
    
    for idx, (w, t) in enumerate(zip(preprocessed_data['test_words'], preprocessed_data['test_translations'])):
        for i, char in enumerate(w):
            test_input_data[i, idx] = preprocessed_data['input_index'][char]
        for i, char in enumerate(t):
            test_output_data[i, idx] = preprocessed_data['output_index'][char]
    
    input_data, output_data = torch.tensor(input_data,dtype = torch.int64), torch.tensor(output_data, dtype = torch.int64)
    val_input_data, val_output_data = torch.tensor(val_input_data,dtype = torch.int64), torch.tensor(val_output_data, dtype = torch.int64)
    test_input_data, test_output_data = torch.tensor(test_input_data,dtype = torch.int64), torch.tensor(test_output_data, dtype = torch.int64)
    
    tensors = {
        'input_data' : input_data,
        'output_data' : output_data,
        'val_input_data' : val_input_data,
        'val_output_data' : val_output_data, 
        'test_input_data' : test_input_data,
        'test_output_data' : test_output_data
    }
    return tensors

In [6]:
# dict = {
# 'language' : 'hin',
# # 'dataset_path' : r'C:\Users\gragh\OneDrive\Desktop\Codes\CS6910 DL\Assignment 3\DataSet\aksharantar_sampled',
# 'dataset_path' : '/kaggle/input/dl-ass3/aksharantar_sampled'
# }
# preprocessed_data = loadData(dict)
# tensors = create_tensor(preprocessed_data)

# print('Input data : ', preprocessed_data['train_words'])
# print('Output data : ', preprocessed_data['train_translations'])
# print('Number of samples : ', len(preprocessed_data['train_words']))

# print('Input data : ', preprocessed_data['val_words'])
# print('Output data : ', preprocessed_data['val_translations'])
# print('Number of val samples : ', len(preprocessed_data['val_words']))

# print('Input data : ', preprocessed_data['test_words'])
# print('Output data : ', preprocessed_data['test_translations'])
# print('Number of test samples : ', len(preprocessed_data['test_words']))

# print('Max incoder length : ', preprocessed_data['max_enc_len'])
# print('Max incoder length : ', preprocessed_data['max_enc_len'])
# print('Max length : ', preprocessed_data['max_len'])

# print('Input index length', len(preprocessed_data['input_index']))
# print('Output index length', len(preprocessed_data['output_index']))
# print('Input index', preprocessed_data['input_index'])
# print('Output index', preprocessed_data['output_index'])
# print('Input index Rev', preprocessed_data['input_index_rev'])
# print('Output index Rev', preprocessed_data['output_index_rev'])

# print('Input Data', tensors['input_data'].shape)
# print('Output Data', tensors['output_data'].shape)
# print('Input Data Val', tensors['val_input_data'].shape)
# print('Output Data Val', tensors['val_output_data'].shape)
# print('Input Data Test', tensors['test_input_data'].shape)
# print('Output Data Test', tensors['test_output_data'].shape)

# print(tensors['input_data'][:,0])
# print(tensors['output_data'][:,0])

# Encoder

In [7]:
class Encoder(nn.Module): 
    def __init__(self, params, preprocessed_data):
        super(Encoder, self).__init__()
        self.cell_type = params['cell_type']
        self.dropout = nn.Dropout(params['dropout'])
        self.embedding = nn.Embedding(len(preprocessed_data['input_index']), params['embedding_size'])
        if self.cell_type == 'RNN':
            self.cell = nn.RNN(params['embedding_size'], params['hidden_size'], params['num_layers_enc'], dropout = params['dropout'], bidirectional = params['bi_dir'])
        elif self.cell_type == 'LSTM':
            self.cell = nn.LSTM(params['embedding_size'], params['hidden_size'], params['num_layers_enc'], dropout = params['dropout'], bidirectional = params['bi_dir'])
        elif self.cell_type == 'GRU':
            self.cell = nn.GRU(params['embedding_size'], params['hidden_size'], params['num_layers_enc'], dropout = params['dropout'], bidirectional = params['bi_dir'])
        else:
            raise ValueError("Invalid type. Choose from 'RNN', 'LSTM', or 'GRU'.")
        
    def forward(self, x):
        drop_par = self.embedding(x)
        if self.cell_type == 'LSTM':
            outputs , (hidden, cell) = self.cell(self.dropout(drop_par))
            return hidden, cell
        outputs , hidden = self.cell(self.dropout(drop_par))
        return hidden

# Decoder

In [8]:
class Decoder(nn.Module):
    def __init__(self, params, preprocessed_data):
        super(Decoder, self).__init__()
        self.cell_type = params['cell_type']
        self.dropout = nn.Dropout(params['dropout'])
        self.embedding = nn.Embedding(len(preprocessed_data['output_index']), params['embedding_size'])
        if self.cell_type == 'RNN':
            self.cell = nn.RNN(params['embedding_size'], params['hidden_size'], params['num_layers_dec'], dropout = params['dropout'], bidirectional = params['bi_dir'])
        elif self.cell_type == 'LSTM':
            self.cell = nn.LSTM(params['embedding_size'], params['hidden_size'], params['num_layers_dec'], dropout = params['dropout'], bidirectional = params['bi_dir'])
        elif self.cell_type == 'GRU':
            self.cell = nn.GRU(params['embedding_size'], params['hidden_size'], params['num_layers_dec'], dropout = params['dropout'], bidirectional = params['bi_dir'])
        else:
            raise ValueError("Invalid type. Choose from 'RNN', 'LSTM', or 'GRU'.")
        
        self.fc = nn.Linear(params['hidden_size'] * 2 if params['bi_dir'] == True else params['hidden_size'], len(preprocessed_data['output_index']))

    def forward(self, x, hidden, cell):
        embedding = self.embedding(x.unsqueeze(0))
        if self.cell_type == 'LSTM':
            outputs, (hidden, cell) = self.cell(self.dropout(embedding), (hidden, cell))
        else:    
            outputs, hidden = self.cell(self.dropout(embedding), hidden)
        predictions = self.fc(outputs).squeeze(0)
        if self.cell_type == 'LSTM':
            predictions = F.log_softmax(predictions, dim = 1)
            return predictions, hidden, cell
        return predictions, hidden

# Seq2Seq

In [9]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, params,  preprocessed_data):
        super(Seq2Seq, self).__init__()
        self.cell_type = params['cell_type']
        self.decoder, self.encoder  = decoder, encoder
        self.output_index_len = len(preprocessed_data['output_index'])
        self.tfr = params['teacher_fr']

    def forward(self, source, target):
        batch_size, target_len = source.shape[1], target.shape[0]
        x = target[0]
        outputs = torch.zeros(target_len, batch_size, self.output_index_len).to(device)
        if self.cell_type == 'LSTM':
            hidden, cell = self.encoder(source)
        else:    
            hidden = self.encoder(source)
        for t in range(1, target_len):
            if self.cell_type == 'LSTM':
                output, hidden, cell = self.decoder(x, hidden, cell)
            else:    
                output, hidden = self.decoder(x, hidden, None)
            outputs[t], best_guess = output, output.argmax(1)
            x = best_guess if random.random() >= self.tfr else target[t]
        return outputs

# GET OPTIMIZERS

In [10]:
def get_optim(model, params):
    if params['optimizer'].lower() == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr = params['learning_rate'], momentum = 0.9)
    if params['optimizer'].lower() == 'adam':
        optimizer = optim.Adam(model.parameters(), lr = params['learning_rate'], betas = (0.9, 0.999), eps = 1e-8)
    if params['optimizer'].lower() == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr = params['learning_rate'], alpha = 0.99, eps = 1e-8)
    if params['optimizer'].lower() == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr = params['learning_rate'], lr_decay = 0, weight_decay = 0, initial_accumulator_value = 0, eps = 1e-10)
    return optimizer

# GET TOTAL PARAMETERS

In [11]:
def get_total_parameters(model):
    total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return total_params

# BEAM SEARCH

In [12]:
def beam_search(model, word, preprocessed_data, bw, lp, ct):
    data = np.zeros((preprocessed_data['max_len']+1, 1), dtype=np.int32)
    for idx, char in enumerate(word):
        data[idx, 0] = preprocessed_data['input_index'][char]
    data[idx + 1, 0] = preprocessed_data['input_index'][preprocessed_data['EOS']]
    data = torch.tensor(data, dtype=torch.int32).to(device)
    with torch.no_grad():
        if ct == 'LSTM':
           hidden, cell = model.encoder(data)
        else:
           hidden = model.encoder(data)
    output_start = preprocessed_data['output_index'][preprocessed_data['SOS']]
    out_reshape = np.array(output_start).reshape(1,)
    hidden_par = hidden.unsqueeze(0)
    initial_sequence = torch.tensor(out_reshape).to(device)
    beam = [(0.0, initial_sequence, hidden_par)]
    for i in range(len(preprocessed_data['output_index'])):
        candidates = []
        for score, seq, hidden in beam:
            if seq[-1].item() == preprocessed_data['output_index'][preprocessed_data['EOS']]:
                candidates.append((score, seq, hidden))
                continue
            reshape_last = np.array(seq[-1].item()).reshape(1, )
            hdn = hidden.squeeze(0) 
            x = torch.tensor(reshape_last).to(device)
            if ct == 'LSTM':
                output, hidden, cell = model.decoder(x, hdn, cell)
            else:
                output, hidden = model.decoder(x, hdn, None)
            topk_probs, topk_tokens = torch.topk(F.softmax(output, dim=1), k = bw)               
            for prob, token in zip(topk_probs[0], topk_tokens[0]):
                new_seq = torch.cat((seq, token.unsqueeze(0)), dim=0)
                ln_ns = len(new_seq)
                ln_pf = ((ln_ns - 1) / 5)
                candidate_score = score + torch.log(prob).item() / (ln_pf ** lp)
                candidates.append((candidate_score, new_seq, hidden.unsqueeze(0)))
        beam = heapq.nlargest(bw, candidates, key=lambda x: x[0])
    _, best_sequence, _ = max(beam, key=lambda x: x[0]) 
    prediction = ''.join([preprocessed_data['output_index_rev'][token.item()] for token in best_sequence[1:]])
    return prediction[:-1]          


# TRAIN MODEL

In [13]:
def train(model, criterion, optimizer, preprocessed_data, tensors, params):
    train_data, train_result = torch.split(tensors['input_data'], params['batch_size'], dim = 1), torch.split(tensors['output_data'], params['batch_size'], dim = 1)
    val_data, val_result = torch.split(tensors['val_input_data'], params['batch_size'], dim=1), torch.split(tensors['val_output_data'], params['batch_size'], dim=1)
    for epoch in range(params['num_epochs']):
        total_words = 0
        correct_pred = 0
        total_loss = 0
        model.train()
        with tqdm(total = len(train_data), desc = 'Training') as pbar:
            for i, (x, y) in enumerate(zip(train_data, train_result)):
                target, inp_data = y.to(device), x.to(device)
                optimizer.zero_grad()
                output = model(inp_data, target)
                target = target.reshape(-1)
                output = output.reshape(-1, output.shape[2])
            
#                 pad_mask = (target != preprocessed_data['output_index'][preprocessed_data['PAD']])
#                 target = target[pad_mask]
#                 output = output[pad_mask]
                
                loss = criterion(output, target)
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
                optimizer.step()
                total_loss += loss.item()
                total_words += target.size(0)
                correct_pred += torch.sum(torch.argmax(output, dim=1) == target).item()
                pbar.update(1)
        train_accuracy = (correct_pred / total_words)*100
        train_loss = total_loss / len(train_data)
        model.eval()
        with torch.no_grad():
            val_total_loss = 0
            val_total_words = 0
            val_correct_pred = 0
            with tqdm(total = len(val_data), desc = 'Validation') as pbar:
                for x_val, y_val in zip(val_data, val_result):
                    target_val, inp_data_val = y_val.to(device), x_val.to(device)
                    output_val = model(inp_data_val, target_val)
                    target_val = target_val.reshape(-1)
                    output_val = output_val.reshape(-1, output_val.shape[2])
                    
#                     pad_mask = (target_val != preprocessed_data['output_index'][preprocessed_data['PAD']])
#                     target_val = target_val[pad_mask]
#                     output_val = output_val[pad_mask]
                    
                    val_loss = criterion(output_val, target_val)
                    val_total_loss += val_loss.item()
                    val_total_words += target_val.size(0)
                    val_correct_pred += torch.sum(torch.argmax(output_val, dim=1) == target_val).item()
                    pbar.update(1)
            val_accuracy = (val_correct_pred / val_total_words) * 100
            val_loss = val_total_loss / len(val_data)
            
            correct_pred = 0
            total_words = len(preprocessed_data['val_words'])
            with tqdm(total = total_words, desc = 'Beam') as pbar_:
                for word, translation in zip(preprocessed_data['val_words'], preprocessed_data['val_translations']):
                    ans = beam_search(model, word, preprocessed_data, params['beam_width'], params['length_penalty'], params['cell_type'])
                    if ans == translation[1:-1]:
                        correct_pred += 1
                    pbar_.update(1)
        val_accuracy_beam = (correct_pred / total_words) * 100
        print(f'''Epoch : {epoch+1}
              Train Accuracy : {train_accuracy:.4f}, Train Loss : {train_loss:.4f}
              Validation Accuracy Char Level : {val_accuracy:.4f}, Validation Loss : {val_loss:.4f}
              Validation Accuracy Word Level : {val_accuracy_beam:.4f},  Correctly predicted : {correct_pred}/{total_words}''')
        if params['w_log']:
            wandb.log(
                    {
                        'epoch': epoch+1,
                        'training_loss' : train_loss,
                        'training_accuracy' : train_accuracy,
                        'validation_loss' : val_loss,
                        'validation_accuracy_char' : val_accuracy,
                        'validation_accuracy_word' : val_accuracy_beam,
                        'correctly_predicted' : correct_pred
                    }
                )
    return model, val_accuracy, val_accuracy_beam

# QUESTION 1 : Train Model

# HYPERPARAMETERS

In [None]:
# params = {
# #     'dataset_path' : r'C:\Users\gragh\OneDrive\Desktop\Codes\CS6910 DL\Assignment 3\DataSet\aksharantar_sampled',
#     'language' : 'hin',
#     'dataset_path' : '/kaggle/input/dl-ass3/aksharantar_sampled',
#     'embedding_size': 256,
#     'hidden_size': 512,
#     'num_layers_enc': 2,
#     'num_layers_dec': 2,
#     'cell_type': 'GRU',
#     'dropout': 0.3,
#     'optimizer' : 'adagrad',
#     'learning_rate': 0.01,
#     'batch_size': 32,
#     'num_epochs': 10,
#     'teacher_fr' : 0.7,
#     'length_penalty' : 0.6,
#     'beam_width': 4,
#     'bi_dir' : False,
#     'w_log' : 0
# }
# Epoch : 10
#               Train Accuracy : 28.5620, Train Loss : 0.8019
#               Validation Accuracy Char Level : 23.6319, Validation Loss : 0.9732
#               Validation Accuracy Word Level : 40.7227,  Correctly predicted : 1668/4096

In [25]:
params = {
#     'dataset_path' : r'C:\Users\gragh\OneDrive\Desktop\Codes\CS6910 DL\Assignment 3\DataSet\aksharantar_sampled',
    'language' : 'hin',
    'dataset_path' : '/kaggle/input/dl-ass3/aksharantar_sampled',
    'embedding_size': 256,
    'hidden_size': 512,
    'num_layers_enc': 2,
    'num_layers_dec': 2,
    'cell_type': 'GRU',
    'dropout': 0.3,
    'optimizer' : 'adagrad',
    'learning_rate': 0.01,
    'batch_size': 32,
    'num_epochs': 10,
    'teacher_fr' : 0.7,
    'length_penalty' : 0.6,
    'beam_width': 4,
    'bi_dir' : False,
    'w_log' : 0
}
preprocessed_data = loadData(params)
tensors = create_tensor(preprocessed_data)

encoder = Encoder(params, preprocessed_data).to(device)
decoder = Decoder(params, preprocessed_data).to(device)
model = Seq2Seq(encoder, decoder, params, preprocessed_data).to(device)  
# print(model)

criterion = nn.CrossEntropyLoss(ignore_index = 0)
optimizer = get_optim(model,params)
# Print total number of parameters in the model
# total_parameters = get_total_parameters(model)
# print(f'Total Trainable Parameters: {total_parameters}')

if params['w_log']:
    wandb.init(project = 'DL-Assignment-3')
    wandb.run.name = (
        'check_c:' + params['cell_type'] +
        '_e:' + str(params['num_epochs']) +
        '_es:' + str(params['embedding_size']) +
        '_hs:' + str(params['hidden_size']) +
        '_nle:' + str(params['num_layers_enc']) +
        '_nld:' + str(params['num_layers_dec']) +
        '_o:' + params['optimizer'] +
        '_lr:' + str(params['learning_rate']) +
        '_bs:' + str(params['batch_size']) +
        '_tf:' + str(params['teacher_fr']) +
        '_lp:' + str(params['length_penalty']) +
        '_b:' + str(params['bi_dir']) +
        '_bw:' + str(params['beam_width'])
    )
trained_model, _, _ = train(model, criterion, optimizer, preprocessed_data, tensors, params)
if params['w_log']:
    wandb.finish()

Training: 100%|██████████| 1600/1600 [00:52<00:00, 30.47it/s]
Validation: 100%|██████████| 128/128 [00:01<00:00, 84.98it/s]
Beam: 100%|██████████| 4096/4096 [01:49<00:00, 37.29it/s]


Epoch : 1
              Train Accuracy : 19.8973, Train Loss : 1.6130
              Validation Accuracy Char Level : 21.3460, Validation Loss : 1.1907
              Validation Accuracy Word Level : 25.0244,  Correctly predicted : 1025/4096


Training: 100%|██████████| 1600/1600 [00:52<00:00, 30.72it/s]
Validation: 100%|██████████| 128/128 [00:01<00:00, 85.99it/s]
Beam: 100%|██████████| 4096/4096 [01:50<00:00, 37.21it/s]


Epoch : 2
              Train Accuracy : 24.6471, Train Loss : 1.1460
              Validation Accuracy Char Level : 22.2602, Validation Loss : 1.0997
              Validation Accuracy Word Level : 31.7627,  Correctly predicted : 1301/4096


Training: 100%|██████████| 1600/1600 [00:52<00:00, 30.60it/s]
Validation: 100%|██████████| 128/128 [00:01<00:00, 85.13it/s]
Beam: 100%|██████████| 4096/4096 [01:52<00:00, 36.54it/s]


Epoch : 3
              Train Accuracy : 25.9499, Train Loss : 1.0301
              Validation Accuracy Char Level : 22.8371, Validation Loss : 1.0378
              Validation Accuracy Word Level : 34.3018,  Correctly predicted : 1405/4096


Training: 100%|██████████| 1600/1600 [00:52<00:00, 30.63it/s]
Validation: 100%|██████████| 128/128 [00:01<00:00, 85.46it/s]
Beam: 100%|██████████| 4096/4096 [01:51<00:00, 36.65it/s]


Epoch : 4
              Train Accuracy : 26.6657, Train Loss : 0.9671
              Validation Accuracy Char Level : 23.1228, Validation Loss : 1.0073
              Validation Accuracy Word Level : 36.3770,  Correctly predicted : 1490/4096


Training: 100%|██████████| 1600/1600 [00:52<00:00, 30.36it/s]
Validation: 100%|██████████| 128/128 [00:01<00:00, 85.48it/s]
Beam: 100%|██████████| 4096/4096 [01:50<00:00, 36.95it/s]


Epoch : 5
              Train Accuracy : 27.1622, Train Loss : 0.9226
              Validation Accuracy Char Level : 23.3778, Validation Loss : 0.9913
              Validation Accuracy Word Level : 37.5488,  Correctly predicted : 1538/4096


Training: 100%|██████████| 1600/1600 [00:51<00:00, 30.95it/s]
Validation: 100%|██████████| 128/128 [00:01<00:00, 86.87it/s]
Beam: 100%|██████████| 4096/4096 [01:51<00:00, 36.74it/s]


Epoch : 6
              Train Accuracy : 27.5605, Train Loss : 0.8896
              Validation Accuracy Char Level : 23.4456, Validation Loss : 0.9795
              Validation Accuracy Word Level : 38.0371,  Correctly predicted : 1558/4096


Training: 100%|██████████| 1600/1600 [00:51<00:00, 30.89it/s]
Validation: 100%|██████████| 128/128 [00:01<00:00, 87.73it/s]
Beam: 100%|██████████| 4096/4096 [01:51<00:00, 36.89it/s]


Epoch : 7
              Train Accuracy : 27.8257, Train Loss : 0.8652
              Validation Accuracy Char Level : 23.4384, Validation Loss : 0.9828
              Validation Accuracy Word Level : 39.2334,  Correctly predicted : 1607/4096


Training: 100%|██████████| 1600/1600 [00:51<00:00, 30.96it/s]
Validation: 100%|██████████| 128/128 [00:01<00:00, 81.43it/s]
Beam: 100%|██████████| 4096/4096 [01:50<00:00, 36.96it/s]


Epoch : 8
              Train Accuracy : 28.1029, Train Loss : 0.8417
              Validation Accuracy Char Level : 23.5560, Validation Loss : 0.9722
              Validation Accuracy Word Level : 39.5752,  Correctly predicted : 1621/4096


Training: 100%|██████████| 1600/1600 [00:51<00:00, 30.88it/s]
Validation: 100%|██████████| 128/128 [00:01<00:00, 87.22it/s]
Beam: 100%|██████████| 4096/4096 [01:53<00:00, 36.24it/s]


Epoch : 9
              Train Accuracy : 28.3662, Train Loss : 0.8187
              Validation Accuracy Char Level : 23.5740, Validation Loss : 0.9726
              Validation Accuracy Word Level : 40.1123,  Correctly predicted : 1643/4096


Training: 100%|██████████| 1600/1600 [00:51<00:00, 30.93it/s]
Validation: 100%|██████████| 128/128 [00:01<00:00, 86.03it/s]
Beam: 100%|██████████| 4096/4096 [01:52<00:00, 36.47it/s]

Epoch : 10
              Train Accuracy : 28.5620, Train Loss : 0.8019
              Validation Accuracy Char Level : 23.6319, Validation Loss : 0.9732
              Validation Accuracy Word Level : 40.7227,  Correctly predicted : 1668/4096





# Prediction

In [26]:
def predict(model, word, preprocessed_data, params):
    data = np.zeros((preprocessed_data['max_len'] + 1,1), dtype= int)
    pred = ''
    for t, char in enumerate(word):
        data[t, 0] = preprocessed_data['input_index'][char]
    data[(t+1),0] = preprocessed_data['input_index'][preprocessed_data['EOS']]
    data = torch.tensor(data,dtype = torch.int64).to(device)
    with torch.no_grad():
        if params['cell_type'] == 'LSTM':
            hidden, cell = model.encoder(data)
        else:
            hidden = model.encoder(data)
    x = torch.tensor([preprocessed_data['output_index'][preprocessed_data['SOS']]]).to(device)
    for t in range(1, len(preprocessed_data['output_index'])):
        if params['cell_type'] == 'LSTM':
            output, hidden, cell = model.decoder(x, hidden, cell)
        else:
            output, hidden = model.decoder(x, hidden, None)
        character = preprocessed_data['output_index_rev'][output.argmax(1).item()]
        if character != preprocessed_data['EOS']:
            pred = pred + character
        else:
            break
        x = torch.tensor([output.argmax(1)]).to(device)        
    return pred

words = ['harsh', 'iit', 'madras', 'nirav', 'nishchal', 'nishant', 'neymar', 'neha', 'raghav', 'rahul', 'rohit', 'hahahahaha', 'ohohohoh']
print('################################## using predict function ############################################################')
for w in words:
    output_sequence = predict(trained_model, w, preprocessed_data, params)
    print(w,'->',output_sequence)
for w in preprocessed_data['val_words'][:10]:
    output_sequence = predict(trained_model, w[:-1], preprocessed_data, params)
    print(w,'->',output_sequence)
print('################################## using beam ############################################################')
for w in words:
    output_sequence = beam_search(trained_model, w, preprocessed_data, params['beam_width'], params['length_penalty'], params['cell_type'])
    print(w,'->',output_sequence)
for w in preprocessed_data['val_words'][:10]:
    output_sequence = beam_search(trained_model, w, preprocessed_data, params['beam_width'], params['length_penalty'], params['cell_type'])
    print(w,'->',output_sequence)        


################################## using predict ############################################################
harsh -> हर्ष
iit -> आईआईटी
madras -> मदरास
nirav -> निरव
nishchal -> निश्चल
nishant -> निशंत
neymar -> नेमार
neha -> नेहा
raghav -> रघव
rahul -> राहुल
rohit -> रोहित
hahahahaha -> हहहहहाहा
ohohohoh -> ओहोहोह
jaisawal$ -> जसावाल
bajai$ -> बजाई
sanghthan$ -> संघठन
haiwaan$ -> हैवान
nilgiri$ -> निलगिरी
drutgrami$ -> द्रत्ग्रामी
jhadapon$ -> झड़पों
nakronda$ -> नकरोंडा
eesl$ -> ईएसआईएल
bachta$ -> बचता
################################## using beam ############################################################
harsh -> हर्ष
iit -> आईआईटी
madras -> मदरास
nirav -> निरव
nishchal -> निश्चल
nishant -> निशंत
neymar -> नेमार
neha -> नेहा
raghav -> रघव
rahul -> राहुल
rohit -> रोहित
hahahahaha -> हहहहहाहा
ohohohoh -> ओहोहोह
jaisawal$ -> जैसावाल
bajai$ -> बजाई
sanghthan$ -> संघठन
haiwaan$ -> हैवान
nilgiri$ -> निलगिरी
drutgrami$ -> द्रत्ग्रामी
jhadapon$ -> झड़पों
nakronda$ -> नकरोंडा
eesl$ -> ईई

# Question 4 : Test Accuracy

In [29]:
trained_model.eval()
correct_pred = 0
words, translations, predictions, results = [], [], [], []
total_words = len(preprocessed_data['test_words'])
with tqdm(total = total_words, desc = 'Test_beam') as pbar_:
    for word, translation in zip(preprocessed_data['test_words'], preprocessed_data['test_translations']):
        ans = beam_search(trained_model, word, preprocessed_data, params['beam_width'], params['length_penalty'], params['cell_type'])
        words.append(word[:-1])
        translations.append(translation[1:-1])
        predictions.append(ans)
        if ans == translation[1:-1]:
            correct_pred += 1
            results.append('Yes')
        else:
            results.append('No')
        pbar_.update(1)
test_accuracy = (correct_pred / total_words) * 100
print(f'''Test Accuracy : {test_accuracy:.4f}, Correctly predicted : {correct_pred}/{total_words}''')

# Logging Results
log = {'Word': words, 'Translation' : translations, 'Prediction' : predictions, 'Result' : results}
path = '/kaggle/working/predictions_vanilla.csv'
data_frame = pd.DataFrame(log)
data_frame.to_csv(path, header = True, index = False)
pd.DataFrame(log)

Test_beam: 100%|██████████| 4096/4096 [02:06<00:00, 32.43it/s]


Test Accuracy : 38.6475, Correctly predicted : 1583/4096


Unnamed: 0,Word,Translation,Prediction,Result
0,thermax,थरमैक्स,थर्मक्स,No
1,sikhaaega,सिखाएगा,सिखाएगा,Yes
2,learn,लर्न,लीरन,No
3,twitters,ट्विटर्स,ट्विटर्स,Yes
4,tirunelveli,तिरुनेलवेली,तिरुनेलेवीली,No
...,...,...,...,...
4091,saflata,सफ़लता,सफलता,No
4092,shbana,शबाना,श्बाना,No
4093,khaatootolaa,खातूटोला,खातूतोला,No
4094,shivastava,शिवास्तव,शिवस्तवा,No


# QUESTION 2 : Tuning Hyperparameters

In [None]:
# sweep_config = {
#             'name': 'sweep 1 and 1.1 : random',
#             'method': 'random',
#             'metric': { 'goal': 'maximize','name': 'Accuracy'},
#             'parameters': 
#                 {
#                     'num_epochs': {'values': [10]},
#                     'cell_type': {'values': ['RNN', 'LSTM', 'GRU']},
#                     'embedding_size': {'values': [128, 256, 512]},
#                     'hidden_size': {'values': [128, 256, 512]},
#                     'num_layers': {'values': [1, 2, 3]},
#                     'dropout': {'values': [0.3, 0.5, 0.7]},
#                     'optimizer' : {'values' : ['adam', 'sgd', 'rmsprop', 'adagrad']},
#                     'learning_rate': {'values': [0.001, 0.005, 0.01, 0.1]},
#                     'batch_size': {'values': [32, 64]},
#                     'teacher_fr' : {'values': [0.3, 0.5, 0.7]},
#                     'length_penalty' : {'values': [0.4, 0.5, 0.6]},
#                     'bi_dir' : {'values': [True, False]},
#                     'beam_width': {'values': [1, 2, 3]}
#                 }
#             }

In [None]:
# sweep_config = {
#             'name': 'sweep 2 : bayes',
#             'method': 'bayes',
#             'metric': { 'goal': 'maximize','name': 'Accuracy'},
#             'parameters': 
#                 {
#                     'num_epochs': {'values': [10]},
#                     'cell_type': {'values': ['LSTM', 'GRU']},
#                     'embedding_size': {'values': [128, 256]},
#                     'hidden_size': {'values': [128, 256, 512]},
#                     'num_layers': {'values': [1, 2, 3]},
#                     'dropout': {'values': [0.3, 0.5]},
#                     'optimizer' : {'values' : ['adam']},
#                     'learning_rate': {'values': [0.001, 0.005, 0.01, 0.1]},
#                     'batch_size': {'values': [32, 64]},
#                     'teacher_fr' : {'values': [0.3, 0.5, 0.7]},
#                     'length_penalty' : {'values': [0.5, 0.6]},
#                     'bi_dir' : {'values': [True]},
#                     'beam_width': {'values': [1]}
#                 }
#             }

In [None]:
def run_sweep():
    init = wandb.init(project = 'DL-Assignment-3')
    config = init.config
    params = {
        'language' : 'hin',
        'dataset_path' : '/kaggle/input/dl-ass3/aksharantar_sampled',
        'num_epochs': config.num_epochs,
        'cell_type': config.cell_type,
        'embedding_size': config.embedding_size,
        'hidden_size': config.hidden_size,
        'num_layers_enc': config.num_layers,
        'num_layers_dec': config.num_layers,
        'dropout': config.dropout,
        'optimizer' : config.optimizer,
        'learning_rate': config.learning_rate,
        'batch_size': config.batch_size,
        'teacher_fr' : config.teacher_fr,
        'length_penalty' : config.length_penalty,
        'bi_dir' : config.bi_dir,
        'beam_width' : config.beam_width,
        'w_log' : 1
    }
    
    wandb.run.name = (
        'Q2_c:' + params['cell_type'] +
        '_e' + str(params['num_epochs']) +
        '_es:' + str(params['embedding_size']) +
        '_hs:' + str(params['hidden_size']) +
        '_nle:' + str(params['num_layers_enc']) +
        '_nld:' + str(params['num_layers_dec']) +
        '_o:' + params['optimizer'] +
        '_lr:' + str(params['learning_rate']) +
        '_bs:' + str(params['batch_size']) +
        '_tf:' + str(params['teacher_fr']) +
        '_lp:' + str(params['length_penalty']) +
        '_b:' + str(params['bi_dir']) +
        '_bw:' + str(params['beam_width'])
    )
    preprocessed_data = loadData(params)
    tensors = create_tensor(preprocessed_data)
    
    encoder = Encoder(params, preprocessed_data).to(device)
    decoder = Decoder(params, preprocessed_data).to(device)
    model = Seq2Seq(encoder, decoder, params, preprocessed_data).to(device) 
    
    criterion = nn.CrossEntropyLoss(ignore_index = 0)
    optimizer = get_optim(model,params)
    _, _, v_acc_beam = train(model, criterion, optimizer, preprocessed_data, tensors, params)
    wandb.log({'Accuracy': v_acc_beam})

In [None]:
# sweep_id = wandb.sweep(sweep_config, project='DL-Assignment-3')
# wandb.agent(sweep_id, run_sweep, count = 30)
# wandb.finish()