# IMPORTS

In [163]:
import csv
import numpy as np
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable 
import heapq
import torch.nn.functional as F
from tqdm import tqdm
# from torch.utils.data import DataLoader, TensorDataset
from torch.nn.utils import clip_grad_norm_
import random
import wandb

In [164]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [165]:
!wandb login 3c81526a5ec348850a4c9d0f852f6631959307ed

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


# PREPROCESSING

In [166]:
def loadData(params):
    language = params['language']
    dataset_path = params['dataset_path']
    train_path = os.path.join(dataset_path, language, language + '_train.csv')
    val_path = os.path.join(dataset_path, language, language + '_valid.csv')
    test_path = os.path.join(dataset_path, language, language + '_test.csv')
    train_data = csv.reader(open(train_path,encoding='utf8'))
    val_data = csv.reader(open(val_path,encoding='utf8'))
    test_data = csv.reader(open(test_path,encoding='utf8'))
    train_words , train_translations = [], []
    val_words , val_translations = [], []
    test_words , test_translations = [], []
    pad, start, end ='', '^', '$'
    
    for pair in train_data:
        train_words.append(pair[0] + end)
        train_translations.append(start + pair[1] + end)
    for pair in val_data:
        val_words.append(pair[0] + end)
        val_translations.append(start + pair[1] + end)
    for pair in test_data:
        test_words.append(pair[0] + end)
        test_translations.append(start + pair[1] + end)
    
    train_words , train_translations = np.array(train_words), np.array(train_translations)
    val_words , val_translations = np.array(val_words), np.array(val_translations)
    test_words , test_translations = np.array(test_words), np.array(test_translations)
    inout_vocab = set()
    output_vocab = set()
    
    for w in train_words:
        for c in w:
            inout_vocab.add(c)
    for w in val_words:
        for c in w:
            inout_vocab.add(c)
    for w in test_words:
        for c in w:
            inout_vocab.add(c)
            
    for w in train_translations:
        for c in w:
            output_vocab.add(c)
    for w in val_translations:
        for c in w:
            output_vocab.add(c)
    for w in test_translations:
        for c in w:
            output_vocab.add(c)
    
    inout_vocab.remove(end)
    output_vocab.remove(start)
    output_vocab.remove(end)  
    inout_vocab, output_vocab = [pad, start, end] + list(sorted(inout_vocab)), [pad, start, end] + list(sorted(output_vocab))
            
    input_index = dict([(char, idx) for idx, char in enumerate(inout_vocab)])
    output_index =  dict([(char, idx) for idx, char in enumerate(output_vocab)])
    input_index_rev = dict([(idx, char) for char, idx in input_index.items()])
    output_index_rev = dict([(idx, char) for char, idx in output_index.items()])
    
    max_enc_len = max([len(word) for word in np.hstack((train_words, test_words, val_words))])
    max_dec_len = max([len(word) for word in np.hstack((train_translations, val_translations, test_translations))])
    max_len = max(max_enc_len, max_dec_len)
        
    preprocessed_data = {
        'SOS' : start,
        'EOS' : end,
        'PAD' : pad,
        'train_words' : train_words,
        'train_translations' : train_translations,
        'val_words' : val_words,
        'val_translations' : val_translations,
        'test_words' : test_words,
        'test_translations' : test_translations,
        'max_enc_len' : max_enc_len,
        'max_dec_len' : max_dec_len,
        'max_len' : max_len,
        'input_index' : input_index,
        'output_index' : output_index,
        'input_index_rev' : input_index_rev,
        'output_index_rev' : output_index_rev
    }
    return preprocessed_data

In [167]:
def create_tensor(preprocessed_data):
    input_data = np.zeros((preprocessed_data['max_len'],len(preprocessed_data['train_words'])), dtype = 'int64')
    output_data = np.zeros((preprocessed_data['max_len'],len(preprocessed_data['train_words'])), dtype = 'int64')
    
    val_input_data = np.zeros((preprocessed_data['max_len'],len(preprocessed_data['val_words'])), dtype = 'int64')
    val_output_data = np.zeros((preprocessed_data['max_len'],len(preprocessed_data['val_words'])), dtype = 'int64')
    
    test_input_data = np.zeros((preprocessed_data['max_len'],len(preprocessed_data['test_words'])), dtype = 'int64')
    test_output_data = np.zeros((preprocessed_data['max_len'],len(preprocessed_data['test_words'])), dtype = 'int64')
    
    for idx, (w, t) in enumerate(zip(preprocessed_data['train_words'], preprocessed_data['train_translations'])):
        for i, char in enumerate(w):
            input_data[i, idx] = preprocessed_data['input_index'][char]
        for i, char in enumerate(t):
            output_data[i, idx] = preprocessed_data['output_index'][char]
        
    for idx, (w, t) in enumerate(zip(preprocessed_data['val_words'], preprocessed_data['val_translations'])):
        for i, char in enumerate(w):
            val_input_data[i, idx] = preprocessed_data['input_index'][char]
        for i, char in enumerate(t):
            val_output_data[i, idx] = preprocessed_data['output_index'][char]
    
    for idx, (w, t) in enumerate(zip(preprocessed_data['test_words'], preprocessed_data['test_translations'])):
        for i, char in enumerate(w):
            test_input_data[i, idx] = preprocessed_data['input_index'][char]
        for i, char in enumerate(t):
            test_output_data[i, idx] = preprocessed_data['output_index'][char]
    
    input_data, output_data = torch.tensor(input_data,dtype = torch.int64), torch.tensor(output_data, dtype = torch.int64)
    val_input_data, val_output_data = torch.tensor(val_input_data,dtype = torch.int64), torch.tensor(val_output_data, dtype = torch.int64)
    test_input_data, test_output_data = torch.tensor(test_input_data,dtype = torch.int64), torch.tensor(test_output_data, dtype = torch.int64)
    
    tensors = {
        'input_data' : input_data,
        'output_data' : output_data,
        'val_input_data' : val_input_data,
        'val_output_data' : val_output_data, 
        'test_input_data' : test_input_data,
        'test_output_data' : test_output_data
    }
    return tensors

In [168]:
# language = 'hin'
# # dataset_path = r'C:\Users\gragh\OneDrive\Desktop\Codes\CS6910 DL\Assignment 3\DataSet\aksharantar_sampled'
# dataset_path = '/kaggle/input/dl-ass3/aksharantar_sampled'

# train_path = os.path.join(dataset_path, language, language + '_train.csv')
# val_path = os.path.join(dataset_path, language, language + '_valid.csv')
# test_path = os.path.join(dataset_path, language, language + '_test.csv')
# preprocessed_data = loadData(train_path, val_path, test_path)
# tensors = create_tensor(preprocessed_data)

# print('Input data : ', preprocessed_data['train_words'])
# print('Output data : ', preprocessed_data['train_translations'])
# print('Number of samples : ', len(preprocessed_data['train_words']))

# print('Input data : ', preprocessed_data['val_words'])
# print('Output data : ', preprocessed_data['val_translations'])
# print('Number of val samples : ', len(preprocessed_data['val_words']))

# print('Input data : ', preprocessed_data['test_words'])
# print('Output data : ', preprocessed_data['test_translations'])
# print('Number of test samples : ', len(preprocessed_data['test_words']))

# print('Max incoder length : ', preprocessed_data['max_enc_len'])
# print('Max decoder length : ', preprocessed_data['max_dec_len'])

# print('Input index length', len(preprocessed_data['input_index']))
# print('Output index length', len(preprocessed_data['output_index']))
# print('Input index', preprocessed_data['input_index'])
# print('Output index', preprocessed_data['output_index'])
# print('Input index Rev', preprocessed_data['input_index_rev'])
# print('Output index Rev', preprocessed_data['output_index_rev'])

# print('Input Data', tensors['input_data'].shape)
# print('Output Data', tensors['output_data'].shape)
# print('Input Data Val', tensors['val_input_data'].shape)
# print('Output Data Val', tensors['val_output_data'].shape)
# print('Input Data Test', tensors['test_input_data'].shape)
# print('Output Data Test', tensors['test_output_data'].shape)

# # print(tensors['input_data'][:,0])
# # print(tensors['output_data'][:,0])

# LSTM

In [169]:
class EncoderLSTM(nn.Module): 
    def __init__(self, params, preprocessed_data):
        super(EncoderLSTM, self).__init__()
        self.dropout = nn.Dropout(params['dropout'])
        self.embedding = nn.Embedding(len(preprocessed_data['input_index']), params['embedding_size'])
        self.rnn = nn.LSTM(params['embedding_size'], params['hidden_size'], params['num_layers'], dropout = params['dropout'], bidirectional = params['bi_dir'])

    def forward(self, x):
        drop_par = self.embedding(x)
        outputs, (hidden, cell) = self.rnn(self.dropout(drop_par))
        return hidden, cell
    
class DecoderLSTM(nn.Module):
    def __init__(self,params, preprocessed_data):
        super(DecoderLSTM, self).__init__()
        self.dropout = nn.Dropout(params['dropout'])
        self.embedding = nn.Embedding(len(preprocessed_data['output_index']), params['embedding_size'])
        self.rnn = nn.LSTM(params['embedding_size'], params['hidden_size'], params['num_layers'], dropout = params['dropout'], bidirectional = params['bi_dir'])
        self.fc = nn.Linear(params['hidden_size'], len(preprocessed_data['output_index']))

    def forward(self, x, hidden, cell):
        embedding = self.embedding(x.unsqueeze(0))
        outputs, (hidden, cell) = self.rnn(self.dropout(embedding), (hidden, cell))
        predictions = self.fc(outputs).squeeze(0)
        predictions = F.log_softmax(predictions, dim = 1)
        return predictions, hidden, cell
    
class Seq2SeqLSTM(nn.Module):
    def __init__(self, encoder, decoder, params, preprocessed_data):
        super(Seq2SeqLSTM, self).__init__()  
        self.decoder, self.encoder = decoder, encoder
        self.output_index_len = len(preprocessed_data['output_index'])
        self.tfr = params['teacher_fr']

    def forward(self, source, target):
        batch_size, target_len, target_vocab_size = source.shape[1], target.shape[0], self.output_index_len
        outputs = torch.zeros(target_len, batch_size, target_vocab_size).to(device)
        x = target[0]
        hidden, cell = self.encoder(source)
        for t in range(1, target_len):
            output, hidden, cell = self.decoder(x, hidden, cell)
            outputs[t], best_guess = output, output.argmax(1)
            x = best_guess if random.random() >= self.tfr else target[t]
        return outputs

# RNN + GRU

In [170]:
class Encoder(nn.Module): 
    def __init__(self, params, preprocessed_data):
        super(Encoder, self).__init__()
        self.dropout = nn.Dropout(params['dropout'])
        self.embedding = nn.Embedding(len(preprocessed_data['input_index']), params['embedding_size'])
        if params['cell_type'] == 'RNN':
            self.cell = nn.RNN(params['embedding_size'], params['hidden_size'], params['num_layers'], dropout = params['dropout'], bidirectional = params['bi_dir'])
        if params['cell_type'] == 'GRU':
            self.cell = nn.GRU(params['embedding_size'], params['hidden_size'], params['num_layers'], dropout = params['dropout'], bidirectional = params['bi_dir'])

    def forward(self, x):
        drop_par = self.embedding(x)
        embedding = self.dropout(drop_par)
        outputs, hidden = self.cell(self.dropout(drop_par))
        return hidden
    
class Decoder(nn.Module):
    def __init__(self, params, preprocessed_data):
        super(Decoder, self).__init__()
        self.dropout = nn.Dropout(params['dropout'])
        self.embedding = nn.Embedding(len(preprocessed_data['output_index']), params['embedding_size'])
        if params['cell_type'] == 'RNN':
            self.cell = nn.RNN(params['embedding_size'], params['hidden_size'], params['num_layers'], dropout = params['dropout'], bidirectional = params['bi_dir'])
        if params['cell_type'] == 'GRU':
            self.cell = nn.GRU(params['embedding_size'], params['hidden_size'], params['num_layers'], dropout = params['dropout'], bidirectional = params['bi_dir'])
        self.fc = nn.Linear(params['hidden_size'], len(preprocessed_data['output_index']))

    def forward(self, x, hidden):
        x = x.unsqueeze(0)
        drop_par = self.embedding(x)
        embedding = self.dropout(drop_par)
        outputs, hidden = self.cell(self.dropout(drop_par), hidden)
        predictions = self.fc(outputs).squeeze(0)
        return predictions, hidden
    
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, params,  preprocessed_data):
        super(Seq2Seq, self).__init__()
        self.decoder, self.encoder  = decoder, encoder
        self.target_vocab_size = len(preprocessed_data['output_index'])
        self.tfr = params['teacher_fr']

    def forward(self, source, target):
        batch_size, target_len = source.shape[1], target.shape[0]
        x = target[0]
        outputs = torch.zeros(target_len, batch_size, self.target_vocab_size).to(device)
        hidden= self.encoder(source)
        for t in range(1, target_len):
            output, hidden = self.decoder(x, hidden)
            outputs[t], best_guess = output, output.argmax(1)
            x = best_guess if random.random() >= self.tfr else target[t]
        return outputs

# GET OPTIMIZERS

In [171]:
def get_optim(model, params):
    if params['optimizer'].lower() == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr = params['learning_rate'], momentum = 0.9)
    if params['optimizer'].lower() == 'adam':
        optimizer = optim.Adam(model.parameters(), lr = params['learning_rate'], betas = (0.9, 0.999), eps = 1e-8)
    if params['optimizer'].lower() == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr = params['learning_rate'], alpha = 0.99, eps = 1e-8)
    if params['optimizer'].lower() == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr = params['learning_rate'], lr_decay = 0, weight_decay = 0, initial_accumulator_value = 0, eps = 1e-10)
    return optimizer

# GET TOTAL PARAMETERS

In [172]:
def get_total_parameters(model):
    total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return total_params

# BEAM SEARCH

In [173]:
def beam_search(model, word, preprocessed_data, params):
    data = np.zeros((preprocessed_data['max_len'], 1), dtype=np.int32)
    for idx, char in enumerate(word):
        data[idx, 0] = preprocessed_data['input_index'][char]
    data[idx, 0] = preprocessed_data['input_index'][preprocessed_data['EOS']]
    data = torch.tensor(data, dtype=torch.int32).to(device)
    with torch.no_grad():
        if params['cell_type'] == 'LSTM':
           hidden, cell = model.encoder(data)
        else:
           hidden = model.encoder(data)
    output_start = preprocessed_data['output_index'][preprocessed_data['SOS']]
    out_reshape = np.array(output_start).reshape(1,)
    hidden_par = hidden.unsqueeze(0)
    initial_sequence = torch.tensor(out_reshape).to(device)
    beam = [(0.0, initial_sequence, hidden_par)]
    for i in range(len(preprocessed_data['output_index'])):
        candidates = []
        for score, seq, hidden in beam:
            if seq[-1].item() == preprocessed_data['output_index'][preprocessed_data['EOS']]:
                candidates.append((score, seq, hidden))
                continue
            reshape_last = np.array(seq[-1].item()).reshape(1, )
            hdn = hidden.squeeze(0) 
            x = torch.tensor(reshape_last).to(device)
            if params['cell_type'] == 'LSTM':
                output, hidden, cell = model.decoder(x, hdn, cell)
            else:
                output, hidden = model.decoder(x, hdn)
            topk_probs, topk_tokens = torch.topk(F.softmax(output, dim=1), k = params['beam_width'])               
            for prob, token in zip(topk_probs[0], topk_tokens[0]):
                new_seq = torch.cat((seq, token.unsqueeze(0)), dim=0)
                ln_ns = len(new_seq)
                ln_pf = ((ln_ns - 1) / 5)
                candidate_score = score + torch.log(prob).item() / (ln_pf ** params['length_penalty'])
                candidates.append((candidate_score, new_seq, hidden.unsqueeze(0)))
        beam = heapq.nlargest(params['beam_width'], candidates, key=lambda x: x[0])
    _, best_sequence, _ = max(beam, key=lambda x: x[0]) 
    prediction = ''.join([preprocessed_data['output_index_rev'][token.item()] for token in best_sequence[1:]])
    return prediction[:-1]          


# TRAIN MODEL

In [174]:
def train(model, criterion, optimizer, preprocessed_data, tensors, params):
    train_data, train_result = torch.split(tensors['input_data'], params['batch_size'], dim = 1), torch.split(tensors['output_data'], params['batch_size'], dim = 1)
    val_data, val_result = torch.split(tensors['val_input_data'], params['batch_size'], dim=1), torch.split(tensors['val_output_data'], params['batch_size'], dim=1)
    for epoch in range(params['num_epochs']):
        total_words = 0
        correct_pred = 0
        total_loss = 0
        model.train()
        with tqdm(total = len(train_data), desc = 'Training') as pbar:
            for i, (x, y) in enumerate(zip(train_data, train_result)):
                target, inp_data = y.to(device), x.to(device)
                output = model(inp_data, target)
                pad_mask = (target != preprocessed_data['output_index'][preprocessed_data['PAD']])
                non_pad_targets = target[pad_mask]
                non_pad_outputs = output[pad_mask].reshape(-1, output.shape[2])
                loss = criterion(non_pad_outputs, non_pad_targets)
                optimizer.zero_grad()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
                optimizer.step()
                total_words += non_pad_targets.size(0)
                correct_pred += torch.sum(torch.argmax(non_pad_outputs, dim=1) == non_pad_targets).item()
                total_loss += loss.item()
                pbar.update(1)
        train_accuracy = (correct_pred / total_words)*100
        train_loss = total_loss / len(train_data)
        model.eval()
        with torch.no_grad():
            val_total_loss = 0
            val_total_words = 0
            val_correct_pred = 0
            with tqdm(total = len(val_data), desc = 'Validation') as pbar:
                for x_val, y_val in zip(val_data, val_result):
                    target_val, inp_data_val = y_val.to(device), x_val.to(device)
                    output_val = model(inp_data_val, target_val)
                    pad_mask = (target_val != preprocessed_data['output_index'][preprocessed_data['PAD']])
                    non_pad_targets = target_val[pad_mask]
                    non_pad_outputs = output_val[pad_mask].reshape(-1, output_val.shape[2])
                    val_loss = criterion(non_pad_outputs, non_pad_targets)
                    val_total_loss += val_loss.item()
                    val_total_words += non_pad_targets.size(0)
                    val_correct_pred += torch.sum(torch.argmax(non_pad_outputs, dim=1) == non_pad_targets).item()
                    pbar.update(1)
            correct_pred = 0
            total_words = len(preprocessed_data['val_words'])
            with tqdm(total = total_words, desc = 'Beam') as pbar_:
                for word, translation in zip(preprocessed_data['val_words'], preprocessed_data['val_translations']):
                    ans = beam_search(model, word, preprocessed_data, params)
                    if ans == translation[1:-1]:
                        correct_pred += 1
                    pbar_.update(1)
        val_accuracy_beam = (correct_pred / total_words) * 100
        val_accuracy = (val_correct_pred / val_total_words) * 100
        val_loss = val_total_loss / len(val_data)
        print(f'''Epoch : {epoch+1}
              Train Accuracy : {train_accuracy:.4f} Train Loss : {train_loss:.4f}
              Validation Accuracy : {val_accuracy:.4f}, Validation Loss : {val_loss:.4f}
              Validation Accuracy Beam : {val_accuracy_beam:.4f},  Correctly predicted : {correct_pred}/{total_words}''')
        if params['w_log']:
            wandb.log(
                    {
                        'epoch': epoch+1,
                        'training_loss' : train_loss,
                        'training_accuracy' : train_accuracy,
                        'validation_loss' : val_loss,
                        'validation_accuracy' : val_accuracy,
                        'validation_accuracy_beam' : val_accuracy_beam,
                        'correctly_predicted' : correct_pred
                    }
                )
    return model, val_accuracy, val_accuracy_beam

# QUESTION 1

# HYPERPARAMETERS

In [178]:
params = {
    # 'dataset_path' : r'C:\Users\gragh\OneDrive\Desktop\Codes\CS6910 DL\Assignment 3\DataSet\aksharantar_sampled',
    'language' : 'hin',
    'dataset_path' : '/kaggle/input/dl-ass3/aksharantar_sampled',
    'embedding_size': 256,
    'hidden_size': 512,
    'num_layers': 2,
    'cell_type': 'GRU',
    'dropout': 0.5,
    'optimizer' : 'adam',
    'learning_rate': 0.001,
    'batch_size': 64,
    'num_epochs': 1,
    'teacher_fr' : 0.5,
    'length_penalty' : 0.6,
    'beam_width': 1,
    'bi_dir' : False,
    'w_log' : 0
}
preprocessed_data = loadData(params)
tensors = create_tensor(preprocessed_data)

if params['cell_type'] == 'LSTM':
    encoder = EncoderLSTM(params, preprocessed_data).to(device)
    decoder = DecoderLSTM(params, preprocessed_data).to(device)
    model = Seq2SeqLSTM(encoder, decoder, params, preprocessed_data).to(device)
else:    
    encoder = Encoder(params, preprocessed_data).to(device)
    decoder = Decoder(params, preprocessed_data).to(device)
    model = Seq2Seq(encoder, decoder, params, preprocessed_data).to(device)  

criterion = nn.CrossEntropyLoss()
optimizer = get_optim(model,params)
# Print total number of parameters in the model
total_parameters = get_total_parameters(model)
print(f'Total Trainable Parameters: {total_parameters}')

if params['w_log']:
    wandb.init(project = 'DL-Assignment-3')
    wandb.run.name = (
        'check_e' + str(params['num_epochs']) +
        '_c:' + params['cell_type'] +
        '_es:' + str(params['embedding_size']) +
        '_hs:' + str(params['hidden_size']) +
        '_nl:' + str(params['num_layers']) +
        '_o:' + params['optimizer'] +
        '_lr:' + str(params['learning_rate']) +
        '_bs:' + str(params['batch_size']) +
        '_tf:' + str(params['teacher_fr']) +
        '_lp:' + str(params['length_penalty']) +
        '_b:' + str(params['bi_dir']) +
        '_bw:' + str(params['beam_width'])
    )
trained_model, _, _ = train(model, criterion, optimizer, preprocessed_data, tensors, params)
if params['w_log']:
    wandb.finish()

Total Trainable Parameters: 5577028


Training: 100%|██████████| 800/800 [00:29<00:00, 26.94it/s]
Validation: 100%|██████████| 64/64 [00:00<00:00, 79.56it/s]
Beam: 100%|██████████| 4096/4096 [00:30<00:00, 132.50it/s]

Epoch : 1
              Train Accuracy : 41.5914 Train Loss : 2.0835
              Validation Accuracy : 59.7087, Validation Loss : 1.4192
              Validation Accuracy Beam : 15.7471,  Correctly predicted : 645/4096





In [177]:
# def predict(model, word, input_char_index, output_char_index, reverse_target_char_index):
#     data, word_t = np.zeros((len(input_char_index),1), dtype= int), ''
#     t_z = 0
#     for t, char in enumerate(word):
#         data[t, 0] = input_char_index[char]
#     t_z = t+1   
#     data[t_z :,0] = input_char_index[preprocessed_data['EOS']]
#     data = torch.tensor(data,dtype = torch.int64).to(device)
#     with torch.no_grad():
#         hidden, cell = model.encoder(data)
#     out_t = output_char_index[preprocessed_data['SOS']]    
#     out_chr_reshape = np.array(out_t).reshape(1,)    
#     x = torch.tensor(out_chr_reshape).to(device)

#     for t in range(1, len(output_char_index)):
#         output, hidden, cell = model.decoder(x, hidden, cell)
#         ch = reverse_target_char_index[output.argmax(1).item()]
#         if ch != preprocessed_data['EOS']:
#             word_t = word_t+ch
#         else:
#             break
#     return word_t

# words = ['harsh', 'iit', 'madras', 'nirav', 'nidhi', 'nishchal', 'nishant', 'neymar', 'neha', 'raghav', 'rahul', 'rohit', 'hahahahaha', 'ohohohoh']
# for w in words:
#     output_sequence = predict(trained_model, w, preprocessed_data['input_index'], preprocessed_data['output_index'], preprocessed_data['output_index_rev'])
#     print(w,'->',output_sequence)
# for w in preprocessed_data['val_words'][:10]:
#     output_sequence = predict(trained_model, w[:-1], preprocessed_data['input_index'], preprocessed_data['output_index'], preprocessed_data['output_index_rev'])
#     print(w[:-1],'->',output_sequence)

# words = ['harsh', 'iit', 'madras', 'nirav', 'nidhi', 'nishchal', 'nishant', 'neymar', 'neha', 'raghav', 'rahul', 'rohit', 'hahahahaha', 'ohohohoh', 'anshuman', 'nilgiri']
# for w in words:
#     output_sequence = beam_search(trained_model, w, preprocessed_data,1)
#     print(w,'->',output_sequence)

# QUESTION 2

In [None]:
sweep_config = {
            'name': 'sweep check',
            'method': 'random',
            'metric': { 'goal': 'maximize','name': 'Accuracy'},
            'parameters': 
                {
                    'language' : 'hin',
                    'dataset_path' : '/kaggle/input/dl-ass3/aksharantar_sampled',
                    'num_epochs': {'values': [10, 15, 20]},
                    'cell_type': {'values': ['RNN', 'LSTM', 'GRU']},
                    'embedding_size': {'values': [128, 256, 512]},
                    'hidden_size': {'values': [128, 256, 512, 1024]},
                    'num_layers': {'values': [1, 2, 3]},
                    'dropout': {'values': [0.3, 0.5, 0.7]},
                    'optimizer' : {'values' : ['adam', 'sgd', 'rmsprop', 'adagrad']},
                    'learning_rate': {'values': [0.001, 0.005, 0.01, 0.1]},
                    'batch_size': {'values': [32, 64]},
                    'teacher_fr' : {'values': [0.3, 0.5, 0.7]},
                    'length_penalty' : {'values': [0.4, 0.5, 0.6]},
                    'bi_dir' : {'values': [True, False]},
                    'beam_width': {'values': [1, 2, 3, 4, 5]}
                }
            }

In [None]:
def run_sweep():
    init = wandb.init(project = 'DL-Assignment-3')
    config = init.config
    params = {
        'language' : 'hin',
        'dataset_path' : '/kaggle/input/dl-ass3/aksharantar_sampled',
        'num_epochs': config.num_epochs,
        'cell_type': config.cell_type,
        'embedding_size': config.embedding_size,
        'hidden_size': config.hidden_size,
        'num_layers': config.num_layers,
        'dropout': config.dropout,
        'optimizer' : config.optimizer,
        'learning_rate': config.learning_rate,
        'batch_size': config.batch_size,
        'teacher_fr' : config.teacher_fr,
        'length_penalty' : config.length_penalty,
        'bi_dir' : config.bi_dir,
        'beam_width' : config.beam_width,
        'w_log' : 1
    }
    
    wandb.run.name = (
        'Q2_e' + str(params['num_epochs']) +
        '_c:' + params['cell_type'] +
        '_es:' + str(params['embedding_size']) +
        '_hs:' + str(params['hidden_size']) +
        '_nl:' + str(params['num_layers']) +
        '_o:' + params['optimizer'] +
        '_lr:' + str(params['learning_rate']) +
        '_bs:' + str(params['batch_size']) +
        '_tf:' + str(params['teacher_fr']) +
        '_lp:' + str(params['length_penalty']) +
        '_b:' + str(params['bi_dir']) +
        '_bw:' + str(params['beam_width'])
    )
    
    preprocessed_data = loadData(params)
    tensors = create_tensor(preprocessed_data)
    encoder = Encoder(params, preprocessed_data).to(device)
    decoder = Decoder(params, preprocessed_data).to(device)
    model = Seq2Seq(encoder, decoder, params, preprocessed_data).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = get_optim(model,params)
    # Print total number of parameters in the model
    total_parameters = get_total_parameters(model)
    print(f'Total Trainable Parameters: {total_parameters}')

    _, v_acc, v_acc_beam = train(model, criterion, optimizer, preprocessed_data, tensors, params)
    wandb.log({'Accuracy': v_acc})

In [None]:
# sweep_id = wandb.sweep(sweep_config, project='DL-Assignment-3')
# wandb.agent(sweep_id, run_sweep, count = 20)
# wandb.finish()