In [1]:
import wandb
import numpy as np
import pandas as pd
import torch.nn as nn
import torch
from types import SimpleNamespace
from torch.nn.utils.rnn import pad_sequence
import torch.optim as optim
import random
import torch.nn.functional as F
from utilities import *

In [None]:
# Load the three dataset splits (data_path and lang are not defined in this cell).
train, valid, test = load_data(data_path, lang)

# Add the start and end characters to every split.
for split in (train, valid, test):
    add_start_end(split)

# Collect the unique source/target characters of each split.
train_src_chars, train_target_chars = get_unique_chars(train)
valid_src_chars, valid_target_chars = get_unique_chars(valid)
test_src_chars, test_target_chars = get_unique_chars(test)
# Extra target character used to handle unknowns in the valid and test data.
train_target_chars.add('*')

# Character <-> integer lookup tables, built from the training split only.
src_char_idx, src_idx_char = get_char_map(train_src_chars)
target_char_idx, target_idx_char = get_char_map(train_target_chars)

# One additional slot on top of the mapped characters is reserved for padding.
encoder_vocab_size = len(src_char_idx) + 1
decoder_vocab_size = len(target_char_idx) + 1

# Longest entry in column 0 (source side) and column 1 (target side) of train.
max_seq_length = train[0].apply(len).max()
max_target_length = train[1].apply(len).max()

# Convert every split to integer sequences.
# NOTE(review): max_target_length is computed above but only max_seq_length is
# passed to vectorize here - confirm that is intended.
train_src_int, train_target_int = vectorize(train, src_char_idx, target_char_idx, max_seq_length)
valid_src_int, valid_target_int = vectorize(valid, src_char_idx, target_char_idx, max_seq_length)
test_src_int, test_target_int = vectorize(test, src_char_idx, target_char_idx, max_seq_length)


def _run_epoch(model, criterion, src_int, target_int, batch_size, optimizer=None):
    '''
    Run one full pass over a dataset and return (mean loss, mean batch accuracy).

    When `optimizer` is given, the model is put in train mode and its weights
    are updated after every batch. When it is None, the model is put in eval
    mode, gradient tracking is disabled and the model is called with
    prediction=True (teacher forcing disabled).

    The loss is averaged over the samples actually seen, so a smaller final
    batch is not over-weighted. The accuracy is the plain mean of the
    per-batch values returned by cal_acc.
    '''
    training = optimizer is not None
    if training:
        model.train()
    else:
        model.eval()

    loss_sum = 0.0
    acc_sum = 0
    samples_seen = 0
    batches_seen = 0
    with torch.set_grad_enabled(training):
        for batch in get_batch(src_int, target_int, batch_size):
            # assumes get_batch yields batch-first tensors, so the first
            # dimension is the number of samples in this batch - TODO confirm
            n_samples = len(batch[0])
            x = batch[0].to(torch.int64).T
            y = batch[1].to(torch.int64).T

            # Call the module itself rather than .forward() so that the
            # nn.Module call machinery (hooks) is not bypassed.
            if training:
                outputs, _ = model(x, y)
            else:
                outputs, _ = model(x, y, prediction=True)

            logits = outputs.reshape(-1, outputs.shape[2])
            # Shift the labels down by one and clamp the resulting -1 (from
            # index 0) back to 0, which the criterion ignores.
            # NOTE(review): with ignore_index=0 this also drops every target
            # whose original index is 1 from the loss - confirm intended.
            target = (y.reshape(-1) - 1).clamp(min=0)
            loss = criterion(logits, target)

            if training:
                optimizer.zero_grad()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)  # gradient clipping
                optimizer.step()  # update parameters

            loss_sum += loss.item() * n_samples
            acc_sum += cal_acc(outputs, y)
            samples_seen += n_samples
            batches_seen += 1

    return loss_sum / samples_seen, acc_sum / batches_seen


def main():
    '''
    Train and validate one Seq2Seq model for the current WandB sweep run.

    This function is called by the WandB agent with a different configuration
    for every run. It names the run after its hyperparameters, trains for
    config.epochs epochs with Adam, prints and logs the train/validation loss
    and accuracy after every epoch, and returns the trained model.

    Relies on names defined outside this function: the vocabulary sizes,
    max_seq_length and the vectorized train/valid data, plus Seq2Seq, device,
    get_batch and cal_acc.
    '''
    wandb.init()
    config = wandb.config
    run_name = f'Cell-{config.cell_type} Hidden-{config.hidden_size} Embedding-{config.embedding_size} Bidir-{config.bidirectional} Dropout -{config.dropout} EL-{config.encoder_num_layers} DL-{config.decoder_num_layers}'
    wandb.run.name = run_name
    # Seq2Seq is built from the config object, so the data-dependent sizes
    # are stored on it alongside the swept hyperparameters.
    config.encoder_vocab_size = encoder_vocab_size
    config.decoder_vocab_size = decoder_vocab_size
    config.max_seq_length = max_seq_length

    model = Seq2Seq(config).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss(ignore_index=0)

    for epoch in range(config.epochs):
        train_loss, train_acc = _run_epoch(
            model, criterion, train_src_int, train_target_int,
            config.batch_size, optimizer=optimizer,
        )
        valid_loss, valid_acc = _run_epoch(
            model, criterion, valid_src_int, valid_target_int,
            config.batch_size,
        )
        print(f'Epoch: {epoch+1} Train Loss: {train_loss:.4f} Valid Loss: {valid_loss:.4f} Train Acc: {train_acc:.4f}  Valid Acc: {valid_acc:.4f}')
        wandb.log({'train accuracy':train_acc,'train loss':train_loss,'valid accuracy':valid_acc,'valid loss':valid_loss})
    wandb.finish()
    return model


# Bayesian hyperparameter sweep that maximizes the logged 'valid accuracy'.
# Every swept parameter is a discrete list of candidate values.
sweep_config = {
    "method": "bayes",
    "name": "Vanilla Sweep",
    "metric": {"goal": "maximize", "name": "valid accuracy"},
    "parameters": {
        param: {"values": choices}
        for param, choices in (
            ("hidden_size", [32, 64, 128, 256]),
            ("batch_size", [64, 128, 256]),
            ("encoder_num_layers", [1, 2, 3]),
            ("decoder_num_layers", [1, 2, 3]),
            ("embedding_size", [128, 256]),
            ("dropout", [0, 0.2, 0.3]),
            ("epochs", [15]),
            ("cell_type", ["LSTM", "GRU", "RNN"]),
            ("bidirectional", ["Yes", "No"]),
        )
    },
}


# Register the sweep under the 'DL_3_Assign' project and keep its id.
sweep_id = wandb.sweep(sweep_config, project='DL_3_Assign')
# Let the agent call main() for up to 100 sweep configurations.
wandb.agent(sweep_id, function=main, count=100)
# main() already finishes each run; this call comes after the agent returns.
wandb.finish()