In [1]:
import numpy as np
import pandas as pd
import torch.nn as nn
import torch
from types import SimpleNamespace
from torch.nn.utils.rnn import pad_sequence
import torch.optim as optim
import random
import torch.nn.functional as F
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.font_manager import FontProperties
import wandb
from utilities import *

In [8]:
# Load the train / validation / test splits for the selected language.
train, valid, test = load_data(data_path, lang)

# Add the start and end marker characters to every split.
add_start_end(train)
add_start_end(valid)
add_start_end(test)

# Collect the distinct source / target characters of each split.
train_src_chars, train_target_chars = get_unique_chars(train)
valid_src_chars, valid_target_chars = get_unique_chars(valid)
test_src_chars, test_target_chars = get_unique_chars(test)

# '*' is an extra target symbol meant to stand in for characters that are
# unknown at validation / test time.
train_target_chars.add('*')

# Character <-> integer lookup tables, built from the training split only.
src_char_idx, src_idx_char = get_char_map(train_src_chars)
target_char_idx, target_idx_char = get_char_map(train_target_chars)

# Vocabulary sizes; the "+ 1" reserves one extra index for padding.
encoder_vocab_size = 1 + len(src_char_idx)
decoder_vocab_size = 1 + len(target_char_idx)

# Longest source (column 0) and target (column 1) strings in the training split.
max_seq_length = train[0].apply(len).max()
max_target_length = train[1].apply(len).max()

# Integer-encode every split. All three calls receive max_seq_length;
# max_target_length is computed above but is not passed to vectorize here.
train_src_int, train_target_int = vectorize(
    train, src_char_idx, target_char_idx, max_seq_length
)
valid_src_int, valid_target_int = vectorize(
    valid, src_char_idx, target_char_idx, max_seq_length
)
test_src_int, test_target_int = vectorize(
    test, src_char_idx, target_char_idx, max_seq_length
)



def _run_epoch(model, criterion, src_int, target_int, batch_size, optimizer=None):
    """Run one pass over a dataset and return ``(mean_loss, mean_accuracy)``.

    When ``optimizer`` is given the model is put in training mode and its
    parameters are updated after every batch (with gradient-norm clipping at 1).
    When ``optimizer`` is ``None`` the model is put in eval mode, gradients are
    disabled and the model is called with ``prediction=True``.

    Args:
        model: the sequence-to-sequence model; called as ``model(x, y)``.
        criterion: loss function applied to flattened outputs and targets.
        src_int, target_int: encoded source / target data handed to ``get_batch``.
        batch_size: requested batch size handed to ``get_batch``.
        optimizer: optimizer to step, or ``None`` for evaluation.

    Returns:
        Tuple ``(loss, acc)``: the loss averaged over the samples actually seen
        (each batch weighted by its real size) and the mean of the per-batch
        values returned by ``cal_acc``.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    acc_sum = 0
    samples_seen = 0
    batches_seen = 0

    with torch.set_grad_enabled(training):
        for data in get_batch(src_int, target_int, batch_size):
            # Transposed so that dimension 1 indexes the examples of the batch.
            x = data[0].to(torch.int64).T
            y = data[1].to(torch.int64).T

            if training:
                outputs, _ = model(x, y)
            else:
                # prediction=True disables teacher forcing.
                outputs, _ = model(x, y, prediction=True)

            logits = outputs.reshape(-1, outputs.shape[2])
            # Shift character ids down by one to get class ids; padding (0)
            # would become -1 and is clamped back to 0.
            target = (y.reshape(-1) - 1).clamp(min=0)
            loss = criterion(logits, target)

            if training:
                optimizer.zero_grad()
                loss.backward()
                # Gradient clipping.
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
                optimizer.step()

            # Weight by the real batch size: the final batch may be smaller
            # than batch_size, so multiplying by batch_size would over-count it.
            current_batch = x.shape[1]
            loss_sum += loss.item() * current_batch
            samples_seen += current_batch

            acc_sum += cal_acc(outputs, y)
            batches_seen += 1

    # max(..., 1) avoids a ZeroDivisionError when get_batch yields nothing.
    return loss_sum / max(samples_seen, 1), acc_sum / max(batches_seen, 1)


def main():
    """Train a ``Seq2Seq`` model with the global ``config`` and return it.

    Copies the vocabulary sizes and maximum sequence length computed at module
    level onto ``config``, builds the model on ``device``, then alternates one
    training pass and one validation pass per epoch, printing the metrics of
    each epoch.

    Returns:
        The trained model (left in eval mode by the last validation pass).
    """
    config.encoder_vocab_size = encoder_vocab_size
    config.decoder_vocab_size = decoder_vocab_size
    config.max_seq_length = max_seq_length

    model = Seq2Seq(config).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    # NOTE(review): targets are shifted by -1 and negatives clamped to 0, so
    # with ignore_index=0 both padding (id 0) and character id 1 are excluded
    # from the loss. Confirm that dropping id 1 is intended.
    criterion = nn.CrossEntropyLoss(ignore_index=0)

    for epoch in range(config.epochs):
        train_loss, train_acc = _run_epoch(
            model, criterion, train_src_int, train_target_int,
            config.batch_size, optimizer=optimizer,
        )
        valid_loss, valid_acc = _run_epoch(
            model, criterion, valid_src_int, valid_target_int,
            config.batch_size,
        )
        print(f'Epoch: {epoch+1} Train Loss: {train_loss:.4f} Valid Loss: {valid_loss:.4f} Train Acc: {train_acc:.4f}  Valid Acc: {valid_acc:.4f}')
    return model


# Hyperparameters of this run, exposed as attributes (config.batch_size, ...).
config = SimpleNamespace(
    batch_size=64,
    bidirectional='Yes',
    cell_type='LSTM',
    decoder_num_layers=3,
    dropout=0.2,
    embedding_size=128,
    encoder_num_layers=3,
    epochs=15,
    hidden_size=256,
)

# Train with the settings above and keep the resulting model for later cells.
model = main()

Epoch: 1 Train Loss: 2.3823 Valid Loss: 1.6061 Train Acc: 0.0046  Valid Acc: 0.0562
Epoch: 2 Train Loss: 1.0629 Valid Loss: 1.1384 Train Acc: 0.1237  Valid Acc: 0.2603
Epoch: 3 Train Loss: 0.7914 Valid Loss: 1.0388 Train Acc: 0.2440  Valid Acc: 0.3540
Epoch: 4 Train Loss: 0.6971 Valid Loss: 1.0026 Train Acc: 0.3098  Valid Acc: 0.3540
Epoch: 5 Train Loss: 0.6347 Valid Loss: 0.9763 Train Acc: 0.3563  Valid Acc: 0.3918
Epoch: 6 Train Loss: 0.5969 Valid Loss: 0.9754 Train Acc: 0.3857  Valid Acc: 0.4019
Epoch: 7 Train Loss: 0.5695 Valid Loss: 0.9564 Train Acc: 0.4113  Valid Acc: 0.4175
Epoch: 8 Train Loss: 0.5497 Valid Loss: 0.9606 Train Acc: 0.4316  Valid Acc: 0.4080
Epoch: 9 Train Loss: 0.5290 Valid Loss: 0.9601 Train Acc: 0.4497  Valid Acc: 0.4260
Epoch: 10 Train Loss: 0.5163 Valid Loss: 0.9544 Train Acc: 0.4659  Valid Acc: 0.4221
Epoch: 11 Train Loss: 0.5003 Valid Loss: 0.9813 Train Acc: 0.4819  Valid Acc: 0.4346
Epoch: 12 Train Loss: 0.4911 Valid Loss: 0.9904 Train Acc: 0.4930  Valid A

In [9]:
# Decode the test split with the trained model, report the mean per-batch
# accuracy and write (source, target, prediction) triples to a CSV file.

target_end_index = target_char_idx['$']  # id of '$', used to detect the end of a prediction
li = []
batch_no = 0
test_acc = 0

model.eval()  # make inference mode explicit rather than relying on main()'s last call
with torch.no_grad():  # prediction only: do not build the autograd graph
    for data in get_batch(test_src_int, test_target_int, config.batch_size):
        batch_no += 1
        # Transposed so that each column is one example.
        x = data[0].to(torch.int64).T
        y = data[1].to(torch.int64).T
        target = y.detach().cpu().numpy()
        src = x.detach().cpu().numpy()

        outputs, _ = model(x, y, prediction=True)
        test_acc += cal_acc(outputs, y)

        # +1 maps argmax class ids back to character ids (training subtracts 1
        # from the target ids before computing the loss).
        out = outputs.argmax(axis=2).detach().cpu().numpy() + 1

        for idx in range(out.shape[1]):
            each_out = out[:, idx]
            each_target = target[:, idx]
            each_src = src[:, idx]

            # Slice up to (not including) the last non-zero id of each sequence.
            src_last_index = np.max(np.nonzero(each_src))
            src_str = decode_src(each_src[:src_last_index], src_idx_char)

            target_last_index = np.max(np.nonzero(each_target))
            target_str = decode_target(each_target[:target_last_index], target_idx_char, target_end_index)

            # Cut the prediction at its first end marker. If the model never
            # emitted one, keep the whole sequence instead of reusing a stale
            # (or undefined) index from a previous example.
            end_hits = np.flatnonzero(each_out == target_end_index)
            pred_end_index = end_hits[0] if end_hits.size else len(each_out)
            pred_str = decode_target(each_out[:pred_end_index], target_idx_char, target_end_index)

            li.append([src_str, target_str, pred_str])

test_acc /= max(batch_no, 1)  # guard against an empty batch iterator
print(f"test accuracy is:  {test_acc:.4f}")
df = pd.DataFrame(li, columns=['Source', 'Target', 'Predicted'])
df.to_csv('Test_predictions_Vanilla.csv', index=False)

test accuracy is:  0.4111


In [12]:
# Log the final test accuracy to Weights & Biases.
# SECURITY: no API key is stored in the notebook. wandb.login() without a key
# picks up credentials from the WANDB_API_KEY environment variable or from a
# previous `wandb login`, and prompts otherwise. Any key that was ever
# committed in a notebook should be revoked and regenerated.
wandb.login()
wandb.init(project='Best_Model_Vanilla')

wandb.log({'test accuracy': test_acc})

wandb.finish()