In [1]:
import torch
from torch import optim
from functools import partial
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
import sys
import os
import torch.nn as nn
import numpy as np



# Directories are resolved relative to the notebook's working directory:
# the helper modules live in ../py_files and checkpoints in ../saved_models.
path_to_helper_files = os.path.join(os.pardir, 'py_files')
base_saved_models_dir = os.path.join(os.pardir, 'saved_models')

# The helper directory has to be on sys.path before the project modules
# below can be imported, so this call must stay ahead of those imports.
sys.path.append(path_to_helper_files)

import global_variables
import dataset_helper
import nnet_models
import train_utilities

# Device shared by every model built in this notebook.
device = global_variables.device








In [2]:
def print_bleu_scores(source_name, target_name, embed_dim, rnn_layers, source_rnn_type, attention):
    """Load a saved encoder/decoder pair and print its scores on the val and test sets.

    The checkpoint directory is derived from the arguments, so a model saved
    under exactly this configuration must already exist below
    ``base_saved_models_dir``.

    Parameters
    ----------
    source_name : str
        Source language code; only 'vi' and 'zh' are accepted.
    target_name : str
        Target language code; only 'en' is accepted.
    embed_dim : int
        Used as the source embedding size, source hidden size and target
        embedding size; the target hidden size is ``2 * embed_dim``.
    rnn_layers : int
        Number of RNN layers for both the encoder and the decoder.
    source_rnn_type : str
        Passed to ``nnet_models.EncoderRNN`` as ``rnn_type``
        (the notebook uses 'lstm' and 'gru').
    attention : bool
        Passed to ``nnet_models.DecoderRNN`` as ``attention``.

    Returns
    -------
    None
        The configuration and the eight scores are printed to stdout.

    Raises
    ------
    SystemExit
        If the (source_name, target_name) pair is not vi->en or zh->en.
    """
    # Reject unsupported language pairs before touching the disk.
    if (source_name, target_name) not in (('vi', 'en'), ('zh', 'en')):
        sys.exit(source_name+'->'+target_name+' is invalid!')

    source_embed_dim = embed_dim
    source_hidden_size = embed_dim
    target_embed_dim = embed_dim
    target_hidden_size = 2*embed_dim
    source_rnn_layers = rnn_layers
    target_rnn_layers = rnn_layers

    # Only the dev and test files are needed for evaluation.
    data_dir = '../Data/iwslt-' + source_name + '-' + target_name
    data_paths = {
        'val': (data_dir + '/dev.tok.' + source_name, data_dir + '/dev.tok.' + target_name),
        'test': (data_dir + '/test.tok.' + source_name, data_dir + '/test.tok.' + target_name),
    }

    pair_name = source_name+'2'+target_name
    saved_models_dir = os.path.join(base_saved_models_dir, pair_name)

    # The same ordered settings build both the checkpoint folder name and
    # the human-readable configuration banner.
    settings = [
        ('source_embed_dim', source_embed_dim),
        ('source_hidden_size', source_hidden_size),
        ('source_rnn_layers', source_rnn_layers),
        ('source_rnn_type', source_rnn_type),
        ('target_embed_dim', target_embed_dim),
        ('target_hidden_size', target_hidden_size),
        ('target_rnn_layers', target_rnn_layers),
        ('attention', attention),
    ]
    rendered_settings = [name + '=' + str(value) for name, value in settings]

    pth_save_folder_name = pair_name + '_' + '-'.join(rendered_settings)
    pth_saved_dir = os.path.join(saved_models_dir, pth_save_folder_name)

    config_string = pair_name + '\n' + '\n '.join(rendered_settings)
    print(config_string)
    sys.stdout.flush()

    saved_language_model_dir = os.path.join(saved_models_dir, 'lang_obj')

    dataset_dict = {}
    dataloader_dict = {}
    for split in ('val', 'test'):
        source_path, target_path = data_paths[split]
        # Both splits are built with val = True, as in the original notebook.
        dataset_dict[split] = dataset_helper.LanguagePair(source_name = source_name, target_name = target_name,
                                                          source_path = source_path, target_path = target_path,
                                                          lang_obj_path = saved_language_model_dir, val = True)
        # NOTE(review): shuffle = True is kept from the original; presumably
        # sentence order does not affect the score -- confirm.
        dataloader_dict[split] = DataLoader(dataset_dict[split], batch_size = 1,
                                            collate_fn = dataset_helper.vocab_collate_func_val,
                                            shuffle = True, num_workers = 0)

    encoder = nnet_models.EncoderRNN(dataset_dict['val'].source_lang_obj.n_words,
                                     embed_dim = source_embed_dim,
                                     hidden_size = source_hidden_size,
                                     rnn_layers = source_rnn_layers,
                                     rnn_type = source_rnn_type).to(device)

    decoder = nnet_models.DecoderRNN(dataset_dict['val'].target_lang_obj.n_words,
                                     embed_dim = target_embed_dim,
                                     hidden_size = target_hidden_size,
                                     n_layers = target_rnn_layers,
                                     attention = attention).to(device)

    # map_location lets checkpoints written on a GPU load on whatever
    # device this notebook is currently running on.
    encoder.load_state_dict(torch.load(os.path.join(pth_saved_dir, 'encoder.pth'), map_location = device))
    decoder.load_state_dict(torch.load(os.path.join(pth_saved_dir, 'decoder.pth'), map_location = device))

    result_lines = []
    for split in ('val', 'test'):
        loader = dataloader_dict[split]
        target_lang_obj = dataset_dict[split].target_lang_obj

        greedy_with_unknown = train_utilities.validation_function(
            encoder, decoder, loader, target_lang_obj, keep_unk = True)
        greedy_without_unknown = train_utilities.validation_function(
            encoder, decoder, loader, target_lang_obj, keep_unk = False)
        beam_with_unknown = train_utilities.validation_beam_search(
            encoder, decoder, loader, target_lang_obj, beam_size = 3, keep_unk = True)
        beam_without_unknown = train_utilities.validation_beam_search(
            encoder, decoder, loader, target_lang_obj, beam_size = 3, keep_unk = False)

        # Each label is paired with its own score. The original printed the
        # beam score under the "greedy_with_unknown" label. The "uknown"
        # spelling is kept so the printed labels stay unchanged.
        result_lines.extend([
            split + '_bleu_greedy_with_unknown = ' + str(greedy_with_unknown),
            split + '_bleu_greedy_without_uknown = ' + str(greedy_without_unknown),
            split + '_bleu_beam_with_unknown = ' + str(beam_with_unknown),
            split + '_bleu_beam_without_unknown = ' + str(beam_without_unknown),
        ])

    result_string = '\n'.join(result_lines) + '\n'

    print(result_string)

## Chinese to English Without Attention

In [3]:
# Language pair and decoder variant evaluated by the cells below.
source_name, target_name = 'zh', 'en'
attention = False


In [4]:
# Hyper-parameter grid to evaluate; print_bleu_scores loads a saved
# checkpoint for every combination of these values.
source_rnn_type_array = [
    'lstm',
    'gru',
]
embed_dim_array = [
    256,
    512,
]
rnn_layers_array = [
    1,
    2,
]

In [None]:
# Flatten the grid into (rnn type, embedding size, depth) triples, in the
# same order nested loops would visit them: rnn type outermost, depth innermost.
_configs = [
    (rnn_type, dim, layers)
    for rnn_type in source_rnn_type_array
    for dim in embed_dim_array
    for layers in rnn_layers_array
]

for source_rnn_type, embed_dim, rnn_layers in _configs:
    print_bleu_scores(source_name, target_name, embed_dim, rnn_layers, source_rnn_type, attention)
    # Visual separator between consecutive configurations.
    print('=' * 50)
    print('\n \n \n')

zh2en
source_embed_dim=256
 source_hidden_size=256
 source_rnn_layers=1
 source_rnn_type=lstm
 target_embed_dim=256
 target_hidden_size=512
 target_rnn_layers=1
 attention=False


  "num_layers={}".format(dropout, num_layers))


## Chinese to English With Attention

In [None]:
# Switch the following cells to the attention decoder; source_name and
# target_name are not reassigned, so they keep the values set earlier.
attention = True

In [None]:
# Hyper-parameter grid for the attention runs; only the LSTM encoder is
# listed here.
source_rnn_type_array = [
    'lstm',
]
embed_dim_array = [
    256,
    512,
]
rnn_layers_array = [
    1,
    2,
]

In [None]:
# Flatten the grid into (rnn type, embedding size, depth) triples, in the
# same order nested loops would visit them: rnn type outermost, depth innermost.
_configs = [
    (rnn_type, dim, layers)
    for rnn_type in source_rnn_type_array
    for dim in embed_dim_array
    for layers in rnn_layers_array
]

for source_rnn_type, embed_dim, rnn_layers in _configs:
    print_bleu_scores(source_name, target_name, embed_dim, rnn_layers, source_rnn_type, attention)
    # Visual separator between consecutive configurations.
    print('=' * 50)
    print('\n \n \n')