In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim

import import_ipynb
from load_data import QA_Preprocessor

from abcnn import Basic_AP, Attentive_Pooling, Attentive_Pooling_1d, Attentive_Pooling_2d, ABCNN

from datetime import datetime
import argparse
import numpy as np

importing Jupyter notebook from load_data.ipynb
importing Jupyter notebook from G:\GitHub\jupyter_test\ABCNN\abcnn\basic.ipynb
importing Jupyter notebook from G:\GitHub\jupyter_test\ABCNN\abcnn\attentive_pooling.ipynb
importing Jupyter notebook from G:\GitHub\jupyter_test\ABCNN\abcnn\abcnn.ipynb


In [2]:
# Notebook configuration, expressed as an argparse parser so the same options
# can be reused from a script. Each help string states the actual default.
parser = argparse.ArgumentParser(description='ABCNN-implementation in pytorch')
parser.add_argument('--cuda', action='store_true',
                    help='use CUDA (default: False)')
parser.add_argument('--eval', action='store_true',
                    help='do evaluate (default: False)')
parser.add_argument('--dropout', type=float, default=0,
                    help='dropout applied to layers (default: 0)')
parser.add_argument('--clip', type=float, default=-1,
                    help='gradient clip, -1 means no clip (default: -1)')
parser.add_argument('--epochs', type=int, default=15,
                    help='upper epoch limit (default: 15)')
parser.add_argument('--report_step', type=int, default=1, metavar='N',
                    help='report interval in epochs (default: 1)')
parser.add_argument('--valid_step', type=int, default=1, metavar='N',
                    help='validation interval in epochs (default: 1)')
parser.add_argument('--lr', type=float, default=0.001,
                    help='initial learning rate (default: 1e-3)')
parser.add_argument('--optim', type=str, default='Adam',
                    help='optimizer to use (default: Adam)')
# NOTE: despite the option names, these two values are passed as batch sizes
# to train_per_epoch / validate.
parser.add_argument('--batchs', type=int, default=128,
                    help='training batch size (default: 128)')
parser.add_argument('--val_batchs', type=int, default=32,
                    help='validation batch size (default: 32)')
parser.add_argument('--train_data', type=str, default='data/WikiQACorpus/WikiQA.train',
                    help='the train dataset to run (default: data/WikiQACorpus/WikiQA.train)')
parser.add_argument('--dev_data', type=str, default='data/WikiQACorpus/WikiQA.dev',
                    help='the dev dataset to run (default: data/WikiQACorpus/WikiQA.dev)')
parser.add_argument('--test_data', type=str, default='data/WikiQACorpus/WikiQA.test',
                    help='the test dataset to run (default: data/WikiQACorpus/WikiQA.test)')
parser.add_argument('--seed', type=int, default=190330,
                    help='random seed (default: 190330)')
parser.add_argument('--model_name', type=str, default='AP',
                    help='the model to run: Basic_AP, AP, AP1d, AP2d or ABCNN (default: AP)')
parser.add_argument('--filter_width', type=int, default=3,
                    help='width of all filters (default: 3)')
parser.add_argument('--num_layer', type=int, default=1,
                    help='number of layers (default: 1)')
parser.add_argument('--embedding_size', type=int, default=300,
                    help='dimension of embeddings (default: 300)')
parser.add_argument('--max_length', type=int, default=40,
                    help='maximum of tokens for each sentence (default: 40)')

# Inside the notebook there is no real command line, so the options for this
# run are passed explicitly.
args = parser.parse_args(['--cuda', '--eval', '--model_name', 'AP2d'])

In [3]:
# Seed every RNG this notebook draws from: torch for the model, and NumPy
# because the training batches are shuffled with np.random.shuffle.
torch.manual_seed(args.seed)
np.random.seed(args.seed)

if torch.cuda.is_available():
    if not args.cuda:
        print("WARNING: Please use a CUDA device. Run with --cuda")
elif args.cuda:
    # CUDA was requested but is not present; run on CPU rather than crash
    # later when tensors are moved to the GPU.
    print("WARNING: CUDA requested but not available; falling back to CPU")
    args.cuda = False

print(args)

Namespace(batchs=128, clip=-1, cuda=True, dev_data='data/WikiQACorpus/WikiQA.dev', dropout=0, embedding_size=300, epochs=15, eval=True, filter_width=3, lr=0.001, max_length=40, model_name='AP2d', num_layer=1, optim='Adam', report_step=1, seed=190330, test_data='data/WikiQACorpus/WikiQA.test', train_data='data/WikiQACorpus/WikiQA.train', val_batchs=32, valid_step=1)


In [4]:
def _split_to_tensors(split):
    """Convert one preprocessed split (dict with 'q', 'a', 'y') into tensors.

    Questions and answers become long tensors, labels become a float tensor.
    """
    questions = torch.tensor(split['q'], dtype=torch.long)
    answers = torch.tensor(split['a'], dtype=torch.long)
    labels = torch.tensor(split['y'], dtype=torch.float)
    return questions, answers, labels


# --- training split: loaded by the preprocessor's constructor ---
data_preprocessor = QA_Preprocessor(args.train_data, length_limit=args.max_length)
data = data_preprocessor.data
Q_train, A_train, Y_train = _split_to_tensors(data)
assert Q_train.shape[0] == A_train.shape[0] == Y_train.shape[0]

# --- dev split: the same preprocessor is reused after clearing its data ---
data_preprocessor.reset_data()
data_preprocessor.load_raw_file(args.dev_data)
data = data_preprocessor.data
Q_dev, A_dev, Y_dev = _split_to_tensors(data)

# --- test split: only loaded when evaluation was requested. It is kept in the
# preprocessor's raw form (not converted to tensors here); evaluate() converts
# one entry at a time. ---
Q_test = A_test = Y_test = None
if args.eval:
    data_preprocessor.reset_data()
    data_preprocessor.load_raw_eval_file(args.test_data)
    data = data_preprocessor.data
    Q_test, A_test, Y_test = data['q'], data['a'], data['y']

# The vocabulary is read after every requested split has been loaded.
vocab_size = len(data_preprocessor.vocab)
print('\tVocabulary size:', vocab_size)

[2019-04-03 00:51:54.200763] Data_load done. Max token size: 409
	Vocabulary size: 8869


In [5]:
# Build the model selected by --model_name. All models take
# (vocab_size, embedding_size, max_length); every one except Basic_AP also
# takes the convolution kernel size.
model = None
if args.model_name == 'Basic_AP':
    model = Basic_AP(vocab_size, args.embedding_size, args.max_length)
elif args.model_name == 'AP':
    model = Attentive_Pooling(vocab_size, args.embedding_size, args.max_length,
                              kernel_size=args.filter_width)
elif args.model_name == 'AP1d':
    model = Attentive_Pooling_1d(vocab_size, args.embedding_size, args.max_length,
                                 kernel_size=args.filter_width)
elif args.model_name == 'AP2d':
    model = Attentive_Pooling_2d(vocab_size, args.embedding_size, args.max_length,
                                 kernel_size=args.filter_width)
elif args.model_name == 'ABCNN':
    model = ABCNN(vocab_size, args.embedding_size, args.max_length,
                  kernel_size=args.filter_width)
else:
    # Raise instead of exit(): an exception dependably aborts the cell with a
    # traceback, so no later line can run with model still None.
    raise ValueError('No such model name: {!r}'.format(args.model_name))

if args.cuda:
    model.cuda()

# Only these two optimizers are supported. Fail immediately for anything
# else rather than leaving optimizer as None until the first training step.
if args.optim in ('Adam', 'RMSprop'):
    optimizer = getattr(optim, args.optim)(model.parameters(), lr=args.lr)
else:
    raise ValueError("Unsupported optimizer: {!r} (use 'Adam' or 'RMSprop')".format(args.optim))

#criterion = nn.CosineEmbeddingLoss(margin=0.1)
# Sum-reduced loss: the training/validation loops divide by the number of
# examples themselves.
criterion = nn.BCELoss(reduction='sum')
#criterion = nn.MSELoss(reduction='sum')

# Summarise parameters by name and shape instead of dumping every tensor.
for name, parameter in model.named_parameters():
    print(name, tuple(parameter.shape))

# Smoke test: push two training pairs through the model on the device the
# model is actually on (previously this called .cuda() unconditionally).
x_q = Q_train[:2]
x_a = A_train[:2]
if args.cuda:
    x_q, x_a = x_q.cuda(), x_a.cuda()
with torch.no_grad():
    smoke_output = model(x_q, x_a)
smoke_output

In [6]:
def generate_batch_index(total_length, batch_size, do_shuffle = True):
    """Split the indices 0..total_length-1 into consecutive mini-batches.

    Args:
        total_length: number of examples to index.
        batch_size: maximum number of indices per batch; must be positive.
        do_shuffle: when True, the indices are shuffled in place with
            np.random.shuffle before being split, so the result depends on
            NumPy's global RNG state.

    Returns:
        A list of 1-D NumPy index arrays. Every batch has batch_size entries
        except possibly the last, which holds the remainder. The list is
        empty when total_length is 0.

    Raises:
        ValueError: if batch_size is not positive. Previously 0 raised a bare
            ZeroDivisionError and negative values silently gave no batches.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer, got {}".format(batch_size))

    train_idx_list = np.arange(total_length)
    if do_shuffle:
        np.random.shuffle(train_idx_list)

    # Slicing past the end clamps automatically, so the last batch is simply
    # shorter and no empty batch is ever produced.
    return [train_idx_list[start_idx : start_idx + batch_size]
            for start_idx in range(0, total_length, batch_size)]

def train_per_epoch(batch_size):
    """Run one optimisation pass over the training set.

    Uses the notebook-level model, optimizer, criterion, args and the
    Q_train / A_train / Y_train tensors.

    Args:
        batch_size: number of examples per mini-batch.

    Returns:
        The sum over batches of (batch loss / number of training examples).
        With the sum-reduced criterion configured in this notebook that is
        the mean per-example loss for the epoch.
    """
    model.train()
    total_loss = 0
    count = len(Q_train)
    batch_indices = generate_batch_index(count, batch_size)
    for idx_list in batch_indices:
        # Plain tensors are enough; wrapping in autograd.Variable is a
        # deprecated no-op.
        x_q = Q_train[idx_list]
        x_a = A_train[idx_list]
        y = Y_train[idx_list]
        if args.cuda:
            x_q, x_a, y = x_q.cuda(), x_a.cuda(), y.cuda()
        optimizer.zero_grad()

        output = model(x_q, x_a)
        loss = criterion(output, y)

        total_loss += loss.item() / count
        loss.backward()
        # Clip only after backward(): that is when the gradients exist.
        # Clipping before it (as the code previously did) acts on the
        # just-cleared gradients and has no effect on the update.
        if args.clip > 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()
    return total_loss

In [7]:
def _ranking_metrics(ranked_labels):
    """Return (reciprocal rank, average precision) for one ranked answer list.

    ranked_labels holds the relevance labels ordered from best to worst
    score; a label equal to 1 marks a relevant answer. Both values are 0
    when no label is relevant.
    """
    reciprocal_rank = 0
    num_rel = 0
    precision_sum = 0
    for rank, label in enumerate(ranked_labels, start=1):
        if label == 1:
            if num_rel == 0:
                # Only the first relevant answer counts towards MRR.
                reciprocal_rank = 1 / rank
            num_rel += 1
            precision_sum += num_rel / rank
    avg_pre = precision_sum / num_rel if num_rel > 0 else 0
    return reciprocal_rank, avg_pre


def evaluate(Q_test, A_test, Y_test):
    """Score the notebook-level model on a ranking test set.

    Args:
        Q_test, A_test, Y_test: parallel sequences with one entry per
            question. Entry i is converted with torch.tensor and fed to the
            model as a single batch; presumably it holds all candidate
            answers for question i (TODO confirm against the preprocessor).

    Returns:
        (MAP, MRR), both averaged over every question, including questions
        with no relevant answer, which contribute 0.

    Raises:
        ValueError: if the test set is empty.
        RuntimeError: if the model produces NaN scores.
    """
    num_q = len(Q_test)
    if num_q == 0:
        raise ValueError('evaluate() needs at least one test question')

    model.eval()
    mean_avg_precision = 0
    mean_rr = 0
    with torch.no_grad():
        for test_step in range(num_q):
            x_q = torch.tensor(Q_test[test_step], dtype=torch.long)
            x_a = torch.tensor(A_test[test_step], dtype=torch.long)
            labels = torch.tensor(Y_test[test_step], dtype=torch.long).view(-1).tolist()
            if args.cuda:
                x_q, x_a = x_q.cuda(), x_a.cuda()

            # Flatten so the ranking works whether or not the model output
            # has a trailing singleton dimension.
            scores = model(x_q, x_a).cpu().numpy().reshape(-1)
            if np.isnan(scores).any():
                # Raise instead of print + exit(): an exception dependably
                # stops the evaluation with a traceback.
                raise RuntimeError('output error on evaluation')

            # Negate the scores so argsort yields best-score-first order.
            ranked_labels = [labels[idx] for idx in np.argsort(-scores, axis=0)]
            reciprocal_rank, avg_pre = _ranking_metrics(ranked_labels)
            mean_rr += reciprocal_rank
            mean_avg_precision += avg_pre
    mean_rr = mean_rr / num_q
    mean_avg_precision = mean_avg_precision / num_q
    return mean_avg_precision, mean_rr


In [8]:
def validate(batch_size):
    """Return the average per-example loss of the notebook-level model on the dev set.

    The dev tensors (Q_dev, A_dev, Y_dev) are visited in their stored order,
    batch_size examples at a time, with gradients disabled. Batch losses from
    the notebook-level criterion are accumulated and divided by the number of
    dev examples.
    """
    model.eval()
    num_examples = len(Q_dev)
    summed_loss = 0.0
    with torch.no_grad():
        for batch_idx in generate_batch_index(num_examples, batch_size, do_shuffle = False):
            questions = Variable(Q_dev[batch_idx])
            answers = Variable(A_dev[batch_idx])
            targets = Variable(Y_dev[batch_idx])
            if args.cuda:
                questions = questions.cuda()
                answers = answers.cuda()
                targets = targets.cuda()
            batch_loss = criterion(model(questions, answers), targets)
            summed_loss += batch_loss.item()
    return summed_loss / num_examples

In [9]:
if __name__ == "__main__":
    # Training driver: train for args.epochs epochs, report and validate at
    # the configured intervals, checkpoint the weights with the lowest
    # validation loss, then report that checkpoint's dev loss and (when
    # --eval is set) its MAP / MRR on the test set.
    import os  # local import: only this driver needs it

    best_vloss = None
    model_file = "./save/{0}.pt".format(args.model_name)
    # torch.save does not create missing directories, so make sure the
    # checkpoint directory exists before the first save.
    os.makedirs(os.path.dirname(model_file), exist_ok=True)

    start_time = datetime.now()
    print("[{:s}] model: <{:s}>; lr: <{:.5f}>; optimizer: <{:s}>".format(
        str(start_time), args.model_name, args.lr, args.optim))
    for ep in range(1, args.epochs+1):
        rloss = train_per_epoch(args.batchs)

        if ep % args.report_step == 0:
            now = datetime.now()
            dist_time = now - start_time
            print("[{:s}] Step {:2d}/{:2d}; loss: {:.5f}; {:.1f}s ".format(str(now),
                                                                           ep,
                                                                           args.epochs,
                                                                           rloss,
                                                                           dist_time.total_seconds()))

        if ep % args.valid_step == 0:
            vloss = validate(args.val_batchs)
            print("\t Validation loss {:.5f} ".format(vloss))
            if best_vloss is None or vloss < best_vloss:
                torch.save({'state_dict': model.state_dict()}, model_file)
                best_vloss = vloss
    print('-' * 80)

    if best_vloss is not None:
        # Restore the best checkpoint written during this run.
        checkpoint = torch.load(model_file)
        model.load_state_dict(checkpoint['state_dict'])
    else:
        # No validation step ran (e.g. valid_step > epochs), so this run
        # wrote no checkpoint. Keep the final weights rather than loading a
        # stale or missing file.
        print('WARNING: no checkpoint was saved during this run; using final weights')

    best_vloss = validate(args.val_batchs)
    print('Best performance: {:.5f}'.format(best_vloss))
    if args.eval:
        mAP_score, mrr_score = evaluate(Q_test, A_test, Y_test)
        print('Evaluation- MAP: {:.5f}, MRR: {:.5f}'.format(mAP_score, mrr_score))
[2019-04-03 00:51:56.154125] model: <AP2d>; lr: <0.00100>; optimizer: <Adam>
[2019-04-03 00:52:09.038004] Step  1/15; loss: 0.20630; 12.9s 
	 Validation loss 0.19965 
[2019-04-03 00:52:21.154130] Step  2/15; loss: 0.19629; 25.0s 
	 Validation loss 0.19522 
[2019-04-03 00:52:33.194279] Step  3/15; loss: 0.18961; 37.0s 
	 Validation loss 0.19087 
[2019-04-03 00:52:45.232429] Step  4/15; loss: 0.17896; 49.1s 
	 Validation loss 0.18431 
[2019-04-03 00:52:57.282575] Step  5/15; loss: 0.16760; 61.1s 
	 Validation loss 0.18121 
[2019-04-03 00:53:09.329722] Step  6/15; loss: 0.15567; 73.2s 
	 Validation loss 0.17984 
[2019-04-03 00:53:21.422855] Step  7/15; loss: 0.14411; 85.3s 
	 Validation loss 0.18269 
[2019-04-03 00:53:33.599960] Step  8/15; loss: 0.13297; 97.4s 
	 Validation loss 0.18321 
[2019-04-03 00:53:45.636111] Step  9/15; loss: 0.12230; 109.5s 
	 Validation loss 0.18791 
[2019-04-03 00:53:57.720246] Step 10/15; loss: 0.11230; 121.6s 
	 Validation loss 0.19154 
[2019-04-03 00:54:09.